Overview

Namespaces

  • PHP
  • SimpleExcel
    • Enums
    • Parser
    • Spreadsheet
    • Writer

Classes

  • BaseParser
  • CSVParser
  • HTMLParser
  • JSONParser
  • TSVParser
  • XLSXParser
  • XMLParser

Interfaces

  • IParser
  • Overview
  • Namespace
  • Class
  • Tree
 1: <?php
 2: 
 3: namespace SimpleExcel\Parser;
 4: 
 5: use SimpleExcel\Enums\SimpleExcelException;
 6: use SimpleExcel\Spreadsheet\Workbook;
 7: use SimpleExcel\Spreadsheet\Worksheet;
 8: 
 9: /**
10:  * SimpleExcel class for parsing HTML table
11:  *  
12:  * @author  Faisalman
13:  * @package SimpleExcel
14:  */ 
class HTMLParser extends BaseParser implements IParser
{
    /**
    * Defines valid file extension
    *
    * @access   protected
    * @var      string
    */
    protected $file_extension = 'html';

    /**
    * Build a new workbook from the tables found in an HTML document
    *
    * Every <table> element becomes one worksheet. Rows are taken from <tr>
    * elements that are direct children of the table or of its <thead>,
    * <tbody> and <tfoot> sections; each <th>/<td> contributes its text
    * content as one cell. Rows without any cell are inserted as empty
    * records.
    *
    * @param    DOMDocument $html   DOMDocument object of HTML
    */
    protected function parseDOM($html){
        $this->workbook = new Workbook();
        foreach ($html->getElementsByTagName('table') as $table) {
            $sheet = new Worksheet();
            // <tfoot> may appear before <tbody> in the markup, so its rows
            // are held back and appended once all other rows are in place.
            $footer_rows = array();
            foreach ($table->childNodes as $child) {
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                switch ($child->nodeName) {
                    case 'tr':
                        // The parser does not add an implicit <tbody>, so
                        // rows may sit directly under the table element.
                        $sheet->insertRecord($this->extractRow($child));
                        break;
                    case 'thead':
                    case 'tbody':
                        foreach ($this->extractSectionRows($child) as $row) {
                            $sheet->insertRecord($row);
                        }
                        break;
                    case 'tfoot':
                        $footer_rows = array_merge($footer_rows, $this->extractSectionRows($child));
                        break;
                }
            }
            foreach ($footer_rows as $row) {
                $sheet->insertRecord($row);
            }
            $this->workbook->insertWorksheet($sheet);
        }
    }

    /**
    * Collect the rows of a table section (<thead>, <tbody> or <tfoot>)
    *
    * @param    DOMNode $section    Section element whose direct <tr> children are read
    * @return   array               List of rows, each row being a list of cell texts
    */
    private function extractSectionRows($section){
        $rows = array();
        foreach ($section->childNodes as $tr) {
            if ($tr->nodeType === XML_ELEMENT_NODE && $tr->nodeName === 'tr') {
                $rows[] = $this->extractRow($tr);
            }
        }
        return $rows;
    }

    /**
    * Collect the text content of the cells of a single table row
    *
    * @param    DOMNode $tr     Row element whose direct <th>/<td> children are read
    * @return   array           Cell texts in document order (empty if the row has no cells)
    */
    private function extractRow($tr){
        $row = array();
        foreach ($tr->childNodes as $cell) {
            if ($cell->nodeType === XML_ELEMENT_NODE && ($cell->nodeName === 'th' || $cell->nodeName === 'td')) {
                $row[] = $cell->nodeValue;
            }
        }
        return $row;
    }

    /**
    * Feed markup into a DOMDocument without emitting PHP warnings
    *
    * libxml reports every markup problem (unknown HTML5 tags, unclosed
    * elements, ...) as a PHP warning unless its internal error buffer is
    * enabled. The buffer is enabled for the duration of the load and the
    * previous setting is restored afterwards. Collected errors are only
    * discarded when this method enabled the buffer itself, so a caller that
    * had already turned it on can still inspect them.
    *
    * @param    DOMDocument $html       Document to load into
    * @param    string      $source     File path or HTML string
    * @param    bool        $is_file    TRUE if $source is a file path
    */
    private function loadQuietly(\DOMDocument $html, $source, $is_file){
        $previous = libxml_use_internal_errors(true);
        if ($is_file) {
            $html->loadHTMLFile($source);
        } else {
            $html->loadHTML($source);
        }
        if (!$previous) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
    }

    /**
    * Load the HTML file to be parsed
    *
    * Nothing is parsed unless checkFile() (defined outside this class)
    * accepts the path. The parsed document is handed to parseDOM() directly
    * instead of being serialised and parsed a second time.
    *
    * @param    string  $file_path  Path to HTML file
    * @param    array   $options    Options (currently not used by this parser)
    */
    public function loadFile($file_path, $options = NULL) {
        if ($this->checkFile($file_path)) {
            $html = new \DOMDocument();
            $this->loadQuietly($html, $file_path, true);
            $this->parseDOM($html);
        }
    }

    /**
    * Load the string to be parsed
    *
    * An empty (or NULL) string produces a workbook without worksheets;
    * it is not passed to DOMDocument::loadHTML(), which rejects empty input.
    *
    * @param    string  $str        String with HTML format
    * @param    array   $options    Options (currently not used by this parser)
    */
    public function loadString($str, $options = NULL){
        $html = new \DOMDocument();
        if ($str !== NULL && $str !== '') {
            $this->loadQuietly($html, $str, false);
        }
        $this->parseDOM($html);
    }
}
94: 
API documentation generated by ApiGen 2.8.0