1: <?php
2:
3: namespace SimpleExcel\Parser;
4:
5: use SimpleExcel\Enums\SimpleExcelException;
6: use SimpleExcel\Spreadsheet\Workbook;
7: use SimpleExcel\Spreadsheet\Worksheet;
8:
9: 10: 11: 12: 13: 14:
/**
 * Parser that turns the <table> elements of an HTML document into worksheets.
 *
 * Every <table> found in the document (nested ones included, because they are
 * looked up by tag name) becomes one Worksheet. Each <tr> becomes one record
 * and each <th>/<td> one cell holding the element's text content.
 */
class HTMLParser extends BaseParser implements IParser
{
    /**
     * File extension associated with this parser.
     *
     * @var string
     */
    protected $file_extension = 'html';

    /**
     * Rebuild $this->workbook from a parsed HTML document.
     *
     * Rows are read from <tr> elements that are direct children of the table
     * and from <tr> elements inside its <thead>, <tbody> and <tfoot> sections.
     * Rows are inserted in document order, except that <tfoot> rows are held
     * back and appended after all other rows of the same table: markup may
     * place <tfoot> before <tbody>, while a footer belongs at the bottom.
     *
     * @param \DOMDocument $html Document whose tables are converted.
     *
     * @return void
     */
    protected function parseDOM($html)
    {
        $this->workbook = new Workbook();
        $row_sections = array('thead', 'tbody', 'tfoot');

        foreach ($html->getElementsByTagName('table') as $table) {
            $sheet = new Worksheet();
            $footer_rows = array();

            foreach ($table->childNodes as $section) {
                if ($section->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }

                // A row written directly under <table>, without a section wrapper.
                if ($section->nodeName === 'tr') {
                    $sheet->insertRecord($this->readRowCells($section));
                    continue;
                }

                if (!in_array($section->nodeName, $row_sections, true)) {
                    continue;
                }

                foreach ($section->childNodes as $tr) {
                    if ($tr->nodeType !== XML_ELEMENT_NODE || $tr->nodeName !== 'tr') {
                        continue;
                    }
                    if ($section->nodeName === 'tfoot') {
                        $footer_rows[] = $this->readRowCells($tr);
                    } else {
                        $sheet->insertRecord($this->readRowCells($tr));
                    }
                }
            }

            foreach ($footer_rows as $footer_row) {
                $sheet->insertRecord($footer_row);
            }

            $this->workbook->insertWorksheet($sheet);
        }
    }

    /**
     * Collect the text content of the <th>/<td> children of one table row.
     *
     * @param \DOMNode $tr The <tr> element to read.
     *
     * @return array Cell values in document order; empty when the row has no cells.
     */
    private function readRowCells(\DOMNode $tr)
    {
        $cells = array();
        foreach ($tr->childNodes as $cell) {
            if ($cell->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }
            if ($cell->nodeName === 'th' || $cell->nodeName === 'td') {
                $cells[] = $cell->nodeValue;
            }
        }

        return $cells;
    }

    /**
     * Put libxml error reporting back to the state it had before a quiet parse.
     *
     * @param bool $previous Value returned by libxml_use_internal_errors(true)
     *                       when buffering was switched on.
     *
     * @return void
     */
    private function restoreLibxmlErrors($previous)
    {
        if (!$previous) {
            // Buffering was enabled only for our parse, so the collected
            // errors are ours to discard. If the caller had buffering on
            // already, the errors are left in place for the caller to inspect.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
    }

    /**
     * Load an HTML file and convert its tables into the workbook.
     *
     * Nothing is loaded when checkFile() (defined outside this class) returns
     * a falsy value. The parsed document is handed to parseDOM() directly
     * instead of being serialised and parsed a second time.
     *
     * @param string $file_path Path of the HTML file.
     * @param mixed  $options   Not used by this parser.
     *
     * @return void
     */
    public function loadFile($file_path, $options = NULL)
    {
        if (!$this->checkFile($file_path)) {
            return;
        }

        $html = new \DOMDocument();
        // Real-world HTML is rarely valid for libxml; buffer its complaints
        // instead of letting each one surface as a PHP warning.
        $previous = libxml_use_internal_errors(true);
        $html->loadHTMLFile($file_path);
        $this->restoreLibxmlErrors($previous);

        $this->parseDOM($html);
    }

    /**
     * Parse an HTML string and convert its tables into the workbook.
     *
     * An empty (or null) string results in a workbook without worksheets.
     *
     * @param string $str     HTML markup.
     * @param mixed  $options Not used by this parser.
     *
     * @return void
     */
    public function loadString($str, $options = NULL)
    {
        $html = new \DOMDocument();

        // DOMDocument::loadHTML() rejects an empty source (a ValueError on
        // PHP 8), so an empty input is treated as a document with no tables.
        if ($str !== null && $str !== '') {
            $previous = libxml_use_internal_errors(true);
            $html->loadHTML($str);
            $this->restoreLibxmlErrors($previous);
        }

        $this->parseDOM($html);
    }
}
94: