88use Box \Spout \Reader \IteratorInterface ;
99use Box \Spout \Reader \ODS \Helper \CellValueFormatter ;
1010use Box \Spout \Reader \Wrapper \XMLReader ;
11+ use Box \Spout \Reader \Common \XMLProcessor ;
1112
1213/**
1314 * Class RowIterator
@@ -29,6 +30,9 @@ class RowIterator implements IteratorInterface
2930 /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
3031 protected $ xmlReader ;
3132
33+ /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
34+ protected $ xmlProcessor ;
35+
3236 /** @var bool Whether empty rows should be returned or skipped */
3337 protected $ shouldPreserveEmptyRows ;
3438
@@ -38,6 +42,9 @@ class RowIterator implements IteratorInterface
3842 /** @var bool Whether the iterator has already been rewound once */
3943 protected $ hasAlreadyBeenRewound = false ;
4044
45+ /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
46+ protected $ currentlyProcessedRowData = [];
47+
4148 /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
4249 protected $ rowDataBuffer = null ;
4350
@@ -72,6 +79,13 @@ public function __construct($xmlReader, $options)
7279 $ this ->xmlReader = $ xmlReader ;
7380 $ this ->shouldPreserveEmptyRows = $ options ->shouldPreserveEmptyRows ();
7481 $ this ->cellValueFormatter = new CellValueFormatter ($ options ->shouldFormatDates ());
82+
83+ // Register all callbacks to process different nodes when reading the XML file
84+ $ this ->xmlProcessor = new XMLProcessor ($ this ->xmlReader );
85+ $ this ->xmlProcessor ->registerCallback (self ::XML_NODE_ROW , XMLProcessor::NODE_TYPE_START , [$ this , 'processRowStartingNode ' ]);
86+ $ this ->xmlProcessor ->registerCallback (self ::XML_NODE_CELL , XMLProcessor::NODE_TYPE_START , [$ this , 'processCellStartingNode ' ]);
87+ $ this ->xmlProcessor ->registerCallback (self ::XML_NODE_ROW , XMLProcessor::NODE_TYPE_END , [$ this , 'processRowEndingNode ' ]);
88+ $ this ->xmlProcessor ->registerCallback (self ::XML_NODE_TABLE , XMLProcessor::NODE_TYPE_END , [$ this , 'processTableEndingNode ' ]);
7589 }
7690
7791 /**
@@ -122,7 +136,7 @@ public function valid()
122136 public function next ()
123137 {
124138 if ($ this ->doesNeedDataForNextRowToBeProcessed ()) {
125- $ this ->readDataForNextRow ($ this -> xmlReader );
139+ $ this ->readDataForNextRow ();
126140 }
127141
128142 $ this ->lastRowIndexProcessed ++;
@@ -148,54 +162,26 @@ protected function doesNeedDataForNextRowToBeProcessed()
148162 }
149163
150164 /**
 * Reads the XML data for the next row and stores it in "$this->rowDataBuffer".
 * The cells of the row are accumulated in "$this->currentlyProcessedRowData", which is
 * reset before reading starts and copied to the buffer once reading has stopped.
 *
151- * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
152165 * @return void
153166 * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
 *         NOTE(review): nothing visible in this method throws this exception - confirm the tag is still relevant
154167 * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
155168 */
156- protected function readDataForNextRow ($ xmlReader )
169+ protected function readDataForNextRow ()
157170 {
158- $ rowData = [];
171+ $ this -> currentlyProcessedRowData = [];
159172
160173 try {
161- while ($ xmlReader ->read ()) {
162- if ($ xmlReader ->isPositionedOnStartingNode (self ::XML_NODE_ROW )) {
163- $ this ->processRowStartingNode ($ xmlReader );
164-
165- } else if ($ xmlReader ->isPositionedOnStartingNode (self ::XML_NODE_CELL )) {
166- $ rowData = $ this ->processCellStartingNode ($ xmlReader , $ rowData );
167-
168- } else if ($ xmlReader ->isPositionedOnEndingNode (self ::XML_NODE_ROW )) {
169- $ isEmptyRow = $ this ->isEmptyRow ($ rowData , $ this ->lastProcessedCellValue );
170-
171- // if the fetched row is empty and we don't want to preserve it...
172- if (!$ this ->shouldPreserveEmptyRows && $ isEmptyRow ) {
173- // ... skip it
174- continue ;
175- }
176-
177- $ rowData = $ this ->processRowEndingNode ($ rowData , $ isEmptyRow );
178-
179- // at this point, we have all the data we need for the row
180- // so that we can populate the buffer
181- break ;
182-
183- } else if ($ xmlReader ->isPositionedOnEndingNode (self ::XML_NODE_TABLE )) {
184- $ this ->processTableEndingNode ();
185- break ;
186- }
187- }
188-
// Delegate the node traversal to the XML processor. Presumably it reads nodes and invokes
// the callbacks registered in the constructor until one of them returns
// XMLProcessor::PROCESSING_STOP - TODO confirm against XMLProcessor.
174+ $ this ->xmlProcessor ->readUntilStopped ();
// Re-throw XML processing errors as the IOException advertised in the docblock
189175 } catch (XMLProcessingException $ exception ) {
190176 throw new IOException ("The sheet's data cannot be read. [ {$ exception ->getMessage ()}] " );
191177 }
192178
193- $ this ->rowDataBuffer = $ rowData ;
179+ $ this ->rowDataBuffer = $ this -> currentlyProcessedRowData ;
194180 }
195181
196182 /**
197183 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
198- * @return void
184+ * @return int A return code that indicates which action the processor should take next
199185 */
200186 protected function processRowStartingNode ($ xmlReader )
201187 {
@@ -204,14 +190,15 @@ protected function processRowStartingNode($xmlReader)
204190 $ this ->lastProcessedCellValue = null ;
205191 $ this ->numColumnsRepeated = 1 ;
206192 $ this ->numRowsRepeated = $ this ->getNumRowsRepeatedForCurrentNode ($ xmlReader );
193+
194+ return XMLProcessor::PROCESSING_CONTINUE ;
207195 }
208196
209197 /**
210198 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
211- * @param array $rowData Data of all cells read so far
212- * @return array Original row data + data for the cell that was just read
199+ * @return int A return code that indicates which action the processor should take next
213200 */
214- protected function processCellStartingNode ($ xmlReader, $ rowData )
201+ protected function processCellStartingNode ($ xmlReader )
215202 {
216203 $ currentNumColumnsRepeated = $ this ->getNumColumnsRepeatedForCurrentNode ($ xmlReader );
217204
@@ -221,53 +208,63 @@ protected function processCellStartingNode($xmlReader, $rowData)
221208 // process cell N only after having read cell N+1 (see below why)
222209 if ($ this ->hasAlreadyReadOneCellInCurrentRow ) {
223210 for ($ i = 0 ; $ i < $ this ->numColumnsRepeated ; $ i ++) {
224- $ rowData [] = $ this ->lastProcessedCellValue ;
211+ $ this -> currentlyProcessedRowData [] = $ this ->lastProcessedCellValue ;
225212 }
226213 }
227214
228215 $ this ->hasAlreadyReadOneCellInCurrentRow = true ;
229216 $ this ->lastProcessedCellValue = $ currentCellValue ;
230217 $ this ->numColumnsRepeated = $ currentNumColumnsRepeated ;
231218
232- return $ rowData ;
219+ return XMLProcessor:: PROCESSING_CONTINUE ;
233220 }
234221
235222 /**
 * Finalizes the row being processed once its ending node is reached: appends the last read
 * cell value (repeated as needed) to "$this->currentlyProcessedRowData" and advances
 * "$this->nextRowIndexToBeProcessed" by the number of times the row is repeated.
 * Registered in the constructor as the callback for the ending XML_NODE_ROW node.
 *
236- * @param array $rowData Data of all cells read so far
237- * @param bool $isEmptyRow Whether the given row is empty
238- * @return array
223+ * @return int XMLProcessor::PROCESSING_CONTINUE when an empty row is skipped, XMLProcessor::PROCESSING_STOP once the row data is complete
239224 */
240- protected function processRowEndingNode ($ rowData , $ isEmptyRow )
225+ protected function processRowEndingNode ()
241226 {
227+ $ isEmptyRow = $ this ->isEmptyRow ($ this ->currentlyProcessedRowData , $ this ->lastProcessedCellValue );
228+
229+ // if the fetched row is empty and we don't want to preserve it...
230+ if (!$ this ->shouldPreserveEmptyRows && $ isEmptyRow ) {
231+ // ... skip it: nothing is added to the row data and the next row index is left unchanged
232+ return XMLProcessor::PROCESSING_CONTINUE ;
233+ }
234+
242235 // if the row is empty, we don't want to return more than one cell
243236 $ actualNumColumnsRepeated = (!$ isEmptyRow ) ? $ this ->numColumnsRepeated : 1 ;
244237
245238 // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
246- // The current count of read columns is determined by counting the values in $rowData .
239+ // The current count of read columns is determined by counting the values in "$this->currentlyProcessedRowData".
247240 // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
248241 // with a number-columns-repeated value equal to the number of (supported columns - used columns).
249242 // In Excel, the number of supported columns is 16384, but we don't want to return rows that
250243 // always have 16384 cells.
251- if ((count ($ rowData ) + $ actualNumColumnsRepeated ) !== self ::MAX_COLUMNS_EXCEL ) {
244+ if ((count ($ this -> currentlyProcessedRowData ) + $ actualNumColumnsRepeated ) !== self ::MAX_COLUMNS_EXCEL ) {
252245 for ($ i = 0 ; $ i < $ actualNumColumnsRepeated ; $ i ++) {
253- $ rowData [] = $ this ->lastProcessedCellValue ;
246+ $ this -> currentlyProcessedRowData [] = $ this ->lastProcessedCellValue ;
254247 }
255248 }
256249
257250 // If we are processing row N and the row is repeated M times,
258251 // then the next row to be processed will be row (N+M).
259252 $ this ->nextRowIndexToBeProcessed += $ this ->numRowsRepeated ;
260253
261- return $ rowData ;
254+ // at this point, we have all the data we need for the row,
255+ // so we can stop reading and let the caller populate the buffer
256+ return XMLProcessor::PROCESSING_STOP ;
257 }
263258
264259 /**
 * Records that the end of the file has been reached by setting "$this->hasReachedEndOfFile".
 * Registered in the constructor as the callback for the ending XML_NODE_TABLE node.
 *
265- * @return void
260+ * @return int Always XMLProcessor::PROCESSING_STOP, as there is nothing left to read
266261 */
267262 protected function processTableEndingNode ()
268263 {
269264 // The closing "</table:table>" marks the end of the file
270265 $ this ->hasReachedEndOfFile = true ;
266+
267+ return XMLProcessor::PROCESSING_STOP ;
271268 }
272269
273270 /**
0 commit comments