Skip to content

Commit 18a5396

Browse files
committed
Improve Performance of Csv Writer
Fix #3904. PR #3839 provided a huge performance boost for sparsely populated spreadsheets. Unfortunately, it degraded performance for more densely populated spreadsheets when writing Csv files. The reason is that Csv Writer calls rangeToArray once for each row, so much of the intermediate data used to speed things up has to be recalculated for every row. It would be better to call it just once for the entire spreadsheet; however, doing so gives up some of the memory improvements of PR #3834. That memory cost can be substantially reduced by supplying a Generator function that yields one row at a time. This PR does that; as a result, Csv Writer is now considerably faster than it was in PhpSpreadsheet 1.29, with only a small increase in memory use.
1 parent d620497 commit 18a5396

File tree

2 files changed

+47
-20
lines changed

2 files changed

+47
-20
lines changed

src/PhpSpreadsheet/Worksheet/Worksheet.php

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace PhpOffice\PhpSpreadsheet\Worksheet;
44

55
use ArrayObject;
6+
use Generator;
67
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
78
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
89
use PhpOffice\PhpSpreadsheet\Cell\AddressRange;
@@ -2800,9 +2801,6 @@ protected function cellToArray(Cell $cell, bool $calculateFormulas, bool $format
28002801
return $returnValue;
28012802
}
28022803

2803-
/** @var array<string, bool> */
2804-
private array $hiddenColumns;
2805-
28062804
/**
28072805
* Create array from a range of cells.
28082806
*
@@ -2822,9 +2820,40 @@ public function rangeToArray(
28222820
bool $returnCellRef = false,
28232821
bool $ignoreHidden = false
28242822
): array {
2823+
$returnValue = [];
2824+
2825+
// Loop through rows
2826+
foreach ($this->rangeToArrayYieldRows($range, $nullValue, $calculateFormulas, $formatData, $returnCellRef, $ignoreHidden) as $rowRef => $rowArray) {
2827+
$returnValue[$rowRef] = $rowArray;
2828+
}
2829+
2830+
// Return
2831+
return $returnValue;
2832+
}
2833+
2834+
/**
2835+
* Create array from a range of cells, yielding each row in turn.
2836+
*
2837+
* @param mixed $nullValue Value returned in the array entry if a cell doesn't exist
2838+
* @param bool $calculateFormulas Should formulas be calculated?
2839+
* @param bool $formatData Should formatting be applied to cell values?
2840+
* @param bool $returnCellRef False - Return a simple array of rows and columns indexed by number counting from zero
2841+
* True - Return rows and columns indexed by their actual row and column IDs
2842+
* @param bool $ignoreHidden False - Return values for rows/columns even if they are defined as hidden.
2843+
* True - Don't return values for rows/columns that are defined as hidden.
2844+
*
2845+
* @return Generator Yields each row in turn as a "row reference => array of that row's cell values" pair
2846+
*/
2847+
public function rangeToArrayYieldRows(
2848+
string $range,
2849+
mixed $nullValue = null,
2850+
bool $calculateFormulas = true,
2851+
bool $formatData = true,
2852+
bool $returnCellRef = false,
2853+
bool $ignoreHidden = false
2854+
) {
28252855
$range = Validations::validateCellOrCellRange($range);
28262856

2827-
$returnValue = [];
28282857
// Identify the range that we need to extract from the worksheet
28292858
[$rangeStart, $rangeEnd] = Coordinate::rangeBoundaries($range);
28302859
$minCol = Coordinate::stringFromColumnIndex($rangeStart[0]);
@@ -2835,8 +2864,10 @@ public function rangeToArray(
28352864
$maxColInt = $rangeEnd[0];
28362865

28372866
++$maxCol;
2838-
$nullRow = $this->buildNullRow($nullValue, $minCol, $maxCol, $returnCellRef, $ignoreHidden);
2839-
$hideColumns = !empty($this->hiddenColumns);
2867+
/** @var array<string, bool> */
2868+
$hiddenColumns = [];
2869+
$nullRow = $this->buildNullRow($nullValue, $minCol, $maxCol, $returnCellRef, $ignoreHidden, $hiddenColumns);
2870+
$hideColumns = !empty($hiddenColumns);
28402871

28412872
$keys = $this->cellCollection->getSortedCoordinatesInt();
28422873
$keyIndex = 0;
@@ -2847,7 +2878,7 @@ public function rangeToArray(
28472878
continue;
28482879
}
28492880
$rowRef = $returnCellRef ? $row : ($row - $minRow);
2850-
$returnValue[$rowRef] = $nullRow;
2881+
$returnValue = $nullRow;
28512882

28522883
$index = ($row - 1) * AddressRange::MAX_COLUMN_INT + 1;
28532884
$indexPlus = $index + AddressRange::MAX_COLUMN_INT - 1;
@@ -2860,24 +2891,22 @@ public function rangeToArray(
28602891
$thisCol = ($key % AddressRange::MAX_COLUMN_INT) ?: AddressRange::MAX_COLUMN_INT;
28612892
if ($thisCol >= $minColInt && $thisCol <= $maxColInt) {
28622893
$col = Coordinate::stringFromColumnIndex($thisCol);
2863-
if ($hideColumns === false || !isset($this->hiddenColumns[$col])) {
2894+
if ($hideColumns === false || !isset($hiddenColumns[$col])) {
28642895
$columnRef = $returnCellRef ? $col : ($thisCol - $minColInt);
28652896
$cell = $this->cellCollection->get("{$col}{$thisRow}");
28662897
if ($cell !== null) {
28672898
$value = $this->cellToArray($cell, $calculateFormulas, $formatData, $nullValue);
28682899
if ($value !== $nullValue) {
2869-
$returnValue[$rowRef][$columnRef] = $value;
2900+
$returnValue[$columnRef] = $value;
28702901
}
28712902
}
28722903
}
28732904
}
28742905
++$keyIndex;
28752906
}
2876-
}
2877-
unset($this->hiddenColumns);
28782907

2879-
// Return
2880-
return $returnValue;
2908+
yield $rowRef => $returnValue;
2909+
}
28812910
}
28822911

28832912
/**
@@ -2890,20 +2919,21 @@ public function rangeToArray(
28902919
* True - Return rows and columns indexed by their actual row and column IDs
28912920
* @param bool $ignoreHidden False - Return values for rows/columns even if they are defined as hidden.
28922921
* True - Don't return values for rows/columns that are defined as hidden.
2922+
* @param array<string, bool> $hiddenColumns
28932923
*/
28942924
private function buildNullRow(
28952925
mixed $nullValue,
28962926
string $minCol,
28972927
string $maxCol,
28982928
bool $returnCellRef,
28992929
bool $ignoreHidden,
2930+
array &$hiddenColumns
29002931
): array {
2901-
$this->hiddenColumns = [];
29022932
$nullRow = [];
29032933
$c = -1;
29042934
for ($col = $minCol; $col !== $maxCol; ++$col) {
29052935
if ($ignoreHidden === true && $this->columnDimensionExists($col) && $this->getColumnDimension($col)->getVisible() === false) {
2906-
$this->hiddenColumns[$col] = true;
2936+
$hiddenColumns[$col] = true;
29072937
} else {
29082938
$columnRef = $returnCellRef ? $col : ++$c;
29092939
$nullRow[$columnRef] = $nullValue;

src/PhpSpreadsheet/Writer/Csv.php

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,8 @@ public function save($filename, int $flags = 0): void
104104
$maxRow = $sheet->getHighestDataRow();
105105

106106
// Write rows to file
107-
for ($row = 1; $row <= $maxRow; ++$row) {
108-
// Convert the row to an array...
109-
$cellsArray = $sheet->rangeToArray('A' . $row . ':' . $maxCol . $row, '', $this->preCalculateFormulas);
110-
// ... and write to the file
111-
$this->writeLine($this->fileHandle, $cellsArray[0]);
107+
foreach ($sheet->rangeToArrayYieldRows("A1:$maxCol$maxRow", '', $this->preCalculateFormulas) as $cellsArray) {
108+
$this->writeLine($this->fileHandle, $cellsArray);
112109
}
113110

114111
$this->maybeCloseFileHandle();

0 commit comments

Comments
 (0)