Skip to content

Commit 7635b3f

Browse files
authored
Optimize applyFromArray by caching existing styles (#1785)
Prevent calling clone and getHashCode when not needed, because these calls are very expensive. When applying styles to a range of cells, we can cache the styles we encounter along the way so we don't need to look them up with getHashCode later.
1 parent 5b4b12f commit 7635b3f

File tree

3 files changed

+102
-3
lines changed

3 files changed

+102
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
144144
- Column width and Row height styles in the Html Reader when the value includes a unit of measure. [Issue #2145](https://github.com/PHPOffice/PhpSpreadsheet/issues/2145).
145145
- Data Validation flags not set correctly when reading XLSX files. [Issue #2224](https://github.com/PHPOffice/PhpSpreadsheet/issues/2224) [PR #2225](https://github.com/PHPOffice/PhpSpreadsheet/pull/2225)
146146
- Reading XLSX files without styles.xml throws an exception. [Issue #2246](https://github.com/PHPOffice/PhpSpreadsheet/issues/2246)
147+
- Improved performance of `Style::applyFromArray()` when applied to several cells. [PR #1785](https://github.com/PHPOffice/PhpSpreadsheet/pull/1785).
147148
- Improve XLSX parsing speed if no readFilter is applied (again) - [#772](https://github.com/PHPOffice/PhpSpreadsheet/issues/772)
148149

149150
## 1.18.0 - 2021-05-31

src/PhpSpreadsheet/Style/Style.php

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,27 @@ class Style extends Supervisor
6363
*/
6464
protected $quotePrefix = false;
6565

66+
/**
67+
* Internal cache for styles
68+
* Used when applying style on range of cells (column or row) and cleared when
69+
* all cells in the range are styled.
70+
*
71+
* PhpSpreadsheet will always minimize the amount of styles used. So cells with
72+
* same styles will reference the same Style instance. To check if two styles
73+
* are similar Style::getHashCode() is used. This call is expensive. To minimize
74+
* the need to call this method we can cache the internal PHP object id of the
75+
* Style in the range. Style::getHashCode() will then only be called when we
76+
* encounter a unique style.
77+
*
78+
* @see Style::applyFromArray()
79+
* @see Style::getHashCode()
80+
*
81+
* @phpstan-var null|array{styleByHash: array<string, Style>, hashByObjId: array<int, string>}
82+
*
83+
* @var array<string, array>
84+
*/
85+
private static $cachedStyles;
86+
6687
/**
6788
* Create a new Style.
6889
*
@@ -341,8 +362,14 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
341362
// Selection type, inspect
342363
if (preg_match('/^[A-Z]+1:[A-Z]+1048576$/', $pRange)) {
343364
$selectionType = 'COLUMN';
365+
366+
// Enable caching of styles
367+
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
344368
} elseif (preg_match('/^A\d+:XFD\d+$/', $pRange)) {
345369
$selectionType = 'ROW';
370+
371+
// Enable caching of styles
372+
self::$cachedStyles = ['hashByObjId' => [], 'styleByHash' => []];
346373
} else {
347374
$selectionType = 'CELL';
348375
}
@@ -355,13 +382,55 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
355382
$newXfIndexes = [];
356383
foreach ($oldXfIndexes as $oldXfIndex => $dummy) {
357384
$style = $workbook->getCellXfByIndex($oldXfIndex);
358-
$newStyle = clone $style;
359-
$newStyle->applyFromArray($pStyles);
360385

361-
if ($existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode())) {
386+
// $cachedStyles is set when applying style for a range of cells, either column or row
387+
if (self::$cachedStyles === null) {
388+
// Clone the old style and apply style-array
389+
$newStyle = clone $style;
390+
$newStyle->applyFromArray($pStyles);
391+
392+
// Look for existing style we can use instead (reduce memory usage)
393+
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());
394+
} else {
395+
// Style cache is stored by Style::getHashCode(). But calling this method is
396+
// expensive. So we cache the php obj id -> hash.
397+
$objId = spl_object_id($style);
398+
399+
// Look for the original HashCode
400+
$styleHash = self::$cachedStyles['hashByObjId'][$objId] ?? null;
401+
if ($styleHash === null) {
402+
// This object id is not cached yet; store its hash code in case we encounter it again
403+
$styleHash = self::$cachedStyles['hashByObjId'][$objId] = $style->getHashCode();
404+
}
405+
406+
// Find existing style by hash.
407+
$existingStyle = self::$cachedStyles['styleByHash'][$styleHash] ?? null;
408+
409+
if (!$existingStyle) {
410+
// The old style combined with the new style array is not cached, so we create it now
411+
$newStyle = clone $style;
412+
$newStyle->applyFromArray($pStyles);
413+
414+
// Look for similar style in workbook to reduce memory usage
415+
$existingStyle = $workbook->getCellXfByHashCode($newStyle->getHashCode());
416+
417+
// Cache the new style by original hashcode
418+
self::$cachedStyles['styleByHash'][$styleHash] = $existingStyle instanceof self ? $existingStyle : $newStyle;
419+
}
420+
}
421+
422+
if ($existingStyle) {
362423
// there is already such cell Xf in our collection
363424
$newXfIndexes[$oldXfIndex] = $existingStyle->getIndex();
364425
} else {
426+
if (!isset($newStyle)) {
427+
// Handle bug in PHPStan, see https://github.com/phpstan/phpstan/issues/5805
428+
// $newStyle should always be defined.
429+
// This block might not be needed in the future
430+
$newStyle = clone $style;
431+
$newStyle->applyFromArray($pStyles);
432+
}
433+
365434
// we don't have such a cell Xf, need to add
366435
$workbook->addCellXf($newStyle);
367436
$newXfIndexes[$oldXfIndex] = $newStyle->getIndex();
@@ -377,6 +446,9 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
377446
$columnDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
378447
}
379448

449+
// Disable caching of styles
450+
self::$cachedStyles = null;
451+
380452
break;
381453
case 'ROW':
382454
for ($row = $rangeStartIndexes[1]; $row <= $rangeEndIndexes[1]; ++$row) {
@@ -386,6 +458,9 @@ public function applyFromArray(array $pStyles, $pAdvanced = true)
386458
$rowDimension->setXfIndex($newXfIndexes[$oldXfIndex]);
387459
}
388460

461+
// Disable caching of styles
462+
self::$cachedStyles = null;
463+
389464
break;
390465
case 'CELL':
391466
for ($col = $rangeStartIndexes[0]; $col <= $rangeEndIndexes[0]; ++$col) {

tests/PhpSpreadsheetTests/Style/StyleTest.php

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,29 @@ public function testStyleColumn(): void
5555
self::assertFalse($sheet->getStyle('C3')->getFont()->getItalic());
5656
}
5757

58+
public function testStyleIsReused(): void
59+
{
60+
$spreadsheet = new Spreadsheet();
61+
$sheet = $spreadsheet->getActiveSheet();
62+
$styleArray = [
63+
'font' => [
64+
'italic' => true,
65+
],
66+
];
67+
68+
$sheet->getStyle('A1')->getFont()->setBold(true);
69+
$sheet->getStyle('A2')->getFont()->setBold(true);
70+
$sheet->getStyle('A3')->getFont()->setBold(true);
71+
$sheet->getStyle('A3')->getFont()->setItalic(true);
72+
73+
$sheet->getStyle('A')->applyFromArray($styleArray);
74+
75+
self::assertCount(4, $spreadsheet->getCellXfCollection());
76+
$spreadsheet->garbageCollect();
77+
78+
self::assertCount(3, $spreadsheet->getCellXfCollection());
79+
}
80+
5881
public function testStyleRow(): void
5982
{
6083
$spreadsheet = new Spreadsheet();

0 commit comments

Comments
 (0)