Skip to content

Commit c3d24e5

Browse files
committed
WIP Some Additional Support for Intersection and Union
Add some additional support for Intersection and Union in the Calculation Engine. This allows me to reinstate 2 tests which were formerly skipped, without breaking any existing tests. There are almost certainly edge cases which I haven't thought of yet; I will leave this PR as a draft for several weeks before moving it forward. This change gives some opportunities for users to go wrong. If you place the following formula in a cell: ``` =B1:B8 A7:D7 ``` Excel will treat the space as an intersection operator, and will return the value in B7, which is where the ranges intersect. PhpSpreadsheet will do so as well. But, if you use the following formula: ``` =B1:B8,A7:D7 ``` Excel will return `#VALUE!`. This seems like something they forgot to take care of when adding dynamic arrays. The comma should be interpreted as a union operator, and PhpSpreadsheet will now return the union of the ranges. It seems very difficult for PhpSpreadsheet to return an error when the formula seems easily evaluated. Furthermore, if you use the following formula: ``` =SUM(B1:B8,A7:D7) ``` Excel evaluates it as you would expect, summing the union of the ranges. So does PhpSpreadsheet. I guess there's no rule requiring Excel to be consistent, but ... Another way that users might go wrong is by actually entering the union or intersection symbols in a formula rather than comma or space. Excel will not allow this; PhpSpreadsheet needs to change comma or space to the appropriate symbol in order for the rest of this PR to work properly. By the time the parser gets to it, it can't tell whether the symbol was part of the cell's original formula or if PhpSpreadsheet substituted it, so it just has to permit it. I do not expect many users to fall afoul of this problem, at least not more than once. Just by way of explanation, the reason why PhpSpreadsheet has to make the substitution is because intersection has a higher priority than union, just as multiplication has a higher priority than addition. 
So, if you don't change the symbols beforehand, you may wind up figuring out that you need to perform an intersection too late - a lower-priority union may have already taken place.
1 parent 2933bfd commit c3d24e5

File tree

4 files changed

+100
-19
lines changed

4 files changed

+100
-19
lines changed

src/PhpSpreadsheet/Calculation/Calculation.php

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,10 @@ private function internalParseFormula(string $formula, ?Cell $cell = null): bool
10951095
$this->branchPruner->initialiseForLoop();
10961096

10971097
$opCharacter = $formula[$index]; // Get the first character of the value at the current index position
1098+
if ($opCharacter === "\xe2") { // intersection or union
1099+
$opCharacter .= $formula[++$index];
1100+
$opCharacter .= $formula[++$index];
1101+
}
10981102

10991103
// Check for two-character operators (e.g. >=, <=, <>)
11001104
if ((isset(self::COMPARISON_OPERATORS[$opCharacter])) && (strlen($formula) > $index) && isset($formula[$index + 1], self::COMPARISON_OPERATORS[$formula[$index + 1]])) {
@@ -1115,7 +1119,7 @@ private function internalParseFormula(string $formula, ?Cell $cell = null): bool
11151119
++$index;
11161120
} elseif ($opCharacter === '+' && !$expectingOperator) { // Positive (unary plus rather than binary operator plus) can be discarded?
11171121
++$index; // Drop the redundant plus symbol
1118-
} elseif ((($opCharacter === '~') || ($opCharacter === '∩') || ($opCharacter === '∪')) && (!$isOperandOrFunction)) {
1122+
} elseif ((($opCharacter === '~') /*|| ($opCharacter === '∩') || ($opCharacter === '∪')*/) && (!$isOperandOrFunction)) {
11191123
// We have to explicitly deny a tilde, union or intersect because they are legal
11201124
return $this->raiseFormulaError("Formula Error: Illegal character '~'"); // on the stack but not in the input expression
11211125
} elseif ((isset(self::CALCULATION_OPERATORS[$opCharacter]) || $isOperandOrFunction) && $expectingOperator) { // Are we putting an operator on the stack?
@@ -1232,7 +1236,13 @@ private function internalParseFormula(string $formula, ?Cell $cell = null): bool
12321236
// because at least the braces are paired up (at this stage in the formula)
12331237
// MS Excel allows this if the content is cell references; but doesn't allow actual values,
12341238
// but at this point, we can't differentiate (so allow both)
1235-
return $this->raiseFormulaError('Formula Error: Unexpected ,');
1239+
//return $this->raiseFormulaError('Formula Error: Unexpected ,');
1240+
$stack->push('Binary Operator', '∪');
1241+
1242+
++$index;
1243+
$expectingOperator = false;
1244+
1245+
continue;
12361246
}
12371247

12381248
/** @var array<string, int> $d */
@@ -1927,6 +1937,14 @@ private function processTokenStack(false|array $tokens, ?string $cellID = null,
19271937
$stack->push('Value', $cellIntersect, $cellRef);
19281938
}
19291939

1940+
break;
1941+
case '∪': // union
1942+
/** @var mixed[][] $operand1 */
1943+
/** @var mixed[][] $operand2 */
1944+
$cellUnion = array_merge($operand1, $operand2);
1945+
$this->debugLog->writeDebugLog('Evaluation Result is %s', $this->showTypeDetails($cellUnion));
1946+
$stack->push('Value', $cellUnion, 'A1');
1947+
19301948
break;
19311949
}
19321950
} elseif (($token === '~') || ($token === '%')) {
@@ -2792,6 +2810,14 @@ private function evaluateDefinedName(Cell $cell, DefinedName $namedRange, Worksh
27922810

27932811
$definedNameValue = $namedRange->getValue();
27942812
$definedNameType = $namedRange->isFormula() ? 'Formula' : 'Range';
2813+
if ($definedNameType === 'Range') {
2814+
if (preg_match('/^(.*!)?(.*)$/', $definedNameValue, $matches) === 1) {
2815+
$matches2 = trim($matches[2]);
2816+
$matches2 = preg_replace('/ +/', '∩', $matches2) ?? $matches2;
2817+
$matches2 = preg_replace('/,/', '∪', $matches2) ?? $matches2;
2818+
$definedNameValue = $matches[1] . $matches2;
2819+
}
2820+
}
27952821
$definedNameWorksheet = $namedRange->getWorksheet();
27962822

27972823
if ($definedNameValue[0] !== '=') {

src/PhpSpreadsheet/Cell/Cell.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,7 @@ public function getCalculatedValue(bool $resetLog = true): mixed
453453
}
454454
$newColumn = $this->getColumn();
455455
if (is_array($result)) {
456+
$result = self::convertSpecialArray($result);
456457
$this->formulaAttributes['t'] = 'array';
457458
$this->formulaAttributes['ref'] = $maxCoordinate = $coordinate;
458459
$newRow = $row = $this->getRow();
@@ -582,6 +583,36 @@ public function getCalculatedValue(bool $resetLog = true): mixed
582583
return $this->convertDateTimeInt($this->value);
583584
}
584585

586+
/**
587+
* Convert array like the following (preserve values, lose indexes):
588+
* [
589+
* rowNumber1 => [colLetter1 => value, colLetter2 => value ...],
590+
* rowNumber2 => [colLetter1 => value, colLetter2 => value ...],
591+
* ...
592+
* ].
593+
*
* The result is a zero-based list of rows, each row being a zero-based list
* of that row's values in their existing order.
*
* The input is returned unchanged as soon as any outer key is not a positive
* integer, any row is not an array, or the first key of a row is not a string.
* Only the first key of each row is inspected; later keys are not checked.
* A row with no elements passes the check and becomes an empty list.
*
594+
* @param mixed[] $array candidate array, possibly keyed by row number then column letter
595+
*
596+
* @return mixed[] the re-indexed rows, or $array itself when it does not have the shape above
597+
*/
598+
private static function convertSpecialArray(array $array): array
599+
{
600+
$newArray = [];
601+
foreach ($array as $rowIndex => $row) {
602+
// Outer keys must be positive integers and each row must itself be an array;
// anything else is handed back untouched.
if (!is_int($rowIndex) || $rowIndex <= 0 || !is_array($row)) {
603+
return $array;
604+
}
605+
$keys = array_keys($row);
606+
// An empty row has no first key; defaulting to '' lets it pass the string check below.
$key0 = $keys[0] ?? '';
607+
// A non-string first key means the row is not keyed the way the docblock describes.
if (!is_string($key0)) {
608+
return $array;
609+
}
610+
// Drop the row's keys, keeping the values in their current order.
$newArray[] = array_values($row);
611+
}
612+
613+
return $newArray;
614+
}
615+
585616
/**
586617
* Set old calculated value (cached).
587618
*

tests/PhpSpreadsheetTests/Calculation/Engine/RangeTest.php

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313

1414
class RangeTest extends TestCase
1515
{
16-
private string $incompleteMessage = 'Must be revisited';
17-
1816
private ?Spreadsheet $spreadSheet = null;
1917

2018
protected function getSpreadsheet(): Spreadsheet
@@ -162,9 +160,6 @@ public static function providerUTF8NamedRangeEvaluation(): array
162160
#[DataProvider('providerCompositeNamedRangeEvaluation')]
163161
public function testCompositeNamedRangeEvaluation(string $composite, int $expectedSum, int $expectedCount): void
164162
{
165-
if ($this->incompleteMessage !== '') {
166-
self::markTestIncomplete($this->incompleteMessage);
167-
}
168163
$this->spreadSheet = $this->getSpreadsheet();
169164

170165
$workSheet = $this->spreadSheet->getActiveSheet();
@@ -182,17 +177,47 @@ public function testCompositeNamedRangeEvaluation(string $composite, int $expect
182177
public static function providerCompositeNamedRangeEvaluation(): array
183178
{
184179
return [
185-
// Calculation engine doesn't yet handle union ranges with overlap
186180
'Union with overlap' => [
187-
'A1:C1,A3:C3,B1:C3',
188-
63,
181+
'$A$1:$C$1,$A$3:$C$3,$B$1:$C$3',
182+
99,
189183
12,
190184
],
191185
'Union and Intersection' => [
192-
'A1:C1,A3:C3 B1:C3',
193-
23,
186+
'$A$1:$C$1,$A$3:$C$3 $B$1:$C$3',
187+
35,
194188
5,
195189
],
196190
];
197191
}
192+
193+
/**
 * Exercises intersection (space) and union (comma) of ranges, both through
 * named ranges and written directly in a cell formula.
 *
 * The fixture is written with its top-left corner at A3, so the header row is
 * row 3, the Batman..Steve rows are rows 4-8, and the Planets/Lives/Babies
 * columns are B/C/D.
 */
public function testIntersectCellFormula(): void
194+
{
195+
$this->spreadSheet = $this->getSpreadsheet();
196+
197+
$sheet = $this->spreadSheet->getActiveSheet();
198+
$array = [
199+
[null, 'Planets', 'Lives', 'Babies'],
200+
['Batman', 5, 10, 4],
201+
['Superman', 4, 56, 34],
202+
['Spiderman', 23, 45, 67],
203+
['Hulk', 12, 34, 58],
204+
['Steve', 10, 34, 78],
205+
];
206+
$sheet->fromArray($array, null, 'A3', true);
207+
$this->spreadSheet->addNamedRange(new NamedRange('Hulk', $sheet, '$B$7:$D$7'));
208+
$this->spreadSheet->addNamedRange(new NamedRange('Planets', $sheet, '$B$4:$B$8'));
209+
// Space-separated ranges: row 6 crossed with column C.
$this->spreadSheet->addNamedRange(new NamedRange('Intersect', $sheet, '$A$6:$D$6 $C$4:$C$8'));
210+
// Comma-separated ranges: the Superman row together with the Hulk row.
$this->spreadSheet->addNamedRange(new NamedRange('SupHulk', $sheet, '$B$5:$D$5,$B$7:$D$7'));
211+
212+
$sheet->setCellValue('F1', '=Intersect');
213+
$sheet->setCellValue('F2', '=SUM(SupHulk)');
214+
$sheet->setCellValue('F3', '=Planets Hulk');
215+
$sheet->setCellValue('F4', '=B4:D4 B4:C5');
216+
217+
// NOTE(review): presumably switches the calculation engine to return array results
// as arrays, which the F4 assertion relies on - confirm against Spreadsheet::returnArrayAsArray().
$this->spreadSheet->returnArrayAsArray();
218+
// C6 is the single cell shared by row 6 and column C: Spiderman's Lives.
self::assertSame(45, $sheet->getCell('F1')->getCalculatedValue());
219+
// (4 + 56 + 34) + (12 + 34 + 58) = 198.
self::assertSame(198, $sheet->getCell('F2')->getCalculatedValue());
220+
// B4:B8 and B7:D7 share only B7: Hulk's Planets.
self::assertSame(12, $sheet->getCell('F3')->getCalculatedValue());
221+
// B4:D4 and B4:C5 overlap in B4:C4, a one-row, two-column result.
self::assertSame([[5, 10]], $sheet->getCell('F4')->getCalculatedValue());
222+
}
198223
}

tests/PhpSpreadsheetTests/Worksheet/Table/Issue3659Test.php

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
namespace PhpOffice\PhpSpreadsheetTests\Worksheet\Table;
66

7-
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
87
use PhpOffice\PhpSpreadsheet\Worksheet\Table;
98

109
class Issue3659Test extends SetupTeardown
@@ -49,7 +48,7 @@ public function testTableOnOtherSheet(): void
4948
public function testTableAsArray(): void
5049
{
5150
$spreadsheet = $this->getSpreadsheet();
52-
Calculation::getInstance($spreadsheet)->setInstanceArrayReturnType(Calculation::RETURN_ARRAY_AS_ARRAY);
51+
$spreadsheet->returnArrayAsArray();
5352
$sheet = $this->getSheet();
5453
$sheet->setTitle('Feuil1');
5554
$tableSheet = $spreadsheet->createSheet();
@@ -76,13 +75,13 @@ public function testTableAsArray(): void
7675
$sheet->getCell('F9')->setValue('=Tableau1');
7776
$sheet->getCell('J9')->setValue('=CONCAT(Tableau1)');
7877
$sheet->getCell('J11')->setValue('=SUM(Tableau1[])');
79-
$expectedResult = [2 => ['B' => 10], ['B' => 2], ['B' => 3], ['B' => 4]];
78+
$expectedResult = [[10], [2], [3], [4]];
8079
self::assertSame($expectedResult, $sheet->getCell('F1')->getCalculatedValue());
8180
$expectedResult = [
82-
2 => ['B' => 10, 'C' => 20, 'D' => null],
83-
['B' => 2, 'C' => null, 'D' => null],
84-
['B' => 3, 'C' => null, 'D' => null],
85-
['B' => 4, 'C' => null, 'D' => null],
81+
[10, 20, null],
82+
[2, null, null],
83+
[3, null, null],
84+
[4, null, null],
8685
];
8786
self::assertSame($expectedResult, $sheet->getCell('H1')->getCalculatedValue());
8887
self::assertSame($expectedResult, $sheet->getCell('F9')->getCalculatedValue());

0 commit comments

Comments
 (0)