Skip to content

Commit c3a78e4

Browse files
committed
Xls Reader ListWorksheetInfo and New ListWorksheetDimensions
PR #4687 corrected how Xls Writer generated its Dimensions records. We ignore the Dimensions record on read since it does not affect our processing in the slightest. However, the PR raises the possibility that someone might wish to see the data in the Dimensions record. (The PR did an adequate test for retrieving Dimensions data, but it is not generalizable.) To accommodate such a case, we add a new ListWorksheetDimensions function to Xls Reader, similar to ListWorksheetInfo. As luck would have it, the spreadsheet with which I tested the new function produced incorrect results for ListWorksheetInfo, which was ignoring XLS_TYPE_MULRK records. So I added the necessary code to fix ListWorksheetInfo as well.
1 parent eecfb67 commit c3a78e4

File tree

5 files changed

+166
-6
lines changed

5 files changed

+166
-6
lines changed

src/PhpSpreadsheet/Reader/Xls.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,16 @@ public function listWorksheetInfo(string $filename): array
294294
return (new Xls\ListFunctions())->listWorksheetInfo2($filename, $this);
295295
}
296296

297+
/**
298+
* Return the values of each worksheet's Dimensions record (Name, Min Row, Min Column, Max Row, Max Column, Last Column Letter).
299+
*
300+
* @return array<int, array{worksheetName: string, dimensionsMinR: int, dimensionsMinC: int, dimensionsMaxR: int, dimensionsMaxC: int, lastColumnLetter: string}>
301+
*/
302+
public function listWorksheetDimensions(string $filename): array
303+
{
304+
return (new Xls\ListFunctions())->listWorksheetDimensions2($filename, $this);
305+
}
306+
297307
/**
298308
* Loads PhpSpreadsheet from file.
299309
*/

src/PhpSpreadsheet/Reader/Xls/ListFunctions.php

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,19 @@ protected function listWorksheetInfo2(string $filename, Xls $xls): array
124124
case self::XLS_TYPE_FORMULA:
125125
case self::XLS_TYPE_BOOLERR:
126126
case self::XLS_TYPE_LABEL:
127+
case self::XLS_TYPE_MULRK:
127128
$length = self::getUInt2d($xls->data, $xls->pos + 2);
128129
$recordData = $xls->readRecordData($xls->data, $xls->pos + 4, $length);
129130

130131
// move stream pointer to next record
131132
$xls->pos += 4 + $length;
132133

133134
$rowIndex = self::getUInt2d($recordData, 0) + 1;
134-
$columnIndex = self::getUInt2d($recordData, 2);
135+
if ($code === self::XLS_TYPE_MULRK) {
136+
$columnIndex = self::getUInt2d($recordData, $length - 2);
137+
} else {
138+
$columnIndex = self::getUInt2d($recordData, 2);
139+
}
135140

136141
$tmpInfo['totalRows'] = max($tmpInfo['totalRows'], $rowIndex);
137142
$tmpInfo['lastColumnIndex'] = max($tmpInfo['lastColumnIndex'], $columnIndex);
@@ -160,4 +165,103 @@ protected function listWorksheetInfo2(string $filename, Xls $xls): array
160165

161166
return $worksheetInfo;
162167
}
168+
169+
/**
170+
* Return the values of each worksheet's Dimensions record (Name, Min Row, Min Column, Max Row, Max Column, Last Column Letter).
171+
*
172+
* @return array<int, array{worksheetName: string, dimensionsMinR: int, dimensionsMinC: int, dimensionsMaxR: int, dimensionsMaxC: int, lastColumnLetter: string}>
173+
*/
174+
protected function listWorksheetDimensions2(string $filename, Xls $xls): array
175+
{
176+
File::assertFile($filename);
177+
178+
$worksheetInfo = [];
179+
180+
// Read the OLE file
181+
$xls->loadOLE($filename);
182+
183+
// total byte size of Excel data (workbook global substream + sheet substreams)
184+
$xls->dataSize = strlen($xls->data);
185+
186+
// initialize
187+
$xls->pos = 0;
188+
$xls->sheets = [];
189+
190+
// Parse Workbook Global Substream
191+
while ($xls->pos < $xls->dataSize) {
192+
$code = self::getUInt2d($xls->data, $xls->pos);
193+
194+
match ($code) {
195+
self::XLS_TYPE_BOF => $xls->readBof(),
196+
self::XLS_TYPE_SHEET => $xls->readSheet(),
197+
self::XLS_TYPE_EOF => $xls->readDefault(),
198+
self::XLS_TYPE_CODEPAGE => $xls->readCodepage(),
199+
default => $xls->readDefault(),
200+
};
201+
202+
if ($code === self::XLS_TYPE_EOF) {
203+
break;
204+
}
205+
}
206+
207+
// Parse the individual sheets
208+
foreach ($xls->sheets as $sheet) {
209+
if ($sheet['sheetType'] !== 0x00) {
210+
// 0x00: Worksheet
211+
// 0x02: Chart
212+
// 0x06: Visual Basic module
213+
continue;
214+
}
215+
216+
$tmpInfo = [];
217+
$tmpInfo['worksheetName'] = StringHelper::convertToString($sheet['name']);
218+
$tmpInfo['dimensionsMinR'] = -1;
219+
$tmpInfo['dimensionsMaxR'] = -1;
220+
$tmpInfo['dimensionsMinC'] = -1;
221+
$tmpInfo['dimensionsMaxC'] = -1;
222+
$tmpInfo['lastColumnLetter'] = '';
223+
224+
$xls->pos = $sheet['offset'];
225+
226+
while ($xls->pos <= $xls->dataSize - 4) {
227+
$code = self::getUInt2d($xls->data, $xls->pos);
228+
229+
switch ($code) {
230+
case self::XLS_TYPE_BOF:
231+
$xls->readBof();
232+
233+
break;
234+
case self::XLS_TYPE_EOF:
235+
$xls->readDefault();
236+
237+
break 2;
238+
case self::XLS_TYPE_DIMENSION:
239+
$length = self::getUInt2d($xls->data, $xls->pos + 2);
240+
if ($length === 14) {
241+
$dimensionsData = substr($xls->data, $xls->pos + 4, $length);
242+
$data = unpack('VrwMic/VrwMac/vcolMic/vcolMac/vreserved', $dimensionsData);
243+
if (is_array($data)) {
244+
/** @var int[] $data */
245+
$tmpInfo['dimensionsMinR'] = $data['rwMic'];
246+
$tmpInfo['dimensionsMaxR'] = $data['rwMac'];
247+
$tmpInfo['dimensionsMinC'] = $data['colMic'];
248+
$tmpInfo['dimensionsMaxC'] = $data['colMac'];
249+
$tmpInfo['lastColumnLetter'] = Coordinate::stringFromColumnIndex($tmpInfo['dimensionsMaxC']);
250+
}
251+
}
252+
$xls->readDefault();
253+
254+
break;
255+
default:
256+
$xls->readDefault();
257+
258+
break;
259+
}
260+
}
261+
262+
$worksheetInfo[] = $tmpInfo;
263+
}
264+
265+
return $worksheetInfo;
266+
}
163267
}

src/PhpSpreadsheet/Reader/XlsBase.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ class XlsBase extends BaseReader
2525
final const XLS_TYPE_FORMULA = 0x0006;
2626
final const XLS_TYPE_EOF = 0x000A;
2727
final const XLS_TYPE_PROTECT = 0x0012;
28-
final const XLS_TYPE_OBJECTPROTECT = 0x0063;
29-
final const XLS_TYPE_SCENPROTECT = 0x00DD;
3028
final const XLS_TYPE_PASSWORD = 0x0013;
3129
final const XLS_TYPE_HEADER = 0x0014;
3230
final const XLS_TYPE_FOOTER = 0x0015;
@@ -50,6 +48,7 @@ class XlsBase extends BaseReader
5048
final const XLS_TYPE_CODEPAGE = 0x0042;
5149
final const XLS_TYPE_DEFCOLWIDTH = 0x0055;
5250
final const XLS_TYPE_OBJ = 0x005D;
51+
final const XLS_TYPE_OBJECTPROTECT = 0x0063;
5352
final const XLS_TYPE_COLINFO = 0x007D;
5453
final const XLS_TYPE_IMDATA = 0x007F;
5554
final const XLS_TYPE_SHEETPR = 0x0081;
@@ -62,6 +61,7 @@ class XlsBase extends BaseReader
6261
final const XLS_TYPE_MULRK = 0x00BD;
6362
final const XLS_TYPE_MULBLANK = 0x00BE;
6463
final const XLS_TYPE_DBCELL = 0x00D7;
64+
final const XLS_TYPE_SCENPROTECT = 0x00DD;
6565
final const XLS_TYPE_XF = 0x00E0;
6666
final const XLS_TYPE_MERGEDCELLS = 0x00E5;
6767
final const XLS_TYPE_MSODRAWINGGROUP = 0x00EB;
@@ -70,6 +70,8 @@ class XlsBase extends BaseReader
7070
final const XLS_TYPE_LABELSST = 0x00FD;
7171
final const XLS_TYPE_EXTSST = 0x00FF;
7272
final const XLS_TYPE_EXTERNALBOOK = 0x01AE;
73+
final const XLS_TYPE_CFHEADER = 0x01B0;
74+
final const XLS_TYPE_CFRULE = 0x01B1;
7375
final const XLS_TYPE_DATAVALIDATIONS = 0x01B2;
7476
final const XLS_TYPE_TXO = 0x01B6;
7577
final const XLS_TYPE_HYPERLINK = 0x01B8;
@@ -90,13 +92,11 @@ class XlsBase extends BaseReader
9092
final const XLS_TYPE_FORMAT = 0x041E;
9193
final const XLS_TYPE_SHAREDFMLA = 0x04BC;
9294
final const XLS_TYPE_BOF = 0x0809;
95+
final const XLS_TYPE_SHEETLAYOUT = 0x0862;
9396
final const XLS_TYPE_SHEETPROTECTION = 0x0867;
9497
final const XLS_TYPE_RANGEPROTECTION = 0x0868;
95-
final const XLS_TYPE_SHEETLAYOUT = 0x0862;
9698
final const XLS_TYPE_XFEXT = 0x087D;
9799
final const XLS_TYPE_PAGELAYOUTVIEW = 0x088B;
98-
final const XLS_TYPE_CFHEADER = 0x01B0;
99-
final const XLS_TYPE_CFRULE = 0x01B1;
100100
final const XLS_TYPE_UNKNOWN = 0xFFFF;
101101

102102
// Encryption type

tests/PhpSpreadsheetTests/Reader/Xls/InfoNamesTest.php

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,4 +165,50 @@ public function testLoadMacCentralEuropeBiff8(): void
165165
self::assertSame('Użytkownik Microsoft Office', $properties->getLastModifiedBy());
166166
$spreadsheet->disconnectWorksheets();
167167
}
168+
169+
public function testDimensions(): void
170+
{
171+
$filename = 'tests/data/Reader/XLS/pr.4687.excel.xls';
172+
$reader = new Xls();
173+
$info = $reader->listWorksheetInfo($filename);
174+
$expected = [
175+
[
176+
'worksheetName' => 'Sheet1',
177+
'lastColumnLetter' => 'D',
178+
'lastColumnIndex' => 3,
179+
'totalRows' => 2,
180+
'totalColumns' => 4,
181+
'sheetState' => 'visible',
182+
],
183+
[
184+
'worksheetName' => 'Sheet2',
185+
'lastColumnLetter' => 'B',
186+
'lastColumnIndex' => 1,
187+
'totalRows' => 4,
188+
'totalColumns' => 2,
189+
'sheetState' => 'visible',
190+
],
191+
];
192+
self::assertSame($expected, $info);
193+
$info = $reader->listWorksheetDimensions($filename);
194+
$expected = [
195+
[
196+
'worksheetName' => 'Sheet1',
197+
'dimensionsMinR' => 0,
198+
'dimensionsMaxR' => 2,
199+
'dimensionsMinC' => 0,
200+
'dimensionsMaxC' => 4,
201+
'lastColumnLetter' => 'D',
202+
],
203+
[
204+
'worksheetName' => 'Sheet2',
205+
'dimensionsMinR' => 0,
206+
'dimensionsMaxR' => 4,
207+
'dimensionsMinC' => 0,
208+
'dimensionsMaxC' => 2,
209+
'lastColumnLetter' => 'B',
210+
],
211+
];
212+
self::assertSame($expected, $info);
213+
}
168214
}
25 KB
Binary file not shown.

0 commit comments

Comments
 (0)