Skip to content

Commit 8799a04

Browse files
authored
Merge pull request #4189 from oleibman/csvphp9
Method to Test Whether Csv Will Be Affected by Php9
2 parents d6a3676 + 75ccdb0 commit 8799a04

File tree

8 files changed

+124
-16
lines changed

8 files changed

+124
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org).
1111

1212
- Add Dynamic valueBinder Property to Spreadsheet and Readers. [Issue #1395](https://github.com/PHPOffice/PhpSpreadsheet/issues/1395) [PR #4185](https://github.com/PHPOffice/PhpSpreadsheet/pull/4185)
1313
- Allow Omitting Chart Border. [Issue #562](https://github.com/PHPOffice/PhpSpreadsheet/issues/562) [PR #4188](https://github.com/PHPOffice/PhpSpreadsheet/pull/4188)
14+
- Method to Test Whether Csv Will Be Affected by Php9. [PR #4189](https://github.com/PHPOffice/PhpSpreadsheet/pull/4189)
1415

1516
### Changed
1617

src/PhpSpreadsheet/Reader/Csv.php

Lines changed: 70 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
1111
use PhpOffice\PhpSpreadsheet\Spreadsheet;
1212
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
13+
use Throwable;
1314

1415
class Csv extends BaseReader
1516
{
@@ -74,7 +75,7 @@ class Csv extends BaseReader
7475
* It is anticipated that it will conditionally be set
7576
* to null-string for Php9 and above.
7677
*/
77-
private static string $defaultEscapeCharacter = '\\';
78+
private static string $defaultEscapeCharacter = PHP_VERSION_ID < 90000 ? '\\' : '';
7879

7980
/**
8081
* Callback for setting defaults in construction.
@@ -288,6 +289,12 @@ private function openFileOrMemory(string $filename): void
288289
if (!$fhandle) {
289290
throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
290291
}
292+
if ($this->inputEncoding === 'UTF-8') {
293+
$encoding = self::guessEncodingBom($filename);
294+
if ($encoding !== '') {
295+
$this->inputEncoding = $encoding;
296+
}
297+
}
291298
if ($this->inputEncoding === self::GUESS_ENCODING) {
292299
$this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
293300
}
@@ -315,7 +322,7 @@ public function setTestAutoDetect(bool $value): self
315322
private function setAutoDetect(?string $value): ?string
316323
{
317324
$retVal = null;
318-
if ($value !== null && $this->testAutodetect) {
325+
if ($value !== null && $this->testAutodetect && PHP_VERSION_ID < 90000) {
319326
$retVal2 = @ini_set('auto_detect_line_endings', $value);
320327
if (is_string($retVal2)) {
321328
$retVal = $retVal2;
@@ -364,6 +371,21 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
364371
// Deprecated in Php8.1
365372
$iniset = $this->setAutoDetect('1');
366373

374+
try {
375+
$this->loadStringOrFile2($filename, $spreadsheet, $dataUri);
376+
$this->setAutoDetect($iniset);
377+
} catch (Throwable $e) {
378+
$this->setAutoDetect($iniset);
379+
380+
throw $e;
381+
}
382+
383+
return $spreadsheet;
384+
}
385+
386+
private function loadStringOrFile2(string $filename, Spreadsheet $spreadsheet, bool $dataUri): void
387+
{
388+
367389
// Open file
368390
if ($dataUri) {
369391
$this->openDataUri($filename);
@@ -435,11 +457,6 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
435457

436458
// Close file
437459
fclose($fileHandle);
438-
439-
$this->setAutoDetect($iniset);
440-
441-
// Return
442-
return $spreadsheet;
443460
}
444461

445462
/**
@@ -547,6 +564,10 @@ public function getContiguous(): bool
547564
*/
548565
public function setEscapeCharacter(string $escapeCharacter): self
549566
{
567+
if (PHP_VERSION_ID >= 90000 && $escapeCharacter !== '') {
568+
throw new ReaderException('Escape character must be null string for Php9+');
569+
}
570+
550571
$this->escapeCharacter = $escapeCharacter;
551572

552573
return $this;
@@ -624,17 +645,15 @@ private static function guessEncodingTestBom(string &$encoding, string $first4,
624645
}
625646
}
626647

627-
private static function guessEncodingBom(string $filename): string
648+
public static function guessEncodingBom(string $filename, ?string $convertString = null): string
628649
{
629650
$encoding = '';
630-
$first4 = file_get_contents($filename, false, null, 0, 4);
631-
if ($first4 !== false) {
632-
self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
633-
self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
634-
self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
635-
self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
636-
self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
637-
}
651+
$first4 = $convertString ?? (string) file_get_contents($filename, false, null, 0, 4);
652+
self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
653+
self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
654+
self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
655+
self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
656+
self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
638657

639658
return $encoding;
640659
}
@@ -691,4 +710,39 @@ private static function getCsv(
691710

692711
return fgetcsv($stream, $length, $separator, $enclosure, $escape);
693712
}
713+
714+
public static function affectedByPhp9(
715+
string $filename,
716+
string $inputEncoding = 'UTF-8',
717+
?string $delimiter = null,
718+
string $enclosure = '"',
719+
string $escapeCharacter = '\\'
720+
): bool {
721+
if (PHP_VERSION_ID < 70400 || PHP_VERSION_ID >= 90000) {
722+
throw new ReaderException('Function valid only for Php7.4 or Php8'); // @codeCoverageIgnore
723+
}
724+
$reader1 = new self();
725+
$reader1->setInputEncoding($inputEncoding)
726+
->setTestAutoDetect(true)
727+
->setEscapeCharacter($escapeCharacter)
728+
->setDelimiter($delimiter)
729+
->setEnclosure($enclosure);
730+
$spreadsheet1 = $reader1->load($filename);
731+
$sheet1 = $spreadsheet1->getActiveSheet();
732+
$array1 = $sheet1->toArray(null, false, false);
733+
$spreadsheet1->disconnectWorksheets();
734+
735+
$reader2 = new self();
736+
$reader2->setInputEncoding($inputEncoding)
737+
->setTestAutoDetect(false)
738+
->setEscapeCharacter('')
739+
->setDelimiter($delimiter)
740+
->setEnclosure($enclosure);
741+
$spreadsheet2 = $reader2->load($filename);
742+
$sheet2 = $spreadsheet2->getActiveSheet();
743+
$array2 = $sheet2->toArray(null, false, false);
744+
$spreadsheet2->disconnectWorksheets();
745+
746+
return $array1 !== $array2;
747+
}
694748
}

tests/PhpSpreadsheetTests/Reader/Csv/CsvLineEndingTest.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ protected function tearDown(): void
2525
*/
2626
public function testEndings(string $ending): void
2727
{
28+
if ($ending === "\r" && PHP_VERSION_ID >= 90000) {
29+
self::markTestSkipped('Mac line endings not supported for Php9+');
30+
}
2831
$this->tempFile = $filename = File::temporaryFilename();
2932
$data = ['123', '456', '789'];
3033
file_put_contents($filename, implode($ending, $data));

tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ public static function providerCanLoad(): array
114114

115115
public function testEscapeCharacters(): void
116116
{
117+
if (PHP_VERSION_ID >= 90000) {
118+
$this->expectException(ReaderException::class);
119+
$this->expectExceptionMessage('Escape character must be null string');
120+
}
117121
$reader = (new Csv())->setEscapeCharacter('"');
118122
$worksheet = $reader->load('tests/data/Reader/CSV/backslash.csv')
119123
->getActiveSheet();
@@ -230,6 +234,10 @@ public function testReadNonexistentFileName(): void
230234
*/
231235
public function testInferSeparator(string $escape, string $delimiter): void
232236
{
237+
if (PHP_VERSION_ID >= 90000 && $escape !== '') {
238+
$this->expectException(ReaderException::class);
239+
$this->expectExceptionMessage('Escape character must be null string');
240+
}
233241
$reader = new Csv();
234242
$reader->setEscapeCharacter($escape);
235243
$filename = 'tests/data/Reader/CSV/escape.csv';
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
6+
7+
use PhpOffice\PhpSpreadsheet\Reader\Csv;
8+
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
9+
use PHPUnit\Framework\TestCase;
10+
11+
class Php9Test extends TestCase
12+
{
13+
public function testAffectedByPhp9(): void
14+
{
15+
if (PHP_VERSION_ID >= 90000) {
16+
$this->expectException(ReaderException::class);
17+
$this->expectExceptionMessage('Php7.4 or Php8');
18+
}
19+
$dir = 'tests/data/Reader/CSV';
20+
$files = glob("$dir/*");
21+
self::assertNotFalse($files);
22+
$affected = [];
23+
foreach ($files as $file) {
24+
$base = basename($file);
25+
$encoding = 'UTF-8';
26+
if (str_contains($base, 'utf') && !str_contains($base, 'bom')) {
27+
$encoding = 'guess';
28+
}
29+
$result = Csv::affectedByPhp9($file, $encoding);
30+
if ($result) {
31+
$affected[] = $base;
32+
}
33+
}
34+
$expected = ['backslash.csv', 'escape.csv', 'linend.mac.csv'];
35+
self::assertSame($expected, $affected);
36+
}
37+
}

tests/data/Reader/CSV/linend.mac.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
A,12,3

tests/data/Reader/CSV/linend.unix.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
A,1
2+
2,3

tests/data/Reader/CSV/linend.win.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
A,1
2+
2,3

0 commit comments

Comments
 (0)