Skip to content

Commit 59463c9

Browse files
authored
[GoogleSheetExtractor] Add support for Schema (#1862)
1 parent fc4f061 commit 59463c9

File tree

2 files changed

+71
-32
lines changed

2 files changed

+71
-32
lines changed

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
use function Flow\ETL\DSL\array_to_rows;
88
use Flow\ETL\Exception\InvalidArgumentException;
99
use Flow\ETL\Extractor\{Limitable, LimitableExtractor, Signal};
10-
use Flow\ETL\{Extractor, FlowContext};
10+
use Flow\ETL\{Extractor, FlowContext, Schema};
1111
use Google\Service\Sheets;
1212

1313
final class GoogleSheetExtractor implements Extractor, LimitableExtractor
@@ -23,6 +23,8 @@ final class GoogleSheetExtractor implements Extractor, LimitableExtractor
2323

2424
private int $rowsPerPage = 1000;
2525

26+
private ?Schema $schema = null;
27+
2628
private bool $withHeader = true;
2729

2830
public function __construct(
@@ -38,7 +40,6 @@ public function extract(FlowContext $context) : \Generator
3840
$cellsRange = new SheetRange($this->columnRange, 1, $this->rowsPerPage);
3941
$headers = [];
4042

41-
/** @var Sheets\ValueRange $response */
4243
$response = $this->service->spreadsheets_values->get(
4344
$this->spreadsheetId,
4445
$cellsRange->toString(),
@@ -107,7 +108,7 @@ function (array $rowData) use ($headers, $headersCount, $shouldPutInputIntoRows)
107108
$totalRows += \count($rows);
108109

109110
foreach ($rows as $row) {
110-
$signal = yield array_to_rows($row, $context->entryFactory());
111+
$signal = yield array_to_rows($row, $context->entryFactory(), schema: $this->schema);
111112
$this->incrementReturnedRows();
112113

113114
if ($signal === Signal::STOP || $this->reachedLimit()) {
@@ -163,4 +164,11 @@ public function withRowsPerPage(int $rowsPerPage) : self
163164

164165
return $this;
165166
}
167+
168+
public function withSchema(Schema $schema) : self
169+
{
170+
$this->schema = $schema;
171+
172+
return $this;
173+
}
166174
}

src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Integration/GoogleSheetExtractorTest.php

Lines changed: 60 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44

55
namespace Flow\ETL\Adapter\GoogleSheet\Tests\Integration;
66

7-
use function Flow\ETL\DSL\{config, flow_context};
8-
use Flow\ETL\Adapter\GoogleSheet\{Columns, GoogleSheetExtractor, Tests\GoogleSheetsContext};
7+
use function Flow\ETL\Adapter\GoogleSheet\from_google_sheet;
8+
use function Flow\ETL\DSL\{df, int_schema, schema, string_schema};
9+
use Flow\ETL\Adapter\GoogleSheet\{Tests\GoogleSheetsContext};
910
use Flow\ETL\Exception\InvalidArgumentException;
1011
use Flow\ETL\Tests\FlowTestCase;
1112

@@ -18,30 +19,62 @@ protected function setUp() : void
1819
$this->context = new GoogleSheetsContext();
1920
}
2021

21-
public function test_extract_skip_extra_empty_rows() : void
22+
public function test_extract_puts_null_in_not_matching_schema_rows() : void
2223
{
23-
$extractor = new GoogleSheetExtractor(
24-
$this->context->sheets(__DIR__ . '/../Fixtures/extra-empty-rows.json'),
25-
'1234567890',
26-
new Columns('Sheet', 'A', 'Z'),
27-
);
24+
$rows = df()
25+
->extract(
26+
from_google_sheet(
27+
$this->context->sheets(__DIR__ . '/../Fixtures/extra-empty-rows.json'),
28+
'1234567890',
29+
'Sheet',
30+
)->withSchema(
31+
schema(
32+
string_schema('Header 1'),
33+
string_schema('Header 2'),
34+
int_schema('id'),
35+
)
36+
)
37+
)
38+
->fetch()
39+
->toArray();
2840

29-
$rows = $extractor->extract(flow_context(config()));
41+
foreach ($rows as $row) {
42+
self::assertNotSame([], $row);
43+
self::assertArrayNotHasKey('Header 3', $row);
44+
self::assertNull($row['id']);
45+
}
46+
}
47+
48+
public function test_extract_skip_extra_empty_rows() : void
49+
{
50+
$rows = df()
51+
->extract(
52+
from_google_sheet(
53+
$this->context->sheets(__DIR__ . '/../Fixtures/extra-empty-rows.json'),
54+
'1234567890',
55+
'Sheet',
56+
)
57+
)
58+
->fetch()
59+
->toArray();
3060

3161
foreach ($rows as $row) {
32-
self::assertNotSame([], $row->toArray());
62+
self::assertNotSame([], $row);
3363
}
3464
}
3565

3666
public function test_extract_with_cut_extra_columns() : void
3767
{
38-
$extractor = new GoogleSheetExtractor(
39-
$this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'),
40-
'1234567890',
41-
new Columns('Sheet', 'A', 'Z'),
42-
);
43-
44-
$rows = $extractor->extract(flow_context(config()));
68+
$rows = df()
69+
->extract(
70+
from_google_sheet(
71+
$this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'),
72+
'1234567890',
73+
'Sheet',
74+
)
75+
)
76+
->fetch()
77+
->toArray();
4578

4679
foreach ($rows as $row) {
4780
self::assertNotNull($row);
@@ -50,20 +83,18 @@ public function test_extract_with_cut_extra_columns() : void
5083

5184
public function test_extract_without_cut_extra_columns() : void
5285
{
53-
$extractor = new GoogleSheetExtractor(
54-
$this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'),
55-
'1234567890',
56-
new Columns('Sheet', 'A', 'Z'),
57-
);
58-
$extractor->withDropExtraColumns(false);
59-
60-
$rows = $extractor->extract(flow_context(config()));
61-
6286
$this->expectException(InvalidArgumentException::class);
6387
$this->expectExceptionMessage('Row has more columns (4) than headers (3)');
6488

65-
foreach ($rows as $row) {
66-
self::assertNotNull($row);
67-
}
89+
df()
90+
->extract(
91+
from_google_sheet(
92+
$this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'),
93+
'1234567890',
94+
'Sheet',
95+
)->withDropExtraColumns(false)
96+
)
97+
->fetch()
98+
->toArray();
6899
}
69100
}

0 commit comments

Comments
 (0)