|
9 | 9 | from typing import TYPE_CHECKING, Any, Literal |
10 | 10 |
|
11 | 11 | import yaml |
12 | | -from rich import print |
| 12 | +from rich import print # noqa: A004 # Allow shadowing the built-in |
13 | 13 | from rich.syntax import Syntax |
14 | 14 |
|
15 | 15 | from airbyte_protocol.models import ( |
@@ -405,9 +405,25 @@ def get_stream_json_schema(self, stream_name: str) -> dict[str, Any]: |
405 | 405 |
|
406 | 406 | return found[0].json_schema |
407 | 407 |
|
408 | | - def get_records(self, stream: str) -> LazyDataset: |
| 408 | + def get_records( |
| 409 | + self, |
| 410 | + stream: str, |
| 411 | + *, |
| 412 | + normalize_field_names: bool = False, |
| 413 | + prune_undeclared_fields: bool = True, |
| 414 | + ) -> LazyDataset: |
409 | 415 | """Read a stream from the connector. |
410 | 416 |
|
| 417 | + Args: |
| 418 | + stream: The name of the stream to read. |
| 419 | + normalize_field_names: When `True`, field names will be normalized to lower case, with |
| 420 | + special characters removed. This matches the behavior of PyAirbyte caches and most |
| 421 | + Airbyte destinations. |
| 422 | + prune_undeclared_fields: When `True`, undeclared fields will be pruned from the records.
| 423 | + This generally matches the behavior of PyAirbyte caches and most Airbyte destinations,
| 424 | + and is mainly relevant when you expect the catalog may be stale. You can disable this
| 425 | + to keep all fields in the records.
| 426 | +
|
411 | 427 | This involves the following steps: |
412 | 428 | * Call discover to get the catalog |
413 | 429 | * Generate a configured catalog that syncs the given stream in full_refresh mode |
@@ -445,8 +461,8 @@ def _with_logging(records: Iterable[dict[str, Any]]) -> Iterator[dict[str, Any]] |
445 | 461 |
|
446 | 462 | stream_record_handler = StreamRecordHandler( |
447 | 463 | json_schema=self.get_stream_json_schema(stream), |
448 | | - prune_extra_fields=True, |
449 | | - normalize_keys=False, |
| 464 | + prune_extra_fields=prune_undeclared_fields, |
| 465 | + normalize_keys=normalize_field_names, |
450 | 466 | ) |
451 | 467 |
|
452 | 468 | # This method is non-blocking, so we use "PLAIN" to avoid a live progress display |
|
0 commit comments