Skip to content

Commit 377884c

Browse files
refactor code
1 parent 8615bb9 commit 377884c

File tree

3 files changed

+8
-4
lines changed

3 files changed

+8
-4
lines changed

airbyte_cdk/sources/file_based/config/file_based_stream_config.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,9 +74,9 @@ class FileBasedStreamConfig(BaseModel):
7474
default=None,
7575
gt=0,
7676
)
77-
use_first_found_file_for_schema_discovery: Optional[bool] = Field(
77+
use_first_found_file_for_schema_discovery: bool = Field(
7878
title="Use first found file for schema discovery",
79-
description="When enable, the source will use the first found file for schema discovery. Helps to avoid long discovery step",
79+
description="When enabled, the source will use the first found file for schema discovery. Helps to avoid long discovery step",
8080
default=False,
8181
)
8282

airbyte_cdk/sources/file_based/stream/default_file_based_stream.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -274,8 +274,10 @@ def _get_raw_json_schema(self) -> JsonSchema:
274274
elif self.config.schemaless:
275275
return schemaless_schema
276276
elif self.config.use_first_found_file_for_schema_discovery:
277-
self.logger.info(msg=f"Using only first found file for schema discovery.")
278-
files = [next(iter(self.get_files()))]
277+
self.logger.info(
278+
msg=f"Using only first found file for schema discovery for stream {self.name} due to limitation in config."
279+
)
280+
files = list(itertools.islice(self.get_files(), 1))
279281
first_n_files = len(files)
280282
else:
281283
files = self.list_files()

unit_tests/sources/file_based/stream/test_default_file_based_stream.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,8 @@ def test_use_first_found_file_for_schema_discovery(self) -> None:
239239
self._stream_reader.get_matching_files.return_value = files
240240

241241
schema = self._stream.get_json_schema()
242+
assert self._parser.infer_schema.call_count == 1
243+
assert self._parser.infer_schema.call_args[0][1].uri == "file0"
242244
assert schema == {
243245
"properties": {
244246
"_ab_source_file_last_modified": {"type": "string"},

0 commit comments

Comments
 (0)