Add unit tests for new query property methods, add docstrings, and fix bug around character chunking

brianjlai · brianjlai · commit cad709e68451 · 2025-04-01T00:18:15.000-07:00
diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/group_by_key.py b/airbyte_cdk/sources/declarative/requesters/query_properties/group_by_key.py
@@ -1,15 +1,15 @@
 # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 
 from dataclasses import InitVar, dataclass
-from typing import Any, List, Mapping, Union
+from typing import Any, List, Mapping, Optional, Union
 
 from airbyte_cdk.sources.types import Config, Record
 
 
 @dataclass
 class GroupByKey:
     """
-    tbd
+    Record merge strategy that combines records together according to values on the record for one or many keys.
     """
 
     key: Union[str, List[str]]
@@ -19,6 +19,12 @@ class GroupByKey:
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._keys = [self.key] if isinstance(self.key, str) else self.key
 
-    def get_group_key(self, record: Record) -> str:
-        resolved_keys = [str(record.data.get(key)) for key in self._keys]
+    def get_group_key(self, record: Record) -> Optional[str]:
+        resolved_keys = []
+        for key in self._keys:
+            key_value = record.data.get(key)
+            if key_value:
+                resolved_keys.append(key_value)
+            else:
+                return None
         return ",".join(resolved_keys)
diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py b/airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 
 from dataclasses import InitVar, dataclass
-from typing import Any, Iterable, List, Mapping, Optional, Union
+from typing import Any, Iterable, List, Mapping, Optional
 
 import dpath
 
diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py b/airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py
@@ -20,7 +20,8 @@ class PropertyLimitType(Enum):
 @dataclass
 class PropertyChunking:
     """
-    tbd
+    Defines the behavior for how the complete list of properties to query for are broken down into smaller groups
+    that will be used for multiple requests to the target API.
     """
 
     property_limit_type: PropertyLimitType
@@ -48,6 +49,7 @@ def get_request_property_chunks(
         current_chunk = list(always_include_properties) if always_include_properties else []
         chunk_size = 0
         for property_field in property_fields:
+            # If property_limit_type is not defined, we default to property_count which is just an incrementing count
             property_field_size = (
                 len(property_field)
                 if self.property_limit_type == PropertyLimitType.characters
@@ -61,5 +63,5 @@ def get_request_property_chunks(
             chunk_size += property_field_size
         yield current_chunk
 
-    def get_merge_key(self, record: Record) -> str:
+    def get_merge_key(self, record: Record) -> Optional[str]:
+        """Return the record's merge key as computed by the configured record merge strategy."""
         return self._record_merge_strategy.get_group_key(record=record)
diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py b/airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py
@@ -13,7 +13,10 @@
 @dataclass
 class QueryProperties:
     """
-    tbd
+    Low-code component that encompasses the behavior to inject additional property values into the outbound API
+    requests. Property values can be defined statically within the manifest or dynamically by making requests
+    to a partner API to retrieve the properties. Query properties also allow for splitting of the total set of
+    properties into smaller chunks to satisfy API restrictions around the total amount of data retrieved
     """
 
     property_list: Optional[Union[List[str], PropertiesFromEndpoint]]
diff --git a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
@@ -505,7 +505,10 @@ def read_records(
                                 current_record
                             )
                         )
-                        merged_records[merge_key].update(current_record)
+                        if merge_key:
+                            merged_records[merge_key].update(current_record)
+                        else:
+                            yield stream_data
                     else:
                         yield stream_data
             if self.cursor:
diff --git a/unit_tests/sources/declarative/requesters/query_properties/__init__.py b/unit_tests/sources/declarative/requesters/query_properties/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
diff --git a/unit_tests/sources/declarative/requesters/query_properties/test_group_by_key.py b/unit_tests/sources/declarative/requesters/query_properties/test_group_by_key.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+import pytest
+
+from airbyte_cdk.sources.declarative.requesters.query_properties import GroupByKey
+from airbyte_cdk.sources.types import Record
+
+
+@pytest.mark.parametrize(
+    "key,record,expected_merge_key",
+    [
+        pytest.param(
+            ["id"],
+            Record(
+                stream_name="test",
+                data={"id": "0", "first_name": "Belinda", "last_name": "Lindsey"},
+            ),
+            "0",
+            id="test_get_merge_key_single",
+        ),
+        pytest.param(
+            ["last_name", "first_name"],
+            Record(
+                stream_name="test", data={"id": "1", "first_name": "Zion", "last_name": "Lindsey"}
+            ),
+            "Lindsey,Zion",
+            id="test_get_merge_key_single_multiple",
+        ),
+        pytest.param(
+            [""],
+            Record(stream_name="test", data={}),
+            None,
+            id="test_get_merge_key_not_present",
+        ),
+    ],
+)
+def test_get_merge_key(key, record, expected_merge_key):
+    group_by_key = GroupByKey(key=key, config={}, parameters={})
+
+    merge_key = group_by_key.get_group_key(record=record)
+    assert merge_key == expected_merge_key
diff --git a/unit_tests/sources/declarative/requesters/query_properties/test_properties_from_endpoint.py b/unit_tests/sources/declarative/requesters/query_properties/test_properties_from_endpoint.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+from unittest.mock import Mock
+
+from airbyte_cdk.sources.declarative.requesters.query_properties import PropertiesFromEndpoint
+from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
+from airbyte_cdk.sources.types import Record, StreamSlice
+
+CONFIG = {}
+
+
+def test_get_properties_from_endpoint():
+    expected_properties = [
+        "gentarou",
+        "light",
+        "aoi",
+        "clover",
+        "junpei",
+        "akane",
+        "unknown",
+        "hazuki",
+        "teruaki",
+    ]
+
+    retriever = Mock(spec=SimpleRetriever)
+    retriever.read_records.return_value = iter(
+        [
+            Record(stream_name="players", data={"id": "ace", "name": "gentarou"}),
+            Record(stream_name="players", data={"id": "snake", "name": "light"}),
+            Record(stream_name="players", data={"id": "santa", "name": "aoi"}),
+            Record(stream_name="players", data={"id": "clover", "name": "clover"}),
+            Record(stream_name="players", data={"id": "junpei", "name": "junpei"}),
+            Record(stream_name="players", data={"id": "june", "name": "akane"}),
+            Record(stream_name="players", data={"id": "seven", "name": "unknown"}),
+            Record(stream_name="players", data={"id": "lotus", "name": "hazuki"}),
+            Record(stream_name="players", data={"id": "nine", "name": "teruaki"}),
+        ]
+    )
+
+    properties_from_endpoint = PropertiesFromEndpoint(
+        retriever=retriever,
+        property_field_path=["name"],
+        config=CONFIG,
+        parameters={},
+    )
+
+    properties = list(
+        properties_from_endpoint.get_properties_from_endpoint(
+            stream_slice=StreamSlice(cursor_slice={}, partition={})
+        )
+    )
+
+    assert len(properties) == 9
+    assert properties == expected_properties
+
+
+def test_get_properties_from_endpoint_with_multiple_field_paths():
+    expected_properties = [
+        "gentarou",
+        "light",
+        "aoi",
+        "clover",
+        "junpei",
+        "akane",
+        "unknown",
+        "hazuki",
+        "teruaki",
+    ]
+
+    retriever = Mock(spec=SimpleRetriever)
+    retriever.read_records.return_value = iter(
+        [
+            Record(stream_name="players", data={"id": "ace", "names": {"first_name": "gentarou"}}),
+            Record(stream_name="players", data={"id": "snake", "names": {"first_name": "light"}}),
+            Record(stream_name="players", data={"id": "santa", "names": {"first_name": "aoi"}}),
+            Record(stream_name="players", data={"id": "clover", "names": {"first_name": "clover"}}),
+            Record(stream_name="players", data={"id": "junpei", "names": {"first_name": "junpei"}}),
+            Record(stream_name="players", data={"id": "june", "names": {"first_name": "akane"}}),
+            Record(stream_name="players", data={"id": "seven", "names": {"first_name": "unknown"}}),
+            Record(stream_name="players", data={"id": "lotus", "names": {"first_name": "hazuki"}}),
+            Record(stream_name="players", data={"id": "nine", "names": {"first_name": "teruaki"}}),
+        ]
+    )
+
+    properties_from_endpoint = PropertiesFromEndpoint(
+        retriever=retriever,
+        property_field_path=["names", "first_name"],
+        config=CONFIG,
+        parameters={},
+    )
+
+    properties = list(
+        properties_from_endpoint.get_properties_from_endpoint(
+            stream_slice=StreamSlice(cursor_slice={}, partition={})
+        )
+    )
+
+    assert len(properties) == 9
+    assert properties == expected_properties
+
+
+def test_get_properties_from_endpoint_with_interpolation():
+    config = {"top_level_field": "names"}
+    expected_properties = [
+        "gentarou",
+        "light",
+        "aoi",
+        "clover",
+        "junpei",
+        "akane",
+        "unknown",
+        "hazuki",
+        "teruaki",
+    ]
+
+    retriever = Mock(spec=SimpleRetriever)
+    retriever.read_records.return_value = iter(
+        [
+            Record(stream_name="players", data={"id": "ace", "names": {"first_name": "gentarou"}}),
+            Record(stream_name="players", data={"id": "snake", "names": {"first_name": "light"}}),
+            Record(stream_name="players", data={"id": "santa", "names": {"first_name": "aoi"}}),
+            Record(stream_name="players", data={"id": "clover", "names": {"first_name": "clover"}}),
+            Record(stream_name="players", data={"id": "junpei", "names": {"first_name": "junpei"}}),
+            Record(stream_name="players", data={"id": "june", "names": {"first_name": "akane"}}),
+            Record(stream_name="players", data={"id": "seven", "names": {"first_name": "unknown"}}),
+            Record(stream_name="players", data={"id": "lotus", "names": {"first_name": "hazuki"}}),
+            Record(stream_name="players", data={"id": "nine", "names": {"first_name": "teruaki"}}),
+        ]
+    )
+
+    properties_from_endpoint = PropertiesFromEndpoint(
+        retriever=retriever,
+        property_field_path=["{{ config['top_level_field'] }}", "first_name"],
+        config=config,
+        parameters={},
+    )
+
+    properties = list(
+        properties_from_endpoint.get_properties_from_endpoint(
+            stream_slice=StreamSlice(cursor_slice={}, partition={})
+        )
+    )
+
+    assert len(properties) == 9
+    assert properties == expected_properties
diff --git a/unit_tests/sources/declarative/requesters/query_properties/test_property_chunking.py b/unit_tests/sources/declarative/requesters/query_properties/test_property_chunking.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+
+import pytest
+
+from airbyte_cdk.sources.declarative.requesters.query_properties import GroupByKey, PropertyChunking
+from airbyte_cdk.sources.declarative.requesters.query_properties.property_chunking import (
+    PropertyLimitType,
+)
+from airbyte_cdk.sources.types import Record
+
+CONFIG = {}
+
+
+@pytest.mark.parametrize(
+    "property_fields,always_include_properties,property_limit_type,property_limit,expected_property_chunks",
+    [
+        pytest.param(
+            ["rick", "chelsea", "victoria", "tim", "saxon", "lochlan", "piper"],
+            None,
+            PropertyLimitType.property_count,
+            2,
+            [["rick", "chelsea"], ["victoria", "tim"], ["saxon", "lochlan"], ["piper"]],
+            id="test_property_chunking",
+        ),
+        pytest.param(
+            ["rick", "chelsea", "victoria", "tim"],
+            ["mook", "gaitok"],
+            PropertyLimitType.property_count,
+            2,
+            [["mook", "gaitok", "rick", "chelsea"], ["mook", "gaitok", "victoria", "tim"]],
+            id="test_property_chunking_with_always_include_fields",
+        ),
+        pytest.param(
+            ["rick", "chelsea", "victoria", "tim", "saxon", "lochlan", "piper"],
+            None,
+            PropertyLimitType.property_count,
+            None,
+            [["rick", "chelsea", "victoria", "tim", "saxon", "lochlan", "piper"]],
+            id="test_property_chunking_no_limit",
+        ),
+        pytest.param(
+            ["kate", "laurie", "jaclyn"],
+            None,
+            PropertyLimitType.characters,
+            10,
+            [["kate", "laurie"], ["jaclyn"]],
+            id="test_property_chunking_limit_characters",
+        ),
+        pytest.param(
+            [],
+            None,
+            PropertyLimitType.property_count,
+            5,
+            [[]],
+            id="test_property_chunking_no_properties",
+        ),
+    ],
+)
+def test_get_request_property_chunks(
+    property_fields,
+    always_include_properties,
+    property_limit_type,
+    property_limit,
+    expected_property_chunks,
+):
+    property_fields = iter(property_fields)
+    property_chunking = PropertyChunking(
+        property_limit_type=property_limit_type,
+        property_limit=property_limit,
+        record_merge_strategy=GroupByKey(key="id", config=CONFIG, parameters={}),
+        config=CONFIG,
+        parameters={},
+    )
+
+    property_chunks = list(
+        property_chunking.get_request_property_chunks(
+            property_fields=property_fields, always_include_properties=always_include_properties
+        )
+    )
+
+    assert len(property_chunks) == len(expected_property_chunks)
+    for i, expected_property_chunk in enumerate(expected_property_chunks):
+        assert property_chunks[i] == expected_property_chunk
+
+
+def test_get_merge_key():
+    record = Record(stream_name="test", data={"id": "0"})
+    property_chunking = PropertyChunking(
+        property_limit_type=PropertyLimitType.property_count,
+        property_limit=10,
+        record_merge_strategy=GroupByKey(key="id", config=CONFIG, parameters={}),
+        config=CONFIG,
+        parameters={},
+    )
+
+    merge_key = property_chunking.get_merge_key(record=record)
+    assert merge_key == "0"
diff --git a/unit_tests/sources/declarative/requesters/query_properties/test_query_properties.py b/unit_tests/sources/declarative/requesters/query_properties/test_query_properties.py

Original file line number	Diff line number	Diff line change
`@@ -505,7 +505,10 @@ def read_records(`
`505`	`505`	`current_record`
`506`	`506`	`)`
`507`	`507`	`)`
`508`		`- merged_records[merge_key].update(current_record)`
	`508`	`+ if merge_key:`
	`509`	`+ merged_records[merge_key].update(current_record)`
	`510`	`+ else:`
	`511`	`+ yield stream_data`
`509`	`512`	`else:`
`510`	`513`	`yield stream_data`
`511`	`514`	`if self.cursor:`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.`