Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions airbyte/_executors/declarative.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
ConcurrentDeclarativeSource,
)
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
from airbyte_cdk.sources.types import StreamSlice

from airbyte import exceptions as exc
from airbyte._executors.base import Executor


Expand Down Expand Up @@ -140,3 +144,139 @@ def install(self) -> None:
def uninstall(self) -> None:
    """No-op. The declarative source is included with PyAirbyte.

    Declarative (YAML) sources ship with the PyAirbyte package itself, so
    there is nothing installed on the host to remove. The method exists to
    satisfy the `Executor` interface.
    """
    # Intentionally empty: nothing was installed by `install()` either.

def fetch_record(
    self,
    stream_name: str,
    primary_key_value: str,
) -> dict[str, Any]:
    """Fetch a single record by primary key from a declarative stream.

    The already-instantiated stream's retriever is reused to issue a single
    HTTP GET request, built by appending the primary key value to the
    stream's base path (e.g. ``/users/123``). The retriever's record
    selector then parses the response into records.

    Args:
        stream_name: The name of the stream to fetch from.
        primary_key_value: The primary key value as a string.

    Returns:
        The fetched record as a dictionary.

    Raises:
        exc.AirbyteStreamNotFoundError: If the stream is not found.
        exc.AirbyteRecordNotFoundError: If the record is not found (empty response).
        NotImplementedError: If the stream does not use SimpleRetriever.
    """
    all_streams = self.declarative_source.streams(self._config_dict)

    # Locate the requested stream by name.
    matched_stream = next((s for s in all_streams if s.name == stream_name), None)

    if matched_stream is None:
        raise exc.AirbyteStreamNotFoundError(
            stream_name=stream_name,
            connector_name=self.name,
            available_streams=[s.name for s in all_streams],
            message=f"Stream '{stream_name}' not found in source.",
        )

    # Guard: only AbstractStream implementations are supported.
    if not isinstance(matched_stream, AbstractStream):
        raise NotImplementedError(
            f"Stream '{stream_name}' is type {type(matched_stream).__name__}; "
            "fetch_record() supports only AbstractStream."
        )

    # Guard: the stream must expose its retriever component.
    if not hasattr(matched_stream, "retriever"):
        raise NotImplementedError(
            f"Stream '{stream_name}' does not have a retriever attribute. "
            f"fetch_record() requires access to the stream's retriever component."
        )

    retriever = matched_stream.retriever

    # Guard: only SimpleRetriever exposes the requester API used below.
    if not isinstance(retriever, SimpleRetriever):
        raise NotImplementedError(
            f"Stream '{stream_name}' uses {type(retriever).__name__}, but fetch_record() "
            "only supports SimpleRetriever."
        )

    blank_slice = StreamSlice(partition={}, cursor_slice={})
    base_path = retriever.requester.get_path(
        stream_state={},
        stream_slice=blank_slice,
        next_page_token=None,
    )

    # Append the PK to the base path, e.g. "/users" -> "/users/123".
    fetch_path = (
        f"{base_path.rstrip('/')}/{primary_key_value}" if base_path else primary_key_value
    )

    response = retriever.requester.send_request(
        path=fetch_path,
        stream_state={},
        stream_slice=blank_slice,
        next_page_token=None,
        request_headers=retriever._request_headers(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
        request_params=retriever._request_params(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
        request_body_data=retriever._request_body_data(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
        request_body_json=retriever._request_body_json(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
    )

    # Guard: the requester may return None when the request fails outright.
    if response is None:
        raise exc.AirbyteRecordNotFoundError(
            stream_name=stream_name,
            primary_key_value=primary_key_value,
            connector_name=self.name,
            message=f"No response received when fetching record with primary key "
            f"'{primary_key_value}' from stream '{stream_name}'.",
        )

    # Best-effort schema lookup; falls back to an empty schema.
    schema: dict = {}
    if hasattr(matched_stream, "schema_loader"):
        loader = matched_stream.schema_loader
        if hasattr(loader, "get_json_schema"):
            schema = loader.get_json_schema()

    parsed_records = list(
        retriever.record_selector.select_records(
            response=response,
            stream_state={},
            records_schema=schema,
            stream_slice=blank_slice,
            next_page_token=None,
        )
    )

    # Guard: an empty selection means no record matched the primary key.
    if not parsed_records:
        raise exc.AirbyteRecordNotFoundError(
            stream_name=stream_name,
            primary_key_value=primary_key_value,
            connector_name=self.name,
            message=f"Record with primary key '{primary_key_value}' "
            f"not found in stream '{stream_name}'.",
        )

    head = parsed_records[0]
    if hasattr(head, "data"):
        return dict(head.data)  # type: ignore[arg-type]
    return dict(head)  # type: ignore[arg-type]
8 changes: 8 additions & 0 deletions airbyte/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,14 @@ class AirbyteStateNotFoundError(AirbyteConnectorError, KeyError):
available_streams: list[str] | None = None


@dataclass
class AirbyteRecordNotFoundError(AirbyteConnectorError):
    """Record not found in stream.

    Raised when a single-record lookup (e.g. by primary key) produces no
    matching record, or when the connector returns no response at all.
    """

    # Name of the stream that was searched, if known.
    stream_name: str | None = None
    # The primary key value that could not be found, if known.
    primary_key_value: str | None = None


@dataclass
class PyAirbyteSecretNotFoundError(PyAirbyteError):
"""Secret not found."""
Expand Down
118 changes: 118 additions & 0 deletions airbyte/mcp/local_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,124 @@ def read_source_stream_records(
return records


@mcp_tool(
    domain="local",
    read_only=True,
    idempotent=True,
    extra_help_text=_CONFIG_HELP,
)
def get_source_record(  # noqa: PLR0913, PLR0917
    source_connector_name: Annotated[
        str,
        Field(description="The name of the source connector."),
    ],
    stream_name: Annotated[
        str,
        Field(description="The name of the stream to fetch the record from."),
    ],
    pk_value: Annotated[
        str | int | dict[str, Any],
        Field(
            description=(
                "The primary key value to fetch. "
                "Can be a string, int, or dict with PK field name(s) as keys."
            )
        ),
    ],
    config: Annotated[
        dict | str | None,
        Field(
            description="The configuration for the source connector as a dict or JSON string.",
            default=None,
        ),
    ],
    config_file: Annotated[
        str | Path | None,
        Field(
            description="Path to a YAML or JSON file containing the source connector config.",
            default=None,
        ),
    ],
    config_secret_name: Annotated[
        str | None,
        Field(
            description="The name of the secret containing the configuration.",
            default=None,
        ),
    ],
    override_execution_mode: Annotated[
        Literal["docker", "python", "yaml", "auto"],
        Field(
            description="Optionally override the execution method to use for the connector. "
            "This parameter is ignored if manifest_path is provided (yaml mode will be used).",
            default="auto",
        ),
    ],
    manifest_path: Annotated[
        str | Path | None,
        Field(
            description="Path to a local YAML manifest file for declarative connectors.",
            default=None,
        ),
    ],
    allow_scanning: Annotated[
        bool,
        Field(
            description="If True, fall back to scanning stream records if direct fetch fails.",
            default=False,
        ),
    ],
    scan_timeout_seconds: Annotated[
        int,
        Field(
            description="Maximum time in seconds to spend scanning for the record.",
            default=60,
        ),
    ],
) -> dict[str, Any] | str:
    """Fetch a single record from a source connector by primary key value.

    A valid configuration is required, and only declarative (YAML-based)
    sources are supported. For streams backed by SimpleRetriever, the record
    is fetched directly by constructing the appropriate API request. When
    allow_scanning is True, a failed direct fetch falls back to scanning
    through the stream's records.
    """
    try:
        # Resolve the connector and apply its configuration.
        source: Source = _get_mcp_source(
            connector_name=source_connector_name,
            override_execution_mode=override_execution_mode,
            manifest_path=manifest_path,
        )
        source.set_config(
            resolve_config(
                config=config,
                config_file=config_file,
                config_secret_name=config_secret_name,
                config_spec_jsonschema=source.config_spec,
            )
        )

        # Delegate the actual lookup (direct fetch, optional scan fallback).
        record = source.get_record(
            stream_name=stream_name,
            pk_value=pk_value,
            allow_scanning=allow_scanning,
            scan_timeout_seconds=scan_timeout_seconds,
        )

        # Progress note goes to stderr so it does not pollute the tool result.
        print(
            f"Retrieved record from stream '{stream_name}' with pk_value={pk_value!r}",
            file=sys.stderr,
        )

    except Exception as ex:
        # Return (not raise) a diagnostic string so the MCP client sees the failure.
        tb_str = traceback.format_exc()
        return (
            f"Error fetching record from source '{source_connector_name}': {ex!r}, {ex!s}\n{tb_str}"
        )
    else:
        return record


@mcp_tool(
domain="local",
read_only=True,
Expand Down
Loading