Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions airbyte/_executors/declarative.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
ConcurrentDeclarativeSource,
)
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
from airbyte_cdk.sources.types import StreamSlice

from airbyte import exceptions as exc
from airbyte._executors.base import Executor


Expand Down Expand Up @@ -140,3 +144,139 @@ def install(self) -> None:
def uninstall(self) -> None:
    """No-op. The declarative source is included with PyAirbyte.

    Declarative (YAML) sources ship with the PyAirbyte package itself, so
    there is nothing installed on the host to remove. The method exists to
    satisfy the `Executor` interface.
    """
    # Intentionally empty: nothing was installed by `install()` either.

def fetch_record(
    self,
    stream_name: str,
    primary_key_value: str,
) -> dict[str, Any]:
    """Fetch a single record by primary key from a declarative stream.

    The already-instantiated stream's retriever is reused to issue a single
    HTTP GET request, built by appending the primary key value to the
    stream's base path (e.g. ``/users/123``). The retriever's record
    selector then parses the response into records.

    Args:
        stream_name: The name of the stream to fetch from.
        primary_key_value: The primary key value as a string.

    Returns:
        The fetched record as a dictionary.

    Raises:
        exc.AirbyteStreamNotFoundError: If the stream is not found.
        exc.AirbyteRecordNotFoundError: If the record is not found (empty response).
        NotImplementedError: If the stream does not use SimpleRetriever.
    """
    all_streams = self.declarative_source.streams(self._config_dict)

    # Locate the requested stream by name.
    matched_stream = next((s for s in all_streams if s.name == stream_name), None)

    if matched_stream is None:
        raise exc.AirbyteStreamNotFoundError(
            stream_name=stream_name,
            connector_name=self.name,
            available_streams=[s.name for s in all_streams],
            message=f"Stream '{stream_name}' not found in source.",
        )

    # Guard: only AbstractStream implementations are supported.
    if not isinstance(matched_stream, AbstractStream):
        raise NotImplementedError(
            f"Stream '{stream_name}' is type {type(matched_stream).__name__}; "
            "fetch_record() supports only AbstractStream."
        )

    # Guard: the stream must expose its retriever component.
    if not hasattr(matched_stream, "retriever"):
        raise NotImplementedError(
            f"Stream '{stream_name}' does not have a retriever attribute. "
            f"fetch_record() requires access to the stream's retriever component."
        )

    retriever = matched_stream.retriever

    # Guard: only SimpleRetriever exposes the requester API used below.
    if not isinstance(retriever, SimpleRetriever):
        raise NotImplementedError(
            f"Stream '{stream_name}' uses {type(retriever).__name__}, but fetch_record() "
            "only supports SimpleRetriever."
        )

    blank_slice = StreamSlice(partition={}, cursor_slice={})
    base_path = retriever.requester.get_path(
        stream_state={},
        stream_slice=blank_slice,
        next_page_token=None,
    )

    # Append the PK to the base path, e.g. "/users" -> "/users/123".
    fetch_path = (
        f"{base_path.rstrip('/')}/{primary_key_value}" if base_path else primary_key_value
    )

    response = retriever.requester.send_request(
        path=fetch_path,
        stream_state={},
        stream_slice=blank_slice,
        next_page_token=None,
        request_headers=retriever._request_headers(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
        request_params=retriever._request_params(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
        request_body_data=retriever._request_body_data(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
        request_body_json=retriever._request_body_json(  # noqa: SLF001
            stream_slice=blank_slice,
            next_page_token=None,
        ),
    )

    # Guard: the requester may return None when the request fails outright.
    if response is None:
        raise exc.AirbyteRecordNotFoundError(
            stream_name=stream_name,
            primary_key_value=primary_key_value,
            connector_name=self.name,
            message=f"No response received when fetching record with primary key "
            f"'{primary_key_value}' from stream '{stream_name}'.",
        )

    # Best-effort schema lookup; falls back to an empty schema.
    schema: dict = {}
    if hasattr(matched_stream, "schema_loader"):
        loader = matched_stream.schema_loader
        if hasattr(loader, "get_json_schema"):
            schema = loader.get_json_schema()

    parsed_records = list(
        retriever.record_selector.select_records(
            response=response,
            stream_state={},
            records_schema=schema,
            stream_slice=blank_slice,
            next_page_token=None,
        )
    )

    # Guard: an empty selection means no record matched the primary key.
    if not parsed_records:
        raise exc.AirbyteRecordNotFoundError(
            stream_name=stream_name,
            primary_key_value=primary_key_value,
            connector_name=self.name,
            message=f"Record with primary key '{primary_key_value}' "
            f"not found in stream '{stream_name}'.",
        )

    head = parsed_records[0]
    if hasattr(head, "data"):
        return dict(head.data)  # type: ignore[arg-type]
    return dict(head)  # type: ignore[arg-type]
8 changes: 8 additions & 0 deletions airbyte/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,14 @@ class AirbyteStateNotFoundError(AirbyteConnectorError, KeyError):
available_streams: list[str] | None = None


@dataclass
class AirbyteRecordNotFoundError(AirbyteConnectorError):
    """Record not found in stream.

    Raised when a single-record lookup (e.g. by primary key) produces no
    matching record, or when the connector returns no response at all.
    """

    # Name of the stream that was searched, if known.
    stream_name: str | None = None
    # The primary key value that could not be found, if known.
    primary_key_value: str | None = None


@dataclass
class PyAirbyteSecretNotFoundError(PyAirbyteError):
"""Secret not found."""
Expand Down
118 changes: 118 additions & 0 deletions airbyte/mcp/local_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,124 @@ def read_source_stream_records(
return records


@mcp_tool(
    domain="local",
    read_only=True,
    idempotent=True,
    extra_help_text=_CONFIG_HELP,
)
def get_source_record(  # noqa: PLR0913, PLR0917
    source_connector_name: Annotated[
        str,
        Field(description="The name of the source connector."),
    ],
    stream_name: Annotated[
        str,
        Field(description="The name of the stream to fetch the record from."),
    ],
    pk_value: Annotated[
        str | int | dict[str, Any],
        Field(
            description=(
                "The primary key value to fetch. "
                "Can be a string, int, or dict with PK field name(s) as keys."
            )
        ),
    ],
    config: Annotated[
        dict | str | None,
        Field(
            description="The configuration for the source connector as a dict or JSON string.",
            default=None,
        ),
    ],
    config_file: Annotated[
        str | Path | None,
        Field(
            description="Path to a YAML or JSON file containing the source connector config.",
            default=None,
        ),
    ],
    config_secret_name: Annotated[
        str | None,
        Field(
            description="The name of the secret containing the configuration.",
            default=None,
        ),
    ],
    override_execution_mode: Annotated[
        Literal["docker", "python", "yaml", "auto"],
        Field(
            description="Optionally override the execution method to use for the connector. "
            "This parameter is ignored if manifest_path is provided (yaml mode will be used).",
            default="auto",
        ),
    ],
    manifest_path: Annotated[
        str | Path | None,
        Field(
            description="Path to a local YAML manifest file for declarative connectors.",
            default=None,
        ),
    ],
    allow_scanning: Annotated[
        bool,
        Field(
            description="If True, fall back to scanning stream records if direct fetch fails.",
            default=False,
        ),
    ],
    scan_timeout_seconds: Annotated[
        int,
        Field(
            description="Maximum time in seconds to spend scanning for the record.",
            default=60,
        ),
    ],
) -> dict[str, Any] | str:
    """Fetch a single record from a source connector by primary key value.

    A valid configuration is required, and only declarative (YAML-based)
    sources are supported. For streams backed by SimpleRetriever, the record
    is fetched directly by constructing the appropriate API request. When
    allow_scanning is True, a failed direct fetch falls back to scanning
    through the stream's records.
    """
    try:
        # Resolve the connector and apply its configuration.
        source: Source = _get_mcp_source(
            connector_name=source_connector_name,
            override_execution_mode=override_execution_mode,
            manifest_path=manifest_path,
        )
        source.set_config(
            resolve_config(
                config=config,
                config_file=config_file,
                config_secret_name=config_secret_name,
                config_spec_jsonschema=source.config_spec,
            )
        )

        # Delegate the actual lookup (direct fetch, optional scan fallback).
        record = source.get_record(
            stream_name=stream_name,
            pk_value=pk_value,
            allow_scanning=allow_scanning,
            scan_timeout_seconds=scan_timeout_seconds,
        )

        # Progress note goes to stderr so it does not pollute the tool result.
        print(
            f"Retrieved record from stream '{stream_name}' with pk_value={pk_value!r}",
            file=sys.stderr,
        )

    except Exception as ex:
        # Return (not raise) a diagnostic string so the MCP client sees the failure.
        tb_str = traceback.format_exc()
        return (
            f"Error fetching record from source '{source_connector_name}': {ex!r}, {ex!s}\n{tb_str}"
        )
    else:
        return record


@mcp_tool(
domain="local",
read_only=True,
Expand Down
Loading