diff --git a/airbyte_cdk/config_observation.py b/airbyte_cdk/config_observation.py index ae85e8277..836eedf23 100644 --- a/airbyte_cdk/config_observation.py +++ b/airbyte_cdk/config_observation.py @@ -7,8 +7,9 @@ ) import time +from collections.abc import MutableMapping from copy import copy -from typing import Any, List, MutableMapping +from typing import Any import orjson @@ -38,7 +39,7 @@ def __init__( non_observed_mapping[item] = ObservedDict(value, observer) # Observe nested list of dicts - if isinstance(value, List): + if isinstance(value, list): for i, sub_value in enumerate(value): if isinstance(sub_value, MutableMapping): value[i] = ObservedDict(sub_value, observer) @@ -52,11 +53,11 @@ def __setitem__(self, item: Any, value: Any) -> None: previous_value = self.get(item) if isinstance(value, MutableMapping): value = ObservedDict(value, self.observer) - if isinstance(value, List): + if isinstance(value, list): for i, sub_value in enumerate(value): if isinstance(sub_value, MutableMapping): value[i] = ObservedDict(sub_value, self.observer) - super(ObservedDict, self).__setitem__(item, value) + super().__setitem__(item, value) if self.update_on_unchanged_value or value != previous_value: self.observer.update() diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py index 342ecee2d..70a6b137c 100644 --- a/airbyte_cdk/connector.py +++ b/airbyte_cdk/connector.py @@ -8,7 +8,8 @@ import os import pkgutil from abc import ABC, abstractmethod -from typing import Any, Generic, Mapping, Optional, Protocol, TypeVar +from collections.abc import Mapping +from typing import Any, Generic, Protocol, TypeVar import yaml @@ -19,7 +20,7 @@ ) -def load_optional_package_file(package: str, filename: str) -> Optional[bytes]: +def load_optional_package_file(package: str, filename: str) -> bytes | None: """Gets a resource from a package, returning None if it does not exist""" try: return pkgutil.get_data(package, filename) @@ -52,7 +53,7 @@ def read_config(config_path: str) -> Mapping[str, Any]: @staticmethod def _read_json_file(file_path: str) -> Any: - with open(file_path, "r") as file: + with open(file_path) as file: contents = file.read() try: diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py index 27929dfa2..7459028e8 100644 --- a/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -3,8 +3,9 @@ # +from collections.abc import Mapping from dataclasses import asdict, dataclass, field -from typing import Any, Dict, List, Mapping +from typing import Any from airbyte_cdk.connector_builder.test_reader import TestReader from airbyte_cdk.models import ( @@ -74,7 +75,7 @@ def read_stream( source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, - state: List[AirbyteStateMessage], + state: list[AirbyteStateMessage], limits: TestLimits, ) -> AirbyteMessage: try: @@ -128,7 +129,7 @@ def full_resolve_manifest(source: ManifestDeclarativeSource, limits: TestLimits) for stream in streams: stream["dynamic_stream_name"] = None - mapped_streams: Dict[str, List[Dict[str, Any]]] = {} + mapped_streams: dict[str, list[dict[str, Any]]] = {} for stream in source.dynamic_streams: generated_streams = mapped_streams.setdefault(stream["dynamic_stream_name"], []) diff --git a/airbyte_cdk/connector_builder/main.py b/airbyte_cdk/connector_builder/main.py index 80cf4afa9..ee4d7691a 100644 --- a/airbyte_cdk/connector_builder/main.py 
+++ b/airbyte_cdk/connector_builder/main.py @@ -4,7 +4,8 @@ import sys -from typing import Any, List, Mapping, Optional, Tuple +from collections.abc import Mapping +from typing import Any import orjson @@ -31,8 +32,8 @@ def get_config_and_catalog_from_args( - args: List[str], -) -> Tuple[str, Mapping[str, Any], Optional[ConfiguredAirbyteCatalog], Any]: + args: list[str], +) -> tuple[str, Mapping[str, Any], ConfiguredAirbyteCatalog | None, Any]: # TODO: Add functionality for the `debug` logger. # Currently, no one `debug` level log will be displayed during `read` a stream for a connector created through `connector-builder`. parsed_args = AirbyteEntrypoint.parse_args(args) @@ -71,8 +72,8 @@ def handle_connector_builder_request( source: ManifestDeclarativeSource, command: str, config: Mapping[str, Any], - catalog: Optional[ConfiguredAirbyteCatalog], - state: List[AirbyteStateMessage], + catalog: ConfiguredAirbyteCatalog | None, + state: list[AirbyteStateMessage], limits: TestLimits, ) -> AirbyteMessage: if command == "resolve_manifest": @@ -88,7 +89,7 @@ def handle_connector_builder_request( raise ValueError(f"Unrecognized command {command}.") -def handle_request(args: List[str]) -> str: +def handle_request(args: list[str]) -> str: command, config, catalog, state = get_config_and_catalog_from_args(args) limits = get_limits(config) source = create_source(config, limits) diff --git a/airbyte_cdk/connector_builder/models.py b/airbyte_cdk/connector_builder/models.py index 561c159fc..1014613ba 100644 --- a/airbyte_cdk/connector_builder/models.py +++ b/airbyte_cdk/connector_builder/models.py @@ -3,30 +3,30 @@ # from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from typing import Any @dataclass class HttpResponse: status: int - body: Optional[str] = None - headers: Optional[Dict[str, Any]] = None + body: str | None = None + headers: dict[str, Any] | None = None @dataclass class HttpRequest: url: str - headers: Optional[Dict[str, Any]] + headers: dict[str, Any] | None http_method: str - body: Optional[str] = None + body: str | None = None @dataclass class LogMessage: message: str level: str - internal_message: Optional[str] = None - stacktrace: Optional[str] = None + internal_message: str | None = None + stacktrace: str | None = None @dataclass @@ -40,34 +40,34 @@ class AuxiliaryRequest: @dataclass class StreamReadPages: - records: List[object] - request: Optional[HttpRequest] = None - response: Optional[HttpResponse] = None + records: list[object] + request: HttpRequest | None = None + response: HttpResponse | None = None @dataclass class StreamReadSlices: - pages: List[StreamReadPages] - slice_descriptor: Optional[Dict[str, Any]] - state: Optional[List[Dict[str, Any]]] = None - auxiliary_requests: Optional[List[AuxiliaryRequest]] = None + pages: list[StreamReadPages] + slice_descriptor: dict[str, Any] | None + state: list[dict[str, Any]] | None = None + auxiliary_requests: list[AuxiliaryRequest] | None = None @dataclass -class StreamRead(object): - logs: List[LogMessage] - slices: List[StreamReadSlices] +class StreamRead: + logs: list[LogMessage] + slices: list[StreamReadSlices] test_read_limit_reached: bool - auxiliary_requests: List[AuxiliaryRequest] - inferred_schema: Optional[Dict[str, Any]] - inferred_datetime_formats: Optional[Dict[str, str]] - latest_config_update: Optional[Dict[str, Any]] + auxiliary_requests: list[AuxiliaryRequest] + inferred_schema: dict[str, Any] | None + inferred_datetime_formats: dict[str, str] | None + latest_config_update: dict[str, 
Any] | None @dataclass class StreamReadRequestBody: - manifest: Dict[str, Any] + manifest: dict[str, Any] stream: str - config: Dict[str, Any] - state: Optional[Dict[str, Any]] - record_limit: Optional[int] + config: dict[str, Any] + state: dict[str, Any] | None + record_limit: int | None diff --git a/airbyte_cdk/connector_builder/test_reader/helpers.py b/airbyte_cdk/connector_builder/test_reader/helpers.py index fcd36189f..6b547d82f 100644 --- a/airbyte_cdk/connector_builder/test_reader/helpers.py +++ b/airbyte_cdk/connector_builder/test_reader/helpers.py @@ -3,9 +3,10 @@ # import json +from collections.abc import Mapping from copy import deepcopy from json import JSONDecodeError -from typing import Any, Dict, List, Mapping, Optional +from typing import Any from airbyte_cdk.connector_builder.models import ( AuxiliaryRequest, @@ -35,7 +36,7 @@ # ------- -def airbyte_message_to_json(message: AirbyteMessage) -> Optional[Dict[str, JsonType]]: +def airbyte_message_to_json(message: AirbyteMessage) -> dict[str, JsonType] | None: """ Converts an AirbyteMessage to a JSON dictionary if its type is LOG. @@ -64,7 +65,7 @@ def airbyte_message_to_json(message: AirbyteMessage) -> Optional[Dict[str, JsonT return None -def clean_config(config: Dict[str, Any]) -> Dict[str, Any]: +def clean_config(config: dict[str, Any]) -> dict[str, Any]: """ Cleans the configuration dictionary by removing all keys that start with a double underscore. @@ -85,7 +86,7 @@ def clean_config(config: Dict[str, Any]) -> Dict[str, Any]: return cleaned_config -def create_request_from_log_message(json_http_message: Dict[str, Any]) -> HttpRequest: +def create_request_from_log_message(json_http_message: dict[str, Any]) -> HttpRequest: """ Creates an HttpRequest object from the provided JSON-formatted log message. @@ -129,7 +130,7 @@ def create_request_from_log_message(json_http_message: Dict[str, Any]) -> HttpRe ) -def create_response_from_log_message(json_http_message: Dict[str, Any]) -> HttpResponse: +def create_response_from_log_message(json_http_message: dict[str, Any]) -> HttpResponse: """ Generate an HttpResponse instance from a JSON log message containing HTTP response details. @@ -174,7 +175,7 @@ def parse_json(log_message: AirbyteLogMessage) -> JsonType: return None -def parse_slice_description(log_message: str) -> Dict[str, Any]: +def parse_slice_description(log_message: str) -> dict[str, Any]: """ Parses a log message containing a JSON payload and returns it as a dictionary. @@ -203,7 +204,7 @@ def parse_slice_description(log_message: str) -> Dict[str, Any]: def should_close_page( at_least_one_page_in_group: bool, message: AirbyteMessage, - json_message: Optional[Dict[str, Any]], + json_message: dict[str, Any] | None, ) -> bool: """ Determines whether a page should be closed based on its content and state. @@ -269,7 +270,7 @@ def should_close_page_for_slice(at_least_one_page_in_group: bool, message: Airby return at_least_one_page_in_group and should_process_slice_descriptor(message) -def is_page_http_request(json_message: Optional[Dict[str, Any]]) -> bool: +def is_page_http_request(json_message: dict[str, Any] | None) -> bool: """ Determines whether a given JSON message represents a page HTTP request. 
@@ -291,7 +292,7 @@ def is_page_http_request(json_message: Optional[Dict[str, Any]]) -> bool: return is_http_log(json_message) and not is_auxiliary_http_request(json_message) -def is_http_log(message: Dict[str, JsonType]) -> bool: +def is_http_log(message: dict[str, JsonType]) -> bool: """ Determine if the provided log message represents an HTTP log. @@ -308,7 +309,7 @@ def is_http_log(message: Dict[str, JsonType]) -> bool: return bool(message.get("http", False)) -def is_auxiliary_http_request(message: Optional[Dict[str, Any]]) -> bool: +def is_auxiliary_http_request(message: dict[str, Any] | None) -> bool: """ Determines if the provided message represents an auxiliary HTTP request. @@ -415,10 +416,10 @@ def is_state_message(message: AirbyteMessage) -> bool: def handle_current_slice( - current_slice_pages: List[StreamReadPages], - current_slice_descriptor: Optional[Dict[str, Any]] = None, - latest_state_message: Optional[Dict[str, Any]] = None, - auxiliary_requests: Optional[List[AuxiliaryRequest]] = None, + current_slice_pages: list[StreamReadPages], + current_slice_descriptor: dict[str, Any] | None = None, + latest_state_message: dict[str, Any] | None = None, + auxiliary_requests: list[AuxiliaryRequest] | None = None, ) -> StreamReadSlices: """ Handles the current slice by packaging its pages, descriptor, and state into a StreamReadSlices instance. @@ -441,10 +442,10 @@ def handle_current_slice( def handle_current_page( - current_page_request: Optional[HttpRequest], - current_page_response: Optional[HttpResponse], - current_slice_pages: List[StreamReadPages], - current_page_records: List[Mapping[str, Any]], + current_page_request: HttpRequest | None, + current_page_response: HttpResponse | None, + current_slice_pages: list[StreamReadPages], + current_page_records: list[Mapping[str, Any]], ) -> tuple[None, None]: """ Closes the current page by appending its request, response, and records @@ -472,7 +473,7 @@ def handle_current_page( return None, None -def handle_auxiliary_request(json_message: Dict[str, JsonType]) -> AuxiliaryRequest: +def handle_auxiliary_request(json_message: dict[str, JsonType]) -> AuxiliaryRequest: """ Parses the provided JSON message and constructs an AuxiliaryRequest object by extracting relevant fields from nested dictionaries. @@ -517,10 +518,10 @@ def handle_auxiliary_request(json_message: Dict[str, JsonType]) -> AuxiliaryRequ def handle_log_message( message: AirbyteMessage, - json_message: Dict[str, JsonType] | None, + json_message: dict[str, JsonType] | None, at_least_one_page_in_group: bool, - current_page_request: Optional[HttpRequest], - current_page_response: Optional[HttpResponse], + current_page_request: HttpRequest | None, + current_page_response: HttpResponse | None, ) -> LOG_MESSAGES_OUTPUT_TYPE: """ Process a log message by handling both HTTP-specific and auxiliary log entries. @@ -571,7 +572,7 @@ def handle_record_message( schema_inferrer: SchemaInferrer, datetime_format_inferrer: DatetimeFormatInferrer, records_count: int, - current_page_records: List[Mapping[str, Any]], + current_page_records: list[Mapping[str, Any]], ) -> int: """ Processes an Airbyte record message by updating the current batch and accumulating schema and datetime format information. 
@@ -600,7 +601,7 @@ def handle_record_message( # ------- -def get_airbyte_cdk_from_message(json_message: Dict[str, JsonType]) -> dict: # type: ignore +def get_airbyte_cdk_from_message(json_message: dict[str, JsonType]) -> dict: # type: ignore """ Retrieves the "airbyte_cdk" dictionary from the provided JSON message. @@ -658,7 +659,7 @@ def get_auxiliary_request_title_prefix(stream: dict) -> str: # type: ignore return "Parent stream: " if stream.get("is_substream", False) else "" -def get_http_property_from_message(json_message: Dict[str, JsonType]) -> dict: # type: ignore +def get_http_property_from_message(json_message: dict[str, JsonType]) -> dict: # type: ignore """ Retrieves the "http" dictionary from the provided JSON message. diff --git a/airbyte_cdk/connector_builder/test_reader/message_grouper.py b/airbyte_cdk/connector_builder/test_reader/message_grouper.py index e4478a0ad..0f311b076 100644 --- a/airbyte_cdk/connector_builder/test_reader/message_grouper.py +++ b/airbyte_cdk/connector_builder/test_reader/message_grouper.py @@ -3,7 +3,8 @@ # -from typing import Any, Dict, Iterator, List, Mapping, Optional +from collections.abc import Iterator, Mapping +from typing import Any from airbyte_cdk.connector_builder.models import ( AuxiliaryRequest, @@ -85,13 +86,13 @@ def get_message_groups( records_count = 0 at_least_one_page_in_group = False - current_page_records: List[Mapping[str, Any]] = [] - current_slice_descriptor: Optional[Dict[str, Any]] = None - current_slice_pages: List[StreamReadPages] = [] - current_page_request: Optional[HttpRequest] = None - current_page_response: Optional[HttpResponse] = None - latest_state_message: Optional[Dict[str, Any]] = None - slice_auxiliary_requests: List[AuxiliaryRequest] = [] + current_page_records: list[Mapping[str, Any]] = [] + current_slice_descriptor: dict[str, Any] | None = None + current_slice_pages: list[StreamReadPages] = [] + current_page_request: HttpRequest | None = None + current_page_response: HttpResponse | None = None + latest_state_message: dict[str, Any] | None = None + slice_auxiliary_requests: list[AuxiliaryRequest] = [] while records_count < limit and (message := next(messages, None)): json_message = airbyte_message_to_json(message) diff --git a/airbyte_cdk/connector_builder/test_reader/reader.py b/airbyte_cdk/connector_builder/test_reader/reader.py index b776811eb..7eb191ea0 100644 --- a/airbyte_cdk/connector_builder/test_reader/reader.py +++ b/airbyte_cdk/connector_builder/test_reader/reader.py @@ -4,7 +4,8 @@ import logging -from typing import Any, Dict, Iterator, List, Mapping, Optional, Union +from collections.abc import Iterator, Mapping +from typing import Any from airbyte_cdk.connector_builder.models import ( AuxiliaryRequest, @@ -83,8 +84,8 @@ def run_test_read( source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, - state: List[AirbyteStateMessage], - record_limit: Optional[int] = None, + state: list[AirbyteStateMessage], + record_limit: int | None = None, ) -> StreamRead: """ Run a test read for the connector by reading from a single stream and inferring schema and datetime formats. @@ -140,8 +141,8 @@ def run_test_read( ) def _pk_to_nested_and_composite_field( - self, field: Optional[Union[str, List[str], List[List[str]]]] - ) -> List[List[str]]: + self, field: str | list[str] | list[list[str]] | None + ) -> list[list[str]]: """ Converts a primary key definition into a nested list representation. 
@@ -173,8 +174,8 @@ def _pk_to_nested_and_composite_field( return field # type: ignore # the type of field is expected to be List[List[str]] here def _cursor_field_to_nested_and_composite_field( - self, field: Union[str, List[str]] - ) -> List[List[str]]: + self, field: str | list[str] + ) -> list[list[str]]: """ Transforms the cursor field input into a nested list format suitable for further processing. @@ -208,7 +209,7 @@ def _cursor_field_to_nested_and_composite_field( raise ValueError(f"Unknown type for cursor field `{field}") - def _check_record_limit(self, record_limit: Optional[int] = None) -> int: + def _check_record_limit(self, record_limit: int | None = None) -> int: """ Checks and adjusts the provided record limit to ensure it falls within the valid range. @@ -265,7 +266,7 @@ def _categorise_groups(self, message_groups: MESSAGE_GROUPS) -> GROUPED_MESSAGES slices = [] log_messages = [] auxiliary_requests = [] - latest_config_update: Optional[AirbyteControlMessage] = None + latest_config_update: AirbyteControlMessage | None = None for message_group in message_groups: match message_group: @@ -302,7 +303,7 @@ def _get_infered_schema( self, configured_catalog: ConfiguredAirbyteCatalog, schema_inferrer: SchemaInferrer, - log_messages: List[LogMessage], + log_messages: list[LogMessage], ) -> INFERRED_SCHEMA_OUTPUT_TYPE: """ Retrieves the inferred schema from the given configured catalog using the provided schema inferrer. @@ -337,7 +338,7 @@ def _get_infered_schema( def _get_latest_config_update( self, latest_config_update: AirbyteControlMessage | None, - ) -> Dict[str, Any] | None: + ) -> dict[str, Any] | None: """ Retrieves a cleaned configuration from the latest Airbyte control message. @@ -362,7 +363,7 @@ def _read_stream( source: DeclarativeSource, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, - state: List[AirbyteStateMessage], + state: list[AirbyteStateMessage], ) -> Iterator[AirbyteMessage]: """ Reads messages from the given DeclarativeSource using an AirbyteEntrypoint. @@ -407,7 +408,7 @@ def _read_stream( e, message=error_message ).as_airbyte_message() - def _has_reached_limit(self, slices: List[StreamReadSlices]) -> bool: + def _has_reached_limit(self, slices: list[StreamReadSlices]) -> bool: """ Determines whether the provided collection of slices has reached any defined limits. diff --git a/airbyte_cdk/connector_builder/test_reader/types.py b/airbyte_cdk/connector_builder/test_reader/types.py index 0bb95d8a6..8bb463560 100644 --- a/airbyte_cdk/connector_builder/test_reader/types.py +++ b/airbyte_cdk/connector_builder/test_reader/types.py @@ -29,7 +29,7 @@ - An optional AirbyteControlMessage that, if present, governs control flow in message processing. 
""" -from typing import Any, Iterable, List +from collections.abc import Iterable from airbyte_cdk.connector_builder.models import ( AuxiliaryRequest, @@ -57,13 +57,13 @@ INFERRED_SCHEMA_OUTPUT_TYPE = tuple[ InferredSchema | None, - List[LogMessage], + list[LogMessage], ] GROUPED_MESSAGES = tuple[ - List[StreamReadSlices], - List[LogMessage], - List[AuxiliaryRequest], + list[StreamReadSlices], + list[LogMessage], + list[AuxiliaryRequest], AirbyteControlMessage | None, ] diff --git a/airbyte_cdk/destinations/destination.py b/airbyte_cdk/destinations/destination.py index 547f96684..0b82ade30 100644 --- a/airbyte_cdk/destinations/destination.py +++ b/airbyte_cdk/destinations/destination.py @@ -7,7 +7,8 @@ import logging import sys from abc import ABC, abstractmethod -from typing import Any, Iterable, List, Mapping +from collections.abc import Iterable, Mapping +from typing import Any import orjson @@ -68,7 +69,7 @@ def _run_write( ) logger.info("Writing complete.") - def parse_args(self, args: List[str]) -> argparse.Namespace: + def parse_args(self, args: list[str]) -> argparse.Namespace: """ :param args: commandline arguments :return: @@ -146,7 +147,7 @@ def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]: input_stream=wrapped_stdin, ) - def run(self, args: List[str]) -> None: + def run(self, args: list[str]) -> None: init_uncaught_exception_handler(logger) parsed_args = self.parse_args(args) output_messages = self.run_cmd(parsed_args) diff --git a/airbyte_cdk/destinations/vector_db_based/config.py b/airbyte_cdk/destinations/vector_db_based/config.py index c7c40ecaa..f580dac94 100644 --- a/airbyte_cdk/destinations/vector_db_based/config.py +++ b/airbyte_cdk/destinations/vector_db_based/config.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal, Union import dpath from pydantic.v1 import BaseModel, Field @@ -13,7 +13,7 @@ class SeparatorSplitterConfigModel(BaseModel): mode: Literal["separator"] = Field("separator", const=True) - separators: List[str] = Field( + separators: list[str] = Field( default=['"\\n\\n"', '"\\n"', '" "', '""'], title="Separators", description='List of separator strings to split text fields by. The separator itself needs to be wrapped in double quotes, e.g. to split by the dot character, use ".". To split by a newline, use "\\n".', @@ -102,14 +102,14 @@ class ProcessingConfigModel(BaseModel): description="Size of overlap between chunks in tokens to store in vector store to better capture relevant context", default=0, ) - text_fields: Optional[List[str]] = Field( + text_fields: list[str] | None = Field( default=[], title="Text fields to embed", description="List of fields in the record that should be used to calculate the embedding. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered text fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. 
`users.*.name` will access all `names` fields in all entries of the `users` array.", always_show=True, examples=["text", "user.name", "users.*.name"], ) - metadata_fields: Optional[List[str]] = Field( + metadata_fields: list[str] | None = Field( default=[], title="Fields to store as metadata", description="List of fields in the record that should be stored as metadata. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered metadata fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. When specifying nested paths, all matching values are flattened into an array set to a field named by the path.", @@ -123,7 +123,7 @@ class ProcessingConfigModel(BaseModel): type="object", description="Split text fields into chunks based on the specified method.", ) - field_name_mappings: Optional[List[FieldNameMappingConfigModel]] = Field( + field_name_mappings: list[FieldNameMappingConfigModel] | None = Field( default=[], title="Field name mappings", description="List of fields to rename. Not applicable for nested fields, but can be used to rename fields already flattened via dot notation.", @@ -251,13 +251,13 @@ class VectorDBConfigModel(BaseModel): Processing, embedding and advanced configuration are provided by this base class, while the indexing configuration is provided by the destination connector in the sub class. """ - embedding: Union[ - OpenAIEmbeddingConfigModel, - CohereEmbeddingConfigModel, - FakeEmbeddingConfigModel, - AzureOpenAIEmbeddingConfigModel, - OpenAICompatibleEmbeddingConfigModel, - ] = Field( + embedding: ( + OpenAIEmbeddingConfigModel + | CohereEmbeddingConfigModel + | FakeEmbeddingConfigModel + | AzureOpenAIEmbeddingConfigModel + | OpenAICompatibleEmbeddingConfigModel + ) = Field( ..., title="Embedding", description="Embedding configuration", @@ -285,14 +285,14 @@ class Config: } @staticmethod - def remove_discriminator(schema: Dict[str, Any]) -> None: + def remove_discriminator(schema: dict[str, Any]) -> None: """pydantic adds "discriminator" to the schema for oneOfs, which is not treated right by the platform as we inline all references""" dpath.delete(schema, "properties/**/discriminator") @classmethod - def schema(cls, by_alias: bool = True, ref_template: str = "") -> Dict[str, Any]: + def schema(cls, by_alias: bool = True, ref_template: str = "") -> dict[str, Any]: """we're overriding the schema classmethod to enable some post-processing""" - schema: Dict[str, Any] = super().schema() + schema: dict[str, Any] = super().schema() schema = resolve_refs(schema) cls.remove_discriminator(schema) return schema diff --git a/airbyte_cdk/destinations/vector_db_based/document_processor.py b/airbyte_cdk/destinations/vector_db_based/document_processor.py index c007bf9e2..771c18844 100644 --- a/airbyte_cdk/destinations/vector_db_based/document_processor.py +++ b/airbyte_cdk/destinations/vector_db_based/document_processor.py @@ -4,8 +4,9 @@ import json import logging +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Dict, List, Mapping, Optional, Tuple +from typing import Any import dpath from langchain.text_splitter import Language, RecursiveCharacterTextSplitter @@ -34,10 +35,10 @@ 
@dataclass class Chunk: - page_content: Optional[str] - metadata: Dict[str, Any] + page_content: str | None + metadata: dict[str, Any] record: AirbyteRecordMessage - embedding: Optional[List[float]] = None + embedding: list[float] | None = None headers_to_split_on = [ @@ -69,7 +70,7 @@ class DocumentProcessor: streams: Mapping[str, ConfiguredAirbyteStream] @staticmethod - def check_config(config: ProcessingConfigModel) -> Optional[str]: + def check_config(config: ProcessingConfigModel) -> str | None: if config.text_splitter is not None and config.text_splitter.mode == "separator": for s in config.text_splitter.separators: try: @@ -84,7 +85,7 @@ def _get_text_splitter( self, chunk_size: int, chunk_overlap: int, - splitter_config: Optional[TextSplitterConfigModel], + splitter_config: TextSplitterConfigModel | None, ) -> RecursiveCharacterTextSplitter: if splitter_config is None: splitter_config = SeparatorSplitterConfigModel(mode="separator") @@ -128,7 +129,7 @@ def __init__(self, config: ProcessingConfigModel, catalog: ConfiguredAirbyteCata self.field_name_mappings = config.field_name_mappings self.logger = logging.getLogger("airbyte.document_processor") - def process(self, record: AirbyteRecordMessage) -> Tuple[List[Chunk], Optional[str]]: + def process(self, record: AirbyteRecordMessage) -> tuple[list[Chunk], str | None]: """ Generate documents from records. :param records: List of AirbyteRecordMessages @@ -159,7 +160,7 @@ def process(self, record: AirbyteRecordMessage) -> Tuple[List[Chunk], Optional[s ) return chunks, id_to_delete - def _generate_document(self, record: AirbyteRecordMessage) -> Optional[Document]: + def _generate_document(self, record: AirbyteRecordMessage) -> Document | None: relevant_fields = self._extract_relevant_fields(record, self.text_fields) if len(relevant_fields) == 0: return None @@ -168,8 +169,8 @@ def _generate_document(self, record: AirbyteRecordMessage) -> Optional[Document] return Document(page_content=text, metadata=metadata) def _extract_relevant_fields( - self, record: AirbyteRecordMessage, fields: Optional[List[str]] - ) -> Dict[str, Any]: + self, record: AirbyteRecordMessage, fields: list[str] | None + ) -> dict[str, Any]: relevant_fields = {} if fields and len(fields) > 0: for field in fields: @@ -180,7 +181,7 @@ def _extract_relevant_fields( relevant_fields = record.data return self._remap_field_names(relevant_fields) - def _extract_metadata(self, record: AirbyteRecordMessage) -> Dict[str, Any]: + def _extract_metadata(self, record: AirbyteRecordMessage) -> dict[str, Any]: metadata = self._extract_relevant_fields(record, self.metadata_fields) metadata[METADATA_STREAM_FIELD] = create_stream_identifier(record) primary_key = self._extract_primary_key(record) @@ -188,7 +189,7 @@ def _extract_metadata(self, record: AirbyteRecordMessage) -> Dict[str, Any]: metadata[METADATA_RECORD_ID_FIELD] = primary_key return metadata - def _extract_primary_key(self, record: AirbyteRecordMessage) -> Optional[str]: + def _extract_primary_key(self, record: AirbyteRecordMessage) -> str | None: stream_identifier = create_stream_identifier(record) current_stream: ConfiguredAirbyteStream = self.streams[stream_identifier] # if the sync mode is deduping, use the primary key to upsert existing records instead of appending new ones @@ -207,11 +208,11 @@ def _extract_primary_key(self, record: AirbyteRecordMessage) -> Optional[str]: stringified_primary_key = "_".join(primary_key) return f"{stream_identifier}_{stringified_primary_key}" - def _split_document(self, doc: Document) -> 
List[Document]: - chunks: List[Document] = self.splitter.split_documents([doc]) + def _split_document(self, doc: Document) -> list[Document]: + chunks: list[Document] = self.splitter.split_documents([doc]) return chunks - def _remap_field_names(self, fields: Dict[str, Any]) -> Dict[str, Any]: + def _remap_field_names(self, fields: dict[str, Any]) -> dict[str, Any]: if not self.field_name_mappings: return fields diff --git a/airbyte_cdk/destinations/vector_db_based/embedder.py b/airbyte_cdk/destinations/vector_db_based/embedder.py index 6889c8e16..59656fceb 100644 --- a/airbyte_cdk/destinations/vector_db_based/embedder.py +++ b/airbyte_cdk/destinations/vector_db_based/embedder.py @@ -5,7 +5,7 @@ import os from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import List, Optional, Union, cast +from typing import cast from langchain.embeddings.cohere import CohereEmbeddings from langchain.embeddings.fake import FakeEmbeddings @@ -45,11 +45,11 @@ def __init__(self) -> None: pass @abstractmethod - def check(self) -> Optional[str]: + def check(self) -> str | None: pass @abstractmethod - def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]: + def embed_documents(self, documents: list[Document]) -> list[list[float] | None]: """ Embed the text of each chunk and return the resulting embedding vectors. If a chunk cannot be embedded or is configured to not be embedded, return None for that chunk. @@ -73,14 +73,14 @@ def __init__(self, embeddings: OpenAIEmbeddings, chunk_size: int): self.embeddings = embeddings self.chunk_size = chunk_size - def check(self) -> Optional[str]: + def check(self) -> str | None: try: self.embeddings.embed_query("test") except Exception as e: return format_exception(e) return None - def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]: + def embed_documents(self, documents: list[Document]) -> list[list[float] | None]: """ Embed the text of each chunk and return the resulting embedding vectors. 
@@ -91,7 +91,7 @@ def embed_documents(self, documents: List[Document]) -> List[Optional[List[float # Each chunk can hold at most self.chunk_size tokens, so tokens-per-minute by maximum tokens per chunk is the number of documents that can be embedded at once without exhausting the limit in a single request embedding_batch_size = OPEN_AI_TOKEN_LIMIT // self.chunk_size batches = create_chunks(documents, batch_size=embedding_batch_size) - embeddings: List[Optional[List[float]]] = [] + embeddings: list[list[float] | None] = [] for batch in batches: embeddings.extend( self.embeddings.embed_documents([chunk.page_content for chunk in batch]) @@ -143,16 +143,16 @@ def __init__(self, config: CohereEmbeddingConfigModel): cohere_api_key=config.cohere_key, model="embed-english-light-v2.0" ) # type: ignore - def check(self) -> Optional[str]: + def check(self) -> str | None: try: self.embeddings.embed_query("test") except Exception as e: return format_exception(e) return None - def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]: + def embed_documents(self, documents: list[Document]) -> list[list[float] | None]: return cast( - List[Optional[List[float]]], + list[list[float] | None], self.embeddings.embed_documents([document.page_content for document in documents]), ) @@ -167,16 +167,16 @@ def __init__(self, config: FakeEmbeddingConfigModel): super().__init__() self.embeddings = FakeEmbeddings(size=OPEN_AI_VECTOR_SIZE) - def check(self) -> Optional[str]: + def check(self) -> str | None: try: self.embeddings.embed_query("test") except Exception as e: return format_exception(e) return None - def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]: + def embed_documents(self, documents: list[Document]) -> list[list[float] | None]: return cast( - List[Optional[List[float]]], + list[list[float] | None], self.embeddings.embed_documents([document.page_content for document in documents]), ) @@ -203,7 +203,7 @@ def __init__(self, config: OpenAICompatibleEmbeddingConfigModel): disallowed_special=(), ) # type: ignore - def check(self) -> Optional[str]: + def check(self) -> str | None: deployment_mode = os.environ.get("DEPLOYMENT_MODE", "") if ( deployment_mode.casefold() == CLOUD_DEPLOYMENT_MODE @@ -217,9 +217,9 @@ def check(self) -> Optional[str]: return format_exception(e) return None - def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]: + def embed_documents(self, documents: list[Document]) -> list[list[float] | None]: return cast( - List[Optional[List[float]]], + list[list[float] | None], self.embeddings.embed_documents([document.page_content for document in documents]), ) @@ -234,15 +234,15 @@ def __init__(self, config: FromFieldEmbeddingConfigModel): super().__init__() self.config = config - def check(self) -> Optional[str]: + def check(self) -> str | None: return None - def embed_documents(self, documents: List[Document]) -> List[Optional[List[float]]]: + def embed_documents(self, documents: list[Document]) -> list[list[float] | None]: """ From each chunk, pull the embedding from the field specified in the config. Check that the field exists, is a list of numbers and is the correct size. If not, raise an AirbyteTracedException explaining the problem. 
""" - embeddings: List[Optional[List[float]]] = [] + embeddings: list[list[float] | None] = [] for document in documents: data = document.record.data if self.config.field_name not in data: @@ -252,7 +252,7 @@ def embed_documents(self, documents: List[Document]) -> List[Optional[List[float message=f"Record {str(data)[:250]}... in stream {document.record.stream} does not contain embedding vector field {self.config.field_name}. Please check your embedding configuration, the embedding vector field has to be set correctly on every record.", ) field = data[self.config.field_name] - if not isinstance(field, list) or not all(isinstance(x, (int, float)) for x in field): + if not isinstance(field, list) or not all(isinstance(x, int | float) for x in field): raise AirbyteTracedException( internal_message="Embedding vector field not a list of numbers", failure_type=FailureType.config_error, @@ -284,14 +284,12 @@ def embedding_dimensions(self) -> int: def create_from_config( - embedding_config: Union[ - AzureOpenAIEmbeddingConfigModel, - CohereEmbeddingConfigModel, - FakeEmbeddingConfigModel, - FromFieldEmbeddingConfigModel, - OpenAIEmbeddingConfigModel, - OpenAICompatibleEmbeddingConfigModel, - ], + embedding_config: AzureOpenAIEmbeddingConfigModel + | CohereEmbeddingConfigModel + | FakeEmbeddingConfigModel + | FromFieldEmbeddingConfigModel + | OpenAIEmbeddingConfigModel + | OpenAICompatibleEmbeddingConfigModel, processing_config: ProcessingConfigModel, ) -> Embedder: if embedding_config.mode == "azure_openai" or embedding_config.mode == "openai": diff --git a/airbyte_cdk/destinations/vector_db_based/indexer.py b/airbyte_cdk/destinations/vector_db_based/indexer.py index c49f576a6..68048db53 100644 --- a/airbyte_cdk/destinations/vector_db_based/indexer.py +++ b/airbyte_cdk/destinations/vector_db_based/indexer.py @@ -4,7 +4,8 @@ import itertools from abc import ABC, abstractmethod -from typing import Any, Generator, Iterable, List, Optional, Tuple, TypeVar +from collections.abc import Generator, Iterable +from typing import Any, TypeVar from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog @@ -31,14 +32,14 @@ def pre_sync(self, catalog: ConfiguredAirbyteCatalog) -> None: """ pass - def post_sync(self) -> List[AirbyteMessage]: + def post_sync(self) -> list[AirbyteMessage]: """ Run after the sync finishes. This method should be used to perform any cleanup operations and can return a list of AirbyteMessages to be logged. """ return [] @abstractmethod - def index(self, document_chunks: List[Chunk], namespace: str, stream: str) -> None: + def index(self, document_chunks: list[Chunk], namespace: str, stream: str) -> None: """ Index a list of document chunks. @@ -48,7 +49,7 @@ def index(self, document_chunks: List[Chunk], namespace: str, stream: str) -> No pass @abstractmethod - def delete(self, delete_ids: List[str], namespace: str, stream: str) -> None: + def delete(self, delete_ids: list[str], namespace: str, stream: str) -> None: """ Delete document chunks belonging to certain record ids. @@ -59,7 +60,7 @@ def delete(self, delete_ids: List[str], namespace: str, stream: str) -> None: pass @abstractmethod - def check(self) -> Optional[str]: + def check(self) -> str | None: """ Check if the indexer is configured correctly. This method should be used to check if the indexer is configured correctly and return an error message if it is not. 
""" @@ -69,7 +70,7 @@ def check(self) -> Optional[str]: T = TypeVar("T") -def chunks(iterable: Iterable[T], batch_size: int) -> Generator[Tuple[T, ...], None, None]: +def chunks(iterable: Iterable[T], batch_size: int) -> Generator[tuple[T, ...], None, None]: """A helper function to break an iterable into chunks of size batch_size.""" it = iter(iterable) chunk = tuple(itertools.islice(it, batch_size)) diff --git a/airbyte_cdk/destinations/vector_db_based/test_utils.py b/airbyte_cdk/destinations/vector_db_based/test_utils.py index a2f3d3d83..33b3782ab 100644 --- a/airbyte_cdk/destinations/vector_db_based/test_utils.py +++ b/airbyte_cdk/destinations/vector_db_based/test_utils.py @@ -4,7 +4,7 @@ import json import unittest -from typing import Any, Dict +from typing import Any from airbyte_cdk.models import ( AirbyteMessage, @@ -47,7 +47,7 @@ def _get_configured_catalog( return ConfiguredAirbyteCatalog(streams=[overwrite_stream]) - def _state(self, data: Dict[str, Any]) -> AirbyteMessage: + def _state(self, data: dict[str, Any]) -> AirbyteMessage: return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=data)) def _record(self, stream: str, str_value: str, int_value: int) -> AirbyteMessage: @@ -59,5 +59,5 @@ def _record(self, stream: str, str_value: str, int_value: int) -> AirbyteMessage ) def setUp(self) -> None: - with open("secrets/config.json", "r") as f: + with open("secrets/config.json") as f: self.config = json.loads(f.read()) diff --git a/airbyte_cdk/destinations/vector_db_based/utils.py b/airbyte_cdk/destinations/vector_db_based/utils.py index dbb1f4714..0fd12a986 100644 --- a/airbyte_cdk/destinations/vector_db_based/utils.py +++ b/airbyte_cdk/destinations/vector_db_based/utils.py @@ -4,7 +4,8 @@ import itertools import traceback -from typing import Any, Iterable, Iterator, Tuple, Union +from collections.abc import Iterable, Iterator +from typing import Any from airbyte_cdk.models import AirbyteRecordMessage, AirbyteStream @@ -17,7 +18,7 @@ def format_exception(exception: Exception) -> str: ) -def create_chunks(iterable: Iterable[Any], batch_size: int) -> Iterator[Tuple[Any, ...]]: +def create_chunks(iterable: Iterable[Any], batch_size: int) -> Iterator[tuple[Any, ...]]: """A helper function to break an iterable into chunks of size batch_size.""" it = iter(iterable) chunk = tuple(itertools.islice(it, batch_size)) @@ -26,7 +27,7 @@ def create_chunks(iterable: Iterable[Any], batch_size: int) -> Iterator[Tuple[An chunk = tuple(itertools.islice(it, batch_size)) -def create_stream_identifier(stream: Union[AirbyteStream, AirbyteRecordMessage]) -> str: +def create_stream_identifier(stream: AirbyteStream | AirbyteRecordMessage) -> str: if isinstance(stream, AirbyteStream): return str(stream.name if stream.namespace is None else f"{stream.namespace}_{stream.name}") else: diff --git a/airbyte_cdk/destinations/vector_db_based/writer.py b/airbyte_cdk/destinations/vector_db_based/writer.py index 45c7c7326..f4c168bf8 100644 --- a/airbyte_cdk/destinations/vector_db_based/writer.py +++ b/airbyte_cdk/destinations/vector_db_based/writer.py @@ -4,7 +4,7 @@ from collections import defaultdict -from typing import Dict, Iterable, List, Tuple +from collections.abc import Iterable from airbyte_cdk.destinations.vector_db_based.config import ProcessingConfigModel from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk, DocumentProcessor @@ -42,8 +42,8 @@ def __init__( self._init_batch() def _init_batch(self) -> None: - self.chunks: Dict[Tuple[str, str], List[Chunk]] = 
defaultdict(list) - self.ids_to_delete: Dict[Tuple[str, str], List[str]] = defaultdict(list) + self.chunks: dict[tuple[str, str], list[Chunk]] = defaultdict(list) + self.ids_to_delete: dict[tuple[str, str], list[str]] = defaultdict(list) self.number_of_chunks = 0 def _convert_to_document(self, chunk: Chunk) -> Document: diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 0a13cfebe..2b08722eb 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -12,8 +12,9 @@ import sys import tempfile from collections import defaultdict +from collections.abc import Iterable, Mapping from functools import wraps -from typing import Any, DefaultDict, Iterable, List, Mapping, Optional +from typing import Any from urllib.parse import urlparse import orjson @@ -50,7 +51,7 @@ _HAS_LOGGED_FOR_SERIALIZATION_ERROR = False -class AirbyteEntrypoint(object): +class AirbyteEntrypoint: def __init__(self, source: Source): init_uncaught_exception_handler(logger) @@ -62,7 +63,7 @@ def __init__(self, source: Source): self.logger = logging.getLogger(f"airbyte.{getattr(source, 'name', '')}") @staticmethod - def parse_args(args: List[str]) -> argparse.Namespace: + def parse_args(args: list[str]) -> argparse.Namespace: # set up parent parsers parent_parser = argparse.ArgumentParser(add_help=False) parent_parser.add_argument( @@ -240,7 +241,7 @@ def read( self.validate_connection(source_spec, config) # The Airbyte protocol dictates that counts be expressed as float/double to better protect against integer overflows - stream_message_counter: DefaultDict[HashableStreamDescriptor, float] = defaultdict(float) + stream_message_counter: defaultdict[HashableStreamDescriptor, float] = defaultdict(float) for message in self.source.read(self.logger, config, catalog, state): yield self.handle_record_counts(message, stream_message_counter) for message in self._emit_queued_messages(self.source): @@ -248,7 +249,7 @@ def read( @staticmethod def handle_record_counts( - message: AirbyteMessage, stream_message_count: DefaultDict[HashableStreamDescriptor, float] + message: AirbyteMessage, stream_message_count: defaultdict[HashableStreamDescriptor, float] ) -> AirbyteMessage: match message.type: case Type.RECORD: @@ -306,21 +307,21 @@ def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str: return json.dumps(serialized_message) @classmethod - def extract_state(cls, args: List[str]) -> Optional[Any]: + def extract_state(cls, args: list[str]) -> Any | None: parsed_args = cls.parse_args(args) if hasattr(parsed_args, "state"): return parsed_args.state return None @classmethod - def extract_catalog(cls, args: List[str]) -> Optional[Any]: + def extract_catalog(cls, args: list[str]) -> Any | None: parsed_args = cls.parse_args(args) if hasattr(parsed_args, "catalog"): return parsed_args.catalog return None @classmethod - def extract_config(cls, args: List[str]) -> Optional[Any]: + def extract_config(cls, args: list[str]) -> Any | None: parsed_args = cls.parse_args(args) if hasattr(parsed_args, "config"): return parsed_args.config @@ -332,7 +333,7 @@ def _emit_queued_messages(self, source: Source) -> Iterable[AirbyteMessage]: return -def launch(source: Source, args: List[str]) -> None: +def launch(source: Source, args: list[str]) -> None: source_entrypoint = AirbyteEntrypoint(source) parsed_args = source_entrypoint.parse_args(args) # temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs diff --git a/airbyte_cdk/exception_handler.py 
b/airbyte_cdk/exception_handler.py index 84aa39ba1..f52e5a0c1 100644 --- a/airbyte_cdk/exception_handler.py +++ b/airbyte_cdk/exception_handler.py @@ -4,8 +4,9 @@ import logging import sys +from collections.abc import Mapping from types import TracebackType -from typing import Any, List, Mapping, Optional +from typing import Any from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets from airbyte_cdk.utils.traced_exception import AirbyteTracedException @@ -28,7 +29,7 @@ def init_uncaught_exception_handler(logger: logging.Logger) -> None: def hook_fn( exception_type: type[BaseException], exception_value: BaseException, - traceback_: Optional[TracebackType], + traceback_: TracebackType | None, ) -> Any: # For developer ergonomics, we want to see the stack trace in the logs when we do a ctrl-c if issubclass(exception_type, KeyboardInterrupt): @@ -45,7 +46,7 @@ def hook_fn( sys.excepthook = hook_fn -def generate_failed_streams_error_message(stream_failures: Mapping[str, List[Exception]]) -> str: +def generate_failed_streams_error_message(stream_failures: Mapping[str, list[Exception]]) -> str: failures = "\n".join( [ f"{stream}: {filter_secrets(exception.__repr__())}" diff --git a/airbyte_cdk/logger.py b/airbyte_cdk/logger.py index 13c3b4676..abd2f9203 100644 --- a/airbyte_cdk/logger.py +++ b/airbyte_cdk/logger.py @@ -5,7 +5,8 @@ import json import logging import logging.config -from typing import Any, Callable, Mapping, Optional, Tuple +from collections.abc import Callable, Mapping +from typing import Any import orjson @@ -40,7 +41,7 @@ } -def init_logger(name: Optional[str] = None) -> logging.Logger: +def init_logger(name: str | None = None) -> logging.Logger: """Initial set up of logger""" logger = logging.getLogger(name) logger.setLevel(logging.INFO) @@ -94,7 +95,7 @@ def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, An return {k: str(getattr(record, k)) for k in extra_keys if hasattr(record, k)} -def log_by_prefix(msg: str, default_level: str) -> Tuple[int, str]: +def log_by_prefix(msg: str, default_level: str) -> tuple[int, str]: """Custom method, which takes log level from first word of message""" valid_log_types = ["FATAL", "ERROR", "WARN", "INFO", "DEBUG", "TRACE"] split_line = msg.split() diff --git a/airbyte_cdk/models/airbyte_protocol.py b/airbyte_cdk/models/airbyte_protocol.py index 2528f7d7e..8af083374 100644 --- a/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte_cdk/models/airbyte_protocol.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Annotated, Any, Dict, List, Mapping, Optional, Union +from typing import Annotated, Any from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*' from serpyco_rs.metadata import Alias @@ -56,35 +57,35 @@ def __eq__(self, other: object) -> bool: @dataclass class AirbyteStreamState: stream_descriptor: StreamDescriptor # type: ignore [name-defined] - stream_state: Optional[AirbyteStateBlob] = None + stream_state: AirbyteStateBlob | None = None @dataclass class AirbyteGlobalState: - stream_states: List[AirbyteStreamState] - shared_state: Optional[AirbyteStateBlob] = None + stream_states: list[AirbyteStreamState] + shared_state: AirbyteStateBlob | None = None @dataclass class AirbyteStateMessage: - type: Optional[AirbyteStateType] = None # type: ignore [name-defined] - stream: Optional[AirbyteStreamState] = None + type: AirbyteStateType | None = None # type: ignore [name-defined] + stream: AirbyteStreamState | None = None global_: Annotated[AirbyteGlobalState | None, Alias("global")] = ( None # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization ) - data: Optional[Dict[str, Any]] = None - sourceStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined] - destinationStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined] + data: dict[str, Any] | None = None + sourceStats: AirbyteStateStats | None = None # type: ignore [name-defined] + destinationStats: AirbyteStateStats | None = None # type: ignore [name-defined] @dataclass class AirbyteMessage: type: Type # type: ignore [name-defined] - log: Optional[AirbyteLogMessage] = None # type: ignore [name-defined] - spec: Optional[ConnectorSpecification] = None # type: ignore [name-defined] - connectionStatus: Optional[AirbyteConnectionStatus] = None # type: ignore [name-defined] - catalog: Optional[AirbyteCatalog] = None # type: ignore [name-defined] - record: Optional[Union[AirbyteFileTransferRecordMessage, AirbyteRecordMessage]] = None # type: ignore [name-defined] - state: Optional[AirbyteStateMessage] = None - trace: Optional[AirbyteTraceMessage] = None # type: ignore [name-defined] - control: Optional[AirbyteControlMessage] = None # type: ignore [name-defined] + log: AirbyteLogMessage | None = None # type: ignore [name-defined] + spec: ConnectorSpecification | None = None # type: ignore [name-defined] + connectionStatus: AirbyteConnectionStatus | None = None # type: ignore [name-defined] + catalog: AirbyteCatalog | None = None # type: ignore [name-defined] + record: AirbyteFileTransferRecordMessage | AirbyteRecordMessage | None = None # type: ignore [name-defined] + state: AirbyteStateMessage | None = None + trace: AirbyteTraceMessage | None = None # type: ignore [name-defined] + control: AirbyteControlMessage | None = None # type: ignore [name-defined] diff --git a/airbyte_cdk/models/airbyte_protocol_serializers.py b/airbyte_cdk/models/airbyte_protocol_serializers.py index 129556acc..6ce15d130 100644 --- a/airbyte_cdk/models/airbyte_protocol_serializers.py +++ b/airbyte_cdk/models/airbyte_protocol_serializers.py @@ -1,5 +1,5 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
-from typing import Any, Dict +from typing import Any from serpyco_rs import CustomType, Serializer @@ -14,19 +14,19 @@ ) -class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]): - def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]: +class AirbyteStateBlobType(CustomType[AirbyteStateBlob, dict[str, Any]]): + def serialize(self, value: AirbyteStateBlob) -> dict[str, Any]: # cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete" return {k: v for k, v in value.__dict__.items()} - def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob: + def deserialize(self, value: dict[str, Any]) -> AirbyteStateBlob: return AirbyteStateBlob(value) - def get_json_schema(self) -> Dict[str, Any]: + def get_json_schema(self) -> dict[str, Any]: return {"type": "object"} -def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, Dict[str, Any]] | None: +def custom_type_resolver(t: type) -> CustomType[AirbyteStateBlob, dict[str, Any]] | None: return AirbyteStateBlobType() if t is AirbyteStateBlob else None diff --git a/airbyte_cdk/models/file_transfer_record_message.py b/airbyte_cdk/models/file_transfer_record_message.py index dcc1b7a92..8bde8b408 100644 --- a/airbyte_cdk/models/file_transfer_record_message.py +++ b/airbyte_cdk/models/file_transfer_record_message.py @@ -1,13 +1,13 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any @dataclass class AirbyteFileTransferRecordMessage: stream: str - file: Dict[str, Any] + file: dict[str, Any] emitted_at: int - namespace: Optional[str] = None - data: Optional[Dict[str, Any]] = None + namespace: str | None = None + data: dict[str, Any] | None = None diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index ab9ee48b8..30a1337cc 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -4,17 +4,9 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Iterable, Iterator, Mapping, MutableMapping from typing import ( Any, - Dict, - Iterable, - Iterator, - List, - Mapping, - MutableMapping, - Optional, - Tuple, - Union, ) from airbyte_cdk.exception_handler import generate_failed_streams_error_message @@ -58,7 +50,7 @@ class AbstractSource(Source, ABC): @abstractmethod def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: """ :param logger: source logger :param config: The user-provided configuration as specified by the source's spec. @@ -71,7 +63,7 @@ def check_connection( """ @abstractmethod - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: """ :param config: The user-provided configuration as specified by the source's spec. Any stream construction related operation should happen here. 
@@ -79,7 +71,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: """ # Stream name to instance map for applying output object transformation - _stream_to_instance_map: Dict[str, Stream] = {} + _stream_to_instance_map: dict[str, Stream] = {} _slice_logger: SliceLogger = DebugSliceLogger() def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog: @@ -103,7 +95,7 @@ def read( logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, - state: Optional[List[AirbyteStateMessage]] = None, + state: list[AirbyteStateMessage] | None = None, ) -> Iterator[AirbyteMessage]: """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/.""" logger.info(f"Starting syncing {self.name}") @@ -212,7 +204,7 @@ def read( @staticmethod def _serialize_exception( - stream_descriptor: StreamDescriptor, e: Exception, stream_instance: Optional[Stream] = None + stream_descriptor: StreamDescriptor, e: Exception, stream_instance: Stream | None = None ) -> AirbyteTracedException: display_message = stream_instance.get_error_display_message(e) if stream_instance else None if display_message: @@ -294,7 +286,7 @@ def _emit_queued_messages(self) -> Iterable[AirbyteMessage]: return def _get_message( - self, record_data_or_message: Union[StreamData, AirbyteMessage], stream: Stream + self, record_data_or_message: StreamData | AirbyteMessage, stream: Stream ) -> AirbyteMessage: """ Converts the input to an AirbyteMessage if it is a StreamData. Returns the input as is if it is already an AirbyteMessage @@ -311,7 +303,7 @@ def _get_message( ) @property - def message_repository(self) -> Union[None, MessageRepository]: + def message_repository(self) -> None | MessageRepository: return _default_message_repository @property diff --git a/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py b/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py index f57db7e14..41b3c7e8f 100644 --- a/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +++ b/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# import logging -from typing import Dict, Iterable, List, Optional, Set +from collections.abc import Iterable from airbyte_cdk.exception_handler import generate_failed_streams_error_message from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus, FailureType, StreamDescriptor @@ -30,7 +30,7 @@ class ConcurrentReadProcessor: def __init__( self, - stream_instances_to_read_from: List[AbstractStream], + stream_instances_to_read_from: list[AbstractStream], partition_enqueuer: PartitionEnqueuer, thread_pool_manager: ThreadPoolManager, logger: logging.Logger, @@ -50,20 +50,20 @@ def __init__( """ self._stream_name_to_instance = {s.name: s for s in stream_instances_to_read_from} self._record_counter = {} - self._streams_to_running_partitions: Dict[str, Set[Partition]] = {} + self._streams_to_running_partitions: dict[str, set[Partition]] = {} for stream in stream_instances_to_read_from: self._streams_to_running_partitions[stream.name] = set() self._record_counter[stream.name] = 0 self._thread_pool_manager = thread_pool_manager self._partition_enqueuer = partition_enqueuer self._stream_instances_to_start_partition_generation = stream_instances_to_read_from - self._streams_currently_generating_partitions: List[str] = [] + self._streams_currently_generating_partitions: list[str] = [] self._logger = logger self._slice_logger = slice_logger self._message_repository = message_repository self._partition_reader = partition_reader - self._streams_done: Set[str] = set() - self._exceptions_per_stream_name: dict[str, List[Exception]] = {} + self._streams_done: set[str] = set() + self._exceptions_per_stream_name: dict[str, list[Exception]] = {} def on_partition_generation_completed( self, sentinel: PartitionGenerationCompletedSentinel @@ -186,7 +186,7 @@ def on_exception(self, exception: StreamThreadException) -> Iterable[AirbyteMess def _flag_exception(self, stream_name: str, exception: Exception) -> None: self._exceptions_per_stream_name.setdefault(stream_name, []).append(exception) - def start_next_partition_generator(self) -> Optional[AirbyteMessage]: + def start_next_partition_generator(self) -> AirbyteMessage | None: """ Start the next partition generator. 1. 
Pop the next stream to read from diff --git a/airbyte_cdk/sources/concurrent_source/concurrent_source.py b/airbyte_cdk/sources/concurrent_source/concurrent_source.py index ffdee2dc1..850362a6c 100644 --- a/airbyte_cdk/sources/concurrent_source/concurrent_source.py +++ b/airbyte_cdk/sources/concurrent_source/concurrent_source.py @@ -3,8 +3,8 @@ # import concurrent import logging +from collections.abc import Iterable, Iterator from queue import Queue -from typing import Iterable, Iterator, List from airbyte_cdk.models import AirbyteMessage from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor @@ -93,7 +93,7 @@ def __init__( def read( self, - streams: List[AbstractStream], + streams: list[AbstractStream], ) -> Iterator[AirbyteMessage]: self._logger.info("Starting syncing") diff --git a/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py b/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py index c150dc956..aaa570331 100644 --- a/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +++ b/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py @@ -4,8 +4,9 @@ import logging from abc import ABC +from collections.abc import Callable, Iterator, Mapping, MutableMapping from datetime import timedelta -from typing import Any, Callable, Iterator, List, Mapping, MutableMapping, Optional, Tuple +from typing import Any from airbyte_cdk.models import AirbyteMessage, AirbyteStateMessage, ConfiguredAirbyteCatalog from airbyte_cdk.sources import AbstractSource @@ -47,7 +48,7 @@ def read( logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, - state: Optional[List[AirbyteStateMessage]] = None, + state: list[AirbyteStateMessage] | None = None, ) -> Iterator[AirbyteMessage]: abstract_streams = self._select_abstract_streams(config, catalog) concurrent_stream_names = {stream.name for stream in abstract_streams} @@ -65,13 +66,13 @@ def read( def _select_abstract_streams( self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog - ) -> List[AbstractStream]: + ) -> list[AbstractStream]: """ Selects streams that can be processed concurrently and returns their abstract representations. 
""" all_streams = self.streams(config) stream_name_to_instance: Mapping[str, Stream] = {s.name: s for s in all_streams} - abstract_streams: List[AbstractStream] = [] + abstract_streams: list[AbstractStream] = [] for configured_stream in configured_catalog.streams: stream_instance = stream_name_to_instance.get(configured_stream.stream.name) if not stream_instance: @@ -86,7 +87,7 @@ def convert_to_concurrent_stream( logger: logging.Logger, stream: Stream, state_manager: ConnectorStateManager, - cursor: Optional[Cursor] = None, + cursor: Cursor | None = None, ) -> Stream: """ Prepares a stream for concurrent processing by initializing or assigning a cursor, @@ -113,12 +114,12 @@ def initialize_cursor( stream: Stream, state_manager: ConnectorStateManager, converter: AbstractStreamStateConverter, - slice_boundary_fields: Optional[Tuple[str, str]], - start: Optional[CursorValueType], + slice_boundary_fields: tuple[str, str] | None, + start: CursorValueType | None, end_provider: Callable[[], CursorValueType], - lookback_window: Optional[GapType] = None, - slice_range: Optional[GapType] = None, - ) -> Optional[ConcurrentCursor]: + lookback_window: GapType | None = None, + slice_range: GapType | None = None, + ) -> ConcurrentCursor | None: lookback_window = lookback_window or timedelta(seconds=DEFAULT_LOOKBACK_SECONDS) cursor_field_name = stream.cursor_field diff --git a/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py b/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py index 59f8a1f0b..d5f1c34b6 100644 --- a/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +++ b/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py @@ -3,8 +3,9 @@ # import logging import threading +from collections.abc import Callable from concurrent.futures import Future, ThreadPoolExecutor -from typing import Any, Callable, List, Optional +from typing import Any class ThreadPoolManager: @@ -28,9 +29,9 @@ def __init__( self._threadpool = threadpool self._logger = logger self._max_concurrent_tasks = max_concurrent_tasks - self._futures: List[Future[Any]] = [] + self._futures: list[Future[Any]] = [] self._lock = threading.Lock() - self._most_recently_seen_exception: Optional[Exception] = None + self._most_recently_seen_exception: Exception | None = None self._logging_threshold = max_concurrent_tasks * 2 @@ -45,7 +46,7 @@ def prune_to_validate_has_reached_futures_limit(self) -> bool: def submit(self, function: Callable[..., Any], *args: Any) -> None: self._futures.append(self._threadpool.submit(function, *args)) - def _prune_futures(self, futures: List[Future[Any]]) -> None: + def _prune_futures(self, futures: list[Future[Any]]) -> None: """ Take a list in input and remove the futures that are completed. If a future has an exception, it'll raise and kill the stream operation. diff --git a/airbyte_cdk/sources/config.py b/airbyte_cdk/sources/config.py index ea91b17f3..643882d24 100644 --- a/airbyte_cdk/sources/config.py +++ b/airbyte_cdk/sources/config.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Any, Dict +from typing import Any from pydantic.v1 import BaseModel @@ -18,7 +18,7 @@ class BaseConfig(BaseModel): """ @classmethod - def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]: + def schema(cls, *args: Any, **kwargs: Any) -> dict[str, Any]: """We're overriding the schema classmethod to enable some post-processing""" schema = super().schema(*args, **kwargs) rename_key(schema, old_key="anyOf", new_key="oneOf") # UI supports only oneOf diff --git a/airbyte_cdk/sources/connector_state_manager.py b/airbyte_cdk/sources/connector_state_manager.py index 914374a55..2ea779ac2 100644 --- a/airbyte_cdk/sources/connector_state_manager.py +++ b/airbyte_cdk/sources/connector_state_manager.py @@ -3,8 +3,9 @@ # import copy +from collections.abc import Mapping, MutableMapping from dataclasses import dataclass -from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union, cast +from typing import Any, cast from airbyte_cdk.models import ( AirbyteMessage, @@ -26,7 +27,7 @@ class HashableStreamDescriptor: """ name: str - namespace: Optional[str] = None + namespace: str | None = None class ConnectorStateManager: @@ -35,7 +36,7 @@ class ConnectorStateManager: interface. It also provides methods to extract and update state """ - def __init__(self, state: Optional[List[AirbyteStateMessage]] = None): + def __init__(self, state: list[AirbyteStateMessage] | None = None): shared_state, per_stream_states = self._extract_from_state_message(state) # We explicitly throw an error if we receive a GLOBAL state message that contains a shared_state because API sources are @@ -50,9 +51,7 @@ def __init__(self, state: Optional[List[AirbyteStateMessage]] = None): ) self.per_stream_states = per_stream_states - def get_stream_state( - self, stream_name: str, namespace: Optional[str] - ) -> MutableMapping[str, Any]: + def get_stream_state(self, stream_name: str, namespace: str | None) -> MutableMapping[str, Any]: """ Retrieves the state of a given stream based on its descriptor (name + namespace). 
:param stream_name: Name of the stream being fetched @@ -67,7 +66,7 @@ def get_stream_state( return {} def update_state_for_stream( - self, stream_name: str, namespace: Optional[str], value: Mapping[str, Any] + self, stream_name: str, namespace: str | None, value: Mapping[str, Any] ) -> None: """ Overwrites the state blob of a specific stream based on the provided stream name and optional namespace @@ -78,7 +77,7 @@ def update_state_for_stream( stream_descriptor = HashableStreamDescriptor(name=stream_name, namespace=namespace) self.per_stream_states[stream_descriptor] = AirbyteStateBlob(value) - def create_state_message(self, stream_name: str, namespace: Optional[str]) -> AirbyteMessage: + def create_state_message(self, stream_name: str, namespace: str | None) -> AirbyteMessage: """ Generates an AirbyteMessage using the current per-stream state of a specified stream :param stream_name: The name of the stream for the message that is being created @@ -102,10 +101,10 @@ def create_state_message(self, stream_name: str, namespace: Optional[str]) -> Ai @classmethod def _extract_from_state_message( cls, - state: Optional[List[AirbyteStateMessage]], - ) -> Tuple[ - Optional[AirbyteStateBlob], - MutableMapping[HashableStreamDescriptor, Optional[AirbyteStateBlob]], + state: list[AirbyteStateMessage] | None, + ) -> tuple[ + AirbyteStateBlob | None, + MutableMapping[HashableStreamDescriptor, AirbyteStateBlob | None], ]: """ Takes an incoming list of state messages or a global state message and extracts state attributes according to @@ -146,9 +145,9 @@ def _extract_from_state_message( return None, streams @staticmethod - def _is_global_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]]) -> bool: + def _is_global_state(state: list[AirbyteStateMessage] | MutableMapping[str, Any]) -> bool: return ( - isinstance(state, List) + isinstance(state, list) and len(state) == 1 and isinstance(state[0], AirbyteStateMessage) and state[0].type == AirbyteStateType.GLOBAL @@ -156,6 +155,6 @@ def _is_global_state(state: Union[List[AirbyteStateMessage], MutableMapping[str, @staticmethod def _is_per_stream_state( - state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]], + state: list[AirbyteStateMessage] | MutableMapping[str, Any], ) -> bool: - return isinstance(state, List) + return isinstance(state, list) diff --git a/airbyte_cdk/sources/declarative/async_job/job.py b/airbyte_cdk/sources/declarative/async_job/job.py index ea83b7456..bc7ad2ee5 100644 --- a/airbyte_cdk/sources/declarative/async_job/job.py +++ b/airbyte_cdk/sources/declarative/async_job/job.py @@ -2,7 +2,6 @@ from datetime import timedelta -from typing import Optional from airbyte_cdk.sources.declarative.async_job.timer import Timer from airbyte_cdk.sources.types import StreamSlice @@ -19,7 +18,7 @@ class AsyncJob: """ def __init__( - self, api_job_id: str, job_parameters: StreamSlice, timeout: Optional[timedelta] = None + self, api_job_id: str, job_parameters: StreamSlice, timeout: timedelta | None = None ) -> None: self._api_job_id = api_job_id self._job_parameters = job_parameters diff --git a/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py b/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py index 399f42430..f3bb0977e 100644 --- a/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +++ b/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py @@ -5,18 +5,11 @@ import time import traceback import uuid +from collections.abc import Generator, Iterable, Mapping from datetime import 
timedelta from typing import ( Any, - Generator, Generic, - Iterable, - List, - Mapping, - Optional, - Set, - Tuple, - Type, TypeVar, ) @@ -47,7 +40,7 @@ class AsyncPartition: _DEFAULT_MAX_JOB_RETRY = 3 def __init__( - self, jobs: List[AsyncJob], stream_slice: StreamSlice, job_max_retry: Optional[int] = None + self, jobs: list[AsyncJob], stream_slice: StreamSlice, job_max_retry: int | None = None ) -> None: self._attempts_per_job = {job: 1 for job in jobs} self._stream_slice = stream_slice @@ -63,7 +56,7 @@ def has_reached_max_attempt(self) -> bool: ) ) - def replace_job(self, job_to_replace: AsyncJob, new_jobs: List[AsyncJob]) -> None: + def replace_job(self, job_to_replace: AsyncJob, new_jobs: list[AsyncJob]) -> None: current_attempt_count = self._attempts_per_job.pop(job_to_replace, None) if current_attempt_count is None: raise ValueError("Could not find job to replace") @@ -116,7 +109,7 @@ def __json_serializable__(self) -> Any: class LookaheadIterator(Generic[T]): def __init__(self, iterable: Iterable[T]) -> None: self._iterator = iter(iterable) - self._buffer: List[T] = [] + self._buffer: list[T] = [] def __iter__(self) -> "LookaheadIterator[T]": return self @@ -158,9 +151,9 @@ def __init__( slices: Iterable[StreamSlice], job_tracker: JobTracker, message_repository: MessageRepository, - exceptions_to_break_on: Iterable[Type[Exception]] = tuple(), + exceptions_to_break_on: Iterable[type[Exception]] = tuple(), has_bulk_parent: bool = False, - job_max_retry: Optional[int] = None, + job_max_retry: int | None = None, ) -> None: """ If the stream slices provided as a parameters relies on a async job streams that relies on the same JobTracker, `has_bulk_parent` @@ -176,14 +169,14 @@ def __init__( self._job_repository: AsyncJobRepository = job_repository self._slice_iterator = LookaheadIterator(slices) - self._running_partitions: List[AsyncPartition] = [] + self._running_partitions: list[AsyncPartition] = [] self._job_tracker = job_tracker self._message_repository = message_repository - self._exceptions_to_break_on: Tuple[Type[Exception], ...] = tuple(exceptions_to_break_on) + self._exceptions_to_break_on: tuple[type[Exception], ...] = tuple(exceptions_to_break_on) self._has_bulk_parent = has_bulk_parent self._job_max_retry = job_max_retry - self._non_breaking_exceptions: List[Exception] = [] + self._non_breaking_exceptions: list[Exception] = [] def _replace_failed_jobs(self, partition: AsyncPartition) -> None: failed_status_jobs = (AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT) @@ -232,7 +225,7 @@ def _start_jobs(self) -> None: "Waiting before creating more jobs as the limit of concurrent jobs has been reached. Will try again later..." ) - def _start_job(self, _slice: StreamSlice, previous_job_id: Optional[str] = None) -> AsyncJob: + def _start_job(self, _slice: StreamSlice, previous_job_id: str | None = None) -> AsyncJob: if previous_job_id: id_to_replace = previous_job_id lazy_log(LOGGER, logging.DEBUG, lambda: f"Attempting to replace job {id_to_replace}...") @@ -278,7 +271,7 @@ def _create_failed_job(self, stream_slice: StreamSlice) -> AsyncJob: job.update_status(AsyncJobStatus.FAILED) return job - def _get_running_jobs(self) -> Set[AsyncJob]: + def _get_running_jobs(self) -> set[AsyncJob]: """ Returns a set of running AsyncJob objects. @@ -353,7 +346,7 @@ def _process_running_partitions_and_yield_completed_ones( Raises: Any: Any exception raised during processing. 
""" - current_running_partitions: List[AsyncPartition] = [] + current_running_partitions: list[AsyncPartition] = [] for partition in self._running_partitions: match partition.status: case AsyncJobStatus.COMPLETED: @@ -408,7 +401,7 @@ def _remove_completed_jobs(self, partition: AsyncPartition) -> None: def _reallocate_partition( self, - current_running_partitions: List[AsyncPartition], + current_running_partitions: list[AsyncPartition], partition: AsyncPartition, ) -> None: """ diff --git a/airbyte_cdk/sources/declarative/async_job/job_tracker.py b/airbyte_cdk/sources/declarative/async_job/job_tracker.py index 7d0ebba75..57099cc3a 100644 --- a/airbyte_cdk/sources/declarative/async_job/job_tracker.py +++ b/airbyte_cdk/sources/declarative/async_job/job_tracker.py @@ -3,8 +3,9 @@ import logging import threading import uuid +from collections.abc import Mapping from dataclasses import dataclass, field -from typing import Any, Mapping, Set, Union +from typing import Any from airbyte_cdk.logger import lazy_log from airbyte_cdk.sources.declarative.interpolation import InterpolatedString @@ -18,11 +19,11 @@ class ConcurrentJobLimitReached(Exception): @dataclass class JobTracker: - limit: Union[int, str] + limit: int | str config: Mapping[str, Any] = field(default_factory=dict) def __post_init__(self) -> None: - self._jobs: Set[str] = set() + self._jobs: set[str] = set() self._lock = threading.Lock() if isinstance(self.limit, str): try: diff --git a/airbyte_cdk/sources/declarative/async_job/repository.py b/airbyte_cdk/sources/declarative/async_job/repository.py index 21581ec4f..df46748b4 100644 --- a/airbyte_cdk/sources/declarative/async_job/repository.py +++ b/airbyte_cdk/sources/declarative/async_job/repository.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from abc import abstractmethod -from typing import Any, Iterable, Mapping, Set +from collections.abc import Iterable, Mapping +from typing import Any from airbyte_cdk.sources.declarative.async_job.job import AsyncJob from airbyte_cdk.sources.types import StreamSlice @@ -13,7 +14,7 @@ def start(self, stream_slice: StreamSlice) -> AsyncJob: pass @abstractmethod - def update_jobs_status(self, jobs: Set[AsyncJob]) -> None: + def update_jobs_status(self, jobs: set[AsyncJob]) -> None: pass @abstractmethod diff --git a/airbyte_cdk/sources/declarative/async_job/timer.py b/airbyte_cdk/sources/declarative/async_job/timer.py index c4e5a9a1d..a1686bc04 100644 --- a/airbyte_cdk/sources/declarative/async_job/timer.py +++ b/airbyte_cdk/sources/declarative/async_job/timer.py @@ -1,12 +1,11 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
from datetime import datetime, timedelta, timezone -from typing import Optional class Timer: def __init__(self, timeout: timedelta) -> None: - self._start_datetime: Optional[datetime] = None - self._end_datetime: Optional[datetime] = None + self._start_datetime: datetime | None = None + self._end_datetime: datetime | None = None self._timeout = timeout def start(self) -> None: @@ -21,7 +20,7 @@ def is_started(self) -> bool: return self._start_datetime is not None @property - def elapsed_time(self) -> Optional[timedelta]: + def elapsed_time(self) -> timedelta | None: if not self._start_datetime: return None diff --git a/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py b/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py index b749718fa..5a7949eb2 100644 --- a/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +++ b/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Union +from typing import Any from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import ( AbstractHeaderAuthenticator, @@ -20,7 +21,7 @@ def get_request_params(self) -> Mapping[str, Any]: """HTTP request parameter to add to the requests""" return {} - def get_request_body_data(self) -> Union[Mapping[str, Any], str]: + def get_request_body_data(self) -> Mapping[str, Any] | str: """Form-encoded body data to set on the requests""" return {} diff --git a/airbyte_cdk/sources/declarative/auth/jwt.py b/airbyte_cdk/sources/declarative/auth/jwt.py index c83d081bb..fbca5cf32 100644 --- a/airbyte_cdk/sources/declarative/auth/jwt.py +++ b/airbyte_cdk/sources/declarative/auth/jwt.py @@ -4,9 +4,10 @@ import base64 import json +from collections.abc import Mapping from dataclasses import InitVar, dataclass from datetime import datetime -from typing import Any, Mapping, Optional, Union +from typing import Any import jwt @@ -61,19 +62,19 @@ class JwtAuthenticator(DeclarativeAuthenticator): config: Mapping[str, Any] parameters: InitVar[Mapping[str, Any]] - secret_key: Union[InterpolatedString, str] - algorithm: Union[str, JwtAlgorithm] - token_duration: Optional[int] - base64_encode_secret_key: Optional[Union[InterpolatedBoolean, str, bool]] = False - header_prefix: Optional[Union[InterpolatedString, str]] = None - kid: Optional[Union[InterpolatedString, str]] = None - typ: Optional[Union[InterpolatedString, str]] = None - cty: Optional[Union[InterpolatedString, str]] = None - iss: Optional[Union[InterpolatedString, str]] = None - sub: Optional[Union[InterpolatedString, str]] = None - aud: Optional[Union[InterpolatedString, str]] = None - additional_jwt_headers: Optional[Mapping[str, Any]] = None - additional_jwt_payload: Optional[Mapping[str, Any]] = None + secret_key: InterpolatedString | str + algorithm: str | JwtAlgorithm + token_duration: int | None + base64_encode_secret_key: InterpolatedBoolean | str | bool | None = False + header_prefix: InterpolatedString | str | None = None + kid: InterpolatedString | str | None = None + typ: InterpolatedString | str | None = None + cty: InterpolatedString | str | None = None + iss: InterpolatedString | str | None = None + sub: InterpolatedString | str | None = None + aud: InterpolatedString | str | None = None + additional_jwt_headers: Mapping[str, Any] | None = None + additional_jwt_payload: Mapping[str, Any] | None = None def 
__post_init__(self, parameters: Mapping[str, Any]) -> None: self._secret_key = InterpolatedString.create(self.secret_key, parameters=parameters) @@ -160,7 +161,7 @@ def _get_secret_key(self) -> str: else secret_key ) - def _get_signed_token(self) -> Union[str, Any]: + def _get_signed_token(self) -> str | Any: """ Signed the JWT using the provided secret key and algorithm and the generated headers and payload. For additional information on PyJWT see: https://pyjwt.readthedocs.io/en/stable/ """ @@ -174,7 +175,7 @@ def _get_signed_token(self) -> Union[str, Any]: except Exception as e: raise ValueError(f"Failed to sign token: {e}") - def _get_header_prefix(self) -> Union[str, None]: + def _get_header_prefix(self) -> str | None: """ Returns the header prefix to be used when attaching the token to the request. """ diff --git a/airbyte_cdk/sources/declarative/auth/oauth.py b/airbyte_cdk/sources/declarative/auth/oauth.py index bc609e42e..48e47a5e6 100644 --- a/airbyte_cdk/sources/declarative/auth/oauth.py +++ b/airbyte_cdk/sources/declarative/auth/oauth.py @@ -2,9 +2,10 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field from datetime import datetime, timedelta -from typing import Any, List, Mapping, MutableMapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean @@ -47,33 +48,33 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut config: Mapping[str, Any] parameters: InitVar[Mapping[str, Any]] - client_id: Optional[Union[InterpolatedString, str]] = None - client_secret: Optional[Union[InterpolatedString, str]] = None - token_refresh_endpoint: Optional[Union[InterpolatedString, str]] = None - refresh_token: Optional[Union[InterpolatedString, str]] = None - scopes: Optional[List[str]] = None - token_expiry_date: Optional[Union[InterpolatedString, str]] = None - _token_expiry_date: Optional[AirbyteDateTime] = field(init=False, repr=False, default=None) - token_expiry_date_format: Optional[str] = None + client_id: InterpolatedString | str | None = None + client_secret: InterpolatedString | str | None = None + token_refresh_endpoint: InterpolatedString | str | None = None + refresh_token: InterpolatedString | str | None = None + scopes: list[str] | None = None + token_expiry_date: InterpolatedString | str | None = None + _token_expiry_date: AirbyteDateTime | None = field(init=False, repr=False, default=None) + token_expiry_date_format: str | None = None token_expiry_is_time_of_expiration: bool = False - access_token_name: Union[InterpolatedString, str] = "access_token" - access_token_value: Optional[Union[InterpolatedString, str]] = None - client_id_name: Union[InterpolatedString, str] = "client_id" - client_secret_name: Union[InterpolatedString, str] = "client_secret" - expires_in_name: Union[InterpolatedString, str] = "expires_in" - refresh_token_name: Union[InterpolatedString, str] = "refresh_token" - refresh_request_body: Optional[Mapping[str, Any]] = None - refresh_request_headers: Optional[Mapping[str, Any]] = None - grant_type_name: Union[InterpolatedString, str] = "grant_type" - grant_type: Union[InterpolatedString, str] = "refresh_token" + access_token_name: InterpolatedString | str = "access_token" + access_token_value: InterpolatedString | str | None = None + client_id_name: 
InterpolatedString | str = "client_id" + client_secret_name: InterpolatedString | str = "client_secret" + expires_in_name: InterpolatedString | str = "expires_in" + refresh_token_name: InterpolatedString | str = "refresh_token" + refresh_request_body: Mapping[str, Any] | None = None + refresh_request_headers: Mapping[str, Any] | None = None + grant_type_name: InterpolatedString | str = "grant_type" + grant_type: InterpolatedString | str = "refresh_token" message_repository: MessageRepository = NoopMessageRepository() - profile_assertion: Optional[DeclarativeAuthenticator] = None - use_profile_assertion: Optional[Union[InterpolatedBoolean, str, bool]] = False + profile_assertion: DeclarativeAuthenticator | None = None + use_profile_assertion: InterpolatedBoolean | str | bool | None = False def __post_init__(self, parameters: Mapping[str, Any]) -> None: super().__init__() if self.token_refresh_endpoint is not None: - self._token_refresh_endpoint: Optional[InterpolatedString] = InterpolatedString.create( + self._token_refresh_endpoint: InterpolatedString | None = InterpolatedString.create( self.token_refresh_endpoint, parameters=parameters ) else: @@ -96,7 +97,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.refresh_token_name, parameters=parameters ) if self.refresh_token is not None: - self._refresh_token: Optional[InterpolatedString] = InterpolatedString.create( + self._refresh_token: InterpolatedString | None = InterpolatedString.create( self.refresh_token, parameters=parameters ) else: @@ -124,7 +125,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: ) try: if ( - isinstance(self.token_expiry_date, (int, str)) + isinstance(self.token_expiry_date, int | str) and str(self.token_expiry_date).isdigit() ): self._token_expiry_date = ab_datetime_parse(self.token_expiry_date) @@ -154,7 +155,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: else: self._access_token_value = None - self._access_token: Optional[str] = ( + self._access_token: str | None = ( self._access_token_value if self.access_token_value else None ) @@ -174,7 +175,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: "OAuthAuthenticator configuration error: A 'refresh_token' is required when the 'grant_type' is set to 'refresh_token'." 
) - def get_token_refresh_endpoint(self) -> Optional[str]: + def get_token_refresh_endpoint(self) -> str | None: if self._token_refresh_endpoint is not None: refresh_token_endpoint: str = self._token_refresh_endpoint.eval(self.config) if not refresh_token_endpoint: @@ -207,10 +208,10 @@ def get_client_secret(self) -> str: def get_refresh_token_name(self) -> str: return self._refresh_token_name.eval(self.config) # type: ignore # eval returns a string in this context - def get_refresh_token(self) -> Optional[str]: + def get_refresh_token(self) -> str | None: return None if self._refresh_token is None else str(self._refresh_token.eval(self.config)) - def get_scopes(self) -> List[str]: + def get_scopes(self) -> list[str]: return self.scopes or [] def get_access_token_name(self) -> str: @@ -239,7 +240,7 @@ def get_token_expiry_date(self) -> AirbyteDateTime: def _has_access_token_been_initialized(self) -> bool: return self._access_token is not None - def set_token_expiry_date(self, value: Union[str, int]) -> None: + def set_token_expiry_date(self, value: str | int) -> None: self._token_expiry_date = self._parse_token_expiration_date(value) def get_assertion_name(self) -> str: diff --git a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py index 3a84150bf..c2b7ec1ba 100644 --- a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py +++ b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, List, Mapping +from typing import Any import dpath @@ -16,14 +17,14 @@ class SelectiveAuthenticator(DeclarativeAuthenticator): config: Mapping[str, Any] authenticators: Mapping[str, DeclarativeAuthenticator] - authenticator_selection_path: List[str] + authenticator_selection_path: list[str] # returns "DeclarativeAuthenticator", but must return a subtype of "SelectiveAuthenticator" def __new__( # type: ignore[misc] cls, config: Mapping[str, Any], authenticators: Mapping[str, DeclarativeAuthenticator], - authenticator_selection_path: List[str], + authenticator_selection_path: list[str], *arg: Any, **kwargs: Any, ) -> DeclarativeAuthenticator: diff --git a/airbyte_cdk/sources/declarative/auth/token.py b/airbyte_cdk/sources/declarative/auth/token.py index caecf9d2c..376dedec7 100644 --- a/airbyte_cdk/sources/declarative/auth/token.py +++ b/airbyte_cdk/sources/declarative/auth/token.py @@ -4,8 +4,9 @@ import base64 import logging +from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, MutableMapping, Union +from typing import Any import requests from cachetools import TTLCache, cached @@ -63,7 +64,7 @@ def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, A def get_request_params(self) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter) - def get_request_body_data(self) -> Union[Mapping[str, Any], str]: + def get_request_body_data(self) -> Mapping[str, Any] | str: return self._get_request_options(RequestOptionType.body_data) def get_request_body_json(self) -> Mapping[str, Any]: @@ -113,10 +114,10 @@ class BasicHttpAuthenticator(DeclarativeAuthenticator): parameters (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation """ - username: Union[InterpolatedString, str] + username: 
InterpolatedString | str config: Config parameters: InitVar[Mapping[str, Any]] - password: Union[InterpolatedString, str] = "" + password: InterpolatedString | str = "" def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._username = InterpolatedString.create(self.username, parameters=parameters) @@ -129,7 +130,7 @@ def auth_header(self) -> str: @property def token(self) -> str: auth_string = ( - f"{self._username.eval(self.config)}:{self._password.eval(self.config)}".encode("utf8") + f"{self._username.eval(self.config)}:{self._password.eval(self.config)}".encode() ) b64_encoded = base64.b64encode(auth_string).decode("utf8") return f"Basic {b64_encoded}" @@ -196,16 +197,16 @@ class LegacySessionTokenAuthenticator(DeclarativeAuthenticator): validate_session_url (Union[InterpolatedString, str]): Url to validate passed session token """ - api_url: Union[InterpolatedString, str] - header: Union[InterpolatedString, str] - session_token: Union[InterpolatedString, str] - session_token_response_key: Union[InterpolatedString, str] - username: Union[InterpolatedString, str] + api_url: InterpolatedString | str + header: InterpolatedString | str + session_token: InterpolatedString | str + session_token_response_key: InterpolatedString | str + username: InterpolatedString | str config: Config parameters: InitVar[Mapping[str, Any]] - login_url: Union[InterpolatedString, str] - validate_session_url: Union[InterpolatedString, str] - password: Union[InterpolatedString, str] = "" + login_url: InterpolatedString | str + validate_session_url: InterpolatedString | str + password: InterpolatedString | str = "" def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._username = InterpolatedString.create(self.username, parameters=parameters) diff --git a/airbyte_cdk/sources/declarative/auth/token_provider.py b/airbyte_cdk/sources/declarative/auth/token_provider.py index c4bae02f1..40fb3e673 100644 --- a/airbyte_cdk/sources/declarative/auth/token_provider.py +++ b/airbyte_cdk/sources/declarative/auth/token_provider.py @@ -5,8 +5,9 @@ import datetime from abc import abstractmethod +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, List, Mapping, Optional, Union +from typing import Any import dpath from isodate import Duration @@ -31,14 +32,14 @@ def get_token(self) -> str: @dataclass class SessionTokenProvider(TokenProvider): login_requester: Requester - session_token_path: List[str] - expiration_duration: Optional[Union[datetime.timedelta, Duration]] + session_token_path: list[str] + expiration_duration: datetime.timedelta | Duration | None parameters: InitVar[Mapping[str, Any]] message_repository: MessageRepository = NoopMessageRepository() decoder: Decoder = field(default_factory=lambda: JsonDecoder(parameters={})) - _next_expiration_time: Optional[AirbyteDateTime] = None - _token: Optional[str] = None + _next_expiration_time: AirbyteDateTime | None = None + _token: str | None = None def get_token(self) -> str: self._refresh_if_necessary() @@ -72,7 +73,7 @@ def _refresh(self) -> None: @dataclass class InterpolatedStringTokenProvider(TokenProvider): config: Config - api_token: Union[InterpolatedString, str] + api_token: InterpolatedString | str parameters: Mapping[str, Any] def __post_init__(self) -> None: diff --git a/airbyte_cdk/sources/declarative/checks/__init__.py b/airbyte_cdk/sources/declarative/checks/__init__.py index 87bcaa24d..b8a7df778 100644 --- a/airbyte_cdk/sources/declarative/checks/__init__.py +++ 
b/airbyte_cdk/sources/declarative/checks/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. # -from typing import Mapping +from collections.abc import Mapping from pydantic.v1 import BaseModel diff --git a/airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py b/airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py index 64d90de19..1ffc19cb6 100644 --- a/airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +++ b/airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py @@ -3,9 +3,9 @@ # import logging -import traceback +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, List, Mapping, Tuple +from typing import Any from airbyte_cdk import AbstractSource from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker @@ -33,7 +33,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def check_connection( self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Any]: + ) -> tuple[bool, Any]: streams = source.streams(config=config) if len(streams) == 0: diff --git a/airbyte_cdk/sources/declarative/checks/check_stream.py b/airbyte_cdk/sources/declarative/checks/check_stream.py index 1123349cb..bbeff0097 100644 --- a/airbyte_cdk/sources/declarative/checks/check_stream.py +++ b/airbyte_cdk/sources/declarative/checks/check_stream.py @@ -4,8 +4,9 @@ import logging import traceback +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Dict, List, Mapping, Optional, Tuple +from typing import Any from airbyte_cdk import AbstractSource from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker @@ -31,16 +32,16 @@ class CheckStream(ConnectionChecker): stream_name (List[str]): names of streams to check """ - stream_names: List[str] + stream_names: list[str] parameters: InitVar[Mapping[str, Any]] - dynamic_streams_check_configs: Optional[List[DynamicStreamCheckConfig]] = None + dynamic_streams_check_configs: list[DynamicStreamCheckConfig] | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._parameters = parameters if self.dynamic_streams_check_configs is None: self.dynamic_streams_check_configs = [] - def _log_error(self, logger: logging.Logger, action: str, error: Exception) -> Tuple[bool, str]: + def _log_error(self, logger: logging.Logger, action: str, error: Exception) -> tuple[bool, str]: """Logs an error and returns a formatted error message.""" error_message = f"Encountered an error while {action}. 
Error: {error}" logger.error(error_message + f"Error traceback: \n {traceback.format_exc()}", exc_info=True) @@ -48,7 +49,7 @@ def _log_error(self, logger: logging.Logger, action: str, error: Exception) -> T def check_connection( self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Any]: + ) -> tuple[bool, Any]: """Checks the connection to the source and its streams.""" try: streams = source.streams(config=config) @@ -82,8 +83,8 @@ def check_connection( return True, None def _check_stream_availability( - self, stream_name_to_stream: Dict[str, Any], stream_name: str, logger: logging.Logger - ) -> Tuple[bool, Any]: + self, stream_name_to_stream: dict[str, Any], stream_name: str, logger: logging.Logger + ) -> tuple[bool, Any]: """Checks if streams are available.""" availability_strategy = HttpAvailabilityStrategy() try: @@ -98,8 +99,8 @@ def _check_stream_availability( return True, None def _check_dynamic_streams_availability( - self, source: AbstractSource, stream_name_to_stream: Dict[str, Any], logger: logging.Logger - ) -> Tuple[bool, Any]: + self, source: AbstractSource, stream_name_to_stream: dict[str, Any], logger: logging.Logger + ) -> tuple[bool, Any]: """Checks the availability of dynamic streams.""" dynamic_streams = source.resolved_manifest.get("dynamic_streams", []) # type: ignore[attr-defined] # The source's resolved_manifest manifest is checked before calling this method dynamic_stream_name_to_dynamic_stream = { @@ -124,21 +125,21 @@ def _check_dynamic_streams_availability( return True, None def _map_generated_streams( - self, dynamic_streams: List[Dict[str, Any]] - ) -> Dict[str, List[Dict[str, Any]]]: + self, dynamic_streams: list[dict[str, Any]] + ) -> dict[str, list[dict[str, Any]]]: """Maps dynamic stream names to their corresponding generated streams.""" - mapped_streams: Dict[str, List[Dict[str, Any]]] = {} + mapped_streams: dict[str, list[dict[str, Any]]] = {} for stream in dynamic_streams: mapped_streams.setdefault(stream["dynamic_stream_name"], []).append(stream) return mapped_streams def _check_generated_streams_availability( self, - generated_streams: List[Dict[str, Any]], - stream_name_to_stream: Dict[str, Any], + generated_streams: list[dict[str, Any]], + stream_name_to_stream: dict[str, Any], logger: logging.Logger, max_count: int, - ) -> Tuple[bool, Any]: + ) -> tuple[bool, Any]: """Checks availability of generated dynamic streams.""" availability_strategy = HttpAvailabilityStrategy() for declarative_stream in generated_streams[: min(max_count, len(generated_streams))]: diff --git a/airbyte_cdk/sources/declarative/checks/connection_checker.py b/airbyte_cdk/sources/declarative/checks/connection_checker.py index fd1d1bba2..ee1b10783 100644 --- a/airbyte_cdk/sources/declarative/checks/connection_checker.py +++ b/airbyte_cdk/sources/declarative/checks/connection_checker.py @@ -4,7 +4,8 @@ import logging from abc import ABC, abstractmethod -from typing import Any, Mapping, Tuple +from collections.abc import Mapping +from typing import Any from airbyte_cdk import AbstractSource @@ -17,7 +18,7 @@ class ConnectionChecker(ABC): @abstractmethod def check_connection( self, source: AbstractSource, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Any]: + ) -> tuple[bool, Any]: """ Tests if the input configuration can be used to successfully connect to the integration e.g: if a provided Stripe API token can be used to connect to the Stripe API. 
diff --git a/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py b/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py index f5cd24f00..add0b2bf1 100644 --- a/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +++ b/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py @@ -2,8 +2,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.types import Config @@ -19,14 +20,14 @@ class ConcurrencyLevel: max_concurrency (Optional[int]): The maximum number of worker threads to use when the default_concurrency is exceeded """ - default_concurrency: Union[int, str] - max_concurrency: Optional[int] + default_concurrency: int | str + max_concurrency: int | None config: Config parameters: InitVar[Mapping[str, Any]] def __post_init__(self, parameters: Mapping[str, Any]) -> None: if isinstance(self.default_concurrency, int): - self._default_concurrency: Union[int, InterpolatedString] = self.default_concurrency + self._default_concurrency: int | InterpolatedString = self.default_concurrency elif "config" in self.default_concurrency and not self.max_concurrency: raise ValueError( "ConcurrencyLevel requires that max_concurrency be defined if the default_concurrency can be used-specified" diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py index e212b0f2a..c7c336834 100644 --- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py +++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py @@ -3,7 +3,8 @@ # import logging -from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple +from collections.abc import Iterator, Mapping, MutableMapping +from typing import Any, Generic from airbyte_cdk.models import ( AirbyteCatalog, @@ -63,13 +64,13 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): def __init__( self, - catalog: Optional[ConfiguredAirbyteCatalog], - config: Optional[Mapping[str, Any]], + catalog: ConfiguredAirbyteCatalog | None, + config: Mapping[str, Any] | None, state: TState, source_config: ConnectionDefinition, debug: bool = False, emit_connector_builder_messages: bool = False, - component_factory: Optional[ModelToComponentFactory] = None, + component_factory: ModelToComponentFactory | None = None, **kwargs: Any, ) -> None: # todo: We could remove state from initialization. 
Now that streams are grouped during the read(), a source @@ -133,7 +134,7 @@ def read( logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, - state: Optional[List[AirbyteStateMessage]] = None, + state: list[AirbyteStateMessage] | None = None, ) -> Iterator[AirbyteMessage]: concurrent_streams, _ = self._group_streams(config=config) @@ -141,9 +142,9 @@ def read( # the concurrent streams must be saved so that they can be removed from the catalog before starting # synchronous streams if len(concurrent_streams) > 0: - concurrent_stream_names = set( - [concurrent_stream.name for concurrent_stream in concurrent_streams] - ) + concurrent_stream_names = { + concurrent_stream.name for concurrent_stream in concurrent_streams + } selected_concurrent_streams = self._select_streams( streams=concurrent_streams, configured_catalog=catalog @@ -176,7 +177,7 @@ def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> Airbyte ] ) - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: """ The `streams` method is used as part of the AbstractSource in the following cases: * ConcurrentDeclarativeSource.check -> ManifestDeclarativeSource.check -> AbstractSource.check -> DeclarativeSource.check_connection -> CheckStream.check_connection -> streams @@ -189,9 +190,9 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: def _group_streams( self, config: Mapping[str, Any] - ) -> Tuple[List[AbstractStream], List[Stream]]: - concurrent_streams: List[AbstractStream] = [] - synchronous_streams: List[Stream] = [] + ) -> tuple[list[AbstractStream], list[Stream]]: + concurrent_streams: list[AbstractStream] = [] + synchronous_streams: list[Stream] = [] # Combine streams and dynamic_streams. Note: both cannot be empty at the same time, # and this is validated during the initialization of the source. @@ -482,10 +483,10 @@ def _get_retriever( @staticmethod def _select_streams( - streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog - ) -> List[AbstractStream]: + streams: list[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog + ) -> list[AbstractStream]: stream_name_to_instance: Mapping[str, AbstractStream] = {s.name: s for s in streams} - abstract_streams: List[AbstractStream] = [] + abstract_streams: list[AbstractStream] = [] for configured_stream in configured_catalog.streams: stream_instance = stream_name_to_instance.get(configured_stream.stream.name) if stream_instance: diff --git a/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte_cdk/sources/declarative/datetime/datetime_parser.py index 2707ffe11..ad41de894 100644 --- a/airbyte_cdk/sources/declarative/datetime/datetime_parser.py +++ b/airbyte_cdk/sources/declarative/datetime/datetime_parser.py @@ -3,7 +3,6 @@ # import datetime -from typing import Union class DatetimeParser: @@ -18,7 +17,7 @@ class DatetimeParser: _UNIX_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) - def parse(self, date: Union[str, int], format: str) -> datetime.datetime: + def parse(self, date: str | int, format: str) -> datetime.datetime: # "%s" is a valid (but unreliable) directive for formatting, but not for parsing # It is defined as # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). 
https://man7.org/linux/man-pages/man3/strptime.3.html diff --git a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index eb407db44..dc169f859 100644 --- a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -3,8 +3,9 @@ # import datetime as dt +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Mapping, Optional, Union +from typing import Any, Union from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString @@ -28,14 +29,14 @@ class MinMaxDatetime: max_datetime (Union[InterpolatedString, str]): Represents the maximum allowed datetime value. """ - datetime: Union[InterpolatedString, str] + datetime: InterpolatedString | str parameters: InitVar[Mapping[str, Any]] # datetime_format is a unique case where we inherit it from the parent if it is not specified before using the default value # which is why we need dedicated getter/setter methods and private dataclass field datetime_format: str _datetime_format: str = field(init=False, repr=False, default="") - min_datetime: Union[InterpolatedString, str] = "" - max_datetime: Union[InterpolatedString, str] = "" + min_datetime: InterpolatedString | str = "" + max_datetime: InterpolatedString | str = "" def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.datetime = InterpolatedString.create(self.datetime, parameters=parameters or {}) @@ -104,7 +105,7 @@ def datetime_format(self, value: str) -> None: def create( cls, interpolated_string_or_min_max_datetime: Union[InterpolatedString, str, "MinMaxDatetime"], - parameters: Optional[Mapping[str, Any]] = None, + parameters: Mapping[str, Any] | None = None, ) -> "MinMaxDatetime": if parameters is None: parameters = {} diff --git a/airbyte_cdk/sources/declarative/declarative_source.py b/airbyte_cdk/sources/declarative/declarative_source.py index 77bf427a1..769407d3a 100644 --- a/airbyte_cdk/sources/declarative/declarative_source.py +++ b/airbyte_cdk/sources/declarative/declarative_source.py @@ -4,7 +4,8 @@ import logging from abc import abstractmethod -from typing import Any, Mapping, Tuple +from collections.abc import Mapping +from typing import Any from airbyte_cdk.sources.abstract_source import AbstractSource from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker @@ -22,7 +23,7 @@ def connection_checker(self) -> ConnectionChecker: def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Any]: + ) -> tuple[bool, Any]: """ :param logger: The source logger :param config: The user-provided configuration as specified by the source's spec. diff --git a/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte_cdk/sources/declarative/declarative_stream.py index 0ae117459..77da654c4 100644 --- a/airbyte_cdk/sources/declarative/declarative_stream.py +++ b/airbyte_cdk/sources/declarative/declarative_stream.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
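Editor's note: MinMaxDatetime.create above keeps `typing.Union` even though the rest of the patch switches to "|". The likely reason is the quoted forward reference in the union: the "|" operator cannot be applied to a plain string at runtime unless annotation evaluation is deferred. A sketch (Node is an invented class) of that distinction:

    from typing import Union

    class Node:
        # Union[...] happily wraps the string forward reference "Node"...
        def attach(self, child: Union[int, "Node"]) -> "Node":
            return self

    # ...whereas writing `int | "Node"` is evaluated eagerly and raises:
    # TypeError: unsupported operand type(s) for |: 'type' and 'str'
    # (adding `from __future__ import annotations` would defer evaluation and allow it).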
# import logging +from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union +from typing import Any from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.incremental import ( @@ -47,12 +48,12 @@ class DeclarativeStream(Stream): config: Config parameters: InitVar[Mapping[str, Any]] name: str - primary_key: Optional[Union[str, List[str], List[List[str]]]] - state_migrations: List[StateMigration] = field(repr=True, default_factory=list) - schema_loader: Optional[SchemaLoader] = None + primary_key: str | list[str] | list[list[str]] | None + state_migrations: list[StateMigration] = field(repr=True, default_factory=list) + schema_loader: SchemaLoader | None = None _name: str = field(init=False, repr=False, default="") _primary_key: str = field(init=False, repr=False, default="") - stream_cursor_field: Optional[Union[InterpolatedString, str]] = None + stream_cursor_field: InterpolatedString | str | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._stream_cursor_field = ( @@ -67,7 +68,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: ) @property # type: ignore - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return self._primary_key @primary_key.setter @@ -121,7 +122,7 @@ def get_updated_state( return self.state @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: """ Override to return the default cursor field used by this stream e.g: an API entity might always use created_at as the cursor field. :return: The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor. @@ -138,9 +139,9 @@ def is_resumable(self) -> bool: def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: """ :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state. @@ -173,9 +174,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[StreamSlice]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[StreamSlice | None]: """ Override to define the slices for this stream. See the stream slicing section of the docs for more information. @@ -187,7 +188,7 @@ def stream_slices( return self.retriever.stream_slices() @property - def state_checkpoint_interval(self) -> Optional[int]: + def state_checkpoint_interval(self) -> int | None: """ We explicitly disable checkpointing here. There are a couple reasons for that and not all are documented here but: * In the case where records are not ordered, the granularity of what is ordered is the slice. 
Therefore, we will only update the @@ -197,7 +198,7 @@ def state_checkpoint_interval(self) -> Optional[int]: """ return None - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: if self.retriever and isinstance(self.retriever, SimpleRetriever): return self.retriever.cursor return None @@ -205,7 +206,7 @@ def get_cursor(self) -> Optional[Cursor]: def _get_checkpoint_reader( self, logger: logging.Logger, - cursor_field: Optional[List[str]], + cursor_field: list[str] | None, sync_mode: SyncMode, stream_state: MutableMapping[str, Any], ) -> CheckpointReader: @@ -229,7 +230,7 @@ def _get_checkpoint_reader( checkpoint_mode = self._checkpoint_mode if isinstance( - cursor, (GlobalSubstreamCursor, PerPartitionCursor, PerPartitionWithGlobalCursor) + cursor, GlobalSubstreamCursor | PerPartitionCursor | PerPartitionWithGlobalCursor ): self.has_multiple_slices = True return CursorBasedCheckpointReader( diff --git a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py index 2fc26c43a..50c96fc00 100644 --- a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py @@ -9,7 +9,7 @@ import logging from dataclasses import dataclass from io import BufferedIOBase, TextIOWrapper -from typing import Any, Optional +from typing import Any import orjson import requests @@ -60,7 +60,7 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE: if body_json is None: raise AirbyteTracedException( message="Response JSON data failed to be parsed. See logs for more information.", - internal_message=f"Response JSON data failed to be parsed.", + internal_message="Response JSON data failed to be parsed.", failure_type=FailureType.system_error, ) @@ -69,7 +69,7 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE: else: yield from [body_json] - def _parse_orjson(self, raw_data: bytes) -> Optional[Any]: + def _parse_orjson(self, raw_data: bytes) -> Any | None: try: return orjson.loads(raw_data.decode(self.encoding)) except Exception as exc: @@ -78,7 +78,7 @@ def _parse_orjson(self, raw_data: bytes) -> Optional[Any]: ) return None - def _parse_json(self, raw_data: bytes) -> Optional[Any]: + def _parse_json(self, raw_data: bytes) -> Any | None: try: return json.loads(raw_data.decode(self.encoding)) except Exception as exc: @@ -88,7 +88,7 @@ def _parse_json(self, raw_data: bytes) -> Optional[Any]: @dataclass class JsonLineParser(Parser): - encoding: Optional[str] = "utf-8" + encoding: str | None = "utf-8" def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE: for line in data: @@ -101,10 +101,10 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE: @dataclass class CsvParser(Parser): # TODO: migrate implementation to re-use file-base classes - encoding: Optional[str] = "utf-8" - delimiter: Optional[str] = "," + encoding: str | None = "utf-8" + delimiter: str | None = "," - def _get_delimiter(self) -> Optional[str]: + def _get_delimiter(self) -> str | None: """ Get delimiter from the configuration. Check for the escape character and decode it. 
""" @@ -120,8 +120,7 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE: """ text_data = TextIOWrapper(data, encoding=self.encoding) # type: ignore reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",") - for row in reader: - yield row + yield from reader class CompositeRawDecoder(Decoder): diff --git a/airbyte_cdk/sources/declarative/decoders/decoder.py b/airbyte_cdk/sources/declarative/decoders/decoder.py index 34d99db1f..feb5a487f 100644 --- a/airbyte_cdk/sources/declarative/decoders/decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/decoder.py @@ -3,8 +3,9 @@ # from abc import abstractmethod +from collections.abc import Generator, MutableMapping from dataclasses import dataclass -from typing import Any, Generator, MutableMapping +from typing import Any import requests diff --git a/airbyte_cdk/sources/declarative/decoders/decoder_parser.py b/airbyte_cdk/sources/declarative/decoders/decoder_parser.py index d1401f54f..9149d7925 100644 --- a/airbyte_cdk/sources/declarative/decoders/decoder_parser.py +++ b/airbyte_cdk/sources/declarative/decoders/decoder_parser.py @@ -5,9 +5,10 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Generator, MutableMapping from dataclasses import dataclass from io import BufferedIOBase -from typing import Any, Dict, Generator, List, MutableMapping, Optional, Set, Tuple +from typing import Any, Optional logger = logging.getLogger("airbyte") @@ -26,5 +27,5 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE: # reusable parser types -PARSERS_TYPE = List[Tuple[Set[str], Set[str], Parser]] -PARSERS_BY_HEADER_TYPE = Optional[Dict[str, Dict[str, Parser]]] +PARSERS_TYPE = list[tuple[set[str], set[str], Parser]] +PARSERS_BY_HEADER_TYPE = Optional[dict[str, dict[str, Parser]]] diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 3533fc5c8..f07efcbb2 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -1,13 +1,11 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import codecs import logging +from collections.abc import Generator, Mapping, MutableMapping from dataclasses import InitVar, dataclass -from gzip import decompress -from typing import Any, Generator, List, Mapping, MutableMapping, Optional +from typing import Any -import orjson import requests from airbyte_cdk.sources.declarative.decoders import CompositeRawDecoder, JsonParser diff --git a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py index cf0bc56eb..bc0479075 100644 --- a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
import logging -from typing import Any, Generator, Mapping +from collections.abc import Generator, Mapping +from typing import Any import requests diff --git a/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py b/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py index e5a152711..d8da3340d 100644 --- a/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +++ b/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Generator, MutableMapping from dataclasses import dataclass -from typing import Any, Generator, MutableMapping +from typing import Any import requests diff --git a/airbyte_cdk/sources/declarative/decoders/xml_decoder.py b/airbyte_cdk/sources/declarative/decoders/xml_decoder.py index 0786c3202..bfab4b8cc 100644 --- a/airbyte_cdk/sources/declarative/decoders/xml_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/xml_decoder.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Generator, Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any, Generator, Mapping, MutableMapping +from typing import Any from xml.parsers.expat import ExpatError import requests diff --git a/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py b/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py index 9c97773e3..d1575e4b6 100644 --- a/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +++ b/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any, Iterable, List, Mapping, MutableMapping, Union +from typing import Any import dpath import requests @@ -53,7 +54,7 @@ class DpathExtractor(RecordExtractor): decoder (Decoder): The decoder responsible to transfom the response in a Mapping """ - field_path: List[Union[InterpolatedString, str]] + field_path: list[InterpolatedString | str] config: Config parameters: InitVar[Mapping[str, Any]] decoder: Decoder = field(default_factory=lambda: JsonDecoder(parameters={})) diff --git a/airbyte_cdk/sources/declarative/extractors/http_selector.py b/airbyte_cdk/sources/declarative/extractors/http_selector.py index 846071125..8db96c796 100644 --- a/airbyte_cdk/sources/declarative/extractors/http_selector.py +++ b/airbyte_cdk/sources/declarative/extractors/http_selector.py @@ -3,7 +3,8 @@ # from abc import abstractmethod -from typing import Any, Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Any import requests @@ -22,8 +23,8 @@ def select_records( response: requests.Response, stream_state: StreamState, records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Record]: """ Selects records from the response diff --git a/airbyte_cdk/sources/declarative/extractors/record_extractor.py b/airbyte_cdk/sources/declarative/extractors/record_extractor.py index 5de6a84a7..6890950af 100644 --- a/airbyte_cdk/sources/declarative/extractors/record_extractor.py +++ b/airbyte_cdk/sources/declarative/extractors/record_extractor.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
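The http_selector hunk above is representative of the mechanical rewrite applied throughout this diff: Optional, Union, and the typing-module generics become PEP 604 unions, builtin generics, and collections.abc imports. A hedged before/after sketch of a select_records-like signature follows; the body is a placeholder so the snippet runs, and the whole thing assumes Python 3.10+.

# Before: typing-module generics
#   from typing import Any, Iterable, Mapping, Optional
#   stream_slice: Optional[Mapping[str, Any]] = None
#
# After: collections.abc ABCs, builtin generics, PEP 604 unions
from collections.abc import Iterable, Mapping
from typing import Any


def select_records(
    response: Any,
    records_schema: Mapping[str, Any],
    stream_slice: Mapping[str, Any] | None = None,
    next_page_token: Mapping[str, Any] | None = None,
) -> Iterable[Mapping[str, Any]]:
    # Placeholder body; the real selector extracts, filters and transforms records.
    return [stream_slice] if stream_slice is not None else [records_schema]


assert list(select_records(response=None, records_schema={"type": "object"})) == [{"type": "object"}]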
# from abc import abstractmethod +from collections.abc import Iterable, Mapping from dataclasses import dataclass -from typing import Any, Iterable, Mapping +from typing import Any import requests diff --git a/airbyte_cdk/sources/declarative/extractors/record_filter.py b/airbyte_cdk/sources/declarative/extractors/record_filter.py index 373669612..39a65ccf9 100644 --- a/airbyte_cdk/sources/declarative/extractors/record_filter.py +++ b/airbyte_cdk/sources/declarative/extractors/record_filter.py @@ -1,8 +1,9 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any, Iterable, Mapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.incremental import ( DatetimeBasedCursor, @@ -35,8 +36,8 @@ def filter_records( self, records: Iterable[Mapping[str, Any]], stream_state: StreamState, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: kwargs = { "stream_state": stream_state, @@ -59,7 +60,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter): def __init__( self, - cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor], + cursor: DatetimeBasedCursor | PerPartitionWithGlobalCursor | GlobalSubstreamCursor, **kwargs: Any, ): super().__init__(**kwargs) @@ -69,8 +70,8 @@ def filter_records( self, records: Iterable[Mapping[str, Any]], stream_state: StreamState, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: records = ( record diff --git a/airbyte_cdk/sources/declarative/extractors/record_selector.py b/airbyte_cdk/sources/declarative/extractors/record_selector.py index c37b8035b..48a1b662f 100644 --- a/airbyte_cdk/sources/declarative/extractors/record_selector.py +++ b/airbyte_cdk/sources/declarative/extractors/record_selector.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Iterable, List, Mapping, Optional, Union +from typing import Any import requests @@ -14,7 +15,6 @@ TypeTransformer as DeclarativeTypeTransformer, ) from airbyte_cdk.sources.declarative.interpolation import InterpolatedString -from airbyte_cdk.sources.declarative.models import SchemaNormalization from airbyte_cdk.sources.declarative.transformations import RecordTransformation from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.sources.utils.transform import TypeTransformer @@ -36,11 +36,11 @@ class RecordSelector(HttpSelector): extractor: RecordExtractor config: Config parameters: InitVar[Mapping[str, Any]] - schema_normalization: Union[TypeTransformer, DeclarativeTypeTransformer] + schema_normalization: TypeTransformer | DeclarativeTypeTransformer name: str - _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="") - record_filter: Optional[RecordFilter] = None - transformations: List[RecordTransformation] = field(default_factory=lambda: []) + _name: InterpolatedString | str = field(init=False, repr=False, default="") + record_filter: RecordFilter | None = None + transformations: list[RecordTransformation] = field(default_factory=lambda: []) transform_before_filtering: bool = False def __post_init__(self, parameters: Mapping[str, Any]) -> None: @@ -72,8 +72,8 @@ def select_records( response: requests.Response, stream_state: StreamState, records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Record]: """ Selects records from the response @@ -94,8 +94,8 @@ def filter_and_transform( all_data: Iterable[Mapping[str, Any]], stream_state: StreamState, records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Record]: """ There is an issue with the selector as of 2024-08-30: it does technology-agnostic processing like filtering, transformation and @@ -120,7 +120,7 @@ def filter_and_transform( yield Record(data=data, stream_name=self.name, associated_slice=stream_slice) def _normalize_by_schema( - self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]] + self, records: Iterable[Mapping[str, Any]], schema: Mapping[str, Any] | None ) -> Iterable[Mapping[str, Any]]: if schema: # record has type Mapping[str, Any], but dict[str, Any] expected @@ -135,8 +135,8 @@ def _filter( self, records: Iterable[Mapping[str, Any]], stream_state: StreamState, - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, ) -> Iterable[Mapping[str, Any]]: if self.record_filter: yield from self.record_filter.filter_records( @@ -152,7 +152,7 @@ def _transform( self, records: Iterable[Mapping[str, Any]], stream_state: StreamState, - stream_slice: Optional[StreamSlice] = None, + stream_slice: StreamSlice | None = None, ) -> Iterable[Mapping[str, Any]]: for record in records: for transformation in self.transformations: diff --git a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py 
b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py index c7fd98c17..8c6d0eada 100644 --- a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +++ b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -5,9 +5,10 @@ import os import uuid import zlib +from collections.abc import Iterable, Mapping from contextlib import closing from dataclasses import InitVar, dataclass -from typing import Any, Dict, Iterable, Mapping, Optional, Tuple +from typing import Any import pandas as pd import requests @@ -34,7 +35,7 @@ class ResponseToFileExtractor(RecordExtractor): def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.logger = logging.getLogger("airbyte") - def _get_response_encoding(self, headers: Dict[str, Any]) -> str: + def _get_response_encoding(self, headers: dict[str, Any]) -> str: """ Get the encoding of the response based on the provided headers. This method is heavily inspired by the requests library implementation. @@ -77,7 +78,7 @@ def _filter_null_bytes(self, b: bytes) -> bytes: ) return res - def _save_to_file(self, response: requests.Response) -> Tuple[str, str]: + def _save_to_file(self, response: requests.Response) -> tuple[str, str]: """ Saves the binary data from the given response to a temporary file and returns the filepath and response encoding. @@ -135,25 +136,24 @@ def _read_with_chunks( """ try: - with open(path, "r", encoding=file_encoding) as data: + with open(path, encoding=file_encoding) as data: chunks = pd.read_csv( data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object ) for chunk in chunks: chunk = chunk.replace({nan: None}).to_dict(orient="records") - for row in chunk: - yield row + yield from chunk except pd.errors.EmptyDataError as e: self.logger.info(f"Empty data received. {e}") yield from [] - except IOError as ioe: + except OSError as ioe: raise ValueError(f"The IO/Error occured while reading tmp data. 
Called: {path}", ioe) finally: # remove binary tmp file, after data is read os.remove(path) def extract_records( - self, response: Optional[requests.Response] = None + self, response: requests.Response | None = None ) -> Iterable[Mapping[str, Any]]: """ Extracts records from the given response by: diff --git a/airbyte_cdk/sources/declarative/extractors/type_transformer.py b/airbyte_cdk/sources/declarative/extractors/type_transformer.py index fe307684f..9060bed35 100644 --- a/airbyte_cdk/sources/declarative/extractors/type_transformer.py +++ b/airbyte_cdk/sources/declarative/extractors/type_transformer.py @@ -3,8 +3,9 @@ # from abc import ABC, abstractmethod +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Dict, Mapping +from typing import Any @dataclass @@ -35,7 +36,7 @@ class TypeTransformer(ABC): @abstractmethod def transform( self, - record: Dict[str, Any], + record: dict[str, Any], schema: Mapping[str, Any], ) -> None: """ diff --git a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py index a0c541dc4..ee819c56b 100644 --- a/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py @@ -7,9 +7,10 @@ import threading import time from collections import OrderedDict +from collections.abc import Callable, Iterable, Mapping, MutableMapping from copy import deepcopy from datetime import timedelta -from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional +from typing import Any from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import ( @@ -36,7 +37,7 @@ def __init__(self, create_function: Callable[..., ConcurrentCursor]): self._create_function = create_function def create( - self, stream_state: Mapping[str, Any], runtime_lookback_window: Optional[timedelta] + self, stream_state: Mapping[str, Any], runtime_lookback_window: timedelta | None ) -> ConcurrentCursor: return self._create_function( stream_state=stream_state, runtime_lookback_window=runtime_lookback_window @@ -73,7 +74,7 @@ def __init__( cursor_factory: ConcurrentCursorFactory, partition_router: PartitionRouter, stream_name: str, - stream_namespace: Optional[str], + stream_namespace: str | None, stream_state: Any, message_repository: MessageRepository, connector_state_manager: ConnectorStateManager, @@ -81,7 +82,7 @@ def __init__( cursor_field: CursorField, use_global_cursor: bool = False, ) -> None: - self._global_cursor: Optional[StreamState] = {} + self._global_cursor: StreamState | None = {} self._stream_name = stream_name self._stream_namespace = stream_namespace self._message_repository = message_repository @@ -103,9 +104,9 @@ def __init__( self._finished_partitions: set[str] = set() self._lock = threading.Lock() self._timer = Timer() - self._new_global_cursor: Optional[StreamState] = None + self._new_global_cursor: StreamState | None = None self._lookback_window: int = 0 - self._parent_state: Optional[StreamState] = None + self._parent_state: StreamState | None = None self._number_of_partitions: int = 0 self._use_global_cursor: bool = use_global_cursor self._partition_serializer = PerPartitionKeySerializer() @@ -143,7 +144,7 @@ def state(self) -> MutableMapping[str, Any]: def close_partition(self, partition: Partition) -> None: # Attempt to retrieve the stream slice - 
stream_slice: Optional[StreamSlice] = partition.to_slice() # type: ignore[assignment] + stream_slice: StreamSlice | None = partition.to_slice() # type: ignore[assignment] # Ensure stream_slice is not None if stream_slice is None: @@ -226,7 +227,7 @@ def ensure_at_least_one_state_emitted(self) -> None: self._parent_state = self._partition_router.get_stream_state() self._emit_state_message(throttle=False) - def _throttle_state_message(self) -> Optional[float]: + def _throttle_state_message(self) -> float | None: """ Throttles the state message emission to once every 60 seconds. """ diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index cb39f56ba..0dbb45447 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -3,9 +3,10 @@ # import datetime +from collections.abc import Callable, Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field from datetime import timedelta -from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Union +from typing import Any from isodate import Duration, duration_isoformat, parse_duration @@ -53,28 +54,28 @@ class DatetimeBasedCursor(DeclarativeCursor): lookback_window (Optional[InterpolatedString]): how many days before start_datetime to read data for (ISO8601 duration) """ - start_datetime: Union[MinMaxDatetime, str] - cursor_field: Union[InterpolatedString, str] + start_datetime: MinMaxDatetime | str + cursor_field: InterpolatedString | str datetime_format: str config: Config parameters: InitVar[Mapping[str, Any]] - _highest_observed_cursor_field_value: Optional[str] = field( + _highest_observed_cursor_field_value: str | None = field( repr=False, default=None ) # tracks the latest observed datetime, which may not be safe to emit in the case of out-of-order records - _cursor: Optional[str] = field( + _cursor: str | None = field( repr=False, default=None ) # tracks the latest observed datetime that is appropriate to emit as stream state - end_datetime: Optional[Union[MinMaxDatetime, str]] = None - step: Optional[Union[InterpolatedString, str]] = None - cursor_granularity: Optional[str] = None - start_time_option: Optional[RequestOption] = None - end_time_option: Optional[RequestOption] = None - partition_field_start: Optional[str] = None - partition_field_end: Optional[str] = None - lookback_window: Optional[Union[InterpolatedString, str]] = None - message_repository: Optional[MessageRepository] = None - is_compare_strictly: Optional[bool] = False - cursor_datetime_formats: List[str] = field(default_factory=lambda: []) + end_datetime: MinMaxDatetime | str | None = None + step: InterpolatedString | str | None = None + cursor_granularity: str | None = None + start_time_option: RequestOption | None = None + end_time_option: RequestOption | None = None + partition_field_start: str | None = None + partition_field_end: str | None = None + lookback_window: InterpolatedString | str | None = None + message_repository: MessageRepository | None = None + is_compare_strictly: bool | None = False + cursor_datetime_formats: list[str] = field(default_factory=lambda: []) def __post_init__(self, parameters: Mapping[str, Any]) -> None: if (self.step and not self.cursor_granularity) or ( @@ -207,7 +208,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: start_datetime = 
self._calculate_earliest_possible_value(self.select_best_end_datetime()) return self._partition_daterange(start_datetime, end_datetime, self._step) - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: # Datetime based cursors operate over slices made up of datetime ranges. Stream state is based on the progress # through each slice and does not belong to a specific slice. We just return stream state as it is. return self.get_stream_state() @@ -259,8 +260,8 @@ def _partition_daterange( self, start: datetime.datetime, end: datetime.datetime, - step: Union[datetime.timedelta, Duration], - ) -> List[StreamSlice]: + step: datetime.timedelta | Duration, + ) -> list[StreamSlice]: start_field = self._partition_field_start.eval(self.config) end_field = self._partition_field_end.eval(self.config) dates = [] @@ -316,7 +317,7 @@ def parse_date(self, date: str) -> datetime.datetime: raise ValueError(f"No format in {self.cursor_datetime_formats} matching {date}") @classmethod - def _parse_timedelta(cls, time_str: Optional[str]) -> Union[datetime.timedelta, Duration]: + def _parse_timedelta(cls, time_str: str | None) -> datetime.timedelta | Duration: """ :return Parses an ISO 8601 durations into datetime.timedelta or Duration objects. """ @@ -327,36 +328,36 @@ def _parse_timedelta(cls, time_str: Optional[str]) -> Union[datetime.timedelta, def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter, stream_slice) def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.header, stream_slice) def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_data, stream_slice) def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_json, stream_slice) @@ -365,7 +366,7 @@ def request_kwargs(self) -> Mapping[str, Any]: return {} def _get_request_options( - self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice] + self, option_type: RequestOptionType, stream_slice: StreamSlice | None ) -> Mapping[str, Any]: options: MutableMapping[str, Any] = {} if not stream_slice: @@ -401,8 +402,8 @@ def should_be_synced(self, record: 
Record) -> bool: def _is_within_daterange_boundaries( self, record: Record, - start_datetime_boundary: Union[datetime.datetime, str], - end_datetime_boundary: Union[datetime.datetime, str], + start_datetime_boundary: datetime.datetime | str, + end_datetime_boundary: datetime.datetime | str, ) -> bool: cursor_field = self.cursor_field.eval(self.config) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__ record_cursor_value = record.get(cursor_field) diff --git a/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py b/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py index 610a15bbd..21f5fb617 100644 --- a/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py @@ -4,7 +4,8 @@ import threading import time -from typing import Any, Callable, Iterable, Mapping, Optional, TypeVar, Union +from collections.abc import Callable, Iterable, Mapping +from typing import Any, TypeVar from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor @@ -15,7 +16,7 @@ def iterate_with_last_flag_and_state( - generator: Iterable[T], get_stream_state_func: Callable[[], Optional[Mapping[str, StreamState]]] + generator: Iterable[T], get_stream_state_func: Callable[[], Mapping[str, StreamState] | None] ) -> Iterable[tuple[T, bool, Any]]: """ Iterates over the given generator, yielding tuples containing the element, a flag @@ -53,7 +54,7 @@ class Timer: """ def __init__(self) -> None: - self._start: Optional[int] = None + self._start: int | None = None def start(self) -> None: self._start = time.perf_counter_ns() @@ -91,10 +92,10 @@ def __init__(self, stream_cursor: DatetimeBasedCursor, partition_router: Partiti 0 ) # Start with 0, indicating no slices being tracked self._all_slices_yielded = False - self._lookback_window: Optional[int] = None - self._current_partition: Optional[Mapping[str, Any]] = None + self._lookback_window: int | None = None + self._current_partition: Mapping[str, Any] | None = None self._last_slice: bool = False - self._parent_state: Optional[Mapping[str, Any]] = None + self._parent_state: Mapping[str, Any] | None = None def start_slices_generation(self) -> None: self._timer.start() @@ -251,16 +252,16 @@ def get_stream_state(self) -> StreamState: return state - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: # stream_slice is ignored as cursor is global return self._stream_cursor.get_stream_state() def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: if stream_slice: return self._partition_router.get_request_params( # type: ignore # this always returns a mapping @@ -278,9 +279,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: 
Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: if stream_slice: return self._partition_router.get_request_headers( # type: ignore # this always returns a mapping @@ -298,10 +299,10 @@ def get_request_headers( def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: if stream_slice: return self._partition_router.get_request_body_data( # type: ignore # this always returns a mapping stream_state=stream_state, @@ -318,9 +319,9 @@ def get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: if stream_slice: return self._partition_router.get_request_body_json( # type: ignore # this always returns a mapping diff --git a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py index 76a16e141..fcdc630ef 100644 --- a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py @@ -4,7 +4,8 @@ import logging from collections import OrderedDict -from typing import Any, Callable, Iterable, Mapping, Optional, Union +from collections.abc import Callable, Iterable, Mapping +from typing import Any from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter @@ -183,7 +184,7 @@ def get_stream_state(self) -> StreamState: state["parent_state"] = parent_state return state - def _get_state_for_partition(self, partition: Mapping[str, Any]) -> Optional[StreamState]: + def _get_state_for_partition(self, partition: Mapping[str, Any]) -> StreamState | None: cursor = self._cursor_per_partition.get(self._to_partition_key(partition)) if cursor: return cursor.get_stream_state() @@ -200,7 +201,7 @@ def _to_partition_key(self, partition: Mapping[str, Any]) -> str: def _to_dict(self, partition_key: str) -> Mapping[str, Any]: return self._partition_serializer.to_partition(partition_key) - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: if not stream_slice: raise ValueError("A partition needs to be provided in order to extract a state") @@ -217,9 +218,9 @@ def _create_cursor(self, cursor_state: Any) -> DeclarativeCursor: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: if stream_slice: if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition: @@ -241,9 +242,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: 
Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: if stream_slice: if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition: @@ -265,10 +266,10 @@ def get_request_headers( def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: if stream_slice: if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition: self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition)) @@ -289,9 +290,9 @@ def get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: if stream_slice: if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition: diff --git a/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py b/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py index defa2d897..a1b8bbc5d 100644 --- a/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +++ b/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py @@ -1,7 +1,8 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
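The per_partition_cursor hunks above all share one piece of bookkeeping: a partition mapping is serialized to a stable key, and a cursor is created lazily the first time that key is seen. The sketch below shows only that idea; PerPartitionStateStore and to_partition_key are invented names, and JSON-with-sorted-keys is an assumption, since the real PerPartitionKeySerializer and cursor factory are not shown in this diff.

import json
from collections import OrderedDict
from collections.abc import Mapping
from typing import Any


def to_partition_key(partition: Mapping[str, Any]) -> str:
    # Assumption: partition values are JSON-serializable; sorted keys keep the key stable.
    return json.dumps(dict(partition), sort_keys=True)


class PerPartitionStateStore:
    # Illustrative only: one state blob per partition, created lazily on first access.
    def __init__(self) -> None:
        self._state_per_partition: OrderedDict[str, dict[str, Any]] = OrderedDict()

    def state_for(self, partition: Mapping[str, Any]) -> dict[str, Any]:
        key = to_partition_key(partition)
        if key not in self._state_per_partition:
            self._state_per_partition[key] = {}  # analogous to _create_cursor_for_partition
        return self._state_per_partition[key]


store = PerPartitionStateStore()
store.state_for({"parent_id": 1})["cursor"] = "2024-08-30"
assert store.state_for({"parent_id": 1}) == {"cursor": "2024-08-30"}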
# -from typing import Any, Iterable, Mapping, MutableMapping, Optional, Union +from collections.abc import Iterable, Mapping, MutableMapping +from typing import Any from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor @@ -76,11 +77,11 @@ def __init__( self._per_partition_cursor = PerPartitionCursor(cursor_factory, partition_router) self._global_cursor = GlobalSubstreamCursor(stream_cursor, partition_router) self._use_global_cursor = False - self._current_partition: Optional[Mapping[str, Any]] = None + self._current_partition: Mapping[str, Any] | None = None self._last_slice: bool = False - self._parent_state: Optional[Mapping[str, Any]] = None + self._parent_state: Mapping[str, Any] | None = None - def _get_active_cursor(self) -> Union[PerPartitionCursor, GlobalSubstreamCursor]: + def _get_active_cursor(self) -> PerPartitionCursor | GlobalSubstreamCursor: return self._global_cursor if self._use_global_cursor else self._per_partition_cursor def stream_slices(self) -> Iterable[StreamSlice]: @@ -138,15 +139,15 @@ def get_stream_state(self) -> StreamState: return final_state - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: return self._get_active_cursor().select_state(stream_slice) def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_active_cursor().get_request_params( stream_state=stream_state, @@ -157,9 +158,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_active_cursor().get_request_headers( stream_state=stream_state, @@ -170,10 +171,10 @@ def get_request_headers( def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: return self._get_active_cursor().get_request_body_data( stream_state=stream_state, stream_slice=stream_slice, @@ -183,9 +184,9 @@ def get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_active_cursor().get_request_body_json( stream_state=stream_state, diff --git a/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py b/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py 
index a0b4665f1..e9e5231e3 100644 --- a/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any, Iterable, Mapping, Optional +from typing import Any from airbyte_cdk.sources.declarative.incremental import DeclarativeCursor from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState @@ -48,7 +49,7 @@ def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: """ return False - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: # A top-level RFR cursor only manages the state of a single partition return self._cursor @@ -65,36 +66,36 @@ def stream_slices(self) -> Iterable[StreamSlice]: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} diff --git a/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte_cdk/sources/declarative/interpolation/filters.py index 4824e3a9f..b2787b5ae 100644 --- a/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -7,10 +7,10 @@ import hmac as hmac_lib import json import re -from typing import Any, Dict, Optional +from typing import Any -def hash(value: Any, hash_type: str = "md5", salt: Optional[str] = None) -> str: +def hash(value: Any, hash_type: str = "md5", salt: str | None = None) -> str: """ Implementation of a custom Jinja2 hash filter Hash type defaults to 'md5' if one is not specified. 
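For the filters.py hash filter whose signature appears just above (and whose body continues below): it picks a hashlib algorithm by name, folds in the value and an optional salt, and returns the hex digest. A stand-alone sketch of that behaviour follows; it is named hash_value here to avoid shadowing the builtin, and the getattr-based dispatch is an assumption for illustration rather than the CDK's exact lookup.

import hashlib
from typing import Any


def hash_value(value: Any, hash_type: str = "md5", salt: str | None = None) -> str:
    # Assumption: dispatch via getattr(hashlib, ...); the CDK's lookup may differ.
    hash_func = getattr(hashlib, hash_type, None)
    if hash_func is None:
        raise AttributeError(f"No hashing function named {hash_type}")
    hash_obj = hash_func()
    hash_obj.update(str(value).encode("utf-8"))
    if salt:
        hash_obj.update(str(salt).encode("utf-8"))
    return hash_obj.hexdigest()


assert hash_value("secret", "sha256") == hashlib.sha256(b"secret").hexdigest()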
@@ -51,7 +51,7 @@ def hash(value: Any, hash_type: str = "md5", salt: Optional[str] = None) -> str: hash_obj.update(str(salt).encode("utf-8")) computed_hash: str = hash_obj.hexdigest() else: - raise AttributeError("No hashing function named {hname}".format(hname=hash_type)) + raise AttributeError(f"No hashing function named {hash_type}") return computed_hash @@ -156,7 +156,7 @@ def hmac(value: Any, key: str, hash_type: str = "sha256") -> str: :return: HMAC digest as a hexadecimal string """ # Define allowed hash functions - ALLOWED_HASH_TYPES: Dict[str, Any] = { + ALLOWED_HASH_TYPES: dict[str, Any] = { "sha256": hashlib.sha256, } diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py index 04cc7e694..83788f94d 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py @@ -2,13 +2,14 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Final, List, Mapping +from typing import Any, Final from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation from airbyte_cdk.sources.types import Config -FALSE_VALUES: Final[List[Any]] = [ +FALSE_VALUES: Final[list[Any]] = [ "False", "false", "{}", diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py index b96a2a6b7..669231002 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py @@ -3,8 +3,9 @@ # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Dict, Mapping, Optional +from typing import Any from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation from airbyte_cdk.sources.types import Config @@ -22,11 +23,11 @@ class InterpolatedMapping: mapping: Mapping[str, str] parameters: InitVar[Mapping[str, Any]] - def __post_init__(self, parameters: Optional[Mapping[str, Any]]) -> None: + def __post_init__(self, parameters: Mapping[str, Any] | None) -> None: self._interpolation = JinjaInterpolation() self._parameters = parameters - def eval(self, config: Config, **additional_parameters: Any) -> Dict[str, Any]: + def eval(self, config: Config, **additional_parameters: Any) -> dict[str, Any]: """ Wrapper around a Mapping[str, str] that allows for both keys and values to be interpolated. 
diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py index f441ba918..50a7eab8e 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py @@ -3,8 +3,9 @@ # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any, Union from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation from airbyte_cdk.sources.types import Config @@ -27,7 +28,7 @@ class InterpolatedNestedMapping: mapping: NestedMapping parameters: InitVar[Mapping[str, Any]] - def __post_init__(self, parameters: Optional[Mapping[str, Any]]) -> None: + def __post_init__(self, parameters: Mapping[str, Any] | None) -> None: self._interpolation = JinjaInterpolation() self._parameters = parameters @@ -35,7 +36,7 @@ def eval(self, config: Config, **additional_parameters: Any) -> Any: return self._eval(self.mapping, config, **additional_parameters) def _eval( - self, value: Union[NestedMapping, NestedMappingEntry], config: Config, **kwargs: Any + self, value: NestedMapping | NestedMappingEntry, config: Config, **kwargs: Any ) -> Any: # Recursively interpolate dictionaries and lists if isinstance(value, str): diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py index ef20a436f..0f726dd87 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py @@ -2,8 +2,9 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. 
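The interpolated_nested_mapping.py hunk above recursively interpolates dictionaries and lists. Below is a small illustration of that recursion, using plain jinja2 Templates as a stand-in for the CDK's JinjaInterpolation; eval_nested is an invented name, and the real _eval also threads extra keyword parameters through.

from collections.abc import Mapping
from typing import Any

from jinja2 import Template  # stand-in for the CDK's JinjaInterpolation


def eval_nested(value: Any, config: Mapping[str, Any]) -> Any:
    # Recursively interpolate dictionaries and lists; leave other scalars untouched.
    if isinstance(value, str):
        return Template(value).render(config=config)
    if isinstance(value, Mapping):
        return {eval_nested(k, config): eval_nested(v, config) for k, v in value.items()}
    if isinstance(value, list):
        return [eval_nested(item, config) for item in value]
    return value


nested = {"filters": [{"field": "{{ config['cursor_field'] }}", "limit": 10}]}
assert eval_nested(nested, {"cursor_field": "updated_at"}) == {
    "filters": [{"field": "updated_at", "limit": 10}]
}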
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any, Union from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation from airbyte_cdk.sources.types import Config @@ -22,7 +23,7 @@ class InterpolatedString: string: str parameters: InitVar[Mapping[str, Any]] - default: Optional[str] = None + default: str | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.default = self.default or self.string @@ -55,9 +56,12 @@ def eval(self, config: Config, **kwargs: Any) -> Any: ) def __eq__(self, other: Any) -> bool: - if not isinstance(other, InterpolatedString): + if isinstance(other, InterpolatedString): + return self.string == other.string and self.default == other.default + try: + return self.string == str(other) + except (ValueError, TypeError): return False - return self.string == other.string and self.default == other.default @classmethod def create( diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolation.py b/airbyte_cdk/sources/declarative/interpolation/interpolation.py index 021f96df6..eba47251f 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolation.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolation.py @@ -4,7 +4,7 @@ from abc import ABC, abstractmethod -from typing import Any, Optional +from typing import Any from airbyte_cdk.sources.types import Config @@ -19,7 +19,7 @@ def eval( self, input_str: str, config: Config, - default: Optional[str] = None, + default: str | None = None, **additional_options: Any, ) -> Any: """ diff --git a/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte_cdk/sources/declarative/interpolation/jinja.py index 543fe9b46..301e9a5d2 100644 --- a/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -3,8 +3,9 @@ # import ast +from collections.abc import Mapping from functools import cache -from typing import Any, Mapping, Optional, Set, Tuple, Type +from typing import Any from jinja2 import meta from jinja2.environment import Template @@ -86,8 +87,8 @@ def eval( self, input_str: str, config: Config, - default: Optional[str] = None, - valid_types: Optional[Tuple[Type[Any]]] = None, + default: str | None = None, + valid_types: tuple[type[Any]] | None = None, **additional_parameters: Any, ) -> Any: context = {"config": config, **additional_parameters} @@ -122,7 +123,7 @@ def eval( # If result is empty or resulted in an undefined error, evaluate and return the default string return self._literal_eval(self._eval(default, context), valid_types) - def _literal_eval(self, result: Optional[str], valid_types: Optional[Tuple[Type[Any]]]) -> Any: + def _literal_eval(self, result: str | None, valid_types: tuple[type[Any]] | None) -> Any: try: evaluated = ast.literal_eval(result) # type: ignore # literal_eval is able to handle None except (ValueError, SyntaxError): @@ -131,7 +132,7 @@ def _literal_eval(self, result: Optional[str], valid_types: Optional[Tuple[Type[ return evaluated return result - def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]: + def _eval(self, s: str | None, context: Mapping[str, Any]) -> str | None: try: undeclared = self._find_undeclared_variables(s) undeclared_not_in_context = {var for var in undeclared if var not in context} @@ -146,7 +147,7 @@ def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]: return s @cache - def 
_find_undeclared_variables(self, s: Optional[str]) -> Set[str]: + def _find_undeclared_variables(self, s: str | None) -> set[str]: """ Find undeclared variables and cache them """ diff --git a/airbyte_cdk/sources/declarative/interpolation/macros.py b/airbyte_cdk/sources/declarative/interpolation/macros.py index f84ece214..d4c6db8ad 100644 --- a/airbyte_cdk/sources/declarative/interpolation/macros.py +++ b/airbyte_cdk/sources/declarative/interpolation/macros.py @@ -5,7 +5,6 @@ import builtins import datetime import typing -from typing import Optional, Union import isodate import pytz @@ -49,7 +48,7 @@ def today_with_timezone(timezone: str) -> datetime.date: return datetime.datetime.now(tz=pytz.timezone(timezone)).date() -def timestamp(dt: Union[float, str]) -> Union[int, float]: +def timestamp(dt: float | str) -> int | float: """ Converts a number or a string to a timestamp @@ -62,7 +61,7 @@ def timestamp(dt: Union[float, str]) -> Union[int, float]: :param dt: datetime to convert to timestamp :return: unix timestamp """ - if isinstance(dt, (int, float)): + if isinstance(dt, int | float): return int(dt) else: return str_to_datetime(dt).astimezone(pytz.utc).timestamp() @@ -145,7 +144,7 @@ def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str: ).strftime(format) -def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]: +def duration(datestring: str) -> datetime.timedelta | isodate.Duration: """ Converts ISO8601 duration to datetime.timedelta @@ -156,7 +155,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]: def format_datetime( - dt: Union[str, datetime.datetime, int], format: str, input_format: Optional[str] = None + dt: str | datetime.datetime | int, format: str, input_format: str | None = None ) -> str: """ Converts datetime to another format diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index cfd258c6c..7d4f124f9 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -5,10 +5,11 @@ import json import logging import pkgutil +from collections.abc import Iterator, Mapping from copy import deepcopy from importlib import metadata from types import ModuleType -from typing import Any, Dict, Iterator, List, Mapping, Optional, Set +from typing import Any import yaml from jsonschema.exceptions import ValidationError @@ -67,7 +68,7 @@ def __init__( config: Mapping[str, Any] | None = None, debug: bool = False, emit_connector_builder_messages: bool = False, - component_factory: Optional[ModelToComponentFactory] = None, + component_factory: ModelToComponentFactory | None = None, ): """ Args: @@ -118,7 +119,7 @@ def message_repository(self) -> MessageRepository: return self._message_repository @property - def dynamic_streams(self) -> List[Dict[str, Any]]: + def dynamic_streams(self) -> list[dict[str, Any]]: return self._dynamic_stream_configs( manifest=self._source_config, config=self._config, with_dynamic_stream_name=True ) @@ -141,7 +142,7 @@ def connection_checker(self) -> ConnectionChecker: f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}" ) - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: self._emit_manifest_debug_message( extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)} ) @@ -170,8 
+171,8 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: @staticmethod def _initialize_cache_for_parent_streams( - stream_configs: List[Dict[str, Any]], - ) -> List[Dict[str, Any]]: + stream_configs: list[dict[str, Any]], + ) -> list[dict[str, Any]]: parent_streams = set() def update_with_cache_parent_configs(parent_configs: list[dict[str, Any]]) -> None: @@ -250,7 +251,7 @@ def read( logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, - state: Optional[List[AirbyteStateMessage]] = None, + state: list[AirbyteStateMessage] | None = None, ) -> Iterator[AirbyteMessage]: self._configure_logger_level(logger) yield from super().read(logger, config, catalog, state) @@ -346,9 +347,9 @@ def _parse_version( # No exception return parsed_version - def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]: + def _stream_configs(self, manifest: Mapping[str, Any]) -> list[dict[str, Any]]: # This has a warning flag for static, but after we finish part 4 we'll replace manifest with self._source_config - stream_configs: List[Dict[str, Any]] = manifest.get("streams", []) + stream_configs: list[dict[str, Any]] = manifest.get("streams", []) for s in stream_configs: if "type" not in s: s["type"] = "DeclarativeStream" @@ -358,11 +359,11 @@ def _dynamic_stream_configs( self, manifest: Mapping[str, Any], config: Mapping[str, Any], - with_dynamic_stream_name: Optional[bool] = None, - ) -> List[Dict[str, Any]]: - dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", []) - dynamic_stream_configs: List[Dict[str, Any]] = [] - seen_dynamic_streams: Set[str] = set() + with_dynamic_stream_name: bool | None = None, + ) -> list[dict[str, Any]]: + dynamic_stream_definitions: list[dict[str, Any]] = manifest.get("dynamic_streams", []) + dynamic_stream_configs: list[dict[str, Any]] = [] + seen_dynamic_streams: set[str] = set() for dynamic_definition_index, dynamic_definition in enumerate(dynamic_stream_definitions): components_resolver_config = dynamic_definition["components_resolver"] diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index 830646fe9..b84dc587d 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -1,6 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration diff --git a/airbyte_cdk/sources/declarative/migrations/state_migration.py b/airbyte_cdk/sources/declarative/migrations/state_migration.py index 9cf7f3cfe..49bf5aa57 100644 --- a/airbyte_cdk/sources/declarative/migrations/state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/state_migration.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
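One behavioural detail worth calling out from the macros.py hunk above: on Python 3.10+, isinstance accepts a PEP 604 union directly, so isinstance(dt, (int, float)) can become isinstance(dt, int | float). A minimal sketch of a timestamp helper in that style follows; datetime.fromisoformat stands in for the CDK's str_to_datetime, which is more permissive about input formats.

import datetime


def timestamp(dt: float | str) -> int | float:
    if isinstance(dt, int | float):  # union isinstance, Python 3.10+
        return int(dt)
    parsed = datetime.datetime.fromisoformat(dt)  # assumption: ISO 8601 input
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed.timestamp()


assert timestamp(1640995200.9) == 1640995200
assert timestamp("2022-01-01T00:00:00+00:00") == 1640995200.0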
from abc import abstractmethod -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any class StateMigration: diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 3566abef4..9e6c0a59a 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -6,7 +6,7 @@ from __future__ import annotations from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal from pydantic.v1 import BaseModel, Extra, Field @@ -24,13 +24,13 @@ class BasicHttpAuthenticator(BaseModel): examples=["{{ config['username'] }}", "{{ config['api_key'] }}"], title="Username", ) - password: Optional[str] = Field( + password: str | None = Field( "", description="The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.", examples=["{{ config['password'] }}", ""], title="Password", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class BearerAuthenticator(BaseModel): @@ -41,7 +41,7 @@ class BearerAuthenticator(BaseModel): examples=["{{ config['api_key'] }}", "{{ config['token'] }}"], title="Bearer Token", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class DynamicStreamCheckConfig(BaseModel): @@ -49,7 +49,7 @@ class DynamicStreamCheckConfig(BaseModel): dynamic_stream_name: str = Field( ..., description="The dynamic stream name.", title="Dynamic Stream Name" ) - stream_count: Optional[int] = Field( + stream_count: int | None = Field( 0, description="The number of streams to attempt reading from during a check operation. If `stream_count` exceeds the total number of available streams, the minimum of the two values will be used.", title="Stream Count", @@ -63,7 +63,7 @@ class CheckDynamicStream(BaseModel): description="Numbers of the streams to try reading from when running a check operation.", title="Stream Count", ) - use_check_availability: Optional[bool] = Field( + use_check_availability: bool | None = Field( True, description="Enables stream check availability. This field is automatically set by the CDK.", title="Use Check Availability", @@ -71,31 +71,31 @@ class CheckDynamicStream(BaseModel): class ConcurrencyLevel(BaseModel): - type: Optional[Literal["ConcurrencyLevel"]] = None - default_concurrency: Union[int, str] = Field( + type: Literal["ConcurrencyLevel"] | None = None + default_concurrency: int | str = Field( ..., description="The amount of concurrency that will applied during a sync. This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.", examples=[10, "{{ config['num_workers'] or 10 }}"], title="Default Concurrency", ) - max_concurrency: Optional[int] = Field( + max_concurrency: int | None = Field( None, description="The maximum level of concurrency that will be used during a sync. 
This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.", examples=[20, 100], title="Max Concurrency", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ConstantBackoffStrategy(BaseModel): type: Literal["ConstantBackoffStrategy"] - backoff_time_in_seconds: Union[float, str] = Field( + backoff_time_in_seconds: float | str = Field( ..., description="Backoff time in seconds.", examples=[30, 30.5, "{{ config['backoff_time'] }}"], title="Backoff Time", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CursorPagination(BaseModel): @@ -110,13 +110,13 @@ class CursorPagination(BaseModel): ], title="Cursor Value", ) - page_size: Optional[int] = Field( + page_size: int | None = Field( None, description="The number of records to include in each pages.", examples=[100], title="Page Size", ) - stop_condition: Optional[str] = Field( + stop_condition: str | None = Field( None, description="Template string evaluating when to stop paginating.", examples=[ @@ -125,7 +125,7 @@ class CursorPagination(BaseModel): ], title="Stop Condition", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomAuthenticator(BaseModel): @@ -139,7 +139,7 @@ class Config: examples=["source_railz.components.ShortLivedTokenAuthenticator"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomBackoffStrategy(BaseModel): @@ -153,7 +153,7 @@ class Config: examples=["source_railz.components.MyCustomBackoffStrategy"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomErrorHandler(BaseModel): @@ -167,7 +167,7 @@ class Config: examples=["source_railz.components.MyCustomErrorHandler"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomIncrementalSync(BaseModel): @@ -185,7 +185,7 @@ class Config: ..., description="The location of the value on a record that will be used as a bookmark during sync.", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomPaginationStrategy(BaseModel): @@ -199,7 +199,7 @@ class Config: examples=["source_railz.components.MyCustomPaginationStrategy"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomRecordExtractor(BaseModel): @@ -213,7 +213,7 @@ class Config: examples=["source_railz.components.MyCustomRecordExtractor"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomRecordFilter(BaseModel): @@ -227,7 +227,7 @@ class Config: examples=["source_railz.components.MyCustomCustomRecordFilter"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, 
alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomRequester(BaseModel): @@ -241,7 +241,7 @@ class Config: examples=["source_railz.components.MyCustomRecordExtractor"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomRetriever(BaseModel): @@ -255,7 +255,7 @@ class Config: examples=["source_railz.components.MyCustomRetriever"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomPartitionRouter(BaseModel): @@ -269,7 +269,7 @@ class Config: examples=["source_railz.components.MyCustomPartitionRouter"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomSchemaLoader(BaseModel): @@ -283,7 +283,7 @@ class Config: examples=["source_railz.components.MyCustomSchemaLoader"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomSchemaNormalization(BaseModel): @@ -299,7 +299,7 @@ class Config: ], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomStateMigration(BaseModel): @@ -313,7 +313,7 @@ class Config: examples=["source_railz.components.MyCustomStateMigration"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CustomTransformation(BaseModel): @@ -327,14 +327,14 @@ class Config: examples=["source_railz.components.MyCustomTransformation"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class LegacyToPerPartitionStateMigration(BaseModel): class Config: extra = Extra.allow - type: Optional[Literal["LegacyToPerPartitionStateMigration"]] = None + type: Literal["LegacyToPerPartitionStateMigration"] | None = None class Clamping(BaseModel): @@ -344,7 +344,7 @@ class Clamping(BaseModel): examples=["DAY", "WEEK", "MONTH", "{{ config['target'] }}"], title="Target", ) - target_details: Optional[Dict[str, Any]] = None + target_details: dict[str, Any] | None = None class Algorithm(Enum): @@ -368,19 +368,19 @@ class JwtHeaders(BaseModel): class Config: extra = Extra.forbid - kid: Optional[str] = Field( + kid: str | None = Field( None, description="Private key ID for user account.", examples=["{{ config['kid'] }}"], title="Key Identifier", ) - typ: Optional[str] = Field( + typ: str | None = Field( "JWT", description="The media type of the complete JWT.", examples=["JWT"], title="Type", ) - cty: Optional[str] = Field( + cty: str | None = Field( None, description="Content type of JWT header.", examples=["JWT"], @@ -392,18 +392,18 @@ class JwtPayload(BaseModel): class Config: extra = Extra.forbid - iss: Optional[str] = Field( + iss: str | None = Field( None, description="The user/principal that issued the JWT. Commonly a value unique to the user.", examples=["{{ config['iss'] }}"], title="Issuer", ) - sub: Optional[str] = Field( + sub: str | None = Field( None, description="The subject of the JWT. 
Commonly defined by the API.", title="Subject", ) - aud: Optional[str] = Field( + aud: str | None = Field( None, description="The recipient that the JWT is intended for. Commonly defined by the API.", examples=["appstoreconnect-v1"], @@ -418,7 +418,7 @@ class JwtAuthenticator(BaseModel): description="Secret used to sign the JSON web token.", examples=["{{ config['secret_key'] }}"], ) - base64_encode_secret_key: Optional[bool] = Field( + base64_encode_secret_key: bool | None = Field( False, description='When set to true, the secret key will be base64 encoded prior to being encoded as part of the JWT. Only set to "true" when required by the API.', ) @@ -427,79 +427,79 @@ class JwtAuthenticator(BaseModel): description="Algorithm used to sign the JSON web token.", examples=["ES256", "HS256", "RS256", "{{ config['algorithm'] }}"], ) - token_duration: Optional[int] = Field( + token_duration: int | None = Field( 1200, description="The amount of time in seconds a JWT token can be valid after being issued.", examples=[1200, 3600], title="Token Duration", ) - header_prefix: Optional[str] = Field( + header_prefix: str | None = Field( None, description="The prefix to be used within the Authentication header.", examples=["Bearer", "Basic"], title="Header Prefix", ) - jwt_headers: Optional[JwtHeaders] = Field( + jwt_headers: JwtHeaders | None = Field( None, description="JWT headers used when signing JSON web token.", title="JWT Headers", ) - additional_jwt_headers: Optional[Dict[str, Any]] = Field( + additional_jwt_headers: dict[str, Any] | None = Field( None, description="Additional headers to be included with the JWT headers object.", title="Additional JWT Headers", ) - jwt_payload: Optional[JwtPayload] = Field( + jwt_payload: JwtPayload | None = Field( None, description="JWT Payload used when signing JSON web token.", title="JWT Payload", ) - additional_jwt_payload: Optional[Dict[str, Any]] = Field( + additional_jwt_payload: dict[str, Any] | None = Field( None, description="Additional properties to be added to the JWT payload.", title="Additional JWT Payload Properties", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class RefreshTokenUpdater(BaseModel): - refresh_token_name: Optional[str] = Field( + refresh_token_name: str | None = Field( "refresh_token", description="The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", examples=["refresh_token"], title="Refresh Token Property Name", ) - access_token_config_path: Optional[List[str]] = Field( + access_token_config_path: list[str] | None = Field( ["credentials", "access_token"], description="Config path to the access token. Make sure the field actually exists in the config.", examples=[["credentials", "access_token"], ["access_token"]], title="Config Path To Access Token", ) - refresh_token_config_path: Optional[List[str]] = Field( + refresh_token_config_path: list[str] | None = Field( ["credentials", "refresh_token"], description="Config path to the access token. Make sure the field actually exists in the config.", examples=[["credentials", "refresh_token"], ["refresh_token"]], title="Config Path To Refresh Token", ) - token_expiry_date_config_path: Optional[List[str]] = Field( + token_expiry_date_config_path: list[str] | None = Field( ["credentials", "token_expiry_date"], description="Config path to the expiry date. 
Make sure the field actually exists in the config.", examples=[["credentials", "token_expiry_date"]], title="Config Path To Expiry Date", ) - refresh_token_error_status_codes: Optional[List[int]] = Field( + refresh_token_error_status_codes: list[int] | None = Field( [], description="Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should also be specified). Responses with one of the error status codes and containing an error value will be flagged as a config error", examples=[[400, 500]], title="Refresh Token Error Status Codes", ) - refresh_token_error_key: Optional[str] = Field( + refresh_token_error_key: str | None = Field( "", description="Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should also be specified).", examples=["error"], title="Refresh Token Error Key", ) - refresh_token_error_values: Optional[List[str]] = Field( + refresh_token_error_values: list[str] | None = Field( [], description='List of values to check for exception during token refresh process. Used to check if the error found in the response matches the key from the Refresh Token Error Key field (e.g. response={"error": "invalid_grant"}). Only responses with one of the error status codes and containing an error value will be flagged as a config error', examples=[["invalid_grant", "invalid_permissions"]], @@ -509,25 +509,25 @@ class RefreshTokenUpdater(BaseModel): class OAuthAuthenticator(BaseModel): type: Literal["OAuthAuthenticator"] - client_id_name: Optional[str] = Field( + client_id_name: str | None = Field( "client_id", description="The name of the property to use to refresh the `access_token`.", examples=["custom_app_id"], title="Client ID Property Name", ) - client_id: Optional[str] = Field( + client_id: str | None = Field( None, description="The OAuth client ID. Fill it in the user inputs.", examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"], title="Client ID", ) - client_secret_name: Optional[str] = Field( + client_secret_name: str | None = Field( "client_secret", description="The name of the property to use to refresh the `access_token`.", examples=["custom_app_secret"], title="Client Secret Property Name", ) - client_secret: Optional[str] = Field( + client_secret: str | None = Field( None, description="The OAuth client secret.
Fill it in the user inputs.", examples=[ @@ -536,13 +536,13 @@ class OAuthAuthenticator(BaseModel): ], title="Client Secret", ) - refresh_token_name: Optional[str] = Field( + refresh_token_name: str | None = Field( "refresh_token", description="The name of the property to use to refresh the `access_token`.", examples=["custom_app_refresh_value"], title="Refresh Token Property Name", ) - refresh_token: Optional[str] = Field( + refresh_token: str | None = Field( None, description="Credential artifact used to get a new access token.", examples=[ @@ -551,43 +551,43 @@ class OAuthAuthenticator(BaseModel): ], title="Refresh Token", ) - token_refresh_endpoint: Optional[str] = Field( + token_refresh_endpoint: str | None = Field( None, description="The full URL to call to obtain a new access token.", examples=["https://connect.squareup.com/oauth2/token"], title="Token Refresh Endpoint", ) - access_token_name: Optional[str] = Field( + access_token_name: str | None = Field( "access_token", description="The name of the property which contains the access token in the response from the token refresh endpoint.", examples=["access_token"], title="Access Token Property Name", ) - access_token_value: Optional[str] = Field( + access_token_value: str | None = Field( None, description="The value of the access_token to bypass the token refreshing using `refresh_token`.", examples=["secret_access_token_value"], title="Access Token Value", ) - expires_in_name: Optional[str] = Field( + expires_in_name: str | None = Field( "expires_in", description="The name of the property which contains the expiry date in the response from the token refresh endpoint.", examples=["expires_in"], title="Token Expiry Property Name", ) - grant_type_name: Optional[str] = Field( + grant_type_name: str | None = Field( "grant_type", description="The name of the property to use to refresh the `access_token`.", examples=["custom_grant_type"], title="Grant Type Property Name", ) - grant_type: Optional[str] = Field( + grant_type: str | None = Field( "refresh_token", description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. 
Other grant types are not officially supported.", examples=["refresh_token", "client_credentials"], title="Grant Type", ) - refresh_request_body: Optional[Dict[str, Any]] = Field( + refresh_request_body: dict[str, Any] | None = Field( None, description="Body of the request sent to get a new access token.", examples=[ @@ -599,7 +599,7 @@ class OAuthAuthenticator(BaseModel): ], title="Refresh Request Body", ) - refresh_request_headers: Optional[Dict[str, Any]] = Field( + refresh_request_headers: dict[str, Any] | None = Field( None, description="Headers of the request sent to get a new access token.", examples=[ @@ -610,47 +610,47 @@ class OAuthAuthenticator(BaseModel): ], title="Refresh Request Headers", ) - scopes: Optional[List[str]] = Field( + scopes: list[str] | None = Field( None, description="List of scopes that should be granted to the access token.", examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], title="Scopes", ) - token_expiry_date: Optional[str] = Field( + token_expiry_date: str | None = Field( None, description="The access token expiry date.", examples=["2023-04-06T07:12:10.421833+00:00", 1680842386], title="Token Expiry Date", ) - token_expiry_date_format: Optional[str] = Field( + token_expiry_date_format: str | None = Field( None, description="The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.", examples=["%Y-%m-%d %H:%M:%S.%f+00:00"], title="Token Expiry Date Format", ) - refresh_token_updater: Optional[RefreshTokenUpdater] = Field( + refresh_token_updater: RefreshTokenUpdater | None = Field( None, description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.", title="Token Updater", ) - profile_assertion: Optional[JwtAuthenticator] = Field( + profile_assertion: JwtAuthenticator | None = Field( None, description="The authenticator being used to authenticate the client authenticator.", title="Profile Assertion", ) - use_profile_assertion: Optional[bool] = Field( + use_profile_assertion: bool | None = Field( False, description="Enable using profile assertion as a flow for OAuth authorization.", title="Use Profile Assertion", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class Rate(BaseModel): class Config: extra = Extra.allow - limit: Union[int, str] = Field( + limit: int | str = Field( ..., description="The maximum number of calls allowed within the interval.", title="Limit", @@ -667,30 +667,30 @@ class HttpRequestRegexMatcher(BaseModel): class Config: extra = Extra.allow - method: Optional[str] = Field( + method: str | None = Field( None, description="The HTTP method to match (e.g., GET, POST).", title="Method" ) - url_base: Optional[str] = Field( + url_base: str | None = Field( None, description='The base URL (scheme and host, e.g. 
"https://api.example.com") to match.', title="URL Base", ) - url_path_pattern: Optional[str] = Field( + url_path_pattern: str | None = Field( None, description="A regular expression pattern to match the URL path.", title="URL Path Pattern", ) - params: Optional[Dict[str, Any]] = Field( + params: dict[str, Any] | None = Field( None, description="The query parameters to match.", title="Parameters" ) - headers: Optional[Dict[str, Any]] = Field( + headers: dict[str, Any] | None = Field( None, description="The headers to match.", title="Headers" ) class DpathExtractor(BaseModel): type: Literal["DpathExtractor"] - field_path: List[str] = Field( + field_path: list[str] = Field( ..., description='List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).', examples=[ @@ -701,34 +701,34 @@ class DpathExtractor(BaseModel): ], title="Field Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ResponseToFileExtractor(BaseModel): type: Literal["ResponseToFileExtractor"] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ExponentialBackoffStrategy(BaseModel): type: Literal["ExponentialBackoffStrategy"] - factor: Optional[Union[float, str]] = Field( + factor: float | str | None = Field( 5, description="Multiplicative constant applied on each retry.", examples=[5, 5.5, "10"], title="Factor", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class GroupByKeyMergeStrategy(BaseModel): type: Literal["GroupByKeyMergeStrategy"] - key: Union[str, List[str]] = Field( + key: str | list[str] = Field( ..., description="The name of the field on the record whose value will be used to group properties that were retrieved through multiple API requests.", examples=["id", ["parent_id", "end_date"]], title="Key", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class SessionTokenRequestBearerAuthenticator(BaseModel): @@ -756,37 +756,37 @@ class FailureType(Enum): class HttpResponseFilter(BaseModel): type: Literal["HttpResponseFilter"] - action: Optional[Action] = Field( + action: Action | None = Field( None, description="Action to execute if a response matches the filter.", examples=["SUCCESS", "FAIL", "RETRY", "IGNORE", "RATE_LIMITED"], title="Action", ) - failure_type: Optional[FailureType] = Field( + failure_type: FailureType | None = Field( None, description="Failure type of traced exception if a response matches the filter.", examples=["system_error", "config_error", "transient_error"], title="Failure Type", ) - error_message: Optional[str] = Field( + error_message: str | None = Field( None, description="Error Message to display if the response matches the filter.", title="Error Message", ) - error_message_contains: Optional[str] = Field( + error_message_contains: str | None = Field( None, description="Match the response if its error message contains the substring.", example=["This API operation is not enabled for this site"], title="Error Message Substring", ) - http_codes: Optional[List[int]] = Field( + http_codes: 
list[int] | None = Field( None, description="Match the response if its HTTP code is included in this list.", examples=[[420, 429], [500]], title="HTTP Codes", unique_items=True, ) - predicate: Optional[str] = Field( + predicate: str | None = Field( None, description="Match the response if the predicate evaluates to true.", examples=[ @@ -795,44 +795,44 @@ class HttpResponseFilter(BaseModel): ], title="Predicate", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ComplexFieldType(BaseModel): field_type: str - items: Optional[Union[str, ComplexFieldType]] = None + items: str | ComplexFieldType | None = None class TypesMap(BaseModel): - target_type: Union[str, List[str], ComplexFieldType] - current_type: Union[str, List[str]] - condition: Optional[str] = None + target_type: str | list[str] | ComplexFieldType + current_type: str | list[str] + condition: str | None = None class SchemaTypeIdentifier(BaseModel): - type: Optional[Literal["SchemaTypeIdentifier"]] = None - schema_pointer: Optional[List[str]] = Field( + type: Literal["SchemaTypeIdentifier"] | None = None + schema_pointer: list[str] | None = Field( [], description="List of nested fields defining the schema field path to extract. Defaults to [].", title="Schema Path", ) - key_pointer: List[str] = Field( + key_pointer: list[str] = Field( ..., description="List of potentially nested fields describing the full path of the field key to extract.", title="Key Path", ) - type_pointer: Optional[List[str]] = Field( + type_pointer: list[str] | None = Field( None, description="List of potentially nested fields describing the full path of the field type to extract.", title="Type Path", ) - types_mapping: Optional[List[TypesMap]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + types_mapping: list[TypesMap] | None = None + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class InlineSchemaLoader(BaseModel): type: Literal["InlineSchemaLoader"] - schema_: Optional[Dict[str, Any]] = Field( + schema_: dict[str, Any] | None = Field( None, alias="schema", description='Describes a streams\' schema. Refer to the Data Types documentation for more details on which types are valid.', @@ -842,13 +842,13 @@ class InlineSchemaLoader(BaseModel): class JsonFileSchemaLoader(BaseModel): type: Literal["JsonFileSchemaLoader"] - file_path: Optional[str] = Field( + file_path: str | None = Field( None, description="Path to the JSON file defining the schema. The path is relative to the connector module's root.", example=["./schemas/users.json"], title="File Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class JsonDecoder(BaseModel): @@ -861,43 +861,43 @@ class JsonlDecoder(BaseModel): class KeysToLower(BaseModel): type: Literal["KeysToLower"] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class KeysToSnakeCase(BaseModel): type: Literal["KeysToSnakeCase"] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class FlattenFields(BaseModel): type: Literal["FlattenFields"] - flatten_lists: Optional[bool] = Field( + flatten_lists: bool | None = Field( True, description="Whether to flatten lists or leave it as is. 
Default is True.", title="Flatten Lists", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class DpathFlattenFields(BaseModel): type: Literal["DpathFlattenFields"] - field_path: List[str] = Field( + field_path: list[str] = Field( ..., description="A path to field that needs to be flattened.", examples=[["data"], ["data", "*", "field"]], title="Field Path", ) - delete_origin_value: Optional[bool] = Field( + delete_origin_value: bool | None = Field( None, description="Whether to delete the origin value or keep it. Default is False.", title="Delete Origin Value", ) - replace_record: Optional[bool] = Field( + replace_record: bool | None = Field( None, description="Whether to replace the origin record or not. Default is False.", title="Replace Origin Record", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class KeysReplace(BaseModel): @@ -924,7 +924,7 @@ class KeysReplace(BaseModel): ], title="New value", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class IterableDecoder(BaseModel): @@ -946,7 +946,7 @@ class Config: examples=["source_amazon_ads.components.GzipJsonlDecoder"], title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class MinMaxDatetime(BaseModel): @@ -957,30 +957,30 @@ class MinMaxDatetime(BaseModel): examples=["2021-01-01", "2021-01-01T00:00:00Z", "{{ config['start_time'] }}"], title="Datetime", ) - datetime_format: Optional[str] = Field( + datetime_format: str | None = Field( "", description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], title="Datetime Format", ) - max_datetime: Optional[str] = Field( + max_datetime: str | None = Field( None, description="Ceiling applied on the datetime value. Must be formatted with the datetime_format field.", examples=["2021-01-01T00:00:00Z", "2021-01-01"], title="Max Datetime", ) - min_datetime: Optional[str] = Field( + min_datetime: str | None = Field( None, description="Floor applied on the datetime value. 
Must be formatted with the datetime_format field.", examples=["2010-01-01T00:00:00Z", "2010-01-01"], title="Min Datetime", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class NoAuth(BaseModel): type: Literal["NoAuth"] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class NoPagination(BaseModel): @@ -1008,7 +1008,7 @@ class Config: ], title="Consent URL", ) - scope: Optional[str] = Field( + scope: str | None = Field( None, description="The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.", examples=["user:read user:read_orders workspaces:read"], @@ -1022,7 +1022,7 @@ class Config: ], title="Access Token URL", ) - access_token_headers: Optional[Dict[str, Any]] = Field( + access_token_headers: dict[str, Any] | None = Field( None, description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.", examples=[ @@ -1032,7 +1032,7 @@ class Config: ], title="Access Token Headers", ) - access_token_params: Optional[Dict[str, Any]] = Field( + access_token_params: dict[str, Any] | None = Field( None, description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.", examples=[ @@ -1044,49 +1044,49 @@ class Config: ], title="Access Token Query Params (Json Encoded)", ) - extract_output: Optional[List[str]] = Field( + extract_output: list[str] | None = Field( None, description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.", examples=[["access_token", "refresh_token", "other_field"]], title="Extract Output", ) - state: Optional[State] = Field( + state: State | None = Field( None, description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.", examples=[{"min": 7, "max": 128}], title="Configurable State Query Param", ) - client_id_key: Optional[str] = Field( + client_id_key: str | None = Field( None, description="The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.", examples=["my_custom_client_id_key_name"], title="Client ID Key Override", ) - client_secret_key: Optional[str] = Field( + client_secret_key: str | None = Field( None, description="The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.", examples=["my_custom_client_secret_key_name"], title="Client Secret Key Override", ) - scope_key: Optional[str] = Field( + scope_key: str | None = Field( None, description="The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.", examples=["my_custom_scope_key_key_name"], title="Scopes Key Override", ) - state_key: Optional[str] = Field( + state_key: str | None = Field( None, description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.", examples=["my_custom_state_key_key_name"], title="State Key Override", ) - auth_code_key: 
Optional[str] = Field( + auth_code_key: str | None = Field( None, description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.", examples=["my_custom_auth_code_key_name"], title="Auth Code Key Override", ) - redirect_uri_key: Optional[str] = Field( + redirect_uri_key: str | None = Field( None, description="The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.", examples=["my_custom_redirect_uri_key_name"], @@ -1098,7 +1098,7 @@ class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( + oauth_user_input_from_connector_config_specification: dict[str, Any] | None = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", examples=[ @@ -1112,12 +1112,12 @@ class Config: ], title="OAuth user input", ) - oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field( + oauth_connector_input_specification: OauthConnectorInputSpecification | None = Field( None, description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | 
urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }', title="DeclarativeOAuth Connector Specification", ) - complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( + complete_oauth_output_specification: dict[str, Any] | None = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", examples=[ @@ -1130,13 +1130,13 @@ class Config: ], title="OAuth output specification", ) - complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( + complete_oauth_server_input_specification: dict[str, Any] | None = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], title="OAuth input specification", ) - complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( + complete_oauth_server_output_specification: dict[str, Any] | None = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. 
(some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", examples=[ @@ -1157,44 +1157,44 @@ class Config: class OffsetIncrement(BaseModel): type: Literal["OffsetIncrement"] - page_size: Optional[Union[int, str]] = Field( + page_size: int | str | None = Field( None, description="The number of records to include in each pages.", examples=[100, "{{ config['page_size'] }}"], title="Limit", ) - inject_on_first_request: Optional[bool] = Field( + inject_on_first_request: bool | None = Field( False, description="Using the `offset` with value `0` during the first request", title="Inject Offset", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class PageIncrement(BaseModel): type: Literal["PageIncrement"] - page_size: Optional[Union[int, str]] = Field( + page_size: int | str | None = Field( None, description="The number of records to include in each pages.", examples=[100, "100", "{{ config['page_size'] }}"], title="Page Size", ) - start_from_page: Optional[int] = Field( + start_from_page: int | None = Field( 0, description="Index of the first page to request.", examples=[0, 1], title="Start From Page", ) - inject_on_first_request: Optional[bool] = Field( + inject_on_first_request: bool | None = Field( False, description="Using the `page number` with value defined by `start_from_page` during the first request", title="Inject Page Number", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class PrimaryKey(BaseModel): - __root__: Union[str, List[str], List[List[str]]] = Field( + __root__: str | list[str] | list[list[str]] = Field( ..., description="The stream field to be used to distinguish unique records. 
Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", examples=["id", ["code", "type"]], @@ -1214,22 +1214,22 @@ class PropertyChunking(BaseModel): description="The type used to determine the maximum number of properties per chunk", title="Property Limit Type", ) - property_limit: Optional[int] = Field( + property_limit: int | None = Field( None, description="The maximum amount of properties that can be retrieved per request according to the limit type.", title="Property Limit", ) - record_merge_strategy: Optional[GroupByKeyMergeStrategy] = Field( + record_merge_strategy: GroupByKeyMergeStrategy | None = Field( None, description="Dictates how records that require multiple requests to get all properties should be emitted to the destination", title="Record Merge Strategy", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class RecordFilter(BaseModel): type: Literal["RecordFilter"] - condition: Optional[str] = Field( + condition: str | None = Field( "", description="The predicate to filter a record. Records will be removed if evaluated to False.", examples=[ @@ -1237,7 +1237,7 @@ class RecordFilter(BaseModel): "{{ record.status in ['active', 'expired'] }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class SchemaNormalization(Enum): @@ -1247,7 +1247,7 @@ class SchemaNormalization(Enum): class RemoveFields(BaseModel): type: Literal["RemoveFields"] - condition: Optional[str] = Field( + condition: str | None = Field( "", description="The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.", examples=[ @@ -1257,7 +1257,7 @@ class RemoveFields(BaseModel): "{{ property == 'some_string_to_match' }}", ], ) - field_pointers: List[List[str]] = Field( + field_pointers: list[list[str]] = Field( ..., description="Array of paths defining the field to remove. Each item is an array whose fields describe the path of a field to remove.", examples=[["tags"], [["content", "html"], ["content", "plain_text"]]], @@ -1278,13 +1278,13 @@ class InjectInto(Enum): class RequestOption(BaseModel): type: Literal["RequestOption"] - field_name: Optional[str] = Field( + field_name: str | None = Field( None, description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.", examples=["segment_id"], title="Field Name", ) - field_path: Optional[List[str]] = Field( + field_path: list[str] | None = Field( None, description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)", examples=[["data", "viewer", "id"]], @@ -1319,7 +1319,7 @@ class LegacySessionTokenAuthenticator(BaseModel): examples=["session"], title="Login Path", ) - session_token: Optional[str] = Field( + session_token: str | None = Field( None, description="Session token to use if using a pre-defined token.
Not needed if authenticating with username + password pair", example=["{{ config['session_token'] }}"], @@ -1331,13 +1331,13 @@ class LegacySessionTokenAuthenticator(BaseModel): examples=["id"], title="Response Token Response Key", ) - username: Optional[str] = Field( + username: str | None = Field( None, description="Username used to authenticate and obtain a session token", examples=[" {{ config['username'] }}"], title="Username", ) - password: Optional[str] = Field( + password: str | None = Field( "", description="Password used to authenticate and obtain a session token", examples=["{{ config['password'] }}", ""], @@ -1349,21 +1349,21 @@ class LegacySessionTokenAuthenticator(BaseModel): examples=["user/current"], title="Validate Session Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class CsvDecoder(BaseModel): type: Literal["CsvDecoder"] - encoding: Optional[str] = "utf-8" - delimiter: Optional[str] = "," + encoding: str | None = "utf-8" + delimiter: str | None = "," class AsyncJobStatusMap(BaseModel): - type: Optional[Literal["AsyncJobStatusMap"]] = None - running: List[str] - completed: List[str] - failed: List[str] - timeout: List[str] + type: Literal["AsyncJobStatusMap"] | None = None + running: list[str] + completed: list[str] + failed: list[str] + timeout: list[str] class ValueType(Enum): @@ -1381,19 +1381,19 @@ class WaitTimeFromHeader(BaseModel): examples=["Retry-After"], title="Response Header Name", ) - regex: Optional[str] = Field( + regex: str | None = Field( None, description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", examples=["([-+]?\\d+)"], title="Extraction Regex", ) - max_waiting_time_in_seconds: Optional[float] = Field( + max_waiting_time_in_seconds: float | None = Field( None, description="Given the value extracted from the header is greater than this value, stop the stream.", examples=[3600], title="Max Waiting Time in Seconds", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class WaitUntilTimeFromHeader(BaseModel): @@ -1404,24 +1404,24 @@ class WaitUntilTimeFromHeader(BaseModel): examples=["wait_time"], title="Response Header", ) - min_wait: Optional[Union[float, str]] = Field( + min_wait: float | str | None = Field( None, description="Minimum time to wait before retrying.", examples=[10, "60"], title="Minimum Wait Time", ) - regex: Optional[str] = Field( + regex: str | None = Field( None, description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", examples=["([-+]?\\d+)"], title="Extraction Regex", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ComponentMappingDefinition(BaseModel): type: Literal["ComponentMappingDefinition"] - field_path: List[str] = Field( + field_path: list[str] = Field( ..., description="A list of potentially nested fields indicating the full path where value will be added or updated.", examples=[ @@ -1446,35 +1446,35 @@ class ComponentMappingDefinition(BaseModel): ], title="Value", ) - value_type: Optional[ValueType] = Field( + value_type: ValueType | None = Field( None, description="The expected data type of the value. 
If omitted, the type will be inferred from the value provided.", title="Value Type", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class StreamConfig(BaseModel): type: Literal["StreamConfig"] - configs_pointer: List[str] = Field( + configs_pointer: list[str] = Field( ..., description="A list of potentially nested fields indicating the full path in source config file where streams configs located.", examples=[["data"], ["data", "streams"], ["data", "{{ parameters.name }}"]], title="Configs Pointer", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ConfigComponentsResolver(BaseModel): type: Literal["ConfigComponentsResolver"] stream_config: StreamConfig - components_mapping: List[ComponentMappingDefinition] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + components_mapping: list[ComponentMappingDefinition] + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class AddedFieldDefinition(BaseModel): type: Literal["AddedFieldDefinition"] - path: List[str] = Field( + path: list[str] = Field( ..., description="List of strings defining the path where to add the value on the record.", examples=[["segment_id"], ["metadata", "segment_id"]], @@ -1490,22 +1490,22 @@ class AddedFieldDefinition(BaseModel): ], title="Value", ) - value_type: Optional[ValueType] = Field( + value_type: ValueType | None = Field( None, description="Type of the value. If not specified, the type will be inferred from the value.", title="Value Type", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class AddFields(BaseModel): type: Literal["AddFields"] - fields: List[AddedFieldDefinition] = Field( + fields: list[AddedFieldDefinition] = Field( ..., description="List of transformations (path and corresponding value) that will be added to the record.", title="Fields", ) - condition: Optional[str] = Field( + condition: str | None = Field( "", description="Fields will be added if expression is evaluated to True.", examples=[ @@ -1515,24 +1515,24 @@ class AddFields(BaseModel): "{{ property == 'some_string_to_match' }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ApiKeyAuthenticator(BaseModel): type: Literal["ApiKeyAuthenticator"] - api_token: Optional[str] = Field( + api_token: str | None = Field( None, description="The API key to inject in the request. Fill it in the user inputs.", examples=["{{ config['api_key'] }}", "Token token={{ config['api_key'] }}"], title="API Key", ) - header: Optional[str] = Field( + header: str | None = Field( None, description="The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", examples=["Authorization", "Api-Token", "X-Auth-Token"], title="Header Name", ) - inject_into: Optional[RequestOption] = Field( + inject_into: RequestOption | None = Field( None, description="Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.", examples=[ @@ -1541,37 +1541,37 @@ class ApiKeyAuthenticator(BaseModel): ], title="Inject API Key Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class AuthFlow(BaseModel): - auth_flow_type: Optional[AuthFlowType] = Field( + auth_flow_type: AuthFlowType | None = Field( None, description="The type of auth to use", title="Auth flow type" ) - predicate_key: Optional[List[str]] = Field( + predicate_key: list[str] | None = Field( None, description="JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", examples=[["credentials", "auth_type"]], title="Predicate key", ) - predicate_value: Optional[str] = Field( + predicate_value: str | None = Field( None, description="Value of the predicate_key fields for the advanced auth to be applicable.", examples=["Oauth"], title="Predicate value", ) - oauth_config_specification: Optional[OAuthConfigSpecification] = None + oauth_config_specification: OAuthConfigSpecification | None = None class CheckStream(BaseModel): type: Literal["CheckStream"] - stream_names: Optional[List[str]] = Field( + stream_names: list[str] | None = Field( None, description="Names of the streams to try reading from when running a check operation.", examples=[["users"], ["users", "contacts"]], title="Stream Names", ) - dynamic_streams_check_configs: Optional[List[DynamicStreamCheckConfig]] = None + dynamic_streams_check_configs: list[DynamicStreamCheckConfig] | None = None class IncrementingCountCursor(BaseModel): @@ -1582,23 +1582,23 @@ class IncrementingCountCursor(BaseModel): examples=["created_at", "{{ config['record_cursor'] }}"], title="Cursor Field", ) - start_value: Optional[Union[str, int]] = Field( + start_value: str | int | None = Field( None, description="The value that determines the earliest record that should be synced.", examples=[0, "{{ config['start_value'] }}"], title="Start Value", ) - start_value_option: Optional[RequestOption] = Field( + start_value_option: RequestOption | None = Field( None, description="Optionally configures how the start value will be sent in requests to the source API.", title="Inject Start Value Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class DatetimeBasedCursor(BaseModel): type: Literal["DatetimeBasedCursor"] - clamping: Optional[Clamping] = Field( + clamping: Clamping | None = Field( None, description="This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)", title="Date Range Clamping", @@ -1615,84 +1615,84 @@ class DatetimeBasedCursor(BaseModel): examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"], title="Outgoing Datetime Format", ) - start_datetime: Union[str, MinMaxDatetime] = Field( + start_datetime: str | MinMaxDatetime = Field( ..., description="The datetime that determines the earliest record that should be synced.", examples=["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"], title="Start Datetime", ) - cursor_datetime_formats: Optional[List[str]] = Field( + cursor_datetime_formats: list[str] | None = Field( None, description="The possible formats for the cursor field, in order of preference. 
The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.", title="Cursor Datetime Formats", ) - cursor_granularity: Optional[str] = Field( + cursor_granularity: str | None = Field( None, description="Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.", examples=["PT1S"], title="Cursor Granularity", ) - end_datetime: Optional[Union[str, MinMaxDatetime]] = Field( + end_datetime: str | MinMaxDatetime | None = Field( None, description="The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.", examples=["2021-01-1T00:00:00Z", "{{ now_utc() }}", "{{ day_delta(-1) }}"], title="End Datetime", ) - end_time_option: Optional[RequestOption] = Field( + end_time_option: RequestOption | None = Field( None, description="Optionally configures how the end datetime will be sent in requests to the source API.", title="Inject End Time Into Outgoing HTTP Request", ) - is_data_feed: Optional[bool] = Field( + is_data_feed: bool | None = Field( None, description="A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", title="Whether the target API is formatted as a data feed", ) - is_client_side_incremental: Optional[bool] = Field( + is_client_side_incremental: bool | None = Field( None, description="If the target API endpoint does not take cursor values to filter records and returns all records anyway, the connector with this cursor will filter out records locally, and only emit new records from the last sync, hence incremental. This means that all records would be read from the API, but only new records will be emitted to the destination.", title="Whether the target API does not support filtering and returns all data (the cursor filters records in the client instead of the API side)", ) - is_compare_strictly: Optional[bool] = Field( + is_compare_strictly: bool | None = Field( False, description="Set to True if the target API does not accept queries where the start time equal the end time.", title="Whether to skip requests if the start time equals the end time", ) - global_substream_cursor: Optional[bool] = Field( + global_substream_cursor: bool | None = Field( False, description="This setting optimizes performance when the parent stream has thousands of partitions by storing the cursor as a single value rather than per partition. Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).", title="Whether to store cursor as one value instead of per partition", ) - lookback_window: Optional[str] = Field( + lookback_window: str | None = Field( None, description="Time interval before the start_datetime to read data for, e.g. 
P1M for looking back one month.", examples=["P1D", "P{{ config['lookback_days'] }}D"], title="Lookback Window", ) - partition_field_end: Optional[str] = Field( + partition_field_end: str | None = Field( None, description="Name of the partition start time field.", examples=["ending_time"], title="Partition Field End", ) - partition_field_start: Optional[str] = Field( + partition_field_start: str | None = Field( None, description="Name of the partition end time field.", examples=["starting_time"], title="Partition Field Start", ) - start_time_option: Optional[RequestOption] = Field( + start_time_option: RequestOption | None = Field( None, description="Optionally configures how the start datetime will be sent in requests to the source API.", title="Inject Start Time Into Outgoing HTTP Request", ) - step: Optional[str] = Field( + step: str | None = Field( None, description="The size of the time window (ISO8601 duration). Given this field is provided, `cursor_granularity` needs to be provided as well.", examples=["P1W", "{{ config['step_increment'] }}"], title="Step", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class FixedWindowCallRatePolicy(BaseModel): @@ -1708,7 +1708,7 @@ class Config: description="The maximum number of calls allowed within the period.", title="Call Limit", ) - matchers: List[HttpRequestRegexMatcher] = Field( + matchers: list[HttpRequestRegexMatcher] = Field( ..., description="List of matchers that define which requests this policy applies to.", title="Matchers", @@ -1720,12 +1720,12 @@ class Config: extra = Extra.allow type: Literal["MovingWindowCallRatePolicy"] - rates: List[Rate] = Field( + rates: list[Rate] = Field( ..., description="List of rates that define the call limits for different time intervals.", title="Rates", ) - matchers: List[HttpRequestRegexMatcher] = Field( + matchers: list[HttpRequestRegexMatcher] = Field( ..., description="List of matchers that define which requests this policy applies to.", title="Matchers", @@ -1737,7 +1737,7 @@ class Config: extra = Extra.allow type: Literal["UnlimitedCallRatePolicy"] - matchers: List[HttpRequestRegexMatcher] = Field( + matchers: list[HttpRequestRegexMatcher] = Field( ..., description="List of matchers that define which requests this policy applies to.", title="Matchers", @@ -1746,47 +1746,46 @@ class Config: class DefaultErrorHandler(BaseModel): type: Literal["DefaultErrorHandler"] - backoff_strategies: Optional[ - List[ - Union[ - ConstantBackoffStrategy, - CustomBackoffStrategy, - ExponentialBackoffStrategy, - WaitTimeFromHeader, - WaitUntilTimeFromHeader, - ] + backoff_strategies: ( + list[ + ConstantBackoffStrategy + | CustomBackoffStrategy + | ExponentialBackoffStrategy + | WaitTimeFromHeader + | WaitUntilTimeFromHeader ] - ] = Field( + | None + ) = Field( None, description="List of backoff strategies to use to determine how long to wait before retrying a retryable request.", title="Backoff Strategies", ) - max_retries: Optional[int] = Field( + max_retries: int | None = Field( 5, description="The maximum number of time to retry a retryable request before giving up and failing.", examples=[5, 0, 10], title="Max Retry Count", ) - response_filters: Optional[List[HttpResponseFilter]] = Field( + response_filters: list[HttpResponseFilter] | None = Field( None, description="List of response filters to iterate on when deciding how to handle an error. 
When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", title="Response Filters", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class DefaultPaginator(BaseModel): type: Literal["DefaultPaginator"] - pagination_strategy: Union[ - CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement - ] = Field( + pagination_strategy: ( + CursorPagination | CustomPaginationStrategy | OffsetIncrement | PageIncrement + ) = Field( ..., description="Strategy defining how records are paginated.", title="Pagination Strategy", ) - page_size_option: Optional[RequestOption] = None - page_token_option: Optional[Union[RequestOption, RequestPath]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + page_size_option: RequestOption | None = None + page_token_option: RequestOption | RequestPath | None = None + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class SessionTokenRequestApiKeyAuthenticator(BaseModel): @@ -1810,59 +1809,59 @@ class ListPartitionRouter(BaseModel): examples=["section", "{{ config['section_key'] }}"], title="Current Partition Value Identifier", ) - values: Union[str, List[str]] = Field( + values: str | list[str] = Field( ..., description="The list of attributes being iterated over and used as input for the requests made to the source API.", examples=[["section_a", "section_b", "section_c"], "{{ config['sections'] }}"], title="Partition Values", ) - request_option: Optional[RequestOption] = Field( + request_option: RequestOption | None = Field( None, description="A request option describing where the list value should be injected into and under what field name if applicable.", title="Inject Partition Value Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class RecordSelector(BaseModel): type: Literal["RecordSelector"] - extractor: Union[CustomRecordExtractor, DpathExtractor] - record_filter: Optional[Union[CustomRecordFilter, RecordFilter]] = Field( + extractor: CustomRecordExtractor | DpathExtractor + record_filter: CustomRecordFilter | RecordFilter | None = Field( None, description="Responsible for filtering records to be emitted by the Source.", title="Record Filter", ) - schema_normalization: Optional[Union[SchemaNormalization, CustomSchemaNormalization]] = Field( + schema_normalization: SchemaNormalization | CustomSchemaNormalization | None = Field( SchemaNormalization.None_, description="Responsible for normalization according to the schema.", title="Schema Normalization", ) - transform_before_filtering: Optional[bool] = Field( + transform_before_filtering: bool | None = Field( False, description="If true, transformation will be applied before record filtering.", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class GzipDecoder(BaseModel): type: Literal["GzipDecoder"] - decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder] + decoder: CsvDecoder | GzipDecoder | JsonDecoder | JsonlDecoder class Spec(BaseModel): type: Literal["Spec"] - connection_specification: Dict[str, Any] = Field( + connection_specification: dict[str, Any] = Field( ..., description="A connection specification 
describing how a the connector can be configured.", title="Connection Specification", ) - documentation_url: Optional[str] = Field( + documentation_url: str | None = Field( None, description="URL of the connector's documentation page.", examples=["https://docs.airbyte.com/integrations/sources/dremio"], title="Documentation URL", ) - advanced_auth: Optional[AuthFlow] = Field( + advanced_auth: AuthFlow | None = Field( None, description="Advanced specification for configuring the authentication flow.", title="Advanced Auth", @@ -1871,12 +1870,12 @@ class Spec(BaseModel): class CompositeErrorHandler(BaseModel): type: Literal["CompositeErrorHandler"] - error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] = Field( + error_handlers: list[CompositeErrorHandler | DefaultErrorHandler] = Field( ..., description="List of error handlers to iterate on to determine how to handle a failed response.", title="Error Handlers", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class HTTPAPIBudget(BaseModel): @@ -1884,28 +1883,24 @@ class Config: extra = Extra.allow type: Literal["HTTPAPIBudget"] - policies: List[ - Union[ - FixedWindowCallRatePolicy, - MovingWindowCallRatePolicy, - UnlimitedCallRatePolicy, - ] + policies: list[ + FixedWindowCallRatePolicy | MovingWindowCallRatePolicy | UnlimitedCallRatePolicy ] = Field( ..., description="List of call rate policies that define how many calls are allowed.", title="Policies", ) - ratelimit_reset_header: Optional[str] = Field( + ratelimit_reset_header: str | None = Field( "ratelimit-reset", description="The HTTP response header name that indicates when the rate limit resets.", title="Rate Limit Reset Header", ) - ratelimit_remaining_header: Optional[str] = Field( + ratelimit_remaining_header: str | None = Field( "ratelimit-remaining", description="The HTTP response header name that indicates the number of remaining allowed calls.", title="Rate Limit Remaining Header", ) - status_codes_for_ratelimit_hit: Optional[List[int]] = Field( + status_codes_for_ratelimit_hit: list[int] | None = Field( [429], description="List of HTTP status codes that indicate a rate limit has been hit.", title="Status Codes for Rate Limit Hit", @@ -1917,7 +1912,7 @@ class Config: extra = Extra.allow type: Literal["ZipfileDecoder"] - decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder] = Field( + decoder: CsvDecoder | GzipDecoder | JsonDecoder | JsonlDecoder = Field( ..., description="Parser to parse the decompressed data from the zipfile(s).", title="Parser", @@ -1929,29 +1924,29 @@ class Config: extra = Extra.forbid type: Literal["DeclarativeSource"] - check: Union[CheckStream, CheckDynamicStream] - streams: List[Union[DeclarativeStream, StateDelegatingStream]] - dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None + check: CheckStream | CheckDynamicStream + streams: list[DeclarativeStream | StateDelegatingStream] + dynamic_streams: list[DynamicDeclarativeStream] | None = None version: str = Field( ..., description="The version of the Airbyte CDK used to build and test the source.", ) - schemas: Optional[Schemas] = None - definitions: Optional[Dict[str, Any]] = None - spec: Optional[Spec] = None - concurrency_level: Optional[ConcurrencyLevel] = None - api_budget: Optional[HTTPAPIBudget] = None - max_concurrent_async_job_count: Optional[Union[int, str]] = Field( + schemas: Schemas | None = None + definitions: dict[str, Any] | None = None + spec: Spec 
| None = None + concurrency_level: ConcurrencyLevel | None = None + api_budget: HTTPAPIBudget | None = None + max_concurrent_async_job_count: int | str | None = Field( None, description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.", examples=[3, "{{ config['max_concurrent_async_job_count'] }}"], title="Maximum Concurrent Asynchronous Jobs", ) - metadata: Optional[Dict[str, Any]] = Field( + metadata: dict[str, Any] | None = Field( None, description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", ) - description: Optional[str] = Field( + description: str | None = Field( None, description="A description of the connector. It will be presented on the Source documentation page.", ) @@ -1962,29 +1957,29 @@ class Config: extra = Extra.forbid type: Literal["DeclarativeSource"] - check: Union[CheckStream, CheckDynamicStream] - streams: Optional[List[Union[DeclarativeStream, StateDelegatingStream]]] = None - dynamic_streams: List[DynamicDeclarativeStream] + check: CheckStream | CheckDynamicStream + streams: list[DeclarativeStream | StateDelegatingStream] | None = None + dynamic_streams: list[DynamicDeclarativeStream] version: str = Field( ..., description="The version of the Airbyte CDK used to build and test the source.", ) - schemas: Optional[Schemas] = None - definitions: Optional[Dict[str, Any]] = None - spec: Optional[Spec] = None - concurrency_level: Optional[ConcurrencyLevel] = None - api_budget: Optional[HTTPAPIBudget] = None - max_concurrent_async_job_count: Optional[Union[int, str]] = Field( + schemas: Schemas | None = None + definitions: dict[str, Any] | None = None + spec: Spec | None = None + concurrency_level: ConcurrencyLevel | None = None + api_budget: HTTPAPIBudget | None = None + max_concurrent_async_job_count: int | str | None = Field( None, description="Maximum number of concurrent asynchronous jobs to run. This property is only relevant for sources/streams that support asynchronous job execution through the AsyncRetriever (e.g. a report-based stream that initiates a job, polls the job status, and then fetches the job results). This is often set by the API's maximum number of concurrent jobs on the account level. Refer to the API's documentation for this information.", examples=[3, "{{ config['max_concurrent_async_job_count'] }}"], title="Maximum Concurrent Asynchronous Jobs", ) - metadata: Optional[Dict[str, Any]] = Field( + metadata: dict[str, Any] | None = Field( None, description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", ) - description: Optional[str] = Field( + description: str | None = Field( None, description="A description of the connector. 
It will be presented on the Source documentation page.", ) @@ -1994,7 +1989,7 @@ class DeclarativeSource(BaseModel): class Config: extra = Extra.forbid - __root__: Union[DeclarativeSource1, DeclarativeSource2] = Field( + __root__: DeclarativeSource1 | DeclarativeSource2 = Field( ..., description="An API source that extracts data according to its declarative components.", title="DeclarativeSource", @@ -2006,25 +2001,23 @@ class Config: extra = Extra.allow type: Literal["SelectiveAuthenticator"] - authenticator_selection_path: List[str] = Field( + authenticator_selection_path: list[str] = Field( ..., description="Path of the field in config with selected authenticator name", examples=[["auth"], ["auth", "type"]], title="Authenticator Selection Path", ) - authenticators: Dict[ + authenticators: dict[ str, - Union[ - ApiKeyAuthenticator, - BasicHttpAuthenticator, - BearerAuthenticator, - CustomAuthenticator, - OAuthAuthenticator, - JwtAuthenticator, - NoAuth, - SessionTokenAuthenticator, - LegacySessionTokenAuthenticator, - ], + ApiKeyAuthenticator + | BasicHttpAuthenticator + | BearerAuthenticator + | CustomAuthenticator + | OAuthAuthenticator + | JwtAuthenticator + | NoAuth + | SessionTokenAuthenticator + | LegacySessionTokenAuthenticator, ] = Field( ..., description="Authenticators to select from.", @@ -2039,7 +2032,7 @@ class Config: ], title="Authenticators", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class DeclarativeStream(BaseModel): @@ -2047,60 +2040,54 @@ class Config: extra = Extra.allow type: Literal["DeclarativeStream"] - retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field( + retriever: AsyncRetriever | CustomRetriever | SimpleRetriever = Field( ..., description="Component used to coordinate how records are extracted across stream slices and request pages.", title="Retriever", ) - incremental_sync: Optional[ - Union[CustomIncrementalSync, DatetimeBasedCursor, IncrementingCountCursor] - ] = Field( + incremental_sync: ( + CustomIncrementalSync | DatetimeBasedCursor | IncrementingCountCursor | None + ) = Field( None, description="Component used to fetch data incrementally based on a time field in the data.", title="Incremental Sync", ) - name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") - primary_key: Optional[PrimaryKey] = Field( + name: str | None = Field("", description="The stream name.", example=["Users"], title="Name") + primary_key: PrimaryKey | None = Field( "", description="The primary key of the stream.", title="Primary Key" ) - schema_loader: Optional[ - Union[ - DynamicSchemaLoader, - InlineSchemaLoader, - JsonFileSchemaLoader, - CustomSchemaLoader, - ] - ] = Field( + schema_loader: ( + DynamicSchemaLoader | InlineSchemaLoader | JsonFileSchemaLoader | CustomSchemaLoader | None + ) = Field( None, description="Component used to retrieve the schema for the current stream.", title="Schema Loader", ) - transformations: Optional[ - List[ - Union[ - AddFields, - CustomTransformation, - RemoveFields, - KeysToLower, - KeysToSnakeCase, - FlattenFields, - DpathFlattenFields, - KeysReplace, - ] + transformations: ( + list[ + AddFields + | CustomTransformation + | RemoveFields + | KeysToLower + | KeysToSnakeCase + | FlattenFields + | DpathFlattenFields + | KeysReplace ] - ] = Field( + | None + ) = Field( None, description="A list of transformations to be applied to each output record.", title="Transformations", ) 
- state_migrations: Optional[ - List[Union[LegacyToPerPartitionStateMigration, CustomStateMigration]] - ] = Field( - [], - description="Array of state migrations to be applied on the input state", - title="State Migrations", + state_migrations: list[LegacyToPerPartitionStateMigration | CustomStateMigration] | None = ( + Field( + [], + description="Array of state migrations to be applied on the input state", + title="State Migrations", + ) ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class SessionTokenAuthenticator(BaseModel): @@ -2122,29 +2109,29 @@ class SessionTokenAuthenticator(BaseModel): ], title="Login Requester", ) - session_token_path: List[str] = Field( + session_token_path: list[str] = Field( ..., description="The path in the response body returned from the login requester to the session token.", examples=[["access_token"], ["result", "token"]], title="Session Token Path", ) - expiration_duration: Optional[str] = Field( + expiration_duration: str | None = Field( None, description="The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.", examples=["PT1H", "P1D"], title="Expiration Duration", ) - request_authentication: Union[ - SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator - ] = Field( + request_authentication: ( + SessionTokenRequestApiKeyAuthenticator | SessionTokenRequestBearerAuthenticator + ) = Field( ..., description="Authentication method to use for requests sent to the API, specifying how to inject the session token.", title="Data Request Authentication", ) - decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field( + decoder: JsonDecoder | XmlDecoder | None = Field( None, description="Component used to decode the response.", title="Decoder" ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class HttpRequester(BaseModel): @@ -2160,7 +2147,7 @@ class HttpRequester(BaseModel): ], title="API Base URL", ) - path: Optional[str] = Field( + path: str | None = Field( None, description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. 
API tokens) into this field - Use the Authentication component for this.", examples=[ @@ -2170,38 +2157,35 @@ class HttpRequester(BaseModel): ], title="URL Path", ) - authenticator: Optional[ - Union[ - ApiKeyAuthenticator, - BasicHttpAuthenticator, - BearerAuthenticator, - CustomAuthenticator, - OAuthAuthenticator, - JwtAuthenticator, - NoAuth, - SessionTokenAuthenticator, - LegacySessionTokenAuthenticator, - SelectiveAuthenticator, - ] - ] = Field( + authenticator: ( + ApiKeyAuthenticator + | BasicHttpAuthenticator + | BearerAuthenticator + | CustomAuthenticator + | OAuthAuthenticator + | JwtAuthenticator + | NoAuth + | SessionTokenAuthenticator + | LegacySessionTokenAuthenticator + | SelectiveAuthenticator + | None + ) = Field( None, description="Authentication method to use for requests sent to the API.", title="Authenticator", ) - error_handler: Optional[ - Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler] - ] = Field( + error_handler: DefaultErrorHandler | CustomErrorHandler | CompositeErrorHandler | None = Field( None, description="Error handler component that defines how to handle errors.", title="Error Handler", ) - http_method: Optional[HttpMethod] = Field( + http_method: HttpMethod | None = Field( HttpMethod.GET, description="The HTTP method used to fetch data from the source (can be GET or POST).", examples=["GET", "POST"], title="HTTP Method", ) - request_body_data: Optional[Union[str, Dict[str, str]]] = Field( + request_body_data: str | dict[str, str] | None = Field( None, description="Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", examples=[ @@ -2209,7 +2193,7 @@ class HttpRequester(BaseModel): ], title="Request Body Payload (Non-JSON)", ) - request_body_json: Optional[Union[str, Dict[str, Any]]] = Field( + request_body_json: str | dict[str, Any] | None = Field( None, description="Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.", examples=[ @@ -2219,13 +2203,13 @@ class HttpRequester(BaseModel): ], title="Request Body JSON Payload", ) - request_headers: Optional[Union[str, Dict[str, str]]] = Field( + request_headers: str | dict[str, str] | None = Field( None, description="Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.", examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}], title="Request Headers", ) - request_parameters: Optional[Union[str, Dict[str, Union[str, Any]]]] = Field( + request_parameters: str | dict[str, str | Any] | None = Field( None, description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", examples=[ @@ -2238,46 +2222,45 @@ class HttpRequester(BaseModel): ], title="Query Parameters", ) - use_cache: Optional[bool] = Field( + use_cache: bool | None = Field( False, description="Enables stream requests caching. 
This field is automatically set by the CDK.", title="Use Cache", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class DynamicSchemaLoader(BaseModel): type: Literal["DynamicSchemaLoader"] - retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field( + retriever: AsyncRetriever | CustomRetriever | SimpleRetriever = Field( ..., description="Component used to coordinate how records are extracted across stream slices and request pages.", title="Retriever", ) - schema_transformations: Optional[ - List[ - Union[ - AddFields, - CustomTransformation, - RemoveFields, - KeysToLower, - KeysToSnakeCase, - FlattenFields, - DpathFlattenFields, - KeysReplace, - ] + schema_transformations: ( + list[ + AddFields + | CustomTransformation + | RemoveFields + | KeysToLower + | KeysToSnakeCase + | FlattenFields + | DpathFlattenFields + | KeysReplace ] - ] = Field( + | None + ) = Field( None, description="A list of transformations to be applied to the schema.", title="Schema Transformations", ) schema_type_identifier: SchemaTypeIdentifier - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class ParentStreamConfig(BaseModel): type: Literal["ParentStreamConfig"] - lazy_read_pointer: Optional[List[str]] = Field( + lazy_read_pointer: list[str] | None = Field( [], description="If set, this will enable lazy reading, using the initial read of parent records to extract child records.", title="Lazy Read Pointer", @@ -2288,7 +2271,7 @@ class ParentStreamConfig(BaseModel): examples=["id", "{{ config['parent_record_id'] }}"], title="Parent Key", ) - stream: Union[DeclarativeStream, StateDelegatingStream] = Field( + stream: DeclarativeStream | StateDelegatingStream = Field( ..., description="Reference to the parent stream.", title="Parent Stream" ) partition_field: str = Field( @@ -2297,56 +2280,56 @@ class ParentStreamConfig(BaseModel): examples=["parent_id", "{{ config['parent_partition_field'] }}"], title="Current Parent Key Value Identifier", ) - request_option: Optional[RequestOption] = Field( + request_option: RequestOption | None = Field( None, description="A request option describing where the parent key value should be injected into and under what field name if applicable.", title="Request Option", ) - incremental_dependency: Optional[bool] = Field( + incremental_dependency: bool | None = Field( False, description="Indicates whether the parent stream should be read incrementally based on updates in the child stream.", title="Incremental Dependency", ) - extra_fields: Optional[List[List[str]]] = Field( + extra_fields: list[list[str]] | None = Field( None, description="Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. 
Missing fields are set to `None`.", title="Extra Fields", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class PropertiesFromEndpoint(BaseModel): type: Literal["PropertiesFromEndpoint"] - property_field_path: List[str] = Field( + property_field_path: list[str] = Field( ..., description="Describes the path to the field that should be extracted", examples=[["name"]], ) - retriever: Union[CustomRetriever, SimpleRetriever] = Field( + retriever: CustomRetriever | SimpleRetriever = Field( ..., description="Requester component that describes how to fetch the properties to query from a remote API endpoint.", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class QueryProperties(BaseModel): type: Literal["QueryProperties"] - property_list: Union[List[str], PropertiesFromEndpoint] = Field( + property_list: list[str] | PropertiesFromEndpoint = Field( ..., description="The set of properties that will be queried for in the outbound request. This can either be statically defined or dynamic based on an API endpoint", title="Property List", ) - always_include_properties: Optional[List[str]] = Field( + always_include_properties: list[str] | None = Field( None, description="The list of properties that should be included in every set of properties when multiple chunks of properties are being requested.", title="Always Include Properties", ) - property_chunking: Optional[PropertyChunking] = Field( + property_chunking: PropertyChunking | None = Field( None, description="Defines how query properties will be grouped into smaller sets for APIs with limitations on the number of properties fetched per API request.", title="Property Chunking", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class StateDelegatingStream(BaseModel): @@ -2362,7 +2345,7 @@ class StateDelegatingStream(BaseModel): description="Component used to coordinate how records are extracted across stream slices and request pages when the state provided.", title="Retriever", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class SimpleRetriever(BaseModel): @@ -2371,55 +2354,51 @@ class SimpleRetriever(BaseModel): ..., description="Component that describes how to extract records from a HTTP response.", ) - requester: Union[CustomRequester, HttpRequester] = Field( + requester: CustomRequester | HttpRequester = Field( ..., description="Requester component that describes how to prepare HTTP requests to send to the source API.", ) - paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( + paginator: DefaultPaginator | NoPagination | None = Field( None, description="Paginator component that describes how to navigate through the API's pages.", ) - ignore_stream_slicer_parameters_on_paginated_requests: Optional[bool] = Field( + ignore_stream_slicer_parameters_on_paginated_requests: bool | None = Field( False, description="If true, the partition router and incremental request options will be ignored when paginating requests. 
Request options set directly on the requester will not be ignored.", ) - partition_router: Optional[ - Union[ - CustomPartitionRouter, - ListPartitionRouter, - SubstreamPartitionRouter, - GroupingPartitionRouter, - List[ - Union[ - CustomPartitionRouter, - ListPartitionRouter, - SubstreamPartitionRouter, - GroupingPartitionRouter, - ] - ], + partition_router: ( + CustomPartitionRouter + | ListPartitionRouter + | SubstreamPartitionRouter + | GroupingPartitionRouter + | list[ + CustomPartitionRouter + | ListPartitionRouter + | SubstreamPartitionRouter + | GroupingPartitionRouter ] - ] = Field( + | None + ) = Field( [], description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", title="Partition Router", ) - decoder: Optional[ - Union[ - CustomDecoder, - CsvDecoder, - GzipDecoder, - JsonDecoder, - JsonlDecoder, - IterableDecoder, - XmlDecoder, - ZipfileDecoder, - ] - ] = Field( + decoder: ( + CustomDecoder + | CsvDecoder + | GzipDecoder + | JsonDecoder + | JsonlDecoder + | IterableDecoder + | XmlDecoder + | ZipfileDecoder + | None + ) = Field( None, description="Component decoding the response so records can be extracted.", title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class AsyncRetriever(BaseModel): @@ -2431,111 +2410,106 @@ class AsyncRetriever(BaseModel): status_mapping: AsyncJobStatusMap = Field( ..., description="Async Job Status to Airbyte CDK Async Job Status mapping." ) - status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( + status_extractor: CustomRecordExtractor | DpathExtractor = Field( ..., description="Responsible for fetching the actual status of the async job." 
) - download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( + download_target_extractor: CustomRecordExtractor | DpathExtractor = Field( ..., description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.", ) - download_extractor: Optional[ - Union[CustomRecordExtractor, DpathExtractor, ResponseToFileExtractor] - ] = Field(None, description="Responsible for fetching the records from provided urls.") - creation_requester: Union[CustomRequester, HttpRequester] = Field( + download_extractor: CustomRecordExtractor | DpathExtractor | ResponseToFileExtractor | None = ( + Field(None, description="Responsible for fetching the records from provided urls.") + ) + creation_requester: CustomRequester | HttpRequester = Field( ..., description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.", ) - polling_requester: Union[CustomRequester, HttpRequester] = Field( + polling_requester: CustomRequester | HttpRequester = Field( ..., description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.", ) - polling_job_timeout: Optional[Union[int, str]] = Field( + polling_job_timeout: int | str | None = Field( None, description="The time in minutes after which the single Async Job should be considered as Timed Out.", ) - download_target_requester: Optional[Union[CustomRequester, HttpRequester]] = Field( + download_target_requester: CustomRequester | HttpRequester | None = Field( None, description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.", ) - download_requester: Union[CustomRequester, HttpRequester] = Field( + download_requester: CustomRequester | HttpRequester = Field( ..., description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.", ) - download_paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( + download_paginator: DefaultPaginator | NoPagination | None = Field( None, description="Paginator component that describes how to navigate through the API's pages during download.", ) - abort_requester: Optional[Union[CustomRequester, HttpRequester]] = Field( + abort_requester: CustomRequester | HttpRequester | None = Field( None, description="Requester component that describes how to prepare HTTP requests to send to the source API to abort a job once it is timed out from the source's perspective.", ) - delete_requester: Optional[Union[CustomRequester, HttpRequester]] = Field( + delete_requester: CustomRequester | HttpRequester | None = Field( None, description="Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are extracted.", ) - partition_router: Optional[ - Union[ - CustomPartitionRouter, - ListPartitionRouter, - SubstreamPartitionRouter, - GroupingPartitionRouter, - List[ - Union[ - CustomPartitionRouter, - ListPartitionRouter, - SubstreamPartitionRouter, - GroupingPartitionRouter, - ] - ], + partition_router: ( + CustomPartitionRouter + | ListPartitionRouter + | SubstreamPartitionRouter + | GroupingPartitionRouter + | list[ + CustomPartitionRouter + | ListPartitionRouter + | SubstreamPartitionRouter + | GroupingPartitionRouter ] - ] = Field( + | None + ) = Field( [], 
description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", title="Partition Router", ) - decoder: Optional[ - Union[ - CustomDecoder, - CsvDecoder, - GzipDecoder, - JsonDecoder, - JsonlDecoder, - IterableDecoder, - XmlDecoder, - ZipfileDecoder, - ] - ] = Field( + decoder: ( + CustomDecoder + | CsvDecoder + | GzipDecoder + | JsonDecoder + | JsonlDecoder + | IterableDecoder + | XmlDecoder + | ZipfileDecoder + | None + ) = Field( None, description="Component decoding the response so records can be extracted.", title="Decoder", ) - download_decoder: Optional[ - Union[ - CustomDecoder, - CsvDecoder, - GzipDecoder, - JsonDecoder, - JsonlDecoder, - IterableDecoder, - XmlDecoder, - ZipfileDecoder, - ] - ] = Field( + download_decoder: ( + CustomDecoder + | CsvDecoder + | GzipDecoder + | JsonDecoder + | JsonlDecoder + | IterableDecoder + | XmlDecoder + | ZipfileDecoder + | None + ) = Field( None, description="Component decoding the download response so records can be extracted.", title="Download Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class SubstreamPartitionRouter(BaseModel): type: Literal["SubstreamPartitionRouter"] - parent_stream_configs: List[ParentStreamConfig] = Field( + parent_stream_configs: list[ParentStreamConfig] = Field( ..., description="Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", title="Parent Stream Configs", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class GroupingPartitionRouter(BaseModel): @@ -2546,41 +2520,41 @@ class GroupingPartitionRouter(BaseModel): examples=[10, 50], title="Group Size", ) - underlying_partition_router: Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] = Field( + underlying_partition_router: ( + CustomPartitionRouter | ListPartitionRouter | SubstreamPartitionRouter + ) = Field( ..., description="The partition router whose output will be grouped. 
This can be any valid partition router component.", title="Underlying Partition Router", ) - deduplicate: Optional[bool] = Field( + deduplicate: bool | None = Field( True, description="If true, ensures that partitions are unique within each group by removing duplicates based on the partition key.", title="Deduplicate Partitions", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class HttpComponentsResolver(BaseModel): type: Literal["HttpComponentsResolver"] - retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field( + retriever: AsyncRetriever | CustomRetriever | SimpleRetriever = Field( ..., description="Component used to coordinate how records are extracted across stream slices and request pages.", title="Retriever", ) - components_mapping: List[ComponentMappingDefinition] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + components_mapping: list[ComponentMappingDefinition] + parameters: dict[str, Any] | None = Field(None, alias="$parameters") class DynamicDeclarativeStream(BaseModel): type: Literal["DynamicDeclarativeStream"] - name: Optional[str] = Field( + name: str | None = Field( "", description="The dynamic stream name.", example=["Tables"], title="Name" ) stream_template: DeclarativeStream = Field( ..., description="Reference to the stream template.", title="Stream Template" ) - components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = Field( + components_resolver: HttpComponentsResolver | ConfigComponentsResolver = Field( ..., description="Component resolve and populates stream templates with components values.", title="Components Resolver", diff --git a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py index d759cd90d..75eeba45a 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py @@ -5,9 +5,7 @@ import sys from collections.abc import Mapping from types import ModuleType -from typing import Any, cast - -from typing_extensions import Literal +from typing import Any, Literal, cast ChecksumType = Literal["md5", "sha256"] CHECKSUM_FUNCTIONS = { diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py b/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py index 6779b54ab..c8bf4d16a 100644 --- a/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +++ b/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py @@ -3,8 +3,8 @@ # import copy -import typing -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any PARAMETERS_STR = "$parameters" @@ -94,7 +94,7 @@ def propagate_types_and_parameters( parent_field_identifier: str, declarative_component: Mapping[str, Any], parent_parameters: Mapping[str, Any], - use_parent_parameters: Optional[bool] = None, + use_parent_parameters: bool | None = None, ) -> Mapping[str, Any]: """ Recursively transforms the specified declarative component and subcomponents to propagate parameters and insert the @@ -158,7 +158,7 @@ def propagate_types_and_parameters( ) if excluded_parameter: current_parameters[field_name] = excluded_parameter - elif isinstance(field_value, typing.List): + elif isinstance(field_value, list): # We exclude propagating a parameter that matches the current field name 
because that would result in an infinite cycle excluded_parameter = current_parameters.pop(field_name, None) for i, element in enumerate(field_value): diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py b/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py index 045ea9a2c..bed8286d6 100644 --- a/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +++ b/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py @@ -3,7 +3,8 @@ # import re -from typing import Any, Mapping, Set, Tuple, Union +from collections.abc import Mapping +from typing import Any from airbyte_cdk.sources.declarative.parsers.custom_exceptions import ( CircularReferenceException, @@ -106,7 +107,7 @@ def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]: """ return self._evaluate_node(manifest, manifest, set()) # type: ignore[no-any-return] - def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: Set[Any]) -> Any: + def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: set[Any]) -> Any: if isinstance(node, dict): evaluated_dict = { k: self._evaluate_node(v, manifest, visited) @@ -185,7 +186,7 @@ def _read_ref_value(ref: str, manifest_node: Mapping[str, Any]) -> Any: return manifest_node -def _parse_path(ref: str) -> Tuple[Union[str, int], str]: +def _parse_path(ref: str) -> tuple[str | int, str]: """ Return the next path component, together with the rest of the path. diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 25840f06f..285cc2f55 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -8,17 +8,10 @@ import importlib import inspect import re +from collections.abc import Callable, Mapping, MutableMapping from functools import partial from typing import ( Any, - Callable, - Dict, - List, - Mapping, - MutableMapping, - Optional, - Type, - Union, get_args, get_origin, get_type_hints, @@ -106,7 +99,6 @@ ) from airbyte_cdk.sources.declarative.models import ( CustomStateMigration, - GzipDecoder, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( AddedFieldDefinition as AddedFieldDefinitionModel, @@ -404,10 +396,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ZipfileDecoder as ZipfileDecoderModel, ) -from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import ( - COMPONENTS_MODULE_NAME, - SDM_COMPONENTS_MODULE_NAME, -) from airbyte_cdk.sources.declarative.partition_routers import ( CartesianProductStreamSlicer, GroupingPartitionRouter, @@ -560,15 +548,15 @@ class ModelToComponentFactory: def __init__( self, - limit_pages_fetched_per_slice: Optional[int] = None, - limit_slices_fetched: Optional[int] = None, + limit_pages_fetched_per_slice: int | None = None, + limit_slices_fetched: int | None = None, emit_connector_builder_messages: bool = False, disable_retries: bool = False, disable_cache: bool = False, disable_resumable_full_refresh: bool = False, - message_repository: Optional[MessageRepository] = None, - connector_state_manager: Optional[ConnectorStateManager] = None, - max_concurrent_async_job_count: Optional[int] = None, + message_repository: MessageRepository | None = None, + connector_state_manager: ConnectorStateManager | None = None, + max_concurrent_async_job_count: int | 
None = None, ): self._init_mappings() self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice @@ -581,11 +569,11 @@ def __init__( self._evaluate_log_level(emit_connector_builder_messages) ) self._connector_state_manager = connector_state_manager or ConnectorStateManager() - self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None + self._api_budget: APIBudget | HttpAPIBudget | None = None self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) def _init_mappings(self) -> None: - self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { + self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[type[BaseModel], Callable[..., Any]] = { AddedFieldDefinitionModel: self.create_added_field_definition, AddFieldsModel: self.create_add_fields, ApiKeyAuthenticatorModel: self.create_api_key_authenticator, @@ -688,7 +676,7 @@ def _init_mappings(self) -> None: def create_component( self, - model_type: Type[BaseModel], + model_type: type[BaseModel], component_definition: ComponentDefinition, config: Config, **kwargs: Any, @@ -789,7 +777,7 @@ def create_flatten_fields( def create_dpath_flatten_fields( self, model: DpathFlattenFieldsModel, config: Config, **kwargs: Any ) -> DpathFlattenFields: - model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] + model_field_path: list[InterpolatedString | str] = [x for x in model.field_path] return DpathFlattenFields( config=config, field_path=model_field_path, @@ -801,7 +789,7 @@ def create_dpath_flatten_fields( ) @staticmethod - def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: + def _json_schema_type_name_to_type(value_type: ValueType | None) -> type[Any] | None: if not value_type: return None names_to_types = { @@ -816,7 +804,7 @@ def create_api_key_authenticator( self, model: ApiKeyAuthenticatorModel, config: Config, - token_provider: Optional[TokenProvider] = None, + token_provider: TokenProvider | None = None, **kwargs: Any, ) -> ApiKeyAuthenticator: if model.inject_into is None and model.header is None: @@ -874,7 +862,7 @@ def create_legacy_to_per_partition_state_migration( ) partition_router = retriever.partition_router if not isinstance( - partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) + partition_router, SubstreamPartitionRouterModel | CustomPartitionRouterModel ): raise ValueError( f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. 
Got {type(partition_router)}" @@ -898,7 +886,7 @@ def create_legacy_to_per_partition_state_migration( def create_session_token_authenticator( self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any - ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: + ) -> ApiKeyAuthenticator | BearerAuthenticator: decoder = ( self._create_component_from_model(model=model.decoder, config=config) if model.decoder @@ -952,7 +940,7 @@ def create_basic_http_authenticator( def create_bearer_authenticator( model: BearerAuthenticatorModel, config: Config, - token_provider: Optional[TokenProvider] = None, + token_provider: TokenProvider | None = None, **kwargs: Any, ) -> BearerAuthenticator: if token_provider is not None and model.api_token != "": @@ -1043,7 +1031,7 @@ def create_concurrency_level( @staticmethod def apply_stream_state_migrations( - stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any] + stream_state_migrations: list[Any] | None, stream_state: MutableMapping[str, Any] ) -> MutableMapping[str, Any]: if stream_state_migrations: for state_migration in stream_state_migrations: @@ -1054,14 +1042,14 @@ def apply_stream_state_migrations( def create_concurrent_cursor_from_datetime_based_cursor( self, - model_type: Type[BaseModel], + model_type: type[BaseModel], component_definition: ComponentDefinition, stream_name: str, - stream_namespace: Optional[str], + stream_namespace: str | None, config: Config, - message_repository: Optional[MessageRepository] = None, - runtime_lookback_window: Optional[datetime.timedelta] = None, - stream_state_migrations: Optional[List[Any]] = None, + message_repository: MessageRepository | None = None, + runtime_lookback_window: datetime.timedelta | None = None, + stream_state_migrations: list[Any] | None = None, **kwargs: Any, ) -> ConcurrentCursor: # Per-partition incremental streams can dynamically create child cursors which will pass their current @@ -1149,7 +1137,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( new_stream_state ) - start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] + start_date_runtime_value: InterpolatedString | str | MinMaxDatetime if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): start_date_runtime_value = self.create_min_max_datetime( model=datetime_based_cursor_model.start_datetime, config=config @@ -1157,7 +1145,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( else: start_date_runtime_value = datetime_based_cursor_model.start_datetime - end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] + end_date_runtime_value: InterpolatedString | str | MinMaxDatetime | None if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): end_date_runtime_value = self.create_min_max_datetime( model=datetime_based_cursor_model.end_datetime, config=config @@ -1281,12 +1269,12 @@ def create_concurrent_cursor_from_datetime_based_cursor( def create_concurrent_cursor_from_incrementing_count_cursor( self, - model_type: Type[BaseModel], + model_type: type[BaseModel], component_definition: ComponentDefinition, stream_name: str, - stream_namespace: Optional[str], + stream_namespace: str | None, config: Config, - message_repository: Optional[MessageRepository] = None, + message_repository: MessageRepository | None = None, **kwargs: Any, ) -> ConcurrentCursor: # Per-partition incremental streams can dynamically create child cursors which will pass their current @@ -1365,14 +1353,14 @@ def 
_assemble_weekday(self, weekday: str) -> Weekday: def create_concurrent_cursor_from_perpartition_cursor( self, state_manager: ConnectorStateManager, - model_type: Type[BaseModel], + model_type: type[BaseModel], component_definition: ComponentDefinition, stream_name: str, - stream_namespace: Optional[str], + stream_namespace: str | None, config: Config, stream_state: MutableMapping[str, Any], partition_router: PartitionRouter, - stream_state_migrations: Optional[List[Any]] = None, + stream_state_migrations: list[Any] | None = None, **kwargs: Any, ) -> ConcurrentPerPartitionCursor: component_type = component_definition.get("type") @@ -1585,7 +1573,7 @@ def _get_class_from_fully_qualified_class_name( ) from e @staticmethod - def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: + def _derive_component_type_from_type_hints(field_type: Any) -> str | None: interface = field_type while True: origin = get_origin(interface) @@ -1602,13 +1590,13 @@ def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: return None @staticmethod - def is_builtin_type(cls: Optional[Type[Any]]) -> bool: + def is_builtin_type(cls: type[Any] | None) -> bool: if not cls: return False return cls.__module__ == "builtins" @staticmethod - def _extract_missing_parameters(error: TypeError) -> List[str]: + def _extract_missing_parameters(error: TypeError) -> list[str]: parameter_search = re.search(r"keyword-only.*:\s(.*)", str(error)) if parameter_search: return re.findall(r"\'(.+?)\'", parameter_search.group(1)) @@ -1650,10 +1638,8 @@ def _create_nested_component( raise ValueError( f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " + ", ".join( - ( - f"{type_name}.$parameters.{parameter}" - for parameter in missing_parameters - ) + f"{type_name}.$parameters.{parameter}" + for parameter in missing_parameters ) ) raise TypeError( @@ -1671,12 +1657,12 @@ def _is_component(model_value: Any) -> bool: def create_datetime_based_cursor( self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any ) -> DatetimeBasedCursor: - start_datetime: Union[str, MinMaxDatetime] = ( + start_datetime: str | MinMaxDatetime = ( model.start_datetime if isinstance(model.start_datetime, str) else self.create_min_max_datetime(model.start_datetime, config) ) - end_datetime: Union[str, MinMaxDatetime, None] = None + end_datetime: str | MinMaxDatetime | None = None if model.is_data_feed and model.end_datetime: raise ValueError("Data feed does not support end_datetime") if model.is_data_feed and model.is_client_side_incremental: @@ -1759,7 +1745,7 @@ def create_declarative_stream( cursor = ( combined_slicers if isinstance( - combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) + combined_slicers, PerPartitionWithGlobalCursor | GlobalSubstreamCursor ) else self._create_component_from_model(model=model.incremental_sync, config=config) ) @@ -1872,14 +1858,10 @@ def create_declarative_stream( def _build_stream_slicer_from_partition_router( self, - model: Union[ - AsyncRetrieverModel, - CustomRetrieverModel, - SimpleRetrieverModel, - ], + model: AsyncRetrieverModel | CustomRetrieverModel | SimpleRetrieverModel, config: Config, - stream_name: Optional[str] = None, - ) -> Optional[PartitionRouter]: + stream_name: str | None = None, + ) -> PartitionRouter | None: if ( hasattr(model, "partition_router") and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel) @@ -1905,9 +1887,9 @@ def _build_stream_slicer_from_partition_router( def 
_build_incremental_cursor( self, model: DeclarativeStreamModel, - stream_slicer: Optional[PartitionRouter], + stream_slicer: PartitionRouter | None, config: Config, - ) -> Optional[StreamSlicer]: + ) -> StreamSlicer | None: if model.incremental_sync and stream_slicer: if model.retriever.type == "AsyncRetriever": return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing @@ -1958,13 +1940,9 @@ def _build_incremental_cursor( def _build_resumable_cursor( self, - model: Union[ - AsyncRetrieverModel, - CustomRetrieverModel, - SimpleRetrieverModel, - ], - stream_slicer: Optional[PartitionRouter], - ) -> Optional[StreamSlicer]: + model: AsyncRetrieverModel | CustomRetrieverModel | SimpleRetrieverModel, + stream_slicer: PartitionRouter | None, + ) -> StreamSlicer | None: if hasattr(model, "paginator") and model.paginator and not stream_slicer: # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor` return ResumableFullRefreshCursor(parameters={}) @@ -1980,7 +1958,7 @@ def _build_resumable_cursor( def _merge_stream_slicers( self, model: DeclarativeStreamModel, config: Config - ) -> Optional[StreamSlicer]: + ) -> StreamSlicer | None: retriever_model = model.retriever stream_slicer = self._build_stream_slicer_from_partition_router( @@ -2054,9 +2032,9 @@ def create_default_paginator( config: Config, *, url_base: str, - decoder: Optional[Decoder] = None, - cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None, - ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: + decoder: Decoder | None = None, + cursor_used_for_stop_condition: DeclarativeCursor | None = None, + ) -> DefaultPaginator | PaginatorTestReadDecorator: if decoder: if self._is_supported_decoder_for_pagination(decoder): decoder_to_use = PaginationDecoderDecorator(decoder=decoder) @@ -2098,14 +2076,14 @@ def create_dpath_extractor( self, model: DpathExtractorModel, config: Config, - decoder: Optional[Decoder] = None, + decoder: Decoder | None = None, **kwargs: Any, ) -> DpathExtractor: if decoder: decoder_to_use = decoder else: decoder_to_use = JsonDecoder(parameters={}) - model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] + model_field_path: list[InterpolatedString | str] = [x for x in model.field_path] return DpathExtractor( decoder=decoder_to_use, field_path=model_field_path, @@ -2137,8 +2115,8 @@ def create_http_requester( model: HttpRequesterModel, config: Config, decoder: Decoder = JsonDecoder(parameters={}), - query_properties_key: Optional[str] = None, - use_cache: Optional[bool] = None, + query_properties_key: str | None = None, + use_cache: bool | None = None, *, name: str, ) -> HttpRequester: @@ -2266,11 +2244,11 @@ def create_schema_type_identifier( for types_map in model.types_mapping ] ) - model_schema_pointer: List[Union[InterpolatedString, str]] = ( + model_schema_pointer: list[InterpolatedString | str] = ( [x for x in model.schema_pointer] if model.schema_pointer else [] ) - model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer] - model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = ( + model_key_pointer: list[InterpolatedString | str] = [x for x in model.key_pointer] + 
model_type_pointer: list[InterpolatedString | str] | None = ( [x for x in model.type_pointer] if model.type_pointer else None ) @@ -2406,7 +2384,7 @@ def _get_parser(model: BaseModel, config: Config) -> Parser: inner_parser=ModelToComponentFactory._get_parser(model.decoder, config) ) elif isinstance( - model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel) + model, CustomDecoderModel | IterableDecoderModel | XmlDecoderModel | ZipfileDecoderModel ): raise ValueError(f"Decoder type {model} does not have parser associated to it") @@ -2624,7 +2602,7 @@ def create_parent_stream_config( "The '*' wildcard in 'lazy_read_pointer' is not supported — only direct paths are allowed." ) - model_lazy_read_pointer: List[Union[InterpolatedString, str]] = ( + model_lazy_read_pointer: list[InterpolatedString | str] = ( [x for x in model.lazy_read_pointer] if model.lazy_read_pointer else [] ) @@ -2730,7 +2708,7 @@ def create_request_option( model: RequestOptionModel, config: Config, **kwargs: Any ) -> RequestOption: inject_into = RequestOptionType(model.inject_into.value) - field_path: Optional[List[Union[InterpolatedString, str]]] = ( + field_path: list[InterpolatedString | str] | None = ( [ InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) for segment in model.field_path @@ -2756,9 +2734,9 @@ def create_record_selector( config: Config, *, name: str, - transformations: List[RecordTransformation] | None = None, + transformations: list[RecordTransformation] | None = None, decoder: Decoder | None = None, - client_side_incremental_sync: Dict[str, Any] | None = None, + client_side_incremental_sync: dict[str, Any] | None = None, **kwargs: Any, ) -> RecordSelector: extractor = self._create_component_from_model( @@ -2847,18 +2825,17 @@ def create_simple_retriever( config: Config, *, name: str, - primary_key: Optional[Union[str, List[str], List[List[str]]]], - stream_slicer: Optional[StreamSlicer], - request_options_provider: Optional[RequestOptionsProvider] = None, + primary_key: str | list[str] | list[list[str]] | None, + stream_slicer: StreamSlicer | None, + request_options_provider: RequestOptionsProvider | None = None, stop_condition_on_cursor: bool = False, - client_side_incremental_sync: Optional[Dict[str, Any]] = None, - transformations: List[RecordTransformation], - incremental_sync: Optional[ - Union[ - IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel - ] - ] = None, - use_cache: Optional[bool] = None, + client_side_incremental_sync: dict[str, Any] | None = None, + transformations: list[RecordTransformation], + incremental_sync: IncrementingCountCursorModel + | DatetimeBasedCursorModel + | CustomIncrementalSyncModel + | None = None, + use_cache: bool | None = None, **kwargs: Any, ) -> SimpleRetriever: decoder = ( @@ -2875,8 +2852,8 @@ def create_simple_retriever( client_side_incremental_sync=client_side_incremental_sync, ) - query_properties: Optional[QueryProperties] = None - query_properties_key: Optional[str] = None + query_properties: QueryProperties | None = None + query_properties_key: str | None = None if ( hasattr(model.requester, "request_parameters") and model.requester.request_parameters @@ -3036,8 +3013,8 @@ def create_simple_retriever( @staticmethod def _remove_query_properties( - request_parameters: Mapping[str, Union[Any, str]], - ) -> Mapping[str, Union[Any, str]]: + request_parameters: Mapping[str, Any | str], + ) -> Mapping[str, Any | str]: return { parameter_field: request_parameter for 
parameter_field, request_parameter in request_parameters.items() @@ -3049,7 +3026,7 @@ def create_state_delegating_stream( self, model: StateDelegatingStreamModel, config: Config, - has_parent_state: Optional[bool] = None, + has_parent_state: bool | None = None, **kwargs: Any, ) -> DeclarativeStream: if ( @@ -3104,12 +3081,13 @@ def create_async_retriever( config: Config, *, name: str, - primary_key: Optional[ - Union[str, List[str], List[List[str]]] - ], # this seems to be needed to match create_simple_retriever - stream_slicer: Optional[StreamSlicer], - client_side_incremental_sync: Optional[Dict[str, Any]] = None, - transformations: List[RecordTransformation], + primary_key: str + | list[str] + | list[list[str]] + | None, # this seems to be needed to match create_simple_retriever + stream_slicer: StreamSlicer | None, + client_side_incremental_sync: dict[str, Any] | None = None, + transformations: list[RecordTransformation], **kwargs: Any, ) -> AsyncRetriever: def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetriever: @@ -3157,7 +3135,7 @@ def _get_download_retriever() -> SimpleRetrieverTestReadDecorator | SimpleRetrie ) def _get_job_timeout() -> datetime.timedelta: - user_defined_timeout: Optional[int] = ( + user_defined_timeout: int | None = ( int( InterpolatedString.create( str(model.polling_job_timeout), @@ -3450,7 +3428,7 @@ def create_http_components_resolver( def create_stream_config( model: StreamConfigModel, config: Config, **kwargs: Any ) -> StreamConfig: - model_configs_pointer: List[Union[InterpolatedString, str]] = ( + model_configs_pointer: list[InterpolatedString | str] = ( [x for x in model.configs_pointer] if model.configs_pointer else [] ) @@ -3491,7 +3469,7 @@ def create_config_components_resolver( ) def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool: - if isinstance(decoder, (JsonDecoder, XmlDecoder)): + if isinstance(decoder, JsonDecoder | XmlDecoder): return True elif isinstance(decoder, CompositeRawDecoder): return self._is_supported_parser_for_pagination(decoder.parser) diff --git a/airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py index 38a4f5328..e004e0d3f 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+from collections.abc import Callable, Iterable, Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Callable, Iterable, Mapping, Optional +from typing import Any from airbyte_cdk.models import FailureType from airbyte_cdk.sources.declarative.async_job.job import AsyncJob @@ -33,7 +34,7 @@ class AsyncJobPartitionRouter(StreamSlicer): def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._job_orchestrator_factory = self.job_orchestrator_factory - self._job_orchestrator: Optional[AsyncJobOrchestrator] = None + self._job_orchestrator: AsyncJobOrchestrator | None = None self._parameters = parameters def stream_slices(self) -> Iterable[StreamSlice]: diff --git a/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py b/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py index 8718004bf..e2ff14752 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +++ b/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py @@ -5,9 +5,9 @@ import itertools import logging from collections import ChainMap -from collections.abc import Callable +from collections.abc import Callable, Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any, Iterable, List, Mapping, Optional +from typing import Any from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ( @@ -57,7 +57,7 @@ class CartesianProductStreamSlicer(PartitionRouter): stream_slicers (List[PartitionRouter]): Underlying stream slicers. The RequestOptions (e.g: Request headers, parameters, etc..) returned by this slicer are the combination of the RequestOptions of its input slicers. If there are conflicts e.g: two slicers define the same header or request param, the conflict is resolved by taking the value from the first slicer, where ordering is determined by the order in which slicers were input to this composite slicer. 
""" - stream_slicers: List[PartitionRouter] + stream_slicers: list[PartitionRouter] parameters: InitVar[Mapping[str, Any]] def __post_init__(self, parameters: Mapping[str, Any]) -> None: @@ -66,9 +66,9 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return dict( ChainMap( @@ -86,9 +86,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return dict( ChainMap( @@ -106,9 +106,9 @@ def get_request_headers( def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return dict( ChainMap( @@ -126,9 +126,9 @@ def get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return dict( ChainMap( @@ -165,7 +165,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: """ pass - def get_stream_state(self) -> Optional[Mapping[str, StreamState]]: + def get_stream_state(self) -> Mapping[str, StreamState] | None: """ Parent stream states are not supported for cartesian product stream slicer """ diff --git a/airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py index a08acbbea..b774c5ca1 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Iterable, Mapping from dataclasses import dataclass -from typing import Any, Iterable, Mapping, Optional +from typing import Any from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @@ -29,7 +30,7 @@ class GroupingPartitionRouter(PartitionRouter): deduplicate: bool = True def __post_init__(self) -> None: - self._state: Optional[Mapping[str, StreamState]] = {} + self._state: Mapping[str, StreamState] | None = {} def stream_slices(self) -> Iterable[StreamSlice]: """ @@ -110,33 +111,33 @@ def _create_grouped_slice(self, batch: list[StreamSlice]) -> StreamSlice: def get_request_params( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_headers( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_body_data( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_body_json( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} @@ -145,6 +146,6 @@ def set_initial_state(self, stream_state: StreamState) -> None: self.underlying_partition_router.set_initial_state(stream_state) self._state = self.underlying_partition_router.get_stream_state() - def get_stream_state(self) -> Optional[Mapping[str, StreamState]]: + def get_stream_state(self) -> Mapping[str, StreamState] | None: """Delegate state retrieval to the underlying partition router.""" return self._state diff --git a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py index 6049cefe2..aa080dd5b 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter @@ -27,11 +28,11 @@ class ListPartitionRouter(PartitionRouter): request_option (Optional[RequestOption]): The request option to configure the HTTP request """ - values: Union[str, List[str]] - cursor_field: Union[InterpolatedString, str] + values: str | list[str] + cursor_field: InterpolatedString | str config: Config parameters: InitVar[Mapping[str, Any]] - request_option: Optional[RequestOption] = None + request_option: RequestOption | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: if isinstance(self.values, str): @@ -48,36 +49,36 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.request_parameter, stream_slice) def get_request_headers( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.header, stream_slice) def get_request_body_data( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_data, stream_slice) def get_request_body_json( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_json, stream_slice) @@ -91,7 +92,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: ] def _get_request_option( - self, request_option_type: RequestOptionType, stream_slice: Optional[StreamSlice] + self, request_option_type: RequestOptionType, stream_slice: StreamSlice | None ) -> Mapping[str, Any]: if ( self.request_option @@ -114,7 +115,7 @@ def set_initial_state(self, stream_state: 
StreamState) -> None: """ pass - def get_stream_state(self) -> Optional[Mapping[str, StreamState]]: + def get_stream_state(self) -> Mapping[str, StreamState] | None: """ ListPartitionRouter doesn't have parent streams """ diff --git a/airbyte_cdk/sources/declarative/partition_routers/partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/partition_router.py index 3a9bc3abf..d0a1c062a 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/partition_router.py @@ -3,8 +3,8 @@ # from abc import abstractmethod +from collections.abc import Mapping from dataclasses import dataclass -from typing import Mapping, Optional from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.types import StreamState @@ -41,7 +41,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: """ @abstractmethod - def get_stream_state(self) -> Optional[Mapping[str, StreamState]]: + def get_stream_state(self) -> Mapping[str, StreamState] | None: """ Get the state of the parent streams. diff --git a/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py index 32e6a353d..bd9856f3a 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any, Iterable, Mapping, Optional +from typing import Any from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter from airbyte_cdk.sources.types import StreamSlice, StreamState @@ -17,33 +18,33 @@ class SinglePartitionRouter(PartitionRouter): def get_request_params( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_headers( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_body_data( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_body_json( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} @@ -56,7 +57,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: """ pass - def get_stream_state(self) -> Optional[Mapping[str, StreamState]]: + def 
get_stream_state(self) -> Mapping[str, StreamState] | None: """ SinglePartitionRouter doesn't have parent streams """ diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index 000beeff9..b04c859f1 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -6,8 +6,9 @@ import copy import json import logging +from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union +from typing import TYPE_CHECKING, Any import dpath import requests @@ -41,16 +42,16 @@ class ParentStreamConfig: """ stream: "DeclarativeStream" # Parent streams must be DeclarativeStream because we can't know which part of the stream slice is a partition for regular Stream - parent_key: Union[InterpolatedString, str] - partition_field: Union[InterpolatedString, str] + parent_key: InterpolatedString | str + partition_field: InterpolatedString | str config: Config parameters: InitVar[Mapping[str, Any]] - extra_fields: Optional[Union[List[List[str]], List[List[InterpolatedString]]]] = ( + extra_fields: list[list[str]] | list[list[InterpolatedString]] | None = ( None # List of field paths (arrays of strings) ) - request_option: Optional[RequestOption] = None + request_option: RequestOption | None = None incremental_dependency: bool = False - lazy_read_pointer: Optional[List[Union[InterpolatedString, str]]] = None + lazy_read_pointer: list[InterpolatedString | str] | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.parent_key = InterpolatedString.create(self.parent_key, parameters=parameters) @@ -86,7 +87,7 @@ class SubstreamPartitionRouter(PartitionRouter): parent_stream_configs (List[ParentStreamConfig]): parent streams to iterate over and their config """ - parent_stream_configs: List[ParentStreamConfig] + parent_stream_configs: list[ParentStreamConfig] config: Config parameters: InitVar[Mapping[str, Any]] @@ -97,42 +98,42 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.request_parameter, stream_slice) def get_request_headers( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.header, stream_slice) def get_request_body_data( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + 
stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_data, stream_slice) def get_request_body_json( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_json, stream_slice) def _get_request_option( - self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice] + self, option_type: RequestOptionType, stream_slice: StreamSlice | None ) -> Mapping[str, Any]: params: MutableMapping[str, Any] = {} if stream_slice: @@ -231,7 +232,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: ) def _extract_child_response( - self, parent_record: Mapping[str, Any] | AirbyteMessage, pointer: List[InterpolatedString] + self, parent_record: Mapping[str, Any] | AirbyteMessage, pointer: list[InterpolatedString] ) -> requests.Response: """Extract child records from a parent record based on lazy pointers.""" @@ -248,7 +249,7 @@ def _create_response(data: MutableMapping[str, Any]) -> SafeResponse: def _extract_extra_fields( self, parent_record: Mapping[str, Any] | AirbyteMessage, - extra_fields: Optional[List[List[str]]] = None, + extra_fields: list[list[str]] | None = None, ) -> Mapping[str, Any]: """ Extracts additional fields specified by their paths from the parent record. @@ -372,7 +373,7 @@ def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> Str substream_state = substream_state_values[0] if substream_state_values else {} # Ignore per-partition states or invalid formats. - if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1: + if isinstance(substream_state, list | dict) or len(substream_state_values) != 1: # If a global state is present under the key "state", use its first value. if ( "state" in stream_state @@ -394,7 +395,7 @@ def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> Str return parent_state - def get_stream_state(self) -> Optional[Mapping[str, StreamState]]: + def get_stream_state(self) -> Mapping[str, StreamState] | None: """ Get the state of the parent streams. 
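Reviewer aside (not part of the patch): the partition-router hunks above all land on the same typing convention, so a minimal, self-contained sketch of the target style may help when scanning the remaining files. The ToyPartitionRouter class and the StreamState alias below are hypothetical and exist only for illustration; the real PartitionRouter interface lives in airbyte_cdk/sources/declarative/partition_routers/partition_router.py.

from collections.abc import Iterable, Mapping
from typing import Any

StreamState = Mapping[str, Any]  # illustrative alias; the CDK ships its own StreamState in airbyte_cdk.sources.types


class ToyPartitionRouter:
    """Toy example mirroring the post-migration style: collections.abc generics, `X | None` optionals."""

    def __init__(self, partitions: Iterable[Mapping[str, Any]]) -> None:
        self._partitions = list(partitions)
        self._state: Mapping[str, StreamState] | None = None  # was: Optional[Mapping[str, StreamState]]

    def set_initial_state(self, stream_state: StreamState) -> None:
        self._state = {"parent": stream_state}

    def get_stream_state(self) -> Mapping[str, StreamState] | None:
        return self._state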
@@ -433,9 +434,9 @@ def __getattr__(self, name: str) -> Any: return getattr(requests.Response, name, None) @property - def content(self) -> Optional[bytes]: + def content(self) -> bytes | None: return super().content @content.setter - def content(self, value: Union[str, bytes]) -> None: + def content(self, value: str | bytes) -> None: self._content = value.encode() if isinstance(value, str) else value diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py index 26c7c7673..75155112e 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any import requests @@ -21,7 +22,7 @@ class ConstantBackoffStrategy(BackoffStrategy): backoff_time_in_seconds (float): time to backoff before retrying a retryable request. """ - backoff_time_in_seconds: Union[float, InterpolatedString, str] + backoff_time_in_seconds: float | InterpolatedString | str parameters: InitVar[Mapping[str, Any]] config: Config @@ -39,7 +40,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: return self.backoff_time_in_seconds.eval(self.config) # type: ignore # backoff_time_in_seconds is always cast to an interpolated string diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py index cdd1fe650..8a22da91c 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any import requests @@ -23,7 +24,7 @@ class ExponentialBackoffStrategy(BackoffStrategy): parameters: InitVar[Mapping[str, Any]] config: Config - factor: Union[float, InterpolatedString, str] = 5 + factor: float | InterpolatedString | str = 5 def __post_init__(self, parameters: Mapping[str, Any]) -> None: if not isinstance(self.factor, InterpolatedString): @@ -39,7 +40,7 @@ def _retry_factor(self) -> float: def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: return self._retry_factor * 2**attempt_count # type: ignore # factor is always cast to an interpolated string diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py index 60103f343..e6fab9bb3 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py @@ -4,14 +4,13 @@ import numbers from re import Pattern -from typing import Optional import requests def get_numeric_value_from_header( - response: requests.Response, header: str, regex: Optional[Pattern[str]] -) -> Optional[float]: + response: requests.Response, header: str, regex: Pattern[str] | None +) -> float | None: """ Extract a header value from the response as a float :param response: response the extract header value from @@ -34,7 +33,7 @@ def get_numeric_value_from_header( return None -def _as_float(s: str) -> Optional[float]: +def _as_float(s: str) -> float | None: try: return float(s) except ValueError: diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py index 5cda96a4d..789e68e86 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py @@ -3,8 +3,9 @@ # import re +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any import requests @@ -31,11 +32,11 @@ class WaitTimeFromHeaderBackoffStrategy(BackoffStrategy): max_waiting_time_in_seconds: (Optional[float]): given the value extracted from the header is greater than this value, stop the stream """ - header: Union[InterpolatedString, str] + header: InterpolatedString | str parameters: InitVar[Mapping[str, Any]] config: Config - regex: Optional[Union[InterpolatedString, str]] = None - max_waiting_time_in_seconds: Optional[float] = None + regex: InterpolatedString | str | None = None + max_waiting_time_in_seconds: float | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.regex = ( @@ -45,9 +46,9 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + 
response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: header = self.header.eval(config=self.config) # type: ignore # header is always cast to an interpolated stream if self.regex: evaled_regex = self.regex.eval(self.config) # type: ignore # header is always cast to an interpolated string diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py index 1220e198f..7bf2b0e63 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py @@ -5,8 +5,9 @@ import numbers import re import time +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any import requests @@ -32,11 +33,11 @@ class WaitUntilTimeFromHeaderBackoffStrategy(BackoffStrategy): regex (Optional[str]): optional regex to apply on the header to extract its value """ - header: Union[InterpolatedString, str] + header: InterpolatedString | str parameters: InitVar[Mapping[str, Any]] config: Config - min_wait: Optional[Union[float, InterpolatedString, str]] = None - regex: Optional[Union[InterpolatedString, str]] = None + min_wait: float | InterpolatedString | str | None = None + regex: InterpolatedString | str | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.header = InterpolatedString.create(self.header, parameters=parameters) @@ -48,9 +49,9 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: now = time.time() header = self.header.eval(self.config) # type: ignore # header is always cast to an interpolated string if self.regex: diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py index bb60f2a96..281b6045d 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, List, Mapping, Optional, Union +from typing import Any import requests @@ -41,7 +42,7 @@ class CompositeErrorHandler(ErrorHandler): error_handlers (List[ErrorHandler]): list of error handlers """ - error_handlers: List[ErrorHandler] + error_handlers: list[ErrorHandler] parameters: InitVar[Mapping[str, Any]] def __post_init__(self, parameters: Mapping[str, Any]) -> None: @@ -49,15 +50,15 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: raise ValueError("CompositeErrorHandler expects at least 1 underlying error handler") @property - def max_retries(self) -> Optional[int]: + def max_retries(self) -> int | None: return self.error_handlers[0].max_retries @property - def max_time(self) -> Optional[int]: + def max_time(self) -> int | None: return max([error_handler.max_time or 0 for error_handler in self.error_handlers]) def interpret_response( - self, response_or_exception: Optional[Union[requests.Response, Exception]] + self, response_or_exception: requests.Response | Exception | None ) -> ErrorResolution: matched_error_resolution = None for error_handler in self.error_handlers: @@ -80,7 +81,7 @@ def interpret_response( return create_fallback_error_resolution(response_or_exception) @property - def backoff_strategies(self) -> Optional[List[BackoffStrategy]]: + def backoff_strategies(self) -> list[BackoffStrategy] | None: """ Combines backoff strategies from all child error handlers into a single flattened list. diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py index b70ceaaeb..744bf8209 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any, List, Mapping, MutableMapping, Optional, Union +from typing import Any import requests @@ -95,12 +96,12 @@ class DefaultErrorHandler(ErrorHandler): parameters: InitVar[Mapping[str, Any]] config: Config - response_filters: Optional[List[HttpResponseFilter]] = None - max_retries: Optional[int] = 5 + response_filters: list[HttpResponseFilter] | None = None + max_retries: int | None = 5 max_time: int = 60 * 10 _max_retries: int = field(init=False, repr=False, default=5) _max_time: int = field(init=False, repr=False, default=60 * 10) - backoff_strategies: Optional[List[BackoffStrategy]] = None + backoff_strategies: list[BackoffStrategy] | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: if not self.response_filters: @@ -109,7 +110,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._last_request_to_attempt_count: MutableMapping[requests.PreparedRequest, int] = {} def interpret_response( - self, response_or_exception: Optional[Union[requests.Response, Exception]] + self, response_or_exception: requests.Response | Exception | None ) -> ErrorResolution: if self.response_filters: for response_filter in self.response_filters: @@ -133,9 +134,9 @@ def interpret_response( def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int = 0, - ) -> Optional[float]: + ) -> float | None: backoff = None if self.backoff_strategies: for backoff_strategy in self.backoff_strategies: diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py index 9943a0d6a..0728dcb85 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py @@ -2,7 +2,6 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # -from typing import Optional, Union import requests @@ -20,12 +19,12 @@ class DefaultHttpResponseFilter(HttpResponseFilter): def matches( - self, response_or_exception: Optional[Union[requests.Response, Exception]] - ) -> Optional[ErrorResolution]: + self, response_or_exception: requests.Response | Exception | None + ) -> ErrorResolution | None: default_mapped_error_resolution = None - if isinstance(response_or_exception, (requests.Response, Exception)): - mapped_key: Union[int, type] = ( + if isinstance(response_or_exception, requests.Response | Exception): + mapped_key: int | type = ( response_or_exception.status_code if isinstance(response_or_exception, requests.Response) else response_or_exception.__class__ diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py index 866d6b7d2..2c97f387c 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Set, Union +from typing import Any import requests @@ -40,12 +41,12 @@ class HttpResponseFilter: config: Config parameters: InitVar[Mapping[str, Any]] - action: Optional[Union[ResponseAction, str]] = None - failure_type: Optional[Union[FailureType, str]] = None - http_codes: Optional[Set[int]] = None - error_message_contains: Optional[str] = None - predicate: Union[InterpolatedBoolean, str] = "" - error_message: Union[InterpolatedString, str] = "" + action: ResponseAction | str | None = None + failure_type: FailureType | str | None = None + http_codes: set[int] | None = None + error_message_contains: str | None = None + predicate: InterpolatedBoolean | str = "" + error_message: InterpolatedString | str = "" def __post_init__(self, parameters: Mapping[str, Any]) -> None: if self.action is not None: @@ -70,8 +71,8 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.failure_type = FailureType[self.failure_type] def matches( - self, response_or_exception: Optional[Union[requests.Response, Exception]] - ) -> Optional[ErrorResolution]: + self, response_or_exception: requests.Response | Exception | None + ) -> ErrorResolution | None: filter_action = self._matches_filter(response_or_exception) mapped_key = ( response_or_exception.status_code @@ -79,7 +80,7 @@ def matches( else response_or_exception.__class__ ) - if isinstance(mapped_key, (int, Exception)): + if isinstance(mapped_key, int | Exception): default_mapped_error_resolution = self._match_default_error_mapping(mapped_key) else: default_mapped_error_resolution = None @@ -118,13 +119,13 @@ def matches( return None def _match_default_error_mapping( - self, mapped_key: Union[int, type[Exception]] - ) -> Optional[ErrorResolution]: + self, mapped_key: int | type[Exception] + ) -> ErrorResolution | None: return DEFAULT_ERROR_MAPPING.get(mapped_key) def _matches_filter( - self, response_or_exception: Optional[Union[requests.Response, Exception]] - ) -> Optional[ResponseAction]: + self, response_or_exception: requests.Response | Exception | None + ) -> ResponseAction | None: """ Apply the HTTP filter on the response and return the action to execute if it matches :param response: The HTTP response to evaluate @@ -145,7 +146,7 @@ def _safe_response_json(response: requests.Response) -> dict[str, Any]: except requests.exceptions.JSONDecodeError: return {} - def _create_error_message(self, response: requests.Response) -> Optional[str]: + def _create_error_message(self, response: requests.Response) -> str | None: """ Construct an error message based on the specified message template of the filter. :param response: The HTTP response which can be used during interpolation diff --git a/airbyte_cdk/sources/declarative/requesters/http_job_repository.py b/airbyte_cdk/sources/declarative/requesters/http_job_repository.py index e8bca6cc9..51df5d11f 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_job_repository.py +++ b/airbyte_cdk/sources/declarative/requesters/http_job_repository.py @@ -1,9 +1,10 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
import logging import uuid +from collections.abc import Iterable, Mapping from dataclasses import dataclass, field from datetime import timedelta -from typing import Any, Dict, Iterable, Mapping, Optional +from typing import Any import requests from requests import Response @@ -39,25 +40,25 @@ class AsyncHttpJobRepository(AsyncJobRepository): creation_requester: Requester polling_requester: Requester download_retriever: SimpleRetriever - abort_requester: Optional[Requester] - delete_requester: Optional[Requester] + abort_requester: Requester | None + delete_requester: Requester | None status_extractor: DpathExtractor status_mapping: Mapping[str, AsyncJobStatus] download_target_extractor: DpathExtractor # timeout for the job to be completed, passed from `polling_job_timeout` - job_timeout: Optional[timedelta] = None + job_timeout: timedelta | None = None record_extractor: RecordExtractor = field( init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({}) ) - download_target_requester: Optional[Requester] = ( + download_target_requester: Requester | None = ( None # use it in case polling_requester provides some and extra request is needed to obtain list of urls to download from ) def __post_init__(self) -> None: - self._create_job_response_by_id: Dict[str, Response] = {} - self._polling_job_response_by_id: Dict[str, Response] = {} + self._create_job_response_by_id: dict[str, Response] = {} + self._polling_job_response_by_id: dict[str, Response] = {} def _get_validated_polling_response(self, stream_slice: StreamSlice) -> requests.Response: """ @@ -73,7 +74,7 @@ def _get_validated_polling_response(self, stream_slice: StreamSlice) -> requests AirbyteTracedException: If the polling request returns an empty response. """ - polling_response: Optional[requests.Response] = self.polling_requester.send_request( + polling_response: requests.Response | None = self.polling_requester.send_request( stream_slice=stream_slice, log_formatter=lambda polling_response: format_http_message( response=polling_response, @@ -128,7 +129,7 @@ def _start_job_and_validate_response(self, stream_slice: StreamSlice) -> request AirbyteTracedException: If no response is received from the creation requester. 
""" - response: Optional[requests.Response] = self.creation_requester.send_request( + response: requests.Response | None = self.creation_requester.send_request( stream_slice=stream_slice, log_formatter=lambda response: format_http_message( response=response, @@ -229,7 +230,7 @@ def fetch_records(self, job: AsyncJob) -> Iterable[Mapping[str, Any]]: elif isinstance(message, AirbyteMessage): if message.type == Type.RECORD: yield message.record.data # type: ignore # message.record won't be None here as the message is a record - elif isinstance(message, (dict, Mapping)): + elif isinstance(message, dict | Mapping): yield message else: raise TypeError(f"Unknown type `{type(message)}` for message") @@ -240,7 +241,7 @@ def abort(self, job: AsyncJob) -> None: if not self.abort_requester: return - abort_response = self.abort_requester.send_request( + self.abort_requester.send_request( stream_slice=self._get_create_job_stream_slice(job), log_formatter=lambda abort_response: format_http_message( response=abort_response, @@ -256,7 +257,7 @@ def delete(self, job: AsyncJob) -> None: if not self.delete_requester: return - delete_job_reponse = self.delete_requester.send_request( + self.delete_requester.send_request( stream_slice=self._get_create_job_stream_slice(job), log_formatter=lambda delete_job_reponse: format_http_message( response=delete_job_reponse, @@ -273,7 +274,7 @@ def _clean_up_job(self, job_id: str) -> None: del self._create_job_response_by_id[job_id] del self._polling_job_response_by_id[job_id] - def _get_creation_response_interpolation_context(self, job: AsyncJob) -> Dict[str, Any]: + def _get_creation_response_interpolation_context(self, job: AsyncJob) -> dict[str, Any]: """ Returns the interpolation context for the creation response. @@ -296,7 +297,7 @@ def _get_creation_response_interpolation_context(self, job: AsyncJob) -> Dict[st ].request return creation_response_context - def _get_polling_response_interpolation_context(self, job: AsyncJob) -> Dict[str, Any]: + def _get_polling_response_interpolation_context(self, job: AsyncJob) -> dict[str, Any]: """ Returns the interpolation context for the polling response. 
diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index 78c07b725..a7285c443 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -3,9 +3,9 @@ # import logging -import os +from collections.abc import Callable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any, Callable, Mapping, MutableMapping, Optional, Union +from typing import Any from urllib.parse import urljoin import requests @@ -53,16 +53,16 @@ class HttpRequester(Requester): """ name: str - url_base: Union[InterpolatedString, str] + url_base: InterpolatedString | str config: Config parameters: InitVar[Mapping[str, Any]] - path: Optional[Union[InterpolatedString, str]] = None - authenticator: Optional[DeclarativeAuthenticator] = None - http_method: Union[str, HttpMethod] = HttpMethod.GET - request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None - error_handler: Optional[ErrorHandler] = None - api_budget: Optional[APIBudget] = None + path: InterpolatedString | str | None = None + authenticator: DeclarativeAuthenticator | None = None + http_method: str | HttpMethod = HttpMethod.GET + request_options_provider: InterpolatedRequestOptionsProvider | None = None + error_handler: ErrorHandler | None = None + api_budget: APIBudget | None = None disable_retries: bool = False message_repository: MessageRepository = NoopMessageRepository() use_cache: bool = False @@ -123,9 +123,9 @@ def get_authenticator(self) -> DeclarativeAuthenticator: def get_url_base( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: interpolation_context = get_interpolation_context( stream_state=stream_state, @@ -137,9 +137,9 @@ def get_url_base( def get_path( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: interpolation_context = get_interpolation_context( stream_state=stream_state, @@ -155,9 +155,9 @@ def get_method(self) -> HttpMethod: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: return self._request_options_provider.get_request_params( stream_state=stream_state, @@ -168,9 +168,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._request_options_provider.get_request_headers( stream_state=stream_state, @@ -182,10 +182,10 @@ def get_request_headers( def get_request_body_data( # type: ignore self, *, - 
stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: return ( self._request_options_provider.get_request_body_data( stream_state=stream_state, @@ -199,10 +199,10 @@ def get_request_body_data( # type: ignore def get_request_body_json( # type: ignore self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Optional[Mapping[str, Any]]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | None: return self._request_options_provider.get_request_body_json( stream_state=stream_state, stream_slice=stream_slice, @@ -215,13 +215,13 @@ def logger(self) -> logging.Logger: def _get_request_options( self, - stream_state: Optional[StreamState], - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], - requester_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], - auth_options_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], - extra_options: Optional[Union[Mapping[str, Any], str]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None, + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, + requester_method: Callable[..., Mapping[str, Any] | str | None], + auth_options_method: Callable[..., Mapping[str, Any] | str | None], + extra_options: Mapping[str, Any] | str | None = None, + ) -> Mapping[str, Any] | str: """ Get the request_option from the requester, the authenticator and extra_options passed in. Raise a ValueError if there's a key collision @@ -245,10 +245,10 @@ def _get_request_options( def _request_headers( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - extra_headers: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + extra_headers: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Specifies request headers. @@ -268,10 +268,10 @@ def _request_headers( def _request_params( self, - stream_state: Optional[StreamState], - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], - extra_params: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None, + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, + extra_params: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Specifies the query parameters that should be set on an outgoing HTTP request given the inputs. @@ -290,7 +290,7 @@ def _request_params( raise ValueError("Request params cannot be a string") for k, v in options.items(): - if isinstance(v, (dict,)): + if isinstance(v, dict): raise ValueError( f"Invalid value for `{k}` parameter. The values of request params cannot be an object." 
) @@ -299,11 +299,11 @@ def _request_params( def _request_body_data( self, - stream_state: Optional[StreamState], - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], - extra_body_data: Optional[Union[Mapping[str, Any], str]] = None, - ) -> Optional[Union[Mapping[str, Any], str]]: + stream_state: StreamState | None, + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, + extra_body_data: Mapping[str, Any] | str | None = None, + ) -> Mapping[str, Any] | str | None: """ Specifies how to populate the body of the request with a non-JSON payload. @@ -325,11 +325,11 @@ def _request_body_data( def _request_body_json( self, - stream_state: Optional[StreamState], - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], - extra_body_json: Optional[Mapping[str, Any]] = None, - ) -> Optional[Mapping[str, Any]]: + stream_state: StreamState | None, + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, + extra_body_json: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | None: """ Specifies how to populate the body of the request with a JSON payload. @@ -387,16 +387,16 @@ def _join_url(cls, url_base: str, path: str) -> str: def send_request( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - path: Optional[str] = None, - request_headers: Optional[Mapping[str, Any]] = None, - request_params: Optional[Mapping[str, Any]] = None, - request_body_data: Optional[Union[Mapping[str, Any], str]] = None, - request_body_json: Optional[Mapping[str, Any]] = None, - log_formatter: Optional[Callable[[requests.Response], Any]] = None, - ) -> Optional[requests.Response]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + path: str | None = None, + request_headers: Mapping[str, Any] | None = None, + request_params: Mapping[str, Any] | None = None, + request_body_data: Mapping[str, Any] | str | None = None, + request_body_json: Mapping[str, Any] | None = None, + log_formatter: Callable[[requests.Response], Any] | None = None, + ) -> requests.Response | None: request, response = self._http_client.send_request( http_method=self.get_method().value, url=self._join_url( diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index ca2405b44..ed4c3ecc7 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any, Mapping, MutableMapping, Optional, Union +from typing import Any import requests @@ -101,13 +102,13 @@ class DefaultPaginator(Paginator): pagination_strategy: PaginationStrategy config: Config - url_base: Union[InterpolatedString, str] + url_base: InterpolatedString | str parameters: InitVar[Mapping[str, Any]] decoder: Decoder = field( default_factory=lambda: PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) ) - page_size_option: Optional[RequestOption] = None - page_token_option: Optional[Union[RequestPath, RequestOption]] = None + page_size_option: RequestOption | None = None + page_token_option: RequestPath | RequestOption | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: if self.page_size_option and not self.pagination_strategy.get_page_size(): @@ -124,7 +125,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.page_token_option, ) - def get_initial_token(self) -> Optional[Any]: + def get_initial_token(self) -> Any | None: """ Return the page token that should be used for the first request of a stream @@ -137,9 +138,9 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any] = None, - ) -> Optional[Mapping[str, Any]]: + last_record: Record | None, + last_page_token_value: Any | None = None, + ) -> Mapping[str, Any] | None: next_page_token = self.pagination_strategy.next_page_token( response=response, last_page_size=last_page_size, @@ -153,10 +154,10 @@ def next_page_token( def path( self, - next_page_token: Optional[Mapping[str, Any]], - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[StreamSlice] = None, - ) -> Optional[str]: + next_page_token: Mapping[str, Any] | None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: StreamSlice | None = None, + ) -> str | None: token = next_page_token.get("next_page_token") if next_page_token else None if token and self.page_token_option and isinstance(self.page_token_option, RequestPath): # make additional interpolation context @@ -173,41 +174,41 @@ def path( def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter, next_page_token) def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, str]: return self._get_request_options(RequestOptionType.header, next_page_token) def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_data, next_page_token) def 
get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_json, next_page_token) def _get_request_options( - self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]] + self, option_type: RequestOptionType, next_page_token: Mapping[str, Any] | None ) -> MutableMapping[str, Any]: options: MutableMapping[str, Any] = {} @@ -251,7 +252,7 @@ def __init__(self, decorated: Paginator, maximum_number_of_pages: int = 5) -> No self._decorated = decorated self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL - def get_initial_token(self) -> Optional[Any]: + def get_initial_token(self) -> Any | None: self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL return self._decorated.get_initial_token() @@ -259,9 +260,9 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any] = None, - ) -> Optional[Mapping[str, Any]]: + last_record: Record | None, + last_page_token_value: Any | None = None, + ) -> Mapping[str, Any] | None: if self._page_count >= self._maximum_number_of_pages: return None @@ -272,10 +273,10 @@ def next_page_token( def path( self, - next_page_token: Optional[Mapping[str, Any]], - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[StreamSlice] = None, - ) -> Optional[str]: + next_page_token: Mapping[str, Any] | None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: StreamSlice | None = None, + ) -> str | None: return self._decorated.path( next_page_token=next_page_token, stream_state=stream_state, @@ -285,9 +286,9 @@ def path( def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._decorated.get_request_params( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token @@ -296,9 +297,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, str]: return self._decorated.get_request_headers( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token @@ -307,10 +308,10 @@ def get_request_headers( def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: return self._decorated.get_request_body_data( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) @@ -318,9 +319,9 @@ def 
get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._decorated.get_request_body_json( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py b/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py index b3b1d3b66..b309bfe58 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, MutableMapping, Optional, Union +from typing import Any import requests @@ -21,56 +22,56 @@ class NoPagination(Paginator): def path( self, - next_page_token: Optional[Mapping[str, Any]], - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[StreamSlice] = None, - ) -> Optional[str]: + next_page_token: Mapping[str, Any] | None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: StreamSlice | None = None, + ) -> str | None: return None def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: return {} def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, str]: return {} def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: return {} def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} - def get_initial_token(self) -> Optional[Any]: + def get_initial_token(self) -> Any | None: return None def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any], - ) -> Optional[Mapping[str, Any]]: + last_record: Record | None, + last_page_token_value: Any | None, + ) -> Mapping[str, Any] | None: return {} diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py index 
f8c31d4f5..8dbbba7b8 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py @@ -3,8 +3,9 @@ # from abc import ABC, abstractmethod +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Mapping, Optional +from typing import Any import requests @@ -24,7 +25,7 @@ class Paginator(ABC, RequestOptionsProvider): """ @abstractmethod - def get_initial_token(self) -> Optional[Any]: + def get_initial_token(self) -> Any | None: """ Get the page token that should be included in the request to get the first page of records """ @@ -34,9 +35,9 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any], - ) -> Optional[Mapping[str, Any]]: + last_record: Record | None, + last_page_token_value: Any | None, + ) -> Mapping[str, Any] | None: """ Returns the next_page_token to use to fetch the next page of records. @@ -51,10 +52,10 @@ def next_page_token( @abstractmethod def path( self, - next_page_token: Optional[Mapping[str, Any]], - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[StreamSlice] = None, - ) -> Optional[str]: + next_page_token: Mapping[str, Any] | None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: StreamSlice | None = None, + ) -> str | None: """ Returns the URL path to hit to fetch the next page of records diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py index 8df5ce66f..ad6168b4d 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Dict, Mapping, Optional, Union +from typing import Any import requests @@ -33,11 +34,11 @@ class CursorPaginationStrategy(PaginationStrategy): decoder (Decoder): decoder to decode the response """ - cursor_value: Union[InterpolatedString, str] + cursor_value: InterpolatedString | str config: Config parameters: InitVar[Mapping[str, Any]] - page_size: Optional[int] = None - stop_condition: Optional[Union[InterpolatedBoolean, str]] = None + page_size: int | None = None + stop_condition: InterpolatedBoolean | str | None = None decoder: Decoder = field( default_factory=lambda: PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) ) @@ -48,14 +49,14 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: else: self._cursor_value = self.cursor_value if isinstance(self.stop_condition, str): - self._stop_condition: Optional[InterpolatedBoolean] = InterpolatedBoolean( + self._stop_condition: InterpolatedBoolean | None = InterpolatedBoolean( condition=self.stop_condition, parameters=parameters ) else: self._stop_condition = self.stop_condition @property - def initial_token(self) -> Optional[Any]: + def initial_token(self) -> Any | None: """ CursorPaginationStrategy does not have an initial value because the next cursor is typically included in the response of the first request. 
For Resumable Full Refresh streams that checkpoint the page @@ -67,13 +68,13 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any] = None, - ) -> Optional[Any]: + last_record: Record | None, + last_page_token_value: Any | None = None, + ) -> Any | None: decoded_response = next(self.decoder.decode(response)) # The default way that link is presented in requests.Response is a string of various links (last, next, etc). This # is not indexable or useful for parsing the cursor, so we replace it with the link dictionary from response.links - headers: Dict[str, Any] = dict(response.headers) + headers: dict[str, Any] = dict(response.headers) headers["link"] = response.links if self._stop_condition: should_stop = self._stop_condition.eval( @@ -94,5 +95,5 @@ def next_page_token( ) return token if token else None - def get_page_size(self) -> Optional[int]: + def get_page_size(self) -> int | None: return self.page_size diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py index 512d8143c..ef52f2c98 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Mapping, Optional, Union +from typing import Any import requests @@ -44,7 +45,7 @@ class OffsetIncrement(PaginationStrategy): """ config: Config - page_size: Optional[Union[str, int]] + page_size: str | int | None parameters: InitVar[Mapping[str, Any]] decoder: Decoder = field( default_factory=lambda: PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) @@ -54,14 +55,14 @@ class OffsetIncrement(PaginationStrategy): def __post_init__(self, parameters: Mapping[str, Any]) -> None: page_size = str(self.page_size) if isinstance(self.page_size, int) else self.page_size if page_size: - self._page_size: Optional[InterpolatedString] = InterpolatedString( + self._page_size: InterpolatedString | None = InterpolatedString( page_size, parameters=parameters ) else: self._page_size = None @property - def initial_token(self) -> Optional[Any]: + def initial_token(self) -> Any | None: if self.inject_on_first_request: return 0 return None @@ -70,9 +71,9 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any] = None, - ) -> Optional[Any]: + last_record: Record | None, + last_page_token_value: Any | None = None, + ) -> Any | None: decoded_response = next(self.decoder.decode(response)) # Stop paginating when there are fewer records than the page size or the current page has no records @@ -92,7 +93,7 @@ def next_page_token( else: return last_page_token_value + last_page_size - def get_page_size(self) -> Optional[int]: + def get_page_size(self) -> int | None: if self._page_size: page_size = self._page_size.eval(self.config) if not isinstance(page_size, int): diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py index 2e1643b56..00e69497c 100644 --- 
a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any import requests @@ -25,7 +26,7 @@ class PageIncrement(PaginationStrategy): """ config: Config - page_size: Optional[Union[str, int]] + page_size: str | int | None parameters: InitVar[Mapping[str, Any]] start_from_page: int = 0 inject_on_first_request: bool = False @@ -40,7 +41,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._page_size = page_size @property - def initial_token(self) -> Optional[Any]: + def initial_token(self) -> Any | None: if self.inject_on_first_request: return self.start_from_page return None @@ -49,9 +50,9 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any], - ) -> Optional[Any]: + last_record: Record | None, + last_page_token_value: Any | None, + ) -> Any | None: # Stop paginating when there are fewer records than the page size or the current page has no records if (self._page_size and last_page_size < self._page_size) or last_page_size == 0: return None @@ -67,5 +68,5 @@ def next_page_token( else: return last_page_token_value + 1 - def get_page_size(self) -> Optional[int]: + def get_page_size(self) -> int | None: return self._page_size diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index dae02ba13..5315ec950 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -4,7 +4,7 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any, Optional +from typing import Any import requests @@ -19,7 +19,7 @@ class PaginationStrategy: @property @abstractmethod - def initial_token(self) -> Optional[Any]: + def initial_token(self) -> Any | None: """ Return the initial value of the token """ @@ -29,9 +29,9 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any], - ) -> Optional[Any]: + last_record: Record | None, + last_page_token_value: Any | None, + ) -> Any | None: """ :param response: response to process :param last_page_size: the number of records read from the response @@ -42,7 +42,7 @@ def next_page_token( pass @abstractmethod - def get_page_size(self) -> Optional[int]: + def get_page_size(self) -> int | None: """ :return: page size: The number of records to fetch in a page. 
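# The OffsetIncrement and PageIncrement hunks above keep the same stop/advance rule:
# stop when the last page was empty or shorter than the page size, otherwise advance
# the token (offset += last_page_size, or page += 1). A hedged, standalone restatement
# of that arithmetic (the page size of 2 is just an example value):
def next_offset(last_page_size: int, page_size: int | None, last_offset: int) -> int | None:
    if (page_size and last_page_size < page_size) or last_page_size == 0:
        return None  # short or empty page -> stop paginating
    return last_offset + last_page_size


def next_page_number(last_page_size: int, page_size: int | None, last_page: int) -> int | None:
    if (page_size and last_page_size < page_size) or last_page_size == 0:
        return None
    return last_page + 1


print(next_offset(last_page_size=2, page_size=2, last_offset=0))     # 2 -> fetch next page
print(next_page_number(last_page_size=1, page_size=2, last_page=3))  # None -> stop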
Returns None if unspecified """ diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py index 7c89ba552..c5a9695ea 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py @@ -3,7 +3,7 @@ # from abc import ABC, abstractmethod -from typing import Any, Optional +from typing import Any import requests @@ -47,9 +47,9 @@ def next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any] = None, - ) -> Optional[Any]: + last_record: Record | None, + last_page_token_value: Any | None = None, + ) -> Any | None: # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure # will return records in descending order. In terms of performance/memory, we return the records lazily if last_record and self._stop_condition.is_met(last_record): @@ -58,9 +58,9 @@ def next_page_token( response, last_page_size, last_record, last_page_token_value ) - def get_page_size(self) -> Optional[int]: + def get_page_size(self) -> int | None: return self._delegate.get_page_size() @property - def initial_token(self) -> Optional[Any]: + def initial_token(self) -> Any | None: return self._delegate.initial_token diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py b/airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py index 1e294bc8e..0efeb34f1 100644 --- a/airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +++ b/airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py @@ -1,7 +1,8 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any, Iterable, List, Mapping, Optional +from typing import Any import dpath @@ -17,7 +18,7 @@ class PropertiesFromEndpoint: API endpoint. The set retrieved can then be injected into the requests to extract records from an API source. """ - property_field_path: List[str] + property_field_path: list[str] retriever: Retriever config: Config parameters: InitVar[Mapping[str, Any]] @@ -28,7 +29,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: for property_field in self.property_field_path ] - def get_properties_from_endpoint(self, stream_slice: Optional[StreamSlice]) -> Iterable[str]: + def get_properties_from_endpoint(self, stream_slice: StreamSlice | None) -> Iterable[str]: response_properties = self.retriever.read_records( records_schema={}, stream_slice=stream_slice ) diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py b/airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py index 53f387775..48b78ad65 100644 --- a/airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +++ b/airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py @@ -1,8 +1,9 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. 
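# The StopConditionPaginationStrategyDecorator hunk above preserves the short-circuit:
# if the most recent record already satisfies the stop condition, pagination ends before
# the wrapped strategy is consulted. A rough sketch of that check, with a hypothetical
# "cursor already reached" condition standing in for the CDK's stop-condition class:
from typing import Any


class CursorReached:
    def __init__(self, cursor_value: int) -> None:
        self._cursor_value = cursor_value

    def is_met(self, record: dict[str, Any]) -> bool:
        return record.get("updated_at", 0) <= self._cursor_value


def next_page_token(last_record: dict[str, Any] | None, condition: CursorReached) -> Any | None:
    if last_record and condition.is_met(last_record):
        return None  # stop before delegating to the wrapped strategy
    return {"next_page_token": "delegate-to-wrapped-strategy"}  # placeholder delegation


print(next_page_token({"updated_at": 5}, CursorReached(cursor_value=10)))   # None -> stop
print(next_page_token({"updated_at": 50}, CursorReached(cursor_value=10)))  # keep paginating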
+from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass from enum import Enum -from typing import Any, Iterable, List, Mapping, Optional +from typing import Any from airbyte_cdk.sources.declarative.requesters.query_properties.strategies import GroupByKey from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import ( @@ -29,8 +30,8 @@ class PropertyChunking: """ property_limit_type: PropertyLimitType - property_limit: Optional[int] - record_merge_strategy: Optional[RecordMergeStrategy] + property_limit: int | None + record_merge_strategy: RecordMergeStrategy | None parameters: InitVar[Mapping[str, Any]] config: Config @@ -40,8 +41,8 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: ) def get_request_property_chunks( - self, property_fields: Iterable[str], always_include_properties: Optional[List[str]] - ) -> Iterable[List[str]]: + self, property_fields: Iterable[str], always_include_properties: list[str] | None + ) -> Iterable[list[str]]: if not self.property_limit: single_property_chunk = list(property_fields) if always_include_properties: @@ -65,5 +66,5 @@ def get_request_property_chunks( chunk_size += property_field_size yield current_chunk - def get_merge_key(self, record: Record) -> Optional[str]: + def get_merge_key(self, record: Record) -> str | None: return self._record_merge_strategy.get_group_key(record=record) diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py b/airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py index 4dd7bced8..ffc71dddc 100644 --- a/airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +++ b/airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py @@ -1,7 +1,8 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any, Iterable, List, Mapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.requesters.query_properties import ( PropertiesFromEndpoint, @@ -19,22 +20,22 @@ class QueryProperties: properties into smaller chunks to satisfy API restrictions around the total amount of data retrieved """ - property_list: Optional[Union[List[str], PropertiesFromEndpoint]] - always_include_properties: Optional[List[str]] - property_chunking: Optional[PropertyChunking] + property_list: list[str] | PropertiesFromEndpoint | None + always_include_properties: list[str] | None + property_chunking: PropertyChunking | None config: Config parameters: InitVar[Mapping[str, Any]] def get_request_property_chunks( - self, stream_slice: Optional[StreamSlice] = None - ) -> Iterable[List[str]]: + self, stream_slice: StreamSlice | None = None + ) -> Iterable[list[str]]: """ Uses the defined property_list to fetch the total set of properties dynamically or from a static list and based on the resulting properties, performs property chunking if applicable. :param stream_slice: The StreamSlice of the current partition being processed during the sync. 
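# The PropertyChunking hunk above keeps the chunking behaviour: with no property_limit,
# everything (plus any always-included properties) goes out in one request; otherwise
# properties are split into chunks that respect the limit. A simplified sketch where the
# limit counts fields rather than characters (the CDK also supports a character-based
# limit type, which is not modeled here):
from collections.abc import Iterable


def chunk_properties(
    property_fields: Iterable[str],
    property_limit: int | None,
    always_include: list[str] | None = None,
) -> Iterable[list[str]]:
    always_include = always_include or []
    if not property_limit:
        yield always_include + list(property_fields)
        return
    current: list[str] = list(always_include)
    for field_name in property_fields:
        if len(current) - len(always_include) >= property_limit:
            yield current
            current = list(always_include)
        current.append(field_name)
    if current != always_include:
        yield current


print(list(chunk_properties(["a", "b", "c", "d"], property_limit=2, always_include=["id"])))
# [['id', 'a', 'b'], ['id', 'c', 'd']]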
This is included because subcomponents of QueryProperties can make use of interpolation of the top-level StreamSlice object """ - fields: Union[Iterable[str], List[str]] + fields: Iterable[str] | list[str] if isinstance(self.property_list, PropertiesFromEndpoint): fields = self.property_list.get_properties_from_endpoint(stream_slice=stream_slice) else: @@ -48,7 +49,7 @@ def get_request_property_chunks( yield list(fields) # delete later, but leaving this to keep the discussion thread on the PR from getting hidden - def has_multiple_chunks(self, stream_slice: Optional[StreamSlice]) -> bool: + def has_multiple_chunks(self, stream_slice: StreamSlice | None) -> bool: property_chunks = iter(self.get_request_property_chunks(stream_slice=stream_slice)) try: next(property_chunks) diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py b/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py index e470e5521..ba22ecd54 100644 --- a/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +++ b/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py @@ -1,7 +1,8 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, List, Mapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.requesters.query_properties.strategies.merge_strategy import ( RecordMergeStrategy, @@ -15,14 +16,14 @@ class GroupByKey(RecordMergeStrategy): Record merge strategy that combines records together according to values on the record for one or many keys. """ - key: Union[str, List[str]] + key: str | list[str] parameters: InitVar[Mapping[str, Any]] config: Config def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._keys = [self.key] if isinstance(self.key, str) else self.key - def get_group_key(self, record: Record) -> Optional[str]: + def get_group_key(self, record: Record) -> str | None: resolved_keys = [] for key in self._keys: key_value = record.data.get(key) diff --git a/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py b/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py index f77b5ba0c..2e32c3a5b 100644 --- a/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py @@ -2,7 +2,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Optional from airbyte_cdk.sources.types import Record @@ -15,5 +14,5 @@ class RecordMergeStrategy(ABC): """ @abstractmethod - def get_group_key(self, record: Record) -> Optional[str]: + def get_group_key(self, record: Record) -> str | None: pass diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index e0946b53b..78912af90 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -2,9 +2,10 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
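# The GroupByKey hunk above keeps the merge-key behaviour: a composite key is built from
# one or many record fields, and None is returned when any part is missing (so the record
# cannot be merged). A minimal standalone version of that lookup, with plain dicts
# standing in for the CDK Record type:
from typing import Any


def get_group_key(record: dict[str, Any], keys: str | list[str]) -> str | None:
    key_list = [keys] if isinstance(keys, str) else keys
    resolved: list[str] = []
    for key in key_list:
        value = record.get(key)
        if value is None:
            return None  # a missing key part means no usable group key
        resolved.append(str(value))
    return ",".join(resolved)


print(get_group_key({"id": 42, "region": "eu"}, ["id", "region"]))  # "42,eu"
print(get_group_key({"id": 42}, ["id", "region"]))                  # None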
# +from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass from enum import Enum -from typing import Any, List, Literal, Mapping, MutableMapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.types import Config @@ -35,8 +36,8 @@ class RequestOption: inject_into: RequestOptionType parameters: InitVar[Mapping[str, Any]] - field_name: Optional[Union[InterpolatedString, str]] = None - field_path: Optional[List[Union[InterpolatedString, str]]] = None + field_name: InterpolatedString | str | None = None + field_path: list[InterpolatedString | str] | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: # Validate inputs. We should expect either field_name or field_path, but not both @@ -94,14 +95,14 @@ def inject_into_request( current = target # Convert path segments into strings, evaluating any interpolated segments # Example: ["data", "{{ config[user_type] }}", "id"] -> ["data", "admin", "id"] - *path_parts, final_key = [ + *path_parts, final_key = ( str( segment.eval(config=config) if isinstance(segment, InterpolatedString) else segment ) for segment in self.field_path - ] + ) # Build a nested dictionary structure and set the final value at the deepest level for part in path_parts: diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index 437ea7b7b..946ea4115 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -2,8 +2,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
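# The RequestOption hunk above swaps the list comprehension feeding the starred unpack
# for a generator expression; the nested-path injection itself is unchanged: split
# field_path into intermediate segments plus a final key, then build nested dicts down
# to that key. A simplified sketch without the interpolation step (plain strings stand
# in for InterpolatedString segments):
from typing import Any


def inject_into_body(target: dict[str, Any], field_path: list[str], value: Any) -> None:
    # Starred unpacking works on any iterable, so a generator expression is enough here;
    # no intermediate list is needed.
    *path_parts, final_key = (str(segment) for segment in field_path)
    current = target
    for part in path_parts:
        current = current.setdefault(part, {})
    current[final_key] = value


body: dict[str, Any] = {}
inject_into_body(body, ["data", "admin", "id"], 123)
print(body)  # {'data': {'admin': {'id': 123}}}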
# +from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, MutableMapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.requesters.request_option import ( @@ -25,10 +26,10 @@ class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider): config: Config parameters: InitVar[Mapping[str, Any]] - start_time_option: Optional[RequestOption] = None - end_time_option: Optional[RequestOption] = None - partition_field_start: Optional[str] = None - partition_field_end: Optional[str] = None + start_time_option: RequestOption | None = None + end_time_option: RequestOption | None = None + partition_field_start: str | None = None + partition_field_end: str | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._partition_field_start = InterpolatedString.create( @@ -41,41 +42,41 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter, stream_slice) def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.header, stream_slice) def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: return self._get_request_options(RequestOptionType.body_data, stream_slice) def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_json, stream_slice) def _get_request_options( - self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice] + self, option_type: RequestOptionType, stream_slice: StreamSlice | None ) -> Mapping[str, Any]: options: MutableMapping[str, Any] = {} if not stream_slice: diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py index 449da977f..5fc55bced 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py @@ -2,8 +2,9 @@ # Copyright (c) 2024 Airbyte, 
Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import ( RequestOptionsProvider, @@ -26,35 +27,35 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: return {} def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return {} diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py index 4e175bb28..46d16103b 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
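# The DatetimeBasedRequestOptionsProvider hunks above keep the same guard: with no
# stream slice there is nothing to inject, otherwise the slice's start/end values are
# copied into whichever request option type was configured. A rough sketch of that
# injection; the slice keys and parameter names here are hypothetical, not CDK defaults:
from collections.abc import Mapping, MutableMapping
from typing import Any


def datetime_request_params(
    stream_slice: Mapping[str, Any] | None,
    start_param: str = "since",
    end_param: str = "until",
) -> Mapping[str, Any]:
    options: MutableMapping[str, Any] = {}
    if not stream_slice:
        return options
    if "start_time" in stream_slice:
        options[start_param] = stream_slice["start_time"]
    if "end_time" in stream_slice:
        options[end_param] = stream_slice["end_time"]
    return options


print(datetime_request_params({"start_time": "2024-01-01", "end_time": "2024-01-31"}))
# {'since': '2024-01-01', 'until': '2024-01-31'}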
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Mapping, Optional, Union +from typing import Any from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import ( InterpolatedNestedMapping, @@ -20,14 +21,12 @@ class InterpolatedNestedRequestInputProvider: """ parameters: InitVar[Mapping[str, Any]] - request_inputs: Optional[Union[str, NestedMapping]] = field(default=None) + request_inputs: str | NestedMapping | None = field(default=None) config: Config = field(default_factory=dict) - _interpolator: Optional[Union[InterpolatedString, InterpolatedNestedMapping]] = field( - init=False, repr=False, default=None - ) - _request_inputs: Optional[Union[str, NestedMapping]] = field( + _interpolator: InterpolatedString | InterpolatedNestedMapping | None = field( init=False, repr=False, default=None ) + _request_inputs: str | NestedMapping | None = field(init=False, repr=False, default=None) def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._request_inputs = self.request_inputs or {} @@ -42,8 +41,8 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def eval_request_inputs( self, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Returns the request inputs to set on an outgoing HTTP request diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py index ed0e54c60..103cc352c 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py @@ -2,12 +2,13 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Mapping, Optional, Tuple, Type, Union +from typing import Any from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState +from airbyte_cdk.sources.types import Config, StreamSlice @dataclass @@ -17,14 +18,12 @@ class InterpolatedRequestInputProvider: """ parameters: InitVar[Mapping[str, Any]] - request_inputs: Optional[Union[str, Mapping[str, str]]] = field(default=None) + request_inputs: str | Mapping[str, str] | None = field(default=None) config: Config = field(default_factory=dict) - _interpolator: Optional[Union[InterpolatedString, InterpolatedMapping]] = field( - init=False, repr=False, default=None - ) - _request_inputs: Optional[Union[str, Mapping[str, str]]] = field( + _interpolator: InterpolatedString | InterpolatedMapping | None = field( init=False, repr=False, default=None ) + _request_inputs: str | Mapping[str, str] | None = field(init=False, repr=False, default=None) def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._request_inputs = self.request_inputs or {} @@ -37,10 +36,10 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def eval_request_inputs( self, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - valid_key_types: Optional[Tuple[Type[Any]]] = None, - valid_value_types: Optional[Tuple[Type[Any], ...]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + valid_key_types: tuple[type[Any]] | None = None, + valid_value_types: tuple[type[Any], ...] | None = None, ) -> Mapping[str, Any]: """ Returns the request inputs to set on an outgoing HTTP request diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py index 2e0038730..7b5d2c6d6 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py @@ -2,8 +2,9 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. 
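# The InterpolatedRequestInputProvider hunk above is annotation-only, but its
# eval_request_inputs signature shows the valid_key_types / valid_value_types tuples
# that gate what the interpolated mapping may contain. A hedged sketch of that style of
# validation (the exact error wording and call sites are the CDK's, not shown here):
from collections.abc import Mapping
from typing import Any


def validate_request_inputs(
    inputs: Mapping[Any, Any],
    valid_key_types: tuple[type[Any], ...] | None = None,
    valid_value_types: tuple[type[Any], ...] | None = None,
) -> Mapping[Any, Any]:
    for key, value in inputs.items():
        if valid_key_types and not isinstance(key, valid_key_types):
            raise ValueError(f"Invalid key type {type(key).__name__} for {key!r}")
        if valid_value_types and not isinstance(value, valid_value_types):
            raise ValueError(f"Invalid value type {type(value).__name__} for key {key!r}")
    return inputs


print(validate_request_inputs({"limit": 100}, valid_key_types=(str,), valid_value_types=(str, int)))
# {'limit': 100}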
# +from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any, List, Mapping, MutableMapping, Optional, Union +from typing import Any, Union from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import ( @@ -36,11 +37,11 @@ class InterpolatedRequestOptionsProvider(RequestOptionsProvider): parameters: InitVar[Mapping[str, Any]] config: Config = field(default_factory=dict) - request_parameters: Optional[RequestInput] = None - request_headers: Optional[RequestInput] = None - request_body_data: Optional[RequestInput] = None - request_body_json: Optional[NestedMapping] = None - query_properties_key: Optional[str] = None + request_parameters: RequestInput | None = None + request_headers: RequestInput | None = None + request_body_data: RequestInput | None = None + request_body_json: NestedMapping | None = None + query_properties_key: str | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: if self.request_parameters is None: @@ -73,9 +74,9 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: interpolated_value = self._parameter_interpolator.eval_request_inputs( stream_slice, @@ -96,7 +97,7 @@ def get_request_params( raise ValueError( "QueryProperties component is defined but stream_partition does not contain query_properties. Please contact Airbyte Support" ) - elif not isinstance(stream_slice.extra_fields.get("query_properties"), List): + elif not isinstance(stream_slice.extra_fields.get("query_properties"), list): raise ValueError( "QueryProperties component is defined but stream_slice.extra_fields.query_properties is not a List. 
Please contact Airbyte Support" ) @@ -112,19 +113,19 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._headers_interpolator.eval_request_inputs(stream_slice, next_page_token) def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: return self._body_data_interpolator.eval_request_inputs( stream_slice, next_page_token, @@ -135,8 +136,8 @@ def get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return self._body_json_interpolator.eval_request_inputs(stream_slice, next_page_token) diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py index f0a94ecb9..f5147d481 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py @@ -3,8 +3,9 @@ # from abc import abstractmethod +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any from airbyte_cdk.sources.types import StreamSlice, StreamState @@ -25,9 +26,9 @@ class RequestOptionsProvider: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Specifies the query parameters that should be set on an outgoing HTTP request given the inputs. @@ -40,9 +41,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """Return any non-auth headers. 
Authentication headers will overwrite any overlapping headers returned from this method.""" @@ -50,10 +51,10 @@ def get_request_headers( def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: """ Specifies how to populate the body of the request with a non-JSON payload. @@ -68,9 +69,9 @@ def get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Specifies how to populate the body of the request with a JSON payload. diff --git a/airbyte_cdk/sources/declarative/requesters/request_path.py b/airbyte_cdk/sources/declarative/requesters/request_path.py index 378ea6220..d6b289f9d 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_path.py +++ b/airbyte_cdk/sources/declarative/requesters/request_path.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping +from typing import Any @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte_cdk/sources/declarative/requesters/requester.py index ddda1ddba..00ab9c8ba 100644 --- a/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte_cdk/sources/declarative/requesters/requester.py @@ -3,8 +3,9 @@ # from abc import abstractmethod +from collections.abc import Callable, Mapping, MutableMapping from enum import Enum -from typing import Any, Callable, Mapping, MutableMapping, Optional, Union +from typing import Any import requests @@ -38,9 +39,9 @@ def get_authenticator(self) -> DeclarativeAuthenticator: def get_url_base( self, *, - stream_state: Optional[StreamState], - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], + stream_state: StreamState | None, + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, ) -> str: """ :return: URL base for the API endpoint e.g: if you wanted to hit https://myapi.com/v1/some_entity then this should return "https://myapi.com/v1/" @@ -50,9 +51,9 @@ def get_url_base( def get_path( self, *, - stream_state: Optional[StreamState], - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], + stream_state: StreamState | None, + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, ) -> str: """ Returns the URL path for the API endpoint e.g: if you wanted to hit https://myapi.com/v1/some_entity then this should return "some_entity" @@ -68,9 +69,9 @@ def get_method(self) -> HttpMethod: def get_request_params( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: """ Specifies the query parameters that should be set on an 
outgoing HTTP request given the inputs. @@ -82,9 +83,9 @@ def get_request_params( def get_request_headers( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method. @@ -94,10 +95,10 @@ def get_request_headers( def get_request_body_data( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: """ Specifies how to populate the body of the request with a non-JSON payload. @@ -112,9 +113,9 @@ def get_request_body_data( def get_request_body_json( self, *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Specifies how to populate the body of the request with a JSON payload. @@ -125,16 +126,16 @@ def get_request_body_json( @abstractmethod def send_request( self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - path: Optional[str] = None, - request_headers: Optional[Mapping[str, Any]] = None, - request_params: Optional[Mapping[str, Any]] = None, - request_body_data: Optional[Union[Mapping[str, Any], str]] = None, - request_body_json: Optional[Mapping[str, Any]] = None, - log_formatter: Optional[Callable[[requests.Response], Any]] = None, - ) -> Optional[requests.Response]: + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + path: str | None = None, + request_headers: Mapping[str, Any] | None = None, + request_params: Mapping[str, Any] | None = None, + request_body_data: Mapping[str, Any] | str | None = None, + request_body_json: Mapping[str, Any] | None = None, + log_formatter: Callable[[requests.Response], Any] | None = None, + ) -> requests.Response | None: """ Sends a request and returns the response. Might return no response if the error handler chooses to ignore the response or throw an exception in case of an error. If path is set, the path configured on the requester itself is ignored. diff --git a/airbyte_cdk/sources/declarative/resolvers/__init__.py b/airbyte_cdk/sources/declarative/resolvers/__init__.py index dba2f60b8..a6e5467a0 100644 --- a/airbyte_cdk/sources/declarative/resolvers/__init__.py +++ b/airbyte_cdk/sources/declarative/resolvers/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
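# The Requester hunks above keep the split between get_url_base() (e.g.
# "https://myapi.com/v1/") and get_path() (e.g. "some_entity"); the full URL is assembled
# from the two. A small sketch of one reasonable way to join them without doubling
# slashes (the CDK's own _join_url may differ):
def join_url(url_base: str, path: str) -> str:
    return url_base.rstrip("/") + "/" + path.lstrip("/")


print(join_url("https://myapi.com/v1/", "some_entity"))  # https://myapi.com/v1/some_entity
print(join_url("https://myapi.com/v1", "/some_entity"))  # https://myapi.com/v1/some_entity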
# -from typing import Mapping +from collections.abc import Mapping from pydantic.v1 import BaseModel diff --git a/airbyte_cdk/sources/declarative/resolvers/components_resolver.py b/airbyte_cdk/sources/declarative/resolvers/components_resolver.py index 5975b3082..564f695d1 100644 --- a/airbyte_cdk/sources/declarative/resolvers/components_resolver.py +++ b/airbyte_cdk/sources/declarative/resolvers/components_resolver.py @@ -3,8 +3,9 @@ # from abc import ABC, abstractmethod +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any, Dict, Iterable, List, Mapping, Optional, Type, Union +from typing import Any, Union from typing_extensions import deprecated @@ -18,9 +19,9 @@ class ComponentMappingDefinition: what field in the stream template should be updated with value, supporting dynamic interpolation and type enforcement.""" - field_path: List["InterpolatedString"] + field_path: list["InterpolatedString"] value: Union["InterpolatedString", str] - value_type: Optional[Type[Any]] + value_type: type[Any] | None parameters: InitVar[Mapping[str, Any]] @@ -30,9 +31,9 @@ class ResolvedComponentMappingDefinition: what field in the stream template should be updated with value, supporting dynamic interpolation and type enforcement.""" - field_path: List["InterpolatedString"] + field_path: list["InterpolatedString"] value: "InterpolatedString" - value_type: Optional[Type[Any]] + value_type: type[Any] | None parameters: InitVar[Mapping[str, Any]] @@ -45,8 +46,8 @@ class ComponentsResolver(ABC): @abstractmethod def resolve_components( - self, stream_template_config: Dict[str, Any] - ) -> Iterable[Dict[str, Any]]: + self, stream_template_config: dict[str, Any] + ) -> Iterable[dict[str, Any]]: """ Maps and populates values into a stream template configuration. :param stream_template_config: The stream template with placeholders for components. diff --git a/airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py b/airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py index 0308ea5da..8442f4cfa 100644 --- a/airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +++ b/airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py @@ -2,9 +2,10 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +from collections.abc import Iterable, Mapping from copy import deepcopy from dataclasses import InitVar, dataclass, field -from typing import Any, Dict, Iterable, List, Mapping, Union +from typing import Any import dpath from typing_extensions import deprecated @@ -26,7 +27,7 @@ class StreamConfig: Identifies stream config details for dynamic schema extraction and processing. 
""" - configs_pointer: List[Union[InterpolatedString, str]] + configs_pointer: list[InterpolatedString | str] parameters: InitVar[Mapping[str, Any]] def __post_init__(self, parameters: Mapping[str, Any]) -> None: @@ -50,9 +51,9 @@ class ConfigComponentsResolver(ComponentsResolver): stream_config: StreamConfig config: Config - components_mapping: List[ComponentMappingDefinition] + components_mapping: list[ComponentMappingDefinition] parameters: InitVar[Mapping[str, Any]] - _resolved_components: List[ResolvedComponentMappingDefinition] = field( + _resolved_components: list[ResolvedComponentMappingDefinition] = field( init=False, repr=False, default_factory=list ) @@ -65,7 +66,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: """ for component_mapping in self.components_mapping: - if isinstance(component_mapping.value, (str, InterpolatedString)): + if isinstance(component_mapping.value, str | InterpolatedString): interpolated_value = ( InterpolatedString.create(component_mapping.value, parameters=parameters) if isinstance(component_mapping.value, str) @@ -104,8 +105,8 @@ def _stream_config(self) -> Iterable[Mapping[str, Any]]: return stream_config def resolve_components( - self, stream_template_config: Dict[str, Any] - ) -> Iterable[Dict[str, Any]]: + self, stream_template_config: dict[str, Any] + ) -> Iterable[dict[str, Any]]: """ Resolves components in the stream template configuration by populating values. diff --git a/airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py b/airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py index 6e85fc578..cf95f6655 100644 --- a/airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +++ b/airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py @@ -2,9 +2,10 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +from collections.abc import Iterable, Mapping from copy import deepcopy from dataclasses import InitVar, dataclass, field -from typing import Any, Dict, Iterable, List, Mapping +from typing import Any import dpath from typing_extensions import deprecated @@ -35,9 +36,9 @@ class HttpComponentsResolver(ComponentsResolver): retriever: Retriever config: Config - components_mapping: List[ComponentMappingDefinition] + components_mapping: list[ComponentMappingDefinition] parameters: InitVar[Mapping[str, Any]] - _resolved_components: List[ResolvedComponentMappingDefinition] = field( + _resolved_components: list[ResolvedComponentMappingDefinition] = field( init=False, repr=False, default_factory=list ) @@ -49,7 +50,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: parameters (Mapping[str, Any]): Parameters for interpolation. """ for component_mapping in self.components_mapping: - if isinstance(component_mapping.value, (str, InterpolatedString)): + if isinstance(component_mapping.value, str | InterpolatedString): interpolated_value = ( InterpolatedString.create(component_mapping.value, parameters=parameters) if isinstance(component_mapping.value, str) @@ -75,8 +76,8 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: ) def resolve_components( - self, stream_template_config: Dict[str, Any] - ) -> Iterable[Dict[str, Any]]: + self, stream_template_config: dict[str, Any] + ) -> Iterable[dict[str, Any]]: """ Resolves components in the stream template configuration by populating values. 
diff --git a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index 33a288c43..247d539fb 100644 --- a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -1,8 +1,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. +from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Iterable, Mapping, Optional +from typing import Any from airbyte_cdk.sources.declarative.async_job.job import AsyncJob from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector @@ -84,7 +85,7 @@ def _get_stream_state(self) -> StreamState: return self.state def _validate_and_get_stream_slice_jobs( - self, stream_slice: Optional[StreamSlice] = None + self, stream_slice: StreamSlice | None = None ) -> Iterable[AsyncJob]: """ Validates the stream_slice argument and returns the partition from it. @@ -101,13 +102,13 @@ def _validate_and_get_stream_slice_jobs( """ return stream_slice.extra_fields.get("jobs", []) if stream_slice else [] - def stream_slices(self) -> Iterable[Optional[StreamSlice]]: + def stream_slices(self) -> Iterable[StreamSlice | None]: yield from self.stream_slicer.stream_slices() def read_records( self, records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice] = None, + stream_slice: StreamSlice | None = None, ) -> Iterable[StreamData]: # emit the slice_descriptor log message, for connector builder TestRead yield self.slice_logger.create_slice_log_message(stream_slice.cursor_slice) # type: ignore diff --git a/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte_cdk/sources/declarative/retrievers/retriever.py index 155de5782..f4ba620b0 100644 --- a/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -3,7 +3,8 @@ # from abc import abstractmethod -from typing import Any, Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Any from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import StreamSlice from airbyte_cdk.sources.streams.core import StreamData @@ -19,7 +20,7 @@ class Retriever: def read_records( self, records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice] = None, + stream_slice: StreamSlice | None = None, ) -> Iterable[StreamData]: """ Fetch a stream's records from an HTTP API source @@ -30,7 +31,7 @@ def read_records( """ @abstractmethod - def stream_slices(self) -> Iterable[Optional[StreamSlice]]: + def stream_slices(self) -> Iterable[StreamSlice | None]: """Returns the stream slices""" @property diff --git a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index a739a628a..b0f1e7a8a 100644 --- a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -4,20 +4,12 @@ import json from collections import defaultdict +from collections.abc import Callable, Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field from functools import partial from itertools import islice from typing import ( Any, - Callable, - Iterable, - List, - Mapping, - MutableMapping, - Optional, - Set, - Tuple, - Union, ) import requests @@ -79,19 +71,19 @@ class SimpleRetriever(Retriever): config: Config parameters: 
InitVar[Mapping[str, Any]] name: str - _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="") - primary_key: Optional[Union[str, List[str], List[List[str]]]] + _name: InterpolatedString | str = field(init=False, repr=False, default="") + primary_key: str | list[str] | list[list[str]] | None _primary_key: str = field(init=False, repr=False, default="") - paginator: Optional[Paginator] = None + paginator: Paginator | None = None stream_slicer: StreamSlicer = field( default_factory=lambda: SinglePartitionRouter(parameters={}) ) request_option_provider: RequestOptionsProvider = field( default_factory=lambda: DefaultRequestOptionsProvider(parameters={}) ) - cursor: Optional[DeclarativeCursor] = None + cursor: DeclarativeCursor | None = None ignore_stream_slicer_parameters_on_paginated_requests: bool = False - additional_query_properties: Optional[QueryProperties] = None + additional_query_properties: QueryProperties | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._paginator = self.paginator or NoPagination(parameters=parameters) @@ -119,8 +111,8 @@ def name(self, value: str) -> None: self._name = value def _get_mapping( - self, method: Callable[..., Optional[Union[Mapping[str, Any], str]]], **kwargs: Any - ) -> Tuple[Union[Mapping[str, Any], str], Set[str]]: + self, method: Callable[..., Mapping[str, Any] | str | None], **kwargs: Any + ) -> tuple[Mapping[str, Any] | str, set[str]]: """ Get mapping from the provided method, and get the keys of the mapping. If the method returns a string, it will return the string and an empty set. @@ -132,12 +124,12 @@ def _get_mapping( def _get_request_options( self, - stream_state: Optional[StreamData], - stream_slice: Optional[StreamSlice], - next_page_token: Optional[Mapping[str, Any]], - paginator_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], - stream_slicer_method: Callable[..., Optional[Union[Mapping[str, Any], str]]], - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamData | None, + stream_slice: StreamSlice | None, + next_page_token: Mapping[str, Any] | None, + paginator_method: Callable[..., Mapping[str, Any] | str | None], + stream_slicer_method: Callable[..., Mapping[str, Any] | str | None], + ) -> Mapping[str, Any] | str: """ Get the request_option from the paginator and the stream slicer. Raise a ValueError if there's a key collision @@ -164,9 +156,9 @@ def _get_request_options( def _request_headers( self, - stream_state: Optional[StreamData] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamData | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Specifies request headers. @@ -185,9 +177,9 @@ def _request_headers( def _request_params( self, - stream_state: Optional[StreamData] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamData | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Specifies the query parameters that should be set on an outgoing HTTP request given the inputs. 
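_get_request_options above combines the paginator's and the stream slicer's request options and raises a ValueError on a key collision. A stripped-down sketch of that merge rule, with plain dicts standing in for the two providers (merge_request_options is an illustrative name, not a CDK function):

from collections.abc import Mapping
from typing import Any


def merge_request_options(
    paginator_options: Mapping[str, Any] | str,
    slicer_options: Mapping[str, Any] | str,
) -> Mapping[str, Any] | str:
    # A string body cannot be merged with other options.
    if isinstance(paginator_options, str) or isinstance(slicer_options, str):
        if paginator_options and slicer_options:
            raise ValueError("Cannot combine a string body with other request options")
        return paginator_options or slicer_options

    overlapping = set(paginator_options) & set(slicer_options)
    if overlapping:
        raise ValueError(f"Duplicate request option keys: {sorted(overlapping)}")
    return {**paginator_options, **slicer_options}


# Usage: page parameters from the paginator plus a filter from the stream slicer.
params = merge_request_options({"page": 2}, {"updated_since": "2024-01-01"})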
@@ -207,10 +199,10 @@ def _request_params( def _request_body_data( self, - stream_state: Optional[StreamData] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Union[Mapping[str, Any], str]: + stream_state: StreamData | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str: """ Specifies how to populate the body of the request with a non-JSON payload. @@ -230,10 +222,10 @@ def _request_body_data( def _request_body_json( self, - stream_state: Optional[StreamData] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Optional[Mapping[str, Any]]: + stream_state: StreamData | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | None: """ Specifies how to populate the body of the request with a JSON payload. @@ -252,10 +244,10 @@ def _request_body_json( def _paginator_path( self, - next_page_token: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[StreamSlice] = None, - ) -> Optional[str]: + next_page_token: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: StreamSlice | None = None, + ) -> str | None: """ If the paginator points to a path, follow it, else return nothing so the requester is used. :param next_page_token: @@ -269,11 +261,11 @@ def _paginator_path( def _parse_response( self, - response: Optional[requests.Response], + response: requests.Response | None, stream_state: StreamState, records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Record]: if not response: yield from [] @@ -287,7 +279,7 @@ def _parse_response( ) @property # type: ignore - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: """The stream's primary key""" return self._primary_key @@ -300,9 +292,9 @@ def _next_page_token( self, response: requests.Response, last_page_size: int, - last_record: Optional[Record], - last_page_token_value: Optional[Any], - ) -> Optional[Mapping[str, Any]]: + last_record: Record | None, + last_page_token_value: Any | None, + ) -> Mapping[str, Any] | None: """ Specifies a pagination strategy. @@ -321,8 +313,8 @@ def _fetch_next_page( self, stream_state: Mapping[str, Any], stream_slice: StreamSlice, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Optional[requests.Response]: + next_page_token: Mapping[str, Any] | None = None, + ) -> requests.Response | None: return self.requester.send_request( path=self._paginator_path( next_page_token=next_page_token, @@ -357,20 +349,20 @@ def _fetch_next_page( # This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well. 
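_read_pages below loops on _fetch_next_page and _next_page_token until the pagination strategy stops producing a token. A skeletal version of that control flow, with fetch_page and next_token as stand-ins for the requester and the pagination strategy (both names are illustrative):

from collections.abc import Callable, Iterable, Mapping
from typing import Any

Record = Mapping[str, Any]


def read_pages(
    fetch_page: Callable[[Mapping[str, Any] | None], list[Record]],
    next_token: Callable[[list[Record]], Mapping[str, Any] | None],
) -> Iterable[Record]:
    # Yield records page by page until the pagination callback returns no further token.
    page_token: Mapping[str, Any] | None = None
    while True:
        records = fetch_page(page_token)
        yield from records
        page_token = next_token(records)
        if page_token is None:
            break


# Usage with an in-memory "API" of two pages followed by an empty page.
pages = [[{"id": 1}, {"id": 2}], [{"id": 3}], []]


def fetch_fake_page(token: Mapping[str, Any] | None) -> list[Record]:
    return pages[token["page"]] if token else pages[0]


def fake_next_token(records: list[Record]) -> Mapping[str, Any] | None:
    # Advance to the next page index as long as the current page returned anything.
    return {"page": pages.index(records) + 1} if records else None


all_records = list(read_pages(fetch_fake_page, fake_next_token))  # ids 1, 2, 3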
def _read_pages( self, - records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]], + records_generator_fn: Callable[[requests.Response | None], Iterable[Record]], stream_state: Mapping[str, Any], stream_slice: StreamSlice, ) -> Iterable[Record]: pagination_complete = False initial_token = self._paginator.get_initial_token() - next_page_token: Optional[Mapping[str, Any]] = ( + next_page_token: Mapping[str, Any] | None = ( {"next_page_token": initial_token} if initial_token is not None else None ) while not pagination_complete: response = self._fetch_next_page(stream_state, stream_slice, next_page_token) last_page_size = 0 - last_record: Optional[Record] = None + last_record: Record | None = None for record in records_generator_fn(response): last_page_size += 1 last_record = record @@ -396,21 +388,21 @@ def _read_pages( def _read_single_page( self, - records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]], + records_generator_fn: Callable[[requests.Response | None], Iterable[Record]], stream_state: Mapping[str, Any], stream_slice: StreamSlice, ) -> Iterable[StreamData]: initial_token = stream_state.get("next_page_token") if initial_token is None: initial_token = self._paginator.get_initial_token() - next_page_token: Optional[Mapping[str, Any]] = ( + next_page_token: Mapping[str, Any] | None = ( {"next_page_token": initial_token} if initial_token else None ) response = self._fetch_next_page(stream_state, stream_slice, next_page_token) last_page_size = 0 - last_record: Optional[Record] = None + last_record: Record | None = None for record in records_generator_fn(response): last_page_size += 1 last_record = record @@ -440,7 +432,7 @@ def _read_single_page( def read_records( self, records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice] = None, + stream_slice: StreamSlice | None = None, ) -> Iterable[StreamData]: """ Fetch a stream's records from an HTTP API source @@ -556,10 +548,10 @@ def read_records( def _get_most_recent_record( self, - current_most_recent: Optional[Record], - current_record: Optional[Record], + current_most_recent: Record | None, + current_record: Record | None, stream_slice: StreamSlice, - ) -> Optional[Record]: + ) -> Record | None: if self.cursor and current_record: if not current_most_recent: return current_record @@ -572,9 +564,7 @@ def _get_most_recent_record( else: return None - def _extract_record( - self, stream_data: StreamData, stream_slice: StreamSlice - ) -> Optional[Record]: + def _extract_record(self, stream_data: StreamData, stream_slice: StreamSlice) -> Record | None: """ As we allow the output of _read_pages to be StreamData, it can be multiple things. Therefore, we need to filter out and normalize to data to streamline the rest of the process. @@ -582,7 +572,7 @@ def _extract_record( if isinstance(stream_data, Record): # Record is not part of `StreamData` but is the most common implementation of `Mapping[str, Any]` which is part of `StreamData` return stream_data - elif isinstance(stream_data, (dict, Mapping)): + elif isinstance(stream_data, dict | Mapping): return Record( data=dict(stream_data), associated_slice=stream_slice, stream_name=self.name ) @@ -595,7 +585,7 @@ def _extract_record( return None # stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. 
Will be resolved by the decoupling of http stream and simple retriever - def stream_slices(self) -> Iterable[Optional[StreamSlice]]: # type: ignore + def stream_slices(self) -> Iterable[StreamSlice | None]: # type: ignore """ Specifies the slices for this stream. See the stream slicing section of the docs for more information. @@ -618,10 +608,10 @@ def state(self, value: StreamState) -> None: def _parse_records( self, - response: Optional[requests.Response], + response: requests.Response | None, stream_state: Mapping[str, Any], records_schema: Mapping[str, Any], - stream_slice: Optional[StreamSlice], + stream_slice: StreamSlice | None, ) -> Iterable[Record]: yield from self._parse_response( response, @@ -656,15 +646,15 @@ def __post_init__(self, options: Mapping[str, Any]) -> None: ) # stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever - def stream_slices(self) -> Iterable[Optional[StreamSlice]]: # type: ignore + def stream_slices(self) -> Iterable[StreamSlice | None]: # type: ignore return islice(super().stream_slices(), self.maximum_number_of_slices) def _fetch_next_page( self, stream_state: Mapping[str, Any], stream_slice: StreamSlice, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Optional[requests.Response]: + next_page_token: Mapping[str, Any] | None = None, + ) -> requests.Response | None: return self.requester.send_request( path=self._paginator_path( next_page_token=next_page_token, @@ -715,7 +705,7 @@ class LazySimpleRetriever(SimpleRetriever): def _read_pages( self, - records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]], + records_generator_fn: Callable[[requests.Response | None], Iterable[Record]], stream_state: Mapping[str, Any], stream_slice: StreamSlice, ) -> Iterable[Record]: @@ -743,7 +733,7 @@ def _read_pages( def _paginate( self, next_page_token: Any, - records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]], + records_generator_fn: Callable[[requests.Response | None], Iterable[Record]], stream_state: Mapping[str, Any], stream_slice: StreamSlice, ) -> Iterable[Record]: diff --git a/airbyte_cdk/sources/declarative/schema/default_schema_loader.py b/airbyte_cdk/sources/declarative/schema/default_schema_loader.py index f860d60fb..3547a5ae5 100644 --- a/airbyte_cdk/sources/declarative/schema/default_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/default_schema_loader.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping +from typing import Any from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader diff --git a/airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py b/airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py index a7a6230a6..813234a32 100644 --- a/airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py @@ -3,9 +3,10 @@ # +from collections.abc import Mapping, MutableMapping from copy import deepcopy from dataclasses import InitVar, dataclass, field -from typing import Any, List, Mapping, MutableMapping, Optional, Union +from typing import Any, Union import dpath from typing_extensions import deprecated @@ -53,7 +54,7 @@ class ComplexFieldType: """ field_type: 
str - items: Optional[Union[str, "ComplexFieldType"]] = None + items: Union[str, "ComplexFieldType"] | None = None def __post_init__(self) -> None: """ @@ -71,9 +72,9 @@ class TypesMap: Represents a mapping between a current type and its corresponding target type. """ - target_type: Union[List[str], str, ComplexFieldType] - current_type: Union[List[str], str] - condition: Optional[str] + target_type: list[str] | str | ComplexFieldType + current_type: list[str] | str + condition: str | None @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) @@ -83,11 +84,11 @@ class SchemaTypeIdentifier: Identifies schema details for dynamic schema extraction and processing. """ - key_pointer: List[Union[InterpolatedString, str]] + key_pointer: list[InterpolatedString | str] parameters: InitVar[Mapping[str, Any]] - type_pointer: Optional[List[Union[InterpolatedString, str]]] = None - types_mapping: Optional[List[TypesMap]] = None - schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None + type_pointer: list[InterpolatedString | str] | None = None + types_mapping: list[TypesMap] | None = None + schema_pointer: list[InterpolatedString | str] | None = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.schema_pointer = ( @@ -100,8 +101,8 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: @staticmethod def _update_pointer( - pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any] - ) -> Optional[List[Union[InterpolatedString, str]]]: + pointer: list[InterpolatedString | str] | None, parameters: Mapping[str, Any] + ) -> list[InterpolatedString | str] | None: return ( [ InterpolatedString.create(path, parameters=parameters) @@ -125,7 +126,7 @@ class DynamicSchemaLoader(SchemaLoader): config: Config parameters: InitVar[Mapping[str, Any]] schema_type_identifier: SchemaTypeIdentifier - schema_transformations: List[RecordTransformation] = field(default_factory=lambda: []) + schema_transformations: list[RecordTransformation] = field(default_factory=lambda: []) def get_json_schema(self) -> Mapping[str, Any]: """ @@ -164,7 +165,7 @@ def _transform( self, properties: Mapping[str, Any], stream_state: StreamState, - stream_slice: Optional[StreamSlice] = None, + stream_slice: StreamSlice | None = None, ) -> Mapping[str, Any]: for transformation in self.schema_transformations: transformation.transform( @@ -176,7 +177,7 @@ def _transform( def _get_key( self, raw_schema: MutableMapping[str, Any], - field_key_path: List[Union[InterpolatedString, str]], + field_key_path: list[InterpolatedString | str], ) -> str: """ Extracts the key field from the schema using the specified path. @@ -189,8 +190,8 @@ def _get_key( def _get_type( self, raw_schema: MutableMapping[str, Any], - field_type_path: Optional[List[Union[InterpolatedString, str]]], - ) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]: + field_type_path: list[InterpolatedString | str] | None, + ) -> Mapping[str, Any] | list[Mapping[str, Any]]: """ Determines the JSON Schema type for a field, supporting nullable and combined types. """ @@ -236,9 +237,9 @@ def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, def _replace_type_if_not_valid( self, - field_type: Union[List[str], str], + field_type: list[str] | str, raw_schema: MutableMapping[str, Any], - ) -> Union[List[str], str, ComplexFieldType]: + ) -> list[str] | str | ComplexFieldType: """ Replaces a field type if it matches a type mapping in `types_map`. 
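_replace_type_if_not_valid above swaps an incoming field type for a target type when an entry in types_map matches. A tiny standalone version of that lookup, with a hard-coded map in place of the declarative types_mapping (the map contents are made up for illustration):

from typing import Any

# Hypothetical mapping from source-specific type names to JSON Schema types.
TYPES_MAP: dict[str, list[str] | str] = {
    "int64": "integer",
    "float": "number",
    "timestamp": ["string", "null"],
}


def replace_type_if_not_valid(field_type: list[str] | str) -> list[str] | str:
    # Return the mapped JSON Schema type when the incoming type matches an entry,
    # otherwise pass the original type through unchanged.
    if isinstance(field_type, str) and field_type in TYPES_MAP:
        return TYPES_MAP[field_type]
    return field_type


assert replace_type_if_not_valid("int64") == "integer"
assert replace_type_if_not_valid("boolean") == "boolean"  # unmapped types pass through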
""" @@ -267,7 +268,7 @@ def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]: def _extract_data( self, body: Mapping[str, Any], - extraction_path: Optional[List[Union[InterpolatedString, str]]] = None, + extraction_path: list[InterpolatedString | str] | None = None, default: Any = None, ) -> Any: """ diff --git a/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py b/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py index 72a46b7e5..6675badde 100644 --- a/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Dict, Mapping +from typing import Any from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader @@ -12,7 +13,7 @@ class InlineSchemaLoader(SchemaLoader): """Describes a stream's schema""" - schema: Dict[str, Any] + schema: dict[str, Any] parameters: InitVar[Mapping[str, Any]] def get_json_schema(self) -> Mapping[str, Any]: diff --git a/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py b/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py index af51fe5db..2a62b998c 100644 --- a/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py @@ -5,8 +5,9 @@ import json import pkgutil import sys +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Mapping, Tuple, Union +from typing import Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader @@ -43,7 +44,7 @@ class JsonFileSchemaLoader(ResourceSchemaLoader, SchemaLoader): config: Config parameters: InitVar[Mapping[str, Any]] - file_path: Union[InterpolatedString, str] = field(default="") + file_path: InterpolatedString | str = field(default="") def __post_init__(self, parameters: Mapping[str, Any]) -> None: if not self.file_path: @@ -58,7 +59,7 @@ def get_json_schema(self) -> Mapping[str, Any]: raw_json_file = pkgutil.get_data(resource, schema_path) if not raw_json_file: - raise IOError(f"Cannot find file {json_schema_path}") + raise OSError(f"Cannot find file {json_schema_path}") try: raw_schema = json.loads(raw_json_file) except ValueError as err: @@ -70,7 +71,7 @@ def _get_json_filepath(self) -> Any: return self.file_path.eval(self.config) # type: ignore # file_path is always cast to an interpolated string @staticmethod - def extract_resource_and_schema_path(json_schema_path: str) -> Tuple[str, str]: + def extract_resource_and_schema_path(json_schema_path: str) -> tuple[str, str]: """ When the connector is running on a docker container, package_data is accessible from the resource (source_), so we extract the resource from the first part of the schema path and the remaining path is used to find the schema file. 
This is a slight diff --git a/airbyte_cdk/sources/declarative/schema/schema_loader.py b/airbyte_cdk/sources/declarative/schema/schema_loader.py index a6beb70ae..fb7f45cb6 100644 --- a/airbyte_cdk/sources/declarative/schema/schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/schema_loader.py @@ -3,8 +3,9 @@ # from abc import abstractmethod +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Mapping +from typing import Any @dataclass diff --git a/airbyte_cdk/sources/declarative/spec/spec.py b/airbyte_cdk/sources/declarative/spec/spec.py index 914e99e93..7892e3c45 100644 --- a/airbyte_cdk/sources/declarative/spec/spec.py +++ b/airbyte_cdk/sources/declarative/spec/spec.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional +from typing import Any from airbyte_cdk.models import ( AdvancedAuth, @@ -25,8 +26,8 @@ class Spec: connection_specification: Mapping[str, Any] parameters: InitVar[Mapping[str, Any]] - documentation_url: Optional[str] = None - advanced_auth: Optional[AuthFlow] = None + documentation_url: str | None = None + advanced_auth: AuthFlow | None = None def generate_spec(self) -> ConnectorSpecification: """ diff --git a/airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py b/airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py index 91ce28e7a..1e25aeb83 100644 --- a/airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +++ b/airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py @@ -1,6 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -from typing import Any, Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Any from airbyte_cdk.sources.declarative.retrievers import Retriever from airbyte_cdk.sources.message import MessageRepository @@ -66,7 +67,7 @@ def read(self) -> Iterable[Record]: else: self._message_repository.emit_message(stream_data) - def to_slice(self) -> Optional[Mapping[str, Any]]: + def to_slice(self) -> Mapping[str, Any] | None: return self._stream_slice def stream_name(self) -> str: diff --git a/airbyte_cdk/sources/declarative/transformations/add_fields.py b/airbyte_cdk/sources/declarative/transformations/add_fields.py index 08de277eb..9049e7f1a 100644 --- a/airbyte_cdk/sources/declarative/transformations/add_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/add_fields.py @@ -2,8 +2,9 @@ # Copyright (c) 2025 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any, Dict, List, Mapping, Optional, Type, Union +from typing import Any import dpath @@ -18,8 +19,8 @@ class AddedFieldDefinition: """Defines the field to add on a record""" path: FieldPointer - value: Union[InterpolatedString, str] - value_type: Optional[Type[Any]] + value: InterpolatedString | str + value_type: type[Any] | None parameters: InitVar[Mapping[str, Any]] @@ -29,7 +30,7 @@ class ParsedAddFieldDefinition: path: FieldPointer value: InterpolatedString - value_type: Optional[Type[Any]] + value_type: type[Any] | None parameters: InitVar[Mapping[str, Any]] @@ -85,10 +86,10 @@ class AddFields(RecordTransformation): fields (List[AddedFieldDefinition]): A list of transformations (path and corresponding value) that will be added to the record """ - fields: List[AddedFieldDefinition] + fields: list[AddedFieldDefinition] parameters: InitVar[Mapping[str, Any]] condition: str = "" - _parsed_fields: List[ParsedAddFieldDefinition] = field( + _parsed_fields: list[ParsedAddFieldDefinition] = field( init=False, repr=False, default_factory=list ) @@ -127,10 +128,10 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: if config is None: config = {} diff --git a/airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py b/airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py index 1486f7667..0d7ef09ca 100644 --- a/airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py @@ -1,5 +1,6 @@ +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Dict, List, Mapping, Optional, Union +from typing import Any import dpath @@ -20,7 +21,7 @@ class DpathFlattenFields(RecordTransformation): """ config: Config - field_path: List[Union[InterpolatedString, str]] + field_path: list[InterpolatedString | str] parameters: InitVar[Mapping[str, Any]] delete_origin_value: bool = False replace_record: bool = False @@ -37,10 +38,10 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: path = [path.eval(self.config) for path in self._field_path] if "*" in path: diff --git a/airbyte_cdk/sources/declarative/transformations/flatten_fields.py b/airbyte_cdk/sources/declarative/transformations/flatten_fields.py index 24bfba660..3735a99be 100644 --- a/airbyte_cdk/sources/declarative/transformations/flatten_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/flatten_fields.py @@ -3,7 +3,7 @@ # from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any from airbyte_cdk.sources.declarative.transformations import RecordTransformation from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @@ -15,18 +15,18 @@ class 
FlattenFields(RecordTransformation): def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: transformed_record = self.flatten_record(record) record.clear() record.update(transformed_record) - def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]: + def flatten_record(self, record: dict[str, Any]) -> dict[str, Any]: stack = [(record, "_")] - transformed_record: Dict[str, Any] = {} + transformed_record: dict[str, Any] = {} force_with_parent_name = False while stack: diff --git a/airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py b/airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py index 8fe0bbffb..00deb5130 100644 --- a/airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py @@ -2,8 +2,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Dict, Mapping, Optional +from typing import Any from airbyte_cdk import InterpolatedString from airbyte_cdk.sources.declarative.transformations import RecordTransformation @@ -34,10 +35,10 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: if config is None: config = {} @@ -46,7 +47,7 @@ def transform( old_key = str(self._old.eval(config, **kwargs)) new_key = str(self._new.eval(config, **kwargs)) - def _transform(data: Dict[str, Any]) -> Dict[str, Any]: + def _transform(data: dict[str, Any]) -> dict[str, Any]: result = {} for key, value in data.items(): updated_key = key.replace(old_key, new_key) diff --git a/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py b/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py index 53db3d49a..e133ac043 100644 --- a/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py @@ -3,7 +3,7 @@ # from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any from airbyte_cdk.sources.declarative.transformations import RecordTransformation from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @@ -13,10 +13,10 @@ class KeysToLowerTransformation(RecordTransformation): def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: for key in set(record.keys()): record[key.lower()] = record.pop(key) diff --git a/airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py 
b/airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py index 6ef2cb4c5..9fb3be9eb 100644 --- a/airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py @@ -4,7 +4,7 @@ import re from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from typing import Any import anyascii @@ -20,16 +20,16 @@ class KeysToSnakeCaseTransformation(RecordTransformation): def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: transformed_record = self._transform_record(record) record.clear() record.update(transformed_record) - def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]: + def _transform_record(self, record: dict[str, Any]) -> dict[str, Any]: transformed_record = {} for key, value in record.items(): transformed_key = self.process_key(key) @@ -50,19 +50,19 @@ def process_key(self, key: str) -> str: def normalize_key(self, key: str) -> str: return str(anyascii.anyascii(key)) - def tokenize_key(self, key: str) -> List[str]: + def tokenize_key(self, key: str) -> list[str]: tokens = [] for match in self.token_pattern.finditer(key): token = match.group(0) if match.group("NoToken") is None else "" tokens.append(token) return tokens - def filter_tokens(self, tokens: List[str]) -> List[str]: + def filter_tokens(self, tokens: list[str]) -> list[str]: if len(tokens) >= 3: tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:] if tokens and tokens[0].isdigit(): tokens.insert(0, "") return tokens - def tokens_to_snake_case(self, tokens: List[str]) -> str: + def tokens_to_snake_case(self, tokens: list[str]) -> str: return "_".join(token.lower() for token in tokens) diff --git a/airbyte_cdk/sources/declarative/transformations/remove_fields.py b/airbyte_cdk/sources/declarative/transformations/remove_fields.py index f5d8164df..eda81a8de 100644 --- a/airbyte_cdk/sources/declarative/transformations/remove_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/remove_fields.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
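KeysToSnakeCaseTransformation above normalizes a key, tokenizes it, filters the tokens, and joins them with underscores. A simplified sketch of that pipeline using a straightforward regex, not the connector's actual token_pattern:

import re


def to_snake_case(key: str) -> str:
    # Simplified: split on case boundaries, digit runs, and non-alphanumerics,
    # then lowercase and join with underscores.
    tokens = re.findall(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|\d+", key)
    return "_".join(token.lower() for token in tokens)


assert to_snake_case("CamelCaseKey") == "camel_case_key"
assert to_snake_case("HTTPResponse-Code") == "http_response_code"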
# +from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Dict, List, Mapping, Optional +from typing import Any import dpath import dpath.exceptions @@ -40,7 +41,7 @@ class RemoveFields(RecordTransformation): field_pointers (List[FieldPointer]): pointers to the fields that should be removed """ - field_pointers: List[FieldPointer] + field_pointers: list[FieldPointer] parameters: InitVar[Mapping[str, Any]] condition: str = "" @@ -51,10 +52,10 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: """ :param record: The record to be transformed diff --git a/airbyte_cdk/sources/declarative/transformations/transformation.py b/airbyte_cdk/sources/declarative/transformations/transformation.py index f5b226429..4dea78f90 100644 --- a/airbyte_cdk/sources/declarative/transformations/transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/transformation.py @@ -4,7 +4,7 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @@ -18,10 +18,10 @@ class RecordTransformation: @abstractmethod def transform( self, - record: Dict[str, Any], - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, + record: dict[str, Any], + config: Config | None = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, ) -> None: """ Transform a record by adding, deleting, or mutating fields directly from the record reference passed in argument. 
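The transform contract above mutates the record dict in place and returns None. A sketch of a custom transformation written against the updated signature; the import paths are the ones used elsewhere in this diff, but the class itself is illustrative, not part of the CDK:

from typing import Any

from airbyte_cdk.sources.declarative.transformations import RecordTransformation
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState


class RedactEmailTransformation(RecordTransformation):
    # Illustrative transformation: blank out an "email" field in place,
    # returning None as the RecordTransformation contract requires.
    def transform(
        self,
        record: dict[str, Any],
        config: Config | None = None,
        stream_state: StreamState | None = None,
        stream_slice: StreamSlice | None = None,
    ) -> None:
        if "email" in record:
            record["email"] = "***"


# Usage: the record dict is modified directly rather than replaced.
record = {"id": 1, "email": "user@example.com"}
RedactEmailTransformation().transform(record)
assert record["email"] == "***"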
diff --git a/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte_cdk/sources/declarative/yaml_declarative_source.py index 93bdc55e9..979233c43 100644 --- a/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -3,7 +3,8 @@ # import pkgutil -from typing import Any, List, Mapping, Optional +from collections.abc import Mapping +from typing import Any import yaml @@ -14,16 +15,16 @@ from airbyte_cdk.sources.types import ConnectionDefinition -class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage]]): +class YamlDeclarativeSource(ConcurrentDeclarativeSource[list[AirbyteStateMessage]]): """Declarative source defined by a yaml file""" def __init__( self, path_to_yaml: str, debug: bool = False, - catalog: Optional[ConfiguredAirbyteCatalog] = None, - config: Optional[Mapping[str, Any]] = None, - state: Optional[List[AirbyteStateMessage]] = None, + catalog: ConfiguredAirbyteCatalog | None = None, + config: Mapping[str, Any] | None = None, + state: list[AirbyteStateMessage] | None = None, ) -> None: """ :param path_to_yaml: Path to the yaml file describing the source @@ -41,7 +42,7 @@ def __init__( def _read_and_parse_yaml_file(self, path_to_yaml_file: str) -> ConnectionDefinition: try: # For testing purposes, we want to allow to just pass a file - with open(path_to_yaml_file, "r") as f: + with open(path_to_yaml_file) as f: return yaml.safe_load(f) # type: ignore # we assume the yaml represents a ConnectionDefinition except FileNotFoundError: # Running inside the container, the working directory during an operation is not structured the same as the static files diff --git a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py index 12e1740b6..0062b2478 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py @@ -6,7 +6,7 @@ import logging from abc import abstractmethod -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING from airbyte_cdk.sources import Source from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy @@ -28,8 +28,8 @@ def check_availability( # type: ignore[override] # Signature doesn't match bas self, stream: Stream, logger: logging.Logger, - _: Optional[Source], - ) -> Tuple[bool, Optional[str]]: + _: Source | None, + ) -> tuple[bool, str | None]: """ Perform a connection check for the stream. @@ -42,8 +42,8 @@ def check_availability_and_parsability( self, stream: AbstractFileBasedStream, logger: logging.Logger, - _: Optional[Source], - ) -> Tuple[bool, Optional[str]]: + _: Source | None, + ) -> tuple[bool, str | None]: """ Performs a connection check for the stream, as well as additional checks that verify that the connection is working as expected. 
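check_availability_and_parsability now returns tuple[bool, str | None] instead of Tuple[bool, Optional[str]]. A minimal illustration of producing and consuming that (success, reason) pair; the function below is a placeholder, not the strategy's real implementation:

import logging


def check_stream(stream_name: str, sample_ok: bool, logger: logging.Logger) -> tuple[bool, str | None]:
    # Illustrative shape of the availability check: a boolean plus an optional reason string.
    if not sample_ok:
        reason = f"Unable to parse a sample file from stream '{stream_name}'"
        logger.warning(reason)
        return False, reason
    return True, None


ok, reason = check_stream("invoices", sample_ok=True, logger=logging.getLogger(__name__))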
@@ -65,9 +65,7 @@ def check_availability(self, logger: logging.Logger) -> StreamAvailability: return StreamAvailable() return StreamUnavailable(reason or "") - def check_availability_and_parsability( - self, logger: logging.Logger - ) -> Tuple[bool, Optional[str]]: + def check_availability_and_parsability(self, logger: logging.Logger) -> tuple[bool, str | None]: return self.stream.availability_strategy.check_availability_and_parsability( self.stream, logger, None ) diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index c9d416a72..6e54e26f0 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -6,7 +6,7 @@ import logging import traceback -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING from airbyte_cdk import AirbyteTracedException from airbyte_cdk.sources import Source @@ -34,8 +34,8 @@ def check_availability( # type: ignore[override] # Signature doesn't match bas self, stream: AbstractFileBasedStream, logger: logging.Logger, - _: Optional[Source], - ) -> Tuple[bool, Optional[str]]: + _: Source | None, + ) -> tuple[bool, str | None]: """ Perform a connection check for the stream (verify that we can list files from the stream). @@ -52,8 +52,8 @@ def check_availability_and_parsability( self, stream: AbstractFileBasedStream, logger: logging.Logger, - _: Optional[Source], - ) -> Tuple[bool, Optional[str]]: + _: Source | None, + ) -> tuple[bool, str | None]: """ Perform a connection check for the stream. diff --git a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py index d9b67e34c..12fb68410 100644 --- a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +++ b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py @@ -4,7 +4,7 @@ import copy from abc import abstractmethod -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal import dpath from pydantic.v1 import AnyUrl, BaseModel, Field @@ -50,7 +50,7 @@ class AbstractFileBasedSpec(BaseModel): that are needed when users configure a file-based source. """ - start_date: Optional[str] = Field( + start_date: str | None = Field( title="Start Date", description="UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.", examples=["2021-01-01T00:00:00.000000Z"], @@ -60,13 +60,13 @@ class AbstractFileBasedSpec(BaseModel): order=1, ) - streams: List[FileBasedStreamConfig] = Field( + streams: list[FileBasedStreamConfig] = Field( title="The list of streams to sync", description='Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. 
When sending data to warehouse destination such as Snowflake or BigQuery, each stream is a separate table.', order=10, ) - delivery_method: Union[DeliverRecords, DeliverRawFiles, DeliverPermissions] = Field( + delivery_method: DeliverRecords | DeliverRawFiles | DeliverPermissions = Field( title="Delivery Method", discriminator="delivery_type", type="object", @@ -85,12 +85,12 @@ def documentation_url(cls) -> AnyUrl: """ @classmethod - def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]: + def schema(cls, *args: Any, **kwargs: Any) -> dict[str, Any]: """ Generates the mapping comprised of the config fields """ schema = super().schema(*args, **kwargs) - transformed_schema: Dict[str, Any] = copy.deepcopy(schema) + transformed_schema: dict[str, Any] = copy.deepcopy(schema) schema_helpers.expand_refs(transformed_schema) cls.replace_enum_allOf_and_anyOf(transformed_schema) cls.remove_discriminator(transformed_schema) @@ -98,12 +98,12 @@ def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]: return transformed_schema @staticmethod - def remove_discriminator(schema: Dict[str, Any]) -> None: + def remove_discriminator(schema: dict[str, Any]) -> None: """pydantic adds "discriminator" to the schema for oneOfs, which is not treated right by the platform as we inline all references""" dpath.delete(schema, "properties/**/discriminator") @staticmethod - def replace_enum_allOf_and_anyOf(schema: Dict[str, Any]) -> Dict[str, Any]: + def replace_enum_allOf_and_anyOf(schema: dict[str, Any]) -> dict[str, Any]: """ allOfs are not supported by the UI, but pydantic is automatically writing them for enums. Unpacks the enums under allOf and moves them up a level under the enum key @@ -147,7 +147,7 @@ def replace_enum_allOf_and_anyOf(schema: Dict[str, Any]) -> Dict[str, Any]: return schema @staticmethod - def move_enum_to_root(object_property: Dict[str, Any]) -> None: + def move_enum_to_root(object_property: dict[str, Any]) -> None: if "allOf" in object_property and "enum" in object_property["allOf"][0]: object_property["enum"] = object_property["allOf"][0]["enum"] object_property.pop("allOf") diff --git a/airbyte_cdk/sources/file_based/config/csv_format.py b/airbyte_cdk/sources/file_based/config/csv_format.py index 1441d8411..a774583f4 100644 --- a/airbyte_cdk/sources/file_based/config/csv_format.py +++ b/airbyte_cdk/sources/file_based/config/csv_format.py @@ -4,7 +4,7 @@ import codecs from enum import Enum -from typing import Any, Dict, List, Optional, Set, Union +from typing import Any from pydantic.v1 import BaseModel, Field, root_validator, validator from pydantic.v1.error_wrappers import ValidationError @@ -60,7 +60,7 @@ class Config(OneOfOptionConfig): CsvHeaderDefinitionType.USER_PROVIDED.value, const=True, ) - column_names: List[str] = Field( + column_names: list[str] = Field( title="Column Names", description="The column names that will be used while emitting the CSV records", ) @@ -69,7 +69,7 @@ def has_header_row(self) -> bool: return False @validator("column_names") - def validate_column_names(cls, v: List[str]) -> List[str]: + def validate_column_names(cls, v: list[str]) -> list[str]: if not v: raise ValueError( "At least one column name needs to be provided when using user provided headers" @@ -100,12 +100,12 @@ class Config(OneOfOptionConfig): default='"', description="The character used for quoting CSV values. 
To disallow quoting, make this field blank.", ) - escape_char: Optional[str] = Field( + escape_char: str | None = Field( title="Escape Character", default=None, description="The character used for escaping special characters. To disallow escaping, leave this field blank.", ) - encoding: Optional[str] = Field( + encoding: str | None = Field( default="utf8", description='The character encoding of the CSV data. Leave blank to default to UTF8. See list of python encodings for allowable options.', ) @@ -114,7 +114,7 @@ class Config(OneOfOptionConfig): default=True, description="Whether two quotes in a quoted CSV value denote a single quote in the data.", ) - null_values: Set[str] = Field( + null_values: set[str] = Field( title="Null Values", default=[], description="A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.", @@ -134,19 +134,17 @@ class Config(OneOfOptionConfig): default=0, description="The number of rows to skip after the header row.", ) - header_definition: Union[CsvHeaderFromCsv, CsvHeaderAutogenerated, CsvHeaderUserProvided] = ( - Field( - title="CSV Header Definition", - default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value), - description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.", - ) + header_definition: CsvHeaderFromCsv | CsvHeaderAutogenerated | CsvHeaderUserProvided = Field( + title="CSV Header Definition", + default=CsvHeaderFromCsv(header_definition_type=CsvHeaderDefinitionType.FROM_CSV.value), + description="How headers will be defined. `User Provided` assumes the CSV does not have a header row and uses the headers provided and `Autogenerated` assumes the CSV does not have a header row and the CDK will generate headers using for `f{i}` where `i` is the index starting from 0. Else, the default behavior is to use the header from the CSV file. 
If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows.", ) - true_values: Set[str] = Field( + true_values: set[str] = Field( title="True Values", default=DEFAULT_TRUE_VALUES, description="A set of case-sensitive strings that should be interpreted as true values.", ) - false_values: Set[str] = Field( + false_values: set[str] = Field( title="False Values", default=DEFAULT_FALSE_VALUES, description="A set of case-sensitive strings that should be interpreted as false values.", @@ -194,7 +192,7 @@ def validate_encoding(cls, v: str) -> str: return v @root_validator - def validate_optional_args(cls, values: Dict[str, Any]) -> Dict[str, Any]: + def validate_optional_args(cls, values: dict[str, Any]) -> dict[str, Any]: definition_type = values.get("header_definition_type") column_names = values.get("user_provided_column_names") if definition_type == CsvHeaderDefinitionType.USER_PROVIDED and not column_names: diff --git a/airbyte_cdk/sources/file_based/config/file_based_stream_config.py b/airbyte_cdk/sources/file_based/config/file_based_stream_config.py index eb592a4aa..98ccfa2c7 100644 --- a/airbyte_cdk/sources/file_based/config/file_based_stream_config.py +++ b/airbyte_cdk/sources/file_based/config/file_based_stream_config.py @@ -2,8 +2,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping from enum import Enum -from typing import Any, List, Mapping, Optional, Union +from typing import Any, Optional from pydantic.v1 import BaseModel, Field, validator @@ -16,7 +17,7 @@ from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema -PrimaryKeyType = Optional[Union[str, List[str]]] +PrimaryKeyType = Optional[str | list[str]] class ValidationPolicy(Enum): @@ -27,13 +28,13 @@ class ValidationPolicy(Enum): class FileBasedStreamConfig(BaseModel): name: str = Field(title="Name", description="The name of the stream.") - globs: Optional[List[str]] = Field( + globs: list[str] | None = Field( default=["**"], title="Globs", description='The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.', order=1, ) - legacy_prefix: Optional[str] = Field( + legacy_prefix: str | None = Field( title="Legacy Prefix", description="The path prefix configured in v3 versions of the S3 connector. This option is deprecated in favor of a single glob.", airbyte_hidden=True, @@ -43,11 +44,11 @@ class FileBasedStreamConfig(BaseModel): description="The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema.", default=ValidationPolicy.emit_record, ) - input_schema: Optional[str] = Field( + input_schema: str | None = Field( title="Input Schema", description="The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.", ) - primary_key: Optional[str] = Field( + primary_key: str | None = Field( title="Primary Key", description="The column or columns (for a composite key) that serves as the unique identifier of a record. If empty, the primary key will default to the parser's default primary key.", airbyte_hidden=True, # Users can create/modify primary keys in the connection configuration so we shouldn't duplicate it here. 
@@ -57,9 +58,9 @@ class FileBasedStreamConfig(BaseModel): description="When the state history of the file store is full, syncs will only read files that were last modified in the provided day range.", default=3, ) - format: Union[ - AvroFormat, CsvFormat, JsonlFormat, ParquetFormat, UnstructuredFormat, ExcelFormat - ] = Field( + format: ( + AvroFormat | CsvFormat | JsonlFormat | ParquetFormat | UnstructuredFormat | ExcelFormat + ) = Field( title="Format", description="The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.", ) @@ -68,7 +69,7 @@ class FileBasedStreamConfig(BaseModel): description="When enabled, syncs will not validate or structure records against the stream's schema.", default=False, ) - recent_n_files_to_read_for_schema_discovery: Optional[int] = Field( + recent_n_files_to_read_for_schema_discovery: int | None = Field( title="Files To Read For Schema Discover", description="The number of resent files which will be used to discover the schema for this stream.", default=None, @@ -76,7 +77,7 @@ class FileBasedStreamConfig(BaseModel): ) @validator("input_schema", pre=True) - def validate_input_schema(cls, v: Optional[str]) -> Optional[str]: + def validate_input_schema(cls, v: str | None) -> str | None: if v: if type_mapping_to_jsonschema(v): return v @@ -84,7 +85,7 @@ def validate_input_schema(cls, v: Optional[str]) -> Optional[str]: raise ConfigValidationError(FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA) return None - def get_input_schema(self) -> Optional[Mapping[str, Any]]: + def get_input_schema(self) -> Mapping[str, Any] | None: """ User defined input_schema is defined as a string in the config. This method takes the string representation and converts it into a Mapping[str, Any] which is used by file-based CDK components. diff --git a/airbyte_cdk/sources/file_based/config/unstructured_format.py b/airbyte_cdk/sources/file_based/config/unstructured_format.py index c03540ce6..7f8ae9369 100644 --- a/airbyte_cdk/sources/file_based/config/unstructured_format.py +++ b/airbyte_cdk/sources/file_based/config/unstructured_format.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import List, Literal, Optional, Union +from typing import Literal from pydantic.v1 import BaseModel, Field @@ -50,7 +50,7 @@ class APIProcessingConfigModel(BaseModel): examples=["https://api.unstructured.com"], ) - parameters: Optional[List[APIParameterConfigModel]] = Field( + parameters: list[APIParameterConfigModel] | None = Field( default=[], always_show=True, title="Additional URL Parameters", @@ -90,10 +90,7 @@ class Config(OneOfOptionConfig): description="The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. 
See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf", ) - processing: Union[ - LocalProcessingConfigModel, - APIProcessingConfigModel, - ] = Field( + processing: LocalProcessingConfigModel | APIProcessingConfigModel = Field( default=LocalProcessingConfigModel(mode="local"), title="Processing", description="Processing configuration", diff --git a/airbyte_cdk/sources/file_based/exceptions.py b/airbyte_cdk/sources/file_based/exceptions.py index b0d38947f..e8796ab4e 100644 --- a/airbyte_cdk/sources/file_based/exceptions.py +++ b/airbyte_cdk/sources/file_based/exceptions.py @@ -3,7 +3,7 @@ # from enum import Enum -from typing import Any, List, Union +from typing import Any from airbyte_cdk.models import AirbyteMessage, FailureType from airbyte_cdk.utils import AirbyteTracedException @@ -43,7 +43,7 @@ class FileBasedErrorsCollector: The placeholder for all errors collected. """ - errors: List[AirbyteMessage] = [] + errors: list[AirbyteMessage] = [] def yield_and_raise_collected(self) -> Any: if self.errors: @@ -63,7 +63,7 @@ def collect(self, logged_error: AirbyteMessage) -> None: class BaseFileBasedSourceError(Exception): - def __init__(self, error: Union[FileBasedSourceError, str], **kwargs): # type: ignore # noqa + def __init__(self, error: FileBasedSourceError | str, **kwargs): # type: ignore # noqa if isinstance(error, FileBasedSourceError): error = FileBasedSourceError(error).value super().__init__( @@ -112,7 +112,7 @@ class ErrorListingFiles(BaseFileBasedSourceError): class DuplicatedFilesError(BaseFileBasedSourceError): - def __init__(self, duplicated_files_names: List[dict[str, List[str]]], **kwargs: Any): + def __init__(self, duplicated_files_names: list[dict[str, list[str]]], **kwargs: Any): self._duplicated_files_names = duplicated_files_names self._stream_name: str = kwargs["stream"] super().__init__(self._format_duplicate_files_error_message(), **kwargs) diff --git a/airbyte_cdk/sources/file_based/file_based_source.py b/airbyte_cdk/sources/file_based/file_based_source.py index 2d34fe5dc..25b693a9d 100644 --- a/airbyte_cdk/sources/file_based/file_based_source.py +++ b/airbyte_cdk/sources/file_based/file_based_source.py @@ -6,7 +6,8 @@ import traceback from abc import ABC from collections import Counter -from typing import Any, Iterator, List, Mapping, Optional, Tuple, Type, Union +from collections.abc import Iterator, Mapping +from typing import Any from pydantic.v1.error_wrappers import ValidationError @@ -90,20 +91,20 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC): def __init__( self, stream_reader: AbstractFileBasedStreamReader, - spec_class: Type[AbstractFileBasedSpec], - catalog: Optional[ConfiguredAirbyteCatalog], - config: Optional[Mapping[str, Any]], - state: Optional[List[AirbyteStateMessage]], - availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None, + spec_class: type[AbstractFileBasedSpec], + catalog: ConfiguredAirbyteCatalog | None, + config: Mapping[str, Any] | None, + state: list[AirbyteStateMessage] | None, + availability_strategy: AbstractFileBasedAvailabilityStrategy | None = None, discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(), - parsers: Mapping[Type[Any], FileTypeParser] = default_parsers, + parsers: Mapping[type[Any], FileTypeParser] = default_parsers, validation_policies: Mapping[ ValidationPolicy, AbstractSchemaValidationPolicy ] = DEFAULT_SCHEMA_VALIDATION_POLICIES, - cursor_cls: Type[ - 
Union[AbstractConcurrentFileBasedCursor, AbstractFileBasedCursor] + cursor_cls: type[ + AbstractConcurrentFileBasedCursor | AbstractFileBasedCursor ] = FileBasedConcurrentCursor, - stream_permissions_reader: Optional[AbstractFileBasedStreamPermissionsReader] = None, + stream_permissions_reader: AbstractFileBasedStreamPermissionsReader | None = None, ): self.stream_reader = stream_reader self.stream_permissions_reader = stream_permissions_reader @@ -123,7 +124,7 @@ def __init__( self.cursor_cls = cursor_cls self.logger = init_logger(f"airbyte.{self.name}") self.errors_collector: FileBasedErrorsCollector = FileBasedErrorsCollector() - self._message_repository: Optional[MessageRepository] = None + self._message_repository: MessageRepository | None = None concurrent_source = ConcurrentSource.create( MAX_CONCURRENCY, INITIAL_N_PARTITIONS, @@ -144,7 +145,7 @@ def message_repository(self) -> MessageRepository: def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: """ Check that the source can be accessed using the user-provided configuration. @@ -224,7 +225,7 @@ def check_connection( return not bool(errors), (errors or None) - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: """ Return a list of this source's streams. """ @@ -241,7 +242,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: self.stream_reader.config = parsed_config if self.stream_permissions_reader: self.stream_permissions_reader.config = parsed_config - streams: List[Stream] = [] + streams: list[Stream] = [] for stream_config in parsed_config.streams: # Like state_manager, `catalog_stream` may be None during `check` catalog_stream = self._get_stream_from_catalog(stream_config) @@ -327,7 +328,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: def _make_default_stream( self, stream_config: FileBasedStreamConfig, - cursor: Optional[AbstractFileBasedCursor], + cursor: AbstractFileBasedCursor | None, parsed_config: AbstractFileBasedSpec, ) -> AbstractFileBasedStream: return DefaultFileBasedStream( @@ -355,7 +356,7 @@ def _ensure_permissions_reader_available(self) -> None: ) def _make_permissions_stream( - self, stream_config: FileBasedStreamConfig, cursor: Optional[AbstractFileBasedCursor] + self, stream_config: FileBasedStreamConfig, cursor: AbstractFileBasedCursor | None ) -> AbstractFileBasedStream: """ Creates a stream that reads permissions from files. 
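The `Tuple[bool, Optional[Any]]` to `tuple[bool, Any | None]` change on `check_connection` keeps the runtime contract identical: success flag first, collected errors (or `None`) second. A self-contained sketch of that contract; the function name and the error-collection logic are simplified inventions, only the return shape mirrors the diff:

```python
import logging
from collections.abc import Mapping
from typing import Any


def check_connection_sketch(
    logger: logging.Logger, config: Mapping[str, Any]
) -> tuple[bool, Any | None]:
    # Collect one error message per misconfigured stream entry (a simplified
    # stand-in for the real per-stream availability checks).
    errors: list[str] = []
    for stream_name in config.get("streams", []):
        if not stream_name:
            errors.append("stream entry is missing a name")

    # Mirrors the `return not bool(errors), (errors or None)` shape in the diff:
    # success iff nothing was collected, and None (not an empty list) on success.
    return not bool(errors), (errors or None)


ok, details = check_connection_sketch(logging.getLogger(__name__), {"streams": ["users", ""]})
print(ok, details)  # False ['stream entry is missing a name']
```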
@@ -377,7 +378,7 @@ def _make_permissions_stream( def _make_file_based_stream( self, stream_config: FileBasedStreamConfig, - cursor: Optional[AbstractFileBasedCursor], + cursor: AbstractFileBasedCursor | None, parsed_config: AbstractFileBasedSpec, ) -> AbstractFileBasedStream: """ @@ -402,14 +403,14 @@ def _make_identities_stream( def _get_stream_from_catalog( self, stream_config: FileBasedStreamConfig - ) -> Optional[AirbyteStream]: + ) -> AirbyteStream | None: if self.catalog: for stream in self.catalog.streams or []: if stream.stream.name == stream_config.name: return stream.stream return None - def _get_sync_mode_from_catalog(self, stream_name: str) -> Optional[SyncMode]: + def _get_sync_mode_from_catalog(self, stream_name: str) -> SyncMode | None: if self.catalog: for catalog_stream in self.catalog.streams: if stream_name == catalog_stream.stream.name: @@ -422,7 +423,7 @@ def read( logger: logging.Logger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, - state: Optional[List[AirbyteStateMessage]] = None, + state: list[AirbyteStateMessage] | None = None, ) -> Iterator[AirbyteMessage]: yield from super().read(logger, config, catalog, state) # emit all the errors collected diff --git a/airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py b/airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py index aff5c652c..800c1f91c 100644 --- a/airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py +++ b/airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py @@ -4,7 +4,8 @@ import logging from abc import ABC, abstractmethod -from typing import Any, Dict, Iterable, Optional +from collections.abc import Iterable +from typing import Any from airbyte_cdk.sources.file_based import AbstractFileBasedSpec from airbyte_cdk.sources.file_based.remote_file import RemoteFile @@ -19,7 +20,7 @@ def __init__(self) -> None: self._config = None @property - def config(self) -> Optional[AbstractFileBasedSpec]: + def config(self) -> AbstractFileBasedSpec | None: return self._config @config.setter @@ -37,7 +38,7 @@ def config(self, value: AbstractFileBasedSpec) -> None: ... @abstractmethod - def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]: + def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> dict[str, Any]: """ This function should return the allow list for a given file, i.e. the list of all identities and their permission levels associated with it @@ -54,7 +55,7 @@ def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger): ... @abstractmethod - def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]: + def load_identity_groups(self, logger: logging.Logger) -> Iterable[dict[str, Any]]: """ This function should return the Identities in a determined "space" or "domain" where the file metadata (ACLs) are fetched and ACLs items (Identities) exists. @@ -77,7 +78,7 @@ def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any @property @abstractmethod - def file_permissions_schema(self) -> Dict[str, Any]: + def file_permissions_schema(self) -> dict[str, Any]: """ This function should return the permissions schema for file permissions stream. 
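With `Iterable` now imported from `collections.abc`, an implementation of `load_identity_groups` can still be an ordinary generator. A toy sketch under assumed names (`InMemoryPermissionsReader` is not a real connector class):

```python
import logging
from collections.abc import Iterable
from typing import Any


class InMemoryPermissionsReader:
    """Toy reader that serves identities from a static list instead of a remote API."""

    def __init__(self, identities: list[dict[str, Any]]) -> None:
        self._identities = identities

    def load_identity_groups(self, logger: logging.Logger) -> Iterable[dict[str, Any]]:
        # A generator is a perfectly valid Iterable[dict[str, Any]]; callers just iterate.
        for identity in self._identities:
            logger.debug("emitting identity %s", identity.get("id"))
            yield identity


reader = InMemoryPermissionsReader([{"id": "group-1", "members": ["a@example.com"]}])
print(list(reader.load_identity_groups(logging.getLogger(__name__))))
```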
@@ -101,7 +102,7 @@ def file_permissions_schema(self) -> Dict[str, Any]: @property @abstractmethod - def identities_schema(self) -> Dict[str, Any]: + def identities_schema(self) -> dict[str, Any]: """ This function should return the identities schema for file identity stream. diff --git a/airbyte_cdk/sources/file_based/file_based_stream_reader.py b/airbyte_cdk/sources/file_based/file_based_stream_reader.py index cbf3d119b..0f1deecb8 100644 --- a/airbyte_cdk/sources/file_based/file_based_stream_reader.py +++ b/airbyte_cdk/sources/file_based/file_based_stream_reader.py @@ -4,11 +4,12 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Iterable from datetime import datetime from enum import Enum from io import IOBase from os import makedirs, path -from typing import Any, Dict, Iterable, List, Optional, Set +from typing import Any from wcmatch.glob import GLOBSTAR, globmatch @@ -33,7 +34,7 @@ def __init__(self) -> None: self._config = None @property - def config(self) -> Optional[AbstractFileBasedSpec]: + def config(self) -> AbstractFileBasedSpec | None: return self._config @config.setter @@ -52,7 +53,7 @@ def config(self, value: AbstractFileBasedSpec) -> None: @abstractmethod def open_file( - self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger + self, file: RemoteFile, mode: FileReadMode, encoding: str | None, logger: logging.Logger ) -> IOBase: """ Return a file handle for reading. @@ -68,8 +69,8 @@ def open_file( @abstractmethod def get_matching_files( self, - globs: List[str], - prefix: Optional[str], + globs: list[str], + prefix: str | None, logger: logging.Logger, ) -> Iterable[RemoteFile]: """ @@ -89,7 +90,7 @@ def get_matching_files( ... def filter_files_by_globs_and_start_date( - self, files: List[RemoteFile], globs: List[str] + self, files: list[RemoteFile], globs: list[str] ) -> Iterable[RemoteFile]: """ Utility method for filtering files based on globs. @@ -118,13 +119,13 @@ def file_size(self, file: RemoteFile) -> int: ... @staticmethod - def file_matches_globs(file: RemoteFile, globs: List[str]) -> bool: + def file_matches_globs(file: RemoteFile, globs: list[str]) -> bool: # Use the GLOBSTAR flag to enable recursive ** matching # (https://facelessuser.github.io/wcmatch/wcmatch/#globstar) return any(globmatch(file.uri, g, flags=GLOBSTAR) for g in globs) @staticmethod - def get_prefixes_from_globs(globs: List[str]) -> Set[str]: + def get_prefixes_from_globs(globs: list[str]) -> set[str]: """ Utility method for extracting prefixes from the globs. """ @@ -150,7 +151,7 @@ def include_identities_stream(self) -> bool: @abstractmethod def get_file( self, file: RemoteFile, local_directory: str, logger: logging.Logger - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ This is required for connectors that will support writing to files. It will handle the logic to download,get,read,acquire or @@ -171,7 +172,7 @@ def get_file( """ ... 
- def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]: + def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> list[str]: preserve_directory_structure = self.preserve_directory_structure() if preserve_directory_structure: # Remove left slashes from source path format to make relative path for writing locally diff --git a/airbyte_cdk/sources/file_based/file_types/__init__.py b/airbyte_cdk/sources/file_based/file_types/__init__.py index b9d8f1d52..e90150da8 100644 --- a/airbyte_cdk/sources/file_based/file_types/__init__.py +++ b/airbyte_cdk/sources/file_based/file_types/__init__.py @@ -1,4 +1,5 @@ -from typing import Any, Mapping, Type +from collections.abc import Mapping +from typing import Any, Type from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat @@ -16,7 +17,7 @@ from .parquet_parser import ParquetParser from .unstructured_parser import UnstructuredParser -default_parsers: Mapping[Type[Any], FileTypeParser] = { +default_parsers: Mapping[type[Any], FileTypeParser] = { AvroFormat: AvroParser(), CsvFormat: CsvParser(), ExcelFormat: ExcelParser(), diff --git a/airbyte_cdk/sources/file_based/file_types/avro_parser.py b/airbyte_cdk/sources/file_based/file_types/avro_parser.py index 85e5afa22..e66a5d370 100644 --- a/airbyte_cdk/sources/file_based/file_types/avro_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/avro_parser.py @@ -3,7 +3,8 @@ # import logging -from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, cast +from collections.abc import Iterable, Mapping +from typing import Any, cast import fastavro @@ -46,7 +47,7 @@ class AvroParser(FileTypeParser): ENCODING = None - def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: """ AvroParser does not require config checks, implicit pydantic validation is enough. 
""" @@ -173,8 +174,8 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Optional[Mapping[str, SchemaType]], - ) -> Iterable[Dict[str, Any]]: + discovered_schema: Mapping[str, SchemaType] | None, + ) -> Iterable[dict[str, Any]]: avro_format = config.format or AvroFormat(filetype="avro") if not isinstance(avro_format, AvroFormat): raise ValueError(f"Expected ParquetFormat, got {avro_format}") diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index e3010690e..531c995ff 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -7,9 +7,10 @@ import logging from abc import ABC, abstractmethod from collections import defaultdict +from collections.abc import Callable, Generator, Iterable, Mapping from functools import partial from io import IOBase -from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple +from typing import Any from uuid import uuid4 import orjson @@ -43,7 +44,7 @@ def read_data( stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, file_read_mode: FileReadMode, - ) -> Generator[Dict[str, Any], None, None]: + ) -> Generator[dict[str, Any], None, None]: config_format = _extract_format(config) lineno = 0 @@ -111,7 +112,7 @@ def read_data( # due to RecordParseError or GeneratorExit csv.unregister_dialect(dialect_name) - def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str) -> List[str]: + def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str) -> list[str]: """ Assumes the fp is pointing to the beginning of the files and will reset it as such """ @@ -133,7 +134,7 @@ def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str) fp.seek(0) return headers - def _auto_generate_headers(self, fp: IOBase, dialect_name: str) -> List[str]: + def _auto_generate_headers(self, fp: IOBase, dialect_name: str) -> list[str]: """ Generates field names as [f0, f1, ...] in the same way as pyarrow's csv reader with autogenerate_column_names=True. See https://arrow.apache.org/docs/python/generated/pyarrow.csv.ReadOptions.html @@ -154,14 +155,14 @@ def _skip_rows(fp: IOBase, rows_to_skip: int) -> None: class CsvParser(FileTypeParser): _MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE = 1_000_000 - def __init__(self, csv_reader: Optional[_CsvReader] = None, csv_field_max_bytes: int = 2**31): + def __init__(self, csv_reader: _CsvReader | None = None, csv_field_max_bytes: int = 2**31): # Increase the maximum length of data that can be parsed in a single CSV field. The default is 128k, which is typically sufficient # but given the use of Airbyte in loading a large variety of data it is best to allow for a larger maximum field size to avoid # skipping data on load. https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072 csv.field_size_limit(csv_field_max_bytes) self._csv_reader = csv_reader if csv_reader else _CsvReader() - def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: """ CsvParser does not require config checks, implicit pydantic validation is enough. 
""" @@ -181,7 +182,7 @@ async def infer_schema( # todo: the existing InMemoryFilesSource.open_file() test source doesn't currently require an encoding, but actual # sources will likely require one. Rather than modify the interface now we can wait until the real use case config_format = _extract_format(config) - type_inferrer_by_field: Dict[str, _TypeInferrer] = defaultdict( + type_inferrer_by_field: dict[str, _TypeInferrer] = defaultdict( lambda: _JsonTypeInferrer( config_format.true_values, config_format.false_values, config_format.null_values ) @@ -221,8 +222,8 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Optional[Mapping[str, SchemaType]], - ) -> Iterable[Dict[str, Any]]: + discovered_schema: Mapping[str, SchemaType] | None, + ) -> Iterable[dict[str, Any]]: line_no = 0 try: config_format = _extract_format(config) @@ -281,9 +282,9 @@ def _get_cast_function( def _to_nullable( row: Mapping[str, str], deduped_property_types: Mapping[str, str], - null_values: Set[str], + null_values: set[str], strings_can_be_null: bool, - ) -> Dict[str, Optional[str]]: + ) -> dict[str, str | None]: nullable = { k: None if CsvParser._value_is_none( @@ -297,14 +298,14 @@ def _to_nullable( @staticmethod def _value_is_none( value: Any, - deduped_property_type: Optional[str], - null_values: Set[str], + deduped_property_type: str | None, + null_values: set[str], strings_can_be_null: bool, ) -> bool: return value in null_values and (strings_can_be_null or deduped_property_type != "string") @staticmethod - def _pre_propcess_property_types(property_types: Dict[str, Any]) -> Mapping[str, str]: + def _pre_propcess_property_types(property_types: dict[str, Any]) -> Mapping[str, str]: """ Transform the property types to be non-nullable and remove duplicate types if any. Sample input: @@ -335,11 +336,11 @@ def _pre_propcess_property_types(property_types: Dict[str, Any]) -> Mapping[str, @staticmethod def _cast_types( - row: Dict[str, str], + row: dict[str, str], deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Casts the values in the input 'row' dictionary according to the types defined in the JSON schema. 
@@ -425,12 +426,12 @@ class _JsonTypeInferrer(_TypeInferrer): _STRING_TYPE = "string" def __init__( - self, boolean_trues: Set[str], boolean_falses: Set[str], null_values: Set[str] + self, boolean_trues: set[str], boolean_falses: set[str], null_values: set[str] ) -> None: self._boolean_trues = boolean_trues self._boolean_falses = boolean_falses self._null_values = null_values - self._values: Set[str] = set() + self._values: set[str] = set() def add_value(self, value: Any) -> None: self._values.add(value) @@ -453,7 +454,7 @@ def infer(self) -> str: return self._NUMBER_TYPE return self._STRING_TYPE - def _infer_type(self, value: str) -> Set[str]: + def _infer_type(self, value: str) -> set[str]: inferred_types = set() if value in self._null_values: @@ -493,7 +494,7 @@ def _is_number(value: str) -> bool: return False -def _value_to_bool(value: str, true_values: Set[str], false_values: Set[str]) -> bool: +def _value_to_bool(value: str, true_values: set[str], false_values: set[str]) -> bool: if value in true_values: return True if value in false_values: @@ -501,7 +502,7 @@ def _value_to_bool(value: str, true_values: Set[str], false_values: Set[str]) -> raise ValueError(f"Value {value} is not a valid boolean value") -def _value_to_list(value: str) -> List[Any]: +def _value_to_list(value: str) -> list[Any]: parsed_value = json.loads(value) if isinstance(parsed_value, list): return parsed_value @@ -512,7 +513,7 @@ def _value_to_python_type(value: str, python_type: type) -> Any: return python_type(value) -def _format_warning(key: str, value: str, expected_type: Optional[Any]) -> str: +def _format_warning(key: str, value: str, expected_type: Any | None) -> str: return f"{key}: value={value},expected_type={expected_type}" diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index 5a0332171..ee34e2e57 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -3,9 +3,10 @@ # import logging +from collections.abc import Iterable, Mapping from io import IOBase from pathlib import Path -from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union +from typing import Any import orjson import pandas as pd @@ -34,7 +35,7 @@ class ExcelParser(FileTypeParser): ENCODING = None - def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: """ ExcelParser does not require config checks, implicit pydantic validation is enough. """ @@ -63,7 +64,7 @@ async def infer_schema( # Validate the format of the config self.validate_format(config.format, logger) - fields: Dict[str, str] = {} + fields: dict[str, str] = {} with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: df = self.open_and_parse_file(fp) @@ -91,8 +92,8 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Optional[Mapping[str, SchemaType]] = None, - ) -> Iterable[Dict[str, Any]]: + discovered_schema: Mapping[str, SchemaType] | None = None, + ) -> Iterable[dict[str, Any]]: """ Parses records from an Excel file based on the provided configuration. 
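The boolean and list coercion helpers touched here only change annotations. A self-contained restatement for reference; the `ValueError` branch of the list helper is my own simplification, since that part of the body is not shown in this hunk:

```python
import json
from typing import Any


def value_to_bool(value: str, true_values: set[str], false_values: set[str]) -> bool:
    # Case-sensitive membership test against the configured true/false markers.
    if value in true_values:
        return True
    if value in false_values:
        return False
    raise ValueError(f"Value {value} is not a valid boolean value")


def value_to_list(value: str) -> list[Any]:
    # Only accept JSON arrays; anything else is treated as a type mismatch here.
    parsed_value = json.loads(value)
    if isinstance(parsed_value, list):
        return parsed_value
    raise ValueError(f"Value {value} is not a valid list value")


print(value_to_bool("yes", {"yes", "true"}, {"no", "false"}))  # True
print(value_to_list('["a", "b"]'))                             # ['a', 'b']
```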
@@ -140,7 +141,7 @@ def file_read_mode(self) -> FileReadMode: @staticmethod def dtype_to_json_type( - current_type: Optional[str], + current_type: str | None, dtype: dtype_, # type: ignore [type-arg] ) -> str: """ @@ -183,7 +184,7 @@ def validate_format(excel_format: BaseModel, logger: logging.Logger) -> None: raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR) @staticmethod - def open_and_parse_file(fp: Union[IOBase, str, Path]) -> pd.DataFrame: + def open_and_parse_file(fp: IOBase | str | Path) -> pd.DataFrame: """ Opens and parses the Excel file. diff --git a/airbyte_cdk/sources/file_based/file_types/file_transfer.py b/airbyte_cdk/sources/file_based/file_types/file_transfer.py index 154b6ff44..1cf1196c3 100644 --- a/airbyte_cdk/sources/file_based/file_types/file_transfer.py +++ b/airbyte_cdk/sources/file_based/file_types/file_transfer.py @@ -3,7 +3,8 @@ # import logging import os -from typing import Any, Dict, Iterable +from collections.abc import Iterable +from typing import Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader @@ -27,7 +28,7 @@ def get_file( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - ) -> Iterable[Dict[str, Any]]: + ) -> Iterable[dict[str, Any]]: try: yield stream_reader.get_file( file=file, local_directory=self._local_directory, logger=logger diff --git a/airbyte_cdk/sources/file_based/file_types/file_type_parser.py b/airbyte_cdk/sources/file_based/file_types/file_type_parser.py index e6a9c5cb1..8d0d58ce7 100644 --- a/airbyte_cdk/sources/file_based/file_types/file_type_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/file_type_parser.py @@ -4,7 +4,8 @@ import logging from abc import ABC, abstractmethod -from typing import Any, Dict, Iterable, Mapping, Optional, Tuple +from collections.abc import Iterable, Mapping +from typing import Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.file_based_stream_reader import ( @@ -14,7 +15,7 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.schema_helpers import SchemaType -Record = Dict[str, Any] +Record = dict[str, Any] class FileTypeParser(ABC): @@ -24,27 +25,27 @@ class FileTypeParser(ABC): """ @property - def parser_max_n_files_for_schema_inference(self) -> Optional[int]: + def parser_max_n_files_for_schema_inference(self) -> int | None: """ The discovery policy decides how many files are loaded for schema inference. This method can provide a parser-specific override. If it's defined, the smaller of the two values will be used. """ return None @property - def parser_max_n_files_for_parsability(self) -> Optional[int]: + def parser_max_n_files_for_parsability(self) -> int | None: """ The availability policy decides how many files are loaded for checking whether parsing works correctly. This method can provide a parser-specific override. If it's defined, the smaller of the two values will be used. """ return None - def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> Optional[str]: + def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> str | None: """ The parser can define a primary key. If no user-defined primary key is provided, this will be used. 
""" return None @abstractmethod - def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: """ Check whether the config is valid for this file type. If it is, return True and None. If it's not, return False and an error message explaining why it's invalid. """ @@ -70,7 +71,7 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Optional[Mapping[str, SchemaType]], + discovered_schema: Mapping[str, SchemaType] | None, ) -> Iterable[Record]: """ Parse and emit each record. diff --git a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py index 722ad329b..f898f7f3c 100644 --- a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py @@ -4,7 +4,8 @@ import json import logging -from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union +from collections.abc import Iterable, Mapping +from typing import Any import orjson @@ -27,7 +28,7 @@ class JsonlParser(FileTypeParser): MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE = 1_000_000 ENCODING = "utf8" - def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: """ JsonlParser does not require config checks, implicit pydantic validation is enough. """ @@ -58,8 +59,8 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Optional[Mapping[str, SchemaType]], - ) -> Iterable[Dict[str, Any]]: + discovered_schema: Mapping[str, SchemaType] | None, + ) -> Iterable[dict[str, Any]]: """ This code supports parsing json objects over multiple lines even though this does not align with the JSONL format. This is for backward compatibility reasons i.e. the previous source-s3 parser did support this. 
The drawback is: @@ -73,7 +74,7 @@ def parse_records( yield from self._parse_jsonl_entries(file, stream_reader, logger) @classmethod - def _infer_schema_for_record(cls, record: Dict[str, Any]) -> Dict[str, Any]: + def _infer_schema_for_record(cls, record: dict[str, Any]) -> dict[str, Any]: record_schema = {} for key, value in record.items(): if value is None: @@ -93,7 +94,7 @@ def _parse_jsonl_entries( stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, read_limit: bool = False, - ) -> Iterable[Dict[str, Any]]: + ) -> Iterable[dict[str, Any]]: with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: read_bytes = 0 @@ -138,7 +139,7 @@ def _parse_jsonl_entries( ) @staticmethod - def _instantiate_accumulator(line: Union[bytes, str]) -> Union[bytes, str]: + def _instantiate_accumulator(line: bytes | str) -> bytes | str: if isinstance(line, bytes): return bytes("", json.detect_encoding(line)) elif isinstance(line, str): diff --git a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py index 28cfb14c9..6ab5eddb6 100644 --- a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py @@ -5,7 +5,8 @@ import json import logging import os -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union +from collections.abc import Iterable, Mapping +from typing import Any from urllib.parse import unquote import pyarrow as pa @@ -33,7 +34,7 @@ class ParquetParser(FileTypeParser): ENCODING = None - def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: """ ParquetParser does not require config checks, implicit pydantic validation is enough. """ @@ -74,8 +75,8 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Optional[Mapping[str, SchemaType]], - ) -> Iterable[Dict[str, Any]]: + discovered_schema: Mapping[str, SchemaType] | None, + ) -> Iterable[dict[str, Any]]: parquet_format = config.format if not isinstance(parquet_format, ParquetFormat): logger.info(f"Expected ParquetFormat, got {parquet_format}") @@ -109,7 +110,7 @@ def parse_records( ) from exc @staticmethod - def _extract_partitions(filepath: str) -> List[str]: + def _extract_partitions(filepath: str) -> list[str]: return [unquote(partition) for partition in filepath.split(os.sep) if "=" in partition] @property @@ -118,7 +119,7 @@ def file_read_mode(self) -> FileReadMode: @staticmethod def _to_output_value( - parquet_value: Union[Scalar, DictionaryArray], parquet_format: ParquetFormat + parquet_value: Scalar | DictionaryArray, parquet_format: ParquetFormat ) -> Any: """ Convert an entry in a pyarrow table to a value that can be output by the source. @@ -182,7 +183,7 @@ def _scalar_to_python_value(parquet_value: Scalar, parquet_format: ParquetFormat return parquet_value.as_py() @staticmethod - def _dictionary_array_to_python_value(parquet_value: DictionaryArray) -> Dict[str, Any]: + def _dictionary_array_to_python_value(parquet_value: DictionaryArray) -> dict[str, Any]: """ Convert a pyarrow dictionary array to a value that can be output by the source. 
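`_extract_partitions`, shown in full above, recovers Hive-style `key=value` segments from a file path. The same idea as a standalone snippet:

```python
import os
from urllib.parse import unquote


def extract_partitions(filepath: str) -> list[str]:
    # Keep only path segments that look like Hive partitions (contain '='),
    # URL-decoding each one so partition values round-trip cleanly.
    return [unquote(segment) for segment in filepath.split(os.sep) if "=" in segment]


print(extract_partitions("bucket/dataset/year=2024/month=05/part-0000.parquet"))
# ['year=2024', 'month=05']
```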
diff --git a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py index f55675e0a..a6467d937 100644 --- a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py @@ -4,9 +4,10 @@ import logging import os import traceback +from collections.abc import Iterable, Mapping from datetime import datetime from io import BytesIO, IOBase -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union +from typing import Any import backoff import dpath @@ -73,7 +74,7 @@ def get_nltk_temp_folder() -> str: nltk.download("averaged_perceptron_tagger_eng", download_dir=nltk_data_dir, quiet=True) -def optional_decode(contents: Union[str, bytes]) -> str: +def optional_decode(contents: str | bytes) -> str: if isinstance(contents, bytes): return contents.decode("utf-8") return contents @@ -110,20 +111,20 @@ def user_error(e: Exception) -> bool: class UnstructuredParser(FileTypeParser): @property - def parser_max_n_files_for_schema_inference(self) -> Optional[int]: + def parser_max_n_files_for_schema_inference(self) -> int | None: """ Just check one file as the schema is static """ return 1 @property - def parser_max_n_files_for_parsability(self) -> Optional[int]: + def parser_max_n_files_for_parsability(self) -> int | None: """ Do not check any files for parsability because it might be an expensive operation and doesn't give much confidence whether the sync will succeed. """ return 0 - def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> Optional[str]: + def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> str | None: """ Return the document_key field as the primary key. @@ -168,8 +169,8 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Optional[Mapping[str, SchemaType]], - ) -> Iterable[Dict[str, Any]]: + discovered_schema: Mapping[str, SchemaType] | None, + ) -> Iterable[dict[str, Any]]: format = _extract_format(config) with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle: try: @@ -256,9 +257,9 @@ def _read_file( return result def _params_to_dict( - self, params: Optional[List[APIParameterConfigModel]], strategy: str - ) -> Dict[str, Union[str, List[str]]]: - result_dict: Dict[str, Union[str, List[str]]] = {"strategy": strategy} + self, params: list[APIParameterConfigModel] | None, strategy: str + ) -> dict[str, str | list[str]]: + result_dict: dict[str, str | list[str]] = {"strategy": strategy} if params is None: return result_dict for item in params: @@ -277,7 +278,7 @@ def _params_to_dict( return result_dict - def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: """ Perform a connection check for the parser config: - Verify that encryption is enabled if the API is hosted on a cloud instance. @@ -396,7 +397,7 @@ def _create_parse_error( FileBasedSourceError.ERROR_PARSING_RECORD, filename=remote_file.uri, message=message ) - def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> Optional[FileType]: + def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> FileType | None: """ Detect the file type based on the file name and the file content. 
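`_params_to_dict` now advertises `dict[str, str | list[str]]`, which hints at how repeated URL parameters are folded together. A rough sketch of that folding; the merge rule for repeated keys is inferred from the return type rather than from code visible in this hunk, and plain tuples stand in for `APIParameterConfigModel`:

```python
def params_to_dict_sketch(
    params: list[tuple[str, str]] | None, strategy: str
) -> dict[str, str | list[str]]:
    # Always carry the partition strategy; optionally fold user-supplied URL
    # parameters on top. Repeated keys are collected into a list, which is why
    # the value type in the signature is `str | list[str]`.
    result: dict[str, str | list[str]] = {"strategy": strategy}
    if params is None:
        return result
    for name, value in params:
        existing = result.get(name)
        if existing is None:
            result[name] = value
        elif isinstance(existing, list):
            existing.append(value)
        else:
            result[name] = [existing, value]
    return result


print(params_to_dict_sketch(
    [("include_page_breaks", "true"), ("languages", "eng"), ("languages", "deu")], "hi_res"
))
# {'strategy': 'hi_res', 'include_page_breaks': 'true', 'languages': ['eng', 'deu']}
```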
@@ -439,7 +440,7 @@ def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> Optional[FileT return None - def _supported_file_types(self) -> List[Any]: + def _supported_file_types(self) -> list[Any]: return [FileType.MD, FileType.PDF, FileType.DOCX, FileType.PPTX, FileType.TXT] def _get_file_type_error_message( @@ -449,15 +450,15 @@ def _get_file_type_error_message( supported_file_types = ", ".join([str(type) for type in self._supported_file_types()]) return f"File type {file_type or 'None'!s} is not supported. Supported file types are {supported_file_types}" - def _render_markdown(self, elements: List[Any]) -> str: - return "\n\n".join((self._convert_to_markdown(el) for el in elements)) + def _render_markdown(self, elements: list[Any]) -> str: + return "\n\n".join(self._convert_to_markdown(el) for el in elements) - def _convert_to_markdown(self, el: Dict[str, Any]) -> str: + def _convert_to_markdown(self, el: dict[str, Any]) -> str: if dpath.get(el, "type") == "Title": category_depth = dpath.get(el, "metadata/category_depth", default=1) or 1 if not isinstance(category_depth, int): category_depth = ( - int(category_depth) if isinstance(category_depth, (str, float)) else 1 + int(category_depth) if isinstance(category_depth, str | float) else 1 ) heading_str = "#" * category_depth return f"{heading_str} {dpath.get(el, 'text')}" diff --git a/airbyte_cdk/sources/file_based/remote_file.py b/airbyte_cdk/sources/file_based/remote_file.py index 0197a35fd..48d4e2513 100644 --- a/airbyte_cdk/sources/file_based/remote_file.py +++ b/airbyte_cdk/sources/file_based/remote_file.py @@ -3,7 +3,6 @@ # from datetime import datetime -from typing import Optional from pydantic.v1 import BaseModel @@ -15,4 +14,4 @@ class RemoteFile(BaseModel): uri: str last_modified: datetime - mime_type: Optional[str] = None + mime_type: str | None = None diff --git a/airbyte_cdk/sources/file_based/schema_helpers.py b/airbyte_cdk/sources/file_based/schema_helpers.py index 1b653db67..dc338beef 100644 --- a/airbyte_cdk/sources/file_based/schema_helpers.py +++ b/airbyte_cdk/sources/file_based/schema_helpers.py @@ -3,10 +3,11 @@ # import json +from collections.abc import Mapping from copy import deepcopy from enum import Enum from functools import total_ordering -from typing import Any, Dict, List, Literal, Mapping, Optional, Tuple, Type, Union +from typing import Any, Literal, Union from airbyte_cdk.sources.file_based.exceptions import ( ConfigValidationError, @@ -14,7 +15,7 @@ SchemaInferenceError, ) -JsonSchemaSupportedType = Union[List[str], Literal["string"], str] +JsonSchemaSupportedType = Union[list[str], Literal["string"], str] SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]] schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}} @@ -40,7 +41,7 @@ def __lt__(self, other: Any) -> bool: return NotImplemented -TYPE_PYTHON_MAPPING: Mapping[str, Tuple[str, Optional[Type[Any]]]] = { +TYPE_PYTHON_MAPPING: Mapping[str, tuple[str, type[Any] | None]] = { "null": ("null", None), "array": ("array", list), "boolean": ("boolean", bool), @@ -53,7 +54,7 @@ def __lt__(self, other: Any) -> bool: PYTHON_TYPE_MAPPING = {t: k for k, (_, t) in TYPE_PYTHON_MAPPING.items()} -def get_comparable_type(value: Any) -> Optional[ComparableType]: +def get_comparable_type(value: Any) -> ComparableType | None: if value == "null": return ComparableType.NULL if value == "boolean": @@ -70,7 +71,7 @@ def get_comparable_type(value: Any) -> Optional[ComparableType]: return None -def get_inferred_type(value: 
Any) -> Optional[ComparableType]: +def get_inferred_type(value: Any) -> ComparableType | None: if value is None: return ComparableType.NULL if isinstance(value, bool): @@ -107,7 +108,7 @@ def merge_schemas(schema1: SchemaType, schema2: SchemaType) -> SchemaType: if not isinstance(t, dict) or "type" not in t or not _is_valid_type(t["type"]): raise SchemaInferenceError(FileBasedSourceError.UNRECOGNIZED_TYPE, key=k, type=t) - merged_schema: Dict[str, Any] = deepcopy(schema1) # type: ignore # as of 2023-08-08, deepcopy can copy Mapping + merged_schema: dict[str, Any] = deepcopy(schema1) # type: ignore # as of 2023-08-08, deepcopy can copy Mapping for k2, t2 in schema2.items(): t1 = merged_schema.get(k2) if t1 is None: @@ -216,7 +217,7 @@ def conforms_to_schema(record: Mapping[str, Any], schema: Mapping[str, Any]) -> return True -def _parse_json_input(input_schema: Union[str, Mapping[str, str]]) -> Optional[Mapping[str, str]]: +def _parse_json_input(input_schema: str | Mapping[str, str]) -> Mapping[str, str] | None: try: if isinstance(input_schema, str): schema: Mapping[str, str] = json.loads(input_schema) @@ -235,8 +236,8 @@ def _parse_json_input(input_schema: Union[str, Mapping[str, str]]) -> Optional[M def type_mapping_to_jsonschema( - input_schema: Optional[Union[str, Mapping[str, str]]], -) -> Optional[Mapping[str, Any]]: + input_schema: str | Mapping[str, str] | None, +) -> Mapping[str, Any] | None: """ Return the user input schema (type mapping), transformed to JSON Schema format. diff --git a/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py b/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py index 139511a98..c00ec40dd 100644 --- a/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +++ b/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py @@ -3,7 +3,8 @@ # from abc import ABC, abstractmethod -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any class AbstractSchemaValidationPolicy(ABC): @@ -12,7 +13,7 @@ class AbstractSchemaValidationPolicy(ABC): @abstractmethod def record_passes_validation_policy( - self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]] + self, record: Mapping[str, Any], schema: Mapping[str, Any] | None ) -> bool: """ Return True if the record passes the user's validation policy. diff --git a/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py b/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py index 261b0fabd..39d58af97 100644 --- a/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +++ b/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import ValidationPolicy from airbyte_cdk.sources.file_based.exceptions import ( @@ -17,7 +18,7 @@ class EmitRecordPolicy(AbstractSchemaValidationPolicy): name = "emit_record" def record_passes_validation_policy( - self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]] + self, record: Mapping[str, Any], schema: Mapping[str, Any] | None ) -> bool: return True @@ -26,7 +27,7 @@ class SkipRecordPolicy(AbstractSchemaValidationPolicy): name = "skip_record" def record_passes_validation_policy( - self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]] + self, record: Mapping[str, Any], schema: Mapping[str, Any] | None ) -> bool: return schema is not None and conforms_to_schema(record, schema) @@ -36,7 +37,7 @@ class WaitForDiscoverPolicy(AbstractSchemaValidationPolicy): validate_schema_before_sync = True def record_passes_validation_policy( - self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]] + self, record: Mapping[str, Any], schema: Mapping[str, Any] | None ) -> bool: if schema is None or not conforms_to_schema(record, schema): raise StopSyncPerValidationPolicy( diff --git a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py index ef258b34d..219d61ae5 100644 --- a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py @@ -3,8 +3,9 @@ # from abc import abstractmethod -from functools import cache, cached_property, lru_cache -from typing import Any, Dict, Iterable, List, Mapping, Optional, Type +from collections.abc import Iterable, Mapping +from functools import cache, cached_property +from typing import Any from typing_extensions import deprecated @@ -53,11 +54,11 @@ class AbstractFileBasedStream(Stream): def __init__( self, config: FileBasedStreamConfig, - catalog_schema: Optional[Mapping[str, Any]], + catalog_schema: Mapping[str, Any] | None, stream_reader: AbstractFileBasedStreamReader, availability_strategy: AbstractFileBasedAvailabilityStrategy, discovery_policy: AbstractDiscoveryPolicy, - parsers: Dict[Type[Any], FileTypeParser], + parsers: dict[type[Any], FileTypeParser], validation_policy: AbstractSchemaValidationPolicy, errors_collector: FileBasedErrorsCollector, cursor: AbstractFileBasedCursor, @@ -78,7 +79,7 @@ def __init__( def primary_key(self) -> PrimaryKeyType: ... @cache - def list_files(self) -> List[RemoteFile]: + def list_files(self) -> list[RemoteFile]: """ List all files that belong to the stream. @@ -98,9 +99,9 @@ def get_files(self) -> Iterable[RemoteFile]: def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[StreamSlice] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: StreamSlice | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any] | AirbyteMessage]: """ Yield all records from all remote files in `list_files_for_this_sync`. 
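Swapping `lru_cache` for `functools.cache` (here on `list_files`, and again on `get_json_schema` in the next hunk) is behaviour-preserving: `cache` is documented as equivalent to `lru_cache(maxsize=None)` with less ceremony. A tiny demonstration, unrelated to the CDK classes themselves:

```python
from functools import cache, lru_cache

calls = {"cache": 0, "lru": 0}


@cache
def with_cache(n: int) -> int:
    calls["cache"] += 1
    return n * n


@lru_cache(maxsize=None)
def with_lru(n: int) -> int:
    calls["lru"] += 1
    return n * n


for _ in range(3):
    with_cache(4)
    with_lru(4)

print(calls)  # {'cache': 1, 'lru': 1}: both memoize unboundedly, only one real call each
```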
@@ -124,9 +125,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: """ This method acts as an adapter between the generic Stream interface and the file-based's stream since file-based streams manage their own states. @@ -134,7 +135,7 @@ def stream_slices( return self.compute_slices() @abstractmethod - def compute_slices(self) -> Iterable[Optional[StreamSlice]]: + def compute_slices(self) -> Iterable[StreamSlice | None]: """ Return a list of slices that will be used to read files in the current sync. :return: The slices to use for the current sync. @@ -142,7 +143,7 @@ def compute_slices(self) -> Iterable[Optional[StreamSlice]]: ... @abstractmethod - @lru_cache(maxsize=None) + @cache def get_json_schema(self) -> Mapping[str, Any]: """ Return the JSON Schema for a stream. @@ -150,7 +151,7 @@ def get_json_schema(self) -> Mapping[str, Any]: ... @abstractmethod - def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]: + def infer_schema(self, files: list[RemoteFile]) -> Mapping[str, Any]: """ Infer the schema for files in the stream. """ @@ -187,7 +188,7 @@ def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy: def name(self) -> str: return self.config.name - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: """ This is a temporary hack. Because file-based, declarative, and concurrent have _slightly_ different cursor implementations the file-based cursor isn't compatible with the cursor-based iteration flow in core.py top-level CDK. 
By setting this to diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py index f02602d58..9a0b0b475 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py @@ -4,8 +4,9 @@ import copy import logging -from functools import cache, lru_cache -from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union +from collections.abc import Iterable, Mapping, MutableMapping +from functools import cache +from typing import TYPE_CHECKING, Any from typing_extensions import deprecated @@ -67,7 +68,7 @@ def create_from_stream( stream: AbstractFileBasedStream, source: AbstractSource, logger: logging.Logger, - state: Optional[MutableMapping[str, Any]], + state: MutableMapping[str, Any] | None, cursor: "AbstractConcurrentFileBasedCursor", ) -> "FileBasedStreamFacade": """ @@ -131,7 +132,7 @@ def __init__( self.validation_policy = legacy_stream.validation_policy @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: if self._abstract_stream.cursor_field is None: return [] else: @@ -150,7 +151,7 @@ def supports_incremental(self) -> bool: def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy: return self._legacy_stream.availability_strategy - @lru_cache(maxsize=None) + @cache def get_json_schema(self) -> Mapping[str, Any]: return self._abstract_stream.get_json_schema() @@ -170,10 +171,10 @@ def get_files(self) -> Iterable[RemoteFile]: def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Mapping[str, Any]]: yield from self._legacy_stream.read_records_from_slice(stream_slice) # type: ignore[misc] # Only Mapping[str, Any] is expected for legacy streams, not AirbyteMessage - def compute_slices(self) -> Iterable[Optional[StreamSlice]]: + def compute_slices(self) -> Iterable[StreamSlice | None]: return self._legacy_stream.compute_slices() - def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]: + def infer_schema(self, files: list[RemoteFile]) -> Mapping[str, Any]: return self._legacy_stream.infer_schema(files) def get_underlying_stream(self) -> DefaultStream: @@ -193,9 +194,9 @@ def read( def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: try: yield from self._read_records() @@ -225,11 +226,11 @@ class FileBasedStreamPartition(Partition): def __init__( self, stream: AbstractFileBasedStream, - _slice: Optional[Mapping[str, Any]], + _slice: Mapping[str, Any] | None, message_repository: MessageRepository, sync_mode: SyncMode, - cursor_field: Optional[List[str]], - state: Optional[MutableMapping[str, Any]], + cursor_field: list[str] | None, + state: MutableMapping[str, Any] | None, ): self._stream = stream self._slice = _slice @@ -281,7 +282,7 @@ def read(self) -> Iterable[Record]: else: raise e - def to_slice(self) -> Optional[Mapping[str, Any]]: + def to_slice(self) -> Mapping[str, Any] | None: if self._slice is None: return None assert len(self._slice["files"]) == 1, ( @@ -320,8 +321,8 @@ def __init__( stream: AbstractFileBasedStream, message_repository: MessageRepository, sync_mode: SyncMode, - cursor_field: 
Optional[List[str]], - state: Optional[MutableMapping[str, Any]], + cursor_field: list[str] | None, + state: MutableMapping[str, Any] | None, cursor: "AbstractConcurrentFileBasedCursor", ): self._stream = stream diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py index 5c30fda4a..255c2af3c 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py @@ -4,8 +4,9 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Iterable, MutableMapping from datetime import datetime -from typing import TYPE_CHECKING, Any, Iterable, List, MutableMapping +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor @@ -33,7 +34,7 @@ def observe(self, record: Record) -> None: ... def close_partition(self, partition: Partition) -> None: ... @abstractmethod - def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None: ... + def set_pending_partitions(self, partitions: list["FileBasedStreamPartition"]) -> None: ... @abstractmethod def add_file(self, file: RemoteFile) -> None: ... diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py index a70169197..ce5ac5601 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py @@ -3,9 +3,10 @@ # import logging +from collections.abc import Iterable, MutableMapping from datetime import datetime, timedelta from threading import RLock -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, MutableMapping, Optional, Tuple +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, Type from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager @@ -41,7 +42,7 @@ def __init__( self, stream_config: FileBasedStreamConfig, stream_name: str, - stream_namespace: Optional[str], + stream_namespace: str | None, stream_state: MutableMapping[str, Any], message_repository: MessageRepository, connector_state_manager: ConnectorStateManager, @@ -60,7 +61,7 @@ def __init__( ) self._state_lock = RLock() self._pending_files_lock = RLock() - self._pending_files: Optional[Dict[str, RemoteFile]] = None + self._pending_files: dict[str, RemoteFile] | None = None self._file_to_datetime_history = stream_state.get("history", {}) if stream_state else {} self._prev_cursor_value = self._compute_prev_sync_cursor(stream_state) self._sync_start = self._compute_start_time() @@ -79,7 +80,7 @@ def close_partition(self, partition: Partition) -> None: "Expected pending partitions to be set but it was not. This is unexpected. Please contact Support." 
) - def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None: + def set_pending_partitions(self, partitions: list["FileBasedStreamPartition"]) -> None: with self._pending_files_lock: self._pending_files = {} for partition in partitions: @@ -93,7 +94,7 @@ def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) - ) self._pending_files.update({file.uri: file}) - def _compute_prev_sync_cursor(self, value: Optional[StreamState]) -> Tuple[datetime, str]: + def _compute_prev_sync_cursor(self, value: StreamState | None) -> tuple[datetime, str]: if not value: return self.zero_value, "" prev_cursor_str = value.get(self._cursor_field.cursor_field_key) or self.zero_cursor_value @@ -112,12 +113,12 @@ def _compute_prev_sync_cursor(self, value: Optional[StreamState]) -> Tuple[datet cursor_dt, cursor_uri = cursor_str.split("_", 1) return datetime.strptime(cursor_dt, self.DATE_TIME_FORMAT), cursor_uri - def _get_cursor_key_from_file(self, file: Optional[RemoteFile]) -> str: + def _get_cursor_key_from_file(self, file: RemoteFile | None) -> str: if file: return f"{datetime.strftime(file.last_modified, self.DATE_TIME_FORMAT)}_{file.uri}" return self.zero_cursor_value - def _compute_earliest_file_in_history(self) -> Optional[RemoteFile]: + def _compute_earliest_file_in_history(self) -> RemoteFile | None: with self._state_lock: if self._file_to_datetime_history: filename, last_modified = min( @@ -196,13 +197,13 @@ def _get_new_cursor_value(self) -> str: else: return f"{self.zero_value.strftime(self.DATE_TIME_FORMAT)}_" - def _compute_earliest_pending_file(self) -> Optional[RemoteFile]: + def _compute_earliest_pending_file(self) -> RemoteFile | None: if self._pending_files: return min(self._pending_files.values(), key=lambda x: x.last_modified) else: return None - def _compute_latest_file_in_history(self) -> Optional[RemoteFile]: + def _compute_latest_file_in_history(self) -> RemoteFile | None: with self._state_lock: if self._file_to_datetime_history: filename, last_modified = max( diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py index e219292d1..b208ac1dd 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Iterable, MutableMapping from datetime import datetime -from typing import TYPE_CHECKING, Any, Iterable, List, MutableMapping, Optional +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig @@ -29,7 +30,7 @@ def __init__( self, stream_config: FileBasedStreamConfig, message_repository: MessageRepository, - stream_namespace: Optional[str], + stream_namespace: str | None, **kwargs: Any, ): self._stream_name = stream_config.name @@ -50,7 +51,7 @@ def observe(self, record: Record) -> None: def close_partition(self, partition: Partition) -> None: pass - def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None: + def set_pending_partitions(self, partitions: list["FileBasedStreamPartition"]) -> None: pass def add_file(self, file: RemoteFile) -> None: diff --git 
a/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py index 4a5eadb4e..29610c6dc 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py @@ -4,8 +4,9 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Iterable, MutableMapping from datetime import datetime -from typing import Any, Iterable, MutableMapping +from typing import Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.remote_file import RemoteFile diff --git a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py index 08ad8c3ae..00ca7f116 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Iterable, MutableMapping from datetime import datetime, timedelta -from typing import Any, Iterable, MutableMapping, Optional +from typing import Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.remote_file import RemoteFile @@ -34,7 +35,7 @@ def __init__(self, stream_config: FileBasedStreamConfig, **_: Any): ) self._start_time = self._compute_start_time() - self._initial_earliest_file_in_history: Optional[RemoteFile] = None + self._initial_earliest_file_in_history: RemoteFile | None = None def set_initial_state(self, value: StreamState) -> None: self._file_to_datetime_history = value.get("history", {}) @@ -59,7 +60,7 @@ def get_state(self) -> StreamState: state = {"history": self._file_to_datetime_history, self.CURSOR_FIELD: self._get_cursor()} return state - def _get_cursor(self) -> Optional[str]: + def _get_cursor(self) -> str | None: """ Returns the cursor value. 
@@ -126,7 +127,7 @@ def get_files_to_sync( def get_start_time(self) -> datetime: return self._start_time - def _compute_earliest_file_in_history(self) -> Optional[RemoteFile]: + def _compute_earliest_file_in_history(self) -> RemoteFile | None: if self._file_to_datetime_history: filename, last_modified = min( self._file_to_datetime_history.items(), key=lambda f: (f[1], f[0]) diff --git a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py index 42d01577c..e9c977120 100644 --- a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py @@ -6,10 +6,11 @@ import itertools import traceback from collections import defaultdict +from collections.abc import Iterable, Mapping, MutableMapping from copy import deepcopy from functools import cache from os import path -from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union +from typing import Any from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level from airbyte_cdk.models import Type as MessageType @@ -76,7 +77,7 @@ def state(self, value: MutableMapping[str, Any]) -> None: self._cursor.set_initial_state(value) @property # type: ignore # mypy complains wrong type, but AbstractFileBasedCursor is parent of file-based cursors - def cursor(self) -> Optional[AbstractFileBasedCursor]: + def cursor(self) -> AbstractFileBasedCursor | None: return self._cursor @cursor.setter @@ -94,8 +95,8 @@ def primary_key(self) -> PrimaryKeyType: ) def _filter_schema_invalid_properties( - self, configured_catalog_json_schema: Dict[str, Any] - ) -> Dict[str, Any]: + self, configured_catalog_json_schema: dict[str, Any] + ) -> dict[str, Any]: if self.use_file_transfer: return { "type": "object", @@ -109,9 +110,9 @@ def _filter_schema_invalid_properties( return super()._filter_schema_invalid_properties(configured_catalog_json_schema) def _duplicated_files_names( - self, slices: List[dict[str, List[RemoteFile]]] - ) -> List[dict[str, List[str]]]: - seen_file_names: Dict[str, List[str]] = defaultdict(list) + self, slices: list[dict[str, list[RemoteFile]]] + ) -> list[dict[str, list[str]]]: + seen_file_names: dict[str, list[str]] = defaultdict(list) for file_slice in slices: for file_found in file_slice[self.FILES_KEY]: file_name = path.basename(file_found.uri) @@ -120,7 +121,7 @@ def _duplicated_files_names( {file_name: paths} for file_name, paths in seen_file_names.items() if len(paths) > 1 ] - def compute_slices(self) -> Iterable[Optional[Mapping[str, Any]]]: + def compute_slices(self) -> Iterable[Mapping[str, Any] | None]: # Sort files by last_modified, uri and return them grouped by last_modified all_files = self.list_files() files_to_read = self._cursor.get_files_to_sync(all_files, self.logger) @@ -250,7 +251,7 @@ def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Airbyte ) @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: """ Override to return the default cursor field used by this stream e.g: an API entity might always use created_at as the cursor field. :return: The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor. 
@@ -341,7 +342,7 @@ def get_files(self) -> Iterable[RemoteFile]: self.config.globs or [], self.config.legacy_prefix, self.logger ) - def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]: + def infer_schema(self, files: list[RemoteFile]) -> Mapping[str, Any]: loop = asyncio.get_event_loop() schema = loop.run_until_complete(self._infer_schema(files)) # as infer schema returns a Mapping that is assumed to be immutable, we need to create a deepcopy to avoid modifying the reference @@ -356,7 +357,7 @@ def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]: if "null" not in v: schema[k] = ["null"] + v elif v != "null": - if isinstance(v, (str, list)): + if isinstance(v, str | list): schema[k] = ["null", v] else: DefaultFileBasedStream._fill_nulls(v) @@ -367,7 +368,7 @@ def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]: DefaultFileBasedStream._fill_nulls(item) return schema - async def _infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]: + async def _infer_schema(self, files: list[RemoteFile]) -> Mapping[str, Any]: """ Infer the schema for a stream. @@ -375,7 +376,7 @@ async def _infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]: Dispatch on file type. """ base_schema: SchemaType = {} - pending_tasks: Set[asyncio.tasks.Task[SchemaType]] = set() + pending_tasks: set[asyncio.tasks.Task[SchemaType]] = set() n_started, n_files = 0, len(files) files_iterator = iter(files) diff --git a/airbyte_cdk/sources/file_based/stream/identities_stream.py b/airbyte_cdk/sources/file_based/stream/identities_stream.py index 2c582d0b2..8384aad4b 100644 --- a/airbyte_cdk/sources/file_based/stream/identities_stream.py +++ b/airbyte_cdk/sources/file_based/stream/identities_stream.py @@ -2,8 +2,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Iterable, Mapping, MutableMapping from functools import cache -from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional +from typing import Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy @@ -25,7 +26,7 @@ class FileIdentitiesStream(IdentitiesStream): def __init__( self, - catalog_schema: Optional[Mapping[str, Any]], + catalog_schema: Mapping[str, Any] | None, stream_permissions_reader: AbstractFileBasedStreamPermissionsReader, discovery_policy: AbstractDiscoveryPolicy, errors_collector: FileBasedErrorsCollector, @@ -41,7 +42,7 @@ def __init__( def primary_key(self) -> PrimaryKeyType: return None - def load_identity_groups(self) -> Iterable[Dict[str, Any]]: + def load_identity_groups(self) -> Iterable[dict[str, Any]]: return self.stream_permissions_reader.load_identity_groups(logger=self.logger) @cache diff --git a/airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py index 52003c7ae..52a5b7156 100644 --- a/airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py @@ -3,7 +3,8 @@ # import traceback -from typing import Any, Dict, Iterable +from collections.abc import Iterable +from typing import Any from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level from airbyte_cdk.models import Type as MessageType @@ -36,8 +37,8 @@ def __init__( self.stream_permissions_reader = stream_permissions_reader def _filter_schema_invalid_properties( - self, configured_catalog_json_schema: Dict[str, Any] - ) -> Dict[str, Any]: + self, configured_catalog_json_schema: dict[str, Any] + ) -> dict[str, Any]: return self.stream_permissions_reader.file_permissions_schema def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]: diff --git a/airbyte_cdk/sources/file_based/types.py b/airbyte_cdk/sources/file_based/types.py index b83bf37a3..1180ecc9a 100644 --- a/airbyte_cdk/sources/file_based/types.py +++ b/airbyte_cdk/sources/file_based/types.py @@ -4,7 +4,8 @@ from __future__ import annotations -from typing import Any, Mapping, MutableMapping +from collections.abc import Mapping, MutableMapping +from typing import Any StreamSlice = Mapping[str, Any] StreamState = MutableMapping[str, Any] diff --git a/airbyte_cdk/sources/http_logger.py b/airbyte_cdk/sources/http_logger.py index 6d6785c86..77ba7d333 100644 --- a/airbyte_cdk/sources/http_logger.py +++ b/airbyte_cdk/sources/http_logger.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Optional, Union import requests @@ -13,9 +12,9 @@ def format_http_message( response: requests.Response, title: str, description: str, - stream_name: Optional[str], + stream_name: str | None, is_auxiliary: bool | None = None, - type: Optional[str] = None, + type: str | None = None, ) -> LogMessage: request_type: str = type if type else "HTTP" request = response.request @@ -51,5 +50,5 @@ def format_http_message( return log_message # type: ignore[return-value] # got "dict[str, object]", expected "dict[str, JsonType]" -def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]: - return body_str.decode() if isinstance(body_str, (bytes, bytearray)) else body_str +def _normalize_body_string(body_str: str | bytes | None) -> str | None: + return body_str.decode() if isinstance(body_str, bytes | bytearray) else body_str diff --git a/airbyte_cdk/sources/message/repository.py b/airbyte_cdk/sources/message/repository.py index 2fc156e8c..20caa6d51 100644 --- a/airbyte_cdk/sources/message/repository.py +++ b/airbyte_cdk/sources/message/repository.py @@ -6,7 +6,7 @@ import logging from abc import ABC, abstractmethod from collections import deque -from typing import Callable, Deque, Iterable, List, Optional +from collections.abc import Callable, Iterable from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, Type from airbyte_cdk.sources.utils.types import JsonType @@ -73,7 +73,7 @@ def consume_queue(self) -> Iterable[AirbyteMessage]: class InMemoryMessageRepository(MessageRepository): def __init__(self, log_level: Level = Level.INFO) -> None: - self._message_queue: Deque[AirbyteMessage] = deque() + self._message_queue: deque[AirbyteMessage] = deque() self._log_level = log_level def emit_message(self, message: AirbyteMessage) -> None: @@ -119,7 +119,7 @@ def consume_queue(self) -> Iterable[AirbyteMessage]: return self._decorated.consume_queue() def _append_second_to_first( - self, first: LogMessage, second: LogMessage, path: Optional[List[str]] = None + self, first: LogMessage, second: LogMessage, path: list[str] | None = None ) -> LogMessage: if path is None: path = [] @@ -130,7 +130,7 @@ def _append_second_to_first( self._append_second_to_first(first[key], second[key], path + [str(key)]) # type: ignore # type is verified above else: if first[key] != second[key]: - _LOGGER.warning("Conflict at %s" % ".".join(path + [str(key)])) + _LOGGER.warning("Conflict at {}".format(".".join(path + [str(key)]))) first[key] = second[key] else: first[key] = second[key] diff --git a/airbyte_cdk/sources/source.py b/airbyte_cdk/sources/source.py index 2958d82ca..c325544bf 100644 --- a/airbyte_cdk/sources/source.py +++ b/airbyte_cdk/sources/source.py @@ -5,7 +5,8 @@ import logging from abc import ABC, abstractmethod -from typing import Any, Generic, Iterable, List, Mapping, Optional, TypeVar +from collections.abc import Iterable, Mapping +from typing import Any, Generic, TypeVar from airbyte_cdk.connector import BaseConnector, DefaultConnectorMixin, TConfig from airbyte_cdk.models import ( @@ -38,7 +39,7 @@ def read( logger: logging.Logger, config: TConfig, catalog: TCatalog, - state: Optional[TState] = None, + state: TState | None = None, ) -> Iterable[AirbyteMessage]: """ Returns a generator of the AirbyteMessages generated by reading the source with the given configuration, catalog, and state. 
@@ -54,12 +55,12 @@ def discover(self, logger: logging.Logger, config: TConfig) -> AirbyteCatalog: class Source( DefaultConnectorMixin, - BaseSource[Mapping[str, Any], List[AirbyteStateMessage], ConfiguredAirbyteCatalog], + BaseSource[Mapping[str, Any], list[AirbyteStateMessage], ConfiguredAirbyteCatalog], ABC, ): # can be overridden to change an input state. @classmethod - def read_state(cls, state_path: str) -> List[AirbyteStateMessage]: + def read_state(cls, state_path: str) -> list[AirbyteStateMessage]: """ Retrieves the input state of a sync by reading from the specified JSON file. Incoming state can be deserialized into either a JSON object for legacy state input or as a list of AirbyteStateMessages for the per-stream state format. Regardless of the diff --git a/airbyte_cdk/sources/specs/transfer_modes.py b/airbyte_cdk/sources/specs/transfer_modes.py index 7b5651e42..9c5b2f07a 100644 --- a/airbyte_cdk/sources/specs/transfer_modes.py +++ b/airbyte_cdk/sources/specs/transfer_modes.py @@ -4,7 +4,7 @@ from typing import Literal -from pydantic.v1 import AnyUrl, BaseModel, Field +from pydantic.v1 import BaseModel, Field from airbyte_cdk import OneOfOptionConfig diff --git a/airbyte_cdk/sources/streams/availability_strategy.py b/airbyte_cdk/sources/streams/availability_strategy.py index 312ddae19..b36f8b303 100644 --- a/airbyte_cdk/sources/streams/availability_strategy.py +++ b/airbyte_cdk/sources/streams/availability_strategy.py @@ -5,7 +5,8 @@ import logging import typing from abc import ABC, abstractmethod -from typing import Any, Mapping, Optional, Tuple +from collections.abc import Mapping +from typing import Any, Optional from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams.core import Stream, StreamData @@ -22,7 +23,7 @@ class AvailabilityStrategy(ABC): @abstractmethod def check_availability( self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None - ) -> Tuple[bool, Optional[str]]: + ) -> tuple[bool, str | None]: """ Checks stream availability. @@ -36,7 +37,7 @@ def check_availability( """ @staticmethod - def get_first_stream_slice(stream: Stream) -> Optional[Mapping[str, Any]]: + def get_first_stream_slice(stream: Stream) -> Mapping[str, Any] | None: """ Gets the first stream_slice from a given stream's stream_slices. :param stream: stream @@ -55,7 +56,7 @@ def get_first_stream_slice(stream: Stream) -> Optional[Mapping[str, Any]]: @staticmethod def get_first_record_for_slice( - stream: Stream, stream_slice: Optional[Mapping[str, Any]] + stream: Stream, stream_slice: Mapping[str, Any] | None ) -> StreamData: """ Gets the first record for a stream_slice of a stream. 
diff --git a/airbyte_cdk/sources/streams/call_rate.py b/airbyte_cdk/sources/streams/call_rate.py index 14f823e45..be9375880 100644 --- a/airbyte_cdk/sources/streams/call_rate.py +++ b/airbyte_cdk/sources/streams/call_rate.py @@ -8,9 +8,10 @@ import logging import re import time +from collections.abc import Mapping from datetime import timedelta from threading import RLock -from typing import TYPE_CHECKING, Any, Mapping, Optional +from typing import TYPE_CHECKING, Any from urllib import parse import requests @@ -77,9 +78,7 @@ def try_acquire(self, request: Any, weight: int) -> None: """ @abc.abstractmethod - def update( - self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime] - ) -> None: + def update(self, available_calls: int | None, call_reset_ts: datetime.datetime | None) -> None: """Update call rate counting with current values :param available_calls: @@ -104,10 +103,10 @@ class HttpRequestMatcher(RequestMatcher): def __init__( self, - method: Optional[str] = None, - url: Optional[str] = None, - params: Optional[Mapping[str, Any]] = None, - headers: Optional[Mapping[str, Any]] = None, + method: str | None = None, + url: str | None = None, + params: Mapping[str, Any] | None = None, + headers: Mapping[str, Any] | None = None, ): """Constructor @@ -161,11 +160,11 @@ class HttpRequestRegexMatcher(RequestMatcher): def __init__( self, - method: Optional[str] = None, - url_base: Optional[str] = None, - url_path_pattern: Optional[str] = None, - params: Optional[Mapping[str, Any]] = None, - headers: Optional[Mapping[str, Any]] = None, + method: str | None = None, + url_base: str | None = None, + url_path_pattern: str | None = None, + params: Mapping[str, Any] | None = None, + headers: Mapping[str, Any] | None = None, ): """ :param method: HTTP method (e.g. "GET", "POST"); compared case-insensitively. @@ -293,9 +292,7 @@ class UnlimitedCallRatePolicy(BaseCallRatePolicy): def try_acquire(self, request: Any, weight: int) -> None: """Do nothing""" - def update( - self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime] - ) -> None: + def update(self, available_calls: int | None, call_reset_ts: datetime.datetime | None) -> None: """Do nothing""" @@ -355,9 +352,7 @@ def __str__(self) -> str: f"matchers=[{matcher_str}])" ) - def update( - self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime] - ) -> None: + def update(self, available_calls: int | None, call_reset_ts: datetime.datetime | None) -> None: """Update call rate counters, by default, only reacts to decreasing updates of available_calls and changes to call_reset_ts. We ignore updates with available_calls > current_available_calls to support call rate limits that are lower than API limits. 
@@ -439,9 +434,7 @@ def try_acquire(self, request: Any, weight: int) -> None: time_to_wait=timedelta(milliseconds=time_to_wait), ) - def update( - self, available_calls: Optional[int], call_reset_ts: Optional[datetime.datetime] - ) -> None: + def update(self, available_calls: int | None, call_reset_ts: datetime.datetime | None) -> None: """Adjust call bucket to reflect the state of the API server :param available_calls: @@ -486,9 +479,7 @@ class AbstractAPIBudget(abc.ABC): """ @abc.abstractmethod - def acquire_call( - self, request: Any, block: bool = True, timeout: Optional[float] = None - ) -> None: + def acquire_call(self, request: Any, block: bool = True, timeout: float | None = None) -> None: """Try to get a call from budget, will block by default :param request: @@ -498,7 +489,7 @@ def acquire_call( """ @abc.abstractmethod - def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]: + def get_matching_policy(self, request: Any) -> AbstractCallRatePolicy | None: """Find matching call rate policy for specific request""" @abc.abstractmethod @@ -543,15 +534,13 @@ def _extract_endpoint(self, request: Any) -> str: return endpoint return "unknown endpoint" - def get_matching_policy(self, request: Any) -> Optional[AbstractCallRatePolicy]: + def get_matching_policy(self, request: Any) -> AbstractCallRatePolicy | None: for policy in self._policies: if policy.matches(request): return policy return None - def acquire_call( - self, request: Any, block: bool = True, timeout: Optional[float] = None - ) -> None: + def acquire_call(self, request: Any, block: bool = True, timeout: float | None = None) -> None: """Try to get a call from budget, will block by default. Matchers will be called sequentially in the same order they were added. The first matcher that returns True will @@ -581,7 +570,7 @@ def update_from_response(self, request: Any, response: Any) -> None: pass def _do_acquire( - self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: Optional[float] + self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: float | None ) -> None: """Internal method to try to acquire a call credit. @@ -658,16 +647,14 @@ def update_from_response(self, request: Any, response: Any) -> None: reset_ts = self.get_reset_ts_from_response(response) policy.update(available_calls=available_calls, call_reset_ts=reset_ts) - def get_reset_ts_from_response( - self, response: requests.Response - ) -> Optional[datetime.datetime]: + def get_reset_ts_from_response(self, response: requests.Response) -> datetime.datetime | None: if response.headers.get(self._ratelimit_reset_header): return datetime.datetime.fromtimestamp( int(response.headers[self._ratelimit_reset_header]) ) return None - def get_calls_left_from_response(self, response: requests.Response) -> Optional[int]: + def get_calls_left_from_response(self, response: requests.Response) -> int | None: if response.headers.get(self._ratelimit_remaining_header): return int(response.headers[self._ratelimit_remaining_header]) diff --git a/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py b/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py index 6e4ef98d7..aecef4a5b 100644 --- a/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +++ b/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py @@ -1,8 +1,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
from abc import ABC, abstractmethod +from collections.abc import Iterable, Mapping from enum import Enum -from typing import Any, Iterable, Mapping, Optional +from typing import Any from airbyte_cdk.sources.types import StreamSlice @@ -25,7 +26,7 @@ class CheckpointReader(ABC): """ @abstractmethod - def next(self) -> Optional[Mapping[str, Any]]: + def next(self) -> Mapping[str, Any] | None: """ Returns the next slice that will be used to fetch the next group of records. Returning None indicates that the reader has finished iterating over all slices. @@ -41,7 +42,7 @@ def observe(self, new_state: Mapping[str, Any]) -> None: """ @abstractmethod - def get_checkpoint(self) -> Optional[Mapping[str, Any]]: + def get_checkpoint(self) -> Mapping[str, Any] | None: """ Retrieves the current state value of the stream. The connector does not emit state messages if the checkpoint value is None. """ @@ -54,13 +55,13 @@ class IncrementalCheckpointReader(CheckpointReader): """ def __init__( - self, stream_state: Mapping[str, Any], stream_slices: Iterable[Optional[Mapping[str, Any]]] + self, stream_state: Mapping[str, Any], stream_slices: Iterable[Mapping[str, Any] | None] ): - self._state: Optional[Mapping[str, Any]] = stream_state + self._state: Mapping[str, Any] | None = stream_state self._stream_slices = iter(stream_slices) self._has_slices = False - def next(self) -> Optional[Mapping[str, Any]]: + def next(self) -> Mapping[str, Any] | None: try: next_slice = next(self._stream_slices) self._has_slices = True @@ -76,7 +77,7 @@ def next(self) -> Optional[Mapping[str, Any]]: def observe(self, new_state: Mapping[str, Any]) -> None: self._state = new_state - def get_checkpoint(self) -> Optional[Mapping[str, Any]]: + def get_checkpoint(self) -> Mapping[str, Any] | None: return self._state @@ -92,7 +93,7 @@ class CursorBasedCheckpointReader(CheckpointReader): def __init__( self, cursor: Cursor, - stream_slices: Iterable[Optional[Mapping[str, Any]]], + stream_slices: Iterable[Mapping[str, Any] | None], read_state_from_cursor: bool = False, ): self._cursor = cursor @@ -100,11 +101,11 @@ def __init__( # read_state_from_cursor is used to delineate that partitions should determine when to stop syncing dynamically according # to the value of the state at runtime. This currently only applies to streams that use resumable full refresh. 
self._read_state_from_cursor = read_state_from_cursor - self._current_slice: Optional[StreamSlice] = None + self._current_slice: StreamSlice | None = None self._finished_sync = False - self._previous_state: Optional[Mapping[str, Any]] = None + self._previous_state: Mapping[str, Any] | None = None - def next(self) -> Optional[Mapping[str, Any]]: + def next(self) -> Mapping[str, Any] | None: try: self.current_slice = self._find_next_slice() return self.current_slice @@ -117,7 +118,7 @@ def observe(self, new_state: Mapping[str, Any]) -> None: # while processing records pass - def get_checkpoint(self) -> Optional[Mapping[str, Any]]: + def get_checkpoint(self) -> Mapping[str, Any] | None: # This is used to avoid sending a duplicate state messages new_state = self._cursor.get_stream_state() if new_state != self._previous_state: @@ -194,7 +195,7 @@ def _find_next_slice(self) -> StreamSlice: return self.read_and_convert_slice() @property - def current_slice(self) -> Optional[StreamSlice]: + def current_slice(self) -> StreamSlice | None: return self._current_slice @current_slice.setter @@ -234,7 +235,7 @@ class LegacyCursorBasedCheckpointReader(CursorBasedCheckpointReader): def __init__( self, cursor: Cursor, - stream_slices: Iterable[Optional[Mapping[str, Any]]], + stream_slices: Iterable[Mapping[str, Any] | None], read_state_from_cursor: bool = False, ): super().__init__( @@ -243,7 +244,7 @@ def __init__( read_state_from_cursor=read_state_from_cursor, ) - def next(self) -> Optional[Mapping[str, Any]]: + def next(self) -> Mapping[str, Any] | None: try: self.current_slice = self._find_next_slice() @@ -293,7 +294,7 @@ def __init__(self, stream_state: Mapping[str, Any]): self._first_page = bool(stream_state == {}) self._state: Mapping[str, Any] = stream_state - def next(self) -> Optional[Mapping[str, Any]]: + def next(self) -> Mapping[str, Any] | None: if self._first_page: self._first_page = False return self._state @@ -305,7 +306,7 @@ def next(self) -> Optional[Mapping[str, Any]]: def observe(self, new_state: Mapping[str, Any]) -> None: self._state = new_state - def get_checkpoint(self) -> Optional[Mapping[str, Any]]: + def get_checkpoint(self) -> Mapping[str, Any] | None: return self._state or {} @@ -315,11 +316,11 @@ class FullRefreshCheckpointReader(CheckpointReader): is not capable of managing state. At the end of a sync, a final state message is emitted to signal completion. 
""" - def __init__(self, stream_slices: Iterable[Optional[Mapping[str, Any]]]): + def __init__(self, stream_slices: Iterable[Mapping[str, Any] | None]): self._stream_slices = iter(stream_slices) self._final_checkpoint = False - def next(self) -> Optional[Mapping[str, Any]]: + def next(self) -> Mapping[str, Any] | None: try: return next(self._stream_slices) except StopIteration: @@ -329,7 +330,7 @@ def next(self) -> Optional[Mapping[str, Any]]: def observe(self, new_state: Mapping[str, Any]) -> None: pass - def get_checkpoint(self) -> Optional[Mapping[str, Any]]: + def get_checkpoint(self) -> Mapping[str, Any] | None: if self._final_checkpoint: return {"__ab_no_cursor_state_message": True} return None diff --git a/airbyte_cdk/sources/streams/checkpoint/cursor.py b/airbyte_cdk/sources/streams/checkpoint/cursor.py index 6d758bf4e..8ddbe32b0 100644 --- a/airbyte_cdk/sources/streams/checkpoint/cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/cursor.py @@ -3,7 +3,7 @@ # from abc import ABC, abstractmethod -from typing import Any, Optional +from typing import Any from airbyte_cdk.sources.types import Record, StreamSlice, StreamState @@ -69,7 +69,7 @@ def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: """ @abstractmethod - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: """ Get the state value of a specific stream_slice. For incremental or resumable full refresh cursors which only manage state in a single dimension this is the entire state object. For per-partition cursors used by substreams, this returns the state of diff --git a/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py b/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py index e0dee4a92..de7a8e911 100644 --- a/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +++ b/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. import json -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any class PerPartitionKeySerializer: diff --git a/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py b/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py index 86abd253f..d0eb612f6 100644 --- a/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
from dataclasses import dataclass -from typing import Any, Optional +from typing import Any from airbyte_cdk.sources.streams.checkpoint import Cursor from airbyte_cdk.sources.types import Record, StreamSlice, StreamState @@ -46,6 +46,6 @@ def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: """ return False - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: # A top-level RFR cursor only manages the state of a single partition return self._cursor diff --git a/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py b/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py index 9966959f0..374b239fb 100644 --- a/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. +from collections.abc import Mapping, MutableMapping from dataclasses import dataclass -from typing import Any, Mapping, MutableMapping, Optional +from typing import Any from airbyte_cdk.models import FailureType from airbyte_cdk.sources.streams.checkpoint import Cursor @@ -95,7 +96,7 @@ def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: """ return False - def select_state(self, stream_slice: Optional[StreamSlice] = None) -> Optional[StreamState]: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: if not stream_slice: raise ValueError("A partition needs to be provided in order to extract a state") diff --git a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py index 26e6f09d4..4e5d321d8 100644 --- a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py @@ -3,7 +3,8 @@ # from abc import ABC, abstractmethod -from typing import Any, Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Any from typing_extensions import deprecated @@ -58,7 +59,7 @@ def name(self) -> str: @property @abstractmethod - def cursor_field(self) -> Optional[str]: + def cursor_field(self) -> str | None: """ Override to return the default cursor field used by this stream e.g: an API entity might always use created_at as the cursor field. :return: The name of the field used as a cursor. Nested cursor fields are not supported. diff --git a/airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py b/airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py index 18cacbc50..4d44c9083 100644 --- a/airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py +++ b/airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py @@ -1,7 +1,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
from abc import ABC, abstractmethod -from typing import Generic, Optional, TypeVar +from typing import Generic, TypeVar from airbyte_cdk.sources.streams.concurrent.exceptions import ExceptionWithDisplayMessage @@ -21,7 +21,7 @@ def source_defined_cursor(self) -> bool: # Streams must be aware of their cursor at instantiation time return True - def get_error_display_message(self, exception: BaseException) -> Optional[str]: + def get_error_display_message(self, exception: BaseException) -> str | None: """ Retrieves the user-friendly display message that corresponds to an exception. This will be called when encountering an exception while reading records from the stream, and used to build the AirbyteTraceMessage. diff --git a/airbyte_cdk/sources/streams/concurrent/adapters.py b/airbyte_cdk/sources/streams/concurrent/adapters.py index 7da594155..19ba584a5 100644 --- a/airbyte_cdk/sources/streams/concurrent/adapters.py +++ b/airbyte_cdk/sources/streams/concurrent/adapters.py @@ -5,8 +5,9 @@ import copy import json import logging -from functools import lru_cache -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union +from collections.abc import Iterable, Mapping, MutableMapping +from functools import cache +from typing import Any, Optional from typing_extensions import deprecated @@ -68,7 +69,7 @@ def create_from_stream( stream: Stream, source: AbstractSource, logger: logging.Logger, - state: Optional[MutableMapping[str, Any]], + state: MutableMapping[str, Any] | None, cursor: Cursor, ) -> Stream: """ @@ -155,9 +156,9 @@ def read( def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: try: yield from self._read_records() @@ -187,22 +188,22 @@ def name(self) -> str: return self._abstract_stream.name @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: # This method is not expected to be called directly. It is only implemented for backward compatibility with the old interface return self.as_airbyte_stream().source_defined_primary_key # type: ignore # source_defined_primary_key is known to be an Optional[List[List[str]]] @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: if self._abstract_stream.cursor_field is None: return [] else: return self._abstract_stream.cursor_field @property - def cursor(self) -> Optional[Cursor]: # type: ignore[override] # StreamFaced expects to use only airbyte_cdk.sources.streams.concurrent.cursor.Cursor + def cursor(self) -> Cursor | None: # type: ignore[override] # StreamFaced expects to use only airbyte_cdk.sources.streams.concurrent.cursor.Cursor return self._cursor - @lru_cache(maxsize=None) + @cache def get_json_schema(self) -> Mapping[str, Any]: return self._abstract_stream.get_json_schema() @@ -212,7 +213,7 @@ def supports_incremental(self) -> bool: def check_availability( self, logger: logging.Logger, source: Optional["Source"] = None - ) -> Tuple[bool, Optional[str]]: + ) -> tuple[bool, str | None]: """ Verifies the stream is available. 
Delegates to the underlying AbstractStream and ignores the parameters :param logger: (ignored) @@ -254,11 +255,11 @@ class StreamPartition(Partition): def __init__( self, stream: Stream, - _slice: Optional[Mapping[str, Any]], + _slice: Mapping[str, Any] | None, message_repository: MessageRepository, sync_mode: SyncMode, - cursor_field: Optional[List[str]], - state: Optional[MutableMapping[str, Any]], + cursor_field: list[str] | None, + state: MutableMapping[str, Any] | None, ): """ :param stream: The stream to delegate to @@ -319,7 +320,7 @@ def read(self) -> Iterable[Record]: else: raise e - def to_slice(self) -> Optional[Mapping[str, Any]]: + def to_slice(self) -> Mapping[str, Any] | None: return self._slice def __hash__(self) -> int: @@ -345,8 +346,8 @@ def __init__( stream: Stream, message_repository: MessageRepository, sync_mode: SyncMode, - cursor_field: Optional[List[str]], - state: Optional[MutableMapping[str, Any]], + cursor_field: list[str] | None, + state: MutableMapping[str, Any] | None, ): """ :param stream: The stream to delegate to @@ -382,7 +383,7 @@ def __init__(self, abstract_availability_strategy: AbstractAvailabilityStrategy) def check_availability( self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None - ) -> Tuple[bool, Optional[str]]: + ) -> tuple[bool, str | None]: """ Checks stream availability. diff --git a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py index 118a7d0bb..48235d3f8 100644 --- a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py +++ b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py @@ -4,7 +4,6 @@ import logging from abc import ABC, abstractmethod -from typing import Optional from typing_extensions import deprecated @@ -19,7 +18,7 @@ def is_available(self) -> bool: """ @abstractmethod - def message(self) -> Optional[str]: + def message(self) -> str | None: """ :return: A message describing why the stream is not available. If the stream is available, this should return None. 
""" @@ -29,7 +28,7 @@ class StreamAvailable(StreamAvailability): def is_available(self) -> bool: return True - def message(self) -> Optional[str]: + def message(self) -> str | None: return None @@ -40,7 +39,7 @@ def __init__(self, message: str): def is_available(self) -> bool: return False - def message(self) -> Optional[str]: + def message(self) -> str | None: return self._message diff --git a/airbyte_cdk/sources/streams/concurrent/clamping.py b/airbyte_cdk/sources/streams/concurrent/clamping.py index 022534bc7..da2fb8fc2 100644 --- a/airbyte_cdk/sources/streams/concurrent/clamping.py +++ b/airbyte_cdk/sources/streams/concurrent/clamping.py @@ -1,7 +1,7 @@ from abc import ABC +from collections.abc import Callable from datetime import datetime, timedelta from enum import Enum -from typing import Callable from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index 88d15bc8a..dd8731ad8 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -5,16 +5,9 @@ import functools import logging from abc import ABC, abstractmethod +from collections.abc import Callable, Iterable, Mapping, MutableMapping from typing import ( Any, - Callable, - Iterable, - List, - Mapping, - MutableMapping, - Optional, - Tuple, - Union, ) from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager @@ -32,7 +25,7 @@ LOGGER = logging.getLogger("airbyte") -def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any: +def _extract_value(mapping: Mapping[str, Any], path: list[str]) -> Any: return functools.reduce(lambda a, b: a[b], path, mapping) @@ -88,7 +81,7 @@ class FinalStateCursor(Cursor): def __init__( self, stream_name: str, - stream_namespace: Optional[str], + stream_namespace: str | None, message_repository: MessageRepository, ) -> None: self._stream_name = stream_name @@ -131,18 +124,18 @@ class ConcurrentCursor(Cursor): def __init__( self, stream_name: str, - stream_namespace: Optional[str], + stream_namespace: str | None, stream_state: Any, message_repository: MessageRepository, connector_state_manager: ConnectorStateManager, connector_state_converter: AbstractStreamStateConverter, cursor_field: CursorField, - slice_boundary_fields: Optional[Tuple[str, str]], - start: Optional[CursorValueType], + slice_boundary_fields: tuple[str, str] | None, + start: CursorValueType | None, end_provider: Callable[[], CursorValueType], - lookback_window: Optional[GapType] = None, - slice_range: Optional[GapType] = None, - cursor_granularity: Optional[GapType] = None, + lookback_window: GapType | None = None, + slice_range: GapType | None = None, + cursor_granularity: GapType | None = None, clamping_strategy: ClampingStrategy = NoClamping(), ) -> None: self._stream_name = stream_name @@ -159,7 +152,7 @@ def __init__( self._lookback_window = lookback_window self._slice_range = slice_range self._most_recent_cursor_value_per_partition: MutableMapping[ - Union[StreamSlice, Mapping[str, Any], None], Any + StreamSlice | Mapping[str, Any] | None, Any ] = {} self._has_closed_at_least_one_slice = False self._cursor_granularity = cursor_granularity @@ -178,7 +171,7 @@ def cursor_field(self) -> CursorField: return self._cursor_field @property - def _slice_boundary_fields_wrapper(self) -> Tuple[str, str]: + def _slice_boundary_fields_wrapper(self) -> tuple[str, str]: return ( self._slice_boundary_fields if 
self._slice_boundary_fields @@ -190,7 +183,7 @@ def _slice_boundary_fields_wrapper(self) -> Tuple[str, str]: def _get_concurrent_state( self, state: MutableMapping[str, Any] - ) -> Tuple[CursorValueType, MutableMapping[str, Any]]: + ) -> tuple[CursorValueType, MutableMapping[str, Any]]: if self._connector_state_converter.is_state_message_compatible(state): return ( self._start or self._connector_state_converter.zero_value, diff --git a/airbyte_cdk/sources/streams/concurrent/default_stream.py b/airbyte_cdk/sources/streams/concurrent/default_stream.py index 7679a1eb6..9645da304 100644 --- a/airbyte_cdk/sources/streams/concurrent/default_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/default_stream.py @@ -2,9 +2,10 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from functools import lru_cache +from collections.abc import Iterable, Mapping +from functools import cache from logging import Logger -from typing import Any, Iterable, List, Mapping, Optional +from typing import Any from airbyte_cdk.models import AirbyteStream, SyncMode from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream @@ -24,11 +25,11 @@ def __init__( name: str, json_schema: Mapping[str, Any], availability_strategy: AbstractAvailabilityStrategy, - primary_key: List[str], - cursor_field: Optional[str], + primary_key: list[str], + cursor_field: str | None, logger: Logger, cursor: Cursor, - namespace: Optional[str] = None, + namespace: str | None = None, ) -> None: self._stream_partition_generator = partition_generator self._name = name @@ -48,17 +49,17 @@ def name(self) -> str: return self._name @property - def namespace(self) -> Optional[str]: + def namespace(self) -> str | None: return self._namespace def check_availability(self) -> StreamAvailability: return self._availability_strategy.check_availability(self._logger) @property - def cursor_field(self) -> Optional[str]: + def cursor_field(self) -> str | None: return self._cursor_field - @lru_cache(maxsize=None) + @cache def get_json_schema(self) -> Mapping[str, Any]: return self._json_schema diff --git a/airbyte_cdk/sources/streams/concurrent/helpers.py b/airbyte_cdk/sources/streams/concurrent/helpers.py index 5e2edf055..f881be24b 100644 --- a/airbyte_cdk/sources/streams/concurrent/helpers.py +++ b/airbyte_cdk/sources/streams/concurrent/helpers.py @@ -1,13 +1,12 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-from typing import List, Optional, Union from airbyte_cdk.sources.streams import Stream def get_primary_key_from_stream( - stream_primary_key: Optional[Union[str, List[str], List[List[str]]]], -) -> List[str]: + stream_primary_key: str | list[str] | list[list[str]] | None, +) -> list[str]: if stream_primary_key is None: return [] elif isinstance(stream_primary_key, str): @@ -28,7 +27,7 @@ def get_primary_key_from_stream( raise ValueError(f"Invalid type for primary key: {stream_primary_key}") -def get_cursor_field_from_stream(stream: Stream) -> Optional[str]: +def get_cursor_field_from_stream(stream: Stream) -> str | None: if isinstance(stream.cursor_field, list): if len(stream.cursor_field) > 1: raise ValueError( diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/partition.py b/airbyte_cdk/sources/streams/concurrent/partitions/partition.py index 8391a5a2b..b3f1cda50 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/partition.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/partition.py @@ -3,7 +3,8 @@ # from abc import ABC, abstractmethod -from typing import Any, Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Any from airbyte_cdk.sources.types import Record @@ -22,7 +23,7 @@ def read(self) -> Iterable[Record]: pass @abstractmethod - def to_slice(self) -> Optional[Mapping[str, Any]]: + def to_slice(self) -> Mapping[str, Any] | None: """ Converts the partition to a slice that can be serialized and deserialized. diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py b/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py index eff978564..f4bd77bd9 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py @@ -3,7 +3,7 @@ # from abc import ABC, abstractmethod -from typing import Iterable +from collections.abc import Iterable from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py b/airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py index 98ac04ed7..e10fada49 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
from abc import ABC, abstractmethod -from typing import Iterable +from collections.abc import Iterable from airbyte_cdk.sources.types import StreamSlice diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py index 7489eaf40..621aadb95 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py @@ -3,8 +3,9 @@ # from abc import ABC, abstractmethod +from collections.abc import MutableMapping from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from airbyte_cdk.sources.streams.concurrent.cursor import CursorField @@ -51,7 +52,7 @@ def convert_to_state_message( else: return self.serialize(stream_state, ConcurrencyCompatibleStateType.date_range) - def _get_latest_complete_time(self, slices: List[MutableMapping[str, Any]]) -> Any: + def _get_latest_complete_time(self, slices: list[MutableMapping[str, Any]]) -> Any: """ Get the latest time before which all records have been processed. """ @@ -107,8 +108,8 @@ def convert_from_sequential_state( self, cursor_field: "CursorField", # to deprecate as it is only needed for sequential state stream_state: MutableMapping[str, Any], - start: Optional[Any], - ) -> Tuple[Any, MutableMapping[str, Any]]: + start: Any | None, + ) -> tuple[Any, MutableMapping[str, Any]]: """ Convert the state message to the format required by the ConcurrentCursor. @@ -137,8 +138,8 @@ def output_format(self, value: Any) -> Any: ... def merge_intervals( - self, intervals: List[MutableMapping[str, Any]] - ) -> List[MutableMapping[str, Any]]: + self, intervals: list[MutableMapping[str, Any]] + ) -> list[MutableMapping[str, Any]]: """ Compute and return a list of merged intervals. diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py index fdb5d4d77..5525ef3d7 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py @@ -3,8 +3,9 @@ # from abc import abstractmethod +from collections.abc import Callable, MutableMapping from datetime import datetime, timedelta, timezone -from typing import Any, Callable, List, MutableMapping, Optional, Tuple +from typing import Any # FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and # the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest. @@ -58,8 +59,8 @@ def convert_from_sequential_state( self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], - start: Optional[datetime], - ) -> Tuple[datetime, MutableMapping[str, Any]]: + start: datetime | None, + ) -> tuple[datetime, MutableMapping[str, Any]]: """ Convert the state message to the format required by the ConcurrentCursor. 
@@ -97,7 +98,7 @@ def _get_sync_start( self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], - start: Optional[datetime], + start: datetime | None, ) -> datetime: sync_start = start if start is not None else self.zero_value prev_sync_low_water_mark = ( @@ -159,7 +160,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter): _zero_value = "0001-01-01T00:00:00.000Z" def __init__( - self, is_sequential_state: bool = True, cursor_granularity: Optional[timedelta] = None + self, is_sequential_state: bool = True, cursor_granularity: timedelta | None = None ): super().__init__(is_sequential_state=is_sequential_state) self._cursor_granularity = cursor_granularity or timedelta(milliseconds=1) @@ -199,9 +200,9 @@ class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateC def __init__( self, datetime_format: str, - input_datetime_formats: Optional[List[str]] = None, + input_datetime_formats: list[str] | None = None, is_sequential_state: bool = True, - cursor_granularity: Optional[timedelta] = None, + cursor_granularity: timedelta | None = None, ): super().__init__( is_sequential_state=is_sequential_state, cursor_granularity=cursor_granularity diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py index fecc984bc..ebfce063c 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Callable, MutableMapping, Optional, Tuple +from collections.abc import Callable, MutableMapping +from typing import Any from airbyte_cdk.sources.streams.concurrent.cursor import CursorField from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ( @@ -26,8 +27,8 @@ def convert_from_sequential_state( self, cursor_field: "CursorField", # to deprecate as it is only needed for sequential state stream_state: MutableMapping[str, Any], - start: Optional[Any], - ) -> Tuple[Any, MutableMapping[str, Any]]: + start: Any | None, + ) -> tuple[Any, MutableMapping[str, Any]]: """ Convert the state message to the format required by the ConcurrentCursor. 
@@ -78,10 +79,10 @@ def _get_sync_start( self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], - start: Optional[int], + start: int | None, ) -> int: sync_start = start if start is not None else self.zero_value - prev_sync_low_water_mark: Optional[int] = ( + prev_sync_low_water_mark: int | None = ( stream_state[cursor_field.cursor_field_key] if cursor_field.cursor_field_key in stream_state else None diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 6cc5c8b5d..f20ad5a4e 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -6,9 +6,10 @@ import itertools import logging from abc import ABC, abstractmethod +from collections.abc import Iterable, Iterator, Mapping, MutableMapping from dataclasses import dataclass -from functools import cached_property, lru_cache -from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union +from functools import cache, cached_property +from typing import Any, Union from typing_extensions import deprecated @@ -120,7 +121,7 @@ class Stream(ABC): Base abstract class for an Airbyte Stream. Makes no assumption of the Stream's underlying transport protocol. """ - _configured_json_schema: Optional[Dict[str, Any]] = None + _configured_json_schema: dict[str, Any] | None = None _exit_on_rate_limit: bool = False # Use self.logger in subclasses to log any messages @@ -131,7 +132,7 @@ def logger(self) -> logging.Logger: # TypeTransformer object to perform output data transformation transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform) - cursor: Optional[Cursor] = None + cursor: Cursor | None = None has_multiple_slices = False @@ -142,7 +143,7 @@ def name(self) -> str: """ return casing.camel_to_snake(self.__class__.__name__) - def get_error_display_message(self, exception: BaseException) -> Optional[str]: + def get_error_display_message(self, exception: BaseException) -> str | None: """ Retrieves the user-friendly display message that corresponds to an exception. This will be called when encountering an exception while reading records from the stream, and used to build the AirbyteTraceMessage. @@ -252,7 +253,7 @@ def read( # type: ignore # ignoring typing for ConnectorStateManager because o airbyte_state_message = self._checkpoint_state(checkpoint, state_manager=state_manager) yield airbyte_state_message - def read_only_records(self, state: Optional[Mapping[str, Any]] = None) -> Iterable[StreamData]: + def read_only_records(self, state: Mapping[str, Any] | None = None) -> Iterable[StreamData]: """ Helper method that performs a read on a stream with an optional state and emits records. If the parent stream supports incremental, this operation does not update the stream's internal state (if it uses the modern state setter/getter) @@ -284,15 +285,15 @@ def read_only_records(self, state: Optional[Mapping[str, Any]] = None) -> Iterab def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: """ This method should be overridden by subclasses to read records based on the inputs """ - @lru_cache(maxsize=None) + @cache def get_json_schema(self) -> Mapping[str, Any]: """ :return: A dict of the JSON schema representing this stream. 
@@ -356,11 +357,11 @@ def is_resumable(self) -> bool: # the stream's get_updated_state() differs from the Stream class and therefore has been overridden return type(self).get_updated_state != Stream.get_updated_state - def _wrapped_cursor_field(self) -> List[str]: + def _wrapped_cursor_field(self) -> list[str]: return [self.cursor_field] if isinstance(self.cursor_field, str) else self.cursor_field @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: """ Override to return the default cursor field used by this stream e.g: an API entity might always use created_at as the cursor field. :return: The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor. @@ -368,7 +369,7 @@ def cursor_field(self) -> Union[str, List[str]]: return [] @property - def namespace(self) -> Optional[str]: + def namespace(self) -> str | None: """ Override to return the namespace of this stream, e.g. the Postgres schema which this stream will emit records for. :return: A string containing the name of the namespace. @@ -394,7 +395,7 @@ def exit_on_rate_limit(self, value: bool) -> None: @property @abstractmethod - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: """ :return: string if single primary key, list of strings if composite primary key, list of list of strings if composite primary key consisting of nested fields. If the stream has no primary keys, return None. @@ -404,9 +405,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: """ Override to define the slices for this stream. See the stream slicing section of the docs for more information. @@ -418,7 +419,7 @@ def stream_slices( yield StreamSlice(partition={}, cursor_slice={}) @property - def state_checkpoint_interval(self) -> Optional[int]: + def state_checkpoint_interval(self) -> int | None: """ Decides how often to checkpoint state (i.e: emit a STATE message). E.g: if this returns a value of 100, then state is persisted after reading 100 records, then 200, 300, etc.. A good default value is 1000 although your mileage may vary depending on the underlying data source. @@ -455,7 +456,7 @@ def get_updated_state( """ return {} - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: """ A Cursor is an interface that a stream can implement to manage how its internal state is read and updated while reading records. Historically, Python connectors had no concept of a cursor to manage state. 
Python streams need @@ -466,7 +467,7 @@ def get_cursor(self) -> Optional[Cursor]: def _get_checkpoint_reader( self, logger: logging.Logger, - cursor_field: Optional[List[str]], + cursor_field: list[str] | None, sync_mode: SyncMode, stream_state: MutableMapping[str, Any], ) -> CheckpointReader: @@ -533,7 +534,7 @@ def _checkpoint_mode(self) -> CheckpointMode: @staticmethod def _classify_stream( - mappings_or_slices: Iterator[Optional[Union[Mapping[str, Any], StreamSlice]]], + mappings_or_slices: Iterator[Mapping[str, Any] | StreamSlice | None], ) -> StreamClassification: """ This is a bit of a crazy solution, but also the only way we can detect certain attributes about the stream since Python @@ -601,8 +602,8 @@ def log_stream_sync_configuration(self) -> None: @staticmethod def _wrapped_primary_key( - keys: Optional[Union[str, List[str], List[List[str]]]], - ) -> Optional[List[List[str]]]: + keys: str | list[str] | list[list[str]] | None, + ) -> list[list[str]] | None: """ :return: wrap the primary_key property in a list of list of strings required by the Airbyte Stream object. """ @@ -625,7 +626,7 @@ def _wrapped_primary_key( raise ValueError(f"Element must be either list or str. Got: {type(keys)}") def _observe_state( - self, checkpoint_reader: CheckpointReader, stream_state: Optional[Mapping[str, Any]] = None + self, checkpoint_reader: CheckpointReader, stream_state: Mapping[str, Any] | None = None ) -> None: """ Convenience method that attempts to read the Stream's state using the recommended way of connector's managing their @@ -660,7 +661,7 @@ def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateMana return state_manager.create_state_message(self.name, self.namespace) # type: ignore [no-any-return] @property - def configured_json_schema(self) -> Optional[Dict[str, Any]]: + def configured_json_schema(self) -> dict[str, Any] | None: """ This property is set from the read method. @@ -669,12 +670,12 @@ def configured_json_schema(self) -> Optional[Dict[str, Any]]: return self._configured_json_schema @configured_json_schema.setter - def configured_json_schema(self, json_schema: Dict[str, Any]) -> None: + def configured_json_schema(self, json_schema: dict[str, Any]) -> None: self._configured_json_schema = self._filter_schema_invalid_properties(json_schema) def _filter_schema_invalid_properties( - self, configured_catalog_json_schema: Dict[str, Any] - ) -> Dict[str, Any]: + self, configured_catalog_json_schema: dict[str, Any] + ) -> dict[str, Any]: """ Filters the properties in json_schema that are not present in the stream schema. Configured Schemas can have very old fields, so we need to housekeeping ourselves. diff --git a/airbyte_cdk/sources/streams/http/availability_strategy.py b/airbyte_cdk/sources/streams/http/availability_strategy.py index 494fcf151..cf5474a9b 100644 --- a/airbyte_cdk/sources/streams/http/availability_strategy.py +++ b/airbyte_cdk/sources/streams/http/availability_strategy.py @@ -4,7 +4,7 @@ import logging import typing -from typing import Optional, Tuple +from typing import Optional from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy @@ -17,7 +17,7 @@ class HttpAvailabilityStrategy(AvailabilityStrategy): def check_availability( self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None - ) -> Tuple[bool, Optional[str]]: + ) -> tuple[bool, str | None]: """ Check stream availability by attempting to read the first record of the stream. 
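The signature change above follows the same PEP 585/604 rewrite applied throughout this diff. A tiny illustration, assuming Python 3.10+ where the new spellings are valid at runtime:

    from typing import Optional, Tuple  # legacy spellings, still importable

    def check_old(ok: bool, reason: Optional[str]) -> Tuple[bool, Optional[str]]:
        return ok, reason

    def check_new(ok: bool, reason: str | None) -> tuple[bool, str | None]:
        return ok, reason

    assert check_old(True, None) == check_new(True, None)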
@@ -30,7 +30,7 @@ def check_availability( for some reason and the str should describe what went wrong and how to resolve the unavailability, if possible. """ - reason: Optional[str] + reason: str | None try: # Some streams need a stream slice to read records (e.g. if they have a SubstreamPartitionRouter) # Streams that don't need a stream slice will return `None` as their first stream slice. diff --git a/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py b/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py index 6ed821791..c30992627 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py @@ -3,7 +3,6 @@ # from abc import ABC, abstractmethod -from typing import Optional, Union import requests @@ -12,9 +11,9 @@ class BackoffStrategy(ABC): @abstractmethod def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: """ Override this method to dynamically determine backoff time e.g: by reading the X-Retry-After header. diff --git a/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py b/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py index 2c3e10ad7..fd152081f 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py @@ -1,8 +1,6 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -from typing import Optional, Union - import requests from .backoff_strategy import BackoffStrategy @@ -11,7 +9,7 @@ class DefaultBackoffStrategy(BackoffStrategy): def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: return None diff --git a/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py b/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py index da616e0ee..997641966 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py @@ -2,7 +2,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # -from typing import Mapping, Type, Union +from collections.abc import Mapping from requests.exceptions import InvalidSchema, InvalidURL, RequestException @@ -12,7 +12,7 @@ ResponseAction, ) -DEFAULT_ERROR_MAPPING: Mapping[Union[int, str, Type[Exception]], ErrorResolution] = { +DEFAULT_ERROR_MAPPING: Mapping[int | str | type[Exception], ErrorResolution] = { InvalidSchema: ErrorResolution( response_action=ResponseAction.FAIL, failure_type=FailureType.config_error, diff --git a/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py b/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py index b231e72e0..7af046202 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py @@ -1,7 +1,6 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
from abc import ABC, abstractmethod -from typing import Optional, Union import requests @@ -15,7 +14,7 @@ class ErrorHandler(ABC): @property @abstractmethod - def max_retries(self) -> Optional[int]: + def max_retries(self) -> int | None: """ The maximum number of retries to attempt before giving up. """ @@ -23,16 +22,14 @@ def max_retries(self) -> Optional[int]: @property @abstractmethod - def max_time(self) -> Optional[int]: + def max_time(self) -> int | None: """ The maximum amount of time in seconds to retry before giving up. """ pass @abstractmethod - def interpret_response( - self, response: Optional[Union[requests.Response, Exception]] - ) -> ErrorResolution: + def interpret_response(self, response: requests.Response | Exception | None) -> ErrorResolution: """ Interpret the response or exception and return the corresponding response action, failure type, and error message. diff --git a/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py b/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py index 966fe93a1..d5d413f3d 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py @@ -3,14 +3,13 @@ # from abc import ABC, abstractmethod -from typing import Optional import requests class ErrorMessageParser(ABC): @abstractmethod - def parse_response_error_message(self, response: requests.Response) -> Optional[str]: + def parse_response_error_message(self, response: requests.Response) -> str | None: """ Parse error message from response. :param response: response received for the request diff --git a/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py b/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py index 18daca3de..67bc2c2b3 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py @@ -3,8 +3,8 @@ # import logging +from collections.abc import Mapping from datetime import timedelta -from typing import Mapping, Optional, Union import requests @@ -23,7 +23,7 @@ class HttpStatusErrorHandler(ErrorHandler): def __init__( self, logger: logging.Logger, - error_mapping: Optional[Mapping[Union[int, str, type[Exception]], ErrorResolution]] = None, + error_mapping: Mapping[int | str | type[Exception], ErrorResolution] | None = None, max_retries: int = 5, max_time: timedelta = timedelta(seconds=600), ) -> None: @@ -38,15 +38,15 @@ def __init__( self._max_time = int(max_time.total_seconds()) @property - def max_retries(self) -> Optional[int]: + def max_retries(self) -> int | None: return self._max_retries @property - def max_time(self) -> Optional[int]: + def max_time(self) -> int | None: return self._max_time def interpret_response( - self, response_or_exception: Optional[Union[requests.Response, Exception]] = None + self, response_or_exception: requests.Response | Exception | None = None ) -> ErrorResolution: """ Interpret the response and return the corresponding response action, failure type, and error message. 
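The Mapping[int | str | type[Exception], ErrorResolution] key type used above permits lookups by HTTP status code, by string marker, or by exception class. A hypothetical mapping in the same shape, with plain strings standing in for ErrorResolution values:

    from collections.abc import Mapping

    CUSTOM_ERRORS: Mapping[int | str | type[Exception], str] = {
        404: "ignore missing resources",
        "rate_limited": "back off and retry",
        ConnectionError: "retry the request",
    }

    def resolve(key: int | str | type[Exception]) -> str | None:
        return CUSTOM_ERRORS.get(key)

    assert resolve(404) == "ignore missing resources"
    assert resolve(500) is None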
@@ -56,7 +56,7 @@ def interpret_response( """ if isinstance(response_or_exception, Exception): - mapped_error: Optional[ErrorResolution] = self._error_mapping.get( + mapped_error: ErrorResolution | None = self._error_mapping.get( response_or_exception.__class__ ) diff --git a/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py b/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py index 7c58280c7..49ed61204 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Optional import requests @@ -11,7 +10,7 @@ class JsonErrorMessageParser(ErrorMessageParser): - def _try_get_error(self, value: Optional[JsonType]) -> Optional[str]: + def _try_get_error(self, value: JsonType | None) -> str | None: if isinstance(value, str): return value elif isinstance(value, list): @@ -35,7 +34,7 @@ def _try_get_error(self, value: Optional[JsonType]) -> Optional[str]: return self._try_get_error(new_value) return None - def parse_response_error_message(self, response: requests.Response) -> Optional[str]: + def parse_response_error_message(self, response: requests.Response) -> str | None: """ Parses the raw response object from a failed request into a user-friendly error message. diff --git a/airbyte_cdk/sources/streams/http/error_handlers/response_models.py b/airbyte_cdk/sources/streams/http/error_handlers/response_models.py index e882b89bd..482c4bc0d 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/response_models.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/response_models.py @@ -2,7 +2,6 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional, Union import requests from requests import HTTPError @@ -21,9 +20,9 @@ class ResponseAction(Enum): @dataclass class ErrorResolution: - response_action: Optional[ResponseAction] = None - failure_type: Optional[FailureType] = None - error_message: Optional[str] = None + response_action: ResponseAction | None = None + failure_type: FailureType | None = None + error_message: str | None = None def _format_exception_error_message(exception: Exception) -> str: @@ -43,7 +42,7 @@ def _format_response_error_message(response: requests.Response) -> str: def create_fallback_error_resolution( - response_or_exception: Optional[Union[requests.Response, Exception]], + response_or_exception: requests.Response | Exception | None, ) -> ErrorResolution: if response_or_exception is None: # We do not expect this case to happen but if it does, it would be good to understand the cause and improve the error message diff --git a/airbyte_cdk/sources/streams/http/exceptions.py b/airbyte_cdk/sources/streams/http/exceptions.py index ee4687626..977b7ed50 100644 --- a/airbyte_cdk/sources/streams/http/exceptions.py +++ b/airbyte_cdk/sources/streams/http/exceptions.py @@ -3,8 +3,6 @@ # -from typing import Optional, Union - import requests @@ -12,7 +10,7 @@ class BaseBackoffException(requests.exceptions.HTTPError): def __init__( self, request: requests.PreparedRequest, - response: Optional[Union[requests.Response, Exception]], + response: requests.Response | Exception | None, error_message: str = "", ): if isinstance(response, requests.Response): @@ -39,9 +37,9 @@ class UserDefinedBackoffException(BaseBackoffException): def __init__( self, - backoff: Union[int, float], + backoff: int | float, 
request: requests.PreparedRequest, - response: Optional[Union[requests.Response, Exception]], + response: requests.Response | Exception | None, error_message: str = "", ): """ diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index fbf4fe35d..75363e7d8 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -4,8 +4,9 @@ import logging from abc import ABC, abstractmethod +from collections.abc import Callable, Iterable, Mapping, MutableMapping from datetime import timedelta -from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union +from typing import Any from urllib.parse import urljoin import requests @@ -47,13 +48,11 @@ class HttpStream(Stream, CheckpointMixin, ABC): """ source_defined_cursor = True # Most HTTP streams use a source defined cursor (i.e: the user can't configure it like on a SQL table) - page_size: Optional[int] = ( + page_size: int | None = ( None # Use this variable to define page size for API http requests with pagination support ) - def __init__( - self, authenticator: Optional[AuthBase] = None, api_budget: Optional[APIBudget] = None - ): + def __init__(self, authenticator: AuthBase | None = None, api_budget: APIBudget | None = None): self._exit_on_rate_limit: bool = False self._http_client = HttpClient( name=self.name, @@ -135,7 +134,7 @@ def raise_on_http_errors(self) -> bool: "Deprecated as of CDK version 3.0.0. " "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) - def max_retries(self) -> Union[int, None]: + def max_retries(self) -> int | None: """ Override if needed. Specifies maximum amount of retries for backoff policy. Return None for no limit. """ @@ -146,7 +145,7 @@ def max_retries(self) -> Union[int, None]: "Deprecated as of CDK version 3.0.0. " "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) - def max_time(self) -> Union[int, None]: + def max_time(self) -> int | None: """ Override if needed. Specifies maximum total waiting time (in seconds) for backoff policy. Return None for no limit. """ @@ -164,7 +163,7 @@ def retry_factor(self) -> float: return 5 @abstractmethod - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: """ Override this method to define a pagination strategy. 
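As a hedged sketch of the pagination contract described above, written as a free function rather than an HttpStream method; the "next_cursor" field name is hypothetical and depends on the API being wrapped:

    from collections.abc import Mapping
    from typing import Any

    import requests

    def next_page_token(response: requests.Response) -> Mapping[str, Any] | None:
        # Returning None signals that there are no further pages to request.
        cursor = response.json().get("next_cursor")
        return {"cursor": cursor} if cursor else None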
@@ -177,9 +176,9 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, def path( self, *, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: """ Returns the URL path for the API endpoint e.g: if you wanted to hit https://myapi.com/v1/some_entity then this should return "some_entity" @@ -187,9 +186,9 @@ def path( def request_params( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: """ Override this method to define the query parameters that should be set on an outgoing HTTP request given the inputs. @@ -200,9 +199,9 @@ def request_params( def request_headers( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Override to return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method. @@ -211,10 +210,10 @@ def request_headers( def request_body_data( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Optional[Union[Mapping[str, Any], str]]: + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | str | None: """ Override when creating POST/PUT/PATCH requests to populate the body of the request with a non-JSON payload. @@ -228,10 +227,10 @@ def request_body_data( def request_body_json( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Optional[Mapping[str, Any]]: + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> Mapping[str, Any] | None: """ Override when creating POST/PUT/PATCH requests to populate the body of the request with a JSON payload. @@ -241,9 +240,9 @@ def request_body_json( def request_kwargs( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: """ Override to return a mapping of keyword arguments to be used when creating the HTTP request. 
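A sketch of the request_params pattern in the new annotation style, again as a standalone function; "per_page" and the merged cursor key are illustrative only:

    from collections.abc import Mapping, MutableMapping
    from typing import Any

    def request_params(
        stream_state: Mapping[str, Any] | None,
        stream_slice: Mapping[str, Any] | None = None,
        next_page_token: Mapping[str, Any] | None = None,
    ) -> MutableMapping[str, Any]:
        params: MutableMapping[str, Any] = {"per_page": 100}
        if next_page_token:
            params.update(next_page_token)  # forward the pagination cursor
        return params

    assert request_params(None, next_page_token={"cursor": "abc"})["cursor"] == "abc"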
@@ -258,8 +257,8 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: """ Parses the raw response object into a list of records. @@ -271,7 +270,7 @@ def parse_response( :return: An iterable containing the parsed response """ - def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]: + def get_backoff_strategy(self) -> BackoffStrategy | list[BackoffStrategy] | None: """ Used to initialize Adapter to avoid breaking changes. If Stream has a `backoff_time` method implementation, we know this stream uses old (pre-HTTPClient) backoff handlers and thus an adapter is needed. @@ -284,7 +283,7 @@ def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffSt else: return None - def get_error_handler(self) -> Optional[ErrorHandler]: + def get_error_handler(self) -> ErrorHandler | None: """ Used to initialize Adapter to avoid breaking changes. If Stream has a `should_retry` method implementation, we know this stream uses old (pre-HTTPClient) error handlers and thus an adapter is needed. @@ -308,7 +307,7 @@ def _join_url(cls, url_base: str, path: str) -> str: return urljoin(url_base, path) @classmethod - def parse_response_error_message(cls, response: requests.Response) -> Optional[str]: + def parse_response_error_message(cls, response: requests.Response) -> str | None: """ Parses the raw response object from a failed request into a user-friendly error message. By default, this method tries to grab the error message from JSON responses by following common API patterns. Override to parse differently. @@ -318,7 +317,7 @@ def parse_response_error_message(cls, response: requests.Response) -> Optional[s """ # default logic to grab error from common fields - def _try_get_error(value: Optional[JsonType]) -> Optional[str]: + def _try_get_error(value: JsonType | None) -> str | None: if isinstance(value, str): return value elif isinstance(value, list): @@ -343,7 +342,7 @@ def _try_get_error(value: Optional[JsonType]) -> Optional[str]: except requests.exceptions.JSONDecodeError: return None - def get_error_display_message(self, exception: BaseException) -> Optional[str]: + def get_error_display_message(self, exception: BaseException) -> str | None: """ Retrieves the user-friendly display message that corresponds to an exception. This will be called when encountering an exception while reading records from the stream, and used to build the AirbyteTraceMessage. 
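The default error-message parsing walks common JSON fields recursively; a minimal standalone sketch of that idea (the field names "message" and "error" are illustrative, not the CDK's exact list):

    from typing import Any

    def extract_error(body: Any) -> str | None:
        if isinstance(body, str):
            return body
        if isinstance(body, list):
            return next((found for found in map(extract_error, body) if found), None)
        if isinstance(body, dict):
            for key in ("message", "error"):
                if key in body:
                    return extract_error(body[key])
        return None

    assert extract_error({"error": {"message": "Invalid token"}}) == "Invalid token"
    assert extract_error([]) is None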
@@ -361,9 +360,9 @@ def get_error_display_message(self, exception: BaseException) -> Optional[str]: def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: # A cursor_field indicates this is an incremental stream which offers better checkpointing than RFR enabled via the cursor if self.cursor_field or not isinstance(self.get_cursor(), ResumableFullRefreshCursor): @@ -397,7 +396,7 @@ def state(self, value: MutableMapping[str, Any]) -> None: cursor.set_initial_state(value) self._state = value - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: # I don't love that this is semi-stateful but not sure what else to do. We don't know exactly what type of cursor to # instantiate when creating the class. We can make a few assumptions like if there is a cursor_field which implies # incremental, but we don't know until runtime if this is a substream. Ideally, a stream should explicitly define @@ -416,12 +415,12 @@ def _read_pages( requests.PreparedRequest, requests.Response, Mapping[str, Any], - Optional[Mapping[str, Any]], + Mapping[str, Any] | None, ], Iterable[StreamData], ], - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: stream_state = stream_state or {} pagination_complete = False @@ -451,12 +450,12 @@ def _read_single_page( requests.PreparedRequest, requests.Response, Mapping[str, Any], - Optional[Mapping[str, Any]], + Mapping[str, Any] | None, ], Iterable[StreamData], ], - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: partition, cursor_slice, remaining_slice = self._extract_slice_fields( stream_slice=stream_slice @@ -480,7 +479,7 @@ def _read_single_page( @staticmethod def _extract_slice_fields( - stream_slice: Optional[Mapping[str, Any]], + stream_slice: Mapping[str, Any] | None, ) -> tuple[Mapping[str, Any], Mapping[str, Any], Mapping[str, Any]]: if not stream_slice: return {}, {}, {} @@ -504,10 +503,10 @@ def _extract_slice_fields( def _fetch_next_page( self, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Tuple[requests.PreparedRequest, requests.Response]: + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> tuple[requests.PreparedRequest, requests.Response]: request, response = self._http_client.send_request( http_method=self.http_method, url=self._join_url( @@ -550,7 +549,7 @@ def _fetch_next_page( return request, response - def get_log_formatter(self) -> Optional[Callable[[requests.Response], Any]]: + def get_log_formatter(self) -> Callable[[requests.Response], Any] | None: """ :return Optional[Callable[[requests.Response], Any]]: Function that will be used in logging inside HttpClient @@ -584,9 +583,9 @@ def __init__(self, parent: HttpStream, **kwargs: Any): def stream_slices( self, 
sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: # read_stateless() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does # not support either substreams or RFR, but something that needs to be considered once we do for parent_record in self.parent.read_only_records(stream_state): @@ -611,9 +610,9 @@ def __init__(self, stream: HttpStream): def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: return self.stream.backoff_time(response_or_exception) # type: ignore # noqa # HttpStream.backoff_time has been deprecated @@ -627,7 +626,7 @@ def __init__(self, stream: HttpStream, **kwargs): # type: ignore # noqa super().__init__(**kwargs) def interpret_response( - self, response_or_exception: Optional[Union[requests.Response, Exception]] = None + self, response_or_exception: requests.Response | Exception | None = None ) -> ErrorResolution: if isinstance(response_or_exception, Exception): return super().interpret_response(response_or_exception) diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index c4fa86866..9bebe5a85 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -5,8 +5,9 @@ import logging import os import urllib +from collections.abc import Callable, Mapping from pathlib import Path -from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union +from typing import Any import orjson import requests @@ -82,15 +83,15 @@ def __init__( self, name: str, logger: logging.Logger, - error_handler: Optional[ErrorHandler] = None, - api_budget: Optional[APIBudget] = None, - session: Optional[Union[requests.Session, requests_cache.CachedSession]] = None, - authenticator: Optional[AuthBase] = None, + error_handler: ErrorHandler | None = None, + api_budget: APIBudget | None = None, + session: requests.Session | requests_cache.CachedSession | None = None, + authenticator: AuthBase | None = None, use_cache: bool = False, - backoff_strategy: Optional[Union[BackoffStrategy, List[BackoffStrategy]]] = None, - error_message_parser: Optional[ErrorMessageParser] = None, + backoff_strategy: BackoffStrategy | list[BackoffStrategy] | None = None, + error_message_parser: ErrorMessageParser | None = None, disable_retries: bool = False, - message_repository: Optional[MessageRepository] = None, + message_repository: MessageRepository | None = None, ): self._name = name self._api_budget: APIBudget = api_budget or APIBudget(policies=[]) @@ -117,7 +118,7 @@ def __init__( else: self._backoff_strategies = [DefaultBackoffStrategy()] self._error_message_parser = error_message_parser or JsonErrorMessageParser() - self._request_attempt_count: Dict[requests.PreparedRequest, int] = {} + self._request_attempt_count: dict[requests.PreparedRequest, int] = {} self._disable_retries = disable_retries self._message_repository = message_repository @@ -165,9 +166,7 @@ def clear_cache(self) -> None: if isinstance(self._session, requests_cache.CachedSession): self._session.cache.clear() # type: ignore # cache.clear is not 
typed - def _dedupe_query_params( - self, url: str, params: Optional[Mapping[str, str]] - ) -> Mapping[str, str]: + def _dedupe_query_params(self, url: str, params: Mapping[str, str] | None) -> Mapping[str, str]: """ Remove query parameters from params mapping if they are already encoded in the URL. :param url: URL with @@ -189,10 +188,10 @@ def _create_prepared_request( http_method: str, url: str, dedupe_query_params: bool = False, - headers: Optional[Mapping[str, str]] = None, - params: Optional[Mapping[str, str]] = None, - json: Optional[Mapping[str, Any]] = None, - data: Optional[Union[str, Mapping[str, Any]]] = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, str] | None = None, + json: Mapping[str, Any] | None = None, + data: str | Mapping[str, Any] | None = None, ) -> requests.PreparedRequest: if dedupe_query_params: query_params = self._dedupe_query_params(url, params) @@ -241,8 +240,8 @@ def _send_with_retry( self, request: requests.PreparedRequest, request_kwargs: Mapping[str, Any], - log_formatter: Optional[Callable[[requests.Response], Any]] = None, - exit_on_rate_limit: Optional[bool] = False, + log_formatter: Callable[[requests.Response], Any] | None = None, + exit_on_rate_limit: bool | None = False, ) -> requests.Response: """ Sends a request with retry logic. @@ -280,8 +279,8 @@ def _send( self, request: requests.PreparedRequest, request_kwargs: Mapping[str, Any], - log_formatter: Optional[Callable[[requests.Response], Any]] = None, - exit_on_rate_limit: Optional[bool] = False, + log_formatter: Callable[[requests.Response], Any] | None = None, + exit_on_rate_limit: bool | None = False, ) -> requests.Response: if request not in self._request_attempt_count: self._request_attempt_count[request] = 1 @@ -295,8 +294,8 @@ def _send( extra={"headers": request.headers, "url": request.url, "request_body": request.body}, ) - response: Optional[requests.Response] = None - exc: Optional[requests.RequestException] = None + response: requests.Response | None = None + exc: requests.RequestException | None = None try: response = self._session.send(request, **request_kwargs) @@ -347,7 +346,7 @@ def _send( return response # type: ignore # will either return a valid response of type requests.Response or raise an exception - def _get_response_body(self, response: requests.Response) -> Optional[JsonType]: + def _get_response_body(self, response: requests.Response) -> JsonType | None: """ Extracts and returns the body of an HTTP response. 
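A tolerant body-extraction sketch matching the JsonType | None return annotation above: decode JSON when possible, otherwise fall back to None rather than raising.

    from typing import Any

    import requests

    def get_response_body(response: requests.Response) -> Any:
        # Returns the decoded JSON body, or None if the body is not valid JSON.
        try:
            return response.json()
        except requests.exceptions.JSONDecodeError:
            return None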
@@ -383,11 +382,11 @@ def _evict_key(self, prepared_request: requests.PreparedRequest) -> None: def _handle_error_resolution( self, - response: Optional[requests.Response], - exc: Optional[requests.RequestException], + response: requests.Response | None, + exc: requests.RequestException | None, request: requests.PreparedRequest, error_resolution: ErrorResolution, - exit_on_rate_limit: Optional[bool] = False, + exit_on_rate_limit: bool | None = False, ) -> None: if error_resolution.response_action not in self._ACTIONS_TO_RETRY_ON: self._evict_key(request) @@ -499,14 +498,14 @@ def send_request( http_method: str, url: str, request_kwargs: Mapping[str, Any], - headers: Optional[Mapping[str, str]] = None, - params: Optional[Mapping[str, str]] = None, - json: Optional[Mapping[str, Any]] = None, - data: Optional[Union[str, Mapping[str, Any]]] = None, + headers: Mapping[str, str] | None = None, + params: Mapping[str, str] | None = None, + json: Mapping[str, Any] | None = None, + data: str | Mapping[str, Any] | None = None, dedupe_query_params: bool = False, - log_formatter: Optional[Callable[[requests.Response], Any]] = None, - exit_on_rate_limit: Optional[bool] = False, - ) -> Tuple[requests.PreparedRequest, requests.Response]: + log_formatter: Callable[[requests.Response], Any] | None = None, + exit_on_rate_limit: bool | None = False, + ) -> tuple[requests.PreparedRequest, requests.Response]: """ Prepares and sends request and return request and response objects. """ diff --git a/airbyte_cdk/sources/streams/http/rate_limiting.py b/airbyte_cdk/sources/streams/http/rate_limiting.py index 926a7ad56..ff1dd31e7 100644 --- a/airbyte_cdk/sources/streams/http/rate_limiting.py +++ b/airbyte_cdk/sources/streams/http/rate_limiting.py @@ -5,7 +5,8 @@ import logging import sys import time -from typing import Any, Callable, Mapping, Optional +from collections.abc import Callable, Mapping +from typing import Any import backoff from requests import PreparedRequest, RequestException, Response, codes, exceptions @@ -31,7 +32,7 @@ def default_backoff_handler( - max_tries: Optional[int], factor: float, max_time: Optional[int] = None, **kwargs: Any + max_tries: int | None, factor: float, max_time: int | None = None, **kwargs: Any ) -> Callable[[SendRequestCallableType], SendRequestCallableType]: def log_retry_attempt(details: Mapping[str, Any]) -> None: _, exc, _ = sys.exc_info() @@ -72,7 +73,7 @@ def should_give_up(exc: Exception) -> bool: def http_client_default_backoff_handler( - max_tries: Optional[int], max_time: Optional[int] = None, **kwargs: Any + max_tries: int | None, max_time: int | None = None, **kwargs: Any ) -> Callable[[SendRequestCallableType], SendRequestCallableType]: def log_retry_attempt(details: Mapping[str, Any]) -> None: _, exc, _ = sys.exc_info() @@ -101,7 +102,7 @@ def should_give_up(exc: Exception) -> bool: def user_defined_backoff_handler( - max_tries: Optional[int], max_time: Optional[int] = None, **kwargs: Any + max_tries: int | None, max_time: int | None = None, **kwargs: Any ) -> Callable[[SendRequestCallableType], SendRequestCallableType]: def sleep_on_ratelimit(details: Mapping[str, Any]) -> None: _, exc, _ = sys.exc_info() diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py index b0afeca6e..642ce3cad 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py @@ -4,9 +4,10 @@ 
import logging from abc import abstractmethod +from collections.abc import Mapping, MutableMapping from datetime import timedelta from json import JSONDecodeError -from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union +from typing import Any import backoff import requests @@ -43,9 +44,9 @@ class AbstractOauth2Authenticator(AuthBase): def __init__( self, - refresh_token_error_status_codes: Tuple[int, ...] = (), + refresh_token_error_status_codes: tuple[int, ...] = (), refresh_token_error_key: str = "", - refresh_token_error_values: Tuple[str, ...] = (), + refresh_token_error_values: tuple[str, ...] = (), ) -> None: """ If all of refresh_token_error_status_codes, refresh_token_error_key, and refresh_token_error_values are set, @@ -73,7 +74,7 @@ def token_expiry_is_time_of_expiration(self) -> bool: return False @property - def token_expiry_date_format(self) -> Optional[str]: + def token_expiry_date_format(self) -> str | None: """ Format of the datetime; exists it if expires_in is returned as the expiration datetime instead of seconds until it expires """ @@ -130,7 +131,7 @@ def build_refresh_request_headers(self) -> Mapping[str, Any] | None: headers = self.get_refresh_request_headers() return headers if headers else None - def refresh_access_token(self) -> Tuple[str, Union[str, int]]: + def refresh_access_token(self) -> tuple[str, str | int]: """ Returns the refresh token and its expiration datetime @@ -255,7 +256,7 @@ def _ensure_access_token_in_response(self, response_data: Mapping[str, Any]) -> except ResponseKeysMaxRecurtionReached as e: raise e - def _parse_token_expiration_date(self, value: Union[str, int]) -> AirbyteDateTime: + def _parse_token_expiration_date(self, value: str | int) -> AirbyteDateTime: """ Return the expiration datetime of the refresh token @@ -374,7 +375,7 @@ def _find_and_get_value_from_response( return None @property - def _message_repository(self) -> Optional[MessageRepository]: + def _message_repository(self) -> MessageRepository | None: """ The implementation can define a message_repository if it wants debugging logs for HTTP requests """ @@ -405,7 +406,7 @@ def _log_response(self, response: requests.Response) -> None: # ---------------- @abstractmethod - def get_token_refresh_endpoint(self) -> Optional[str]: + def get_token_refresh_endpoint(self) -> str | None: """Returns the endpoint to refresh the access token""" @abstractmethod @@ -429,11 +430,11 @@ def get_refresh_token_name(self) -> str: """The refresh token name to authenticate""" @abstractmethod - def get_refresh_token(self) -> Optional[str]: + def get_refresh_token(self) -> str | None: """The token used to refresh the access token when it expires""" @abstractmethod - def get_scopes(self) -> List[str]: + def get_scopes(self) -> list[str]: """List of requested scopes""" @abstractmethod @@ -441,7 +442,7 @@ def get_token_expiry_date(self) -> AirbyteDateTime: """Expiration date of the access token""" @abstractmethod - def set_token_expiry_date(self, value: Union[str, int]) -> None: + def set_token_expiry_date(self, value: str | int) -> None: """Setter for access token expiration date""" @abstractmethod diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py index ffcc8e851..78f5809e9 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py @@ -3,7 +3,8 @@ # from abc import 
abstractmethod -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any import requests from requests.auth import AuthBase diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index 2ff2f60e9..d38bcef76 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from collections.abc import Mapping, Sequence from datetime import timedelta -from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Any import dpath @@ -38,7 +39,7 @@ def __init__( client_id_name: str = "client_id", client_secret_name: str = "client_secret", refresh_token_name: str = "refresh_token", - scopes: List[str] | None = None, + scopes: list[str] | None = None, token_expiry_date: AirbyteDateTime | None = None, token_expiry_date_format: str | None = None, access_token_name: str = "access_token", @@ -48,9 +49,9 @@ def __init__( grant_type_name: str = "grant_type", grant_type: str = "refresh_token", token_expiry_is_time_of_expiration: bool = False, - refresh_token_error_status_codes: Tuple[int, ...] = (), + refresh_token_error_status_codes: tuple[int, ...] = (), refresh_token_error_key: str = "", - refresh_token_error_values: Tuple[str, ...] = (), + refresh_token_error_values: tuple[str, ...] = (), ) -> None: self._token_refresh_endpoint = token_refresh_endpoint self._client_secret_name = client_secret_name @@ -120,7 +121,7 @@ def get_grant_type(self) -> str: def get_token_expiry_date(self) -> AirbyteDateTime: return self._token_expiry_date - def set_token_expiry_date(self, value: Union[str, int]) -> None: + def set_token_expiry_date(self, value: str | int) -> None: self._token_expiry_date = self._parse_token_expiration_date(value) @property @@ -128,7 +129,7 @@ def token_expiry_is_time_of_expiration(self) -> bool: return self._token_expiry_is_time_of_expiration @property - def token_expiry_date_format(self) -> Optional[str]: + def token_expiry_date_format(self) -> str | None: return self._token_expiry_date_format @property @@ -154,7 +155,7 @@ def __init__( self, connector_config: Mapping[str, Any], token_refresh_endpoint: str, - scopes: List[str] | None = None, + scopes: list[str] | None = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", refresh_token_name: str = "refresh_token", @@ -163,18 +164,18 @@ def __init__( grant_type_name: str = "grant_type", grant_type: str = "refresh_token", client_id_name: str = "client_id", - client_id: Optional[str] = None, + client_id: str | None = None, client_secret_name: str = "client_secret", - client_secret: Optional[str] = None, + client_secret: str | None = None, access_token_config_path: Sequence[str] = ("credentials", "access_token"), refresh_token_config_path: Sequence[str] = ("credentials", "refresh_token"), token_expiry_date_config_path: Sequence[str] = ("credentials", "token_expiry_date"), - token_expiry_date_format: Optional[str] = None, + token_expiry_date_format: str | None = None, message_repository: MessageRepository = NoopMessageRepository(), token_expiry_is_time_of_expiration: bool = False, - refresh_token_error_status_codes: Tuple[int, ...] = (), + refresh_token_error_status_codes: tuple[int, ...] = (), refresh_token_error_key: str = "", - refresh_token_error_values: Tuple[str, ...] 
= (), + refresh_token_error_values: tuple[str, ...] = (), ) -> None: """ Args: @@ -355,7 +356,7 @@ def get_access_token(self) -> str: self._emit_control_message() return self.access_token - def refresh_access_token(self) -> Tuple[str, str, str]: # type: ignore[override] + def refresh_access_token(self) -> tuple[str, str, str]: # type: ignore[override] """ Refreshes the access token by making a handled request and extracting the necessary token information. @@ -369,7 +370,7 @@ def refresh_access_token(self) -> Tuple[str, str, str]: # type: ignore[override self._extract_refresh_token(response_json), ) - def _set_config_value_by_path(self, config_path: Union[str, Sequence[str]], value: Any) -> None: + def _set_config_value_by_path(self, config_path: str | Sequence[str], value: Any) -> None: """ Set a value in the connector configuration at the specified path. @@ -384,7 +385,7 @@ def _set_config_value_by_path(self, config_path: Union[str, Sequence[str]], valu dpath.new(self._connector_config, config_path, value) # type: ignore[arg-type] def _get_config_value_by_path( - self, config_path: Union[str, Sequence[str]], default: Optional[str] = None + self, config_path: str | Sequence[str], default: str | None = None ) -> str | Any: """ Retrieve a value from the connector configuration using a specified path. diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/token.py index eec7fd0c5..76a307aad 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/token.py @@ -4,7 +4,6 @@ import base64 from itertools import cycle -from typing import List from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import ( AbstractHeaderAuthenticator, @@ -27,7 +26,7 @@ def token(self) -> str: return f"{self._auth_method} {next(self._tokens_iter)}" def __init__( - self, tokens: List[str], auth_method: str = "Bearer", auth_header: str = "Authorization" + self, tokens: list[str], auth_method: str = "Bearer", auth_header: str = "Authorization" ): self._auth_method = auth_method self._auth_header = auth_header @@ -76,7 +75,7 @@ def __init__( auth_method: str = "Basic", auth_header: str = "Authorization", ): - auth_string = f"{username}:{password}".encode("utf8") + auth_string = f"{username}:{password}".encode() b64_encoded = base64.b64encode(auth_string).decode("utf8") self._auth_header = auth_header self._auth_method = auth_method diff --git a/airbyte_cdk/sources/streams/permissions/identities_stream.py b/airbyte_cdk/sources/streams/permissions/identities_stream.py index 8101234c5..7ec7bbb18 100644 --- a/airbyte_cdk/sources/streams/permissions/identities_stream.py +++ b/airbyte_cdk/sources/streams/permissions/identities_stream.py @@ -4,7 +4,8 @@ import traceback from abc import ABC, abstractmethod -from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional +from collections.abc import Iterable, Mapping, MutableMapping +from typing import Any from airbyte_protocol_dataclasses.models import SyncMode @@ -42,9 +43,9 @@ def state(self, value: MutableMapping[str, Any]) -> None: def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, 
Any] | AirbyteMessage]: try: identity_groups = self.load_identity_groups() @@ -64,12 +65,12 @@ def read_records( ) @abstractmethod - def load_identity_groups(self) -> Iterable[Dict[str, Any]]: + def load_identity_groups(self) -> Iterable[dict[str, Any]]: raise NotImplementedError("Implement this method to read identity records") @property def name(self) -> str: return self.IDENTITIES_STREAM_NAME - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: return None diff --git a/airbyte_cdk/sources/types.py b/airbyte_cdk/sources/types.py index 6ee7f652a..b36ebe6cd 100644 --- a/airbyte_cdk/sources/types.py +++ b/airbyte_cdk/sources/types.py @@ -4,17 +4,18 @@ from __future__ import annotations -from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView +from collections.abc import ItemsView, Iterator, KeysView, Mapping, ValuesView +from typing import Any from airbyte_cdk.utils.slice_hasher import SliceHasher # A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2": # "hello"}] returns "hello" -FieldPointer = List[str] +FieldPointer = list[str] Config = Mapping[str, Any] ConnectionDefinition = Mapping[str, Any] StreamState = Mapping[str, Any] -EmptyString = str() +EmptyString = "" class Record(Mapping[str, Any]): @@ -22,7 +23,7 @@ def __init__( self, data: Mapping[str, Any], stream_name: str, - associated_slice: Optional[StreamSlice] = None, + associated_slice: StreamSlice | None = None, is_file_transfer_message: bool = False, ): self._data = data @@ -35,7 +36,7 @@ def data(self) -> Mapping[str, Any]: return self._data @property - def associated_slice(self) -> Optional[StreamSlice]: + def associated_slice(self) -> StreamSlice | None: return self._associated_slice def __repr__(self) -> str: @@ -69,7 +70,7 @@ def __init__( *, partition: Mapping[str, Any], cursor_slice: Mapping[str, Any], - extra_fields: Optional[Mapping[str, Any]] = None, + extra_fields: Mapping[str, Any] | None = None, ) -> None: """ :param partition: The partition keys representing a unique partition in the stream. @@ -134,7 +135,7 @@ def items(self) -> ItemsView[str, Any]: def values(self) -> ValuesView[Any]: return self._stream_slice.values() - def get(self, key: str, default: Any = None) -> Optional[Any]: + def get(self, key: str, default: Any = None) -> Any | None: return self._stream_slice.get(key, default) def __eq__(self, other: Any) -> bool: diff --git a/airbyte_cdk/sources/utils/record_helper.py b/airbyte_cdk/sources/utils/record_helper.py index 3d2cbcecf..d71a317e3 100644 --- a/airbyte_cdk/sources/utils/record_helper.py +++ b/airbyte_cdk/sources/utils/record_helper.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# import time +from collections.abc import Mapping from collections.abc import Mapping as ABCMapping -from typing import Any, Mapping, Optional +from typing import Any from airbyte_cdk.models import ( AirbyteLogMessage, @@ -21,7 +22,7 @@ def stream_data_to_airbyte_message( stream_name: str, data_or_message: StreamData, transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform), - schema: Optional[Mapping[str, Any]] = None, + schema: Mapping[str, Any] | None = None, is_file_transfer_message: bool = False, ) -> AirbyteMessage: if schema is None: diff --git a/airbyte_cdk/sources/utils/schema_helpers.py b/airbyte_cdk/sources/utils/schema_helpers.py index f15578238..09d284fb6 100644 --- a/airbyte_cdk/sources/utils/schema_helpers.py +++ b/airbyte_cdk/sources/utils/schema_helpers.py @@ -7,7 +7,8 @@ import json import os import pkgutil -from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple +from collections.abc import Mapping, MutableMapping +from typing import Any, ClassVar import jsonref from jsonschema import RefResolver, validate @@ -29,7 +30,7 @@ def __init__(self, uri_base: str, shared: str): self.shared = shared self.uri_base = uri_base - def __call__(self, uri: str) -> Dict[str, Any]: + def __call__(self, uri: str) -> dict[str, Any]: uri = uri.replace(self.uri_base, f"{self.uri_base}/{self.shared}/") with open(uri) as f: data = json.load(f) @@ -63,7 +64,7 @@ def resolve_ref_links(obj: Any) -> Any: return obj -def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> None: +def _expand_refs(schema: Any, ref_resolver: RefResolver | None = None) -> None: """Internal function to iterate over schema and replace all occurrences of $ref with their definitions. Recursive. :param schema: schema that will be patched @@ -82,7 +83,7 @@ def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> Non else: for key, value in schema.items(): _expand_refs(value, ref_resolver=ref_resolver) - elif isinstance(schema, List): + elif isinstance(schema, list): for value in schema: _expand_refs(value, ref_resolver=ref_resolver) @@ -134,7 +135,7 @@ def get_schema(self, name: str) -> dict[str, Any]: schema_filename = f"schemas/{name}.json" raw_file = pkgutil.get_data(self.package_name, schema_filename) if not raw_file: - raise IOError(f"Cannot find file {schema_filename}") + raise OSError(f"Cannot find file {schema_filename}") try: raw_schema = json.loads(raw_file) except ValueError as err: @@ -208,7 +209,7 @@ def is_limit_reached(self, records_counter: int) -> bool: return False -def split_config(config: Mapping[str, Any]) -> Tuple[dict[str, Any], InternalConfig]: +def split_config(config: Mapping[str, Any]) -> tuple[dict[str, Any], InternalConfig]: """ Break config map object into 2 instances: first is a dict with user defined configuration and second is internal config that contains private keys for diff --git a/airbyte_cdk/sources/utils/slice_logger.py b/airbyte_cdk/sources/utils/slice_logger.py index ee802a7a6..6ce6c4cd8 100644 --- a/airbyte_cdk/sources/utils/slice_logger.py +++ b/airbyte_cdk/sources/utils/slice_logger.py @@ -5,7 +5,8 @@ import json import logging from abc import ABC, abstractmethod -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level from airbyte_cdk.models import Type as MessageType @@ -19,7 +20,7 @@ class SliceLogger(ABC): SLICE_LOG_PREFIX = "slice:" - def create_slice_log_message(self, 
_slice: Optional[Mapping[str, Any]]) -> AirbyteMessage: + def create_slice_log_message(self, _slice: Mapping[str, Any] | None) -> AirbyteMessage: """ Mapping is an interface that can be implemented in various ways. However, json.dumps will just do a `str()` if the slice is a class implementing Mapping. Therefore, we want to cast this as a dict before passing this to json.dump diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index e19aad3a3..f0ced6353 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Callable, Generator, Mapping from enum import Flag, auto -from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast +from typing import Any, cast from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators @@ -68,7 +69,7 @@ class TypeTransformer: Class for transforming object before output. """ - _custom_normalizer: Optional[Callable[[Any, Dict[str, Any]], Any]] = None + _custom_normalizer: Callable[[Any, dict[str, Any]], Any] | None = None def __init__(self, config: TransformConfig): """ @@ -106,7 +107,7 @@ def registerCustomTransform( self._custom_normalizer = normalization_callback return normalization_callback - def __normalize(self, original_item: Any, subschema: Dict[str, Any]) -> Any: + def __normalize(self, original_item: Any, subschema: dict[str, Any]) -> Any: """ Applies different transform function to object's field according to config. :param original_item original value of field. @@ -121,7 +122,7 @@ def __normalize(self, original_item: Any, subschema: Dict[str, Any]) -> Any: return original_item @staticmethod - def default_convert(original_item: Any, subschema: Dict[str, Any]) -> Any: + def default_convert(original_item: Any, subschema: dict[str, Any]) -> Any: """ Default transform function that is used when TransformConfig.DefaultSchemaNormalization flag set. :param original_item original value of field. @@ -178,7 +179,7 @@ def normalizator( validator_instance: Validator, property_value: Any, instance: Any, - schema: Dict[str, Any], + schema: dict[str, Any], ) -> Generator[Any, Any, None]: """ Jsonschema validator callable it uses for validating instance. We @@ -228,7 +229,7 @@ def resolve(subschema: dict[str, Any]) -> dict[str, Any]: def transform( self, - record: Dict[str, Any], + record: dict[str, Any], schema: Mapping[str, Any], ) -> None: """ diff --git a/airbyte_cdk/sql/shared/sql_processor.py b/airbyte_cdk/sql/shared/sql_processor.py index a53925206..74a459b2f 100644 --- a/airbyte_cdk/sql/shared/sql_processor.py +++ b/airbyte_cdk/sql/shared/sql_processor.py @@ -210,7 +210,7 @@ def get_sql_engine(self) -> Engine: return self.sql_config.get_sql_engine() @contextmanager - def get_sql_connection(self) -> Generator[sqlalchemy.engine.Connection, None, None]: + def get_sql_connection(self) -> Generator[sqlalchemy.engine.Connection]: """A context manager which returns a new SQL connection for running queries. If the connection needs to close, it will be closed automatically. diff --git a/airbyte_cdk/test/catalog_builder.py b/airbyte_cdk/test/catalog_builder.py index b1bf4341c..46d7c5fd5 100644 --- a/airbyte_cdk/test/catalog_builder.py +++ b/airbyte_cdk/test/catalog_builder.py @@ -1,6 +1,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-from typing import Any, Dict, List, Union, overload +from typing import Any, overload from airbyte_cdk.models import ( ConfiguredAirbyteCatalog, @@ -12,7 +12,7 @@ class ConfiguredAirbyteStreamBuilder: def __init__(self) -> None: - self._stream: Dict[str, Any] = { + self._stream: dict[str, Any] = { "stream": { "name": "any name", "json_schema": {}, @@ -32,12 +32,12 @@ def with_sync_mode(self, sync_mode: SyncMode) -> "ConfiguredAirbyteStreamBuilder self._stream["sync_mode"] = sync_mode.name return self - def with_primary_key(self, pk: List[List[str]]) -> "ConfiguredAirbyteStreamBuilder": + def with_primary_key(self, pk: list[list[str]]) -> "ConfiguredAirbyteStreamBuilder": self._stream["primary_key"] = pk self._stream["stream"]["source_defined_primary_key"] = pk # type: ignore # we assume that self._stream["stream"] is a Dict[str, Any] return self - def with_json_schema(self, json_schema: Dict[str, Any]) -> "ConfiguredAirbyteStreamBuilder": + def with_json_schema(self, json_schema: dict[str, Any]) -> "ConfiguredAirbyteStreamBuilder": self._stream["stream"]["json_schema"] = json_schema return self @@ -47,7 +47,7 @@ def build(self) -> ConfiguredAirbyteStream: class CatalogBuilder: def __init__(self) -> None: - self._streams: List[ConfiguredAirbyteStreamBuilder] = [] + self._streams: list[ConfiguredAirbyteStreamBuilder] = [] @overload def with_stream(self, name: ConfiguredAirbyteStreamBuilder) -> "CatalogBuilder": ... @@ -57,8 +57,8 @@ def with_stream(self, name: str, sync_mode: SyncMode) -> "CatalogBuilder": ... def with_stream( self, - name: Union[str, ConfiguredAirbyteStreamBuilder], - sync_mode: Union[SyncMode, None] = None, + name: str | ConfiguredAirbyteStreamBuilder, + sync_mode: SyncMode | None = None, ) -> "CatalogBuilder": # As we are introducing a fully fledge ConfiguredAirbyteStreamBuilder, we would like to deprecate the previous interface # with_stream(str, SyncMode) diff --git a/airbyte_cdk/test/entrypoint_wrapper.py b/airbyte_cdk/test/entrypoint_wrapper.py index f8e85bfb0..848b979ca 100644 --- a/airbyte_cdk/test/entrypoint_wrapper.py +++ b/airbyte_cdk/test/entrypoint_wrapper.py @@ -19,9 +19,10 @@ import re import tempfile import traceback +from collections.abc import Mapping from io import StringIO from pathlib import Path -from typing import Any, List, Mapping, Optional, Union +from typing import Any import orjson from pydantic import ValidationError as V2ValidationError @@ -47,7 +48,7 @@ class EntrypointOutput: - def __init__(self, messages: List[str], uncaught_exception: Optional[BaseException] = None): + def __init__(self, messages: list[str], uncaught_exception: BaseException | None = None): try: self._messages = [self._parse_message(message) for message in messages] except V2ValidationError as exception: @@ -71,15 +72,15 @@ def _parse_message(message: str) -> AirbyteMessage: ) @property - def records_and_state_messages(self) -> List[AirbyteMessage]: + def records_and_state_messages(self) -> list[AirbyteMessage]: return self._get_message_by_types([Type.RECORD, Type.STATE]) @property - def records(self) -> List[AirbyteMessage]: + def records(self) -> list[AirbyteMessage]: return self._get_message_by_types([Type.RECORD]) @property - def state_messages(self) -> List[AirbyteMessage]: + def state_messages(self) -> list[AirbyteMessage]: return self._get_message_by_types([Type.STATE]) @property @@ -90,19 +91,19 @@ def most_recent_state(self) -> Any: return state_messages[-1].state.stream # type: ignore[union-attr] # state has `stream` @property - def logs(self) -> 
List[AirbyteMessage]: + def logs(self) -> list[AirbyteMessage]: return self._get_message_by_types([Type.LOG]) @property - def trace_messages(self) -> List[AirbyteMessage]: + def trace_messages(self) -> list[AirbyteMessage]: return self._get_message_by_types([Type.TRACE]) @property - def analytics_messages(self) -> List[AirbyteMessage]: + def analytics_messages(self) -> list[AirbyteMessage]: return self._get_trace_message_by_trace_type(TraceType.ANALYTICS) @property - def errors(self) -> List[AirbyteMessage]: + def errors(self) -> list[AirbyteMessage]: return self._get_trace_message_by_trace_type(TraceType.ERROR) @property @@ -112,7 +113,7 @@ def catalog(self) -> AirbyteMessage: raise ValueError(f"Expected exactly one catalog but got {len(catalog)}") return catalog[0] - def get_stream_statuses(self, stream_name: str) -> List[AirbyteStreamStatus]: + def get_stream_statuses(self, stream_name: str) -> list[AirbyteStreamStatus]: status_messages = map( lambda message: message.trace.stream_status.status, # type: ignore filter( @@ -122,10 +123,10 @@ def get_stream_statuses(self, stream_name: str) -> List[AirbyteStreamStatus]: ) return list(status_messages) - def _get_message_by_types(self, message_types: List[Type]) -> List[AirbyteMessage]: + def _get_message_by_types(self, message_types: list[Type]) -> list[AirbyteMessage]: return [message for message in self._messages if message.type in message_types] - def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> List[AirbyteMessage]: + def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> list[AirbyteMessage]: return [ message for message in self._get_message_by_types([Type.TRACE]) @@ -149,7 +150,7 @@ def is_not_in_logs(self, pattern: str) -> bool: def _run_command( - source: Source, args: List[str], expecting_exception: bool = False + source: Source, args: list[str], expecting_exception: bool = False ) -> EntrypointOutput: log_capture_buffer = StringIO() stream_handler = logging.StreamHandler(log_capture_buffer) @@ -203,7 +204,7 @@ def read( source: Source, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, - state: Optional[List[AirbyteStateMessage]] = None, + state: list[AirbyteStateMessage] | None = None, expecting_exception: bool = False, ) -> EntrypointOutput: """ @@ -241,7 +242,7 @@ def read( def make_file( - path: Path, file_contents: Optional[Union[str, Mapping[str, Any], List[Mapping[str, Any]]]] + path: Path, file_contents: str | Mapping[str, Any] | list[Mapping[str, Any]] | None ) -> str: if isinstance(file_contents, str): path.write_text(file_contents) diff --git a/airbyte_cdk/test/mock_http/mocker.py b/airbyte_cdk/test/mock_http/mocker.py index 204d43140..c61b65f92 100644 --- a/airbyte_cdk/test/mock_http/mocker.py +++ b/airbyte_cdk/test/mock_http/mocker.py @@ -3,9 +3,9 @@ import contextlib import functools from collections import defaultdict +from collections.abc import Callable, Iterable from enum import Enum from types import TracebackType -from typing import Callable, Dict, Iterable, List, Optional, Union import requests_mock @@ -41,7 +41,7 @@ class HttpMocker(contextlib.ContextDecorator): def __init__(self) -> None: self._mocker = requests_mock.Mocker() - self._matchers: Dict[SupportedHttpMethods, List[HttpRequestMatcher]] = defaultdict(list) + self._matchers: dict[SupportedHttpMethods, list[HttpRequestMatcher]] = defaultdict(list) def __enter__(self) -> "HttpMocker": self._mocker.__enter__() @@ -49,9 +49,9 @@ def __enter__(self) -> "HttpMocker": def __exit__( self, - exc_type: 
Optional[BaseException], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], + exc_type: BaseException | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, ) -> None: self._mocker.__exit__(exc_type, exc_val, exc_tb) @@ -64,7 +64,7 @@ def _mock_request_method( self, method: SupportedHttpMethods, request: HttpRequest, - responses: Union[HttpResponse, List[HttpResponse]], + responses: HttpResponse | list[HttpResponse], ) -> None: if isinstance(responses, HttpResponse): responses = [responses] @@ -91,25 +91,19 @@ def _mock_request_method( def _get_body_field(response: HttpResponse) -> str: return "text" if isinstance(response.body, str) else "content" - def get(self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]) -> None: + def get(self, request: HttpRequest, responses: HttpResponse | list[HttpResponse]) -> None: self._mock_request_method(SupportedHttpMethods.GET, request, responses) - def patch( - self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]] - ) -> None: + def patch(self, request: HttpRequest, responses: HttpResponse | list[HttpResponse]) -> None: self._mock_request_method(SupportedHttpMethods.PATCH, request, responses) - def post( - self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]] - ) -> None: + def post(self, request: HttpRequest, responses: HttpResponse | list[HttpResponse]) -> None: self._mock_request_method(SupportedHttpMethods.POST, request, responses) - def put(self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]) -> None: + def put(self, request: HttpRequest, responses: HttpResponse | list[HttpResponse]) -> None: self._mock_request_method(SupportedHttpMethods.PUT, request, responses) - def delete( - self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]] - ) -> None: + def delete(self, request: HttpRequest, responses: HttpResponse | list[HttpResponse]) -> None: self._mock_request_method(SupportedHttpMethods.DELETE, request, responses) @staticmethod diff --git a/airbyte_cdk/test/mock_http/request.py b/airbyte_cdk/test/mock_http/request.py index 7209513d8..69e7b0776 100644 --- a/airbyte_cdk/test/mock_http/request.py +++ b/airbyte_cdk/test/mock_http/request.py @@ -1,7 +1,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
import json -from typing import Any, List, Mapping, Optional, Union +from collections.abc import Mapping +from typing import Any from urllib.parse import parse_qs, urlencode, urlparse ANY_QUERY_PARAMS = "any query_parameters" @@ -15,9 +16,9 @@ class HttpRequest: def __init__( self, url: str, - query_params: Optional[Union[str, Mapping[str, Union[str, List[str]]]]] = None, - headers: Optional[Mapping[str, str]] = None, - body: Optional[Union[str, bytes, Mapping[str, Any]]] = None, + query_params: str | Mapping[str, str | list[str]] | None = None, + headers: Mapping[str, str] | None = None, + body: str | bytes | Mapping[str, Any] | None = None, ) -> None: self._parsed_url = urlparse(url) self._query_params = query_params @@ -32,7 +33,7 @@ def __init__( self._body = body @staticmethod - def _encode_qs(query_params: Union[str, Mapping[str, Union[str, List[str]]]]) -> str: + def _encode_qs(query_params: str | Mapping[str, str | list[str]]) -> str: if isinstance(query_params, str): return query_params return urlencode(query_params, doseq=True) @@ -65,8 +66,8 @@ def matches(self, other: Any) -> bool: @staticmethod def _to_mapping( - body: Optional[Union[str, bytes, Mapping[str, Any]]], - ) -> Optional[Mapping[str, Any]]: + body: str | bytes | Mapping[str, Any] | None, + ) -> Mapping[str, Any] | None: if isinstance(body, Mapping): return body elif isinstance(body, bytes): @@ -76,7 +77,7 @@ def _to_mapping( return None @staticmethod - def _to_bytes(body: Optional[Union[str, bytes]]) -> bytes: + def _to_bytes(body: str | bytes | None) -> bytes: if isinstance(body, bytes): return body elif isinstance(body, str): diff --git a/airbyte_cdk/test/mock_http/response.py b/airbyte_cdk/test/mock_http/response.py index fefe762e9..22a10c5dc 100644 --- a/airbyte_cdk/test/mock_http/response.py +++ b/airbyte_cdk/test/mock_http/response.py @@ -1,13 +1,13 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+from collections.abc import Mapping from types import MappingProxyType -from typing import Mapping, Union class HttpResponse: def __init__( self, - body: Union[str, bytes], + body: str | bytes, status_code: int = 200, headers: Mapping[str, str] = MappingProxyType({}), ): @@ -16,7 +16,7 @@ def __init__( self._headers = headers @property - def body(self) -> Union[str, bytes]: + def body(self) -> str | bytes: return self._body @property diff --git a/airbyte_cdk/test/mock_http/response_builder.py b/airbyte_cdk/test/mock_http/response_builder.py index 41766af1b..33ce35886 100644 --- a/airbyte_cdk/test/mock_http/response_builder.py +++ b/airbyte_cdk/test/mock_http/response_builder.py @@ -4,24 +4,24 @@ import json from abc import ABC, abstractmethod from pathlib import Path as FilePath -from typing import Any, Dict, List, Optional, Union +from typing import Any from airbyte_cdk.test.mock_http.response import HttpResponse from airbyte_cdk.test.utils.data import get_unit_test_folder -def _extract(path: List[str], response_template: Dict[str, Any]) -> Any: +def _extract(path: list[str], response_template: dict[str, Any]) -> Any: return functools.reduce(lambda a, b: a[b], path, response_template) -def _replace_value(dictionary: Dict[str, Any], path: List[str], value: Any) -> None: +def _replace_value(dictionary: dict[str, Any], path: list[str], value: Any) -> None: current = dictionary for key in path[:-1]: current = current[key] current[path[-1]] = value -def _write(dictionary: Dict[str, Any], path: List[str], value: Any) -> None: +def _write(dictionary: dict[str, Any], path: list[str], value: Any) -> None: current = dictionary for key in path[:-1]: current = current.setdefault(key, {}) @@ -30,14 +30,14 @@ def _write(dictionary: Dict[str, Any], path: List[str], value: Any) -> None: class Path(ABC): @abstractmethod - def write(self, template: Dict[str, Any], value: Any) -> None: + def write(self, template: dict[str, Any], value: Any) -> None: pass @abstractmethod - def update(self, template: Dict[str, Any], value: Any) -> None: + def update(self, template: dict[str, Any], value: Any) -> None: pass - def extract(self, template: Dict[str, Any]) -> Any: + def extract(self, template: dict[str, Any]) -> Any: pass @@ -45,13 +45,13 @@ class FieldPath(Path): def __init__(self, field: str): self._path = [field] - def write(self, template: Dict[str, Any], value: Any) -> None: + def write(self, template: dict[str, Any], value: Any) -> None: _write(template, self._path, value) - def update(self, template: Dict[str, Any], value: Any) -> None: + def update(self, template: dict[str, Any], value: Any) -> None: _replace_value(template, self._path, value) - def extract(self, template: Dict[str, Any]) -> Any: + def extract(self, template: dict[str, Any]) -> Any: return _extract(self._path, template) def __str__(self) -> str: @@ -59,16 +59,16 @@ def __str__(self) -> str: class NestedPath(Path): - def __init__(self, path: List[str]): + def __init__(self, path: list[str]): self._path = path - def write(self, template: Dict[str, Any], value: Any) -> None: + def write(self, template: dict[str, Any], value: Any) -> None: _write(template, self._path, value) - def update(self, template: Dict[str, Any], value: Any) -> None: + def update(self, template: dict[str, Any], value: Any) -> None: _replace_value(template, self._path, value) - def extract(self, template: Dict[str, Any]) -> Any: + def extract(self, template: dict[str, Any]) -> Any: return _extract(self._path, template) def __str__(self) -> str: @@ -77,7 +77,7 @@ def 
__str__(self) -> str: class PaginationStrategy(ABC): @abstractmethod - def update(self, response: Dict[str, Any]) -> None: + def update(self, response: dict[str, Any]) -> None: pass @@ -86,16 +86,16 @@ def __init__(self, path: Path, value: Any): self._path = path self._value = value - def update(self, response: Dict[str, Any]) -> None: + def update(self, response: dict[str, Any]) -> None: self._path.update(response, self._value) class RecordBuilder: def __init__( self, - template: Dict[str, Any], - id_path: Optional[Path], - cursor_path: Optional[Union[FieldPath, NestedPath]], + template: dict[str, Any], + id_path: Path | None, + cursor_path: FieldPath | NestedPath | None, ): self._record = template self._id_path = id_path @@ -111,7 +111,7 @@ def _validate_template(self) -> None: for field_name, field_path in paths_to_validate: self._validate_field(field_name, field_path) - def _validate_field(self, field_name: str, path: Optional[Path]) -> None: + def _validate_field(self, field_name: str, path: Path | None) -> None: try: if path and not path.extract(self._record): raise ValueError( @@ -134,7 +134,7 @@ def with_field(self, path: Path, value: Any) -> "RecordBuilder": path.write(self._record, value) return self - def _set_field(self, field_name: str, path: Optional[Path], value: Any) -> None: + def _set_field(self, field_name: str, path: Path | None, value: Any) -> None: if not path: raise ValueError( f"{field_name}_path was not provided and hence, the record {field_name} can't be modified. Please provide `id_field` while " @@ -142,19 +142,19 @@ def _set_field(self, field_name: str, path: Optional[Path], value: Any) -> None: ) path.update(self._record, value) - def build(self) -> Dict[str, Any]: + def build(self) -> dict[str, Any]: return self._record class HttpResponseBuilder: def __init__( self, - template: Dict[str, Any], - records_path: Union[FieldPath, NestedPath], - pagination_strategy: Optional[PaginationStrategy], + template: dict[str, Any], + records_path: FieldPath | NestedPath, + pagination_strategy: PaginationStrategy | None, ): self._response = template - self._records: List[RecordBuilder] = [] + self._records: list[RecordBuilder] = [] self._records_path = records_path self._pagination_strategy = pagination_strategy self._status_code = 200 @@ -186,7 +186,7 @@ def _get_unit_test_folder(execution_folder: str) -> FilePath: return get_unit_test_folder(execution_folder) -def find_template(resource: str, execution_folder: str) -> Dict[str, Any]: +def find_template(resource: str, execution_folder: str) -> dict[str, Any]: response_template_filepath = str( get_unit_test_folder(execution_folder) / "resource" @@ -194,15 +194,15 @@ def find_template(resource: str, execution_folder: str) -> Dict[str, Any]: / "response" / f"{resource}.json" ) - with open(response_template_filepath, "r") as template_file: + with open(response_template_filepath) as template_file: return json.load(template_file) # type: ignore # we assume the dev correctly set up the resource file def create_record_builder( - response_template: Dict[str, Any], - records_path: Union[FieldPath, NestedPath], - record_id_path: Optional[Path] = None, - record_cursor_path: Optional[Union[FieldPath, NestedPath]] = None, + response_template: dict[str, Any], + records_path: FieldPath | NestedPath, + record_id_path: Path | None = None, + record_cursor_path: FieldPath | NestedPath | None = None, ) -> RecordBuilder: """ This will use the first record define at `records_path` as a template for the records. 
If more records are defined, they will be ignored @@ -222,8 +222,8 @@ def create_record_builder( def create_response_builder( - response_template: Dict[str, Any], - records_path: Union[FieldPath, NestedPath], - pagination_strategy: Optional[PaginationStrategy] = None, + response_template: dict[str, Any], + records_path: FieldPath | NestedPath, + pagination_strategy: PaginationStrategy | None = None, ) -> HttpResponseBuilder: return HttpResponseBuilder(response_template, records_path, pagination_strategy) diff --git a/airbyte_cdk/test/state_builder.py b/airbyte_cdk/test/state_builder.py index a1315cf4e..4f6826eb6 100644 --- a/airbyte_cdk/test/state_builder.py +++ b/airbyte_cdk/test/state_builder.py @@ -1,6 +1,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. -from typing import Any, List +from typing import Any from airbyte_cdk.models import ( AirbyteStateBlob, @@ -13,7 +13,7 @@ class StateBuilder: def __init__(self) -> None: - self._state: List[AirbyteStateMessage] = [] + self._state: list[AirbyteStateMessage] = [] def with_stream_state(self, stream_name: str, state: Any) -> "StateBuilder": self._state.append( @@ -29,5 +29,5 @@ def with_stream_state(self, stream_name: str, state: Any) -> "StateBuilder": ) return self - def build(self) -> List[AirbyteStateMessage]: + def build(self) -> list[AirbyteStateMessage]: return self._state diff --git a/airbyte_cdk/test/utils/http_mocking.py b/airbyte_cdk/test/utils/http_mocking.py index 7fd1419fc..327803728 100644 --- a/airbyte_cdk/test/utils/http_mocking.py +++ b/airbyte_cdk/test/utils/http_mocking.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. import re -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any from requests_mock import Mocker diff --git a/airbyte_cdk/test/utils/reading.py b/airbyte_cdk/test/utils/reading.py index 2d89cb870..d48d77721 100644 --- a/airbyte_cdk/test/utils/reading.py +++ b/airbyte_cdk/test/utils/reading.py @@ -1,6 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -from typing import Any, List, Mapping, Optional +from collections.abc import Mapping +from typing import Any from airbyte_cdk import AbstractSource from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog, SyncMode @@ -18,7 +19,7 @@ def read_records( config: Mapping[str, Any], stream_name: str, sync_mode: SyncMode, - state: Optional[List[AirbyteStateMessage]] = None, + state: list[AirbyteStateMessage] | None = None, expecting_exception: bool = False, ) -> EntrypointOutput: """Read records from a stream.""" diff --git a/airbyte_cdk/utils/airbyte_secrets_utils.py b/airbyte_cdk/utils/airbyte_secrets_utils.py index bb5a6be59..b4dcf35ff 100644 --- a/airbyte_cdk/utils/airbyte_secrets_utils.py +++ b/airbyte_cdk/utils/airbyte_secrets_utils.py @@ -2,15 +2,16 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Any, List, Mapping +from collections.abc import Mapping +from typing import Any import dpath -def get_secret_paths(spec: Mapping[str, Any]) -> List[List[str]]: +def get_secret_paths(spec: Mapping[str, Any]) -> list[list[str]]: paths = [] - def traverse_schema(schema_item: Any, path: List[str]) -> None: + def traverse_schema(schema_item: Any, path: list[str]) -> None: """ schema_item can be any property or value in the originally input jsonschema, depending on how far down the recursion stack we go path is the path to that schema item in the original input @@ -38,7 +39,7 @@ def traverse_schema(schema_item: Any, path: List[str]) -> None: def get_secrets( connection_specification: Mapping[str, Any], config: Mapping[str, Any] -) -> List[Any]: +) -> list[Any]: """ Get a list of secret values from the source config based on the source specification :type connection_specification: the connection_specification field of an AirbyteSpecification i.e the JSONSchema definition @@ -55,10 +56,10 @@ def get_secrets( return result -__SECRETS_FROM_CONFIG: List[str] = [] +__SECRETS_FROM_CONFIG: list[str] = [] -def update_secrets(secrets: List[str]) -> None: +def update_secrets(secrets: list[str]) -> None: """Update the list of secrets to be replaced""" global __SECRETS_FROM_CONFIG __SECRETS_FROM_CONFIG = secrets diff --git a/airbyte_cdk/utils/analytics_message.py b/airbyte_cdk/utils/analytics_message.py index 82a074913..07c7ff9e1 100644 --- a/airbyte_cdk/utils/analytics_message.py +++ b/airbyte_cdk/utils/analytics_message.py @@ -1,7 +1,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. import time -from typing import Any, Optional +from typing import Any from airbyte_cdk.models import ( AirbyteAnalyticsTraceMessage, @@ -12,7 +12,7 @@ ) -def create_analytics_message(type: str, value: Optional[Any]) -> AirbyteMessage: +def create_analytics_message(type: str, value: Any | None) -> AirbyteMessage: return AirbyteMessage( type=Type.TRACE, trace=AirbyteTraceMessage( diff --git a/airbyte_cdk/utils/datetime_format_inferrer.py b/airbyte_cdk/utils/datetime_format_inferrer.py index 28eaefa31..33e5672e5 100644 --- a/airbyte_cdk/utils/datetime_format_inferrer.py +++ b/airbyte_cdk/utils/datetime_format_inferrer.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Dict, Optional +from typing import Any from airbyte_cdk.models import AirbyteRecordMessage from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser @@ -15,7 +15,7 @@ class DatetimeFormatInferrer: def __init__(self) -> None: self._parser = DatetimeParser() - self._datetime_candidates: Optional[Dict[str, str]] = None + self._datetime_candidates: dict[str, str] | None = None self._formats = [ "%Y-%m-%d", "%Y-%m-%d %H:%M:%S", @@ -39,7 +39,7 @@ def _can_be_datetime(self, value: Any) -> bool: This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000 for seconds or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds. 
This is separate from the format check for performance reasons""" - if isinstance(value, (str, int)): + if isinstance(value, str | int): try: value_as_int = int(value) for timestamp_range in self._timestamp_heuristic_ranges: @@ -86,7 +86,7 @@ def accumulate(self, record: AirbyteRecordMessage) -> None: """Analyzes the record and updates the internal state of candidate datetime fields""" self._initialize(record) if self._datetime_candidates is None else self._validate(record) - def get_inferred_datetime_formats(self) -> Dict[str, str]: + def get_inferred_datetime_formats(self) -> dict[str, str]: """ Returns the list of candidate datetime fields - the keys are the field names and the values are the inferred datetime formats. For these fields the format was consistent across all visited records. diff --git a/airbyte_cdk/utils/datetime_helpers.py b/airbyte_cdk/utils/datetime_helpers.py index 99cf1ad23..56aa6397b 100644 --- a/airbyte_cdk/utils/datetime_helpers.py +++ b/airbyte_cdk/utils/datetime_helpers.py @@ -82,11 +82,10 @@ """ from datetime import datetime, timedelta, timezone -from typing import Any, Optional, Union, overload +from typing import Any, Union, overload from dateutil import parser -from typing_extensions import Never -from whenever import Instant, LocalDateTime, ZonedDateTime +from whenever import Instant class AirbyteDateTime(datetime): @@ -263,7 +262,7 @@ def __sub__( result = super().__sub__(other) # type: ignore[call-overload] if isinstance(result, datetime): return AirbyteDateTime.from_datetime(result) - elif isinstance(other, (datetime, AirbyteDateTime)): + elif isinstance(other, datetime | AirbyteDateTime): result = super().__sub__(other) # type: ignore[call-overload] if isinstance(result, timedelta): return result @@ -462,7 +461,7 @@ def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None: def ab_datetime_format( - dt: Union[datetime, AirbyteDateTime], + dt: datetime | AirbyteDateTime, format: str | None = None, ) -> str: """Formats a datetime object as an ISO8601/RFC3339 string with 'T' delimiter and timezone. 
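The hunks above and below all apply the same mechanical modernization: abstract container types are imported from collections.abc instead of typing, builtin generics (dict[...], list[...]) replace Dict/List, and PEP 604 unions (X | None) replace Optional/Union, including as isinstance() targets. Below is a minimal sketch of that pattern, assuming Python 3.10+ (consistent with the target-version = "py310" already set in ruff.toml, shown later in this diff); the function name first_timestamp is illustrative only and is not part of the CDK.

from collections.abc import Mapping  # was: from typing import Mapping
from typing import Any


def first_timestamp(record: Mapping[str, Any]) -> str | None:  # was: Optional[str]
    """Return the first value that could be a timestamp candidate (hypothetical helper)."""
    for value in record.values():
        # On 3.10+, a PEP 604 union is a valid isinstance() target,
        # replacing the tuple form isinstance(value, (str, int)).
        if isinstance(value, str | int):
            return str(value)
    return None


print(first_timestamp({"id": 1, "created_at": "2024-01-01"}))  # prints "1"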
diff --git a/airbyte_cdk/utils/event_timing.py b/airbyte_cdk/utils/event_timing.py index 3f489c096..479f11c03 100644 --- a/airbyte_cdk/utils/event_timing.py +++ b/airbyte_cdk/utils/event_timing.py @@ -5,9 +5,10 @@ import datetime import logging import time +from collections.abc import Generator from contextlib import contextmanager from dataclasses import dataclass, field -from typing import Any, Generator, Literal, Optional +from typing import Any, Literal logger = logging.getLogger("airbyte") @@ -60,7 +61,7 @@ def report(self, order_by: Literal["name", "duration"] = "name") -> str: class Event: name: str start: float = field(default_factory=time.perf_counter_ns) - end: Optional[float] = field(default=None) + end: float | None = field(default=None) @property def duration(self) -> float: diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index bfe2b7709..f49ee241f 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -4,7 +4,8 @@ import copy -from typing import Any, Dict, List, Mapping, Optional, Union +from collections.abc import Mapping +from typing import Any from airbyte_cdk.sources.declarative.requesters.request_option import ( RequestOption, @@ -14,9 +15,9 @@ def _merge_mappings( - target: Dict[str, Any], + target: dict[str, Any], source: Mapping[str, Any], - path: Optional[List[str]] = None, + path: list[str] | None = None, allow_same_value_merge: bool = False, ) -> None: """ @@ -56,9 +57,9 @@ def _merge_mappings( def combine_mappings( - mappings: List[Optional[Union[Mapping[str, Any], str]]], + mappings: list[Mapping[str, Any] | str | None], allow_same_value_merge: bool = False, -) -> Union[Mapping[str, Any], str]: +) -> Mapping[str, Any] | str: """ Combine multiple mappings into a single mapping. @@ -106,7 +107,7 @@ def combine_mappings( return next(m for m in non_empty_mappings if isinstance(m, str)) # Start with an empty result and merge each mapping into it - result: Dict[str, Any] = {} + result: dict[str, Any] = {} for mapping in non_empty_mappings: if mapping and isinstance(mapping, Mapping): _merge_mappings(result, mapping, allow_same_value_merge=allow_same_value_merge) @@ -115,13 +116,13 @@ def combine_mappings( def _validate_component_request_option_paths( - config: Config, *request_options: Optional[RequestOption] + config: Config, *request_options: RequestOption | None ) -> None: """ Validates that a component with multiple request options does not have conflicting paths. Uses dummy values for validation since actual values might not be available at init time. 
""" - grouped_options: Dict[RequestOptionType, List[RequestOption]] = {} + grouped_options: dict[RequestOptionType, list[RequestOption]] = {} for option in request_options: if option: grouped_options.setdefault(option.inject_into, []).append(option) @@ -130,9 +131,9 @@ def _validate_component_request_option_paths( if len(options) <= 1: continue - option_dicts: List[Optional[Union[Mapping[str, Any], str]]] = [] + option_dicts: list[Mapping[str, Any] | str | None] = [] for i, option in enumerate(options): - option_dict: Dict[str, Any] = {} + option_dict: dict[str, Any] = {} # Use indexed dummy values to ensure we catch conflicts option.inject_into_request(option_dict, f"dummy_value_{i}", config) option_dicts.append(option_dict) @@ -146,9 +147,9 @@ def _validate_component_request_option_paths( def get_interpolation_context( - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: StreamState | None = None, + stream_slice: StreamSlice | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Mapping[str, Any]: return { "stream_slice": stream_slice, diff --git a/airbyte_cdk/utils/oneof_option_config.py b/airbyte_cdk/utils/oneof_option_config.py index 17ebf0511..a3adec500 100644 --- a/airbyte_cdk/utils/oneof_option_config.py +++ b/airbyte_cdk/utils/oneof_option_config.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Dict +from typing import Any class OneOfOptionConfig: @@ -26,7 +26,7 @@ class Config(OneOfOptionConfig): """ @staticmethod - def schema_extra(schema: Dict[str, Any], model: Any) -> None: + def schema_extra(schema: dict[str, Any], model: Any) -> None: if hasattr(model.Config, "description"): schema["description"] = model.Config.description if hasattr(model.Config, "discriminator"): diff --git a/airbyte_cdk/utils/print_buffer.py b/airbyte_cdk/utils/print_buffer.py index ae5a2020c..8025892fd 100644 --- a/airbyte_cdk/utils/print_buffer.py +++ b/airbyte_cdk/utils/print_buffer.py @@ -5,7 +5,6 @@ from io import StringIO from threading import RLock from types import TracebackType -from typing import Optional class PrintBuffer: @@ -67,9 +66,9 @@ def __enter__(self) -> "PrintBuffer": def __exit__( self, - exc_type: Optional[BaseException], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], + exc_type: BaseException | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, ) -> None: self.flush() sys.stdout, sys.stderr = self.old_stdout, self.old_stderr diff --git a/airbyte_cdk/utils/schema_inferrer.py b/airbyte_cdk/utils/schema_inferrer.py index f3c6b2fae..1fd1f76e5 100644 --- a/airbyte_cdk/utils/schema_inferrer.py +++ b/airbyte_cdk/utils/schema_inferrer.py @@ -3,7 +3,8 @@ # from collections import defaultdict -from typing import Any, Dict, List, Mapping, Optional +from collections.abc import Mapping +from typing import Any from genson import SchemaBuilder, SchemaNode from genson.schema.strategies.object import Object @@ -31,7 +32,7 @@ class NoRequiredObj(Object): """ def to_schema(self) -> Mapping[str, Any]: - schema: Dict[str, Any] = super(NoRequiredObj, self).to_schema() + schema: dict[str, Any] = super().to_schema() schema.pop("required", None) return schema @@ -51,13 +52,13 @@ class NoRequiredSchemaBuilder(SchemaBuilder): # This type is inferred from the genson lib, but there is no alias provided for it - creating it here for type safety -InferredSchema = Dict[str, Any] 
+InferredSchema = dict[str, Any] class SchemaValidationException(Exception): @classmethod def merge_exceptions( - cls, exceptions: List["SchemaValidationException"] + cls, exceptions: list["SchemaValidationException"] ) -> "SchemaValidationException": # We assume the schema is the same for all SchemaValidationException return SchemaValidationException( @@ -65,7 +66,7 @@ def merge_exceptions( [x for exception in exceptions for x in exception._validation_errors], ) - def __init__(self, schema: InferredSchema, validation_errors: List[Exception]): + def __init__(self, schema: InferredSchema, validation_errors: list[Exception]): self._schema = schema self._validation_errors = validation_errors @@ -74,7 +75,7 @@ def schema(self) -> InferredSchema: return self._schema @property - def validation_errors(self) -> List[str]: + def validation_errors(self) -> list[str]: return list(map(lambda error: str(error), self._validation_errors)) @@ -88,10 +89,10 @@ class SchemaInferrer: """ - stream_to_builder: Dict[str, SchemaBuilder] + stream_to_builder: dict[str, SchemaBuilder] def __init__( - self, pk: Optional[List[List[str]]] = None, cursor_field: Optional[List[List[str]]] = None + self, pk: list[list[str]] | None = None, cursor_field: list[list[str]] | None = None ) -> None: self.stream_to_builder = defaultdict(NoRequiredSchemaBuilder) self._pk = [] if pk is None else pk @@ -184,11 +185,11 @@ def _add_required_properties(self, node: InferredSchema) -> InferredSchema: return node - def _add_fields_as_required(self, node: InferredSchema, composite_key: List[List[str]]) -> None: + def _add_fields_as_required(self, node: InferredSchema, composite_key: list[list[str]]) -> None: """ Take a list of nested keys (this list represents a composite key) and travel the schema to mark every node as required. """ - errors: List[Exception] = [] + errors: list[Exception] = [] for path in composite_key: try: @@ -200,7 +201,7 @@ def _add_fields_as_required(self, node: InferredSchema, composite_key: List[List raise SchemaValidationException(node, errors) def _add_field_as_required( - self, node: InferredSchema, path: List[str], traveled_path: Optional[List[str]] = None + self, node: InferredSchema, path: list[str], traveled_path: list[str] | None = None ) -> None: """ Take a nested key and travel the schema to mark every node as required. @@ -247,7 +248,7 @@ def _add_field_as_required( traveled_path.append(next_node) self._add_field_as_required(node[_PROPERTIES][next_node], path[1:], traveled_path) - def _is_leaf(self, path: List[str]) -> bool: + def _is_leaf(self, path: list[str]) -> bool: return len(path) == 0 def _remove_null_from_type(self, node: InferredSchema) -> None: @@ -257,7 +258,7 @@ def _remove_null_from_type(self, node: InferredSchema) -> None: if len(node[_TYPE]) == 1: node[_TYPE] = node[_TYPE][0] - def get_stream_schema(self, stream_name: str) -> Optional[InferredSchema]: + def get_stream_schema(self, stream_name: str) -> InferredSchema | None: """ Returns the inferred JSON schema for the specified stream. Might be `None` if there were no records for the given stream name. 
""" diff --git a/airbyte_cdk/utils/slice_hasher.py b/airbyte_cdk/utils/slice_hasher.py index 25950a934..28af4af87 100644 --- a/airbyte_cdk/utils/slice_hasher.py +++ b/airbyte_cdk/utils/slice_hasher.py @@ -1,6 +1,7 @@ import hashlib import json -from typing import Any, Final, Mapping, Optional +from collections.abc import Mapping +from typing import Any, Final class SliceEncoder(json.JSONEncoder): @@ -19,7 +20,7 @@ class SliceHasher: def hash( cls, stream_name: str = "", - stream_slice: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, ) -> int: """ Note that streams partition with the same slicing value but with different names might collapse if stream name is not provided diff --git a/airbyte_cdk/utils/stream_status_utils.py b/airbyte_cdk/utils/stream_status_utils.py index 49c07f49c..00f91a229 100644 --- a/airbyte_cdk/utils/stream_status_utils.py +++ b/airbyte_cdk/utils/stream_status_utils.py @@ -4,7 +4,6 @@ from datetime import datetime -from typing import List, Optional, Union from airbyte_cdk.models import ( AirbyteMessage, @@ -20,9 +19,9 @@ def as_airbyte_message( - stream: Union[AirbyteStream, StreamDescriptor], + stream: AirbyteStream | StreamDescriptor, current_status: AirbyteStreamStatus, - reasons: Optional[List[AirbyteStreamStatusReason]] = None, + reasons: list[AirbyteStreamStatusReason] | None = None, ) -> AirbyteMessage: """ Builds an AirbyteStreamStatusTraceMessage for the provided stream diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index 59dbab2a5..879b4f614 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -3,7 +3,7 @@ # import time import traceback -from typing import Any, Optional +from typing import Any import orjson @@ -29,11 +29,11 @@ class AirbyteTracedException(Exception): def __init__( self, - internal_message: Optional[str] = None, - message: Optional[str] = None, + internal_message: str | None = None, + message: str | None = None, failure_type: FailureType = FailureType.system_error, - exception: Optional[BaseException] = None, - stream_descriptor: Optional[StreamDescriptor] = None, + exception: BaseException | None = None, + stream_descriptor: StreamDescriptor | None = None, ): """ :param internal_message: the internal error that caused the failure @@ -50,7 +50,7 @@ def __init__( super().__init__(internal_message) def as_airbyte_message( - self, stream_descriptor: Optional[StreamDescriptor] = None + self, stream_descriptor: StreamDescriptor | None = None ) -> AirbyteMessage: """ Builds an AirbyteTraceMessage from the exception @@ -80,7 +80,7 @@ def as_airbyte_message( return AirbyteMessage(type=MessageType.TRACE, trace=trace_message) - def as_connection_status_message(self) -> Optional[AirbyteMessage]: + def as_connection_status_message(self) -> AirbyteMessage | None: if self.failure_type == FailureType.config_error: return AirbyteMessage( type=MessageType.CONNECTION_STATUS, @@ -103,7 +103,7 @@ def emit_message(self) -> None: def from_exception( cls, exc: BaseException, - stream_descriptor: Optional[StreamDescriptor] = None, + stream_descriptor: StreamDescriptor | None = None, *args: Any, **kwargs: Any, ) -> "AirbyteTracedException": @@ -121,7 +121,7 @@ def from_exception( ) # type: ignore # ignoring because of args and kwargs def as_sanitized_airbyte_message( - self, stream_descriptor: Optional[StreamDescriptor] = None + self, stream_descriptor: StreamDescriptor | None = None ) -> AirbyteMessage: """ Builds an AirbyteTraceMessage from 
the exception and sanitizes any secrets from the message body diff --git a/debug_manifest/debug_manifest.py b/debug_manifest/debug_manifest.py index c520d0b0c..6c61fd587 100644 --- a/debug_manifest/debug_manifest.py +++ b/debug_manifest/debug_manifest.py @@ -3,7 +3,8 @@ # import sys -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any from airbyte_cdk.entrypoint import launch from airbyte_cdk.sources.declarative.yaml_declarative_source import ( diff --git a/ruff.toml b/ruff.toml index 5ed2f45e2..fb6440937 100644 --- a/ruff.toml +++ b/ruff.toml @@ -3,4 +3,4 @@ target-version = "py310" line-length = 100 [lint] -select = ["I"] +select = ["I", "UP", "F"] diff --git a/unit_tests/connector_builder/test_message_grouper.py b/unit_tests/connector_builder/test_message_grouper.py index c40514a27..33f46a300 100644 --- a/unit_tests/connector_builder/test_message_grouper.py +++ b/unit_tests/connector_builder/test_message_grouper.py @@ -3,7 +3,8 @@ # import json -from typing import Any, Iterator, List, Mapping +from collections.abc import Iterator, Mapping +from typing import Any from unittest.mock import MagicMock, Mock, patch import orjson @@ -998,7 +999,7 @@ def request_response_log_message( ) -def any_request_and_response_with_a_record() -> List[AirbyteMessage]: +def any_request_and_response_with_a_record() -> list[AirbyteMessage]: return [ request_response_log_message({"request": 1}, {"response": 2}, "http://any_url.com"), record_message("hashiras", {"name": "Shinobu Kocho"}), diff --git a/unit_tests/connector_builder/utils.py b/unit_tests/connector_builder/utils.py index a94a04164..9dcede061 100644 --- a/unit_tests/connector_builder/utils.py +++ b/unit_tests/connector_builder/utils.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteCatalogSerializer diff --git a/unit_tests/destinations/test_destination.py b/unit_tests/destinations/test_destination.py index 1f8f6573f..af7ae43db 100644 --- a/unit_tests/destinations/test_destination.py +++ b/unit_tests/destinations/test_destination.py @@ -5,9 +5,10 @@ import argparse import io import json +from collections.abc import Iterable, Mapping from os import PathLike from pathlib import Path -from typing import Any, Dict, Iterable, List, Mapping, Union +from typing import Any from unittest.mock import ANY import orjson @@ -55,7 +56,7 @@ class TestArgParsing: ], ) def test_successful_parse( - self, arg_list: List[str], expected_output: Mapping[str, Any], destination: Destination + self, arg_list: list[str], expected_output: Mapping[str, Any], destination: Destination ): parsed_args = vars(destination.parse_args(arg_list)) assert parsed_args == expected_output, ( @@ -76,39 +77,37 @@ def test_successful_parse( (["check", "path"]), ], ) - def test_failed_parse(self, arg_list: List[str], destination: Destination): + def test_failed_parse(self, arg_list: list[str], destination: Destination): # We use BaseException because it encompasses SystemExit (raised by failed parsing) and other exceptions (raised by additional semantic # checks) with pytest.raises(BaseException): destination.parse_args(arg_list) -def _state(state: Dict[str, Any]) -> AirbyteStateMessage: +def _state(state: dict[str, Any]) -> AirbyteStateMessage: return AirbyteStateMessage(data=state) -def _record(stream: str, data: Dict[str, Any]) -> AirbyteRecordMessage: +def _record(stream: str, data: dict[str, Any]) -> AirbyteRecordMessage: return AirbyteRecordMessage(stream=stream, data=data, emitted_at=0) -def _spec(schema: Dict[str, Any]) -> ConnectorSpecification: +def _spec(schema: dict[str, Any]) -> ConnectorSpecification: return ConnectorSpecification(connectionSpecification=schema) -def write_file(path: PathLike, content: Union[str, Mapping]): +def write_file(path: PathLike, content: str | Mapping): content = json.dumps(content) if isinstance(content, Mapping) else content with open(path, "w") as f: f.write(content) def _wrapped( - msg: Union[ - AirbyteRecordMessage, - AirbyteStateMessage, - AirbyteCatalog, - ConnectorSpecification, - AirbyteConnectionStatus, - ], + msg: AirbyteRecordMessage + | AirbyteStateMessage + | AirbyteCatalog + | ConnectorSpecification + | AirbyteConnectionStatus, ) -> AirbyteMessage: if isinstance(msg, AirbyteRecordMessage): return AirbyteMessage(type=Type.RECORD, record=msg) @@ -284,7 +283,7 @@ def test_run_write(self, mocker, destination: Destination, tmp_path, monkeypatch "airbyte_cdk.destinations.destination.check_config_against_spec_or_exit" ) # mock input is a record followed by some state messages - mocked_input: List[AirbyteMessage] = [ + mocked_input: list[AirbyteMessage] = [ _wrapped(_record("s1", {"k1": "v1"})), *expected_write_result, ] diff --git a/unit_tests/destinations/vector_db_based/config_test.py b/unit_tests/destinations/vector_db_based/config_test.py index ea6f446bc..fd939e098 100644 --- a/unit_tests/destinations/vector_db_based/config_test.py +++ b/unit_tests/destinations/vector_db_based/config_test.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Union import dpath from pydantic.v1 import BaseModel, Field @@ -29,13 +28,13 @@ class IndexingModel(BaseModel): class ConfigModel(BaseModel): indexing: IndexingModel - embedding: Union[ - OpenAIEmbeddingConfigModel, - CohereEmbeddingConfigModel, - FakeEmbeddingConfigModel, - AzureOpenAIEmbeddingConfigModel, - OpenAICompatibleEmbeddingConfigModel, - ] = Field( + embedding: ( + OpenAIEmbeddingConfigModel + | CohereEmbeddingConfigModel + | FakeEmbeddingConfigModel + | AzureOpenAIEmbeddingConfigModel + | OpenAICompatibleEmbeddingConfigModel + ) = Field( ..., title="Embedding", description="Embedding configuration", diff --git a/unit_tests/destinations/vector_db_based/document_processor_test.py b/unit_tests/destinations/vector_db_based/document_processor_test.py index ede889216..da7ad063b 100644 --- a/unit_tests/destinations/vector_db_based/document_processor_test.py +++ b/unit_tests/destinations/vector_db_based/document_processor_test.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, List, Mapping, Optional +from collections.abc import Mapping +from typing import Any from unittest.mock import MagicMock import pytest @@ -501,7 +502,7 @@ def test_text_splitter_check(label, split_config, has_error_message): ], ) def test_rename_metadata_fields( - mappings: Optional[List[FieldNameMappingConfigModel]], + mappings: list[FieldNameMappingConfigModel] | None, fields: Mapping[str, Any], expected_chunk_metadata: Mapping[str, Any], ): @@ -556,7 +557,7 @@ def test_rename_metadata_fields( def test_process_multiple_chunks_with_dedupe_mode( primary_key_value: Mapping[str, Any], stringified_primary_key: str, - primary_key: List[List[str]], + primary_key: list[list[str]], ): processor = initialize_processor() diff --git a/unit_tests/destinations/vector_db_based/writer_test.py b/unit_tests/destinations/vector_db_based/writer_test.py index 6475f619d..9c87afce3 100644 --- a/unit_tests/destinations/vector_db_based/writer_test.py +++ b/unit_tests/destinations/vector_db_based/writer_test.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Optional from unittest.mock import ANY, MagicMock, call import pytest @@ -21,7 +20,7 @@ def _generate_record_message( - index: int, stream: str = "example_stream", namespace: Optional[str] = None + index: int, stream: str = "example_stream", namespace: str | None = None ): return AirbyteMessage( type=Type.RECORD, @@ -37,7 +36,7 @@ def _generate_record_message( BATCH_SIZE = 32 -def generate_stream(name: str = "example_stream", namespace: Optional[str] = None): +def generate_stream(name: str = "example_stream", namespace: str | None = None): return { "stream": { "name": name, diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index 3d61e65e8..dc04e6afe 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -34,13 +34,13 @@ def invalid_local_manifest(): @pytest.fixture def valid_local_manifest_yaml(valid_local_manifest): - with open(valid_local_manifest, "r") as file: + with open(valid_local_manifest) as file: return yaml.safe_load(file) @pytest.fixture def invalid_local_manifest_yaml(invalid_local_manifest): - with open(invalid_local_manifest, "r") as file: + with open(invalid_local_manifest) as file: return yaml.safe_load(file) diff --git a/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components.py b/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components.py index 5e7e16f71..d836ad9a8 100644 --- a/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components.py +++ b/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components.py @@ -1,9 +1,5 @@ """A sample implementation of custom components that does nothing but will cause syncs to fail if missing.""" -from typing import Any, Mapping - -import requests - from airbyte_cdk.sources.declarative.extractors import DpathExtractor diff --git a/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components_failing.py b/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components_failing.py index 5c05881e7..3d4446c6f 100644 --- a/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components_failing.py +++ b/unit_tests/source_declarative_manifest/resources/source_pokeapi_w_components_py/components_failing.py @@ -4,8 +4,7 @@ """A sample implementation of custom components that does nothing but will cause syncs to fail if missing.""" from collections.abc import Iterable, MutableMapping -from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Optional, Union +from typing import Any import requests diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 521572bec..bb2f5956e 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -96,7 +96,7 @@ def get_py_components_config_dict( "components.py" if not failing_components else "components_failing.py" ) config_yaml_path: Path = connector_dir / "valid_config.yaml" - secrets_yaml_path: Path = connector_dir / "secrets.yaml" + connector_dir / "secrets.yaml" manifest_dict = yaml.safe_load(manifest_yml_path.read_text()) assert manifest_dict, "Failed to load 
the manifest file." @@ -136,7 +136,7 @@ def test_missing_checksum_fails_to_run( Path(temp_config_file.name).write_text(json_str) temp_config_file.flush() with pytest.raises(ValueError): - source = create_declarative_source( + create_declarative_source( ["check", "--config", temp_config_file.name], ) @@ -168,7 +168,7 @@ def test_invalid_checksum_fails_to_run( Path(temp_config_file.name).write_text(json_str) temp_config_file.flush() with pytest.raises(AirbyteCodeTamperedError): - source = create_declarative_source( + create_declarative_source( ["check", "--config", temp_config_file.name], ) diff --git a/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py b/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py index 8f41253fa..89eb0f6ff 100644 --- a/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +++ b/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py @@ -3,7 +3,8 @@ # import logging -from typing import Any, List, Mapping, Optional, Tuple +from collections.abc import Mapping +from typing import Any from unittest.mock import Mock import freezegun @@ -45,10 +46,10 @@ def __init__( def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: raise NotImplementedError - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: return [ self.convert_to_concurrent_stream(self._logger, s, Mock()) if is_concurrent else s for s, is_concurrent in self._streams_to_is_concurrent.items() @@ -139,7 +140,7 @@ def _mock_stream(name: str, data=[], available: bool = True): return s -def _configured_catalog(streams: List[Stream]): +def _configured_catalog(streams: list[Stream]): return ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( diff --git a/unit_tests/sources/declarative/async_job/test_integration.py b/unit_tests/sources/declarative/async_job/test_integration.py index a0b6195b7..63fb4dfa7 100644 --- a/unit_tests/sources/declarative/async_job/test_integration.py +++ b/unit_tests/sources/declarative/async_job/test_integration.py @@ -2,7 +2,8 @@ import logging -from typing import Any, Iterable, List, Mapping, Optional, Set, Tuple +from collections.abc import Iterable, Mapping +from typing import Any from unittest import TestCase, mock from airbyte_cdk import ( @@ -40,7 +41,7 @@ class MockAsyncJobRepository(AsyncJobRepository): def start(self, stream_slice: StreamSlice) -> AsyncJob: return AsyncJob("a_job_id", stream_slice) - def update_jobs_status(self, jobs: Set[AsyncJob]) -> None: + def update_jobs_status(self, jobs: set[AsyncJob]) -> None: for job in jobs: job.update_status(AsyncJobStatus.COMPLETED) @@ -55,19 +56,19 @@ def delete(self, job: AsyncJob) -> None: class MockSource(AbstractSource): - def __init__(self, stream_slicer: Optional[StreamSlicer] = None) -> None: + def __init__(self, stream_slicer: StreamSlicer | None = None) -> None: self._stream_slicer = SinglePartitionRouter({}) if stream_slicer is None else stream_slicer self._message_repository = NoopMessageRepository() def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: return True, None def spec(self, logger: logging.Logger) -> ConnectorSpecification: return ConnectorSpecification(connectionSpecification={}) - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, 
Any]) -> list[Stream]: noop_record_selector = RecordSelector( extractor=_EXTRACTOR_NOT_USED, config={}, diff --git a/unit_tests/sources/declarative/async_job/test_job_orchestrator.py b/unit_tests/sources/declarative/async_job/test_job_orchestrator.py index d99f8502f..def14e038 100644 --- a/unit_tests/sources/declarative/async_job/test_job_orchestrator.py +++ b/unit_tests/sources/declarative/async_job/test_job_orchestrator.py @@ -4,7 +4,7 @@ import sys import threading import time -from typing import Callable, List, Mapping, Optional, Set, Tuple +from collections.abc import Callable, Mapping from unittest import TestCase, mock from unittest.mock import MagicMock, Mock, call @@ -63,11 +63,11 @@ def test_given_only_completed_jobs_when_status_then_return_running(self) -> None def _status_update_per_jobs( - status_update_per_jobs: Mapping[AsyncJob, List[AsyncJobStatus]], + status_update_per_jobs: Mapping[AsyncJob, list[AsyncJobStatus]], ) -> Callable[[set[AsyncJob]], None]: status_index_by_job = {job: 0 for job in status_update_per_jobs.keys()} - def _update_status(jobs: Set[AsyncJob]) -> None: + def _update_status(jobs: set[AsyncJob]) -> None: for job in jobs: status_index = status_index_by_job[job] job.update_status(status_update_per_jobs[job][status_index]) @@ -183,7 +183,7 @@ def test_when_fetch_records_then_yield_records_from_each_job(self) -> None: assert self._job_repository.delete.mock_calls == [call(first_job), call(second_job)] def _orchestrator( - self, slices: List[StreamSlice], job_tracker: Optional[JobTracker] = None + self, slices: list[StreamSlice], job_tracker: JobTracker | None = None ) -> AsyncJobOrchestrator: job_tracker = job_tracker if job_tracker else JobTracker(_NO_JOB_LIMIT) return AsyncJobOrchestrator( @@ -366,7 +366,7 @@ def _an_async_job(self, job_id: str, stream_slice: StreamSlice) -> AsyncJob: def _accumulate_create_and_get_completed_partitions( self, orchestrator: AsyncJobOrchestrator - ) -> Tuple[List[AsyncPartition], Optional[Exception]]: + ) -> tuple[list[AsyncPartition], Exception | None]: result = [] try: for i in orchestrator.create_and_get_completed_partitions(): diff --git a/unit_tests/sources/declarative/async_job/test_job_tracker.py b/unit_tests/sources/declarative/async_job/test_job_tracker.py index 4ddba1112..b5447eb63 100644 --- a/unit_tests/sources/declarative/async_job/test_job_tracker.py +++ b/unit_tests/sources/declarative/async_job/test_job_tracker.py @@ -1,6 +1,5 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
-from typing import List from unittest import TestCase import pytest @@ -37,7 +36,7 @@ def test_given_limit_reached_when_add_job_then_limit_is_still_reached(self) -> N with pytest.raises(ConcurrentJobLimitReached): self._tracker.try_to_get_intent() - def _reach_limit(self) -> List[str]: + def _reach_limit(self) -> list[str]: return [self._tracker.try_to_get_intent() for i in range(_LIMIT)] diff --git a/unit_tests/sources/declarative/auth/test_jwt.py b/unit_tests/sources/declarative/auth/test_jwt.py index 49b7ea570..443fbb4aa 100644 --- a/unit_tests/sources/declarative/auth/test_jwt.py +++ b/unit_tests/sources/declarative/auth/test_jwt.py @@ -111,7 +111,7 @@ def test_given_overriden_reserverd_properties_get_jwt_payload_throws_error(self) @pytest.mark.parametrize( "base64_encode_secret_key, secret_key, expected", [ - (True, "test", base64.b64encode("test".encode()).decode()), + (True, "test", base64.b64encode(b"test").decode()), (False, "test", "test"), ], ) diff --git a/unit_tests/sources/declarative/auth/test_oauth.py b/unit_tests/sources/declarative/auth/test_oauth.py index c54b9982f..697380ed3 100644 --- a/unit_tests/sources/declarative/auth/test_oauth.py +++ b/unit_tests/sources/declarative/auth/test_oauth.py @@ -5,7 +5,7 @@ import base64 import json import logging -from datetime import timedelta, timezone +from datetime import timedelta from unittest.mock import Mock import freezegun diff --git a/unit_tests/sources/declarative/checks/test_check_stream.py b/unit_tests/sources/declarative/checks/test_check_stream.py index 3cbaf8fd8..0ab9b6eb7 100644 --- a/unit_tests/sources/declarative/checks/test_check_stream.py +++ b/unit_tests/sources/declarative/checks/test_check_stream.py @@ -4,8 +4,9 @@ import json import logging +from collections.abc import Iterable, Mapping from copy import deepcopy -from typing import Any, Iterable, Mapping, Optional +from typing import Any from unittest.mock import MagicMock import pytest @@ -133,7 +134,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.resp_counter = 1 - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return None def path(self, **kwargs) -> str: @@ -672,7 +673,7 @@ def test_check_stream_missing_fields(): }, } with pytest.raises(ValidationError): - source = ConcurrentDeclarativeSource( + ConcurrentDeclarativeSource( source_config=manifest, config=_CONFIG, catalog=None, diff --git a/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py b/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py index 3f3e04e57..6472dba9f 100644 --- a/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py +++ b/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py @@ -1,6 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
-from typing import Any, Mapping, Optional, Type, Union +from collections.abc import Mapping +from typing import Any import pytest @@ -33,7 +34,7 @@ ], ) def test_stream_slices( - default_concurrency: Union[int, str], max_concurrency: int, expected_concurrency: int + default_concurrency: int | str, max_concurrency: int, expected_concurrency: int ) -> None: config = {"num_workers": 50} concurrency_level = ConcurrencyLevel( @@ -63,8 +64,8 @@ def test_stream_slices( ) def test_default_concurrency_input_types_and_errors( config: Mapping[str, Any], - expected_concurrency: Optional[int], - expected_error: Optional[Type[Exception]], + expected_concurrency: int | None, + expected_error: type[Exception] | None, ) -> None: concurrency_level = ConcurrencyLevel( default_concurrency="{{ config['num_workers'] or 30 }}", diff --git a/unit_tests/sources/declarative/custom_state_migration.py b/unit_tests/sources/declarative/custom_state_migration.py index 86ca4a5c4..2ff7edf95 100644 --- a/unit_tests/sources/declarative/custom_state_migration.py +++ b/unit_tests/sources/declarative/custom_state_migration.py @@ -2,7 +2,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString diff --git a/unit_tests/sources/declarative/decoders/test_composite_decoder.py b/unit_tests/sources/declarative/decoders/test_composite_decoder.py index 02c0993b6..f09d23249 100644 --- a/unit_tests/sources/declarative/decoders/test_composite_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_composite_decoder.py @@ -5,10 +5,10 @@ import gzip import json import socket +from collections.abc import Iterable from http.server import BaseHTTPRequestHandler, HTTPServer from io import BytesIO, StringIO from threading import Thread -from typing import Iterable from unittest.mock import Mock, patch import pytest diff --git a/unit_tests/sources/declarative/decoders/test_decoders_memory_usage.py b/unit_tests/sources/declarative/decoders/test_decoders_memory_usage.py index 6901c6382..02cf2e91b 100644 --- a/unit_tests/sources/declarative/decoders/test_decoders_memory_usage.py +++ b/unit_tests/sources/declarative/decoders/test_decoders_memory_usage.py @@ -1,12 +1,10 @@ # # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # -import gzip import json import os import pytest -import requests from airbyte_cdk import YamlDeclarativeSource from airbyte_cdk.models import SyncMode diff --git a/unit_tests/sources/declarative/decoders/test_json_decoder.py b/unit_tests/sources/declarative/decoders/test_json_decoder.py index 5992bf45a..2fad9a890 100644 --- a/unit_tests/sources/declarative/decoders/test_json_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_json_decoder.py @@ -1,7 +1,6 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -import gzip import json import os diff --git a/unit_tests/sources/declarative/decoders/test_zipfile_decoder.py b/unit_tests/sources/declarative/decoders/test_zipfile_decoder.py index f5c988d0f..af3ead6af 100644 --- a/unit_tests/sources/declarative/decoders/test_zipfile_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_zipfile_decoder.py @@ -5,7 +5,6 @@ import json import zipfile from io import BytesIO -from typing import Union import pytest import requests @@ -13,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders import GzipParser, JsonParser, ZipfileDecoder -def create_zip_from_dict(data: Union[dict, list]) -> bytes: +def create_zip_from_dict(data: dict | list) -> bytes: zip_buffer = BytesIO() with zipfile.ZipFile(zip_buffer, mode="w") as zip_file: zip_file.writestr("data.json", data) diff --git a/unit_tests/sources/declarative/extractors/test_dpath_extractor.py b/unit_tests/sources/declarative/extractors/test_dpath_extractor.py index fa216685a..8c25a3135 100644 --- a/unit_tests/sources/declarative/extractors/test_dpath_extractor.py +++ b/unit_tests/sources/declarative/extractors/test_dpath_extractor.py @@ -3,7 +3,6 @@ # import io import json -from typing import Dict, List, Union import pytest import requests @@ -25,7 +24,7 @@ decoder_iterable = IterableDecoder(parameters={}) -def create_response(body: Union[Dict, bytes]): +def create_response(body: dict | bytes): response = requests.Response() response.raw = io.BytesIO(body if isinstance(body, bytes) else json.dumps(body).encode("utf-8")) return response @@ -113,7 +112,7 @@ def create_response(body: Union[Dict, bytes]): "test_extract_from_string_per_line_iterable", ], ) -def test_dpath_extractor(field_path: List, decoder: Decoder, body, expected_records: List): +def test_dpath_extractor(field_path: list, decoder: Decoder, body, expected_records: list): extractor = DpathExtractor( field_path=field_path, config=config, decoder=decoder, parameters=parameters ) diff --git a/unit_tests/sources/declarative/extractors/test_record_filter.py b/unit_tests/sources/declarative/extractors/test_record_filter.py index 03274e732..bef8b0321 100644 --- a/unit_tests/sources/declarative/extractors/test_record_filter.py +++ b/unit_tests/sources/declarative/extractors/test_record_filter.py @@ -1,7 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import List, Mapping, Optional +from collections.abc import Mapping import pytest @@ -112,7 +112,7 @@ ], ) def test_record_filter( - filter_template: str, records: List[Mapping], expected_records: List[Mapping] + filter_template: str, records: list[Mapping], expected_records: list[Mapping] ): config = {"response_override": "stop_if_you_see_me"} parameters = {"created_at": "06-07-21"} @@ -266,11 +266,11 @@ def test_record_filter( ) def test_client_side_record_filter_decorator_no_parent_stream( datetime_format: str, - stream_state: Optional[Mapping], + stream_state: Mapping | None, record_filter_expression: str, - end_datetime: Optional[str], - records_to_filter: List[Mapping], - expected_record_ids: List[int], + end_datetime: str | None, + records_to_filter: list[Mapping], + expected_record_ids: list[int], ): date_time_based_cursor = DatetimeBasedCursor( start_datetime=MinMaxDatetime( @@ -361,7 +361,7 @@ def test_client_side_record_filter_decorator_no_parent_stream( ], ) def test_client_side_record_filter_decorator_with_cursor_types( - stream_state: Optional[Mapping], cursor_type: str, expected_record_ids: List[int] + stream_state: Mapping | None, cursor_type: str, expected_record_ids: list[int] ): def date_time_based_cursor_factory() -> DatetimeBasedCursor: return DatetimeBasedCursor( diff --git a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py index e23d03a4a..2508fded0 100644 --- a/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py +++ b/unit_tests/sources/declarative/incremental/test_concurrent_perpartitioncursor.py @@ -1,8 +1,9 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
import copy +from collections.abc import Mapping, MutableMapping from copy import deepcopy from datetime import datetime, timedelta -from typing import Any, List, Mapping, MutableMapping, Optional, Union +from typing import Any from unittest.mock import MagicMock, patch from urllib.parse import unquote @@ -372,7 +373,7 @@ def _run_read( manifest: Mapping[str, Any], config: Mapping[str, Any], stream_name: str, - state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None, + state: list[AirbyteStateMessage] | MutableMapping[str, Any] | None = None, ) -> EntrypointOutput: source = ConcurrentDeclarativeSource( source_config=manifest, config=config, catalog=None, state=state diff --git a/unit_tests/sources/declarative/interpolation/test_filters.py b/unit_tests/sources/declarative/interpolation/test_filters.py index a48c6a301..98100131a 100644 --- a/unit_tests/sources/declarative/interpolation/test_filters.py +++ b/unit_tests/sources/declarative/interpolation/test_filters.py @@ -14,7 +14,7 @@ def test_hash_md5_no_salt() -> None: input_string = "abcd" - s = "{{ '%s' | hash('md5') }}" % input_string + s = f"{{{{ '{input_string}' | hash('md5') }}}}" filter_hash = interpolation.eval(s, config={}) # compute expected hash calling hashlib directly @@ -27,7 +27,7 @@ def test_hash_md5_no_salt() -> None: def test_hash_md5_on_numeric_value() -> None: input_value = 123.456 - s = "{{ %f | hash('md5') }}" % input_value + s = f"{{{{ {input_value:f} | hash('md5') }}}}" filter_hash = interpolation.eval(s, config={}) # compute expected hash calling hashlib directly @@ -42,7 +42,7 @@ def test_hash_md5_with_salt() -> None: input_string = "test_input_string" input_salt = "test_input_salt" - s = "{{ '%s' | hash('md5', '%s' ) }}" % (input_string, input_salt) + s = f"{{{{ '{input_string}' | hash('md5', '{input_salt}' ) }}}}" filter_hash = interpolation.eval(s, config={}) # compute expected value calling hashlib directly @@ -58,7 +58,7 @@ def test_hash_md5_with_salt() -> None: ["test_input_client_id", "some_client_secret_1", "12345", "775.78"], ) def test_base64encode(input_string: str) -> None: - s = "{{ '%s' | base64encode }}" % input_string + s = f"{{{{ '{input_string}' | base64encode }}}}" filter_base64encode = interpolation.eval(s, config={}) # compute expected base64encode calling base64 library directly @@ -76,7 +76,7 @@ def test_base64encode(input_string: str) -> None: ], ) def test_base64decode(input_string: str, expected_string: str) -> None: - s = "{{ '%s' | base64decode }}" % input_string + s = f"{{{{ '{input_string}' | base64decode }}}}" filter_base64decode = interpolation.eval(s, config={}) assert filter_base64decode == expected_string @@ -112,7 +112,7 @@ def test_hmac_sha256_default() -> None: message = "test_message" secret_key = "test_secret_key" - s = "{{ '%s' | hmac('%s') }}" % (message, secret_key) + s = f"{{{{ '{message}' | hmac('{secret_key}') }}}}" filter_hmac = interpolation.eval(s, config={}) # compute expected hmac using the hmac library directly @@ -128,7 +128,7 @@ def test_hmac_sha256_explicit() -> None: message = "test_message" secret_key = "test_secret_key" - s = "{{ '%s' | hmac('%s', 'sha256') }}" % (message, secret_key) + s = f"{{{{ '{message}' | hmac('{secret_key}', 'sha256') }}}}" filter_hmac = interpolation.eval(s, config={}) # compute expected hmac using the hmac library directly @@ -160,7 +160,7 @@ def test_hmac_with_invalid_hash_type() -> None: message = "test_message" secret_key = "test_secret_key" - s = "{{ '%s' | hmac('%s', 'md5') }}" % (message, 
secret_key) + s = f"{{{{ '{message}' | hmac('{secret_key}', 'md5') }}}}" with pytest.raises(ValueError): interpolation.eval(s, config={}) diff --git a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index 856106bfe..1766bf1d5 100644 --- a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -3,8 +3,9 @@ # # mypy: ignore-errors -from datetime import datetime, timedelta, timezone -from typing import Any, Iterable, Mapping +from collections.abc import Iterable, Mapping +from datetime import timedelta, timezone +from typing import Any import freezegun import pytest diff --git a/unit_tests/sources/declarative/parsers/testing_components.py b/unit_tests/sources/declarative/parsers/testing_components.py index 0d49e8627..61195bc9e 100644 --- a/unit_tests/sources/declarative/parsers/testing_components.py +++ b/unit_tests/sources/declarative/parsers/testing_components.py @@ -3,7 +3,6 @@ # from dataclasses import dataclass, field -from typing import List, Optional from airbyte_cdk.sources.declarative.extractors import DpathExtractor from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter @@ -25,8 +24,8 @@ class TestingSomeComponent(DefaultErrorHandler): default_factory=lambda: DpathExtractor(field_path=[], config={}, parameters={}) ) basic_field: str = "" - optional_subcomponent_field: Optional[RequestOption] = None - list_of_subcomponents: List[RequestOption] = None + optional_subcomponent_field: RequestOption | None = None + list_of_subcomponents: list[RequestOption] = None without_hint = None paginator: DefaultPaginator = None diff --git a/unit_tests/sources/declarative/partition_routers/test_async_job_partition_router.py b/unit_tests/sources/declarative/partition_routers/test_async_job_partition_router.py index 2a5ac3277..d2b225499 100644 --- a/unit_tests/sources/declarative/partition_routers/test_async_job_partition_router.py +++ b/unit_tests/sources/declarative/partition_routers/test_async_job_partition_router.py @@ -2,7 +2,6 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import ( AsyncJobOrchestrator, - AsyncPartition, ) from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus diff --git a/unit_tests/sources/declarative/partition_routers/test_grouping_partition_router.py b/unit_tests/sources/declarative/partition_routers/test_grouping_partition_router.py index 9bea606e4..95852f982 100644 --- a/unit_tests/sources/declarative/partition_routers/test_grouping_partition_router.py +++ b/unit_tests/sources/declarative/partition_routers/test_grouping_partition_router.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Iterable, List, Mapping, Optional, Union from unittest.mock import MagicMock import pytest diff --git a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py index 4fbbd7355..27d8bdc2e 100644 --- a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py +++ b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py @@ -1,7 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
import copy -from typing import Any, List, Mapping, MutableMapping, Optional, Union +from collections.abc import Mapping, MutableMapping +from typing import Any from unittest.mock import MagicMock import orjson @@ -238,8 +239,8 @@ def _run_read( manifest: Mapping[str, Any], config: Mapping[str, Any], stream_name: str, - state: Optional[Union[List[AirbyteStateMessage], MutableMapping[str, Any]]] = None, -) -> List[AirbyteMessage]: + state: list[AirbyteStateMessage] | MutableMapping[str, Any] | None = None, +) -> list[AirbyteMessage]: source = ManifestDeclarativeSource(source_config=manifest) catalog = ConfiguredAirbyteCatalog( streams=[ diff --git a/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py b/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py index 80c8f1e10..dd29ce9c4 100644 --- a/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py +++ b/unit_tests/sources/declarative/partition_routers/test_substream_partition_router.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Iterable, Mapping, MutableMapping from functools import partial -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union +from typing import Any import pytest as pytest @@ -83,7 +84,7 @@ def name(self) -> str: return self._name @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return "id" @property @@ -98,16 +99,16 @@ def state(self, value: Mapping[str, Any]) -> None: def is_resumable(self) -> bool: return bool(self._cursor) - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: return self._cursor def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_state: Mapping[str, Any] = None, - ) -> Iterable[Optional[StreamSlice]]: + ) -> Iterable[StreamSlice | None]: for s in self._slices: if isinstance(s, StreamSlice): yield s @@ -117,7 +118,7 @@ def stream_slices( def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: @@ -157,7 +158,7 @@ def __init__(self, slices, records, name, cursor_field="", cursor=None, date_ran def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: @@ -179,7 +180,7 @@ def __init__( name, cursor_field="", cursor=None, - record_pages: Optional[List[List[Mapping[str, Any]]]] = None, + record_pages: list[list[Mapping[str, Any]]] | None = None, ): super().__init__(slices, [], name, cursor_field, cursor) if record_pages: @@ -191,7 +192,7 @@ def __init__( def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: diff --git a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py index ba670f507..2a6d6a1fa 100644 --- 
a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py +++ b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_time_from_header.py @@ -39,7 +39,7 @@ ), ("test_wait_time_from_header_not_a_number", "wait_time", "61,60", None, None), ("test_wait_time_from_header_with_regex", "wait_time", "61,60", r"([-+]?\d+)", 61), # noqa - ("test_wait_time_fœrom_header_with_regex_no_match", "wait_time", "...", "[-+]?\d+", None), # noqa + ("test_wait_time_fœrom_header_with_regex_no_match", "wait_time", "...", r"[-+]?\d+", None), # noqa ("test_wait_time_from_header", "absent_header", None, None, None), ], ) diff --git a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py index 4c4c5a6f7..bf6710b80 100644 --- a/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py +++ b/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_wait_until_time_from_header.py @@ -53,7 +53,7 @@ "wait_until", "1600000060,60", None, - "[-+]?\d+", + r"[-+]?\d+", 60, ), # noqa ( @@ -78,7 +78,7 @@ "wait_time", "...", None, - "[-+]?\d+", + r"[-+]?\d+", None, ), # noqa ( diff --git a/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py b/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py index 6e7b60a92..ab5d0de8d 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py @@ -26,7 +26,7 @@ PageIncrement, ) from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath -from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +from airbyte_cdk.sources.declarative.types import Record, StreamSlice @pytest.mark.parametrize( diff --git a/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py b/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py index 4cd827e88..d577c3d26 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py @@ -3,7 +3,7 @@ # import json -from typing import Any, Optional +from typing import Any import pytest import requests @@ -92,7 +92,7 @@ def test_offset_increment_paginator_strategy_rises(): ], ) def test_offset_increment_paginator_strategy_initial_token( - inject_on_first_request: bool, expected_initial_token: Optional[Any] + inject_on_first_request: bool, expected_initial_token: Any | None ): paginator_strategy = OffsetIncrement( page_size=20, parameters={}, config={}, inject_on_first_request=inject_on_first_request diff --git a/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py b/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py index 32af20b50..8cc21de7a 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py @@ -3,7 +3,7 @@ # import json -from typing import Any, Optional +from typing import Any import pytest import requests @@ -94,7 +94,7 @@ def test_page_increment_paginator_strategy_malformed_page_size(page_size): ], ) def 
test_page_increment_paginator_strategy_initial_token( - inject_on_first_request: bool, start_from_page: int, expected_initial_token: Optional[Any] + inject_on_first_request: bool, start_from_page: int, expected_initial_token: Any | None ): paginator_strategy = PageIncrement( page_size=20, diff --git a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py index 115ce688d..68f5fbd09 100644 --- a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py +++ b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Type +from typing import Any import pytest @@ -35,10 +35,10 @@ ], ) def test_request_option_validation( - field_name: Optional[str], + field_name: str | None, field_path: Any, inject_into: RequestOptionType, - error_type: Type[Exception], + error_type: type[Exception], error_message: str, ): """Test various validation cases for RequestOption""" @@ -81,11 +81,11 @@ def test_request_option_validation( ], ) def test_inject_into_request_cases( - request_option_args: Dict[str, Any], value: Any, expected_result: Dict[str, Any] + request_option_args: dict[str, Any], value: Any, expected_result: dict[str, Any] ): """Test various injection cases""" config = {"base_field": "value"} - target: Dict[str, Any] = {} + target: dict[str, Any] = {} request_option = RequestOption(**request_option_args, parameters={}) request_option.inject_into_request(target, value, config) @@ -116,16 +116,16 @@ def test_inject_into_request_cases( ], ) def test_interpolation_cases( - config: Dict[str, Any], - parameters: Dict[str, Any], - field_path: List[str], - expected_structure: Dict[str, Any], + config: dict[str, Any], + parameters: dict[str, Any], + field_path: list[str], + expected_structure: dict[str, Any], ): """Test various interpolation scenarios""" request_option = RequestOption( field_path=field_path, inject_into=RequestOptionType.body_json, parameters=parameters ) - target: Dict[str, Any] = {} + target: dict[str, Any] = {} request_option.inject_into_request(target, "test_value", config) assert target == expected_structure @@ -141,10 +141,10 @@ def test_interpolation_cases( (None, type(None)), ], ) -def test_value_type_handling(value: Any, expected_type: Type): +def test_value_type_handling(value: Any, expected_type: type): """Test handling of different value types""" config = {} - target: Dict[str, Any] = {} + target: dict[str, Any] = {} request_option = RequestOption( field_path=["data", "test"], inject_into=RequestOptionType.body_json, parameters={} ) @@ -161,8 +161,8 @@ def test_value_type_handling(value: Any, expected_type: Type): ], ) def test__is_field_path( - field_name: Optional[str], - field_path: Optional[List[str]], + field_name: str | None, + field_path: list[str] | None, inject_into: RequestOptionType, expected__is_field_path: bool, ): diff --git a/unit_tests/sources/declarative/requesters/test_http_requester.py b/unit_tests/sources/declarative/requesters/test_http_requester.py index 8fce688d7..1408c1de3 100644 --- a/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -2,8 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from datetime import timedelta -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any from unittest import mock from unittest.mock import MagicMock from urllib.parse import parse_qs, urlparse @@ -15,7 +15,6 @@ from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.declarative.requesters.error_handlers import HttpResponseFilter from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import ( ConstantBackoffStrategy, ExponentialBackoffStrategy, @@ -30,12 +29,8 @@ ) from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.streams.call_rate import ( - AbstractAPIBudget, HttpAPIBudget, - MovingWindowCallRatePolicy, - Rate, ) -from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.streams.http.exceptions import ( RequestBodyException, UserDefinedBackoffException, @@ -50,14 +45,14 @@ def factory( url_base: str = "https://test_base_url.com", path: str = "/", http_method: str = HttpMethod.GET, - request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None, - authenticator: Optional[DeclarativeAuthenticator] = None, - error_handler: Optional[ErrorHandler] = None, - api_budget: Optional[HttpAPIBudget] = None, - config: Optional[Config] = None, + request_options_provider: InterpolatedRequestOptionsProvider | None = None, + authenticator: DeclarativeAuthenticator | None = None, + error_handler: ErrorHandler | None = None, + api_budget: HttpAPIBudget | None = None, + config: Config | None = None, parameters: Mapping[str, Any] = None, disable_retries: bool = False, - message_repository: Optional[MessageRepository] = None, + message_repository: MessageRepository | None = None, use_cache: bool = False, ) -> HttpRequester: return HttpRequester( @@ -192,12 +187,12 @@ def test_path(test_name, path, expected_path): def create_requester( - url_base: Optional[str] = None, - parameters: Optional[Mapping[str, Any]] = {}, - config: Optional[Config] = None, - path: Optional[str] = None, - authenticator: Optional[DeclarativeAuthenticator] = None, - error_handler: Optional[ErrorHandler] = None, + url_base: str | None = None, + parameters: Mapping[str, Any] | None = {}, + config: Config | None = None, + path: str | None = None, + authenticator: DeclarativeAuthenticator | None = None, + error_handler: ErrorHandler | None = None, ) -> HttpRequester: requester = HttpRequester( name="name", diff --git a/unit_tests/sources/declarative/resolvers/test_http_components_resolver.py b/unit_tests/sources/declarative/resolvers/test_http_components_resolver.py index 437822181..30651b434 100644 --- a/unit_tests/sources/declarative/resolvers/test_http_components_resolver.py +++ b/unit_tests/sources/declarative/resolvers/test_http_components_resolver.py @@ -513,9 +513,9 @@ def test_dynamic_streams_read_with_http_components_resolver(): ] assert len(actual_catalog.streams) == 2 - assert set([stream.name for stream in actual_catalog.streams]) == expected_stream_names + assert {stream.name for stream in actual_catalog.streams} == expected_stream_names assert len(records) == 2 - assert set([record.stream for record in records]) == expected_stream_names + assert {record.stream for record in records} == expected_stream_names def test_duplicated_dynamic_streams_read_with_http_components_resolver(): 
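Reviewer note on the hunks above (test_http_requester.py and test_http_components_resolver.py): `Optional[X]`/`Union[X, Y]` annotations are rewritten to PEP 604 `X | None` / `X | Y`, and `set([...])` becomes a set comprehension. A minimal, self-contained sketch (names are illustrative, not from the CDK) of why these spellings are interchangeable at runtime on Python 3.10+:

```python
from typing import Any, Optional, Union, get_args

# PEP 604 unions carry the same runtime arguments as the typing aliases they replace.
assert get_args(str | None) == get_args(Optional[str]) == (str, type(None))
assert get_args(int | str) == get_args(Union[int, str]) == (int, str)


def first_or_none(values: list[Any] | None) -> Any | None:
    """Built-in generics (list[...]) plus `| None` replace List[...] and Optional[...]."""
    return values[0] if values else None


assert first_or_none(["a", "b"]) == "a"
assert first_or_none(None) is None

# set([...]) -> {...}: a set comprehension builds the same set without the intermediate list.
streams = [{"name": "users"}, {"name": "orders"}, {"name": "users"}]
assert {s["name"] for s in streams} == set([s["name"] for s in streams]) == {"users", "orders"}
```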
diff --git a/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index 0425b4e84..3617110a3 100644 --- a/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -3,8 +3,9 @@ # import json +from collections.abc import Iterable, Mapping from functools import partial -from typing import Any, Iterable, Mapping, Optional +from typing import Any from unittest.mock import MagicMock, Mock, patch import pytest @@ -833,7 +834,7 @@ def test_given_initial_token_is_zero_when_read_records_then_pass_initial_token() response = requests.Response() response.status_code = 200 - response._content = "{}".encode() + response._content = b"{}" with patch.object( SimpleRetriever, @@ -957,7 +958,7 @@ def test_retriever_last_page_size_for_page_increment(): Record(data={"id": "4d", "name": "Investment Banking Division"}, stream_name="departments"), ] - def mock_parse_records(response: Optional[requests.Response]) -> Iterable[Record]: + def mock_parse_records(response: requests.Response | None) -> Iterable[Record]: yield from expected_records actual_records = list( @@ -1007,7 +1008,7 @@ def test_retriever_last_record_for_page_increment(): ), ] - def mock_parse_records(response: Optional[requests.Response]) -> Iterable[Record]: + def mock_parse_records(response: requests.Response | None) -> Iterable[Record]: yield from expected_records actual_records = list( @@ -1055,8 +1056,8 @@ def test_retriever_is_stateless(): ).encode("utf-8") def mock_send_request( - next_page_token: Optional[Mapping[str, Any]] = None, **kwargs - ) -> Optional[requests.Response]: + next_page_token: Mapping[str, Any] | None = None, **kwargs + ) -> requests.Response | None: page_number = next_page_token.get("next_page_token") if next_page_token else None if page_number is None: return page_response_1 diff --git a/unit_tests/sources/declarative/stream_slicers/test_declarative_partition_generator.py b/unit_tests/sources/declarative/stream_slicers/test_declarative_partition_generator.py index 3ced03a69..29d4866b5 100644 --- a/unit_tests/sources/declarative/stream_slicers/test_declarative_partition_generator.py +++ b/unit_tests/sources/declarative/stream_slicers/test_declarative_partition_generator.py @@ -1,6 +1,5 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
-from typing import List from unittest import TestCase from unittest.mock import Mock @@ -77,7 +76,7 @@ def test_given_not_a_record_when_read_then_send_to_message_repository(self) -> N message_repository.emit_message.assert_called_once_with(_AIRBYTE_LOG_MESSAGE) @staticmethod - def _mock_retriever(read_return_value: List[StreamData]) -> Mock: + def _mock_retriever(read_return_value: list[StreamData]) -> Mock: retriever = Mock(spec=Retriever) retriever.read_records.return_value = iter(read_return_value) return retriever diff --git a/unit_tests/sources/declarative/test_concurrent_declarative_source.py b/unit_tests/sources/declarative/test_concurrent_declarative_source.py index 6af69eac5..c525e8e85 100644 --- a/unit_tests/sources/declarative/test_concurrent_declarative_source.py +++ b/unit_tests/sources/declarative/test_concurrent_declarative_source.py @@ -5,8 +5,9 @@ import copy import json import math +from collections.abc import Iterable, Mapping from datetime import datetime, timedelta, timezone -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union +from typing import Any from unittest.mock import patch import freezegun @@ -614,7 +615,7 @@ class DeclarativeStreamDecorator(Stream): def __init__( self, declarative_stream: DeclarativeStream, - slice_to_records_mapping: Mapping[tuple[str, str], List[Mapping[str, Any]]], + slice_to_records_mapping: Mapping[tuple[str, str], list[Mapping[str, Any]]], ): self._declarative_stream = declarative_stream self._slice_to_records_mapping = slice_to_records_mapping @@ -624,15 +625,15 @@ def name(self) -> str: return self._declarative_stream.name @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return self._declarative_stream.primary_key def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: if isinstance(stream_slice, StreamSlice): slice_key = (stream_slice.get("start_time"), stream_slice.get("end_time")) @@ -656,7 +657,7 @@ def read_records( def get_json_schema(self) -> Mapping[str, Any]: return self._declarative_stream.get_json_schema() - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: return self._declarative_stream.get_cursor() @@ -891,8 +892,8 @@ def test_discover(): def _mock_requests( http_mocker: HttpMocker, url: str, - query_params: List[Dict[str, str]], - responses: List[HttpResponse], + query_params: list[dict[str, str]], + responses: list[HttpResponse], ) -> None: assert len(query_params) == len(responses), "Expecting as many slices as response" @@ -901,7 +902,7 @@ def _mock_requests( def _mock_party_members_requests( - http_mocker: HttpMocker, slices_and_responses: List[Tuple[Dict[str, str], HttpResponse]] + http_mocker: HttpMocker, slices_and_responses: list[tuple[dict[str, str], HttpResponse]] ) -> None: slices = list(map(lambda slice_and_response: slice_and_response[0], slices_and_responses)) responses = list(map(lambda slice_and_response: slice_and_response[1], slices_and_responses)) @@ -914,7 +915,7 @@ def _mock_party_members_requests( ) -def _mock_locations_requests(http_mocker: HttpMocker, slices: List[Dict[str, str]]) -> None: +def 
_mock_locations_requests(http_mocker: HttpMocker, slices: list[dict[str, str]]) -> None: locations_query_params = list( map(lambda _slice: _slice | {"m": "active", "i": "1", "g": "country"}, slices) ) @@ -1884,7 +1885,7 @@ def create_wrapped_stream(stream: DeclarativeStream) -> Stream: ) -def get_mocked_read_records_output(stream_name: str) -> Mapping[tuple[str, str], List[StreamData]]: +def get_mocked_read_records_output(stream_name: str) -> Mapping[tuple[str, str], list[StreamData]]: match stream_name: case "locations": slices = [ @@ -2010,8 +2011,8 @@ def get_mocked_read_records_output(stream_name: str) -> Mapping[tuple[str, str], def get_records_for_stream( - stream_name: str, messages: List[AirbyteMessage] -) -> List[AirbyteRecordMessage]: + stream_name: str, messages: list[AirbyteMessage] +) -> list[AirbyteRecordMessage]: return [ message.record for message in messages @@ -2020,8 +2021,8 @@ def get_records_for_stream( def get_states_for_stream( - stream_name: str, messages: List[AirbyteMessage] -) -> List[AirbyteStateMessage]: + stream_name: str, messages: list[AirbyteMessage] +) -> list[AirbyteStateMessage]: return [ message.state for message in messages diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source.py b/unit_tests/sources/declarative/test_manifest_declarative_source.py index 519bd0955..092aacaff 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source.py @@ -6,9 +6,10 @@ import logging import os import sys +from collections.abc import Mapping from copy import deepcopy from pathlib import Path -from typing import Any, List, Mapping +from typing import Any from unittest.mock import call, patch import pytest @@ -16,7 +17,6 @@ import yaml from jsonschema.exceptions import ValidationError -import unit_tests.sources.declarative.external_component # Needed for dynamic imports to work from airbyte_cdk.models import ( AirbyteLogMessage, AirbyteMessage, @@ -108,7 +108,7 @@ def declarative_stream_config( }, "schema_loader": { "name": "{{ parameters.stream_name }}", - "file_path": f"./source_sendgrid/schemas/{{{{ parameters.name }}}}.yaml", + "file_path": "./source_sendgrid/schemas/{{ parameters.name }}.yaml", }, "retriever": { "paginator": { @@ -1940,7 +1940,7 @@ def test_only_parent_streams_use_cache(): assert not streams[2].retriever.requester.use_cache -def _run_read(manifest: Mapping[str, Any], stream_name: str) -> List[AirbyteMessage]: +def _run_read(manifest: Mapping[str, Any], stream_name: str) -> list[AirbyteMessage]: source = ManifestDeclarativeSource(source_config=manifest) catalog = ConfiguredAirbyteCatalog( streams=[ @@ -1958,10 +1958,10 @@ def _run_read(manifest: Mapping[str, Any], stream_name: str) -> List[AirbyteMess def test_declarative_component_schema_valid_ref_links(): def load_yaml(file_path) -> Mapping[str, Any]: - with open(file_path, "r") as file: + with open(file_path) as file: return yaml.safe_load(file) - def extract_refs(data, base_path="#") -> List[str]: + def extract_refs(data, base_path="#") -> list[str]: refs = [] if isinstance(data, dict): for key, value in data.items(): @@ -1986,7 +1986,7 @@ def resolve_pointer(data: Mapping[str, Any], pointer: str) -> bool: except (KeyError, TypeError): return False - def validate_refs(yaml_file: str) -> List[str]: + def validate_refs(yaml_file: str) -> list[str]: data = load_yaml(yaml_file) refs = extract_refs(data) invalid_refs = [ref for ref in refs if not resolve_pointer(data, ref.replace("#", ""))] 
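Reviewer note on the string-formatting rewrites in test_filters.py and test_manifest_declarative_source.py above: inside an f-string, `{{` and `}}` escape to literal braces, so the converted templates render byte-for-byte the same as the old `%`-formatted ones, and the `file_path` literal no longer needs an `f` prefix at all. A small sketch (values are illustrative) demonstrating the equivalence:

```python
# Dropping the needless f-prefix: with nothing interpolated, the escaped braces
# simply reproduce the literal Jinja placeholders.
via_fstring = f"./source_sendgrid/schemas/{{{{ parameters.name }}}}.yaml"
plain = "./source_sendgrid/schemas/{{ parameters.name }}.yaml"
assert via_fstring == plain

# Replacing %-formatting with an f-string: the same escaping rule keeps the
# rendered Jinja expression unchanged.
input_string = "abcd"
old_style = "{{ '%s' | hash('md5') }}" % input_string
new_style = f"{{{{ '{input_string}' | hash('md5') }}}}"
assert old_style == new_style == "{{ 'abcd' | hash('md5') }}"
```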
diff --git a/unit_tests/sources/declarative/test_yaml_declarative_source.py b/unit_tests/sources/declarative/test_yaml_declarative_source.py index 67f198eb2..8bed8235a 100644 --- a/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -37,7 +37,7 @@ def _read_and_parse_yaml_file(self, path_to_yaml_file): not mounted during runtime which prevents pkgutil.get_data() from being able to find the yaml file needed to generate # the declarative source. For tests we use open() which supports using an absolute path. """ - with open(path_to_yaml_file, "r") as f: + with open(path_to_yaml_file) as f: config_content = f.read() parsed_config = YamlDeclarativeSource._parse(config_content) return parsed_config diff --git a/unit_tests/sources/declarative/transformations/test_add_fields.py b/unit_tests/sources/declarative/transformations/test_add_fields.py index 94b6f07ed..9751f7942 100644 --- a/unit_tests/sources/declarative/transformations/test_add_fields.py +++ b/unit_tests/sources/declarative/transformations/test_add_fields.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, List, Mapping, Optional, Tuple +from collections.abc import Mapping +from typing import Any import pytest @@ -198,9 +199,9 @@ ) def test_add_fields( input_record: Mapping[str, Any], - field: List[Tuple[FieldPointer, str]], - field_type: Optional[str], - condition: Optional[str], + field: list[tuple[FieldPointer, str]], + field_type: str | None, + condition: str | None, kwargs: Mapping[str, Any], expected: Mapping[str, Any], ): diff --git a/unit_tests/sources/declarative/transformations/test_remove_fields.py b/unit_tests/sources/declarative/transformations/test_remove_fields.py index 3bbc916b3..c5352b43b 100644 --- a/unit_tests/sources/declarative/transformations/test_remove_fields.py +++ b/unit_tests/sources/declarative/transformations/test_remove_fields.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, List, Mapping +from collections.abc import Mapping +from typing import Any import pytest @@ -162,7 +163,7 @@ ) def test_remove_fields( input_record: Mapping[str, Any], - field_pointers: List[FieldPointer], + field_pointers: list[FieldPointer], condition: str, expected: Mapping[str, Any], ): diff --git a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py index ec37567a8..fc488c5ca 100644 --- a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +++ b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# -from typing import Type import pytest from jsonschema import ValidationError, validate @@ -31,7 +30,7 @@ ], ) def test_parquet_file_type_is_not_a_valid_csv_file_type( - file_format: BaseModel, file_type: str, expected_error: Type[Exception] + file_format: BaseModel, file_type: str, expected_error: type[Exception] ) -> None: format_config = {file_type: {"filetype": file_type, "decimal_as_float": True}} diff --git a/unit_tests/sources/file_based/config/test_file_based_stream_config.py b/unit_tests/sources/file_based/config/test_file_based_stream_config.py index 4c6c9d02f..809e81de6 100644 --- a/unit_tests/sources/file_based/config/test_file_based_stream_config.py +++ b/unit_tests/sources/file_based/config/test_file_based_stream_config.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Mapping, Type +from collections.abc import Mapping +from typing import Any import pytest as pytest from pydantic.v1.error_wrappers import ValidationError @@ -91,7 +92,7 @@ def test_csv_config( file_type: str, input_format: Mapping[str, Any], expected_format: Mapping[str, Any], - expected_error: Type[Exception], + expected_error: type[Exception], ) -> None: stream_config = { "name": "stream1", diff --git a/unit_tests/sources/file_based/file_types/test_csv_parser.py b/unit_tests/sources/file_based/file_types/test_csv_parser.py index 0b49dd66d..2928085b0 100644 --- a/unit_tests/sources/file_based/file_types/test_csv_parser.py +++ b/unit_tests/sources/file_based/file_types/test_csv_parser.py @@ -7,8 +7,9 @@ import io import logging import unittest +from collections.abc import Generator from datetime import datetime -from typing import Any, Dict, Generator, List, Set +from typing import Any from unittest import TestCase, mock from unittest.mock import Mock @@ -189,10 +190,10 @@ ], ) def test_cast_to_python_type( - row: Dict[str, str], - true_values: Set[str], - false_values: Set[str], - expected_output: Dict[str, Any], + row: dict[str, str], + true_values: set[str], + false_values: set[str], + expected_output: dict[str, Any], ) -> None: csv_format = CsvFormat(true_values=true_values, false_values=false_values) assert CsvParser._cast_types(row, PROPERTY_TYPES, csv_format, logger) == expected_output @@ -321,7 +322,7 @@ def test_given_empty_csv_file_when_infer_schema_then_raise_config_error(self) -> self._infer_schema() assert exception.value.failure_type == FailureType.config_error - def _test_infer_schema(self, rows: List[str], expected_type: str) -> None: + def _test_infer_schema(self, rows: list[str], expected_type: str) -> None: self._csv_reader.read_data.return_value = ({self._HEADER_NAME: row} for row in rows) inferred_schema = self._infer_schema() assert inferred_schema == {self._HEADER_NAME: {"type": expected_type}} @@ -337,14 +338,14 @@ def _infer_schema(self): class CsvFileBuilder: def __init__(self) -> None: - self._prefixed_rows: List[str] = [] - self._data: List[str] = [] + self._prefixed_rows: list[str] = [] + self._data: list[str] = [] - def with_prefixed_rows(self, rows: List[str]) -> "CsvFileBuilder": + def with_prefixed_rows(self, rows: list[str]) -> "CsvFileBuilder": self._prefixed_rows = rows return self - def with_data(self, data: List[str]) -> "CsvFileBuilder": + def with_data(self, data: list[str]) -> "CsvFileBuilder": self._data = data return self @@ -658,7 +659,7 @@ def test_read_data_with_encoding_error(self) -> None: assert "encoding" in ate.value.message assert self._csv_reader._get_headers.called - def _read_data(self) -> 
Generator[Dict[str, str], None, None]: + def _read_data(self) -> Generator[dict[str, str], None, None]: data_generator = self._csv_reader.read_data( self._config, self._file, diff --git a/unit_tests/sources/file_based/file_types/test_jsonl_parser.py b/unit_tests/sources/file_based/file_types/test_jsonl_parser.py index e2d87c9d9..578cd3476 100644 --- a/unit_tests/sources/file_based/file_types/test_jsonl_parser.py +++ b/unit_tests/sources/file_based/file_types/test_jsonl_parser.py @@ -5,7 +5,7 @@ import asyncio import io import json -from typing import Any, Dict +from typing import Any from unittest.mock import MagicMock, Mock import pytest @@ -44,7 +44,7 @@ def stream_reader() -> MagicMock: return MagicMock(spec=AbstractFileBasedStreamReader) -def _infer_schema(stream_reader: MagicMock) -> Dict[str, Any]: +def _infer_schema(stream_reader: MagicMock) -> dict[str, Any]: loop = asyncio.new_event_loop() task = loop.create_task(JsonlParser().infer_schema(Mock(), Mock(), stream_reader, Mock())) loop.run_until_complete(task) @@ -87,13 +87,13 @@ def test_given_str_io_when_infer_then_return_proper_types(stream_reader: MagicMo def test_given_empty_record_when_infer_then_return_empty_schema(stream_reader: MagicMock) -> None: - stream_reader.open_file.return_value.__enter__.return_value = io.BytesIO("{}".encode("utf-8")) + stream_reader.open_file.return_value.__enter__.return_value = io.BytesIO(b"{}") schema = _infer_schema(stream_reader) assert schema == {} def test_given_no_records_when_infer_then_return_empty_schema(stream_reader: MagicMock) -> None: - stream_reader.open_file.return_value.__enter__.return_value = io.BytesIO("".encode("utf-8")) + stream_reader.open_file.return_value.__enter__.return_value = io.BytesIO(b"") schema = _infer_schema(stream_reader) assert schema == {} @@ -140,7 +140,7 @@ def test_given_multiline_json_objects_and_hits_read_limit_when_infer_then_return def test_given_multiple_records_then_merge_types(stream_reader: MagicMock) -> None: stream_reader.open_file.return_value.__enter__.return_value = io.BytesIO( - '{"col1": 1}\n{"col1": 2.3}'.encode("utf-8") + b'{"col1": 1}\n{"col1": 2.3}' ) schema = _infer_schema(stream_reader) assert schema == {"col1": {"type": "number"}} diff --git a/unit_tests/sources/file_based/file_types/test_parquet_parser.py b/unit_tests/sources/file_based/file_types/test_parquet_parser.py index 17814813b..9f777e4ea 100644 --- a/unit_tests/sources/file_based/file_types/test_parquet_parser.py +++ b/unit_tests/sources/file_based/file_types/test_parquet_parser.py @@ -5,7 +5,8 @@ import asyncio import datetime import math -from typing import Any, Mapping, Union +from collections.abc import Mapping +from typing import Any from unittest.mock import Mock import pyarrow as pa @@ -326,9 +327,7 @@ def test_type_mapping( "this is a string", id="test_parquet_string", ), - pytest.param( - pa.utf8(), _default_parquet_format, "utf8".encode("utf8"), "utf8", id="test_utf8" - ), + pytest.param(pa.utf8(), _default_parquet_format, b"utf8", "utf8", id="test_utf8"), pytest.param( pa.large_binary(), _default_parquet_format, @@ -502,7 +501,7 @@ def test_null_value_does_not_throw(parquet_type, parquet_format) -> None: pytest.param(JsonlFormat(), id="test_jsonl_format"), ], ) -def test_wrong_file_format(file_format: Union[CsvFormat, JsonlFormat]) -> None: +def test_wrong_file_format(file_format: CsvFormat | JsonlFormat) -> None: parser = ParquetParser() config = FileBasedStreamConfig( name="test.parquet", diff --git a/unit_tests/sources/file_based/helpers.py 
b/unit_tests/sources/file_based/helpers.py index 2138cdc57..6af6aebff 100644 --- a/unit_tests/sources/file_based/helpers.py +++ b/unit_tests/sources/file_based/helpers.py @@ -3,9 +3,10 @@ # import logging +from collections.abc import Mapping from datetime import datetime from io import IOBase -from typing import Any, Dict, List, Mapping, Optional +from typing import Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.discovery_policy import DefaultDiscoveryPolicy @@ -30,7 +31,7 @@ async def infer_schema( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: return {} @@ -46,9 +47,9 @@ class LowInferenceBytesJsonlParser(JsonlParser): class TestErrorListMatchingFilesInMemoryFilesStreamReader(InMemoryFilesStreamReader): def get_matching_files( self, - globs: List[str], - from_date: Optional[datetime] = None, - ) -> List[RemoteFile]: + globs: list[str], + from_date: datetime | None = None, + ) -> list[RemoteFile]: raise Exception("Error listing files") @@ -57,7 +58,7 @@ def open_file( self, file: RemoteFile, file_read_mode: FileReadMode, - encoding: Optional[str], + encoding: str | None, logger: logging.Logger, ) -> IOBase: raise Exception("Error opening file") @@ -68,7 +69,7 @@ class FailingSchemaValidationPolicy(AbstractSchemaValidationPolicy): validate_schema_before_sync = True def record_passes_validation_policy( - self, record: Mapping[str, Any], schema: Optional[Mapping[str, Any]] + self, record: Mapping[str, Any], schema: Mapping[str, Any] | None ) -> bool: return False @@ -81,7 +82,7 @@ class LowHistoryLimitConcurrentCursor(FileBasedConcurrentCursor): DEFAULT_MAX_HISTORY_SIZE = 3 -def make_remote_files(files: List[str]) -> List[RemoteFile]: +def make_remote_files(files: list[str]) -> list[RemoteFile]: return [ RemoteFile( uri=f, diff --git a/unit_tests/sources/file_based/in_memory_files_source.py b/unit_tests/sources/file_based/in_memory_files_source.py index c8ee78f0f..4a7ecf5e2 100644 --- a/unit_tests/sources/file_based/in_memory_files_source.py +++ b/unit_tests/sources/file_based/in_memory_files_source.py @@ -7,9 +7,10 @@ import json import logging import tempfile +from collections.abc import Iterable, Mapping from datetime import datetime from io import IOBase -from typing import Any, Dict, Iterable, List, Mapping, Optional +from typing import Any import avro.io as ai import avro.schema as avro_schema @@ -54,16 +55,16 @@ def __init__( self, files: Mapping[str, Any], file_type: str, - availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy], - discovery_policy: Optional[AbstractDiscoveryPolicy], + availability_strategy: AbstractFileBasedAvailabilityStrategy | None, + discovery_policy: AbstractDiscoveryPolicy | None, validation_policies: Mapping[str, AbstractSchemaValidationPolicy], parsers: Mapping[str, FileTypeParser], - stream_reader: Optional[AbstractFileBasedStreamReader], - catalog: Optional[Mapping[str, Any]], - config: Optional[Mapping[str, Any]], - state: Optional[TState], + stream_reader: AbstractFileBasedStreamReader | None, + catalog: Mapping[str, Any] | None, + config: Mapping[str, Any] | None, + state: TState | None, file_write_options: Mapping[str, Any], - cursor_cls: Optional[AbstractFileBasedCursor], + cursor_cls: AbstractFileBasedCursor | None, ): # Attributes required for test purposes self.files = files @@ -104,7 +105,7 @@ def __init__( self, files: Mapping[str, Mapping[str, Any]], 
file_type: str, - file_write_options: Optional[Mapping[str, Any]] = None, + file_write_options: Mapping[str, Any] | None = None, ): self.files = files self.file_type = file_type @@ -112,7 +113,7 @@ def __init__( super().__init__() @property - def config(self) -> Optional[AbstractFileBasedSpec]: + def config(self) -> AbstractFileBasedSpec | None: return self._config @config.setter @@ -121,8 +122,8 @@ def config(self, value: AbstractFileBasedSpec) -> None: def get_matching_files( self, - globs: List[str], - prefix: Optional[str], + globs: list[str], + prefix: str | None, logger: logging.Logger, ) -> Iterable[RemoteFile]: yield from self.filter_files_by_globs_and_start_date( @@ -142,25 +143,25 @@ def file_size(self, file: RemoteFile) -> int: def get_file( self, file: RemoteFile, local_directory: str, logger: logging.Logger - ) -> Dict[str, Any]: + ) -> dict[str, Any]: return {} - def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]: + def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> dict[str, Any]: return {} - def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]: + def load_identity_groups(self, logger: logging.Logger) -> Iterable[dict[str, Any]]: return [{}] @property - def file_permissions_schema(self) -> Dict[str, Any]: + def file_permissions_schema(self) -> dict[str, Any]: return {"type": "object", "properties": {}} @property - def identities_schema(self) -> Dict[str, Any]: + def identities_schema(self) -> dict[str, Any]: return {"type": "object", "properties": {}} def open_file( - self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger + self, file: RemoteFile, mode: FileReadMode, encoding: str | None, logger: logging.Logger ) -> IOBase: if self.file_type == "csv": return self._make_csv_file_contents(file.uri) @@ -224,7 +225,7 @@ class TemporaryParquetFilesStreamReader(InMemoryFilesStreamReader): """ def open_file( - self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger + self, file: RemoteFile, mode: FileReadMode, encoding: str | None, logger: logging.Logger ) -> IOBase: return io.BytesIO(self._create_file(file.uri)) @@ -247,7 +248,7 @@ class TemporaryAvroFilesStreamReader(InMemoryFilesStreamReader): """ def open_file( - self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger + self, file: RemoteFile, mode: FileReadMode, encoding: str | None, logger: logging.Logger ) -> IOBase: return io.BytesIO(self._make_file_contents(file.uri)) @@ -273,7 +274,7 @@ class TemporaryExcelFilesStreamReader(InMemoryFilesStreamReader): """ def open_file( - self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger + self, file: RemoteFile, mode: FileReadMode, encoding: str | None, logger: logging.Logger ) -> IOBase: return io.BytesIO(self._make_file_contents(file.uri)) diff --git a/unit_tests/sources/file_based/scenarios/file_based_source_builder.py b/unit_tests/sources/file_based/scenarios/file_based_source_builder.py index 4c2939f6a..cf5801fe3 100644 --- a/unit_tests/sources/file_based/scenarios/file_based_source_builder.py +++ b/unit_tests/sources/file_based/scenarios/file_based_source_builder.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from copy import deepcopy -from typing import Any, Mapping, Optional, Type +from typing import Any from airbyte_cdk.sources.file_based.availability_strategy.abstract_file_based_availability_strategy import ( AbstractFileBasedAvailabilityStrategy, @@ -25,22 +26,22 @@ class FileBasedSourceBuilder(SourceBuilder[InMemoryFilesSource]): def __init__(self) -> None: self._files: Mapping[str, Any] = {} - self._file_type: Optional[str] = None - self._availability_strategy: Optional[AbstractFileBasedAvailabilityStrategy] = None + self._file_type: str | None = None + self._availability_strategy: AbstractFileBasedAvailabilityStrategy | None = None self._discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy() - self._validation_policies: Optional[Mapping[str, AbstractSchemaValidationPolicy]] = None + self._validation_policies: Mapping[str, AbstractSchemaValidationPolicy] | None = None self._parsers = default_parsers - self._stream_reader: Optional[AbstractFileBasedStreamReader] = None + self._stream_reader: AbstractFileBasedStreamReader | None = None self._file_write_options: Mapping[str, Any] = {} - self._cursor_cls: Optional[Type[AbstractFileBasedCursor]] = None - self._config: Optional[Mapping[str, Any]] = None - self._state: Optional[TState] = None + self._cursor_cls: type[AbstractFileBasedCursor] | None = None + self._config: Mapping[str, Any] | None = None + self._state: TState | None = None def build( self, - configured_catalog: Optional[Mapping[str, Any]], - config: Optional[Mapping[str, Any]], - state: Optional[TState], + configured_catalog: Mapping[str, Any] | None, + config: Mapping[str, Any] | None, + state: TState | None, ) -> InMemoryFilesSource: if self._file_type is None: raise ValueError("file_type is not set") @@ -67,7 +68,7 @@ def set_file_type(self, file_type: str) -> "FileBasedSourceBuilder": self._file_type = file_type return self - def set_parsers(self, parsers: Mapping[Type[Any], FileTypeParser]) -> "FileBasedSourceBuilder": + def set_parsers(self, parsers: Mapping[type[Any], FileTypeParser]) -> "FileBasedSourceBuilder": self._parsers = parsers return self diff --git a/unit_tests/sources/file_based/scenarios/scenario_builder.py b/unit_tests/sources/file_based/scenarios/scenario_builder.py index da8c7ba87..bdceafed9 100644 --- a/unit_tests/sources/file_based/scenarios/scenario_builder.py +++ b/unit_tests/sources/file_based/scenarios/scenario_builder.py @@ -2,9 +2,10 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# from abc import ABC, abstractmethod +from collections.abc import Mapping from copy import deepcopy from dataclasses import dataclass, field -from typing import Any, Generic, List, Mapping, Optional, Set, Tuple, Type, TypeVar +from typing import Any, Generic, TypeVar from airbyte_cdk.models import ( AirbyteAnalyticsTraceMessage, @@ -19,8 +20,8 @@ @dataclass class IncrementalScenarioConfig: - input_state: List[Mapping[str, Any]] = field(default_factory=list) - expected_output_state: Optional[Mapping[str, Any]] = None + input_state: list[Mapping[str, Any]] = field(default_factory=list) + expected_output_state: Mapping[str, Any] | None = None SourceType = TypeVar("SourceType", bound=AbstractSource) @@ -34,9 +35,9 @@ class SourceBuilder(ABC, Generic[SourceType]): @abstractmethod def build( self, - configured_catalog: Optional[Mapping[str, Any]], - config: Optional[Mapping[str, Any]], - state: Optional[TState], + configured_catalog: Mapping[str, Any] | None, + config: Mapping[str, Any] | None, + state: TState | None, ) -> SourceType: raise NotImplementedError() @@ -47,18 +48,18 @@ def __init__( name: str, config: Mapping[str, Any], source: SourceType, - expected_spec: Optional[Mapping[str, Any]], - expected_check_status: Optional[str], - expected_catalog: Optional[Mapping[str, Any]], - expected_logs: Optional[Mapping[str, List[Mapping[str, Any]]]], - expected_records: List[Mapping[str, Any]], - expected_check_error: Tuple[Optional[Type[Exception]], Optional[str]], - expected_discover_error: Tuple[Optional[Type[Exception]], Optional[str]], - expected_read_error: Tuple[Optional[Type[Exception]], Optional[str]], - incremental_scenario_config: Optional[IncrementalScenarioConfig], - expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]] = None, - log_levels: Optional[Set[str]] = None, - catalog: Optional[ConfiguredAirbyteCatalog] = None, + expected_spec: Mapping[str, Any] | None, + expected_check_status: str | None, + expected_catalog: Mapping[str, Any] | None, + expected_logs: Mapping[str, list[Mapping[str, Any]]] | None, + expected_records: list[Mapping[str, Any]], + expected_check_error: tuple[type[Exception] | None, str | None], + expected_discover_error: tuple[type[Exception] | None, str | None], + expected_read_error: tuple[type[Exception] | None, str | None], + incremental_scenario_config: IncrementalScenarioConfig | None, + expected_analytics: list[AirbyteAnalyticsTraceMessage] | None = None, + log_levels: set[str] | None = None, + catalog: ConfiguredAirbyteCatalog | None = None, ): if log_levels is None: log_levels = {"ERROR", "WARN", "WARNING"} @@ -82,7 +83,7 @@ def __init__( def validate(self) -> None: assert self.name - def configured_catalog(self, sync_mode: SyncMode) -> Optional[Mapping[str, Any]]: + def configured_catalog(self, sync_mode: SyncMode) -> Mapping[str, Any] | None: # The preferred way of returning the catalog for the TestScenario is by providing it at the initialization. 
The previous solution # relied on `self.source.streams` which might raise an exception hence screwing the tests results as the user might expect the # exception to be raised as part of the actual check/discover/read commands @@ -106,7 +107,7 @@ def configured_catalog(self, sync_mode: SyncMode) -> Optional[Mapping[str, Any]] return catalog - def input_state(self) -> List[Mapping[str, Any]]: + def input_state(self) -> list[Mapping[str, Any]]: if self.incremental_scenario_config: return self.incremental_scenario_config.input_state else: @@ -121,18 +122,18 @@ class TestScenarioBuilder(Generic[SourceType]): def __init__(self) -> None: self._name = "" self._config: Mapping[str, Any] = {} - self._catalog: Optional[ConfiguredAirbyteCatalog] = None - self._expected_spec: Optional[Mapping[str, Any]] = None - self._expected_check_status: Optional[str] = None + self._catalog: ConfiguredAirbyteCatalog | None = None + self._expected_spec: Mapping[str, Any] | None = None + self._expected_check_status: str | None = None self._expected_catalog: Mapping[str, Any] = {} - self._expected_logs: Optional[Mapping[str, Any]] = None - self._expected_records: List[Mapping[str, Any]] = [] - self._expected_check_error: Tuple[Optional[Type[Exception]], Optional[str]] = None, None - self._expected_discover_error: Tuple[Optional[Type[Exception]], Optional[str]] = None, None - self._expected_read_error: Tuple[Optional[Type[Exception]], Optional[str]] = None, None - self._incremental_scenario_config: Optional[IncrementalScenarioConfig] = None - self._expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]] = None - self.source_builder: Optional[SourceBuilder[SourceType]] = None + self._expected_logs: Mapping[str, Any] | None = None + self._expected_records: list[Mapping[str, Any]] = [] + self._expected_check_error: tuple[type[Exception] | None, str | None] = None, None + self._expected_discover_error: tuple[type[Exception] | None, str | None] = None, None + self._expected_read_error: tuple[type[Exception] | None, str | None] = None, None + self._incremental_scenario_config: IncrementalScenarioConfig | None = None + self._expected_analytics: list[AirbyteAnalyticsTraceMessage] | None = None + self.source_builder: SourceBuilder[SourceType] | None = None self._log_levels = None def set_name(self, name: str) -> "TestScenarioBuilder[SourceType]": @@ -166,13 +167,13 @@ def set_expected_catalog( return self def set_expected_logs( - self, expected_logs: Mapping[str, List[Mapping[str, Any]]] + self, expected_logs: Mapping[str, list[Mapping[str, Any]]] ) -> "TestScenarioBuilder[SourceType]": self._expected_logs = expected_logs return self def set_expected_records( - self, expected_records: Optional[List[Mapping[str, Any]]] + self, expected_records: list[Mapping[str, Any]] | None ) -> "TestScenarioBuilder[SourceType]": self._expected_records = expected_records return self @@ -184,24 +185,24 @@ def set_incremental_scenario_config( return self def set_expected_check_error( - self, error: Optional[Type[Exception]], message: str + self, error: type[Exception] | None, message: str ) -> "TestScenarioBuilder[SourceType]": self._expected_check_error = error, message return self def set_expected_discover_error( - self, error: Type[Exception], message: str + self, error: type[Exception], message: str ) -> "TestScenarioBuilder[SourceType]": self._expected_discover_error = error, message return self def set_expected_read_error( - self, error: Type[Exception], message: str + self, error: type[Exception], message: str ) -> 
"TestScenarioBuilder[SourceType]": self._expected_read_error = error, message return self - def set_log_levels(self, levels: Set[str]) -> "TestScenarioBuilder": + def set_log_levels(self, levels: set[str]) -> "TestScenarioBuilder": self._log_levels = levels return self @@ -212,7 +213,7 @@ def set_source_builder( return self def set_expected_analytics( - self, expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]] + self, expected_analytics: list[AirbyteAnalyticsTraceMessage] | None ) -> "TestScenarioBuilder[SourceType]": self._expected_analytics = expected_analytics return self @@ -255,7 +256,7 @@ def build(self) -> "TestScenario[SourceType]": self._catalog, ) - def _configured_catalog(self, sync_mode: SyncMode) -> Optional[Mapping[str, Any]]: + def _configured_catalog(self, sync_mode: SyncMode) -> Mapping[str, Any] | None: if not self._expected_catalog: return None catalog: Mapping[str, Any] = {"streams": []} diff --git a/unit_tests/sources/file_based/schema_validation_policies/test_default_schema_validation_policy.py b/unit_tests/sources/file_based/schema_validation_policies/test_default_schema_validation_policy.py index ab4e87919..d53d39f4f 100644 --- a/unit_tests/sources/file_based/schema_validation_policies/test_default_schema_validation_policy.py +++ b/unit_tests/sources/file_based/schema_validation_policies/test_default_schema_validation_policy.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any import pytest diff --git a/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py b/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py index 2c8b74ea5..d30e27e25 100644 --- a/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py +++ b/unit_tests/sources/file_based/stream/concurrent/test_file_based_concurrent_cursor.py @@ -1,8 +1,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+from collections.abc import MutableMapping from datetime import datetime -from typing import Any, Dict, List, MutableMapping, Optional, Tuple +from typing import Any from unittest.mock import MagicMock import pytest @@ -19,7 +20,7 @@ MOCK_DAYS_TO_SYNC_IF_HISTORY_IS_FULL = 3 -def _make_cursor(input_state: Optional[MutableMapping[str, Any]]) -> FileBasedConcurrentCursor: +def _make_cursor(input_state: MutableMapping[str, Any] | None) -> FileBasedConcurrentCursor: stream = MagicMock() stream.name = "test" stream.namespace = None @@ -101,7 +102,7 @@ def _make_cursor(input_state: Optional[MutableMapping[str, Any]]) -> FileBasedCo ], ) def test_compute_prev_sync_cursor( - input_state: MutableMapping[str, Any], expected_cursor_value: Tuple[datetime, str] + input_state: MutableMapping[str, Any], expected_cursor_value: tuple[datetime, str] ): cursor = _make_cursor(input_state) assert cursor._compute_prev_sync_cursor(input_state) == expected_cursor_value @@ -189,10 +190,10 @@ def test_compute_prev_sync_cursor( ) def test_add_file( initial_state: MutableMapping[str, Any], - pending_files: List[Tuple[str, str]], - file_to_add: Tuple[str, str], - expected_history: Dict[str, Any], - expected_pending_files: List[Tuple[str, str]], + pending_files: list[tuple[str, str]], + file_to_add: tuple[str, str], + expected_history: dict[str, Any], + expected_pending_files: list[tuple[str, str]], expected_cursor_value: str, ): cursor = _make_cursor(initial_state) @@ -262,10 +263,10 @@ def test_add_file( ) def test_add_file_invalid( initial_state: MutableMapping[str, Any], - pending_files: List[Tuple[str, str]], - file_to_add: Tuple[str, str], - expected_history: Dict[str, Any], - expected_pending_files: List[Tuple[str, str]], + pending_files: list[tuple[str, str]], + file_to_add: tuple[str, str], + expected_history: dict[str, Any], + expected_pending_files: list[tuple[str, str]], expected_cursor_value: str, ): cursor = _make_cursor(initial_state) @@ -328,7 +329,7 @@ def test_add_file_invalid( ) def test_get_new_cursor_value( input_state: MutableMapping[str, Any], - pending_files: List[Tuple[str, str]], + pending_files: list[tuple[str, str]], expected_cursor_value: str, ): cursor = _make_cursor(input_state) @@ -534,9 +535,9 @@ def test_get_files_to_sync( ) def test_should_sync_file( file_to_check: RemoteFile, - history: Dict[str, Any], + history: dict[str, Any], is_history_full: bool, - prev_cursor_value: Tuple[datetime, str], + prev_cursor_value: tuple[datetime, str], sync_start: datetime, expected_should_sync: bool, ): diff --git a/unit_tests/sources/file_based/stream/test_default_file_based_cursor.py b/unit_tests/sources/file_based/stream/test_default_file_based_cursor.py index afd81eeeb..b90ad41dd 100644 --- a/unit_tests/sources/file_based/stream/test_default_file_based_cursor.py +++ b/unit_tests/sources/file_based/stream/test_default_file_based_cursor.py @@ -2,8 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from datetime import datetime, timedelta -from typing import Any, List, Mapping +from typing import Any from unittest.mock import MagicMock import pytest @@ -163,8 +164,8 @@ ], ) def test_add_file( - files_to_add: List[RemoteFile], - expected_start_time: List[datetime], + files_to_add: list[RemoteFile], + expected_start_time: list[datetime], expected_state_dict: Mapping[str, Any], ) -> None: cursor = get_cursor(max_history_size=3, days_to_sync_if_history_is_full=3) @@ -284,8 +285,8 @@ def test_add_file( ], ) def test_get_files_to_sync( - files: List[RemoteFile], - expected_files_to_sync: List[RemoteFile], + files: list[RemoteFile], + expected_files_to_sync: list[RemoteFile], max_history_size: int, history_is_partial: bool, ) -> None: diff --git a/unit_tests/sources/file_based/stream/test_default_file_based_stream.py b/unit_tests/sources/file_based/stream/test_default_file_based_stream.py index 1b85ed8dd..a86d382da 100644 --- a/unit_tests/sources/file_based/stream/test_default_file_based_stream.py +++ b/unit_tests/sources/file_based/stream/test_default_file_based_stream.py @@ -4,9 +4,10 @@ import traceback import unittest +from collections.abc import Iterable, Iterator, Mapping from copy import deepcopy from datetime import datetime, timezone -from typing import Any, Iterable, Iterator, Mapping +from typing import Any from unittest import mock from unittest.mock import Mock diff --git a/unit_tests/sources/file_based/test_file_based_stream_reader.py b/unit_tests/sources/file_based/test_file_based_stream_reader.py index 4a9d4e349..1fdda9cff 100644 --- a/unit_tests/sources/file_based/test_file_based_stream_reader.py +++ b/unit_tests/sources/file_based/test_file_based_stream_reader.py @@ -3,9 +3,10 @@ # import logging +from collections.abc import Iterable, Mapping from datetime import datetime from io import IOBase -from typing import Any, Dict, Iterable, List, Mapping, Optional, Set +from typing import Any import pytest from pydantic.v1 import AnyUrl @@ -63,14 +64,14 @@ class TestStreamReader(AbstractFileBasedStreamReader): @property - def config(self) -> Optional[AbstractFileBasedSpec]: + def config(self) -> AbstractFileBasedSpec | None: return self._config @config.setter def config(self, value: AbstractFileBasedSpec) -> None: self._config = value - def get_matching_files(self, globs: List[str]) -> Iterable[RemoteFile]: + def get_matching_files(self, globs: list[str]) -> Iterable[RemoteFile]: pass def open_file(self, file: RemoteFile) -> IOBase: @@ -81,21 +82,21 @@ def file_size(self, file: RemoteFile) -> int: def get_file( self, file: RemoteFile, local_directory: str, logger: logging.Logger - ) -> Dict[str, Any]: + ) -> dict[str, Any]: return {} - def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]: + def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> dict[str, Any]: return {} - def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]: + def load_identity_groups(self, logger: logging.Logger) -> Iterable[dict[str, Any]]: return [{}] @property - def file_permissions_schema(self) -> Dict[str, Any]: + def file_permissions_schema(self) -> dict[str, Any]: return {"type": "object", "properties": {}} @property - def identities_schema(self) -> Dict[str, Any]: + def identities_schema(self) -> dict[str, Any]: return {"type": "object", "properties": {}} @@ -368,17 +369,16 @@ def documentation_url(cls) -> AnyUrl: ], ) def test_globs_and_prefixes_from_globs( - globs: 
List[str], + globs: list[str], config: Mapping[str, Any], - expected_matches: Set[str], - expected_path_prefixes: Set[str], + expected_matches: set[str], + expected_path_prefixes: set[str], ) -> None: reader = TestStreamReader() reader.config = TestSpec(**config) - assert ( - set([f.uri for f in reader.filter_files_by_globs_and_start_date(FILES, globs)]) - == expected_matches - ) + assert { + f.uri for f in reader.filter_files_by_globs_and_start_date(FILES, globs) + } == expected_matches assert set(reader.get_prefixes_from_globs(globs)) == expected_path_prefixes diff --git a/unit_tests/sources/file_based/test_scenarios.py b/unit_tests/sources/file_based/test_scenarios.py index d70b7f4ef..b51008d42 100644 --- a/unit_tests/sources/file_based/test_scenarios.py +++ b/unit_tests/sources/file_based/test_scenarios.py @@ -4,8 +4,9 @@ import json import math +from collections.abc import Mapping from pathlib import Path, PosixPath -from typing import Any, Dict, List, Mapping, Optional, Union +from typing import Any import pytest from _pytest.capture import CaptureFixture @@ -153,7 +154,7 @@ def _verify_read_output(output: EntrypointOutput, scenario: TestScenario[Abstrac def _verify_state_record_counts( - records: List[AirbyteMessage], states: List[AirbyteMessage] + records: list[AirbyteMessage], states: list[AirbyteMessage] ) -> None: actual_record_counts = {} for record in records: @@ -178,8 +179,8 @@ def _verify_state_record_counts( def _verify_analytics( - analytics: List[AirbyteMessage], - expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]], + analytics: list[AirbyteMessage], + expected_analytics: list[AirbyteAnalyticsTraceMessage] | None, ) -> None: if expected_analytics: assert len(analytics) == len(expected_analytics), ( @@ -194,7 +195,7 @@ def _verify_analytics( def _verify_expected_logs( - logs: List[AirbyteLogMessage], expected_logs: Optional[List[Mapping[str, Any]]] + logs: list[AirbyteLogMessage], expected_logs: list[Mapping[str, Any]] | None ) -> None: if expected_logs: for actual, expected in zip(logs, expected_logs): @@ -237,7 +238,7 @@ def spec(capsys: CaptureFixture[str], scenario: TestScenario[AbstractSource]) -> def check( capsys: CaptureFixture[str], tmp_path: PosixPath, scenario: TestScenario[AbstractSource] -) -> Dict[str, Any]: +) -> dict[str, Any]: launch( scenario.source, ["check", "--config", make_file(tmp_path / "config.json", scenario.config)], @@ -246,7 +247,7 @@ def check( return _find_connection_status(captured.out.splitlines()) -def _find_connection_status(output: List[str]) -> Mapping[str, Any]: +def _find_connection_status(output: list[str]) -> Mapping[str, Any]: for line in output: json_line = json.loads(line) if "connectionStatus" in json_line: @@ -256,7 +257,7 @@ def _find_connection_status(output: List[str]) -> Mapping[str, Any]: def discover( capsys: CaptureFixture[str], tmp_path: PosixPath, scenario: TestScenario[AbstractSource] -) -> Dict[str, Any]: +) -> dict[str, Any]: launch( scenario.source, ["discover", "--config", make_file(tmp_path / "config.json", scenario.config)], @@ -286,9 +287,7 @@ def read_with_state(scenario: TestScenario[AbstractSource]) -> EntrypointOutput: ) -def make_file( - path: Path, file_contents: Optional[Union[Mapping[str, Any], List[Mapping[str, Any]]]] -) -> str: +def make_file(path: Path, file_contents: Mapping[str, Any] | list[Mapping[str, Any]] | None) -> str: path.write_text(json.dumps(file_contents)) return str(path) diff --git a/unit_tests/sources/file_based/test_schema_helpers.py 
b/unit_tests/sources/file_based/test_schema_helpers.py index b3d1a5220..fbf903c32 100644 --- a/unit_tests/sources/file_based/test_schema_helpers.py +++ b/unit_tests/sources/file_based/test_schema_helpers.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any import pytest @@ -354,7 +355,7 @@ def test_comparable_types() -> None: ], ) def test_merge_schemas( - schema1: SchemaType, schema2: SchemaType, expected_result: Optional[SchemaType] + schema1: SchemaType, schema2: SchemaType, expected_result: SchemaType | None ) -> None: if expected_result is not None: assert merge_schemas(schema1, schema2) == expected_result @@ -433,8 +434,8 @@ def test_merge_schemas( ) def test_type_mapping_to_jsonschema( type_mapping: Mapping[str, Any], - expected_schema: Optional[Mapping[str, Any]], - expected_exc_msg: Optional[str], + expected_schema: Mapping[str, Any] | None, + expected_exc_msg: str | None, ) -> None: if expected_exc_msg: with pytest.raises(ConfigValidationError) as exc: diff --git a/unit_tests/sources/fixtures/source_test_fixture.py b/unit_tests/sources/fixtures/source_test_fixture.py index 3c2183b68..37372ed15 100644 --- a/unit_tests/sources/fixtures/source_test_fixture.py +++ b/unit_tests/sources/fixtures/source_test_fixture.py @@ -5,7 +5,8 @@ import json import logging from abc import ABC -from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union +from collections.abc import Iterable, Mapping +from typing import Any import requests from requests.auth import AuthBase @@ -31,9 +32,7 @@ class SourceTestFixture(AbstractSource): the need to load static files (ex. spec.yaml, config.json, configured_catalog.json) into the unit-test package. 
""" - def __init__( - self, streams: Optional[List[Stream]] = None, authenticator: Optional[AuthBase] = None - ): + def __init__(self, streams: list[Stream] | None = None, authenticator: AuthBase | None = None): self._streams = streams self._authenticator = authenticator @@ -77,10 +76,10 @@ def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog: ] ) - def check_connection(self, *args, **kwargs) -> Tuple[bool, Optional[Any]]: + def check_connection(self, *args, **kwargs) -> tuple[bool, Any | None]: return True, "" - def streams(self, *args, **kwargs) -> List[Stream]: + def streams(self, *args, **kwargs) -> list[Stream]: return [HttpTestStream(authenticator=self._authenticator)] @@ -88,14 +87,14 @@ class HttpTestStream(HttpStream, ABC): url_base = "https://airbyte.com/api/v1/" @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: return ["updated_at"] @property def availability_strategy(self): return None - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return "id" def path( @@ -118,7 +117,7 @@ def parse_response( body = response.json() or {} return body["records"] - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return None def get_json_schema(self) -> Mapping[str, Any]: @@ -151,7 +150,7 @@ class SourceFixtureOauthAuthenticator(Oauth2Authenticator): Test OAuth authenticator that only overrides the request and response aspect of the authenticator flow """ - def refresh_access_token(self) -> Tuple[str, int]: + def refresh_access_token(self) -> tuple[str, int]: response = requests.request(method="POST", url=self.get_token_refresh_endpoint(), params={}) response.raise_for_status() return ( diff --git a/unit_tests/sources/mock_server_tests/mock_source_fixture.py b/unit_tests/sources/mock_server_tests/mock_source_fixture.py index 5ca7ae7cd..d4157871d 100644 --- a/unit_tests/sources/mock_server_tests/mock_source_fixture.py +++ b/unit_tests/sources/mock_server_tests/mock_source_fixture.py @@ -4,8 +4,9 @@ import logging from abc import ABC +from collections.abc import Iterable, Mapping, MutableMapping from datetime import datetime, timedelta, timezone -from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple +from typing import Any import requests from requests import HTTPError @@ -26,8 +27,8 @@ class FixtureAvailabilityStrategy(HttpAvailabilityStrategy): def reasons_for_unavailable_status_codes( self, stream: Stream, logger: logging.Logger, source: Source, error: HTTPError - ) -> Dict[int, str]: - reasons_for_codes: Dict[int, str] = { + ) -> dict[int, str]: + reasons_for_codes: dict[int, str] = { requests.codes.FORBIDDEN: "This is likely due to insufficient permissions for your Notion integration. 
" "Please make sure your integration has read access for the resources you are trying to sync" } @@ -53,7 +54,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp data = response.json().get("data", []) yield from data - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: has_more = response.json().get("has_more") if has_more: self.current_page += 1 @@ -77,9 +78,9 @@ def state(self, value: MutableMapping[str, Any]) -> None: def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: for record in super().read_records(sync_mode, cursor_field, stream_slice, stream_state): self.state = {self.cursor_field: record.get(self.cursor_field)} @@ -128,9 +129,9 @@ def get_json_schema(self) -> Mapping[str, Any]: def request_params( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: return { "start_date": stream_slice.get("start_date"), @@ -141,9 +142,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: start_date = ab_datetime_parse(self.start_date) if stream_state: @@ -202,17 +203,17 @@ def get_updated_state( def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: yield from super().read_records(sync_mode, cursor_field, stream_slice, stream_state) def request_params( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: return { "start_date": stream_slice.get("start_date"), @@ -223,9 +224,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: start_date = ab_datetime_parse(self.start_date) if stream_state: @@ -269,16 +270,16 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: 
Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: return [{"divide_category": "dukes"}, {"divide_category": "mentats"}] def request_params( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: return {"category": stream_slice.get("divide_category")} @@ -326,26 +327,26 @@ def state(self, value: MutableMapping[str, Any]) -> None: def request_params( self, - stream_state: Optional[Mapping[str, Any]], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> MutableMapping[str, Any]: return {"page": next_page_token.get("page")} def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: yield from self._read_single_page(cursor_field, stream_slice, stream_state) def _read_single_page( self, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: next_page_token = stream_slice request_headers = self.request_headers( @@ -388,7 +389,7 @@ def _read_single_page( self.next_page_token(response) - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: current_page = self._state.get("page") or 0 has_more = response.json().get("has_more") if has_more: @@ -400,10 +401,10 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, class SourceFixture(AbstractSource): def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, any]: + ) -> tuple[bool, any]: return True, None - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: return [ Dividers(config=config), JusticeSongs(config=config), diff --git a/unit_tests/sources/mock_server_tests/test_helpers/airbyte_message_assertions.py b/unit_tests/sources/mock_server_tests/test_helpers/airbyte_message_assertions.py index 73e125d71..6162f7e71 100644 --- a/unit_tests/sources/mock_server_tests/test_helpers/airbyte_message_assertions.py +++ b/unit_tests/sources/mock_server_tests/test_helpers/airbyte_message_assertions.py @@ -2,14 +2,13 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
# -from typing import List import pytest from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus, Type -def emits_successful_sync_status_messages(status_messages: List[AirbyteStreamStatus]) -> bool: +def emits_successful_sync_status_messages(status_messages: list[AirbyteStreamStatus]) -> bool: return ( len(status_messages) == 3 and status_messages[0] == AirbyteStreamStatus.STARTED @@ -18,7 +17,7 @@ def emits_successful_sync_status_messages(status_messages: List[AirbyteStreamSta ) -def validate_message_order(expected_message_order: List[Type], messages: List[AirbyteMessage]): +def validate_message_order(expected_message_order: list[Type], messages: list[AirbyteMessage]): if len(expected_message_order) != len(messages): pytest.fail( f"Expected message order count {len(expected_message_order)} did not match actual messages {len(messages)}" diff --git a/unit_tests/sources/mock_server_tests/test_mock_server_abstract_source.py b/unit_tests/sources/mock_server_tests/test_mock_server_abstract_source.py index 5552bdfd9..c1d74e526 100644 --- a/unit_tests/sources/mock_server_tests/test_mock_server_abstract_source.py +++ b/unit_tests/sources/mock_server_tests/test_mock_server_abstract_source.py @@ -3,7 +3,6 @@ # from datetime import datetime, timedelta, timezone -from typing import List, Optional from unittest import TestCase import freezegun @@ -21,7 +20,7 @@ create_response_builder, ) from airbyte_cdk.test.state_builder import StateBuilder -from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now +from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime from unit_tests.sources.mock_server_tests.mock_source_fixture import SourceFixture from unit_tests.sources.mock_server_tests.test_helpers import ( emits_successful_sync_status_messages, @@ -55,10 +54,10 @@ def users_endpoint(cls) -> "RequestBuilder": def __init__(self, resource: str) -> None: self._resource = resource - self._start_date: Optional[datetime] = None - self._end_date: Optional[datetime] = None - self._category: Optional[str] = None - self._page: Optional[int] = None + self._start_date: datetime | None = None + self._end_date: datetime | None = None + self._category: str | None = None + self._page: int | None = None def with_start_date(self, start_date: datetime) -> "RequestBuilder": self._start_date = start_date @@ -93,7 +92,7 @@ def build(self) -> HttpRequest: ) -def _create_catalog(names_and_sync_modes: List[tuple[str, SyncMode]]) -> ConfiguredAirbyteCatalog: +def _create_catalog(names_and_sync_modes: list[tuple[str, SyncMode]]) -> ConfiguredAirbyteCatalog: catalog_builder = CatalogBuilder() for stream_name, sync_mode in names_and_sync_modes: catalog_builder.with_stream(name=stream_name, sync_mode=sync_mode) diff --git a/unit_tests/sources/mock_server_tests/test_resumable_full_refresh.py b/unit_tests/sources/mock_server_tests/test_resumable_full_refresh.py index 5ba58e384..f00de5924 100644 --- a/unit_tests/sources/mock_server_tests/test_resumable_full_refresh.py +++ b/unit_tests/sources/mock_server_tests/test_resumable_full_refresh.py @@ -3,7 +3,7 @@ # from datetime import datetime, timezone -from typing import Any, Dict, List, Optional +from typing import Any from unittest import TestCase import freezegun @@ -44,7 +44,7 @@ def justice_songs_endpoint(cls) -> "RequestBuilder": def __init__(self, resource: str) -> None: self._resource = resource - self._page: Optional[int] = None + self._page: int | None = None def with_page(self, page: int) -> "RequestBuilder": self._page = page @@ -62,7 +62,7 @@ 
def build(self) -> HttpRequest: def _create_catalog( - names_and_sync_modes: List[tuple[str, SyncMode, Dict[str, Any]]], + names_and_sync_modes: list[tuple[str, SyncMode, dict[str, Any]]], ) -> ConfiguredAirbyteCatalog: stream_builder = ConfiguredAirbyteStreamBuilder() streams = [] diff --git a/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py b/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py index 50695ba1e..63e51fdeb 100644 --- a/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +++ b/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py @@ -4,7 +4,8 @@ import concurrent import logging -from typing import Any, List, Mapping, Optional, Tuple, Union +from collections.abc import Mapping +from typing import Any from airbyte_cdk.models import ( AirbyteStateMessage, @@ -41,11 +42,11 @@ class StreamFacadeConcurrentConnectorStateConverter(EpochValueConcurrentStreamSt class StreamFacadeSource(ConcurrentSourceAdapter): def __init__( self, - streams: List[Stream], + streams: list[Stream], threadpool: concurrent.futures.ThreadPoolExecutor, - cursor_field: Optional[CursorField] = None, - cursor_boundaries: Optional[Tuple[str, str]] = None, - input_state: Optional[List[Mapping[str, Any]]] = _NO_STATE, + cursor_field: CursorField | None = None, + cursor_boundaries: tuple[str, str] | None = None, + input_state: list[Mapping[str, Any]] | None = _NO_STATE, ): self._message_repository = InMemoryMessageRepository() threadpool_manager = ThreadPoolManager(threadpool, streams[0].logger) @@ -61,10 +62,10 @@ def __init__( def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: return True, None - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: state_manager = ConnectorStateManager( state=self._state, ) # The input values into the AirbyteStream are dummy values; the connector state manager only uses `name` and `namespace` @@ -88,7 +89,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ] @property - def message_repository(self) -> Union[None, MessageRepository]: + def message_repository(self) -> None | MessageRepository: return self._message_repository def spec(self, logger: logging.Logger) -> ConnectorSpecification: @@ -117,7 +118,7 @@ def __init__(self): self._input_state = None self._raw_input_state = None - def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder": + def set_streams(self, streams: list[Stream]) -> "StreamFacadeSourceBuilder": self._streams = streams return self @@ -126,21 +127,21 @@ def set_max_workers(self, max_workers: int) -> "StreamFacadeSourceBuilder": return self def set_incremental( - self, cursor_field: CursorField, cursor_boundaries: Optional[Tuple[str, str]] + self, cursor_field: CursorField, cursor_boundaries: tuple[str, str] | None ) -> "StreamFacadeSourceBuilder": self._cursor_field = cursor_field self._cursor_boundaries = cursor_boundaries return self - def set_input_state(self, state: List[Mapping[str, Any]]) -> "StreamFacadeSourceBuilder": + def set_input_state(self, state: list[Mapping[str, Any]]) -> "StreamFacadeSourceBuilder": self._input_state = state return self def build( self, - configured_catalog: Optional[Mapping[str, Any]], - config: Optional[Mapping[str, Any]], - state: Optional[TState], + configured_catalog: Mapping[str, Any] | None, + config: Mapping[str, Any] | None, + state: 
TState | None, ) -> StreamFacadeSource: threadpool = concurrent.futures.ThreadPoolExecutor( max_workers=self._max_workers, thread_name_prefix="workerpool" diff --git a/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py b/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py index b0dcd272c..1061c8665 100644 --- a/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +++ b/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py @@ -3,7 +3,8 @@ # import json import logging -from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union +from collections.abc import Iterable, Mapping +from typing import Any from airbyte_cdk.models import ( ConfiguredAirbyteCatalog, @@ -28,15 +29,15 @@ class LegacyStream(Stream): - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return None def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: yield from [] @@ -44,8 +45,8 @@ def read_records( class ConcurrentCdkSource(ConcurrentSourceAdapter): def __init__( self, - streams: List[DefaultStream], - message_repository: Optional[MessageRepository], + streams: list[DefaultStream], + message_repository: MessageRepository | None, max_workers, timeout_in_seconds, ): @@ -58,11 +59,11 @@ def __init__( def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: # Check is not verified because it is up to the source to implement this method return True, None - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: return [ StreamFacade( s, @@ -104,12 +105,12 @@ def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog: ) @property - def message_repository(self) -> Union[None, MessageRepository]: + def message_repository(self) -> None | MessageRepository: return self._message_repository class InMemoryPartitionGenerator(PartitionGenerator): - def __init__(self, partitions: List[Partition]): + def __init__(self, partitions: list[Partition]): self._partitions = partitions def generate(self) -> Iterable[Partition]: @@ -134,7 +135,7 @@ def read(self) -> Iterable[Record]: else: yield record_or_exception - def to_slice(self) -> Optional[Mapping[str, Any]]: + def to_slice(self) -> Mapping[str, Any] | None: return self._slice def __hash__(self) -> int: @@ -154,13 +155,13 @@ def is_closed(self) -> bool: class ConcurrentSourceBuilder(SourceBuilder[ConcurrentCdkSource]): def __init__(self): - self._streams: List[DefaultStream] = [] + self._streams: list[DefaultStream] = [] self._message_repository = None - def build(self, configured_catalog: Optional[Mapping[str, Any]], _, __) -> ConcurrentCdkSource: + def build(self, configured_catalog: Mapping[str, Any] | None, _, __) -> ConcurrentCdkSource: return ConcurrentCdkSource(self._streams, self._message_repository, 1, 1) - def set_streams(self, streams: List[DefaultStream]) -> "ConcurrentSourceBuilder": + def set_streams(self, streams: 
list[DefaultStream]) -> "ConcurrentSourceBuilder": self._streams = streams return self diff --git a/unit_tests/sources/streams/concurrent/scenarios/utils.py b/unit_tests/sources/streams/concurrent/scenarios/utils.py index 627891ee6..d3153b00e 100644 --- a/unit_tests/sources/streams/concurrent/scenarios/utils.py +++ b/unit_tests/sources/streams/concurrent/scenarios/utils.py @@ -1,7 +1,8 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union +from collections.abc import Iterable, Mapping +from typing import Any from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import Stream @@ -12,7 +13,7 @@ class MockStream(Stream): def __init__( self, slices_and_records_or_exception: Iterable[ - Tuple[Optional[Mapping[str, Any]], Iterable[Union[Exception, Mapping[str, Any]]]] + tuple[Mapping[str, Any] | None, Iterable[Exception | Mapping[str, Any]]] ], name, json_schema, @@ -28,9 +29,9 @@ def __init__( def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: for _slice, records_or_exception in self._slices_and_records_or_exception: if stream_slice == _slice: @@ -40,7 +41,7 @@ def read_records( yield item @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return self._primary_key @property @@ -48,7 +49,7 @@ def name(self) -> str: return self._name @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: return self._cursor_field or [] def get_json_schema(self) -> Mapping[str, Any]: @@ -58,9 +59,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: if self._slices_and_records_or_exception: yield from [ _slice for _slice, records_or_exception in self._slices_and_records_or_exception diff --git a/unit_tests/sources/streams/concurrent/test_cursor.py b/unit_tests/sources/streams/concurrent/test_cursor.py index ddca1c689..b9063986d 100644 --- a/unit_tests/sources/streams/concurrent/test_cursor.py +++ b/unit_tests/sources/streams/concurrent/test_cursor.py @@ -1,10 +1,11 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from collections.abc import Mapping from copy import deepcopy from datetime import datetime, timedelta, timezone from functools import partial -from typing import Any, Mapping, Optional +from typing import Any from unittest import TestCase from unittest.mock import Mock @@ -54,9 +55,7 @@ _NO_LOOKBACK_WINDOW = timedelta(seconds=0) -def _partition( - _slice: Optional[Mapping[str, Any]], _stream_name: Optional[str] = Mock() -) -> Partition: +def _partition(_slice: Mapping[str, Any] | None, _stream_name: str | None = Mock()) -> Partition: partition = Mock(spec=Partition) partition.to_slice.return_value = _slice partition.stream_name.return_value = _stream_name @@ -64,7 +63,7 @@ def _partition( def _record( - cursor_value: CursorValueType, partition: Optional[Partition] = Mock(spec=Partition) + cursor_value: CursorValueType, partition: Partition | None = Mock(spec=Partition) ) -> Record: return Record( data={_A_CURSOR_FIELD_KEY: cursor_value}, @@ -1015,7 +1014,7 @@ def _cursor( start: datetime, end_provider, slice_range: timedelta, - granularity: Optional[timedelta], + granularity: timedelta | None, clamping_strategy: ClampingStrategy, ) -> ConcurrentCursor: return ConcurrentCursor( diff --git a/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py b/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py index 02c1bdd1f..82e66ff29 100644 --- a/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py +++ b/unit_tests/sources/streams/concurrent/test_partition_enqueuer.py @@ -2,8 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # import unittest +from collections.abc import Callable, Iterable from queue import Queue -from typing import Callable, Iterable, List from unittest.mock import Mock, patch from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import ( @@ -16,7 +16,7 @@ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem -_SOME_PARTITIONS: List[Partition] = [Mock(spec=Partition), Mock(spec=Partition)] +_SOME_PARTITIONS: list[Partition] = [Mock(spec=Partition), Mock(spec=Partition)] _A_STREAM_NAME = "a_stream_name" @@ -88,23 +88,22 @@ def test_given_exception_when_generate_partitions_then_return_exception_and_sent ] def _partitions_before_raising( - self, partitions: List[Partition], exception: Exception + self, partitions: list[Partition], exception: Exception ) -> Callable[[], Iterable[Partition]]: def inner_function() -> Iterable[Partition]: - for partition in partitions: - yield partition + yield from partitions raise exception return inner_function @staticmethod - def _a_stream(partitions: List[Partition]) -> AbstractStream: + def _a_stream(partitions: list[Partition]) -> AbstractStream: stream = Mock(spec=AbstractStream) stream.generate_partitions.return_value = iter(partitions) return stream - def _consume_queue(self) -> List[QueueItem]: - queue_content: List[QueueItem] = [] + def _consume_queue(self) -> list[QueueItem]: + queue_content: list[QueueItem] = [] while queue_item := self._queue.get(): if isinstance(queue_item, PartitionGenerationCompletedSentinel): queue_content.append(queue_item) diff --git a/unit_tests/sources/streams/concurrent/test_partition_reader.py b/unit_tests/sources/streams/concurrent/test_partition_reader.py index 1910e034d..a48a924e4 100644 --- a/unit_tests/sources/streams/concurrent/test_partition_reader.py +++ b/unit_tests/sources/streams/concurrent/test_partition_reader.py @@ 
-2,8 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # import unittest +from collections.abc import Callable, Iterable from queue import Queue -from typing import Callable, Iterable, List from unittest.mock import Mock import pytest @@ -61,14 +61,14 @@ def test_given_exception_when_process_partition_then_queue_records_and_exception PartitionCompleteSentinel(partition), ] - def _a_partition(self, records: List[Record]) -> Partition: + def _a_partition(self, records: list[Record]) -> Partition: partition = Mock(spec=Partition) partition.read.return_value = iter(records) return partition @staticmethod def _read_with_exception( - records: List[Record], exception: Exception + records: list[Record], exception: Exception ) -> Callable[[], Iterable[Record]]: def mocked_function() -> Iterable[Record]: yield from records diff --git a/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py b/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py index 99b626b43..59c880d53 100644 --- a/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py +++ b/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py @@ -1,6 +1,5 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -from typing import Optional, Union import requests @@ -18,9 +17,9 @@ def test_given_no_arguments_default_backoff_strategy_returns_default_values(): class CustomBackoffStrategy(BackoffStrategy): def backoff_time( self, - response_or_exception: Optional[Union[requests.Response, requests.RequestException]], + response_or_exception: requests.Response | requests.RequestException | None, attempt_count: int, - ) -> Optional[float]: + ) -> float | None: return response_or_exception.headers["Retry-After"] diff --git a/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py b/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py index d756931c8..78567d94f 100644 --- a/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +++ b/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py @@ -4,8 +4,7 @@ import json import logging -from datetime import timedelta, timezone -from typing import Optional, Union +from datetime import timedelta from unittest.mock import Mock import freezegun @@ -400,8 +399,8 @@ def test_refresh_access_token_when_headers_provided(self, mocker): def test_parse_refresh_token_lifespan( self, mocker, - expires_in_response: Union[str, int], - token_expiry_date_format: Optional[str], + expires_in_response: str | int, + token_expiry_date_format: str | None, expected_token_expiry_date: AirbyteDateTime, ): oauth = Oauth2Authenticator( diff --git a/unit_tests/sources/streams/http/test_availability_strategy.py b/unit_tests/sources/streams/http/test_availability_strategy.py index bf49e09b4..c34375986 100644 --- a/unit_tests/sources/streams/http/test_availability_strategy.py +++ b/unit_tests/sources/streams/http/test_availability_strategy.py @@ -5,7 +5,8 @@ import io import json import logging -from typing import Any, Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Any import pytest import requests @@ -24,7 +25,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.resp_counter = 1 - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: 
return None def path(self, **kwargs) -> str: diff --git a/unit_tests/sources/streams/http/test_http.py b/unit_tests/sources/streams/http/test_http.py index f7ad9e47e..8d75973c7 100644 --- a/unit_tests/sources/streams/http/test_http.py +++ b/unit_tests/sources/streams/http/test_http.py @@ -4,8 +4,9 @@ import json import logging +from collections.abc import Callable, Iterable, Mapping, MutableMapping from http import HTTPStatus -from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union +from typing import Any from unittest.mock import ANY, MagicMock, patch import pytest @@ -44,7 +45,7 @@ def __init__(self, deduplicate_query_params: bool = False, **kwargs): self.resp_counter = 1 self._deduplicate_query_params = deduplicate_query_params - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return None def path(self, **kwargs) -> str: @@ -59,7 +60,7 @@ def must_deduplicate_query_params(self) -> bool: return self._deduplicate_query_params @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: return ["updated_at"] @@ -105,7 +106,7 @@ def __init__(self, pages: int = 5): super().__init__() self._pages = pages - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: while self.current_page < self._pages: page_token = {"page": self.current_page} self.current_page += 1 @@ -157,7 +158,7 @@ def test_stub_bad_url_http_stream_read_records(mocker): class StubCustomBackoffHttpStream(StubBasicReadHttpStream): - def backoff_time(self, response: requests.Response) -> Optional[float]: + def backoff_time(self, response: requests.Response) -> float | None: return 0.5 @@ -185,7 +186,7 @@ class StubCustomBackoffHttpStreamRetries(StubCustomBackoffHttpStream): def max_retries(self): return retries - def get_error_handler(self) -> Optional[ErrorHandler]: + def get_error_handler(self) -> ErrorHandler | None: return HttpStatusErrorHandler(logging.Logger, max_retries=retries) stream = StubCustomBackoffHttpStreamRetries() @@ -207,7 +208,7 @@ def test_stub_custom_backoff_http_stream_endless_retries(mocker): mocker.patch("time.sleep", lambda x: None) class StubCustomBackoffHttpStreamRetries(StubCustomBackoffHttpStream): - def get_error_handler(self) -> Optional[ErrorHandler]: + def get_error_handler(self) -> ErrorHandler | None: return HttpStatusErrorHandler(logging.Logger, max_retries=99999) infinite_number = 20 @@ -275,7 +276,7 @@ class AutoFailFalseHttpStream(StubBasicReadHttpStream): raise_on_http_errors = False max_retries = 3 - def get_error_handler(self) -> Optional[ErrorHandler]: + def get_error_handler(self) -> ErrorHandler | None: return HttpStatusErrorHandler(logging.getLogger(), max_retries=3) @@ -336,7 +337,7 @@ def test_raise_on_http_errors(mocker, error): class StubHttpStreamWithErrorHandler(StubBasicReadHttpStream): - def get_error_handler(self) -> Optional[ErrorHandler]: + def get_error_handler(self) -> ErrorHandler | None: return HttpStatusErrorHandler(logging.getLogger()) @@ -448,7 +449,7 @@ def __init__(self, parent): def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: return [] - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | 
None: return None def path(self, **kwargs) -> str: @@ -508,7 +509,7 @@ class CacheHttpStreamWithSlices(CacheHttpStream): def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return f"{stream_slice['path']}" if stream_slice else "" - def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: + def stream_slices(self, **kwargs) -> Iterable[Mapping[str, Any] | None]: for path in self.paths: yield {"path": path} @@ -625,7 +626,7 @@ def test_send_raise_on_http_errors_logs(mocker, status_code): ({}, None), ], ) -def test_default_parse_response_error_message(api_response: dict, expected_message: Optional[str]): +def test_default_parse_response_error_message(api_response: dict, expected_message: str | None): stream = StubBasicReadHttpStream() response = MagicMock() response.json.return_value = api_response @@ -809,7 +810,7 @@ class StubParentHttpStream(HttpStream, CheckpointMixin): counter = 0 - def __init__(self, records: List[Mapping[str, Any]]): + def __init__(self, records: list[Mapping[str, Any]]): super().__init__() self._records = records self._state: MutableMapping[str, Any] = {} @@ -821,13 +822,13 @@ def url_base(self) -> str: def path( self, *, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: return "/stub" - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return {"__ab_full_refresh_sync_complete": True} def _read_single_page( @@ -837,12 +838,12 @@ def _read_single_page( requests.PreparedRequest, requests.Response, Mapping[str, Any], - Optional[Mapping[str, Any]], + Mapping[str, Any] | None, ], Iterable[StreamData], ], - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: yield from self._records @@ -853,8 +854,8 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: return [] @@ -867,7 +868,7 @@ class StubParentResumableFullRefreshStream(HttpStream, CheckpointMixin): counter = 0 - def __init__(self, record_pages: List[List[Mapping[str, Any]]]): + def __init__(self, record_pages: list[list[Mapping[str, Any]]]): super().__init__() self._record_pages = record_pages self._state: MutableMapping[str, Any] = {} @@ -879,21 +880,21 @@ def url_base(self) -> str: def path( self, *, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: return "/stub" - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return {"__ab_full_refresh_sync_complete": True} def 
read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: page_number = self.state.get("page") or 1 yield from self._record_pages[page_number - 1] @@ -908,8 +909,8 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: return [] @@ -927,13 +928,13 @@ def url_base(self) -> str: def path( self, *, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: return "/stub" - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return None def _read_pages( @@ -943,12 +944,12 @@ def _read_pages( requests.PreparedRequest, requests.Response, Mapping[str, Any], - Optional[Mapping[str, Any]], + Mapping[str, Any] | None, ], Iterable[StreamData], ], - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: return [ {"id": "abc", "parent": stream_slice.get("id")}, @@ -960,8 +961,8 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: return [] @@ -1052,7 +1053,7 @@ def __init__(self, deduplicate_query_params: bool = False, pages: int = 5, **kwa self._deduplicate_query_params = deduplicate_query_params self._pages = pages - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: current_page = self.cursor.get_stream_state().get("page", 1) if current_page < self._pages: current_page += 1 @@ -1077,9 +1078,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: yield from [{}] @@ -1267,7 +1268,7 @@ class StubSubstreamResumableFullRefreshStream(HttpSubStream, CheckpointMixin): def __init__( self, parent: HttpStream, - partition_id_to_child_records: Mapping[str, List[Mapping[str, Any]]], + partition_id_to_child_records: Mapping[str, list[Mapping[str, Any]]], ): super().__init__(parent=parent) self._partition_id_to_child_records = partition_id_to_child_records @@ -1280,13 +1281,13 @@ def url_base(self) -> str: def path( self, *, - stream_state: 
Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: return f"/parents/{stream_slice.get('parent_id')}/children" - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return None # def read_records( @@ -1306,10 +1307,10 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, def _fetch_next_page( self, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Tuple[requests.PreparedRequest, requests.Response]: + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, + ) -> tuple[requests.PreparedRequest, requests.Response]: return requests.PreparedRequest(), requests.Response() def parse_response( @@ -1317,8 +1318,8 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: partition_id = stream_slice.get("parent").get("parent_id") if partition_id in self._partition_id_to_child_records: @@ -1535,7 +1536,7 @@ class StubWithCursorFields(StubBasicReadHttpStream): def __init__( self, has_multiple_slices: bool, - set_cursor_field: List[str], + set_cursor_field: list[str], deduplicate_query_params: bool = False, **kwargs, ): @@ -1544,7 +1545,7 @@ def __init__( super().__init__() @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: return self._cursor_field diff --git a/unit_tests/sources/streams/test_call_rate.py b/unit_tests/sources/streams/test_call_rate.py index b99905870..bee26e98f 100644 --- a/unit_tests/sources/streams/test_call_rate.py +++ b/unit_tests/sources/streams/test_call_rate.py @@ -4,8 +4,9 @@ import os import tempfile import time +from collections.abc import Iterable, Mapping from datetime import datetime, timedelta -from typing import Any, Iterable, Mapping, Optional +from typing import Any import pytest import requests @@ -31,7 +32,7 @@ class StubDummyHttpStream(HttpStream): url_base = "https://test_base_url.com" primary_key = "some_key" - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return {"next_page_token": True} # endless pages def path(self, **kwargs) -> str: diff --git a/unit_tests/sources/streams/test_stream_read.py b/unit_tests/sources/streams/test_stream_read.py index ac11b7499..76439ad32 100644 --- a/unit_tests/sources/streams/test_stream_read.py +++ b/unit_tests/sources/streams/test_stream_read.py @@ -3,8 +3,9 @@ # import logging +from collections.abc import Iterable, Mapping, MutableMapping from copy import deepcopy -from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Union +from typing import Any from unittest.mock import Mock import pytest @@ -48,32 +49,32 @@ class _MockStream(Stream): def __init__( self, 
- slice_to_records: Mapping[str, List[Mapping[str, Any]]], - json_schema: Dict[str, Any] = None, + slice_to_records: Mapping[str, list[Mapping[str, Any]]], + json_schema: dict[str, Any] = None, ): self._slice_to_records = slice_to_records self._mocked_json_schema = json_schema or {} @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return None def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: for partition in self._slice_to_records.keys(): yield {"partition_key": partition} def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: yield from self._slice_to_records[stream_slice["partition_key"]] @@ -94,15 +95,15 @@ def state(self, value: MutableMapping[str, Any]) -> None: self._state = value @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: return ["created_at"] def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: cursor = self.cursor_field[0] for record in self._slice_to_records[stream_slice["partition_key"]]: @@ -157,7 +158,7 @@ def _concurrent_stream( slice_logger, logger, message_repository, - cursor: Optional[Cursor] = None, + cursor: Cursor | None = None, ): stream = _stream(slice_to_partition_mapping, slice_logger, logger, message_repository) cursor = cursor or FinalStateCursor( diff --git a/unit_tests/sources/streams/test_streams_core.py b/unit_tests/sources/streams/test_streams_core.py index 3e0f59c12..96e96a755 100644 --- a/unit_tests/sources/streams/test_streams_core.py +++ b/unit_tests/sources/streams/test_streams_core.py @@ -3,7 +3,8 @@ # import logging -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional +from collections.abc import Iterable, Mapping, MutableMapping +from typing import Any from unittest import mock import pytest @@ -34,7 +35,7 @@ class StreamStubFullRefresh(Stream): def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: @@ -53,7 +54,7 @@ class StreamStubIncremental(Stream, CheckpointMixin): def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: @@ -82,7 +83,7 @@ class StreamStubResumableFullRefresh(Stream, CheckpointMixin): def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, 
stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: @@ -109,7 +110,7 @@ class StreamStubLegacyStateInterface(Stream): def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: @@ -133,7 +134,7 @@ class StreamStubIncrementalEmptyNamespace(Stream): def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: @@ -155,15 +156,15 @@ class HttpSubStreamStubFullRefreshLegacySlices(HttpSubStream): def url_base(self) -> str: return "https://airbyte.io/api/v1" - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: pass def path( self, *, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: return "/stub" @@ -172,14 +173,14 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: return [] class CursorBasedStreamStubFullRefresh(StreamStubFullRefresh): - def get_cursor(self) -> Optional[Cursor]: + def get_cursor(self) -> Cursor | None: return ResumableFullRefreshCursor() @@ -188,9 +189,9 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: yield from [{}] @@ -209,23 +210,23 @@ def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: yield from [ StreamSlice(partition={"parent_id": "korra"}, cursor_slice={}), StreamSlice(partition={"parent_id": "asami"}, cursor_slice={}), ] - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: pass def path( self, *, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: return "/stub" @@ -234,8 +235,8 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: 
Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: return [] @@ -247,21 +248,21 @@ class ParentHttpStreamStub(HttpStream): def read_records( self, sync_mode: SyncMode, - cursor_field: List[str] = None, + cursor_field: list[str] = None, stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: return [{"id": 400, "name": "a_parent_record"}] - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + def next_page_token(self, response: requests.Response) -> Mapping[str, Any] | None: return None def path( self, *, - stream_state: Optional[Mapping[str, Any]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_state: Mapping[str, Any] | None = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> str: return "/parent" @@ -270,8 +271,8 @@ def parse_response( response: requests.Response, *, stream_state: Mapping[str, Any], - stream_slice: Optional[Mapping[str, Any]] = None, - next_page_token: Optional[Mapping[str, Any]] = None, + stream_slice: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any]]: return [] diff --git a/unit_tests/sources/test_abstract_source.py b/unit_tests/sources/test_abstract_source.py index 4ca7f7fb6..7bc5650fc 100644 --- a/unit_tests/sources/test_abstract_source.py +++ b/unit_tests/sources/test_abstract_source.py @@ -5,17 +5,9 @@ import copy import datetime import logging +from collections.abc import Callable, Iterable, Mapping, MutableMapping from typing import ( Any, - Callable, - Dict, - Iterable, - List, - Mapping, - MutableMapping, - Optional, - Tuple, - Union, ) from unittest.mock import Mock @@ -62,8 +54,8 @@ class MockSource(AbstractSource): def __init__( self, - check_lambda: Callable[[], Tuple[bool, Optional[Any]]] = None, - streams: List[Stream] = None, + check_lambda: Callable[[], tuple[bool, Any | None]] = None, + streams: list[Stream] = None, message_repository: MessageRepository = None, exception_on_missing_stream: bool = True, stop_sync_on_stream_failure: bool = False, @@ -76,12 +68,12 @@ def __init__( def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: if self.check_lambda: return self.check_lambda() return False, "Missing callable." 
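The stub signatures above and below all follow the same two conventions: builtin generics from PEP 585 (list, dict, tuple) and PEP 604 unions (X | None instead of Optional[X], A | B instead of Union[A, B]). Evaluated at runtime these spellings need Python 3.9 and 3.10 respectively, so this assumes the package already targets Python 3.10+ (or relies on from __future__ import annotations for annotation-only uses). A minimal illustrative stub, not a CDK class, showing the target style:

    # Illustrative stub (not a CDK class) demonstrating PEP 585 builtin generics
    # plus PEP 604 unions, the style this diff converges on.
    from collections.abc import Mapping
    from typing import Any


    class ExampleSource:
        def check_connection(self, config: Mapping[str, Any]) -> tuple[bool, Any | None]:
            # previously: -> Tuple[bool, Optional[Any]]
            return True, None

        def streams(self, config: Mapping[str, Any]) -> list[str]:
            # previously: -> List[str]
            return ["stream_a", "stream_b"]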
- def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: if not self._streams: raise Exception("Stream is not set") return self._streams @@ -177,8 +169,8 @@ def test_raising_check(mocker): class MockStream(Stream): def __init__( self, - inputs_and_mocked_outputs: List[ - Tuple[Mapping[str, Any], Iterable[Mapping[str, Any]]] + inputs_and_mocked_outputs: list[ + tuple[Mapping[str, Any], Iterable[Mapping[str, Any]]] ] = None, name: str = None, ): @@ -202,11 +194,11 @@ def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]: # type: ignore ) @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return "pk" @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: return ["updated_at"] @@ -215,7 +207,7 @@ class MockStreamWithCursor(MockStream): def __init__( self, - inputs_and_mocked_outputs: List[Tuple[Mapping[str, Any], Iterable[Mapping[str, Any]]]], + inputs_and_mocked_outputs: list[tuple[Mapping[str, Any], Iterable[Mapping[str, Any]]]], name: str, ): super().__init__(inputs_and_mocked_outputs, name) @@ -224,7 +216,7 @@ def __init__( class MockStreamWithState(MockStreamWithCursor): def __init__( self, - inputs_and_mocked_outputs: List[Tuple[Mapping[str, Any], Iterable[Mapping[str, Any]]]], + inputs_and_mocked_outputs: list[tuple[Mapping[str, Any], Iterable[Mapping[str, Any]]]], name: str, state=None, ): @@ -243,7 +235,7 @@ def state(self, value): class MockStreamEmittingAirbyteMessages(MockStreamWithState): def __init__( self, - inputs_and_mocked_outputs: List[Tuple[Mapping[str, Any], Iterable[AirbyteMessage]]] = None, + inputs_and_mocked_outputs: list[tuple[Mapping[str, Any], Iterable[AirbyteMessage]]] = None, name: str = None, state=None, ): @@ -256,7 +248,7 @@ def name(self): return self._name @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return "pk" @property @@ -271,7 +263,7 @@ def state(self, value: MutableMapping[str, Any]): class MockResumableFullRefreshStream(Stream): def __init__( self, - inputs_and_mocked_outputs: List[Tuple[Mapping[str, Any], Mapping[str, Any]]] = None, + inputs_and_mocked_outputs: list[tuple[Mapping[str, Any], Mapping[str, Any]]] = None, name: str = None, ): self._inputs_and_mocked_outputs = inputs_and_mocked_outputs @@ -304,7 +296,7 @@ def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]: # type: ignore yield from output @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return "id" @property @@ -455,14 +447,14 @@ def test_read_stream_with_error_gets_display_message(mocker): GLOBAL_EMITTED_AT = 1 -def _as_record(stream: str, data: Dict[str, Any]) -> AirbyteMessage: +def _as_record(stream: str, data: dict[str, Any]) -> AirbyteMessage: return AirbyteMessage( type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data=data, emitted_at=GLOBAL_EMITTED_AT), ) -def _as_records(stream: str, data: List[Dict[str, Any]]) -> List[AirbyteMessage]: +def _as_records(stream: str, data: list[dict[str, Any]]) -> list[AirbyteMessage]: return [_as_record(stream, datum) for datum in data] @@ -480,7 +472,7 @@ def _as_stream_status(stream: str, status: AirbyteStreamStatus) -> AirbyteMessag return 
AirbyteMessage(type=MessageType.TRACE, trace=trace_message) -def _as_state(stream_name: str = "", per_stream_state: Dict[str, Any] = None): +def _as_state(stream_name: str = "", per_stream_state: dict[str, Any] = None): return AirbyteMessage( type=Type.STATE, state=AirbyteStateMessage( @@ -496,9 +488,9 @@ def _as_state(stream_name: str = "", per_stream_state: Dict[str, Any] = None): def _as_error_trace( stream: str, error_message: str, - internal_message: Optional[str], - failure_type: Optional[FailureType], - stack_trace: Optional[str], + internal_message: str | None, + failure_type: FailureType | None, + stack_trace: str | None, ) -> AirbyteMessage: trace_message = AirbyteTraceMessage( emitted_at=datetime.datetime.now().timestamp() * 1000.0, @@ -523,7 +515,7 @@ def _configured_stream(stream: Stream, sync_mode: SyncMode): ) -def _fix_emitted_at(messages: List[AirbyteMessage]) -> List[AirbyteMessage]: +def _fix_emitted_at(messages: list[AirbyteMessage]) -> list[AirbyteMessage]: for msg in messages: if msg.type == Type.RECORD and msg.record: msg.record.emitted_at = GLOBAL_EMITTED_AT diff --git a/unit_tests/sources/test_config.py b/unit_tests/sources/test_config.py index 94d58540e..dd7f1587a 100644 --- a/unit_tests/sources/test_config.py +++ b/unit_tests/sources/test_config.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import List, Union from pydantic.v1 import BaseModel, Field @@ -24,15 +23,15 @@ class Choice1(BaseModel): class Choice2(BaseModel): selected_strategy = Field("option2", const=True) - sequence: List[str] + sequence: list[str] class SomeSourceConfig(BaseConfig): class Config: title = "Some Source" - items: List[InnerClass] - choice: Union[Choice1, Choice2] + items: list[InnerClass] + choice: Choice1 | Choice2 class TestBaseConfig: diff --git a/unit_tests/sources/test_connector_state_manager.py b/unit_tests/sources/test_connector_state_manager.py index f3bfa48c0..06b96135b 100644 --- a/unit_tests/sources/test_connector_state_manager.py +++ b/unit_tests/sources/test_connector_state_manager.py @@ -3,7 +3,6 @@ # from contextlib import nullcontext as does_not_raise -from typing import List import pytest @@ -159,7 +158,7 @@ ), ) def test_initialize_state_manager(input_stream_state, expected_stream_state, expected_error): - if isinstance(input_stream_state, List): + if isinstance(input_stream_state, list): input_stream_state = [ AirbyteStateMessageSerializer.load(state_obj) for state_obj in list(input_stream_state) ] diff --git a/unit_tests/sources/test_integration_source.py b/unit_tests/sources/test_integration_source.py index 39573fd35..f41a9a041 100644 --- a/unit_tests/sources/test_integration_source.py +++ b/unit_tests/sources/test_integration_source.py @@ -4,7 +4,8 @@ import json import os -from typing import Any, List, Mapping +from collections.abc import Mapping +from typing import Any from unittest import mock from unittest.mock import patch @@ -145,7 +146,7 @@ def test_external_oauth_request_source( launch(source, args) -def contains_error_trace_message(messages: List[Mapping[str, Any]], expected_error: str) -> bool: +def contains_error_trace_message(messages: list[Mapping[str, Any]], expected_error: str) -> bool: for message in messages: if message.get("type") != "TRACE": continue diff --git a/unit_tests/sources/test_source.py b/unit_tests/sources/test_source.py index 9554d2242..9521faf82 100644 --- a/unit_tests/sources/test_source.py +++ b/unit_tests/sources/test_source.py @@ -5,8 +5,9 @@ import json import logging import tempfile 
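The import rewrites across these test modules follow a single rule: abstract container types (Mapping, MutableMapping, Iterable, Iterator, Callable) are imported from collections.abc, whose typing-module aliases have been deprecated since Python 3.9, while typing keeps only names with no runtime home (Any, TypeVar, Protocol, and so on). A small self-contained sketch of the resulting import layout; the helper function and its names are illustrative, not taken from these tests:

    # Sketch of the import convention applied here; the helper below is illustrative.
    from collections.abc import Callable, Iterable, Mapping
    from typing import Any


    def first_matching(
        records: Iterable[Mapping[str, Any]],
        predicate: Callable[[Mapping[str, Any]], bool],
    ) -> Mapping[str, Any] | None:
        """Return the first record satisfying the predicate, or None if there is none."""
        for record in records:
            if predicate(record):
                return record
        return None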
+from collections.abc import Mapping, MutableMapping from contextlib import nullcontext as does_not_raise -from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union +from typing import Any import orjson import pytest @@ -49,13 +50,13 @@ def discover(self, logger: logging.Logger, config: Mapping[str, Any]): class MockAbstractSource(AbstractSource): - def __init__(self, streams: Optional[List[Stream]] = None): + def __init__(self, streams: list[Stream] | None = None): self._streams = streams - def check_connection(self, *args, **kwargs) -> Tuple[bool, Optional[Any]]: + def check_connection(self, *args, **kwargs) -> tuple[bool, Any | None]: return True, "" - def streams(self, *args, **kwargs) -> List[Stream]: + def streams(self, *args, **kwargs) -> list[Stream]: if self._streams: return self._streams return [] @@ -105,7 +106,7 @@ class MockHttpStream(mocker.MagicMock, HttpStream): _state = {} @property - def cursor_field(self) -> Union[str, List[str]]: + def cursor_field(self) -> str | list[str]: return ["updated_at"] def get_backoff_strategy(self): diff --git a/unit_tests/sources/test_source_read.py b/unit_tests/sources/test_source_read.py index a25f54a5a..a92049f71 100644 --- a/unit_tests/sources/test_source_read.py +++ b/unit_tests/sources/test_source_read.py @@ -2,7 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # import logging -from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union +from collections.abc import Iterable, Mapping +from typing import Any from unittest.mock import Mock import freezegun @@ -37,7 +38,7 @@ class _MockStream(Stream): - def __init__(self, slice_to_records: Mapping[str, List[Mapping[str, Any]]], name: str): + def __init__(self, slice_to_records: Mapping[str, list[Mapping[str, Any]]], name: str): self._slice_to_records = slice_to_records self._name = name @@ -46,25 +47,25 @@ def name(self) -> str: return self._name @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + def primary_key(self) -> str | list[str] | list[list[str]] | None: return None def stream_slices( self, *, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_state: Optional[Mapping[str, Any]] = None, - ) -> Iterable[Optional[Mapping[str, Any]]]: + cursor_field: list[str] | None = None, + stream_state: Mapping[str, Any] | None = None, + ) -> Iterable[Mapping[str, Any] | None]: for partition in self._slice_to_records.keys(): yield {"partition": partition} def read_records( self, sync_mode: SyncMode, - cursor_field: Optional[List[str]] = None, - stream_slice: Optional[Mapping[str, Any]] = None, - stream_state: Optional[Mapping[str, Any]] = None, + cursor_field: list[str] | None = None, + stream_slice: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: for record_or_exception in self._slice_to_records[stream_slice["partition"]]: if isinstance(record_or_exception, Exception): @@ -81,13 +82,13 @@ class _MockSource(AbstractSource): def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, Optional[Any]]: + ) -> tuple[bool, Any | None]: pass def set_streams(self, streams): self._streams = streams - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: return self._streams @@ -102,13 +103,13 @@ def __init__(self, logger): def check_connection( self, logger: logging.Logger, config: Mapping[str, Any] - ) -> Tuple[bool, 
Optional[Any]]: + ) -> tuple[bool, Any | None]: pass def set_streams(self, streams): self._streams = streams - def streams(self, config: Mapping[str, Any]) -> List[Stream]: + def streams(self, config: Mapping[str, Any]) -> list[Stream]: return self._streams diff --git a/unit_tests/test/mock_http/test_response_builder.py b/unit_tests/test/mock_http/test_response_builder.py index f5db2e73a..464cfcb3a 100644 --- a/unit_tests/test/mock_http/test_response_builder.py +++ b/unit_tests/test/mock_http/test_response_builder.py @@ -2,7 +2,7 @@ import json from copy import deepcopy from pathlib import Path as FilePath -from typing import Any, Dict, Optional, Union +from typing import Any from unittest import TestCase from unittest.mock import Mock @@ -34,10 +34,10 @@ def _record_builder( - response_template: Dict[str, Any], - records_path: Union[FieldPath, NestedPath], - record_id_path: Optional[Path] = None, - record_cursor_path: Optional[Union[FieldPath, NestedPath]] = None, + response_template: dict[str, Any], + records_path: FieldPath | NestedPath, + record_id_path: Path | None = None, + record_cursor_path: FieldPath | NestedPath | None = None, ) -> RecordBuilder: return create_record_builder( deepcopy(response_template), records_path, record_id_path, record_cursor_path @@ -51,16 +51,16 @@ def _any_record_builder() -> RecordBuilder: def _response_builder( - response_template: Dict[str, Any], - records_path: Union[FieldPath, NestedPath], - pagination_strategy: Optional[PaginationStrategy] = None, + response_template: dict[str, Any], + records_path: FieldPath | NestedPath, + pagination_strategy: PaginationStrategy | None = None, ) -> HttpResponseBuilder: return create_response_builder( deepcopy(response_template), records_path, pagination_strategy=pagination_strategy ) -def _body(response: HttpResponse) -> Dict[str, Any]: +def _body(response: HttpResponse) -> dict[str, Any]: return json.loads(response.body) diff --git a/unit_tests/test/test_entrypoint_wrapper.py b/unit_tests/test/test_entrypoint_wrapper.py index a8d02fca9..90de023a7 100644 --- a/unit_tests/test/test_entrypoint_wrapper.py +++ b/unit_tests/test/test_entrypoint_wrapper.py @@ -3,7 +3,8 @@ import json import logging import os -from typing import Any, Iterator, List, Mapping, Optional +from collections.abc import Iterator, Mapping +from typing import Any from unittest import TestCase from unittest.mock import Mock, patch @@ -113,7 +114,7 @@ def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteM _A_LOG_MESSAGE = "a log message" -def _to_entrypoint_output(messages: List[AirbyteMessage]) -> Iterator[str]: +def _to_entrypoint_output(messages: list[AirbyteMessage]) -> Iterator[str]: return (orjson.dumps(AirbyteMessageSerializer.dump(message)).decode() for message in messages) @@ -135,8 +136,8 @@ def _validate_tmp_catalog(expected, file_path) -> None: def _create_tmp_file_validation( entrypoint, expected_config, - expected_catalog: Optional[Any] = None, - expected_state: Optional[Any] = None, + expected_catalog: Any | None = None, + expected_state: Any | None = None, ): def _validate_tmp_files(self): _validate_tmp_json_file(expected_config, entrypoint.parse_args.call_args.args[0][2]) diff --git a/unit_tests/test_connector.py b/unit_tests/test_connector.py index cf10dba01..f9f483c0c 100644 --- a/unit_tests/test_connector.py +++ b/unit_tests/test_connector.py @@ -8,8 +8,9 @@ import os import sys import tempfile +from collections.abc import Mapping from pathlib import Path -from typing import Any, Mapping +from typing 
import Any import pytest import yaml @@ -68,7 +69,7 @@ def test_read_non_json_config(nonjson_file, integration: Connector): def test_write_config(integration, mock_config): config_path = Path(tempfile.gettempdir()) / "config.json" integration.write_config(mock_config, str(config_path)) - with open(config_path, "r") as actual: + with open(config_path) as actual: assert json.loads(actual.read()) == mock_config diff --git a/unit_tests/test_entrypoint.py b/unit_tests/test_entrypoint.py index 52d742c07..1c386f873 100644 --- a/unit_tests/test_entrypoint.py +++ b/unit_tests/test_entrypoint.py @@ -5,8 +5,9 @@ import os from argparse import Namespace from collections import defaultdict +from collections.abc import Mapping, MutableMapping from copy import deepcopy -from typing import Any, List, Mapping, MutableMapping, Union +from typing import Any from unittest import mock from unittest.mock import MagicMock, patch @@ -63,7 +64,7 @@ def message_repository(self): pass -def _as_arglist(cmd: str, named_args: Mapping[str, Any]) -> List[str]: +def _as_arglist(cmd: str, named_args: Mapping[str, Any]) -> list[str]: out = [cmd] for k, v in named_args.items(): out.append(f"--{k}") @@ -188,9 +189,10 @@ def test_parse_missing_required_args( def _wrap_message( - submessage: Union[ - AirbyteConnectionStatus, ConnectorSpecification, AirbyteRecordMessage, AirbyteCatalog - ], + submessage: AirbyteConnectionStatus + | ConnectorSpecification + | AirbyteRecordMessage + | AirbyteCatalog, ) -> str: if isinstance(submessage, AirbyteConnectionStatus): message = AirbyteMessage(type=Type.CONNECTION_STATUS, connectionStatus=submessage) diff --git a/unit_tests/test_logger.py b/unit_tests/test_logger.py index 68d32ed74..a0d62b43b 100644 --- a/unit_tests/test_logger.py +++ b/unit_tests/test_logger.py @@ -4,7 +4,6 @@ import json import logging -from typing import Dict import pytest @@ -25,7 +24,7 @@ def test_formatter(logger, caplog): formatted_record_data = json.loads(formatted_record) assert formatted_record_data.get("type") == "LOG" log = formatted_record_data.get("log") - assert isinstance(log, Dict) + assert isinstance(log, dict) level = log.get("level") message = log.get("message") assert level == "INFO" diff --git a/unit_tests/test_secure_logger.py b/unit_tests/test_secure_logger.py index 757a069c7..e4629e14c 100644 --- a/unit_tests/test_secure_logger.py +++ b/unit_tests/test_secure_logger.py @@ -5,7 +5,8 @@ import logging import sys from argparse import Namespace -from typing import Any, Iterable, Mapping, MutableMapping +from collections.abc import Iterable, Mapping, MutableMapping +from typing import Any import pytest diff --git a/unit_tests/utils/test_datetime_format_inferrer.py b/unit_tests/utils/test_datetime_format_inferrer.py index 7e21c1ccc..e0d7dbf76 100644 --- a/unit_tests/utils/test_datetime_format_inferrer.py +++ b/unit_tests/utils/test_datetime_format_inferrer.py @@ -2,7 +2,6 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
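Two of the changes just above touch runtime behaviour rather than annotations: isinstance checks now name the builtin class directly (isinstance(log, dict) instead of isinstance(log, Dict)), and open(config_path) drops the explicit "r", which is already the default mode. A hedged sketch of the isinstance pattern and of a multi-class union parameter written with |; the message classes here are placeholders, not CDK models:

    # Placeholder message classes, not CDK models.
    from collections.abc import Mapping


    class StatusMessage: ...


    class RecordMessage: ...


    def wrap(submessage: StatusMessage | RecordMessage) -> str:
        # previously: Union[StatusMessage, RecordMessage]
        return type(submessage).__name__


    def normalize(value: object) -> dict[str, object]:
        if isinstance(value, Mapping):  # runtime ABC check, imported from collections.abc
            return dict(value)
        if isinstance(value, list):  # bare builtin only; isinstance(value, list[str]) raises TypeError
            return {str(i): v for i, v in enumerate(value)}
        return {"value": value}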
# -from typing import Dict, List import pytest @@ -98,7 +97,7 @@ ("no scope expand", [{}, {"d": "2022-02-03"}], {}), ], ) -def test_schema_inferrer(test_name, input_records: List, expected_candidate_fields: Dict[str, str]): +def test_schema_inferrer(test_name, input_records: list, expected_candidate_fields: dict[str, str]): inferrer = DatetimeFormatInferrer() for record in input_records: inferrer.accumulate(AirbyteRecordMessage(stream="abc", data=record, emitted_at=NOW)) diff --git a/unit_tests/utils/test_schema_inferrer.py b/unit_tests/utils/test_schema_inferrer.py index 2c8813699..4ae6b522d 100644 --- a/unit_tests/utils/test_schema_inferrer.py +++ b/unit_tests/utils/test_schema_inferrer.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import List, Mapping +from collections.abc import Mapping import pytest @@ -268,7 +268,7 @@ ), ], ) -def test_schema_derivation(input_records: List, expected_schemas: Mapping): +def test_schema_derivation(input_records: list, expected_schemas: Mapping): inferrer = SchemaInferrer() for record in input_records: inferrer.accumulate( @@ -289,7 +289,7 @@ def test_schema_derivation(input_records: List, expected_schemas: Mapping): _IS_CURSOR_FIELD = True -def _create_inferrer_with_required_field(is_pk: bool, field: List[List[str]]) -> SchemaInferrer: +def _create_inferrer_with_required_field(is_pk: bool, field: list[list[str]]) -> SchemaInferrer: if is_pk: return SchemaInferrer(field) return SchemaInferrer([[]], field)
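One pattern this migration leaves untouched: several stubs keep implicit-Optional defaults such as cursor_field: list[str] = None and json_schema: dict[str, Any] = None, where only the container alias was modernised. Strict type checkers flag these, so a follow-up (not part of this diff) would spell the defaults with an explicit | None, as sketched below with an illustrative function name. Rewrites of the kind shown throughout this diff are the same ones automated by pyupgrade and by Ruff's UP-prefixed rules.

    # Hypothetical stricter spelling (not part of this change) for parameters whose default is None.
    from collections.abc import Iterable, Mapping
    from typing import Any


    def read_records_strict(
        cursor_field: list[str] | None = None,
        stream_slice: Mapping[str, Any] | None = None,
        stream_state: Mapping[str, Any] | None = None,
    ) -> Iterable[Mapping[str, Any]]:
        return []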