Skip to content

Commit 8676125

Browse files
authored
Feat: Upgrade to Pydantic 2.0 and CDK 2.0 (#291)
1 parent 42e9c85 commit 8676125

File tree

18 files changed

+278
-145
lines changed

18 files changed

+278
-145
lines changed

.github/workflows/python_pytest.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ jobs:
142142
fail-fast: false
143143

144144
runs-on: "${{ matrix.os }}-latest"
145+
env:
146+
# Enforce UTF-8 encoding so Windows runners don't fail inside the connector code.
147+
# TODO: See if we can fully enforce this within PyAirbyte itself.
148+
PYTHONIOENCODING: utf-8
145149
steps:
146150
# Common steps:
147151
- name: Checkout code

.github/workflows/test-pr-command.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,11 @@ jobs:
6363
Windows,
6464
]
6565
fail-fast: false
66-
6766
runs-on: "${{ matrix.os }}-latest"
67+
env:
68+
# Enforce UTF-8 encoding so Windows runners don't fail inside the connector code.
69+
# TODO: See if we can fully enforce this within PyAirbyte itself.
70+
PYTHONIOENCODING: utf-8
6871
steps:
6972

7073
# Custom steps to fetch the PR and checkout the code:

airbyte/_executor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def _stream_from_subprocess(args: list[str]) -> Generator[Iterable[str], None, N
100100
stdout=subprocess.PIPE,
101101
stderr=subprocess.STDOUT,
102102
universal_newlines=True,
103+
encoding="utf-8",
103104
)
104105

105106
def _stream_from_file(file: IO[str]) -> Generator[str, Any, None]:

airbyte/_future_cdk/record_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def _airbyte_messages_from_buffer(
133133
buffer: io.TextIOBase,
134134
) -> Iterator[AirbyteMessage]:
135135
"""Yield messages from a buffer."""
136-
yield from (AirbyteMessage.parse_raw(line) for line in buffer)
136+
yield from (AirbyteMessage.model_validate_json(line) for line in buffer)
137137

138138
@final
139139
def process_input_stream(

airbyte/_future_cdk/state_providers.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,10 @@ def to_state_input_file_text(self) -> str:
7878
return (
7979
"["
8080
+ "\n, ".join(
81-
[state_artifact.json() for state_artifact in (self._state_message_artifacts or [])]
81+
[
82+
state_artifact.model_dump_json()
83+
for state_artifact in (self._state_message_artifacts or [])
84+
]
8285
)
8386
+ "]"
8487
)

airbyte/_future_cdk/state_writers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,4 @@ def write_state(
3636
state_message: AirbyteStateMessage,
3737
) -> None:
3838
"""Save or 'write' a state artifact."""
39-
print(state_message.json())
39+
print(state_message.model_dump_json())

airbyte/_util/document_rendering.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _to_title_case(name: str, /) -> str:
2828
class CustomRenderingInstructions(BaseModel):
2929
"""Instructions for rendering a stream's records as documents."""
3030

31-
title_property: Optional[str]
31+
title_property: Optional[str] = None
3232
content_properties: list[str]
3333
frontmatter_properties: list[str]
3434
metadata_properties: list[str]
@@ -37,9 +37,9 @@ class CustomRenderingInstructions(BaseModel):
3737
class DocumentRenderer(BaseModel):
3838
"""Instructions for rendering a stream's records as documents."""
3939

40-
title_property: Optional[str]
41-
content_properties: Optional[list[str]]
42-
metadata_properties: Optional[list[str]]
40+
title_property: Optional[str] = None
41+
content_properties: Optional[list[str]] = None
42+
metadata_properties: Optional[list[str]] = None
4343
render_metadata: bool = False
4444

4545
# TODO: Add primary key and cursor key support:

airbyte/caches/_state_backend.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def write_state(
104104
source_name=self.source_name,
105105
stream_name=stream_name,
106106
table_name=table_prefix + stream_name,
107-
state_json=state_message.json(),
107+
state_json=state_message.model_dump_json(),
108108
)
109109
)
110110
session.commit()
@@ -170,7 +170,7 @@ def get_state_provider(
170170

171171
return StaticInputState(
172172
from_state_messages=[
173-
AirbyteStateMessage.parse_raw(state.state_json) for state in states
173+
AirbyteStateMessage.model_validate_json(state.state_json) for state in states
174174
]
175175
)
176176

airbyte/caches/snowflake.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@
3131
class SnowflakeCache(SnowflakeConfig, CacheBase):
3232
"""Configuration for the Snowflake cache."""
3333

34-
dedupe_mode = RecordDedupeMode.APPEND
34+
dedupe_mode: RecordDedupeMode = RecordDedupeMode.APPEND
3535

3636
_sql_processor_class = PrivateAttr(default=SnowflakeSqlProcessor)

airbyte/secrets/base.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,18 @@
66
import json
77
from abc import ABC, abstractmethod
88
from enum import Enum
9-
from typing import cast
9+
from typing import TYPE_CHECKING, Any, cast
10+
11+
from pydantic_core import CoreSchema, core_schema
1012

1113
from airbyte import exceptions as exc
1214

1315

16+
if TYPE_CHECKING:
17+
from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, ValidationInfo
18+
from pydantic.json_schema import JsonSchemaValue
19+
20+
1421
class SecretSourceEnum(str, Enum):
1522
ENV = "env"
1623
DOTENV = "dotenv"
@@ -65,6 +72,49 @@ def parse_json(self) -> dict:
6572
},
6673
) from None
6774

75+
# Pydantic compatibility
76+
77+
@classmethod
78+
def validate(
79+
cls,
80+
v: Any, # noqa: ANN401 # Must allow `Any` to match Pydantic signature
81+
info: ValidationInfo,
82+
) -> SecretString:
83+
"""Validate the input value is valid as a secret string."""
84+
_ = info # Unused
85+
if not isinstance(v, str):
86+
raise exc.PyAirbyteInputError(
87+
message="A valid `str` or `SecretString` object is required.",
88+
)
89+
return cls(v)
90+
91+
@classmethod
92+
def __get_pydantic_core_schema__( # noqa: PLW3201 # Pydantic dunder
93+
cls,
94+
source_type: Any, # noqa: ANN401 # Must allow `Any` to match Pydantic signature
95+
handler: GetCoreSchemaHandler,
96+
) -> CoreSchema:
97+
return core_schema.with_info_after_validator_function(
98+
function=cls.validate, schema=handler(str), field_name=handler.field_name
99+
)
100+
101+
@classmethod
102+
def __get_pydantic_json_schema__( # noqa: PLW3201 # Pydantic dunder method
103+
cls, _core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
104+
) -> JsonSchemaValue:
105+
"""
106+
Return a modified JSON schema for the secret string.
107+
108+
- `writeOnly=True` is the official way to prevent secrets from being exposed inadvertently.
109+
- `format=password` is a popular and readable convention to indicate the field is sensitive.
110+
"""
111+
_ = _core_schema, handler # Unused
112+
return {
113+
"type": "string",
114+
"format": "password",
115+
"writeOnly": True,
116+
}
117+
68118

69119
class SecretManager(ABC):
70120
"""Abstract base class for secret managers.

0 commit comments

Comments
 (0)