Skip to content

Commit 9b6a6ea

Browse files
committed
improve json parsing error handling
1 parent 607e2f2 commit 9b6a6ea

File tree

1 file changed

+51
-9
lines changed

1 file changed

+51
-9
lines changed

airbyte_cdk/models/airbyte_protocol.py

Lines changed: 51 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2+
import json
23
from collections.abc import Callable, Mapping
34
from dataclasses import InitVar, dataclass
45
from typing import (
@@ -72,32 +73,73 @@ def __init_subclass__(cls, **kwargs):
7273
custom_type_resolver=cls._type_resolver,
7374
)
7475

75-
def to_dict(self) -> Dict[str, Any]:
76+
def to_dict(self) -> dict[str, Any]:
77+
"""Serialize the object to a dictionary.
78+
79+
This method uses the `Serializer` to serialize the object to a dict as quickly as possible.
80+
"""
7681
return self._serializer.dump(self)
7782

7883
def to_json(self) -> str:
79-
# use to_dict so you only have one canonical dump
84+
"""Serialize the object to JSON.
85+
86+
This method uses `orjson` to serialize the object to JSON as quickly as possible.
87+
"""
8088
return orjson.dumps(self.to_dict()).decode("utf-8")
8189

90+
def __str__(self) -> str:
91+
"""Casting to `str` is the same as casting to JSON.
92+
93+
These are equivalent:
94+
>>> msg = AirbyteMessage(...)
95+
>>> str(msg)
96+
>>> msg.to_json()
97+
"""
98+
return self.to_json()
99+
82100
@classmethod
83-
def from_dict(cls: type[T], data: Dict[str, Any]) -> T:
101+
def from_dict(cls: type[T], data: dict[str, Any], /) -> T:
84102
return cls._serializer.load(data)
85103

86104
@classmethod
87-
def from_json(cls: type[T], s: str) -> T:
88-
return cls._serializer.load(orjson.loads(s))
105+
def from_json(cls: type[T], str_value: str, /) -> T:
106+
"""Load the object from JSON.
107+
108+
This method first tries to deserialize the JSON string using `orjson.loads()`,
109+
falling back to `json.loads()` if it fails. This is because `orjson` strictly follows the
110+
JSON specification and rejects the non-standard literals `NaN`, `Infinity`, and `-Infinity`,
111+
which the standard `json` module accepts. The `orjson` library is used for its speed and
112+
efficiency, while the standard `json` library is used as a fallback for compatibility with
113+
input that contains these non-standard values.
114+
115+
Raises:
116+
orjson.JSONDecodeError: If the JSON string cannot be deserialized by either
117+
`orjson` or `json`.
118+
"""
119+
try:
120+
dict_value = orjson.loads(str_value)
121+
except orjson.JSONDecodeError as orjson_error:
122+
try:
123+
dict_value = json.loads(str_value)
124+
except json.JSONDecodeError as json_error:
125+
# Callers will expect `orjson.JSONDecodeError`, so we raise the original
126+
# `orjson` error when both options fail.
127+
# We also attach the second error, in case it is useful for debugging.
128+
raise orjson_error from json_error
129+
130+
return cls.from_dict(dict_value)
89131

90132

91133
def _custom_state_resolver(t: type) -> CustomType[AirbyteStateBlob, dict[str, Any]] | None:
92-
class AirbyteStateBlobType(CustomType[AirbyteStateBlob, Dict[str, Any]]):
93-
def serialize(self, value: AirbyteStateBlob) -> Dict[str, Any]:
134+
class AirbyteStateBlobType(CustomType[AirbyteStateBlob, dict[str, Any]]):
135+
def serialize(self, value: AirbyteStateBlob) -> dict[str, Any]:
94136
# can't use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete"
95137
return {k: v for k, v in value.__dict__.items()}
96138

97-
def deserialize(self, value: Dict[str, Any]) -> AirbyteStateBlob:
139+
def deserialize(self, value: dict[str, Any]) -> AirbyteStateBlob:
98140
return AirbyteStateBlob(value)
99141

100-
def get_json_schema(self) -> Dict[str, Any]:
142+
def get_json_schema(self) -> dict[str, Any]:
101143
return {"type": "object"}
102144

103145
return AirbyteStateBlobType() if t is AirbyteStateBlob else None

0 commit comments

Comments
 (0)