Skip to content

Commit 8d4b122

Browse files
gverkesmachichima
andauthored
Bugfix: Pydantic deserialization for FlyteFile and FlyteDirectory (#3339)
* Fix Pydantic deserialization for FlyteFile and FlyteDirectory Fixes #6669 Signed-off-by: Govert Verkes <gmverkes@gmail.com> Signed-off-by: Govert Verkes <govert@cusp.ai> * Add tests to improve codecov coverage for FlyteFile and FlyteDirectory validators Signed-off-by: Govert Verkes <govert@cusp.ai> * Simplify Pydantic deserialization with helper method Extract hasattr check into _was_initialized_via_init() helper for clarity. Remove redundant dict_to_flyte_* calls - fall through to to_python_value instead. Signed-off-by: Govert Verkes <govert@cusp.ai> * Update flytekit/types/file/file.py Co-authored-by: Nary Yeh <60069744+machichima@users.noreply.github.com> Signed-off-by: Govert Verkes <govert@cusp.ai> Signed-off-by: gverkes <gmverkes@gmail.com> * Update flytekit/types/directory/types.py Co-authored-by: Nary Yeh <60069744+machichima@users.noreply.github.com> Signed-off-by: Govert Verkes <govert@cusp.ai> Signed-off-by: gverkes <gmverkes@gmail.com> * Update tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py Co-authored-by: Nary Yeh <60069744+machichima@users.noreply.github.com> Signed-off-by: Govert Verkes <govert@cusp.ai> Signed-off-by: gverkes <gmverkes@gmail.com> * Fix docstring return types for pydoclint DOC203 Add explicit 'bool:' return type to docstrings for _was_initialized_via_init methods in FlyteFile and FlyteDirectory to satisfy pydoclint linting. Signed-off-by: Govert Verkes <govert@cusp.ai> --------- Signed-off-by: Govert Verkes <gmverkes@gmail.com> Signed-off-by: Govert Verkes <govert@cusp.ai> Signed-off-by: gverkes <gmverkes@gmail.com> Co-authored-by: Nary Yeh <60069744+machichima@users.noreply.github.com>
1 parent 010a0ef commit 8d4b122

File tree

3 files changed

+138
-2
lines changed

3 files changed

+138
-2
lines changed

flytekit/types/directory/types.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,25 @@ def serialize_flyte_dir(self) -> Dict[str, str]:
176176
)
177177
return {"path": lv.scalar.blob.uri}
178178

179+
def _was_initialized_via_init(self) -> bool:
180+
"""Check if object was initialized via __init__ or deserialized by Pydantic.
181+
182+
When Pydantic deserializes a FlyteDirectory, it bypasses __init__ and directly
183+
sets the 'path' attribute. This means the _downloader and _remote_source attributes
184+
won't exist. We use this check to determine if we need to go through the transformer
185+
to properly set up these internal attributes.
186+
187+
Returns:
188+
bool: True if __init__ was called (normal initialization), False if created via
189+
Pydantic deserialization and needs to pass through transformer.
190+
"""
191+
return hasattr(self, "_downloader") and hasattr(self, "_remote_source")
192+
179193
@model_validator(mode="after")
180194
def deserialize_flyte_dir(self, info) -> FlyteDirectory:
181195
if info.context is None or info.context.get("deserialize") is not True:
182-
return self
196+
if self._was_initialized_via_init():
197+
return self
183198

184199
pv = FlyteDirToMultipartBlobTransformer().to_python_value(
185200
FlyteContextManager.current_context(),

flytekit/types/file/file.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,25 @@ def serialize_flyte_file(self) -> Dict[str, typing.Any]:
193193
out["metadata"] = lv.metadata
194194
return out
195195

196+
def _was_initialized_via_init(self) -> bool:
197+
"""Check if object was initialized via __init__ or deserialized by Pydantic.
198+
199+
When Pydantic deserializes a FlyteFile, it bypasses __init__ and directly
200+
sets the 'path' attribute. This means the _downloader and _remote_source attributes
201+
won't exist. We use this check to determine if we need to go through the transformer
202+
to properly set up these internal attributes.
203+
204+
Returns:
205+
bool: True if __init__ was called (normal initialization), False if created via
206+
Pydantic deserialization and needs to pass through transformer.
207+
"""
208+
return hasattr(self, "_downloader") and hasattr(self, "_remote_source")
209+
196210
@model_validator(mode="after")
197211
def deserialize_flyte_file(self, info) -> "FlyteFile":
198212
if info.context is None or info.context.get("deserialize") is not True:
199-
return self
213+
if self._was_initialized_via_init():
214+
return self
200215

201216
pv = FlyteFilePathTransformer().to_python_value(
202217
FlyteContextManager.current_context(),

tests/flytekit/unit/extras/pydantic_transformer/test_pydantic_basemodel_transformer.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,3 +1022,109 @@ def mock_resolve_remote_path(flyte_uri: str):
10221022

10231023
bm_revived = TypeEngine.to_python_value(ctx, lit, BM)
10241024
assert bm_revived.s.literal.uri == "/my/replaced/val"
1025+
1026+
1027+
def test_flytefile_pydantic_model_dump_validate_cycle():
1028+
class BM(BaseModel):
1029+
ff: FlyteFile
1030+
1031+
bm = BM(ff=FlyteFile.from_source("s3://my-bucket/file.txt"))
1032+
1033+
assert bm.ff.remote_source == "s3://my-bucket/file.txt"
1034+
1035+
bm_dict = bm.model_dump()
1036+
bm2 = BM.model_validate(bm_dict)
1037+
1038+
assert isinstance(bm2.ff, FlyteFile)
1039+
assert bm2.ff.remote_source == "s3://my-bucket/file.txt"
1040+
1041+
bm2_dict = bm2.model_dump()
1042+
assert bm_dict == bm2_dict
1043+
1044+
1045+
def test_flytefile_pydantic_with_local_file(local_dummy_file):
1046+
class BM(BaseModel):
1047+
ff: FlyteFile
1048+
1049+
bm = BM(ff=FlyteFile(path=local_dummy_file))
1050+
1051+
bm_dict = bm.model_dump()
1052+
bm2 = BM.model_validate(bm_dict)
1053+
1054+
assert isinstance(bm2.ff, FlyteFile)
1055+
assert hasattr(bm2.ff, "_downloader")
1056+
assert hasattr(bm2.ff, "_remote_source")
1057+
1058+
bm2.model_dump()
1059+
1060+
1061+
def test_flytefile_pydantic_with_metadata(local_dummy_file):
1062+
class BM(BaseModel):
1063+
ff: FlyteFile
1064+
1065+
bm = BM(ff=FlyteFile(path=local_dummy_file, metadata={"key": "value"}))
1066+
1067+
bm_dict = bm.model_dump()
1068+
bm2 = BM.model_validate(bm_dict)
1069+
1070+
assert isinstance(bm2.ff, FlyteFile)
1071+
assert hasattr(bm2.ff, "_downloader")
1072+
assert hasattr(bm2.ff, "_remote_source")
1073+
assert bm2.ff.metadata == {"key": "value"}
1074+
1075+
bm2.model_dump()
1076+
1077+
1078+
def test_flytefile_pydantic_direct_dict_validate(local_dummy_file):
1079+
class BM(BaseModel):
1080+
ff: FlyteFile
1081+
1082+
bm = BM.model_validate({"ff": {"path": local_dummy_file}})
1083+
1084+
assert isinstance(bm.ff, FlyteFile)
1085+
assert hasattr(bm.ff, "_downloader")
1086+
assert hasattr(bm.ff, "_remote_source")
1087+
1088+
1089+
def test_flytedirectory_pydantic_direct_dict_validate(local_dummy_directory):
1090+
class BM(BaseModel):
1091+
fd: FlyteDirectory
1092+
1093+
bm = BM.model_validate({"fd": {"path": local_dummy_directory}})
1094+
1095+
assert isinstance(bm.fd, FlyteDirectory)
1096+
assert hasattr(bm.fd, "_downloader")
1097+
assert hasattr(bm.fd, "_remote_source")
1098+
1099+
1100+
def test_flytedirectory_pydantic_model_dump_validate_cycle():
1101+
class BM(BaseModel):
1102+
fd: FlyteDirectory
1103+
1104+
bm = BM(fd=FlyteDirectory.from_source("s3://my-bucket/my-dir"))
1105+
1106+
assert bm.fd.remote_source == "s3://my-bucket/my-dir"
1107+
1108+
bm_dict = bm.model_dump()
1109+
bm2 = BM.model_validate(bm_dict)
1110+
1111+
assert isinstance(bm2.fd, FlyteDirectory)
1112+
assert bm2.fd.remote_source == "s3://my-bucket/my-dir"
1113+
1114+
bm2.model_dump()
1115+
1116+
1117+
def test_flytedirectory_pydantic_with_local_directory(local_dummy_directory):
1118+
class BM(BaseModel):
1119+
fd: FlyteDirectory
1120+
1121+
bm = BM(fd=FlyteDirectory(path=local_dummy_directory))
1122+
1123+
bm_dict = bm.model_dump()
1124+
bm2 = BM.model_validate(bm_dict)
1125+
1126+
assert isinstance(bm2.fd, FlyteDirectory)
1127+
assert hasattr(bm2.fd, "_downloader")
1128+
assert hasattr(bm2.fd, "_remote_source")
1129+
1130+
bm2.model_dump()

0 commit comments

Comments
 (0)