33from datetime import datetime
44from enum import Enum
55from typing import Literal
6+ from uuid import UUID
67
78from pydantic import BaseModel , ConfigDict , Field , ValidationInfo , field_validator
89
910from data_rentgen .server .schemas .v1 .dataset import DatasetResponseV1
1011from data_rentgen .server .schemas .v1 .job import JobResponseV1
1112from data_rentgen .server .schemas .v1 .operation import OperationResponseV1
1213from data_rentgen .server .schemas .v1 .run import RunResponseV1
13- from data_rentgen .utils import UUID
14+ from data_rentgen .utils import UUIDv6Plus
1415
1516
1617class LineageEntityKindV1 (str , Enum ):
@@ -34,9 +35,9 @@ def __str__(self) -> str:
3435
class LineageEntityV1(BaseModel):
    """Reference to one lineage entity, identified by its kind and id."""

    kind: LineageEntityKindV1 = Field(description="Type of Lineage entity")
    # Accepts either a plain string or a UUID value.
    id: str | UUID = Field(description="Id of Lineage entity")

    # from_attributes allows building the model from attribute-bearing objects,
    # not only from dicts.
    model_config = ConfigDict(from_attributes=True)
4041
4142
4243class BaseLineageQueryV1 (BaseModel ):
@@ -94,11 +95,11 @@ class JobLineageQueryV1(BaseLineageQueryV1):
9495
9596
class OperationLineageQueryV1(BaseLineageQueryV1):
    """Lineage query whose start node is an operation."""

    # NOTE(review): the documented example is the nil UUID; confirm that
    # UUIDv6Plus accepts it, otherwise the example is misleading.
    start_node_id: UUIDv6Plus = Field(
        description="Operation id",
        examples=["00000000-0000-0000-0000-000000000000"],
    )
9899
99100
100101class RunLineageQueryV1 (BaseLineageQueryV1 ):
101- start_node_id : UUID = Field (description = "Run id" , examples = ["00000000-0000-0000-0000-000000000000" ])
102+ start_node_id : UUIDv6Plus = Field (description = "Run id" , examples = ["00000000-0000-0000-0000-000000000000" ])
102103 granularity : Literal ["OPERATION" , "RUN" ] = Field (
103104 default = "RUN" ,
104105 description = "Granularity of the run lineage" ,
@@ -107,36 +108,34 @@ class RunLineageQueryV1(BaseLineageQueryV1):
107108
108109
class LineageParentRelationV1(BaseModel):
    """Parent relation between two lineage entities."""

    # ``from`` is a Python keyword, so the attribute is named ``from_`` and
    # only serialized under the key "from".
    from_: LineageEntityV1 = Field(description="Start point of relation", serialization_alias="from")
    to: LineageEntityV1 = Field(description="End point of relation")
113113
114114
class LineageIORelationSchemaFieldV1(BaseModel):
    """One field of an input/output relation schema; may contain nested fields."""

    name: str
    type: str | None = Field(default=None)
    description: str | None = Field(default=None)
    # Self-referencing annotation: the class name is quoted because the class
    # is not yet defined while its body is being evaluated.
    fields: list["LineageIORelationSchemaFieldV1"] = Field(description="Nested fields", default_factory=list)

    # from_attributes allows building the model from attribute-bearing objects.
    model_config = ConfigDict(from_attributes=True)
122122
123123
class LineageIORelationSchemaV1(BaseModel):
    """Schema attached to an input or output relation."""

    # The id is exposed as a string; coerce_numbers_to_str lets numeric
    # source values pass validation by converting them to str.
    id: str = Field(description="Schema id", coerce_numbers_to_str=True)
    fields: list[LineageIORelationSchemaFieldV1] = Field(description="Schema fields")

    # from_attributes allows building the model from attribute-bearing objects.
    model_config = ConfigDict(from_attributes=True)
129129
130130
131131class LineageInputRelationV1 (BaseModel ):
132- kind : Literal ["INPUT" ] = "INPUT"
133132 from_ : LineageEntityV1 = Field (description = "Start point of relation" , serialization_alias = "from" )
134133 to : LineageEntityV1 = Field (description = "End point of relation" )
135134 last_interaction_at : datetime = Field (description = "Last interaction at" , examples = ["2008-09-15T15:53:00+05:00" ])
136135 num_bytes : int | None = Field (description = "Number of bytes" , examples = [42 ], default = None )
137136 num_rows : int | None = Field (description = "Number of rows" , examples = [42 ], default = None )
138137 num_files : int | None = Field (description = "Number of files" , examples = [42 ], default = None )
139- i_schema : LineageOutputRelationSchemaV1 | None = Field (
138+ i_schema : LineageIORelationSchemaV1 | None = Field (
140139 description = "Schema" ,
141140 default = None ,
142141 # pydantic models have reserved "schema" attribute, using alias
@@ -145,15 +144,14 @@ class LineageInputRelationV1(BaseModel):
145144
146145
147146class LineageOutputRelationV1 (BaseModel ):
148- kind : Literal ["OUTPUT" ] = "OUTPUT"
149147 from_ : LineageEntityV1 = Field (description = "Start point of relation" , serialization_alias = "from" )
150148 to : LineageEntityV1 = Field (description = "End point of relation" )
151149 type : str | None = Field (description = "Type of relation" , examples = ["CREATE" , "APPEND" ], default = None )
152150 last_interaction_at : datetime = Field (description = "Last interaction at" , examples = ["2008-09-15T15:53:00+05:00" ])
153151 num_bytes : int | None = Field (description = "Number of bytes" , examples = [42 ], default = None )
154152 num_rows : int | None = Field (description = "Number of rows" , examples = [42 ], default = None )
155153 num_files : int | None = Field (description = "Number of files" , examples = [42 ], default = None )
156- o_schema : LineageOutputRelationSchemaV1 | None = Field (
154+ o_schema : LineageIORelationSchemaV1 | None = Field (
157155 description = "Schema" ,
158156 default = None ,
159157 # pydantic models have reserved "schema" attribute, using alias
@@ -162,17 +160,28 @@ class LineageOutputRelationV1(BaseModel):
162160
163161
class LineageSymlinkRelationV1(BaseModel):
    """Symlink relation between two lineage entities."""

    # ``from`` is a Python keyword, so the attribute is named ``from_`` and
    # only serialized under the key "from".
    from_: LineageEntityV1 = Field(description="Start point of relation", serialization_alias="from")
    to: LineageEntityV1 = Field(description="End point of relation")
    type: str = Field(description="Type of relation between datasets", examples=["METASTORE", "WAREHOUSE"])
169166
170167
class LineageRelationsResponseV1(BaseModel):
    """Lineage relations grouped by relation type; every group defaults to an empty list."""

    parents: list[LineageParentRelationV1] = Field(description="Parent relations", default_factory=list)
    symlinks: list[LineageSymlinkRelationV1] = Field(description="Symlink relations", default_factory=list)
    inputs: list[LineageInputRelationV1] = Field(description="Input relations", default_factory=list)
    # Description previously read "Input relations" (copy-paste from the field above).
    outputs: list[LineageOutputRelationV1] = Field(description="Output relations", default_factory=list)
173+
174+
class LineageNodesResponseV1(BaseModel):
    """Lineage nodes grouped by node type; every group defaults to an empty dict."""

    # Datasets and jobs are keyed by str, runs and operations by UUID.
    # NOTE(review): keys are presumably the node ids - confirm against the producer.
    datasets: dict[str, DatasetResponseV1] = Field(description="Dataset nodes", default_factory=dict)
    jobs: dict[str, JobResponseV1] = Field(description="Job nodes", default_factory=dict)
    runs: dict[UUID, RunResponseV1] = Field(description="Run nodes", default_factory=dict)
    operations: dict[UUID, OperationResponseV1] = Field(description="Operation nodes", default_factory=dict)
179+ operations : dict [UUID , OperationResponseV1 ] = Field (description = "Operation nodes" , default_factory = dict )
180+
181+
class LineageResponseV1(BaseModel):
    """Top-level lineage response holding grouped relations and grouped nodes."""

    # Both fields default to a freshly constructed (empty) container model.
    relations: LineageRelationsResponseV1 = Field(
        description="Lineage relations",
        default_factory=LineageRelationsResponseV1,
    )
    nodes: LineageNodesResponseV1 = Field(
        description="Lineage nodes",
        default_factory=LineageNodesResponseV1,
    )
0 commit comments