Skip to content

Commit 9f2b276

Browse files
committed
[DOP-23867] Change API schema for lineage endpoints
1 parent d9ac4dd commit 9f2b276

File tree

25 files changed

+942
-463
lines changed

25 files changed

+942
-463
lines changed

data_rentgen/consumer/openlineage/run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@
55

66
from data_rentgen.consumer.openlineage.base import OpenLineageBase
77
from data_rentgen.consumer.openlineage.run_facets import OpenLineageRunFacets
8-
from data_rentgen.utils import UUID
8+
from data_rentgen.utils import UUIDv6Plus
99

1010

1111
class OpenLineageRun(OpenLineageBase):
1212
"""Run model.
1313
See [Run](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
1414
"""
1515

16-
runId: UUID
16+
runId: UUIDv6Plus
1717
facets: OpenLineageRunFacets = Field(default_factory=OpenLineageRunFacets)

data_rentgen/consumer/openlineage/run_facets/parent_run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from data_rentgen.consumer.openlineage.base import OpenLineageBase
55
from data_rentgen.consumer.openlineage.run_facets.base import OpenLineageRunFacet
6-
from data_rentgen.utils import UUID
6+
from data_rentgen.utils import UUIDv6Plus
77

88

99
class OpenLineageParentJob(OpenLineageBase):
@@ -20,7 +20,7 @@ class OpenLineageParentRun(OpenLineageBase):
2020
See [ParentRunFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/ParentRunFacet.json).
2121
"""
2222

23-
runId: UUID
23+
runId: UUIDv6Plus
2424

2525

2626
class OpenLineageParentRunFacet(OpenLineageRunFacet):

data_rentgen/db/repositories/operation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from datetime import datetime, timezone
55
from typing import Sequence
6+
from uuid import UUID
67

78
from sqlalchemy import Row, any_, func, select
89
from sqlalchemy.dialects.postgresql import insert
@@ -11,7 +12,6 @@
1112
from data_rentgen.db.repositories.base import Repository
1213
from data_rentgen.db.utils.uuid import extract_timestamp_from_uuid
1314
from data_rentgen.dto import OperationDTO, PaginationDTO
14-
from data_rentgen.utils import UUID
1515

1616

1717
class OperationRepository(Repository[Operation]):

data_rentgen/server/providers/auth/dummy_provider.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ def __init__(
3131

3232
@classmethod
3333
def setup(cls, app: FastAPI) -> FastAPI:
34-
settings = DummyAuthProviderSettings.model_validate(app.state.settings.auth.dict(exclude={"provider"}))
34+
settings = DummyAuthProviderSettings.model_validate(
35+
app.state.settings.auth.model_dump(exclude={"provider"}),
36+
)
3537
logger.info("Using %s provider with settings:\n%s", cls.__name__, pformat(settings))
3638
app.dependency_overrides[AuthProvider] = cls
3739
app.dependency_overrides[DummyAuthProviderSettings] = lambda: settings

data_rentgen/server/providers/auth/keycloak_provider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def __init__(
3838
@classmethod
3939
def setup(cls, app: FastAPI) -> FastAPI:
4040
settings = KeycloakAuthProviderSettings.model_validate(
41-
app.state.settings.auth.dict(exclude={"provider"}),
41+
app.state.settings.auth.model_dump(exclude={"provider"}),
4242
)
4343
logger.info("Using %s provider with settings:\n%s", cls.__name__, settings)
4444
app.dependency_overrides[AuthProvider] = cls

data_rentgen/server/schemas/v1/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@
1818
LineageEntityKindV1,
1919
LineageEntityV1,
2020
LineageInputRelationV1,
21+
LineageIORelationSchemaFieldV1,
22+
LineageIORelationSchemaV1,
23+
LineageNodesResponseV1,
2124
LineageOutputRelationV1,
2225
LineageParentRelationV1,
26+
LineageRelationsResponseV1,
2327
LineageResponseV1,
2428
LineageSymlinkRelationV1,
2529
OperationLineageQueryV1,
@@ -68,12 +72,16 @@
6872
"DatasetPaginateQueryV1",
6973
"DatasetResponseV1",
7074
"LineageDirectionV1",
71-
"LineageEntityV1",
7275
"LineageEntityKindV1",
73-
"LineageResponseV1",
76+
"LineageEntityV1",
7477
"LineageInputRelationV1",
78+
"LineageIORelationSchemaFieldV1",
79+
"LineageIORelationSchemaV1",
80+
"LineageNodesResponseV1",
7581
"LineageOutputRelationV1",
7682
"LineageParentRelationV1",
83+
"LineageRelationsResponseV1",
84+
"LineageResponseV1",
7785
"LineageSymlinkRelationV1",
7886
"LocationDetailedResponseV1",
7987
"LocationPaginateQueryV1",

data_rentgen/server/schemas/v1/dataset.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
# SPDX-License-Identifier: Apache-2.0
33
from __future__ import annotations
44

5-
from typing import Literal
6-
75
from fastapi import Query
86
from pydantic import BaseModel, ConfigDict, Field
97

@@ -12,8 +10,7 @@
1210

1311

1412
class DatasetResponseV1(BaseModel):
15-
kind: Literal["DATASET"] = "DATASET"
16-
id: int = Field(description="Dataset id")
13+
id: str = Field(description="Dataset id", coerce_numbers_to_str=True)
1714
location: LocationResponseV1 = Field(description="Corresponding Location")
1815
name: str = Field(description="Dataset name")
1916
format: str | None = Field(description="Data format", default=None)

data_rentgen/server/schemas/v1/job.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# SPDX-FileCopyrightText: 2024-2025 MTS PJSC
22
# SPDX-License-Identifier: Apache-2.0
3-
from typing import Literal
43

54
from fastapi import Query
65
from pydantic import BaseModel, ConfigDict, Field
@@ -12,8 +11,7 @@
1211
class JobResponseV1(BaseModel):
1312
"""Job response"""
1413

15-
kind: Literal["JOB"] = "JOB"
16-
id: int = Field(description="Job id")
14+
id: str = Field(description="Job id", coerce_numbers_to_str=True)
1715
location: LocationResponseV1 = Field(description="Corresponding Location")
1816
name: str = Field(description="Job name")
1917
type: str = Field(description="Job type")

data_rentgen/server/schemas/v1/lineage.py

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33
from datetime import datetime
44
from enum import Enum
55
from typing import Literal
6+
from uuid import UUID
67

78
from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
89

910
from data_rentgen.server.schemas.v1.dataset import DatasetResponseV1
1011
from data_rentgen.server.schemas.v1.job import JobResponseV1
1112
from data_rentgen.server.schemas.v1.operation import OperationResponseV1
1213
from data_rentgen.server.schemas.v1.run import RunResponseV1
13-
from data_rentgen.utils import UUID
14+
from data_rentgen.utils import UUIDv6Plus
1415

1516

1617
class LineageEntityKindV1(str, Enum):
@@ -34,9 +35,9 @@ def __str__(self) -> str:
3435

3536
class LineageEntityV1(BaseModel):
3637
kind: LineageEntityKindV1 = Field(description="Type of Lineage entity")
37-
id: int | UUID = Field(description="Id of Lineage entity")
38+
id: str | UUID = Field(description="Id of Lineage entity")
3839

39-
model_config = ConfigDict(from_attributes=True, use_enum_values=True)
40+
model_config = ConfigDict(from_attributes=True)
4041

4142

4243
class BaseLineageQueryV1(BaseModel):
@@ -94,11 +95,11 @@ class JobLineageQueryV1(BaseLineageQueryV1):
9495

9596

9697
class OperationLineageQueryV1(BaseLineageQueryV1):
97-
start_node_id: UUID = Field(description="Operation id", examples=["00000000-0000-0000-0000-000000000000"])
98+
start_node_id: UUIDv6Plus = Field(description="Operation id", examples=["00000000-0000-0000-0000-000000000000"])
9899

99100

100101
class RunLineageQueryV1(BaseLineageQueryV1):
101-
start_node_id: UUID = Field(description="Run id", examples=["00000000-0000-0000-0000-000000000000"])
102+
start_node_id: UUIDv6Plus = Field(description="Run id", examples=["00000000-0000-0000-0000-000000000000"])
102103
granularity: Literal["OPERATION", "RUN"] = Field(
103104
default="RUN",
104105
description="Granularity of the run lineage",
@@ -107,36 +108,34 @@ class RunLineageQueryV1(BaseLineageQueryV1):
107108

108109

109110
class LineageParentRelationV1(BaseModel):
110-
kind: Literal["PARENT"] = "PARENT"
111111
from_: LineageEntityV1 = Field(description="Start point of relation", serialization_alias="from")
112112
to: LineageEntityV1 = Field(description="End point of relation")
113113

114114

115-
class LineageOutputRelationSchemaFieldV1(BaseModel):
115+
class LineageIORelationSchemaFieldV1(BaseModel):
116116
name: str
117117
type: str | None = Field(default=None)
118118
description: str | None = Field(default=None)
119-
fields: list["LineageOutputRelationSchemaFieldV1"] = Field(description="Nested fields", default_factory=list)
119+
fields: list["LineageIORelationSchemaFieldV1"] = Field(description="Nested fields", default_factory=list)
120120

121121
model_config = ConfigDict(from_attributes=True)
122122

123123

124-
class LineageOutputRelationSchemaV1(BaseModel):
125-
id: int = Field(description="Schema id")
126-
fields: list[LineageOutputRelationSchemaFieldV1] = Field(description="Schema fields")
124+
class LineageIORelationSchemaV1(BaseModel):
125+
id: str = Field(description="Schema id", coerce_numbers_to_str=True)
126+
fields: list[LineageIORelationSchemaFieldV1] = Field(description="Schema fields")
127127

128128
model_config = ConfigDict(from_attributes=True)
129129

130130

131131
class LineageInputRelationV1(BaseModel):
132-
kind: Literal["INPUT"] = "INPUT"
133132
from_: LineageEntityV1 = Field(description="Start point of relation", serialization_alias="from")
134133
to: LineageEntityV1 = Field(description="End point of relation")
135134
last_interaction_at: datetime = Field(description="Last interaction at", examples=["2008-09-15T15:53:00+05:00"])
136135
num_bytes: int | None = Field(description="Number of bytes", examples=[42], default=None)
137136
num_rows: int | None = Field(description="Number of rows", examples=[42], default=None)
138137
num_files: int | None = Field(description="Number of files", examples=[42], default=None)
139-
i_schema: LineageOutputRelationSchemaV1 | None = Field(
138+
i_schema: LineageIORelationSchemaV1 | None = Field(
140139
description="Schema",
141140
default=None,
142141
# pydantic models have reserved "schema" attribute, using alias
@@ -145,15 +144,14 @@ class LineageInputRelationV1(BaseModel):
145144

146145

147146
class LineageOutputRelationV1(BaseModel):
148-
kind: Literal["OUTPUT"] = "OUTPUT"
149147
from_: LineageEntityV1 = Field(description="Start point of relation", serialization_alias="from")
150148
to: LineageEntityV1 = Field(description="End point of relation")
151149
type: str | None = Field(description="Type of relation", examples=["CREATE", "APPEND"], default=None)
152150
last_interaction_at: datetime = Field(description="Last interaction at", examples=["2008-09-15T15:53:00+05:00"])
153151
num_bytes: int | None = Field(description="Number of bytes", examples=[42], default=None)
154152
num_rows: int | None = Field(description="Number of rows", examples=[42], default=None)
155153
num_files: int | None = Field(description="Number of files", examples=[42], default=None)
156-
o_schema: LineageOutputRelationSchemaV1 | None = Field(
154+
o_schema: LineageIORelationSchemaV1 | None = Field(
157155
description="Schema",
158156
default=None,
159157
# pydantic models have reserved "schema" attribute, using alias
@@ -162,17 +160,28 @@ class LineageOutputRelationV1(BaseModel):
162160

163161

164162
class LineageSymlinkRelationV1(BaseModel):
165-
kind: Literal["SYMLINK"] = "SYMLINK"
166163
from_: LineageEntityV1 = Field(description="Start point of relation", serialization_alias="from")
167164
to: LineageEntityV1 = Field(description="End point of relation")
168165
type: str = Field(description="Type of relation between datasets", examples=["METASTORE", "WAREHOUSE"])
169166

170167

168+
class LineageRelationsResponseV1(BaseModel):
169+
parents: list[LineageParentRelationV1] = Field(description="Parent relations", default_factory=list)
170+
symlinks: list[LineageSymlinkRelationV1] = Field(description="Symlink relations", default_factory=list)
171+
inputs: list[LineageInputRelationV1] = Field(description="Input relations", default_factory=list)
172+
outputs: list[LineageOutputRelationV1] = Field(description="Input relations", default_factory=list)
173+
174+
175+
class LineageNodesResponseV1(BaseModel):
176+
datasets: dict[str, DatasetResponseV1] = Field(description="Dataset nodes", default_factory=dict)
177+
jobs: dict[str, JobResponseV1] = Field(description="Job nodes", default_factory=dict)
178+
runs: dict[UUID, RunResponseV1] = Field(description="Run nodes", default_factory=dict)
179+
operations: dict[UUID, OperationResponseV1] = Field(description="Operation nodes", default_factory=dict)
180+
181+
171182
class LineageResponseV1(BaseModel):
172-
relations: list[
173-
LineageParentRelationV1 | LineageInputRelationV1 | LineageOutputRelationV1 | LineageSymlinkRelationV1
174-
] = Field(description="List of relations", default_factory=list)
175-
nodes: list[RunResponseV1 | OperationResponseV1 | JobResponseV1 | DatasetResponseV1] = Field(
176-
description="List of nodes",
177-
default_factory=list,
183+
relations: LineageRelationsResponseV1 = Field(
184+
description="Lineage relations",
185+
default_factory=LineageRelationsResponseV1,
178186
)
187+
nodes: LineageNodesResponseV1 = Field(description="Lineage nodes", default_factory=LineageNodesResponseV1)

data_rentgen/server/schemas/v1/location.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
class LocationResponseV1(BaseModel):
11-
id: int = Field(description="Location id")
11+
id: str = Field(description="Location id", coerce_numbers_to_str=True)
1212
type: str = Field(description="Location type, e.g kafka, hdfs, postgres")
1313
name: str = Field(description="Location name, e.g. cluster name")
1414
addresses: list[AddressResponseV1] = Field(description="List of addresses")

0 commit comments

Comments
 (0)