Skip to content

Commit 10cbf36

Browse files
authored
feat: add new dbt artifacts support (#89)
* add new dbt artifacts support * remove semantic manifest * WIP * fix issues * remove extra allow * changes
1 parent be5b56c commit 10cbf36

File tree

22 files changed

+394
-71
lines changed

22 files changed

+394
-71
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""Constants for the Altimate API client."""
2+
3+
# Supported dbt artifact file types for onboarding
4+
SUPPORTED_ARTIFACT_TYPES = {
5+
"manifest",
6+
"catalog",
7+
"run_results",
8+
"sources",
9+
"semantic_manifest",
10+
}

src/datapilot/clients/altimate/utils.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from requests import Response
88

99
from datapilot.clients.altimate.client import APIClient
10+
from datapilot.clients.altimate.constants import SUPPORTED_ARTIFACT_TYPES
1011

1112

1213
def check_token_and_instance(
@@ -56,6 +57,27 @@ def validate_permissions(
5657

5758

5859
def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integration_environment, file_type, file_path, backend_url) -> Dict:
60+
"""
61+
Upload a dbt artifact file to the Altimate backend.
62+
63+
Args:
64+
api_token: API authentication token
65+
tenant: Tenant/instance name
66+
dbt_core_integration_id: ID of the dbt integration
67+
dbt_core_integration_environment: Environment type (e.g., PROD)
68+
file_type: Type of artifact - one of: manifest, catalog, run_results, sources, semantic_manifest
69+
file_path: Path to the artifact file
70+
backend_url: URL of the Altimate backend
71+
72+
Returns:
73+
Dict with 'ok' boolean and optional 'message' on failure
74+
"""
75+
if file_type not in SUPPORTED_ARTIFACT_TYPES:
76+
return {
77+
"ok": False,
78+
"message": f"Unsupported file type: {file_type}. Supported types: {', '.join(sorted(SUPPORTED_ARTIFACT_TYPES))}",
79+
}
80+
5981
api_client = APIClient(api_token, base_url=backend_url, tenant=tenant)
6082

6183
params = {
@@ -84,7 +106,7 @@ def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integratio
84106
api_client.log("Error getting signed URL.")
85107
return {
86108
"ok": False,
87-
"message": "Error in uploading the manifest. ",
109+
"message": f"Error in uploading the {file_type}.",
88110
}
89111

90112

src/datapilot/core/platforms/dbt/cli/cli.py

Lines changed: 74 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
1919
from datapilot.core.platforms.dbt.utils import load_catalog
2020
from datapilot.core.platforms.dbt.utils import load_manifest
21+
from datapilot.core.platforms.dbt.utils import load_run_results
22+
from datapilot.core.platforms.dbt.utils import load_sources
2123
from datapilot.utils.formatting.utils import tabulate_data
2224
from datapilot.utils.utils import map_url_to_instance
2325

@@ -155,6 +157,9 @@ def project_health(
155157
)
156158
@click.option("--manifest-path", required=True, prompt="Manifest Path", help="Path to the manifest file.")
157159
@click.option("--catalog-path", required=False, prompt=False, help="Path to the catalog file.")
160+
@click.option("--run-results-path", required=False, prompt=False, help="Path to the run_results.json file.")
161+
@click.option("--sources-path", required=False, prompt=False, help="Path to the sources.json file (source freshness results).")
162+
@click.option("--semantic-manifest-path", required=False, prompt=False, help="Path to the semantic_manifest.json file.")
158163
def onboard(
159164
token,
160165
instance_name,
@@ -164,6 +169,9 @@ def onboard(
164169
dbt_integration_environment,
165170
manifest_path,
166171
catalog_path,
172+
run_results_path,
173+
sources_path,
174+
semantic_manifest_path,
167175
):
168176
"""Onboard a manifest file to DBT. You can specify either --dbt_integration_id or --dbt_integration_name."""
169177

@@ -198,34 +206,91 @@ def onboard(
198206
elif dbt_integration_name and dbt_integration_id:
199207
click.echo("Warning: Both integration ID and name provided. Using ID and ignoring name.")
200208

209+
# Validate manifest (required)
201210
try:
202211
load_manifest(manifest_path)
203212
except Exception as e:
204213
click.echo(f"Error: {e}")
205214
return
206215

216+
# Validate optional artifacts if provided
217+
if catalog_path:
218+
try:
219+
load_catalog(catalog_path)
220+
except Exception as e:
221+
click.echo(f"Error validating catalog: {e}")
222+
return
223+
224+
if run_results_path:
225+
try:
226+
load_run_results(run_results_path)
227+
except Exception as e:
228+
click.echo(f"Error validating run_results: {e}")
229+
return
230+
231+
if sources_path:
232+
try:
233+
load_sources(sources_path)
234+
except Exception as e:
235+
click.echo(f"Error validating sources: {e}")
236+
return
237+
238+
# Onboard manifest (required)
207239
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "manifest", manifest_path, backend_url)
208240
if response["ok"]:
209241
click.echo("Manifest onboarded successfully!")
210242
else:
211243
click.echo(f"{response['message']}")
212-
213-
if not catalog_path:
214244
return
215245

216-
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "catalog", catalog_path, backend_url)
217-
if response["ok"]:
218-
click.echo("Catalog onboarded successfully!")
219-
else:
220-
click.echo(f"{response['message']}")
246+
# Onboard optional artifacts
247+
artifacts_uploaded = ["manifest"]
248+
249+
if catalog_path:
250+
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "catalog", catalog_path, backend_url)
251+
if response["ok"]:
252+
click.echo("Catalog onboarded successfully!")
253+
artifacts_uploaded.append("catalog")
254+
else:
255+
click.echo(f"{response['message']}")
256+
257+
if run_results_path:
258+
response = onboard_file(
259+
token, instance_name, dbt_integration_id, dbt_integration_environment, "run_results", run_results_path, backend_url
260+
)
261+
if response["ok"]:
262+
click.echo("Run results onboarded successfully!")
263+
artifacts_uploaded.append("run_results")
264+
else:
265+
click.echo(f"{response['message']}")
266+
267+
if sources_path:
268+
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "sources", sources_path, backend_url)
269+
if response["ok"]:
270+
click.echo("Sources onboarded successfully!")
271+
artifacts_uploaded.append("sources")
272+
else:
273+
click.echo(f"{response['message']}")
274+
275+
if semantic_manifest_path:
276+
response = onboard_file(
277+
token, instance_name, dbt_integration_id, dbt_integration_environment, "semantic_manifest", semantic_manifest_path, backend_url
278+
)
279+
if response["ok"]:
280+
click.echo("Semantic manifest onboarded successfully!")
281+
artifacts_uploaded.append("semantic_manifest")
282+
else:
283+
click.echo(f"{response['message']}")
221284

285+
# Start ingestion
222286
response = start_dbt_ingestion(token, instance_name, dbt_integration_id, dbt_integration_environment, backend_url)
223287
if response["ok"]:
224288
url = map_url_to_instance(backend_url, instance_name)
289+
artifacts_str = ", ".join(artifacts_uploaded)
225290
if not url:
226-
click.echo("Manifest and catalog ingestion has started.")
291+
click.echo(f"Ingestion has started for: {artifacts_str}")
227292
else:
228293
url = f"{url}/settings/integrations/{dbt_integration_id}/{dbt_integration_environment}"
229-
click.echo(f"Manifest and catalog ingestion has started. You can check the status at {url}")
294+
click.echo(f"Ingestion has started for: {artifacts_str}. You can check the status at {url}")
230295
else:
231296
click.echo(f"{response['message']}")
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from typing import Union
2+
3+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v1 import RunResultsV1
4+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v2 import RunResultsV2
5+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v3 import RunResultsV3
6+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4
7+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v5 import RunResultsV5
8+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v6 import RunResultsV6
9+
10+
RunResults = Union[
11+
RunResultsV6,
12+
RunResultsV5,
13+
RunResultsV4,
14+
RunResultsV3,
15+
RunResultsV2,
16+
RunResultsV1,
17+
]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from typing import Union
2+
3+
from vendor.dbt_artifacts_parser.parsers.sources.sources_v1 import SourcesV1
4+
from vendor.dbt_artifacts_parser.parsers.sources.sources_v2 import SourcesV2
5+
from vendor.dbt_artifacts_parser.parsers.sources.sources_v3 import SourcesV3
6+
7+
Sources = Union[
8+
SourcesV3,
9+
SourcesV2,
10+
SourcesV1,
11+
]

src/datapilot/core/platforms/dbt/utils.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,17 @@
2222
from datapilot.core.platforms.dbt.schemas.manifest import AltimateManifestSourceNode
2323
from datapilot.core.platforms.dbt.schemas.manifest import AltimateManifestTestNode
2424
from datapilot.core.platforms.dbt.schemas.manifest import Manifest
25+
from datapilot.core.platforms.dbt.schemas.run_results import RunResults
26+
from datapilot.core.platforms.dbt.schemas.sources import Sources
2527
from datapilot.exceptions.exceptions import AltimateFileNotFoundError
2628
from datapilot.exceptions.exceptions import AltimateInvalidJSONError
2729
from datapilot.utils.utils import extract_dir_name_from_file_path
2830
from datapilot.utils.utils import extract_folders_in_path
2931
from datapilot.utils.utils import is_superset_path
3032
from datapilot.utils.utils import load_json
3133
from vendor.dbt_artifacts_parser.parser import parse_manifest
34+
from vendor.dbt_artifacts_parser.parser import parse_run_results
35+
from vendor.dbt_artifacts_parser.parser import parse_sources
3236

3337
MODEL_TYPE_PATTERNS = {
3438
STAGING: r"^stg_.*", # Example: models starting with 'stg_'
@@ -94,8 +98,36 @@ def load_catalog(catalog_path: str) -> Catalog:
9498
return catalog
9599

96100

97-
def load_run_results(run_results_path: str) -> Manifest:
98-
raise NotImplementedError
101+
def load_run_results(run_results_path: str) -> RunResults:
102+
try:
103+
run_results_dict = load_json(run_results_path)
104+
except FileNotFoundError as e:
105+
raise AltimateFileNotFoundError(f"Run results file not found: {run_results_path}. Error: {e}") from e
106+
except ValueError as e:
107+
raise AltimateInvalidJSONError(f"Invalid JSON file: {run_results_path}. Error: {e}") from e
108+
109+
try:
110+
run_results: RunResults = parse_run_results(run_results_dict)
111+
except ValueError as e:
112+
raise AltimateInvalidManifestError(f"Invalid run results file: {run_results_path}. Error: {e}") from e
113+
114+
return run_results
115+
116+
117+
def load_sources(sources_path: str) -> Sources:
118+
try:
119+
sources_dict = load_json(sources_path)
120+
except FileNotFoundError as e:
121+
raise AltimateFileNotFoundError(f"Sources file not found: {sources_path}. Error: {e}") from e
122+
except ValueError as e:
123+
raise AltimateInvalidJSONError(f"Invalid JSON file: {sources_path}. Error: {e}") from e
124+
125+
try:
126+
sources: Sources = parse_sources(sources_dict)
127+
except ValueError as e:
128+
raise AltimateInvalidManifestError(f"Invalid sources file: {sources_path}. Error: {e}") from e
129+
130+
return sources
99131

100132

101133
# TODO: Add tests!

src/vendor/dbt_artifacts_parser/parsers/run_results/run_results_v1.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
class BaseArtifactMetadata(BaseParserModel):
1818
model_config = ConfigDict(
19-
extra="forbid",
19+
extra="allow",
2020
)
2121
dbt_schema_version: str
2222
dbt_version: Optional[str] = "0.19.0"
@@ -47,7 +47,7 @@ class Status2(Enum):
4747

4848
class TimingInfo(BaseParserModel):
4949
model_config = ConfigDict(
50-
extra="forbid",
50+
extra="allow",
5151
)
5252
name: str
5353
started_at: Optional[datetime] = None
@@ -56,7 +56,7 @@ class TimingInfo(BaseParserModel):
5656

5757
class RunResultOutput(BaseParserModel):
5858
model_config = ConfigDict(
59-
extra="forbid",
59+
extra="allow",
6060
)
6161
status: Union[Status, Status1, Status2]
6262
timing: list[TimingInfo]
@@ -69,7 +69,7 @@ class RunResultOutput(BaseParserModel):
6969

7070
class RunResultsV1(BaseParserModel):
7171
model_config = ConfigDict(
72-
extra="forbid",
72+
extra="allow",
7373
)
7474
metadata: BaseArtifactMetadata
7575
results: list[RunResultOutput]

src/vendor/dbt_artifacts_parser/parsers/run_results/run_results_v2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
class BaseArtifactMetadata(BaseParserModel):
1818
model_config = ConfigDict(
19-
extra="forbid",
19+
extra="allow",
2020
)
2121
dbt_schema_version: str
2222
dbt_version: Optional[str] = "0.20.0rc1"
@@ -47,7 +47,7 @@ class Status2(Enum):
4747

4848
class TimingInfo(BaseParserModel):
4949
model_config = ConfigDict(
50-
extra="forbid",
50+
extra="allow",
5151
)
5252
name: str
5353
started_at: Optional[datetime] = None
@@ -56,7 +56,7 @@ class TimingInfo(BaseParserModel):
5656

5757
class RunResultOutput(BaseParserModel):
5858
model_config = ConfigDict(
59-
extra="forbid",
59+
extra="allow",
6060
)
6161
status: Union[Status, Status1, Status2]
6262
timing: list[TimingInfo]
@@ -70,7 +70,7 @@ class RunResultOutput(BaseParserModel):
7070

7171
class RunResultsV2(BaseParserModel):
7272
model_config = ConfigDict(
73-
extra="forbid",
73+
extra="allow",
7474
)
7575
metadata: BaseArtifactMetadata
7676
results: list[RunResultOutput]

0 commit comments

Comments
 (0)