Skip to content

Commit 9358e35

Browse files
committed
add new dbt artifacts support
1 parent be5b56c commit 9358e35

File tree

19 files changed

+555
-11
lines changed

19 files changed

+555
-11
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""Constants for the Altimate API client."""
2+
3+
# Supported dbt artifact file types for onboarding
4+
SUPPORTED_ARTIFACT_TYPES = {
5+
"manifest",
6+
"catalog",
7+
"run_results",
8+
"sources",
9+
"semantic_manifest",
10+
}

src/datapilot/clients/altimate/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from requests import Response
88

99
from datapilot.clients.altimate.client import APIClient
10+
from datapilot.clients.altimate.constants import SUPPORTED_ARTIFACT_TYPES
1011

1112

1213
def check_token_and_instance(
@@ -56,6 +57,27 @@ def validate_permissions(
5657

5758

5859
def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integration_environment, file_type, file_path, backend_url) -> Dict:
60+
"""
61+
Upload a dbt artifact file to the Altimate backend.
62+
63+
Args:
64+
api_token: API authentication token
65+
tenant: Tenant/instance name
66+
dbt_core_integration_id: ID of the dbt integration
67+
dbt_core_integration_environment: Environment type (e.g., PROD)
68+
file_type: Type of artifact - one of: manifest, catalog, run_results, sources, semantic_manifest
69+
file_path: Path to the artifact file
70+
backend_url: URL of the Altimate backend
71+
72+
Returns:
73+
Dict with 'ok' boolean and optional 'message' on failure
74+
"""
75+
if file_type not in SUPPORTED_ARTIFACT_TYPES:
76+
return {
77+
"ok": False,
78+
"message": f"Unsupported file type: {file_type}. Supported types: {', '.join(sorted(SUPPORTED_ARTIFACT_TYPES))}",
79+
}
80+
5981
api_client = APIClient(api_token, base_url=backend_url, tenant=tenant)
6082

6183
params = {

src/datapilot/core/platforms/dbt/cli/cli.py

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
1919
from datapilot.core.platforms.dbt.utils import load_catalog
2020
from datapilot.core.platforms.dbt.utils import load_manifest
21+
from datapilot.core.platforms.dbt.utils import load_run_results
22+
from datapilot.core.platforms.dbt.utils import load_semantic_manifest
23+
from datapilot.core.platforms.dbt.utils import load_sources
2124
from datapilot.utils.formatting.utils import tabulate_data
2225
from datapilot.utils.utils import map_url_to_instance
2326

@@ -155,6 +158,9 @@ def project_health(
155158
)
156159
@click.option("--manifest-path", required=True, prompt="Manifest Path", help="Path to the manifest file.")
157160
@click.option("--catalog-path", required=False, prompt=False, help="Path to the catalog file.")
161+
@click.option("--run-results-path", required=False, prompt=False, help="Path to the run_results.json file.")
162+
@click.option("--sources-path", required=False, prompt=False, help="Path to the sources.json file (source freshness results).")
163+
@click.option("--semantic-manifest-path", required=False, prompt=False, help="Path to the semantic_manifest.json file.")
158164
def onboard(
159165
token,
160166
instance_name,
@@ -164,6 +170,9 @@ def onboard(
164170
dbt_integration_environment,
165171
manifest_path,
166172
catalog_path,
173+
run_results_path,
174+
sources_path,
175+
semantic_manifest_path,
167176
):
168177
"""Onboard a manifest file to DBT. You can specify either --dbt_integration_id or --dbt_integration_name."""
169178

@@ -198,34 +207,98 @@ def onboard(
198207
elif dbt_integration_name and dbt_integration_id:
199208
click.echo("Warning: Both integration ID and name provided. Using ID and ignoring name.")
200209

210+
# Validate manifest (required)
201211
try:
202212
load_manifest(manifest_path)
203213
except Exception as e:
204214
click.echo(f"Error: {e}")
205215
return
206216

217+
# Validate optional artifacts if provided
218+
if catalog_path:
219+
try:
220+
load_catalog(catalog_path)
221+
except Exception as e:
222+
click.echo(f"Error validating catalog: {e}")
223+
return
224+
225+
if run_results_path:
226+
try:
227+
load_run_results(run_results_path)
228+
except Exception as e:
229+
click.echo(f"Error validating run_results: {e}")
230+
return
231+
232+
if sources_path:
233+
try:
234+
load_sources(sources_path)
235+
except Exception as e:
236+
click.echo(f"Error validating sources: {e}")
237+
return
238+
239+
if semantic_manifest_path:
240+
try:
241+
load_semantic_manifest(semantic_manifest_path)
242+
except Exception as e:
243+
click.echo(f"Error validating semantic_manifest: {e}")
244+
return
245+
246+
# Onboard manifest (required)
207247
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "manifest", manifest_path, backend_url)
208248
if response["ok"]:
209249
click.echo("Manifest onboarded successfully!")
210250
else:
211251
click.echo(f"{response['message']}")
212-
213-
if not catalog_path:
214252
return
215253

216-
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "catalog", catalog_path, backend_url)
217-
if response["ok"]:
218-
click.echo("Catalog onboarded successfully!")
219-
else:
220-
click.echo(f"{response['message']}")
254+
# Onboard optional artifacts
255+
artifacts_uploaded = ["manifest"]
256+
257+
if catalog_path:
258+
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "catalog", catalog_path, backend_url)
259+
if response["ok"]:
260+
click.echo("Catalog onboarded successfully!")
261+
artifacts_uploaded.append("catalog")
262+
else:
263+
click.echo(f"{response['message']}")
264+
265+
if run_results_path:
266+
response = onboard_file(
267+
token, instance_name, dbt_integration_id, dbt_integration_environment, "run_results", run_results_path, backend_url
268+
)
269+
if response["ok"]:
270+
click.echo("Run results onboarded successfully!")
271+
artifacts_uploaded.append("run_results")
272+
else:
273+
click.echo(f"{response['message']}")
274+
275+
if sources_path:
276+
response = onboard_file(token, instance_name, dbt_integration_id, dbt_integration_environment, "sources", sources_path, backend_url)
277+
if response["ok"]:
278+
click.echo("Sources onboarded successfully!")
279+
artifacts_uploaded.append("sources")
280+
else:
281+
click.echo(f"{response['message']}")
282+
283+
if semantic_manifest_path:
284+
response = onboard_file(
285+
token, instance_name, dbt_integration_id, dbt_integration_environment, "semantic_manifest", semantic_manifest_path, backend_url
286+
)
287+
if response["ok"]:
288+
click.echo("Semantic manifest onboarded successfully!")
289+
artifacts_uploaded.append("semantic_manifest")
290+
else:
291+
click.echo(f"{response['message']}")
221292

293+
# Start ingestion
222294
response = start_dbt_ingestion(token, instance_name, dbt_integration_id, dbt_integration_environment, backend_url)
223295
if response["ok"]:
224296
url = map_url_to_instance(backend_url, instance_name)
297+
artifacts_str = ", ".join(artifacts_uploaded)
225298
if not url:
226-
click.echo("Manifest and catalog ingestion has started.")
299+
click.echo(f"Ingestion has started for: {artifacts_str}")
227300
else:
228301
url = f"{url}/settings/integrations/{dbt_integration_id}/{dbt_integration_environment}"
229-
click.echo(f"Manifest and catalog ingestion has started. You can check the status at {url}")
302+
click.echo(f"Ingestion has started for: {artifacts_str}. You can check the status at {url}")
230303
else:
231304
click.echo(f"{response['message']}")
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from typing import Union
2+
3+
from pydantic import ConfigDict
4+
5+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v1 import RunResultsV1 as BaseRunResultsV1
6+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v2 import RunResultsV2 as BaseRunResultsV2
7+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v3 import RunResultsV3 as BaseRunResultsV3
8+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4 as BaseRunResultsV4
9+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v5 import RunResultsV5 as BaseRunResultsV5
10+
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v6 import RunResultsV6 as BaseRunResultsV6
11+
12+
13+
class RunResultsV1(BaseRunResultsV1):
14+
model_config = ConfigDict(extra="allow")
15+
16+
17+
class RunResultsV2(BaseRunResultsV2):
18+
model_config = ConfigDict(extra="allow")
19+
20+
21+
class RunResultsV3(BaseRunResultsV3):
22+
model_config = ConfigDict(extra="allow")
23+
24+
25+
class RunResultsV4(BaseRunResultsV4):
26+
model_config = ConfigDict(extra="allow")
27+
28+
29+
class RunResultsV5(BaseRunResultsV5):
30+
model_config = ConfigDict(extra="allow")
31+
32+
33+
class RunResultsV6(BaseRunResultsV6):
34+
model_config = ConfigDict(extra="allow")
35+
36+
37+
RunResults = Union[
38+
RunResultsV6,
39+
RunResultsV5,
40+
RunResultsV4,
41+
RunResultsV3,
42+
RunResultsV2,
43+
RunResultsV1,
44+
]
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from typing import Union
2+
3+
from pydantic import ConfigDict
4+
5+
from vendor.dbt_artifacts_parser.parsers.semantic_manifest.semantic_manifest_v1 import SemanticManifestV1 as BaseSemanticManifestV1
6+
7+
8+
class SemanticManifestV1(BaseSemanticManifestV1):
9+
model_config = ConfigDict(extra="allow")
10+
11+
12+
SemanticManifest = Union[SemanticManifestV1]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from typing import Union
2+
3+
from pydantic import ConfigDict
4+
5+
from vendor.dbt_artifacts_parser.parsers.sources.sources_v1 import SourcesV1 as BaseSourcesV1
6+
from vendor.dbt_artifacts_parser.parsers.sources.sources_v2 import SourcesV2 as BaseSourcesV2
7+
from vendor.dbt_artifacts_parser.parsers.sources.sources_v3 import SourcesV3 as BaseSourcesV3
8+
9+
10+
class SourcesV1(BaseSourcesV1):
11+
model_config = ConfigDict(extra="allow")
12+
13+
14+
class SourcesV2(BaseSourcesV2):
15+
model_config = ConfigDict(extra="allow")
16+
17+
18+
class SourcesV3(BaseSourcesV3):
19+
model_config = ConfigDict(extra="allow")
20+
21+
22+
Sources = Union[
23+
SourcesV3,
24+
SourcesV2,
25+
SourcesV1,
26+
]

src/datapilot/core/platforms/dbt/utils.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,19 @@
2222
from datapilot.core.platforms.dbt.schemas.manifest import AltimateManifestSourceNode
2323
from datapilot.core.platforms.dbt.schemas.manifest import AltimateManifestTestNode
2424
from datapilot.core.platforms.dbt.schemas.manifest import Manifest
25+
from datapilot.core.platforms.dbt.schemas.run_results import RunResults
26+
from datapilot.core.platforms.dbt.schemas.semantic_manifest import SemanticManifest
27+
from datapilot.core.platforms.dbt.schemas.sources import Sources
2528
from datapilot.exceptions.exceptions import AltimateFileNotFoundError
2629
from datapilot.exceptions.exceptions import AltimateInvalidJSONError
2730
from datapilot.utils.utils import extract_dir_name_from_file_path
2831
from datapilot.utils.utils import extract_folders_in_path
2932
from datapilot.utils.utils import is_superset_path
3033
from datapilot.utils.utils import load_json
3134
from vendor.dbt_artifacts_parser.parser import parse_manifest
35+
from vendor.dbt_artifacts_parser.parser import parse_run_results
36+
from vendor.dbt_artifacts_parser.parser import parse_semantic_manifest
37+
from vendor.dbt_artifacts_parser.parser import parse_sources
3238

3339
MODEL_TYPE_PATTERNS = {
3440
STAGING: r"^stg_.*", # Example: models starting with 'stg_'
@@ -94,8 +100,52 @@ def load_catalog(catalog_path: str) -> Catalog:
94100
return catalog
95101

96102

97-
def load_run_results(run_results_path: str) -> Manifest:
98-
raise NotImplementedError
103+
def load_run_results(run_results_path: str) -> RunResults:
104+
try:
105+
run_results_dict = load_json(run_results_path)
106+
except FileNotFoundError as e:
107+
raise AltimateFileNotFoundError(f"Run results file not found: {run_results_path}. Error: {e}") from e
108+
except ValueError as e:
109+
raise AltimateInvalidJSONError(f"Invalid JSON file: {run_results_path}. Error: {e}") from e
110+
111+
try:
112+
run_results: RunResults = parse_run_results(run_results_dict)
113+
except ValueError as e:
114+
raise AltimateInvalidManifestError(f"Invalid run results file: {run_results_path}. Error: {e}") from e
115+
116+
return run_results
117+
118+
119+
def load_sources(sources_path: str) -> Sources:
120+
try:
121+
sources_dict = load_json(sources_path)
122+
except FileNotFoundError as e:
123+
raise AltimateFileNotFoundError(f"Sources file not found: {sources_path}. Error: {e}") from e
124+
except ValueError as e:
125+
raise AltimateInvalidJSONError(f"Invalid JSON file: {sources_path}. Error: {e}") from e
126+
127+
try:
128+
sources: Sources = parse_sources(sources_dict)
129+
except ValueError as e:
130+
raise AltimateInvalidManifestError(f"Invalid sources file: {sources_path}. Error: {e}") from e
131+
132+
return sources
133+
134+
135+
def load_semantic_manifest(semantic_manifest_path: str) -> SemanticManifest:
136+
try:
137+
semantic_manifest_dict = load_json(semantic_manifest_path)
138+
except FileNotFoundError as e:
139+
raise AltimateFileNotFoundError(f"Semantic manifest file not found: {semantic_manifest_path}. Error: {e}") from e
140+
except ValueError as e:
141+
raise AltimateInvalidJSONError(f"Invalid JSON file: {semantic_manifest_path}. Error: {e}") from e
142+
143+
try:
144+
semantic_manifest: SemanticManifest = parse_semantic_manifest(semantic_manifest_dict)
145+
except ValueError as e:
146+
raise AltimateInvalidManifestError(f"Invalid semantic manifest file: {semantic_manifest_path}. Error: {e}") from e
147+
148+
return semantic_manifest
99149

100150

101151
# TODO: Add tests!

src/vendor/dbt_artifacts_parser/parser.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4
3636
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v5 import RunResultsV5
3737
from vendor.dbt_artifacts_parser.parsers.run_results.run_results_v6 import RunResultsV6
38+
from vendor.dbt_artifacts_parser.parsers.semantic_manifest.semantic_manifest_v1 import SemanticManifestV1
3839
from vendor.dbt_artifacts_parser.parsers.sources.sources_v1 import SourcesV1
3940
from vendor.dbt_artifacts_parser.parsers.sources.sources_v2 import SourcesV2
4041
from vendor.dbt_artifacts_parser.parsers.sources.sources_v3 import SourcesV3
@@ -345,3 +346,20 @@ def parse_sources_v3(sources: dict) -> SourcesV3:
345346
if dbt_schema_version == ArtifactTypes.SOURCES_V3.value.dbt_schema_version:
346347
return SourcesV3(**sources)
347348
raise ValueError("Not a sources.json v3")
349+
350+
351+
#
352+
# semantic-manifest
353+
#
354+
def parse_semantic_manifest(semantic_manifest: dict) -> SemanticManifestV1:
355+
"""Parse semantic_manifest.json
356+
357+
Args:
358+
semantic_manifest: A dict of semantic_manifest.json
359+
360+
Returns:
361+
SemanticManifestV1
362+
"""
363+
# Semantic manifest uses a flexible schema, so we accept any valid dict
364+
# The schema version check is optional since the format may vary
365+
return SemanticManifestV1(**semantic_manifest)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
from vendor.dbt_artifacts_parser.parsers.semantic_manifest.semantic_manifest_v1 import SemanticManifestV1
18+
19+
__all__ = ["SemanticManifestV1"]

0 commit comments

Comments
 (0)