Skip to content

Commit af23595

Browse files
gaurpulkit, rakendd, saravmajestic
authored
feat: project governance llm checks (#45)
* project governance llm checks * changes * changes * updated fields * add llm insights to datapilot * category * teammate_check_id * no check condition * change * handle null package_name * handling none values * fix: null condition * fix --------- Co-authored-by: rakendd <[email protected]> Co-authored-by: saravmajestic <[email protected]>
1 parent 35d2598 commit af23595

File tree

5 files changed

+119
-2
lines changed

5 files changed

+119
-2
lines changed

src/datapilot/clients/altimate/client.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,16 @@ def validate_upload_to_integration(self):
9191
def start_dbt_ingestion(self, params=None):
9292
endpoint = "/dbt/v1/start_dbt_ingestion"
9393
return self.post(endpoint, data=params)
94+
95+
def get_project_governance_llm_checks(self, params=None):
    """Request the project governance checks from the backend.

    Issues a GET against ``/project_governance/checks``.

    :param params: Optional query parameters forwarded unchanged to ``self.get``.
    :return: Whatever ``self.get`` returns for this endpoint.
    """
    return self.get("/project_governance/checks", params=params)
98+
99+
def run_project_governance_llm_checks(self, manifest, catalog, check_names):
    """Submit the selected project governance checks for execution.

    Issues a POST against ``/project_governance/check/run`` whose body
    carries the manifest, the catalog and the names of the checks to run.

    :param manifest: Manifest payload, sent under the ``manifest`` key.
    :param catalog: Catalog payload, sent under the ``catalog`` key.
    :param check_names: Names of the checks to run, sent under ``check_names``.
    :return: Whatever ``self.post`` returns for this endpoint.
    """
    payload = {"manifest": manifest, "catalog": catalog, "check_names": check_names}
    return self.post("/project_governance/check/run", data=payload)

src/datapilot/clients/altimate/utils.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,24 @@ def start_dbt_ingestion(api_token, tenant, dbt_core_integration_id, dbt_core_int
103103
"ok": False,
104104
"message": "Error starting dbt ingestion worker. ",
105105
}
106+
107+
108+
def get_project_governance_llm_checks(api_token, tenant, backend_url):
    """Fetch the project governance checks using a freshly built API client.

    :param api_token: Token passed to ``APIClient`` as ``api_token``.
    :param tenant: Tenant passed to ``APIClient`` as ``tenant``.
    :param backend_url: Backend URL passed to ``APIClient`` as ``base_url``.
    :return: The result of ``APIClient.get_project_governance_llm_checks()``.
    """
    return APIClient(
        api_token=api_token,
        base_url=backend_url,
        tenant=tenant,
    ).get_project_governance_llm_checks()
115+
116+
117+
def run_project_governance_llm_checks(api_token, tenant, backend_url, manifest, catalog, check_names):
    """Run the given project governance checks using a freshly built API client.

    :param api_token: Token passed to ``APIClient`` as ``api_token``.
    :param tenant: Tenant passed to ``APIClient`` as ``tenant``.
    :param backend_url: Backend URL passed to ``APIClient`` as ``base_url``.
    :param manifest: Manifest payload forwarded to the client.
    :param catalog: Catalog payload forwarded to the client.
    :param check_names: Names of the checks to run, forwarded to the client.
    :return: The result of ``APIClient.run_project_governance_llm_checks(...)``.
    """
    client = APIClient(api_token=api_token, base_url=backend_url, tenant=tenant)
    return client.run_project_governance_llm_checks(
        manifest=manifest,
        catalog=catalog,
        check_names=check_names,
    )

src/datapilot/core/platforms/dbt/cli/cli.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ def dbt():
2828

2929

3030
@dbt.command("project-health")
31+
@click.option("--token", required=False, help="Your API token for authentication.")
32+
@click.option("--instance-name", required=False, help="Your tenant ID.")
3133
@click.option(
3234
"--manifest-path",
3335
required=True,
@@ -49,7 +51,10 @@ def dbt():
4951
default=None,
5052
help="Selective model testing. Specify one or more models to run tests on.",
5153
)
52-
def project_health(manifest_path, catalog_path, config_path=None, select=None):
54+
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default="https://api.myaltimate.com")
55+
def project_health(
56+
token, instance_name, manifest_path, catalog_path, config_path=None, select=None, backend_url="https://api.myaltimate.com"
57+
):
5358
"""
5459
Validate the DBT project's configuration and structure.
5560
:param manifest_path: Path to the DBT manifest file.
@@ -62,7 +67,16 @@ def project_health(manifest_path, catalog_path, config_path=None, select=None):
6267
selected_models = select.split(" ")
6368
manifest = load_manifest(manifest_path)
6469
catalog = load_catalog(catalog_path) if catalog_path else None
65-
insight_generator = DBTInsightGenerator(manifest=manifest, catalog=catalog, config=config, selected_models=selected_models)
70+
71+
insight_generator = DBTInsightGenerator(
72+
manifest=manifest,
73+
catalog=catalog,
74+
config=config,
75+
selected_models=selected_models,
76+
token=token,
77+
instance_name=instance_name,
78+
backend_url=backend_url,
79+
)
6680
reports = insight_generator.run()
6781

6882
package_insights = reports[PROJECT]

src/datapilot/core/platforms/dbt/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
MODEL = "model"
55
SOURCE = "source"
66

7+
LLM = "llm"
8+
79

810
PROJECT = "project"
911
SQL = "sql"

src/datapilot/core/platforms/dbt/executor.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,16 @@
55
from typing import List
66
from typing import Optional
77

8+
from datapilot.clients.altimate.utils import get_project_governance_llm_checks
9+
from datapilot.clients.altimate.utils import run_project_governance_llm_checks
10+
from datapilot.core.platforms.dbt.constants import LLM
811
from datapilot.core.platforms.dbt.constants import MODEL
912
from datapilot.core.platforms.dbt.constants import PROJECT
1013
from datapilot.core.platforms.dbt.exceptions import AltimateCLIArgumentError
1114
from datapilot.core.platforms.dbt.factory import DBTFactory
1215
from datapilot.core.platforms.dbt.insights import INSIGHTS
16+
from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
17+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
1318
from datapilot.core.platforms.dbt.schemas.manifest import Catalog
1419
from datapilot.core.platforms.dbt.schemas.manifest import Manifest
1520
from datapilot.core.platforms.dbt.utils import get_models
@@ -29,11 +34,19 @@ def __init__(
2934
target: str = "dev",
3035
selected_models: Optional[str] = None,
3136
selected_model_ids: Optional[List[str]] = None,
37+
token: Optional[str] = None,
38+
instance_name: Optional[str] = None,
39+
backend_url: Optional[str] = None,
3240
):
3341
self.run_results_path = run_results_path
3442
self.target = target
3543
self.env = env
3644
self.config = config or {}
45+
self.token = token
46+
self.instance_name = instance_name
47+
self.backend_url = backend_url
48+
self.manifest = manifest
49+
self.catalog = catalog
3750

3851
self.manifest_wrapper = DBTFactory.get_manifest_wrapper(manifest)
3952
self.manifest_present = True
@@ -86,6 +99,22 @@ def _check_if_skipped(self, insight):
8699
return True
87100
return False
88101

102+
def run_llm_checks(self):
    """Run the enabled project governance LLM checks through the backend.

    Fetches the available checks, drops every check whose ``alias`` is listed
    under ``disabled_insights`` in the config, and submits the remaining check
    names together with the serialized manifest and catalog.

    :return: ``{"results": []}`` when no check remains enabled, otherwise the
        value returned by ``run_project_governance_llm_checks``.
    """
    llm_checks = get_project_governance_llm_checks(self.token, self.instance_name, self.backend_url)

    # Read the setting once instead of once per check. ``or []`` also covers a
    # config where the key is present but null, which would otherwise make the
    # ``not in`` test below raise TypeError.
    disabled_insights = self.config.get("disabled_insights") or []
    check_names = [check["name"] for check in llm_checks if check["alias"] not in disabled_insights]
    if not check_names:
        return {"results": []}

    return run_project_governance_llm_checks(
        self.token,
        self.instance_name,
        self.backend_url,
        # An empty string is sent when the manifest/catalog is absent.
        self.manifest.json() if self.manifest else "",
        self.catalog.json() if self.catalog else "",
        check_names,
    )
117+
89118
def run(self):
90119
reports = {
91120
MODEL: {},
@@ -156,4 +185,42 @@ def run(self):
156185
else:
157186
self.logger.info(color_text(f"Skipping insight {insight_class.NAME} as {message}", YELLOW))
158187

188+
if self.token and self.instance_name and self.backend_url:
189+
llm_check_results = self.run_llm_checks()
190+
llm_reports = llm_check_results.get("results", [])
191+
llm_insights = {}
192+
for report in llm_reports:
193+
for answer in report["answer"]:
194+
location = answer["unique_id"]
195+
if location not in llm_insights:
196+
llm_insights[location] = []
197+
metadata = answer.get("metadata", {})
198+
metadata["source"] = LLM
199+
metadata["teammate_check_id"] = report["id"]
200+
metadata["category"] = report["type"]
201+
llm_insights[location].append(
202+
DBTModelInsightResponse(
203+
insight=DBTInsightResult(
204+
type="Custom",
205+
name=report["name"],
206+
message=answer["message"],
207+
reason_to_flag=answer["reason_to_flag"],
208+
recommendation=answer["recommendation"],
209+
metadata=metadata,
210+
),
211+
severity=answer["severity"],
212+
path=answer["path"] if answer.get("path") else "",
213+
original_file_path=answer["original_file_path"] if answer.get("original_file_path") else "",
214+
package_name=answer["package_name"] if answer.get("package_name") else "",
215+
unique_id=answer["unique_id"],
216+
)
217+
)
218+
219+
if llm_insights:
220+
for key, value in llm_insights.items():
221+
if key in reports[MODEL]:
222+
reports[MODEL][key].extend(value)
223+
else:
224+
reports[MODEL][key] = value
225+
159226
return reports

0 commit comments

Comments
 (0)