Skip to content

Commit 2b985af

Browse files
committed
Fix pre-commit hook: load manifest/catalog from files instead of generating partial ones
1 parent aec9ae3 commit 2b985af

File tree

1 file changed

+66
-22
lines changed

1 file changed

+66
-22
lines changed

src/datapilot/core/platforms/dbt/hooks/executor_hook.py

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
from datapilot.core.platforms.dbt.executor import DBTInsightGenerator
1313
from datapilot.core.platforms.dbt.formatting import generate_model_insights_table
1414
from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
15+
from datapilot.core.platforms.dbt.utils import load_catalog
16+
from datapilot.core.platforms.dbt.utils import load_manifest
1517
from datapilot.utils.formatting.utils import tabulate_data
16-
from datapilot.utils.utils import generate_partial_manifest_catalog
1718

1819

1920
def validate_config_file(config_path: str) -> bool:
@@ -67,6 +68,16 @@ def main(argv: Optional[Sequence[str]] = None):
6768
help="Name of the DBT config to use from the API",
6869
)
6970

71+
parser.add_argument(
72+
"--manifest-path",
73+
help="Path to the DBT manifest file (defaults to ./target/manifest.json)",
74+
)
75+
76+
parser.add_argument(
77+
"--catalog-path",
78+
help="Path to the DBT catalog file (defaults to ./target/catalog.json)",
79+
)
80+
7081
args = parser.parse_known_args(argv)
7182

7283
# Handle config loading like in project_health command
@@ -138,44 +149,77 @@ def main(argv: Optional[Sequence[str]] = None):
138149
print("Warning: No instance name provided. Using default configuration.", file=sys.stderr)
139150
print("To specify an instance, use: --instance-name 'your-instance'", file=sys.stderr)
140151

141-
changed_files = args[1]
152+
# Determine manifest and catalog paths
153+
manifest_path = getattr(args[0], "manifest_path", None)
154+
catalog_path = getattr(args[0], "catalog_path", None)
142155

143-
if not changed_files:
144-
print("No changed files detected. Skipping datapilot checks.", file=sys.stderr)
145-
return
156+
# Set default paths if not provided
157+
if not manifest_path:
158+
manifest_path = str(Path(base_path) / "target" / "manifest.json")
159+
print(f"Using default manifest path: {manifest_path}", file=sys.stderr)
160+
else:
161+
print(f"Using provided manifest path: {manifest_path}", file=sys.stderr)
146162

147-
print(f"Processing {len(changed_files)} changed files...", file=sys.stderr)
148-
print(f"Changed files: {', '.join(changed_files)}", file=sys.stderr)
163+
if not catalog_path:
164+
catalog_path = str(Path(base_path) / "target" / "catalog.json")
165+
print(f"Using default catalog path: {catalog_path}", file=sys.stderr)
166+
else:
167+
print(f"Using provided catalog path: {catalog_path}", file=sys.stderr)
149168

169+
# Load manifest
170+
print("Loading manifest file...", file=sys.stderr)
150171
try:
151-
print("Generating partial manifest and catalog from changed files...", file=sys.stderr)
152-
selected_models, manifest, catalog = generate_partial_manifest_catalog(changed_files, base_path=base_path)
153-
154-
# Handle manifest object (could be ManifestV12 or similar)
172+
manifest = load_manifest(manifest_path)
155173
if hasattr(manifest, "nodes"):
156-
print(f"Generated manifest with {len(manifest.nodes)} nodes", file=sys.stderr)
174+
print(f"Manifest loaded successfully with {len(manifest.nodes)} nodes", file=sys.stderr)
157175
elif hasattr(manifest, "get") and callable(manifest.get):
158-
print(f"Generated manifest with {len(manifest.get('nodes', {}))} nodes", file=sys.stderr)
176+
print(f"Manifest loaded successfully with {len(manifest.get('nodes', {}))} nodes", file=sys.stderr)
159177
else:
160-
print(f"Generated manifest object of type: {type(manifest).__name__}", file=sys.stderr)
178+
print(f"Manifest loaded successfully, object type: {type(manifest).__name__}", file=sys.stderr)
179+
except Exception as e:
180+
print(f"Error loading manifest from {manifest_path}: {e}", file=sys.stderr)
181+
print("Pre-commit hook failed: Unable to load manifest file.", file=sys.stderr)
182+
sys.exit(1)
161183

162-
# Handle catalog object (could be CatalogV1 or similar)
163-
if catalog:
184+
# Load catalog if available
185+
catalog = None
186+
if Path(catalog_path).exists():
187+
print("Loading catalog file...", file=sys.stderr)
188+
try:
189+
catalog = load_catalog(catalog_path)
164190
if hasattr(catalog, "nodes"):
165-
print(f"Generated catalog with {len(catalog.nodes)} nodes", file=sys.stderr)
191+
print(f"Catalog loaded successfully with {len(catalog.nodes)} nodes", file=sys.stderr)
166192
elif hasattr(catalog, "get") and callable(catalog.get):
167-
print(f"Generated catalog with {len(catalog.get('nodes', {}))} nodes", file=sys.stderr)
193+
print(f"Catalog loaded successfully with {len(catalog.get('nodes', {}))} nodes", file=sys.stderr)
168194
else:
169-
print(f"Generated catalog object of type: {type(catalog).__name__}", file=sys.stderr)
170-
else:
171-
print("No catalog generated (catalog file not available)", file=sys.stderr)
195+
print(f"Catalog loaded successfully, object type: {type(catalog).__name__}", file=sys.stderr)
196+
except Exception as e:
197+
print(f"Warning: Error loading catalog from {catalog_path}: {e}", file=sys.stderr)
198+
print("Continuing without catalog...", file=sys.stderr)
199+
catalog = None
200+
else:
201+
print(f"Catalog file not found at {catalog_path}, continuing without catalog", file=sys.stderr)
172202

203+
# Get changed files for selective model testing
204+
changed_files = args[1]
205+
selected_models = []
206+
207+
if changed_files:
208+
print(f"Processing {len(changed_files)} changed files for selective testing...", file=sys.stderr)
209+
print(f"Changed files: {', '.join(changed_files)}", file=sys.stderr)
210+
# Use the changed file paths as-is for selective testing (no model names are extracted yet)
211+
# This could be enhanced to map file paths to model names
212+
selected_models = changed_files
213+
else:
214+
print("No changed files detected. Running checks on all models.", file=sys.stderr)
215+
216+
try:
173217
print("Initializing DBT Insight Generator...", file=sys.stderr)
174218
insight_generator = DBTInsightGenerator(
175219
manifest=manifest,
176220
catalog=catalog,
177221
config=config,
178-
selected_model_ids=selected_models,
222+
selected_models=selected_models,
179223
token=token,
180224
instance_name=instance_name,
181225
backend_url=backend_url,

0 commit comments

Comments
 (0)