|
12 | 12 | from datapilot.core.platforms.dbt.executor import DBTInsightGenerator |
13 | 13 | from datapilot.core.platforms.dbt.formatting import generate_model_insights_table |
14 | 14 | from datapilot.core.platforms.dbt.formatting import generate_project_insights_table |
| 15 | +from datapilot.core.platforms.dbt.utils import load_catalog |
| 16 | +from datapilot.core.platforms.dbt.utils import load_manifest |
15 | 17 | from datapilot.utils.formatting.utils import tabulate_data |
16 | | -from datapilot.utils.utils import generate_partial_manifest_catalog |
17 | 18 |
|
18 | 19 |
|
19 | 20 | def validate_config_file(config_path: str) -> bool: |
@@ -67,6 +68,16 @@ def main(argv: Optional[Sequence[str]] = None): |
67 | 68 | help="Name of the DBT config to use from the API", |
68 | 69 | ) |
69 | 70 |
|
| 71 | + parser.add_argument( |
| 72 | + "--manifest-path", |
| 73 | + help="Path to the DBT manifest file (defaults to ./target/manifest.json)", |
| 74 | + ) |
| 75 | + |
| 76 | + parser.add_argument( |
| 77 | + "--catalog-path", |
| 78 | + help="Path to the DBT catalog file (defaults to ./target/catalog.json)", |
| 79 | + ) |
| 80 | + |
70 | 81 | args = parser.parse_known_args(argv) |
71 | 82 |
|
72 | 83 | # Handle config loading like in project_health command |
@@ -138,44 +149,77 @@ def main(argv: Optional[Sequence[str]] = None): |
138 | 149 | print("Warning: No instance name provided. Using default configuration.", file=sys.stderr) |
139 | 150 | print("To specify an instance, use: --instance-name 'your-instance'", file=sys.stderr) |
140 | 151 |
|
141 | | - changed_files = args[1] |
| 152 | + # Determine manifest and catalog paths |
| 153 | + manifest_path = getattr(args[0], "manifest_path", None) |
| 154 | + catalog_path = getattr(args[0], "catalog_path", None) |
142 | 155 |
|
143 | | - if not changed_files: |
144 | | - print("No changed files detected. Skipping datapilot checks.", file=sys.stderr) |
145 | | - return |
| 156 | + # Set default paths if not provided |
| 157 | + if not manifest_path: |
| 158 | + manifest_path = str(Path(base_path) / "target" / "manifest.json") |
| 159 | + print(f"Using default manifest path: {manifest_path}", file=sys.stderr) |
| 160 | + else: |
| 161 | + print(f"Using provided manifest path: {manifest_path}", file=sys.stderr) |
146 | 162 |
|
147 | | - print(f"Processing {len(changed_files)} changed files...", file=sys.stderr) |
148 | | - print(f"Changed files: {', '.join(changed_files)}", file=sys.stderr) |
| 163 | + if not catalog_path: |
| 164 | + catalog_path = str(Path(base_path) / "target" / "catalog.json") |
| 165 | + print(f"Using default catalog path: {catalog_path}", file=sys.stderr) |
| 166 | + else: |
| 167 | + print(f"Using provided catalog path: {catalog_path}", file=sys.stderr) |
149 | 168 |
|
| 169 | + # Load manifest |
| 170 | + print("Loading manifest file...", file=sys.stderr) |
150 | 171 | try: |
151 | | - print("Generating partial manifest and catalog from changed files...", file=sys.stderr) |
152 | | - selected_models, manifest, catalog = generate_partial_manifest_catalog(changed_files, base_path=base_path) |
153 | | - |
154 | | - # Handle manifest object (could be ManifestV12 or similar) |
| 172 | + manifest = load_manifest(manifest_path) |
155 | 173 | if hasattr(manifest, "nodes"): |
156 | | - print(f"Generated manifest with {len(manifest.nodes)} nodes", file=sys.stderr) |
| 174 | + print(f"Manifest loaded successfully with {len(manifest.nodes)} nodes", file=sys.stderr) |
157 | 175 | elif hasattr(manifest, "get") and callable(manifest.get): |
158 | | - print(f"Generated manifest with {len(manifest.get('nodes', {}))} nodes", file=sys.stderr) |
| 176 | + print(f"Manifest loaded successfully with {len(manifest.get('nodes', {}))} nodes", file=sys.stderr) |
159 | 177 | else: |
160 | | - print(f"Generated manifest object of type: {type(manifest).__name__}", file=sys.stderr) |
| 178 | + print(f"Manifest loaded successfully, object type: {type(manifest).__name__}", file=sys.stderr) |
| 179 | + except Exception as e: |
| 180 | + print(f"Error loading manifest from {manifest_path}: {e}", file=sys.stderr) |
| 181 | + print("Pre-commit hook failed: Unable to load manifest file.", file=sys.stderr) |
| 182 | + sys.exit(1) |
161 | 183 |
|
162 | | - # Handle catalog object (could be CatalogV1 or similar) |
163 | | - if catalog: |
| 184 | + # Load catalog if available |
| 185 | + catalog = None |
| 186 | + if Path(catalog_path).exists(): |
| 187 | + print("Loading catalog file...", file=sys.stderr) |
| 188 | + try: |
| 189 | + catalog = load_catalog(catalog_path) |
164 | 190 | if hasattr(catalog, "nodes"): |
165 | | - print(f"Generated catalog with {len(catalog.nodes)} nodes", file=sys.stderr) |
| 191 | + print(f"Catalog loaded successfully with {len(catalog.nodes)} nodes", file=sys.stderr) |
166 | 192 | elif hasattr(catalog, "get") and callable(catalog.get): |
167 | | - print(f"Generated catalog with {len(catalog.get('nodes', {}))} nodes", file=sys.stderr) |
| 193 | + print(f"Catalog loaded successfully with {len(catalog.get('nodes', {}))} nodes", file=sys.stderr) |
168 | 194 | else: |
169 | | - print(f"Generated catalog object of type: {type(catalog).__name__}", file=sys.stderr) |
170 | | - else: |
171 | | - print("No catalog generated (catalog file not available)", file=sys.stderr) |
| 195 | + print(f"Catalog loaded successfully, object type: {type(catalog).__name__}", file=sys.stderr) |
| 196 | + except Exception as e: |
| 197 | + print(f"Warning: Error loading catalog from {catalog_path}: {e}", file=sys.stderr) |
| 198 | + print("Continuing without catalog...", file=sys.stderr) |
| 199 | + catalog = None |
| 200 | + else: |
| 201 | + print(f"Catalog file not found at {catalog_path}, continuing without catalog", file=sys.stderr) |
172 | 202 |
|
| 203 | + # Get changed files for selective model testing |
| 204 | + changed_files = args[1] |
| 205 | + selected_models = [] |
| 206 | + |
| 207 | + if changed_files: |
| 208 | + print(f"Processing {len(changed_files)} changed files for selective testing...", file=sys.stderr) |
| 209 | + print(f"Changed files: {', '.join(changed_files)}", file=sys.stderr) |
| 210 | + # Changed file paths are passed through unchanged as the model selection; |
| 211 | + # TODO: map these file paths to dbt model names before selective testing |
| 212 | + selected_models = changed_files |
| 213 | + else: |
| 214 | + print("No changed files detected. Running checks on all models.", file=sys.stderr) |
| 215 | + |
| 216 | + try: |
173 | 217 | print("Initializing DBT Insight Generator...", file=sys.stderr) |
174 | 218 | insight_generator = DBTInsightGenerator( |
175 | 219 | manifest=manifest, |
176 | 220 | catalog=catalog, |
177 | 221 | config=config, |
178 | | - selected_model_ids=selected_models, |
| 222 | + selected_models=selected_models, |
179 | 223 | token=token, |
180 | 224 | instance_name=instance_name, |
181 | 225 | backend_url=backend_url, |
|
0 commit comments