4 | 4 | import logging |
5 | 5 | import os |
6 | 6 | import sys |
| 7 | +import textwrap |
7 | 8 | from datetime import datetime |
8 | 9 | from typing import Optional |
9 | 10 |
10 | 11 | import click |
11 | 12 | import click_spinner |
| 13 | +import tzlocal |
12 | 14 | from click_default_group import DefaultGroup |
13 | 15 | from tabulate import tabulate |
14 | 16 |
21 | 23 | post_rollback_endpoint, |
22 | 24 | ) |
23 | 25 | from datahub.configuration.config_loader import load_config_file |
| 26 | +from datahub.ingestion.graph.client import get_default_graph |
24 | 27 | from datahub.ingestion.run.connection import ConnectionManager |
25 | 28 | from datahub.ingestion.run.pipeline import Pipeline |
26 | 29 | from datahub.telemetry import telemetry |
@@ -198,6 +201,156 @@ async def run_ingestion_and_check_upgrade() -> int: |
198 | 201 | # don't raise SystemExit if there's no error |
199 | 202 |
200 | 203 |
| 204 | +@ingest.command() |
| 205 | +@upgrade.check_upgrade |
| 206 | +@telemetry.with_telemetry() |
| 207 | +@click.option( |
| 208 | + "-n", |
| 209 | + "--name", |
| 210 | + type=str, |
| 211 | + help="Recipe Name", |
| 212 | + required=True, |
| 213 | +) |
| 214 | +@click.option( |
| 215 | + "-c", |
| 216 | + "--config", |
| 217 | + type=click.Path(dir_okay=False), |
| 218 | + help="Config file in .toml or .yaml format.", |
| 219 | + required=True, |
| 220 | +) |
| 221 | +@click.option( |
| 222 | + "--urn", |
| 223 | + type=str, |
| 224 | + help="Urn of recipe to update", |
| 225 | + required=False, |
| 226 | +) |
| 227 | +@click.option( |
| 228 | + "--executor-id", |
| 229 | + type=str, |
| 230 | + default="default", |
| 231 | + help="Executor id to route execution requests to. Do not use this unless you have configured a custom executor.", |
| 232 | + required=False, |
| 233 | +) |
| 234 | +@click.option( |
| 235 | + "--cli-version", |
| 236 | + type=str, |
| 237 | + help="Provide a custom CLI version to use for ingestion. By default will use server default.", |
| 238 | + required=False, |
| 239 | + default=None, |
| 240 | +) |
| 241 | +@click.option( |
| 242 | + "--schedule", |
| 243 | + type=str, |
| 244 | + help="Cron definition for schedule. If none is provided, ingestion recipe will not be scheduled", |
| 245 | + required=False, |
| 246 | + default=None, |
| 247 | +) |
| 248 | +@click.option( |
| 249 | + "--time-zone", |
| 250 | + type=str, |
| 251 | + help=f"Timezone for the schedule. By default uses the timezone of the current system: {tzlocal.get_localzone_name()}.", |
| 252 | + required=False, |
| 253 | + default=tzlocal.get_localzone_name(), |
| 254 | +) |
| 255 | +def deploy( |
| 256 | + name: str, |
| 257 | + config: str, |
| 258 | + urn: str, |
| 259 | + executor_id: str, |
| 260 | + cli_version: str, |
| 261 | + schedule: str, |
| 262 | + time_zone: str, |
| 263 | +) -> None: |
| 264 | + """ |
| 265 | + Deploy an ingestion recipe to your DataHub instance. |
| 266 | +
| 267 | + The urn of the ingestion source will be based on the name parameter in the format: |
| 268 | + urn:li:dataHubIngestionSource:<name> |
| 269 | + """ |
| 270 | + |
| 271 | + datahub_graph = get_default_graph() |
| 272 | + |
| 273 | + pipeline_config = load_config_file( |
| 274 | + config, |
| 275 | + allow_stdin=True, |
| 276 | + resolve_env_vars=False, |
| 277 | + ) |
| 278 | + |
| 279 | + graphql_query: str |
| 280 | + |
| 281 | + variables: dict = { |
| 282 | + "urn": urn, |
| 283 | + "name": name, |
| 284 | + "type": pipeline_config["source"]["type"], |
| 285 | + "schedule": {"interval": schedule, "timezone": time_zone}, |
| 286 | + "recipe": json.dumps(pipeline_config), |
| 287 | + "executorId": executor_id, |
| 288 | + "version": cli_version, |
| 289 | + } |
| 290 | + |
| 291 | + if urn: |
| 292 | + if not datahub_graph.exists(urn): |
| 293 | + logger.error(f"Could not find recipe for provided urn: {urn}") |
| 294 | + sys.exit(1) |
| 295 | + logger.info("Found recipe URN, will update recipe.") |
| 296 | + |
| 297 | + graphql_query = textwrap.dedent( |
| 298 | + """ |
| 299 | + mutation updateIngestionSource( |
| 300 | + $urn: String!, |
| 301 | + $name: String!, |
| 302 | + $type: String!, |
| 303 | + $schedule: UpdateIngestionSourceScheduleInput, |
| 304 | + $recipe: String!, |
| 305 | + $executorId: String! |
| 306 | + $version: String) { |
| 307 | +
| 308 | + updateIngestionSource(urn: $urn, input: { |
| 309 | + name: $name, |
| 310 | + type: $type, |
| 311 | + schedule: $schedule, |
| 312 | + config: { |
| 313 | + recipe: $recipe, |
| 314 | + executorId: $executorId, |
| 315 | + version: $version, |
| 316 | + } |
| 317 | + }) |
| 318 | + } |
| 319 | + """ |
| 320 | + ) |
| 321 | + else: |
| 322 | + logger.info("No URN specified recipe urn, will create a new recipe.") |
| 323 | + graphql_query = textwrap.dedent( |
| 324 | + """ |
| 325 | + mutation createIngestionSource( |
| 326 | + $name: String!, |
| 327 | + $type: String!, |
| 328 | + $schedule: UpdateIngestionSourceScheduleInput, |
| 329 | + $recipe: String!, |
| 330 | + $executorId: String!, |
| 331 | + $version: String) { |
| 332 | +
| 333 | + createIngestionSource(input: { |
| 334 | + type: $type, |
| 335 | + schedule: $schedule, |
| 336 | + config: { |
| 337 | + recipe: $recipe, |
| 338 | + executorId: $executorId, |
| 339 | + version: $version, |
| 340 | + } |
| 341 | + }) |
| 342 | + } |
| 343 | + """ |
| 344 | + ) |
| 345 | + |
| 346 | + response = datahub_graph.execute_graphql(graphql_query, variables=variables) |
| 347 | + |
| 348 | + click.echo( |
| 349 | + f"✅ Successfully wrote data ingestion source metadata for recipe {name}:" |
| 350 | + ) |
| 351 | + click.echo(response) |
| 352 | + |
| 353 | + |
201 | 354 | def _test_source_connection(report_to: Optional[str], pipeline_config: dict) -> None: |
202 | 355 | connection_report = None |
203 | 356 | try: |
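
For readers trying out this change, a hypothetical invocation of the new command could look like the following. The recipe file name, schedule, and timezone are illustrative assumptions rather than values from this commit; the flags correspond to the options defined above, and the datahub ingest entry point comes from the surrounding CLI module.

    datahub ingest deploy \
        --name "my-recipe" \
        --config recipe.yaml \
        --schedule "5 * * * *" \
        --time-zone "Europe/London"

Omitting --urn creates a new ingestion source; passing --urn updates the existing source, per the branch in the code above.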
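
Since deploy reads pipeline_config["source"]["type"] when building the GraphQL variables, the recipe passed via --config needs at least a source.type entry. A minimal sketch of such a recipe, with an assumed mysql source purely for illustration:

    source:
      type: mysql
      config:
        host_port: localhost:3306

Note that load_config_file is called with resolve_env_vars=False, so environment-variable references in the recipe are uploaded to the server as written rather than expanded at deploy time.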