|
28 | 28 | StructuredExecutionReportClass, |
29 | 29 | _Aspect, |
30 | 30 | ) |
| 31 | +from datahub.metadata.urns import DataHubExecutionRequestUrn |
31 | 32 | from datahub.utilities.logging_manager import get_log_buffer |
| 33 | +from datahub.utilities.urns.error import InvalidUrnError |
32 | 34 | from datahub.utilities.urns.urn import Urn |
33 | 35 |
|
34 | 36 | logger = logging.getLogger(__name__) |
@@ -118,35 +120,63 @@ def __init__(self, sink: Sink, report_recipe: bool, ctx: PipelineContext) -> Non |
118 | 120 | ingestion_source_key = self.generate_unique_key(ctx.pipeline_config) |
119 | 121 | self.entity_name: str = self.generate_entity_name(ingestion_source_key) |
120 | 122 |
|
| 123 | + # If run_id is an execution request URN, the executor owns the source/request lifecycle. |
| 124 | + try: |
| 125 | + parsed = Urn.from_string(ctx.run_id) |
| 126 | + self._is_running_under_executor = ( |
| 127 | + parsed.entity_type == DataHubExecutionRequestUrn.ENTITY_TYPE |
| 128 | + ) |
| 129 | + except InvalidUrnError: |
| 130 | + self._is_running_under_executor = False |
| 131 | + except Exception: |
| 132 | + logger.warning( |
| 133 | + f"Unexpected error parsing run_id={ctx.run_id!r} as URN; " |
| 134 | + "assuming standalone CLI context.", |
| 135 | + exc_info=True, |
| 136 | + ) |
| 137 | + self._is_running_under_executor = False |
| 138 | + |
| 139 | + if self._is_running_under_executor: |
| 140 | + logger.debug(f"Executor-managed run detected (run_id={ctx.run_id}).") |
| 141 | + |
121 | 142 | self.ingestion_source_urn: Urn = Urn( |
122 | 143 | entity_type="dataHubIngestionSource", |
123 | 144 | entity_id=["cli-" + datahub_guid(ingestion_source_key)], |
124 | 145 | ) |
125 | 146 | logger.debug(f"Ingestion source urn = {self.ingestion_source_urn}") |
126 | | - self.execution_request_input_urn: Urn = Urn( |
127 | | - entity_type="dataHubExecutionRequest", entity_id=[ctx.run_id] |
128 | | - ) |
| 147 | + # Use the typed URN only in the executor path (run_id already validated as such). |
| 148 | + # For standalone CLI runs, run_id is a plain string or a URN of another entity type; |
| 149 | + # DataHubExecutionRequestUrn would raise InvalidUrnError on the latter, so use a generic Urn. |
| 150 | + if self._is_running_under_executor: |
| 151 | + self.execution_request_input_urn: Urn = DataHubExecutionRequestUrn( |
| 152 | + ctx.run_id |
| 153 | + ) |
| 154 | + else: |
| 155 | + self.execution_request_input_urn = Urn( |
| 156 | + entity_type="dataHubExecutionRequest", entity_id=[ctx.run_id] |
| 157 | + ) |
129 | 158 | self.start_time_ms: int = self.get_cur_time_in_ms() |
130 | 159 |
|
131 | | - # Construct the dataHubIngestionSourceInfo aspect |
132 | | - source_info_aspect = DataHubIngestionSourceInfoClass( |
133 | | - name=self.entity_name, |
134 | | - type=ctx.pipeline_config.source.type, |
135 | | - platform=make_data_platform_urn( |
136 | | - getattr(ctx.pipeline_config.source, "platform", "unknown") |
137 | | - ), |
138 | | - config=DataHubIngestionSourceConfigClass( |
139 | | - recipe=self._get_recipe_to_report(ctx), |
140 | | - version=nice_version_name(), |
141 | | - executorId=self._EXECUTOR_ID, |
142 | | - ), |
143 | | - ) |
| 160 | + if not self._is_running_under_executor: |
| 161 | + # Construct the dataHubIngestionSourceInfo aspect |
| 162 | + source_info_aspect = DataHubIngestionSourceInfoClass( |
| 163 | + name=self.entity_name, |
| 164 | + type=ctx.pipeline_config.source.type, |
| 165 | + platform=make_data_platform_urn( |
| 166 | + getattr(ctx.pipeline_config.source, "platform", "unknown") |
| 167 | + ), |
| 168 | + config=DataHubIngestionSourceConfigClass( |
| 169 | + recipe=self._get_recipe_to_report(ctx), |
| 170 | + version=nice_version_name(), |
| 171 | + executorId=self._EXECUTOR_ID, |
| 172 | + ), |
| 173 | + ) |
144 | 174 |
|
145 | | - # Emit the dataHubIngestionSourceInfo aspect |
146 | | - self._emit_aspect( |
147 | | - entity_urn=self.ingestion_source_urn, |
148 | | - aspect_value=source_info_aspect, |
149 | | - ) |
| 175 | + # Emit the dataHubIngestionSourceInfo aspect |
| 176 | + self._emit_aspect( |
| 177 | + entity_urn=self.ingestion_source_urn, |
| 178 | + aspect_value=source_info_aspect, |
| 179 | + ) |
150 | 180 |
|
151 | 181 | @staticmethod |
152 | 182 | def _convert_sets_to_lists(obj: Any) -> Any: |
@@ -214,6 +244,10 @@ def _emit_aspect( |
214 | 244 |
|
215 | 245 | def on_start(self, ctx: PipelineContext) -> None: |
216 | 246 | assert ctx.pipeline_config is not None |
| 247 | + |
| 248 | + if self._is_running_under_executor: |
| 249 | + return |
| 250 | + |
217 | 251 | # Construct the dataHubExecutionRequestInput aspect |
218 | 252 | execution_input_aspect = ExecutionRequestInputClass( |
219 | 253 | task=self._INGESTION_TASK_NAME, |
|
0 commit comments