|
22 | 22 | from sqlmesh.core.console import Console, get_console |
23 | 23 | from sqlmesh.core.environment import EnvironmentNamingInfo, execute_environment_statements |
24 | 24 | from sqlmesh.core.macros import RuntimeStage |
25 | | -from sqlmesh.core.snapshot.definition import to_view_mapping |
| 25 | +from sqlmesh.core.snapshot.definition import to_view_mapping, SnapshotTableInfo |
26 | 26 | from sqlmesh.core.plan import stages |
27 | 27 | from sqlmesh.core.plan.definition import EvaluatablePlan |
28 | 28 | from sqlmesh.core.scheduler import Scheduler |
|
40 | 40 | from sqlmesh.core.plan.common import identify_restatement_intervals_across_snapshot_versions |
41 | 41 | from sqlmesh.utils import CorrelationId |
42 | 42 | from sqlmesh.utils.concurrency import NodeExecutionFailedError |
43 | | -from sqlmesh.utils.errors import PlanError, SQLMeshError |
| 43 | +from sqlmesh.utils.errors import PlanError, ConflictingPlanError, SQLMeshError |
44 | 44 | from sqlmesh.utils.date import now, to_timestamp |
45 | 45 |
|
46 | 46 | logger = logging.getLogger(__name__) |
@@ -287,34 +287,78 @@ def visit_audit_only_run_stage( |
287 | 287 | def visit_restatement_stage( |
288 | 288 | self, stage: stages.RestatementStage, plan: EvaluatablePlan |
289 | 289 | ) -> None: |
290 | | - snapshot_intervals_to_restate = { |
291 | | - (s.id_and_version, i) for s, i in stage.snapshot_intervals.items() |
292 | | - } |
293 | | - |
294 | | - # Restating intervals on prod plans should mean that the intervals are cleared across |
295 | | - # all environments, not just the version currently in prod |
296 | | - # This ensures that work done in dev environments can still be promoted to prod |
297 | | - # by forcing dev environments to re-run intervals that changed in prod |
| 290 | + # Restating intervals on prod plans means that once the data for the restated intervals has been backfilled
| 291 | + # (which happens in the backfill stage), we then need to clear those intervals *from state* across all other environments.
| 292 | + #
| 293 | + # This ensures that work done in dev environments can still be promoted to prod, by forcing dev environments to
| 294 | + # re-run intervals that changed in prod (because after this stage runs they are cleared from state and thus show as missing).
| 295 | + #
| 296 | + # It also means that any new dev environments created while this restatement plan was running get the
| 297 | + # correct intervals cleared, because we look up matching snapshots as of right now rather than as of the time the plan
| 298 | + # was created, which could have been several hours ago if there was a lot of data to restate.
299 | 300 | # Without this rule, it's possible that promoting a dev table to prod will introduce old data to prod
300 | | - snapshot_intervals_to_restate.update( |
301 | | - { |
302 | | - (s.snapshot, s.interval) |
303 | | - for s in identify_restatement_intervals_across_snapshot_versions( |
304 | | - state_reader=self.state_sync, |
305 | | - prod_restatements=plan.restatements, |
306 | | - disable_restatement_models=plan.disabled_restatement_models, |
307 | | - loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()}, |
308 | | - current_ts=to_timestamp(plan.execution_time or now()), |
309 | | - ).values() |
310 | | - } |
311 | | - ) |
312 | 301 |
|
313 | | - self.state_sync.remove_intervals( |
314 | | - snapshot_intervals=list(snapshot_intervals_to_restate), |
315 | | - remove_shared_versions=plan.is_prod, |
| 302 | + intervals_to_clear = identify_restatement_intervals_across_snapshot_versions( |
| 303 | + state_reader=self.state_sync, |
| 304 | + prod_restatements=plan.restatements, |
| 305 | + disable_restatement_models=plan.disabled_restatement_models, |
| 306 | + loaded_snapshots={s.snapshot_id: s for s in stage.all_snapshots.values()}, |
| 307 | + current_ts=to_timestamp(plan.execution_time or now()), |
316 | 308 | ) |
317 | 309 |
|
| 310 | + if not intervals_to_clear: |
| 311 | + # Nothing to do |
| 312 | + return |
| 313 | + |
| 314 | + # While the restatements were being processed, did any of the snapshots being restated get new versions deployed? |
| 315 | + # If they did, those new versions will not reflect the data that was just restated, so we need to notify the user
| 316 | + deployed_during_restatement: t.Dict[ |
| 317 | + str, t.Tuple[SnapshotTableInfo, SnapshotTableInfo] |
| 318 | + ] = {} # tuple of (restated_snapshot, current_prod_snapshot) |
| 319 | + |
| 320 | + if deployed_env := self.state_sync.get_environment(plan.environment.name): |
| 321 | + promoted_snapshots_by_name = {s.name: s for s in deployed_env.snapshots} |
| 322 | + |
| 323 | + for name in plan.restatements: |
| 324 | + snapshot = stage.all_snapshots[name] |
| 325 | + version = snapshot.table_info.version |
| 326 | + if ( |
| 327 | + prod_snapshot := promoted_snapshots_by_name.get(name) |
| 328 | + ) and prod_snapshot.version != version: |
| 329 | + deployed_during_restatement[name] = ( |
| 330 | + snapshot.table_info, |
| 331 | + prod_snapshot.table_info, |
| 332 | + ) |
| 333 | + |
| 334 | + # We must *not* clear the intervals on the snapshots for which new versions were deployed while the restatement was running, to prevent
| 335 | + # subsequent plans from having unexpected intervals to backfill.
| 336 | + # Instead, we list the affected models and abort the plan with an error so the user can decide what to do
| 337 | + # (either re-attempt the restatement plan or leave things as they are).
| 338 | + filtered_intervals_to_clear = [ |
| 339 | + (s.snapshot, s.interval) |
| 340 | + for s in intervals_to_clear.values() |
| 341 | + if s.snapshot.name not in deployed_during_restatement |
| 342 | + ] |
| 343 | + |
| 344 | + if filtered_intervals_to_clear: |
| 345 | + # We still clear intervals in other envs for models that were successfully restated without having new versions promoted during restatement |
| 346 | + self.state_sync.remove_intervals( |
| 347 | + snapshot_intervals=filtered_intervals_to_clear, |
| 348 | + remove_shared_versions=plan.is_prod, |
| 349 | + ) |
| 350 | + |
| 351 | + if deployed_env and deployed_during_restatement: |
| 352 | + self.console.log_models_updated_during_restatement( |
| 353 | + list(deployed_during_restatement.values()), |
| 354 | + plan.environment.naming_info, |
| 355 | + self.default_catalog, |
| 356 | + ) |
| 357 | + raise ConflictingPlanError( |
| 358 | + f"Another plan ({deployed_env.summary.plan_id}) deployed new versions of {len(deployed_during_restatement)} models in the target environment '{plan.environment.name}' while they were being restated by this plan.\n" |
| 359 | + "Please re-apply your plan if these new versions should be restated." |
| 360 | + ) |
| 361 | + |
318 | 362 | def visit_environment_record_update_stage( |
319 | 363 | self, stage: stages.EnvironmentRecordUpdateStage, plan: EvaluatablePlan |
320 | 364 | ) -> None: |
|
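For callers, the practical effect of the change is that a restatement plan can now abort with ConflictingPlanError when another plan promotes new versions of the restated models mid-flight. Below is a minimal sketch of handling that, assuming a hypothetical build_and_apply_plan callable (not a SQLMesh API) that builds and applies a fresh restatement plan; only the exception's import path is taken from the diff above.

from sqlmesh.utils.errors import ConflictingPlanError


def apply_restatement_with_retry(build_and_apply_plan, max_attempts: int = 2) -> None:
    # `build_and_apply_plan` is a hypothetical zero-argument callable that creates a
    # fresh restatement plan and applies it; re-planning picks up any model versions
    # that were promoted to the target environment while the previous attempt ran.
    for attempt in range(1, max_attempts + 1):
        try:
            build_and_apply_plan()
            return
        except ConflictingPlanError:
            # Another plan promoted new versions of the restated models mid-restatement.
            # Their intervals were intentionally left untouched, so re-applying restates
            # the newly promoted versions; giving up leaves the data as it is.
            if attempt == max_attempts:
                raise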
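The conflict detection itself boils down to a version comparison between the snapshots this plan restated and the snapshots currently promoted in the target environment. An illustrative sketch with plain dictionaries and made-up model names, not SQLMesh types:

# Versions restated by this plan vs. versions currently promoted in the target environment.
restated_versions = {"db.model_a": "v1", "db.model_b": "v2"}
promoted_versions = {"db.model_a": "v1", "db.model_b": "v3"}

deployed_during_restatement = {
    name: (restated, promoted_versions[name])
    for name, restated in restated_versions.items()
    if name in promoted_versions and promoted_versions[name] != restated
}

print(deployed_during_restatement)
# {'db.model_b': ('v2', 'v3')} -> model_b got a new version while the restatement ran,
# so its intervals are left in place and the plan aborts with ConflictingPlanError
# instead of silently clearing them.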