|
1 | 1 | import uuid |
| 2 | +import datetime as dt |
2 | 3 |
|
3 | 4 | import pytest |
4 | 5 |
|
5 | 6 | from django.test import override_settings |
6 | 7 |
|
| 8 | +import temporalio.worker |
| 9 | +from temporalio import activity as temporal_activity |
| 10 | +from temporalio.testing import WorkflowEnvironment |
| 11 | + |
7 | 12 | from posthog.temporal.data_modeling import ducklake_copy_workflow as ducklake_module |
8 | 13 | from posthog.temporal.data_modeling.ducklake_copy_workflow import ( |
9 | 14 | DuckLakeCopyActivityInputs, |
@@ -126,3 +131,139 @@ def fake_ensure_bucket(config): |
126 | 131 | assert "read_parquet('s3://source/table/**/*.parquet')" in table_calls[0] |
127 | 132 | assert any("ATTACH" in statement for statement in fake_conn.sql_statements), "Expected DuckLake catalog attach" |
128 | 133 | assert fake_conn.closed is True |
| 134 | + |
| 135 | + |
| 136 | +@pytest.mark.asyncio |
| 137 | +@pytest.mark.django_db |
| 138 | +async def test_ducklake_copy_workflow_skips_when_feature_flag_disabled(monkeypatch, ateam): |
| 139 | + call_counts = {"metadata": 0, "copy": 0} |
| 140 | + |
| 141 | + @temporal_activity.defn |
| 142 | + async def metadata_stub(inputs: DataModelingDuckLakeCopyInputs): |
| 143 | + call_counts["metadata"] += 1 |
| 144 | + return [ |
| 145 | + DuckLakeCopyModelMetadata( |
| 146 | + model_label="model", |
| 147 | + saved_query_id=str(uuid.uuid4()), |
| 148 | + saved_query_name="model", |
| 149 | + normalized_name="model", |
| 150 | + source_glob_uri="s3://source/table/**/*.parquet", |
| 151 | + schema_name="data_modeling_team_1", |
| 152 | + table_name="model", |
| 153 | + ) |
| 154 | + ] |
| 155 | + |
| 156 | + @temporal_activity.defn |
| 157 | + async def copy_stub(inputs: DuckLakeCopyActivityInputs): |
| 158 | + call_counts["copy"] += 1 |
| 159 | + |
| 160 | + monkeypatch.setattr( |
| 161 | + "posthog.temporal.data_modeling.ducklake_copy_workflow.posthoganalytics.feature_enabled", |
| 162 | + lambda *args, **kwargs: False, |
| 163 | + ) |
| 164 | + monkeypatch.setattr(ducklake_module, "prepare_data_modeling_ducklake_metadata_activity", metadata_stub) |
| 165 | + monkeypatch.setattr(ducklake_module, "copy_data_modeling_model_to_ducklake_activity", copy_stub) |
| 166 | + |
| 167 | + inputs = DataModelingDuckLakeCopyInputs( |
| 168 | + team_id=ateam.pk, |
| 169 | + job_id="job", |
| 170 | + models=[ |
| 171 | + DuckLakeCopyModelInput( |
| 172 | + model_label="model", |
| 173 | + saved_query_id=str(uuid.uuid4()), |
| 174 | + table_uri="s3://source/table", |
| 175 | + file_uris=["s3://source/table/part-0.parquet"], |
| 176 | + ) |
| 177 | + ], |
| 178 | + ) |
| 179 | + |
| 180 | + async with await WorkflowEnvironment.start_time_skipping() as env: |
| 181 | + async with temporalio.worker.Worker( |
| 182 | + env.client, |
| 183 | + task_queue="ducklake-test", |
| 184 | + workflows=[ducklake_module.DuckLakeCopyDataModelingWorkflow], |
| 185 | + activities=[ |
| 186 | + ducklake_module.ducklake_copy_workflow_gate_activity, |
| 187 | + ducklake_module.prepare_data_modeling_ducklake_metadata_activity, |
| 188 | + ducklake_module.copy_data_modeling_model_to_ducklake_activity, |
| 189 | + ], |
| 190 | + workflow_runner=temporalio.worker.UnsandboxedWorkflowRunner(), |
| 191 | + ): |
| 192 | + await env.client.execute_workflow( |
| 193 | + ducklake_module.DuckLakeCopyDataModelingWorkflow.run, |
| 194 | + inputs, |
| 195 | + id=str(uuid.uuid4()), |
| 196 | + task_queue="ducklake-test", |
| 197 | + execution_timeout=dt.timedelta(seconds=30), |
| 198 | + ) |
| 199 | + |
| 200 | + assert call_counts["metadata"] == 0 |
| 201 | + assert call_counts["copy"] == 0 |
| 202 | + |
| 203 | + |
| 204 | +@pytest.mark.asyncio |
| 205 | +@pytest.mark.django_db |
| 206 | +async def test_ducklake_copy_workflow_runs_when_feature_flag_enabled(monkeypatch, ateam): |
| 207 | + call_counts = {"metadata": 0, "copy": 0} |
| 208 | + |
| 209 | + @temporal_activity.defn |
| 210 | + async def metadata_stub(inputs: DataModelingDuckLakeCopyInputs): |
| 211 | + call_counts["metadata"] += 1 |
| 212 | + return [ |
| 213 | + DuckLakeCopyModelMetadata( |
| 214 | + model_label="model", |
| 215 | + saved_query_id=str(uuid.uuid4()), |
| 216 | + saved_query_name="model", |
| 217 | + normalized_name="model", |
| 218 | + source_glob_uri="s3://source/table/**/*.parquet", |
| 219 | + schema_name="data_modeling_team_1", |
| 220 | + table_name="model", |
| 221 | + ) |
| 222 | + ] |
| 223 | + |
| 224 | + @temporal_activity.defn |
| 225 | + async def copy_stub(inputs: DuckLakeCopyActivityInputs): |
| 226 | + call_counts["copy"] += 1 |
| 227 | + |
| 228 | + monkeypatch.setattr( |
| 229 | + "posthog.temporal.data_modeling.ducklake_copy_workflow.posthoganalytics.feature_enabled", |
| 230 | + lambda *args, **kwargs: True, |
| 231 | + ) |
| 232 | + monkeypatch.setattr(ducklake_module, "prepare_data_modeling_ducklake_metadata_activity", metadata_stub) |
| 233 | + monkeypatch.setattr(ducklake_module, "copy_data_modeling_model_to_ducklake_activity", copy_stub) |
| 234 | + |
| 235 | + inputs = DataModelingDuckLakeCopyInputs( |
| 236 | + team_id=ateam.pk, |
| 237 | + job_id="job", |
| 238 | + models=[ |
| 239 | + DuckLakeCopyModelInput( |
| 240 | + model_label="model", |
| 241 | + saved_query_id=str(uuid.uuid4()), |
| 242 | + table_uri="s3://source/table", |
| 243 | + file_uris=["s3://source/table/part-0.parquet"], |
| 244 | + ) |
| 245 | + ], |
| 246 | + ) |
| 247 | + |
| 248 | + async with await WorkflowEnvironment.start_time_skipping() as env: |
| 249 | + async with temporalio.worker.Worker( |
| 250 | + env.client, |
| 251 | + task_queue="ducklake-test", |
| 252 | + workflows=[ducklake_module.DuckLakeCopyDataModelingWorkflow], |
| 253 | + activities=[ |
| 254 | + ducklake_module.ducklake_copy_workflow_gate_activity, |
| 255 | + ducklake_module.prepare_data_modeling_ducklake_metadata_activity, |
| 256 | + ducklake_module.copy_data_modeling_model_to_ducklake_activity, |
| 257 | + ], |
| 258 | + workflow_runner=temporalio.worker.UnsandboxedWorkflowRunner(), |
| 259 | + ): |
| 260 | + await env.client.execute_workflow( |
| 261 | + ducklake_module.DuckLakeCopyDataModelingWorkflow.run, |
| 262 | + inputs, |
| 263 | + id=str(uuid.uuid4()), |
| 264 | + task_queue="ducklake-test", |
| 265 | + execution_timeout=dt.timedelta(seconds=30), |
| 266 | + ) |
| 267 | + |
| 268 | + assert call_counts["metadata"] == 1 |
| 269 | + assert call_counts["copy"] == 1 |
0 commit comments