Merged
5 changes: 5 additions & 0 deletions .env.example
@@ -16,6 +16,11 @@ SQLMESH_DUCKDB_LOCAL_PATH=/tmp/oso.duckdb
DAGSTER_USE_LOCAL_SECRETS=True
#DAGSTER_GCP_SECRETS_PREFIX=dagster

# OSO's python libraries use JSON logging by default, which can be hard to
# read when running locally. Setting this to 0 outputs logs in a more
# human-readable format.
OSO_ENABLE_JSON_LOGS=0

## Google Cloud setup
# You will need to generate Google application credentials.
# You can log in via `gcloud auth application-default login`
37 changes: 37 additions & 0 deletions apps/docs/docs/contribute-data/setup/index.md
@@ -215,3 +215,40 @@ Notice that after `-m` the code location's module path is specified. It is
useful for newcomers to note that the `warehouse/` path in the repository is not
considered a python module: it does not contain an `__init__.py` file and does
not appear as a python module in the root `pyproject.toml`.

### Running dagster with sqlmesh locally

This is mostly for the OSO team; most contributors should not need to run
sqlmesh through the dagster UI locally. If you are only adding models, running
sqlmesh on its own is enough. The main reason to run sqlmesh under dagster
locally is to verify that the dagster-sqlmesh integration works as expected
with our particular pipeline.

Some environment variables need to be set in your `.env`:

```bash
# While not strictly necessary, you likely want the sqlmesh dagster asset
# caching enabled so restarting doesn't take so long.
DAGSTER_ASSET_CACHE_ENABLED=1
DAGSTER_ASSET_CACHE_DIR=/path/to/some/cache/dir # change this
# You can set this number to anything reasonable for your testing use case
DAGSTER_ASSET_CACHE_DEFAULT_TTL_SECONDS=3600
# `local` uses duckdb
# `local-trino` uses a locally deployed trino
# We suggest `local` as it is faster. This doc assumes duckdb.
DAGSTER_SQLMESH_GATEWAY=local
SQLMESH_TESTING_ENABLED=1
OSO_ENABLE_JSON_LOGS=0
```
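As a minimal sketch, these variables can also be exported in your shell before launching dagster. The cache directory path below is a placeholder, not an OSO default; pick any writable location:

```shell
# Sketch only: the cache directory path is a placeholder; choose any
# writable location on your machine.
export DAGSTER_ASSET_CACHE_ENABLED=1
export DAGSTER_ASSET_CACHE_DIR="${TMPDIR:-/tmp}/oso-dagster-asset-cache"
export DAGSTER_ASSET_CACHE_DEFAULT_TTL_SECONDS=3600

# Make sure the cache directory exists before dagster tries to use it.
mkdir -p "$DAGSTER_ASSET_CACHE_DIR"
echo "cache dir: $DAGSTER_ASSET_CACHE_DIR (ttl ${DAGSTER_ASSET_CACHE_DEFAULT_TTL_SECONDS}s)"
```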

Then run the sqlmesh local test setup to initialize your local sqlmesh duckdb
with the oso local seed data.

```bash
uv run oso local sqlmesh-test --duckdb
```
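A quick sanity check can confirm the seeded database file exists. This sketch assumes `SQLMESH_DUCKDB_LOCAL_PATH` is set in your `.env`; the fallback below is only the example value from `.env.example`:

```shell
# Sketch only: SQLMESH_DUCKDB_LOCAL_PATH comes from your .env; the
# fallback is the example value from .env.example.
DB_PATH="${SQLMESH_DUCKDB_LOCAL_PATH:-/tmp/oso.duckdb}"
if [ -f "$DB_PATH" ]; then
  echo "found duckdb database at $DB_PATH"
else
  echo "no duckdb database at $DB_PATH; re-run the sqlmesh-test command above"
fi
```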

It should now be possible to run sqlmesh and dagster locally. When
materializing sqlmesh assets, dagster may complain about out-of-date
dependencies. Because we ran the local test setup, the data those assets depend
on should already have been added by the oso local seed setup.
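If stale-dependency complaints persist after seeding, one workaround we assume may help (this is an assumption, not a documented OSO procedure) is clearing the dagster asset cache so assets re-materialize from scratch:

```shell
# Assumption: clearing the asset cache forces fresh materializations on the
# next dagster run. DAGSTER_ASSET_CACHE_DIR is the directory set in .env;
# the fallback here is a placeholder.
CACHE_DIR="${DAGSTER_ASSET_CACHE_DIR:-${TMPDIR:-/tmp}/oso-dagster-asset-cache}"
rm -rf "$CACHE_DIR"
mkdir -p "$CACHE_DIR"
echo "cleared asset cache at $CACHE_DIR"
```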
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -67,7 +67,7 @@ dependencies = [
"kr8s==0.20.9",
"structlog>=25.4.0",
"pandas-gbq>=0.29.2",
"dagster-sqlmesh>=0.19.0",
"dagster-sqlmesh>=0.20.0",
"oso-core",
"pyoso",
"metrics-service"
9 changes: 5 additions & 4 deletions uv.lock


7 changes: 5 additions & 2 deletions warehouse/oso_dagster/assets/sqlmesh/sqlmesh.py
@@ -194,8 +194,10 @@ def run_sqlmesh(
config.allow_destructive_models
)

# If we specify a dev_environment, we will first plan it for safety
if dev_environment:
# If we specify a dev_environment, we will first plan it for
# safety. Restatements are skipped here because planning with
# restatements can end up duplicating work.
if dev_environment and not config.restate_models:
context.log.info("Planning dev environment")
all(
sqlmesh.run(
@@ -206,6 +208,7 @@ def run_sqlmesh(
end=config.end,
restate_models=restate_models,
skip_run=True,
materializations_enabled=False,
)
)
