Commit a1a4eef: Add job to run unit tests
Parent: 75218eb

4 files changed: +53 -1 lines changed


.gitignore

Lines changed: 1 addition & 1 deletion
@@ -1,8 +1,8 @@
 .databricks/
 .venv/
+.pytest_cache/
 *.pyc
 __pycache__/
-.pytest_cache/
 dist/
 build/
 covid_analysis.egg-info/

.vscode/launch.json

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "databricks",
+            "request": "launch",
+            "name": "Unit Tests (on Databricks)",
+            "program": "${workspaceFolder}/jobs/pytest_databricks.py",
+            "args": ["./tests", "-p", "no:cacheprovider"],
+            "env": {}
+        }
+    ]
+}
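Because the runner script below forwards its arguments straight to pytest, the "args" array in this configuration is effectively the pytest command line. As a minimal sketch (assuming pytest is installed and the working directory is the repository root), the equivalent direct call is:

import pytest

# "-p no:cacheprovider" disables pytest's cache plugin, so no .pytest_cache/
# directory is written (the same directory the .gitignore change accounts for).
exit_code = pytest.main(["./tests", "-p", "no:cacheprovider"])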

jobs/pytest_databricks.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+import pytest
+import os
+import sys
+
+
+def main():
+    # Run all tests in the repository root.
+    repo_root = os.path.dirname(os.path.dirname(__file__))
+    os.chdir(repo_root)
+
+    # Skip writing pyc files on a readonly filesystem.
+    sys.dont_write_bytecode = True
+
+    _ = pytest.main(sys.argv[1:])
+
+
+if __name__ == "__main__":
+    main()
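One detail worth noting: pytest.main returns an exit code, which the script discards (_ = ...). If the job run should be reported as failed whenever a test fails, a small variation (a sketch, not part of this commit) would propagate that code:

import sys

import pytest

retcode = pytest.main(sys.argv[1:])
# A nonzero pytest exit code means test failures or errors; re-raise it as
# the process exit status so the job run is reported as failed.
if retcode != 0:
    raise SystemExit(int(retcode))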

tests/spark_test.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+from pyspark.sql import SparkSession
+import pytest
+
+
+@pytest.fixture
+def spark() -> SparkSession:
+    """
+    Create a Spark session. Unit tests don't have access to the spark global.
+    """
+    return SparkSession.builder.getOrCreate()
+
+
+def test_spark(spark):
+    """
+    Example test that needs to run on the cluster to work.
+    """
+    data = spark.sql("select 1").collect()
+    assert data[0][0] == 1
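The same fixture can back further tests in this file. As a hypothetical example (not part of this commit), a small DataFrame round trip reusing the spark fixture above might look like:

def test_create_dataframe(spark):
    """
    Hypothetical example: build a small DataFrame and verify its contents.
    """
    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])
    assert df.count() == 2
    assert {row["label"] for row in df.collect()} == {"a", "b"}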
