From 931f1cd672e79520423a0c80fd8f10a18464b825 Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Tue, 23 Jan 2024 16:36:07 +0100 Subject: [PATCH 1/2] Inner dev loop fixes --- default_python/requirements-dev.txt | 3 +++ default_python/src/default_python/main.py | 5 ++--- default_python/src/dlt_pipeline.ipynb | 9 ++++----- default_python/src/notebook.ipynb | 24 ++++++++++++++++++++--- default_python/tests/main_test.py | 13 ++++++------ 5 files changed, 37 insertions(+), 17 deletions(-) diff --git a/default_python/requirements-dev.txt b/default_python/requirements-dev.txt index 40e79bf..35ca1e1 100644 --- a/default_python/requirements-dev.txt +++ b/default_python/requirements-dev.txt @@ -3,6 +3,9 @@ ## For defining dependencies used by jobs in Databricks Workflows, see ## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +## Add code completion support for DLT +databricks-dlt + ## pytest is the default package used for testing pytest diff --git a/default_python/src/default_python/main.py b/default_python/src/default_python/main.py index 48a80b0..00b3d6e 100644 --- a/default_python/src/default_python/main.py +++ b/default_python/src/default_python/main.py @@ -1,11 +1,10 @@ from pyspark.sql import SparkSession -def get_taxis(): - spark = SparkSession.builder.getOrCreate() +def get_taxis(spark: SparkSession): return spark.read.table("samples.nyctaxi.trips") def main(): - get_taxis().show(5) + get_taxis(spark).show(5) if __name__ == '__main__': main() diff --git a/default_python/src/dlt_pipeline.ipynb b/default_python/src/dlt_pipeline.ipynb index 718160e..f277274 100644 --- a/default_python/src/dlt_pipeline.ipynb +++ b/default_python/src/dlt_pipeline.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, @@ -33,14 +33,13 @@ "source": [ "# Import DLT and src/default_python\n", "import dlt\n", - "import sys\n", "from pyspark.sql.functions import expr\n", "from default_python import main" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, @@ -54,7 +53,7 @@ "source": [ "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis()\n", + " return main.get_taxis(spark)\n", "\n", "@dlt.table\n", "def filtered_taxis():\n", @@ -79,7 +78,7 @@ }, "language_info": { "name": "python", - "version": "3.11.4" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/default_python/src/notebook.ipynb b/default_python/src/notebook.ipynb index b3886a5..0d6a5a3 100644 --- a/default_python/src/notebook.ipynb +++ b/default_python/src/notebook.ipynb @@ -19,7 +19,17 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { @@ -36,7 +46,7 @@ "source": [ "from default_python import main\n", "\n", - "main.get_taxis().show(10)" + "main.get_taxis(spark).show(10)" ] } ], @@ -56,8 +66,16 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.11.4" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" } }, "nbformat": 4, diff --git a/default_python/tests/main_test.py b/default_python/tests/main_test.py index 9c20d7a..d07e38e 100644 --- a/default_python/tests/main_test.py +++ b/default_python/tests/main_test.py @@ -1,14 +1,15 @@ -from databricks.connect import DatabricksSession -from pyspark.sql import SparkSession +from databricks.connect import DatabricksSession as SparkSession from default_python import main +from pytest import fixture # Create a new Databricks Connect session. If this fails, # check that you have configured Databricks Connect correctly. # See https://docs.databricks.com/dev-tools/databricks-connect.html. -SparkSession.builder = DatabricksSession.builder -SparkSession.builder.getOrCreate() +@fixture(scope="session") +def spark(): + return SparkSession.builder.getOrCreate() -def test_main(): - taxis = main.get_taxis() +def test_main(spark): + taxis = main.get_taxis(spark) assert taxis.count() > 5 From d8084a799fceeab08c0f32fc5ca757941e15aa3c Mon Sep 17 00:00:00 2001 From: Fabian Jakobs Date: Tue, 27 Feb 2024 16:07:28 +0100 Subject: [PATCH 2/2] fix running main --- default_python/scratch/exploration.ipynb | 22 ++++++++++++++++++++-- default_python/src/default_python/main.py | 4 +++- default_python/src/notebook.ipynb | 2 +- default_python/tests/main_test.py | 12 ++++++------ 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/default_python/scratch/exploration.ipynb b/default_python/scratch/exploration.ipynb index 85c9640..516f1ec 100644 --- a/default_python/scratch/exploration.ipynb +++ b/default_python/scratch/exploration.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, @@ -21,7 +31,7 @@ "sys.path.append('../src')\n", "from default_python import main\n", "\n", - "main.get_taxis().show(10)" + "main.get_taxis(spark).show(10)" ] } ], @@ -41,8 +51,16 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.11.4" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" } }, "nbformat": 4, diff --git a/default_python/src/default_python/main.py b/default_python/src/default_python/main.py index 00b3d6e..9f662fa 100644 --- a/default_python/src/default_python/main.py +++ b/default_python/src/default_python/main.py @@ -1,9 +1,11 @@ from pyspark.sql import SparkSession -def get_taxis(spark: SparkSession): +def get_taxis(spark: SparkSession): return spark.read.table("samples.nyctaxi.trips") def main(): + from databricks.connect import DatabricksSession as SparkSession + spark = SparkSession.builder.getOrCreate() get_taxis(spark).show(5) if __name__ == '__main__': diff --git a/default_python/src/notebook.ipynb b/default_python/src/notebook.ipynb index 0d6a5a3..cf50ed7 100644 --- a/default_python/src/notebook.ipynb +++ b/default_python/src/notebook.ipynb @@ -75,7 +75,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/default_python/tests/main_test.py b/default_python/tests/main_test.py index d07e38e..682c40b 100644 --- a/default_python/tests/main_test.py +++ b/default_python/tests/main_test.py @@ -1,15 +1,15 @@ from databricks.connect import DatabricksSession as SparkSession +from pytest import fixture from default_python import main from pytest import fixture -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. - @fixture(scope="session") def spark(): - return SparkSession.builder.getOrCreate() + spark = SparkSession.builder.getOrCreate() + yield spark + spark.stop() + -def test_main(spark): +def test_main(spark: SparkSession): taxis = main.get_taxis(spark) assert taxis.count() > 5