
Commit 6fe1b60

Merge pull request #3 from databricks-industry-solutions/feat/pytest
add databricks pytest runner. 13 of 14 tests run.
2 parents fd6ffd8 + 8a31d5b commit 6fe1b60

File tree: 3 files changed (+26, −88 lines)

databricks.yml · zipdcm/conftest.py · zipdcm/db_runner.py

databricks.yml

Lines changed: 6 additions & 6 deletions

@@ -30,19 +30,18 @@ resources:
   jobs:
     demo_workflow:
       name: "${var.project_name} - Pytest Workflow"
+      max_concurrent_runs: 4
       tasks:
         - task_key: dbrunner
-          spark_python_task:
-            python_file: /Workspace/Users/douglas.moore@databricks.com/python-data-sources-x/zipdcm/db_runner.py
+          notebook_task:
+            notebook_path: /Workspace/Users/douglas.moore@databricks.com/python-data-sources-x/zipdcm/db_runner
+            source: WORKSPACE
           existing_cluster_id: 0519-014005-pr11dvi3
           libraries:
             - pypi:
                 package: pyspark==4.0.0.dev1
-      git_source:
-        git_url: https://github.com/databricks-industry-solutions/python-data-sources.git
-        git_provider: gitHub
-        git_branch: feat/zipdcm
       tags:
+        dev: douglas_moore
         owner: douglas.moore@databricks.com
         solacc: pixels
       queue:
@@ -56,4 +55,5 @@ resources:
             - pytest==8.3.5
       budget_policy_id: d8e5830d-97cb-40b9-bd65-063434295162
 
+
 # For more options and schema, see: https://docs.databricks.com/aws/en/dev-tools/bundles/settings
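The bundle change above swaps the spark_python_task for a notebook_task pointing at the db_runner notebook in the workspace and drops the git_source block. As a hedged illustration only (not part of this commit), once the bundle is deployed the same job could be triggered programmatically. This sketch assumes the Databricks SDK for Python with its run_now_and_wait helper, credentials available in the environment, and a placeholder job_id looked up from the deployed "Pytest Workflow" job:

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.jobs import RunResultState

# Resolves host/token from environment variables or a configured profile.
workspace = WorkspaceClient()

# Placeholder job_id: substitute the ID of the deployed "Pytest Workflow" job.
run = workspace.jobs.run_now_and_wait(job_id=123456789012345)
if run.state.result_state != RunResultState.SUCCESS:
    raise Exception(f"Pytest workflow failed with state {run.state.result_state}")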

zipdcm/conftest.py

Lines changed: 2 additions & 4 deletions

@@ -11,10 +11,8 @@ def spark() -> SparkSession:
     the cluster in the remote Databricks workspace. Unit tests do not
     have access to this SparkSession by default.
     """
-    #sparkSession = DatabricksSession.builder.getOrCreate()
-    sparkSession = (SparkSession.builder
-        .master("local[*]")
-        .getOrCreate())
+    #sparkSession = DatabricksSession.builder.serverless(True).getOrCreate()
+    sparkSession = (SparkSession.builder.getOrCreate())
     sparkSession.dataSource.register(ZipDCMDataSource)
     return sparkSession
 
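With this change the fixture no longer builds a local[*] session; it reuses whatever SparkSession getOrCreate resolves on the cluster, and the commented-out line hints at a Databricks Connect serverless alternative. As a hedged sketch of how a test would consume the fixture, assuming a hypothetical file zipdcm/smoke_test.py (pytest discovers it because the filename ends in "_test.py" and the function name starts with "test_"):

# Hypothetical smoke test; the `spark` argument is injected by the fixture in conftest.py.
def test_spark_session_is_usable(spark):
    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "value"])
    assert df.count() == 2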

zipdcm/db_runner.py

Lines changed: 18 additions & 78 deletions

@@ -1,84 +1,24 @@
-import configparser
-import io
+# Databricks notebook source
+import pytest
 import os
+import sys
 
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.compute import ClusterSpec, DataSecurityMode, RuntimeEngine
-from databricks.sdk.service.jobs import (
-    GitProvider,
-    GitSource,
-    JobAccessControlRequest,
-    JobPermissionLevel,
-    NotebookTask,
-    RunResultState,
-    Source,
-    Task,
-)
+# Run all tests in the connected directory in the remote Databricks workspace.
+# By default, pytest searches through all files with filenames ending with
+# "_test.py" for tests. Within each of these files, pytest runs each function
+# with a function name beginning with "test_".
 
-WATCH_DOGS_EMAILS = os.environ.get("WATCH_DOGS_EMAILS", "").split(",")
+# Get the path to the directory for this file in the workspace.
+dir_root = os.path.abspath(".")
+print(dir_root)
+# Switch to the root directory.
+os.chdir(dir_root)
 
-#config = configparser.ConfigParser()
-#config.read_file(io.StringIO(os.environ["DB_PROFILES"]))
-#config = config["DEMO"]
-#os.environ["DATABRICKS_HOST"] = config["host"]
-#os.environ["DATABRICKS_TOKEN"] = config["token"]
+# Skip writing .pyc files to the bytecode cache on the cluster.
+sys.dont_write_bytecode = True
 
-branch = os.getenv("GITHUB_HEAD_REF", "main")
+# Now run pytest from the root directory, using the
 
-# Create workspace client using host and token
-workspace = WorkspaceClient()
-user = workspace.current_user.me().user_name
-nodes = [
-    node
-    for node in workspace.clusters.list_node_types().node_types
-    if not node.is_deprecated and node.num_cores == 4.0 and node.is_io_cache_enabled
-]
-acl = [JobAccessControlRequest(user_name=user, permission_level=JobPermissionLevel.IS_OWNER)]
-
-for watcher in WATCH_DOGS_EMAILS:
-    # Check if the watcher is a valid user
-    ww_list = list(
-        workspace.users.list(
-            attributes="id,userName", sort_by="userName", filter=f"userName eq '{watcher}'"
-        )
-    )
-    if len(ww_list) >= 1 and watcher != user:
-        acl.append(
-            JobAccessControlRequest(
-                user_name=watcher,
-                permission_level=JobPermissionLevel.CAN_VIEW,
-            )
-        )
-
-repo_url = "https://github.com/databricks-industry-solutions/python-data-sources.git"
-
-# Define the git source
-git_source = GitSource(git_url=repo_url, git_provider=GitProvider.GIT_HUB, git_branch=branch)
-
-# Define the job cluster
-cluster_spec = ClusterSpec(
-    num_workers=0,
-    spark_version="17.0.x-scala2.13",
-    node_type_id=nodes[0].node_type_id,
-    spark_conf={"spark.master": "local[*, 4]"},
-    data_security_mode=DataSecurityMode.SINGLE_USER,
-    runtime_engine=RuntimeEngine.STANDARD,
-)
-
-# Define the notebook task
-notebook_task = NotebookTask(
-    notebook_path="pytest_databricks",
-    base_parameters={},
-    source=Source.GIT,
-)
-
-# Define the task
-task = Task(task_key="notebook_task", notebook_task=notebook_task, new_cluster=cluster_spec)
-
-# Submit the task
-run_response = workspace.jobs.submit_and_wait(
-    run_name="pixels_gitaction_test", tasks=[task], git_source=git_source, access_control_list=acl
-)
-
-if run_response.state.result_state != RunResultState.SUCCESS:
-    raise Exception(f"Job failed with state {run_response.state.result_state}")
+#
+retcode = pytest.main(["-v", "."])
+dbutils.notebook.exit(retcode)
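Note that dbutils.notebook.exit only reports the pytest return code as the notebook's exit value; the Databricks task itself still completes successfully even if some tests fail. A hedged, hypothetical variant of the final cells that instead fails the task on any non-zero pytest exit code (assuming the same notebook context, where dbutils is provided by the Databricks runtime):

import pytest

retcode = pytest.main(["-v", "."])
if retcode != pytest.ExitCode.OK:
    # Raising here marks the Databricks job run as failed when tests fail.
    raise RuntimeError(f"pytest exited with code {retcode}")
dbutils.notebook.exit(str(retcode))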

0 commit comments
