diff --git a/knowledge_base/job_with_requirements_txt/.gitignore b/knowledge_base/job_with_requirements_txt/.gitignore new file mode 100644 index 0000000..15bcc6d --- /dev/null +++ b/knowledge_base/job_with_requirements_txt/.gitignore @@ -0,0 +1 @@ +.databricks diff --git a/knowledge_base/job_with_requirements_txt/README.md b/knowledge_base/job_with_requirements_txt/README.md new file mode 100644 index 0000000..4da57f7 --- /dev/null +++ b/knowledge_base/job_with_requirements_txt/README.md @@ -0,0 +1,34 @@ +# Job that uses `requirements.txt` + +This example demonstrates how to make a job pick up a `requirements.txt` dependency file. + +## Prerequisites + +* Databricks CLI v0.222.0 (unreleased) or above + +## Usage + +Update the `host` field under `workspace` in `databricks.yml` to the Databricks workspace you wish to deploy to. + +Run `databricks bundle deploy` to deploy the job. + +Run `databricks bundle run job_with_requirements_txt` to run the job. + +Example output: + +``` +$ databricks bundle run job_with_requirements_txt +Run URL: https://... + +2024-06-21 14:48:09 "[dev pieter_noordhuis] Example job that uses a requirements.txt file" TERMINATED SUCCESS + _____________ +| Hello, world! | + ============= + \ + \ + ^__^ + (oo)\_______ + (__)\ )\/\ + ||----w | + || || +``` diff --git a/knowledge_base/job_with_requirements_txt/databricks.yml b/knowledge_base/job_with_requirements_txt/databricks.yml new file mode 100644 index 0000000..06ee7ed --- /dev/null +++ b/knowledge_base/job_with_requirements_txt/databricks.yml @@ -0,0 +1,13 @@ +bundle: + name: job_with_requirements_txt + +include: + - resources/*.yml + +workspace: + host: https://myworkspace.cloud.databricks.com + +targets: + dev: + default: true + mode: development diff --git a/knowledge_base/job_with_requirements_txt/requirements.txt b/knowledge_base/job_with_requirements_txt/requirements.txt new file mode 100644 index 0000000..c6b9ffd --- /dev/null +++ b/knowledge_base/job_with_requirements_txt/requirements.txt @@ -0,0 +1 @@ +cowsay diff --git a/knowledge_base/job_with_requirements_txt/resources/job_with_requirements_txt.yml b/knowledge_base/job_with_requirements_txt/resources/job_with_requirements_txt.yml new file mode 100644 index 0000000..365259c --- /dev/null +++ b/knowledge_base/job_with_requirements_txt/resources/job_with_requirements_txt.yml @@ -0,0 +1,22 @@ +resources: + jobs: + job_with_requirements_txt: + name: Example job that uses a requirements.txt file + + tasks: + - task_key: task + job_cluster_key: default + spark_python_task: + python_file: ../src/main.py + libraries: + - requirements: /Workspace/${workspace.file_path}/requirements.txt + + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.1.x-scala2.12 + node_type_id: i3.xlarge + num_workers: 0 + spark_conf: + spark.master: local[*, 4] + spark.databricks.cluster.profile: singleNode diff --git a/knowledge_base/job_with_requirements_txt/src/main.py b/knowledge_base/job_with_requirements_txt/src/main.py new file mode 100644 index 0000000..753eb3d --- /dev/null +++ b/knowledge_base/job_with_requirements_txt/src/main.py @@ -0,0 +1,3 @@ +from cowsay import cow + +cow("Hello, world!")