
Commit 181383b

Add Dataproc CloudSql test

Signed-off-by: Dominik Dębowczyk <[email protected]>

1 parent: 2f033d7

22 files changed: +1188 / -6 lines

.github/workflows/main_new_release.yml (2 additions, 0 deletions)

@@ -90,6 +90,8 @@ jobs:
     uses: ./.github/workflows/producer_spark_dataproc.yml
     secrets:
       gcpKey: ${{ secrets.GCP_SA_KEY }}
+      postgresqlUser: ${{ secrets.POSTGRESQL_USER }}
+      postgresqlPassword: ${{ secrets.POSTGRESQL_PASSWORD }}
     with:
       release: ${{ needs.initialize_workflow.outputs.openlineage_release }}
       get-latest-snapshots: 'false'

.github/workflows/main_pr.yml (2 additions, 0 deletions)

@@ -82,6 +82,8 @@ jobs:
     uses: ./.github/workflows/producer_spark_dataproc.yml
     secrets:
       gcpKey: ${{ secrets.GCP_SA_KEY }}
+      postgresqlUser: ${{ secrets.POSTGRESQL_USER }}
+      postgresqlPassword: ${{ secrets.POSTGRESQL_PASSWORD }}
     with:
       release: ${{ needs.initialize_workflow.outputs.ol_release }}
       get-latest-snapshots: 'false'

.github/workflows/producer_spark_dataproc.yml (15 additions, 3 deletions)

@@ -5,6 +5,10 @@ on:
     secrets:
       gcpKey:
         required: true
+      postgresqlUser:
+        required: true
+      postgresqlPassword:
+        required: true
     inputs:
       release:
         description: "release tag of OpenLineage to use"

@@ -80,6 +84,14 @@ jobs:
           local-file-path: producer/spark_dataproc/runner/get_openlineage_jar.sh
           gcs-path: "gs://open-lineage-e2e/scripts"
           credentials: ${{ steps.gcp-auth.outputs.credentials_file_path }}
+
+      - name: Upload CloudSQL init actions to GCS
+        id: upload-cloud-sql-initialization-actions
+        uses: ./.github/actions/upload_artifacts
+        with:
+          local-file-path: producer/spark_dataproc/runner/cloud_sql_proxy.sh
+          gcs-path: "gs://open-lineage-e2e/scripts"
+          credentials: ${{ steps.gcp-auth.outputs.credentials_file_path }}

       - name: Set up Python 3.11
         uses: actions/setup-python@v3

@@ -100,8 +112,8 @@
         --region us-west1 \
         --cluster-name dataproc-producer-test-${{ github.run_id }} \
         --credentials-file ${{ steps.gcp-auth.outputs.credentials_file_path }} \
-        --metadata "SPARK_BQ_CONNECTOR_URL=gs://open-lineage-e2e/jars/spark-3.5-bigquery-0.0.1-SNAPSHOT.jar,OPENLINEAGE_SPARK_URL=${{ steps.upload-spark-integration.outputs.uploaded-file }},SPARK_SPANNER_CONNECTOR_URL=gs://open-lineage-e2e/jars/spark-3.1-spanner-1.1.0.jar" \
-        --initialization-actions="${{ steps.upload-initialization-actions.outputs.uploaded-file }}"
+        --metadata 'SPARK_BQ_CONNECTOR_URL=gs://open-lineage-e2e/jars/spark-3.5-bigquery-0.0.1-SNAPSHOT.jar,OPENLINEAGE_SPARK_URL=${{ steps.upload-spark-integration.outputs.uploaded-file }},SPARK_SPANNER_CONNECTOR_URL=gs://open-lineage-e2e/jars/spark-3.1-spanner-1.1.0.jar,enable-cloud-sql-hive-metastore=false,additional-cloud-sql-instances=gcp-open-lineage-testing:us-central1:open-lineage-e2e=tcp:3307' \
+        --initialization-actions="${{ steps.upload-initialization-actions.outputs.uploaded-file }},${{ steps.upload-cloud-sql-initialization-actions.outputs.uploaded-file }}"
         # --metadata "SPARK_BQ_CONNECTOR_URL=${{ steps.upload-spark-bq-connector.outputs.uploaded-file }},OPENLINEAGE_SPARK_URL=${{ steps.upload-spark-integration.outputs.uploaded-file }}" \

@@ -126,7 +138,7 @@
         --gcs-bucket open-lineage-e2e \
         --python-job "$run_script" \
         --jars "${{ steps.upload-gcs-transport.outputs.uploaded-file }}" \
-        --spark-properties "spark.extraListeners=io.openlineage.spark.agent.OpenLineageSparkListener,spark.sql.warehouse.dir=/tmp/warehouse,spark.openlineage.transport.type=gcs" \
+        --spark-properties 'spark.extraListeners=io.openlineage.spark.agent.OpenLineageSparkListener,spark.sql.warehouse.dir=/tmp/warehouse,spark.openlineage.transport.type=gcs,spark.driver.POSTGRESQL_USER=${{ secrets.postgresqlUser }},spark.driver.POSTGRESQL_PASSWORD=${{ secrets.postgresqlPassword }}' \
        --output-directory "${{ steps.set-producer-output.outputs.event_dir }}/$scenario" \
        --credentials-file "${{ steps.gcp-auth.outputs.credentials_file_path }}" \
        --dataproc-image-version 2.2-ubuntu22 \
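The test scenario scripts added elsewhere in this commit are not shown in this view. Purely as an illustration of how the pieces wired up above could be consumed, here is a minimal PySpark sketch, assuming the cloud_sql_proxy.sh init action exposes the Cloud SQL Postgres instance on 127.0.0.1:3307 (matching additional-cloud-sql-instances=...=tcp:3307) and that a PostgreSQL JDBC driver is available on the cluster; the file, database, and table names are hypothetical and not taken from this commit.

# cloudsql_scenario_sketch.py: hypothetical example, not a file from this commit
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("dataproc_cloudsql_scenario").getOrCreate()

# The workflow injects credentials via --spark-properties (see the diff above);
# properties set at submit time are readable from the driver's SparkConf.
user = spark.sparkContext.getConf().get("spark.driver.POSTGRESQL_USER")
password = spark.sparkContext.getConf().get("spark.driver.POSTGRESQL_PASSWORD")

# The Cloud SQL proxy init action exposes the instance locally; port 3307 matches
# additional-cloud-sql-instances=...=tcp:3307 in the cluster metadata.
jdbc_url = "jdbc:postgresql://127.0.0.1:3307/postgres"  # database name assumed

# Reading and writing through JDBC lets the OpenLineageSparkListener (configured
# via spark.extraListeners) emit lineage events to the GCS transport.
source_df = (
    spark.read.format("jdbc")
    .option("url", jdbc_url)
    .option("dbtable", "public.cloudsql_source")  # hypothetical table
    .option("user", user)
    .option("password", password)
    .option("driver", "org.postgresql.Driver")
    .load()
)

(
    source_df.write.format("jdbc")
    .option("url", jdbc_url)
    .option("dbtable", "public.cloudsql_target")  # hypothetical table
    .option("user", user)
    .option("password", password)
    .option("driver", "org.postgresql.Driver")
    .mode("overwrite")
    .save()
)

spark.stop()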
