3838 GCP_PROJECT_ID : apache-beam-testing
3939 GCP_REGION : us-central1
4040 GCS_TEMP_LOCATION : gs://rc-validation-migration-tests/temp/
41+ GCS_STAGING_LOCATION : gs://rc-validation-migration-tests/staging/
4142 GCS_INPUT_PATH : gs://apache-beam-samples/shakespeare/kinglear.txt
4243
4344jobs :
44- setup :
45+ validate-rc-package :
4546 runs-on : self-hosted
4647 steps :
4748 - name : Checkout repository
@@ -52,43 +53,33 @@ jobs:
5253 with :
5354 go-version : default
5455
55- - name : Fetch Go SDK RC and Tidy Modules
56- working-directory : ./sdks/go/examples/wordcount
56+ - name : Setup Go Module and Fetch RC
57+ id : setup_go
5758 run : |
58- go get -d github.com/apache/beam/sdks/v2@${{ github.event.inputs.rc_tag }}
59+ TEMP_DIR="go-rc-test-${{ github.run_id }}"
60+ mkdir $TEMP_DIR
61+ wget -O $TEMP_DIR/wordcount.go https://raw.githubusercontent.com/apache/beam/refs/heads/master/sdks/go/examples/wordcount/wordcount.go
62+ cd $TEMP_DIR
63+ go mod init rc-test
64+ go get github.com/apache/beam/sdks/v2/go/pkg/beam@${{ github.event.inputs.rc_tag }}
5965 go mod tidy
66+ echo "work_dir=$TEMP_DIR" >> $GITHUB_OUTPUT # Expose the relative temp-dir path to later steps
6067
61-
62- validate-go-rc-prism :
63- needs : setup
64- runs-on : self-hosted # Changed to self-hosted
65- steps :
66- - name : Checkout repository
67- uses : actions/checkout@v4
68-
69- - name : Set up environment
70- uses : ./.github/actions/setup-environment-action
71- with :
72- go-version : default
73-
74-
75- # Assuming gcloud/gsutil is available and authenticated on the self-hosted runner
68+ # --- Prism Steps ---
7669 - name : Download Input File from GCS (Prism)
77- working-directory : ./sdks/go/examples/wordcount
70+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
7871 run : gsutil cp ${{ env.GCS_INPUT_PATH }} ./kinglear.txt
7972
8073 - name : Run Go WordCount with PrismRunner
81- working-directory : ./sdks/go/examples/wordcount
74+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
8275 run : |
8376 go run wordcount.go \
8477 --input ./kinglear.txt \
8578 --output ./output_prism.txt \
86- --runner=PrismRunner \
87- --environment_type=DOCKER \
88- --environment_config=apache/beam_go_sdk:${{ github.event.inputs.container_tag }}
79+ --runner=PrismRunner
8980
90- - name : Check output file
91- working-directory : ./sdks/go/examples/wordcount
81+ - name : Check Prism output file
82+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
9283 run : |
9384 echo "--- PrismRunner WordCount Output ---"
9485 cat output_prism.txt* # Output might be sharded
@@ -101,22 +92,9 @@ jobs:
10192 exit 1
10293 fi
10394
104- validate-go-rc-dataflow :
105- needs : setup
106- runs-on : self-hosted # Changed to self-hosted
107- steps :
108- - name : Checkout repository
109- uses : actions/checkout@v4
110-
111- - name : Set up environment
112- uses : ./.github/actions/setup-environment-action
113- with :
114- go-version : default
115-
116-
117- # Assuming gcloud is available and authenticated on the self-hosted runner
95+ # --- Dataflow Steps ---
11896 - name : Run Go WordCount with DataflowRunner
119- working-directory : ./sdks/go/examples/wordcount
97+ working-directory : ./${{ steps.setup_go.outputs.work_dir }}
12098 env :
12199 # Define output path based on constant prefix and RC tag for uniqueness
122100 GCS_OUTPUT_PATH : ${{ env.GCS_OUTPUT_PREFIX }}/${{ github.event.inputs.rc_tag }}/dataflow/output
@@ -129,11 +107,31 @@ jobs:
129107 --project=${{ env.GCP_PROJECT_ID }} \
130108 --region=${{ env.GCP_REGION }} \
131109 --temp_location=${{ env.GCS_TEMP_LOCATION }} \
110+ --staging_location=${{ env.GCS_STAGING_LOCATION }} \
132111 --environment_type=DOCKER \
133112 --environment_config=apache/beam_go_sdk:${{ github.event.inputs.container_tag }}
134113
135- # Note: Checking Dataflow output requires gcloud storage commands and depends on job completion.
136- # This basic workflow focuses on submission. A more robust check would poll the job status
137- # and then verify GCS output, which is significantly more complex.
138- - name : Log Dataflow Job Submission Info
139- run : echo "Dataflow job submitted. Check GCP console (project ${{ env.GCP_PROJECT_ID }}) for status and output at ${{ env.GCS_OUTPUT_PREFIX }}/${{ github.event.inputs.rc_tag }}/dataflow/output"
114+ - name : Check Dataflow Output in GCS
115+ working-directory : ./${{ steps.setup_go.outputs.work_dir }} # Added working directory for consistency, though not strictly needed for gsutil
116+ env :
117+ # Re-define the output path pattern for checking
118+ GCS_OUTPUT_PATH_PATTERN : ${{ env.GCS_OUTPUT_PREFIX }}/${{ github.event.inputs.rc_tag }}/dataflow/output*
119+ run : |
120+ echo "Checking for Dataflow output files in GCS at: $GCS_OUTPUT_PATH_PATTERN"
121+ # Use gsutil stat. The global -q flag suppresses the error message printed
122+ # for non-existent objects; the branch relies only on the exit code: 0 = found, non-zero = not found.
123+ if gsutil -q stat $GCS_OUTPUT_PATH_PATTERN; then
124+ echo "Output files found in GCS."
125+ FILE_COUNT=$(gsutil ls $GCS_OUTPUT_PATH_PATTERN | wc -l)
126+ if [ "$FILE_COUNT" -gt 0 ]; then echo "Found $FILE_COUNT output file(s)."; else echo "Error: Output path exists but contains no files."; exit 1; fi
127+ else
128+ echo "Error: Output files not found in GCS at $GCS_OUTPUT_PATH_PATTERN"
129+ exit 1
130+ fi
131+
132+ - name : Cleanup Temporary Directory
133+ if : always() # Ensure cleanup runs even if previous steps fail
134+ working-directory : ./ # Run from the root workspace dir
135+ run : |
136+ echo "Cleaning up temporary directory: ${{ steps.setup_go.outputs.work_dir }}"
137+ rm -rf ${{ steps.setup_go.outputs.work_dir }}