## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
## SPDX-License-Identifier: Apache-2.0

# This is a reusable workflow that runs the Application Signals E2E test for the Node ECS use case.
# It is meant to be called from another workflow.
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
name: Node ECS Use Case

# Reusable-workflow interface: this file is only triggered via `workflow_call`.
on:
  workflow_call:
    inputs:
      # Region the test resources are deployed to and validated in.
      aws-region:
        required: true
        type: string
      # Name of the calling workflow; 'main-build' pins the test-framework
      # checkout to the `main` branch (see the checkout step).
      caller-workflow-name:
        required: true
        type: string
      # ADOT instrumentation image used when the caller is the
      # aws-otel-js-instrumentation repository.
      adot-image-name:
        required: false
        type: string
      # NOTE(review): declared but not referenced anywhere else in this file.
      cwagent-image-name:
        required: false
        type: string
    outputs:
      # 'true' once the job's first step has run.
      job-started:
        value: ${{ jobs.node-ecs.outputs.job-started }}
      # 'success' only when the log, metric and trace validations all succeeded.
      validation-result:
        value: ${{ jobs.node-ecs.outputs.validation-result }}

# Token permissions for the job: `id-token: write` lets the AWS credentials
# action request an OIDC token for role assumption; `contents: read` is for
# checking out repository contents.
permissions:
  id-token: write
  contents: read

# Workflow-wide environment shared by every step.
env:
  # Values forwarded from the caller's inputs.
  E2E_TEST_AWS_REGION: ${{ inputs.aws-region }}
  CALLER_WORKFLOW_NAME: ${{ inputs.caller-workflow-name }}
  ADOT_IMAGE_NAME: ${{ inputs.adot-image-name }}
  # Base names; the per-run TESTING_ID is appended where uniqueness is needed.
  CLUSTER_NAME: e2e-test-node
  SAMPLE_APP_NAME: main-service-node
  # Where the validators look for telemetry.
  METRIC_NAMESPACE: ApplicationSignals
  LOG_GROUP_NAME: /aws/application-signals/data
  # Left as a literal shell reference; it is expanded by the shell at run time.
  TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE}
  # Account and role used for the initial credential configuration.
  E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }}
  E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }}

jobs:
  # Deploys the Node sample app to ECS with Terraform, runs the log, metric and
  # trace validators against it, records the combined result as a job output,
  # and finally destroys the Terraform-managed resources.
  node-ecs:
    runs-on: ubuntu-latest
    outputs:
      job-started: ${{ steps.job-started.outputs.job-started }}
      validation-result: ${{ steps.validation-result.outputs.validation-result }}
    steps:
      - name: Check if the job started
        id: job-started
        run: echo "job-started=true" >> $GITHUB_OUTPUT

      # TESTING_ID is unique per job/run/attempt and is used to name the test resources.
      - name: Generate testing id and sample app namespace
        run: |
          echo TESTING_ID="${{ github.job }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" >> $GITHUB_ENV

      # Always check out the test framework repository; callers named 'main-build'
      # use its main branch, everyone else uses the triggering ref.
      - uses: actions/checkout@v4
        with:
          repository: 'aws-observability/aws-application-signals-test-framework'
          ref: ${{ env.CALLER_WORKFLOW_NAME == 'main-build' && 'main' || github.ref }}
          fetch-depth: 0

      # We initialize Gradlew Daemon early on during the workflow because sometimes initialization
      # fails due to transient issues. If it fails here, then we will try again later before the validators
      - name: Initiate Gradlew Daemon
        id: initiate-gradlew
        uses: ./.github/workflows/actions/execute_and_retry
        continue-on-error: true
        with:
          command: "./gradlew :validator:build"
          cleanup: "./gradlew clean"
          max_retry: 3
          sleep_time: 60

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: us-east-1

      # Exposes each secret as an environment variable with the given alias.
      - name: Retrieve account
        uses: aws-actions/aws-secretsmanager-get-secrets@v1
        with:
          secret-ids: |
            ACCOUNT_ID, region-account/${{ env.E2E_TEST_AWS_REGION }}
            NODE_MAIN_SAMPLE_APP_IMAGE, e2e-test/node-main-sample-app-image
            NODE_REMOTE_SAMPLE_APP_IMAGE, e2e-test/node-remote-sample-app-image

      # If the workflow is running as a canary, then we want to log in to the aws account in the appropriate region
      - name: Configure AWS Credentials
        if: ${{ github.event.repository.name == 'aws-application-signals-test-framework' }}
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: ${{ env.E2E_TEST_AWS_REGION }}

      - name: Initiate Terraform
        uses: ./.github/workflows/actions/execute_and_retry
        with:
          command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/node/ecs && terraform init && terraform validate"
          cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
          max_retry: 6
          sleep_time: 60

      - name: Set Sample App Image
        run: |
          echo MAIN_SAMPLE_APP_IMAGE_URI="${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/${{ env.NODE_MAIN_SAMPLE_APP_IMAGE }}" >> $GITHUB_ENV
          echo REMOTE_SAMPLE_APP_IMAGE_URI="${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/${{ env.NODE_REMOTE_SAMPLE_APP_IMAGE }}" >> $GITHUB_ENV

      - name: Set ADOT Node image environment variable
        run: |
          if [ "${{ github.event.repository.name }}" = "aws-otel-js-instrumentation" ]; then
            # Use the staging image built by the ADOT node repo
            echo ADOT_INSTRUMENTATION_IMAGE_URI="${{ env.ADOT_IMAGE_NAME }}" >> $GITHUB_ENV
          else
            # ADOT_INSTRUMENTATION_IMAGE_TAG=$(curl -s -I -L 'https://github.com/aws-observability/aws-otel-js-instrumentation/releases/latest' | grep -i Location | awk -F'/tag/' '{print $2}' | tr -d '\r')
            # ADOT_INSTRUMENTATION_IMAGE_TAG="v0.1.0"
            # echo ADOT_INSTRUMENTATION_IMAGE_URI="public.ecr.aws/aws-observability/adot-autoinstrumentation-node:$ADOT_INSTRUMENTATION_IMAGE_TAG" >> $GITHUB_ENV
            echo ADOT_INSTRUMENTATION_IMAGE_URI="public.ecr.aws/e8l6r3w7/adot-autoinstrumentation-node-js:latest" >> $GITHUB_ENV
          fi

      # Switch to use the public image for CW Agent
      - name: Set Get CW Agent command environment variable
        run: |
          if [ "${{ github.event.repository.name }}" = "amazon-cloudwatch-agent" ]; then
            echo CWAGENT_IMAGE_URI="${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/cwagent-integration-test:${{ github.sha }}" >> $GITHUB_ENV
          else
            # echo CWAGENT_IMAGE_URI="public.ecr.aws/cloudwatch-agent/cloudwatch-agent:latest" >> $GITHUB_ENV
            echo CWAGENT_IMAGE_URI="136146983976.dkr.ecr.us-east-1.amazonaws.com/cloudwatch-agent:1.300047.0b872" >> $GITHUB_ENV
          fi

      - name: Deploy sample app via terraform and wait for the endpoint to come online
        id: deploy-sample-app
        working-directory: terraform/node/ecs
        run: |
          # Attempt to deploy the sample app on ECS via terraform.
          # There may be occasional failures due to transient issues, so try up to 2 times.
          # deployment_failed of 0 indicates that the terraform deployment succeeded, while any
          # non-zero value (the exit code of `terraform apply`) indicates that it failed at some point.
          retry_counter=0
          max_retry=2
          while [ $retry_counter -lt $max_retry ]; do
            echo "Attempt $retry_counter"
            deployment_failed=0
            terraform apply -auto-approve \
              -var="test_id=${{ env.TESTING_ID }}" \
              -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
              -var="ecs_cluster_name=${{ env.CLUSTER_NAME }}-${{ env.TESTING_ID }}" \
              -var="sample_app_name=${{ env.SAMPLE_APP_NAME }}-${{ env.TESTING_ID }}" \
              -var="sample_app_image=${{ env.MAIN_SAMPLE_APP_IMAGE_URI }}" \
              -var="sample_remote_app_image=${{ env.REMOTE_SAMPLE_APP_IMAGE_URI }}" \
              -var="adot_instrumentation_image=${{ env.ADOT_INSTRUMENTATION_IMAGE_URI }}" \
              -var="cwagent_image=${{ env.CWAGENT_IMAGE_URI }}" \
              || deployment_failed=$?

            # Any non-zero exit code is a failure (not just 1): destroy whatever terraform
            # managed to create and try again. Testing for exactly 1 would let other failure
            # codes fall through to the success branch below.
            if [ $deployment_failed -ne 0 ]; then
              echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
              echo "Destroying terraform"
              terraform destroy -auto-approve \
                -var="test_id=${{ env.TESTING_ID }}" \
                -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
                -var="ecs_cluster_name=${{ env.CLUSTER_NAME }}-${{ env.TESTING_ID }}" \
                -var="sample_app_name=${{ env.SAMPLE_APP_NAME }}-${{ env.TESTING_ID }}" \
                -var="sample_app_image=${{ env.MAIN_SAMPLE_APP_IMAGE_URI }}" \
                -var="sample_remote_app_image=${{ env.REMOTE_SAMPLE_APP_IMAGE_URI }}" \
                -var="adot_instrumentation_image=${{ env.ADOT_INSTRUMENTATION_IMAGE_URI }}" \
                -var="cwagent_image=${{ env.CWAGENT_IMAGE_URI }}"

              retry_counter=$(($retry_counter+1))
            else
              # If deployment succeeded, then exit the loop
              break
            fi

            if [ $retry_counter -ge $max_retry ]; then
              echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
              exit 1
            fi
          done

      - name: Sleep to Wait for Canary Generated and Log Artifact Versions
        run: |
          sleep 120
          echo "ADOT Image: ${{ env.ADOT_INSTRUMENTATION_IMAGE_URI }}";
          echo "CW Agent Image: ${{ env.CWAGENT_IMAGE_URI }}";

      # Second chance for the Gradle build. The earlier step uses continue-on-error, so its
      # failure is only visible through `outcome` (the result before continue-on-error is
      # applied); comparing the step object itself to a string would never match.
      - name: Initiate Gradlew Daemon
        if: steps.initiate-gradlew.outcome == 'failure'
        uses: ./.github/workflows/actions/execute_and_retry
        continue-on-error: true
        with:
          command: "./gradlew :validator:build"
          cleanup: "./gradlew clean"
          max_retry: 3
          sleep_time: 60

      # Validation for app signals telemetry data
      # Each validator command below is a folded scalar: the lines are joined with single
      # spaces into one shell command.
      - name: Call endpoint and validate generated EMF logs
        id: log-validation
        if: steps.deploy-sample-app.outcome == 'success' && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c node/ecs/log-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --region ${{ env.E2E_TEST_AWS_REGION }}
          --account-id ${{ env.ACCOUNT_ID }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --platform-info ${{ env.CLUSTER_NAME }}-${{ env.TESTING_ID }}
          --service-name ${{ env.SAMPLE_APP_NAME }}-${{ env.TESTING_ID }}
          --rollup'

      # Runs even when the log validation failed, so every validator reports its own result.
      - name: Call endpoints and validate generated metrics
        id: metric-validation
        if: (steps.deploy-sample-app.outcome == 'success' || steps.log-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c node/ecs/metric-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --region ${{ env.E2E_TEST_AWS_REGION }}
          --account-id ${{ env.ACCOUNT_ID }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --platform-info ${{ env.CLUSTER_NAME }}-${{ env.TESTING_ID }}
          --service-name ${{ env.SAMPLE_APP_NAME }}-${{ env.TESTING_ID }}
          --rollup'

      - name: Call endpoints and validate generated traces
        id: trace-validation
        if: (steps.deploy-sample-app.outcome == 'success' || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c node/ecs/trace-validation.yml
          --testing-id ${{ env.TESTING_ID }}
          --region ${{ env.E2E_TEST_AWS_REGION }}
          --account-id ${{ env.ACCOUNT_ID }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --platform-info ${{ env.CLUSTER_NAME }}-${{ env.TESTING_ID }}
          --service-name ${{ env.SAMPLE_APP_NAME }}-${{ env.TESTING_ID }}
          --rollup'

      - name: Refresh AWS Credentials
        if: ${{ github.event.repository.name == 'aws-application-signals-test-framework' }}
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: ${{ env.E2E_TEST_AWS_REGION }}

      # Publishes the combined verdict as a step output; a skipped validator counts as a failure.
      - name: Save test results
        if: always()
        id: validation-result
        run: |
          if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then
            echo "validation-result=success" >> $GITHUB_OUTPUT
          else
            echo "validation-result=failure" >> $GITHUB_OUTPUT
          fi

      # Clean up Procedures

      # Best-effort teardown: always runs, is time-boxed, and never fails the job.
      - name: Terraform destroy
        if: always()
        continue-on-error: true
        timeout-minutes: 5
        working-directory: terraform/node/ecs
        run: |
          terraform destroy -auto-approve \
            -var="test_id=${{ env.TESTING_ID }}" \
            -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
            -var="ecs_cluster_name=${{ env.CLUSTER_NAME }}-${{ env.TESTING_ID }}" \
            -var="sample_app_name=${{ env.SAMPLE_APP_NAME }}-${{ env.TESTING_ID }}" \
            -var="sample_app_image=${{ env.MAIN_SAMPLE_APP_IMAGE_URI }}" \
            -var="sample_remote_app_image=${{ env.REMOTE_SAMPLE_APP_IMAGE_URI }}" \
            -var="adot_instrumentation_image=${{ env.ADOT_INSTRUMENTATION_IMAGE_URI }}" \
            -var="cwagent_image=${{ env.CWAGENT_IMAGE_URI }}"