aws-observability
diff --git a/‎.github/workflows/appsignals-e2e-ec2-canary-test.yml
Lines changed: 20 additions & 0 deletions b/‎.github/workflows/appsignals-e2e-ec2-canary-test.yml
Lines changed: 20 additions & 0 deletions
diff --git a/‎.github/workflows/appsignals-e2e-ec2-test.yml
Lines changed: 166 additions & 0 deletions b/‎.github/workflows/appsignals-e2e-ec2-test.yml
Lines changed: 166 additions & 0 deletions
diff --git a/‎.github/workflows/appsignals-e2e-eks-canary-test.yml
Lines changed: 4 additions & 2 deletions b/‎.github/workflows/appsignals-e2e-eks-canary-test.yml
Lines changed: 4 additions & 2 deletions
diff --git a/‎.github/workflows/appsignals-e2e-eks-test.yml
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/appsignals-e2e-eks-test.yml
Lines changed: 3 additions & 3 deletions
diff --git a/‎testing/terraform/ec2/amazon-cloudwatch-agent.json
Lines changed: 20 additions & 0 deletions b/‎testing/terraform/ec2/amazon-cloudwatch-agent.json
Lines changed: 20 additions & 0 deletions
diff --git a/‎testing/terraform/ec2/main.tf
Lines changed: 143 additions & 0 deletions b/‎testing/terraform/ec2/main.tf
Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,20 @@
+## Canary for the Application Signals end-to-end tests on EC2. Each run exercises the
+## App Signals enablement artifacts by delegating to the reusable EC2 workflow, which
+## deploys a sample app and a remote service on two EC2 instances, calls their APIs, and
+## validates the generated telemetry (logs, metrics, and traces).
+name: App Signals Enablement - E2E EC2 Canary Testing
+on:
+  # Manual trigger, so the canary can also be run on demand.
+  workflow_dispatch:
+  # Scheduled trigger: every 15 minutes.
+  schedule:
+    - cron: '0/15 * * * *'
+
+permissions:
+  contents: read
+  id-token: write
+
+jobs:
+  e2e-canary-test:
+    uses: ./.github/workflows/appsignals-e2e-ec2-test.yml
+    with:
+      caller-workflow-name: 'appsignals-e2e-ec2-canary-test'
+    secrets: inherit
@@ -0,0 +1,166 @@
+# Reusable workflow that runs the App Signals E2E test for the EC2 use case.
+# It has no trigger of its own; another workflow has to invoke it through `workflow_call`.
+# Background on reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
+name: App Signals Enablement E2E Testing - EC2 Use Case
+on:
+  workflow_call:
+    inputs:
+      # Name of the calling workflow; reported as the `workflow` dimension of the result metric.
+      caller-workflow-name:
+        type: string
+        required: true
+
+permissions:
+  contents: read
+  id-token: write
+
+env:
+  AWS_DEFAULT_REGION: us-east-1
+  TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACC }}
+  # Where the validators look for the generated telemetry.
+  METRIC_NAMESPACE: AppSignals
+  LOG_GROUP_NAME: /aws/appsignals/generic
+  # Artifacts handed to terraform for installation on the EC2 instances.
+  SAMPLE_APP_FRONTEND_SERVICE_JAR: 's3://aws-appsignals-sample-app/main-service.jar'
+  SAMPLE_APP_REMOTE_SERVICE_JAR: 's3://aws-appsignals-sample-app/remote-service.jar'
+  APP_SIGNALS_CW_AGENT_RPM: 'https://amazoncloudwatch-agent-us-east-1.s3.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm'
+  APP_SIGNALS_ADOT_JAR: 'https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar'
+
+jobs:
+  e2e-ec2-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # TESTING_ID tags every resource and service name created by this run.
+      # The assignment and the redirect target are quoted so neither is subject to word splitting.
+      - name: Generate testing id
+        run: echo "TESTING_ID=${{ github.run_id }}-${{ github.run_number }}" >> "$GITHUB_ENV"
+
+      # Assume the E2E test role in the test region for the AWS calls made by later steps.
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.E2E_TEST_ROLE_ARN }}
+          aws-region: ${{ env.AWS_DEFAULT_REGION }}
+
+      # v3 runs on the Node 20 action runtime, in line with the v4 actions used above;
+      # v2 targets the deprecated Node 16 runtime.
+      # The wrapper script is disabled because a later step captures `terraform output`
+      # through command substitution and needs the plain CLI output.
+      - name: Set up terraform
+        uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_wrapper: false
+
+      # Provision the two EC2 instances and start the sample services on them.
+      - name: Deploy sample app via terraform
+        working-directory: testing/terraform/ec2
+        run: |
+          terraform init
+          terraform validate
+
+          # All values below reach the shell through the workflow `env` block or $GITHUB_ENV.
+          terraform apply -auto-approve \
+            -var="aws_region=${AWS_DEFAULT_REGION}" \
+            -var="test_id=${TESTING_ID}" \
+            -var="sample_app_jar=${SAMPLE_APP_FRONTEND_SERVICE_JAR}" \
+            -var="sample_remote_app_jar=${SAMPLE_APP_REMOTE_SERVICE_JAR}" \
+            -var="cw_agent_rpm=${APP_SIGNALS_CW_AGENT_RPM}" \
+            -var="adot_jar=${APP_SIGNALS_ADOT_JAR}"
+
+      # Export the main service endpoint (public DNS, port 8080) and the remote service IP.
+      # `-raw` prints the bare value; plain `terraform output` wraps strings in double quotes,
+      # which would otherwise be stored inside both variables.
+      - name: Get the sample app endpoint
+        working-directory: testing/terraform/ec2
+        run: |
+          echo "MAIN_SERVICE_ENDPOINT=$(terraform output -raw sample_app_main_service_public_dns):8080" >> "$GITHUB_ENV"
+          echo "REMOTE_SERVICE_IP=$(terraform output -raw sample_app_remote_service_public_ip)" >> "$GITHUB_ENV"
+
+      # Poll the main service with HEAD requests until one succeeds. After the first failed
+      # request it retries every 10 seconds and fails the step once 30 retries are used up.
+      - name: Wait for app endpoint to come online
+        id: endpoint-check
+        run: |
+          attempt_counter=0
+          max_attempts=30
+          # Test curl's exit status directly; wrapping the call in $(...) would execute its output.
+          until curl --output /dev/null --silent --head --fail "http://${{ env.MAIN_SERVICE_ENDPOINT }}"; do
+            if [ "${attempt_counter}" -eq "${max_attempts}" ]; then
+              echo "Max attempts reached"
+              exit 1
+            fi
+
+            printf '.'
+            attempt_counter=$((attempt_counter + 1))
+            sleep 10
+          done
+
+      # This step speeds up the validation by creating the telemetry data in advance.
+      # Failures here are tolerated (continue-on-error); the validation steps call the APIs again.
+      - name: Call all test APIs
+        continue-on-error: true
+        run: |
+          curl -S -s -o /dev/null "http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call/"
+          curl -S -s -o /dev/null "http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call/"
+          # Quoted so the shell never treats the `?` of the query string as a glob character.
+          curl -S -s -o /dev/null "http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}/"
+          curl -S -s -o /dev/null "http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/"
+
+      # Validation for pulse telemetry data: EMF logs first, then metrics, then traces.
+      # The folded scalar joins the lines below with single spaces into one gradle command.
+      - name: Validate generated EMF logs
+        id: log-validation
+        run: >-
+          ./gradlew testing:validator:run --args='-c ec2/log-validation.yml
+          --testing-id ${{ env.TESTING_ID }}
+          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
+          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
+          --region ${{ env.AWS_DEFAULT_REGION }}
+          --account-id ${{ env.TEST_ACCOUNT }}
+          --metric-namespace ${{ env.METRIC_NAMESPACE }}
+          --log-group ${{ env.LOG_GROUP_NAME }}
+          --service-name sample-application-${{ env.TESTING_ID }}
+          --remote-service-name sample-remote-application-${{ env.TESTING_ID }}
+          --request-body ip=${{ env.REMOTE_SERVICE_IP }}
+          --rollup'
+
+      # Runs when everything so far succeeded, and also when only the log validation failed,
+      # so each signal gets checked in the same run; skipped when the run was cancelled.
+      - name: Validate generated metrics
+        if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
+        id: metric-validation
+        run: >-
+          ./gradlew testing:validator:run --args='-c ec2/metric-validation.yml
+          --testing-id ${{ env.TESTING_ID }}
+          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
+          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
+          --region ${{ env.AWS_DEFAULT_REGION }}
+          --account-id ${{ env.TEST_ACCOUNT }}
+          --metric-namespace ${{ env.METRIC_NAMESPACE }}
+          --log-group ${{ env.LOG_GROUP_NAME }}
+          --service-name sample-application-${{ env.TESTING_ID }}
+          --remote-service-name sample-remote-application-${{ env.TESTING_ID }}
+          --request-body ip=${{ env.REMOTE_SERVICE_IP }}
+          --rollup'
+
+      # Runs when everything so far succeeded, and also when the log or metric validation
+      # failed; skipped when the run was cancelled.
+      - name: Validate generated traces
+        if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
+        id: trace-validation
+        run: >-
+          ./gradlew testing:validator:run --args='-c ec2/trace-validation.yml
+          --testing-id ${{ env.TESTING_ID }}
+          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
+          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8080
+          --region ${{ env.AWS_DEFAULT_REGION }}
+          --account-id ${{ env.TEST_ACCOUNT }}
+          --metric-namespace ${{ env.METRIC_NAMESPACE }}
+          --log-group ${{ env.LOG_GROUP_NAME }}
+          --service-name sample-application-${{ env.TESTING_ID }}
+          --remote-service-name sample-remote-application-${{ env.TESTING_ID }}
+          --request-body ip=${{ env.REMOTE_SERVICE_IP }}
+          --rollup'
+
+      # Report the run's result to CloudWatch as the `Failure` metric:
+      # 0.0 only when all three validation steps succeeded, 1.0 in every other case.
+      - name: Publish metric on test result
+        if: always()
+        run: |
+          failure_value=1.0
+          if [[ "${{ steps.log-validation.outcome }}" == "success" && "${{ steps.metric-validation.outcome }}" == "success" && "${{ steps.trace-validation.outcome }}" == "success" ]]; then
+            failure_value=0.0
+          fi
+
+          aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
+            --metric-name Failure \
+            --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
+            --value "${failure_value}" \
+            --region ${{ env.AWS_DEFAULT_REGION }}
+
+
+      # Clean up Procedures
+
+      # Always runs, so the resources created by this run are torn down even after a failure.
+      # The same variables as `terraform apply` are passed: the variable declarations are not
+      # visible from this workflow, and a variable without a default would otherwise make the
+      # destroy fail, which continue-on-error would then hide.
+      - name: Terraform destroy
+        if: always()
+        continue-on-error: true
+        working-directory: testing/terraform/ec2
+        run: |
+          terraform destroy -auto-approve \
+            -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
+            -var="test_id=${{ env.TESTING_ID }}" \
+            -var="sample_app_jar=${{ env.SAMPLE_APP_FRONTEND_SERVICE_JAR }}" \
+            -var="sample_remote_app_jar=${{ env.SAMPLE_APP_REMOTE_SERVICE_JAR }}" \
+            -var="cw_agent_rpm=${{ env.APP_SIGNALS_CW_AGENT_RPM }}" \
+            -var="adot_jar=${{ env.APP_SIGNALS_ADOT_JAR }}"
@@ -1,5 +1,7 @@
-## This workflow aims to run the end-to-end tests in canary fashion to
-## test the prod artifacts for App Signals enablement.
+## This workflow aims to run the Application Signals end-to-end tests as a canary to
+## test the artifacts for App Signals enablement. It will deploy a sample app and remote
+## service onto an EKS cluster, call the APIs, and validate the generated telemetry,
+## including logs, metrics, and traces.
 name: App Signals Enablement - E2E EKS Canary Testing
 on:
   schedule:
 
@@ -159,7 +159,7 @@ jobs:
       - name: Call endpoint and validate generated EMF logs
         id: log-validation
         if: steps.endpoint-check.outcome == 'success' && !cancelled()
-        run: ./gradlew testing:validator:run --args='-c log-validation.yml
+        run: ./gradlew testing:validator:run --args='-c eks/log-validation.yml
           --testing-id ${{ env.TESTING_ID }}
           --endpoint http://${{ env.APP_ENDPOINT }}
           --region ${{ env.AWS_DEFAULT_REGION }}
@@ -176,7 +176,7 @@ jobs:
       - name: Call endpoints and validate generated metrics
         id: metric-validation
         if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
-        run: ./gradlew testing:validator:run --args='-c metric-validation.yml
+        run: ./gradlew testing:validator:run --args='-c eks/metric-validation.yml
           --testing-id ${{ env.TESTING_ID }}
           --endpoint http://${{ env.APP_ENDPOINT }}
           --region ${{ env.AWS_DEFAULT_REGION }}
@@ -194,7 +194,7 @@ jobs:
       - name: Call endpoints and validate generated traces
         id: trace-validation
         if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
-        run: ./gradlew testing:validator:run --args='-c trace-validation.yml
+        run: ./gradlew testing:validator:run --args='-c eks/trace-validation.yml
           --testing-id ${{ env.TESTING_ID }}
           --endpoint http://${{ env.APP_ENDPOINT }}
           --region ${{ env.AWS_DEFAULT_REGION }}
 
@@ -0,0 +1,20 @@
+{
+  "agent": {
+    "debug": true,
+    "region": "$REGION"
+  },
+  "traces": {
+    "traces_collected": {
+      "app_signals": {
+        "enabled": true
+      }
+    }
+  },
+  "logs": {
+    "metrics_collected": {
+      "app_signals": {
+        "enabled": true
+      }
+    }
+  }
+}
@@ -0,0 +1,143 @@
+terraform {
+  # Declare every provider this configuration uses instead of relying on implicit lookup:
+  # `aws` for the instances and key pair, `tls` for tls_private_key, `null` for null_resource.
+  required_providers {
+    aws = {
+      source = "hashicorp/aws"
+    }
+    tls = {
+      source = "hashicorp/tls"
+    }
+    null = {
+      source = "hashicorp/null"
+    }
+  }
+}
+
+# AWS provider. The block is empty: neither region nor credentials are set here.
+provider "aws" {}
+
+# The default VPC; its default security group is attached to both instances.
+resource "aws_default_vpc" "default" {}
+
+# RSA key pair generated for this configuration and used for SSH access to the instances.
+resource "tls_private_key" "ssh_key" {
+  rsa_bits  = 4096
+  algorithm = "RSA"
+}
+
+# Registers the public half of the key with EC2 under a name that includes the test id.
+resource "aws_key_pair" "aws_ssh_key" {
+  public_key = tls_private_key.ssh_key.public_key_openssh
+  key_name   = "instance_key-${var.test_id}"
+}
+
+# Short aliases for the key material referenced by the instances and the provisioners.
+locals {
+  private_key_content = tls_private_key.ssh_key.private_key_pem
+  ssh_key_name        = aws_key_pair.aws_ssh_key.key_name
+}
+
+# EC2 instance that hosts the main sample service.
+resource "aws_instance" "main_service_instance" {
+  ami                                  = "ami-0b021814637c6d457" # Amazon Linux 2 (free tier)
+  instance_type                        = "t2.micro"
+  instance_initiated_shutdown_behavior = "terminate"
+
+  # Access: generated SSH key and a pre-existing instance profile referenced by name.
+  key_name             = local.ssh_key_name
+  iam_instance_profile = "APP_SIGNALS_EC2_TEST_ROLE"
+
+  # Networking: default security group of the default VPC, with a public IP.
+  vpc_security_group_ids      = [aws_default_vpc.default.default_security_group_id]
+  associate_public_ip_address = true
+
+  tags = {
+    Name = "main-service-${var.test_id}"
+  }
+}
+
+# Sets up the main service instance over SSH: installs Java and the CloudWatch agent,
+# downloads the ADOT agent and the sample app jar, then starts the app in the background.
+resource "null_resource" "main_service_setup" {
+  connection {
+    type        = "ssh"
+    user        = var.user
+    private_key = local.private_key_content
+    host        = aws_instance.main_service_instance.public_ip
+  }
+
+  provisioner "remote-exec" {
+    inline = [
+      # Install Java 11
+      "yes | sudo amazon-linux-extras install java-openjdk11",
+
+      # Copy in CW Agent configuration: all whitespace is stripped from the JSON and the
+      # $REGION placeholder is replaced with var.aws_region. path.module makes the lookup
+      # independent of the directory terraform is invoked from.
+      "agent_config='${replace(replace(file("${path.module}/amazon-cloudwatch-agent.json"), "/\\s+/", ""), "$REGION", var.aws_region)}'",
+      "echo \"$agent_config\" > amazon-cloudwatch-agent.json",
+
+      # Get and run CW agent rpm
+      "wget -O cw-agent.rpm ${var.cw_agent_rpm}",
+      "sudo rpm -U ./cw-agent.rpm",
+      "sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:./amazon-cloudwatch-agent.json",
+
+      # Get ADOT
+      "wget -O adot.jar ${var.adot_jar}",
+
+      # Get and run the sample application with configuration
+      "aws s3 cp ${var.sample_app_jar} ./main-service.jar",
+
+      # Each entry below ends in a backslash, so the shell joins them with the java command
+      # into a single line and the variables apply to that process only.
+      # NOTE(review): the agent path assumes adot.jar was downloaded into /home/ec2-user,
+      # i.e. that var.user is ec2-user - confirm.
+      "JAVA_TOOL_OPTIONS=' -javaagent:/home/ec2-user/adot.jar' \\",
+      "OTEL_METRICS_EXPORTER=none \\",
+      "OTEL_SMP_ENABLED=true \\",
+      "OTEL_AWS_SMP_EXPORTER_ENDPOINT=http://localhost:4315 \\",
+      "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:4315 \\",
+      "OTEL_RESOURCE_ATTRIBUTES=aws.hostedin.environment=EC2,service.name=sample-application-${var.test_id} \\",
+      "nohup java -jar main-service.jar &> nohup.out &",
+
+      # The application needs time to come up and reach a steady state, this should not take longer than 30 seconds
+      "sleep 30"
+    ]
+  }
+
+  depends_on = [aws_instance.main_service_instance]
+}
+
+# EC2 instance that hosts the remote sample service.
+resource "aws_instance" "remote_service_instance" {
+  ami                                  = "ami-0b021814637c6d457" # Amazon Linux 2 (free tier)
+  instance_type                        = "t2.micro"
+  instance_initiated_shutdown_behavior = "terminate"
+
+  # Access: generated SSH key and a pre-existing instance profile referenced by name.
+  key_name             = local.ssh_key_name
+  iam_instance_profile = "APP_SIGNALS_EC2_TEST_ROLE"
+
+  # Networking: default security group of the default VPC, with a public IP.
+  vpc_security_group_ids      = [aws_default_vpc.default.default_security_group_id]
+  associate_public_ip_address = true
+
+  tags = {
+    Name = "remote-service-${var.test_id}"
+  }
+}
+
+# Sets up the remote service instance over SSH: installs Java and the CloudWatch agent,
+# downloads the ADOT agent and the remote sample app jar, then starts it in the background.
+resource "null_resource" "remote_service_setup" {
+  connection {
+    type        = "ssh"
+    user        = var.user
+    private_key = local.private_key_content
+    host        = aws_instance.remote_service_instance.public_ip
+  }
+
+  provisioner "remote-exec" {
+    inline = [
+      # Install Java 11
+      "yes | sudo amazon-linux-extras install java-openjdk11",
+
+      # Copy in CW Agent configuration: all whitespace is stripped from the JSON and the
+      # $REGION placeholder is replaced with var.aws_region. path.module makes the lookup
+      # independent of the directory terraform is invoked from.
+      "agent_config='${replace(replace(file("${path.module}/amazon-cloudwatch-agent.json"), "/\\s+/", ""), "$REGION", var.aws_region)}'",
+      "echo \"$agent_config\" > amazon-cloudwatch-agent.json",
+
+      # Get and run CW agent rpm
+      "wget -O cw-agent.rpm ${var.cw_agent_rpm}",
+      "sudo rpm -U ./cw-agent.rpm",
+      "sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:./amazon-cloudwatch-agent.json",
+
+      # Get ADOT
+      "wget -O adot.jar ${var.adot_jar}",
+
+      # Get and run the sample application with configuration
+      "aws s3 cp ${var.sample_remote_app_jar} ./remote-service.jar",
+
+      # Each entry below ends in a backslash, so the shell joins them with the java command
+      # into a single line and the variables apply to that process only.
+      # NOTE(review): the agent path assumes adot.jar was downloaded into /home/ec2-user,
+      # i.e. that var.user is ec2-user - confirm.
+      "JAVA_TOOL_OPTIONS=' -javaagent:/home/ec2-user/adot.jar' \\",
+      "OTEL_METRICS_EXPORTER=none \\",
+      "OTEL_SMP_ENABLED=true \\",
+      "OTEL_AWS_SMP_EXPORTER_ENDPOINT=http://localhost:4315 \\",
+      "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:4315 \\",
+      "OTEL_RESOURCE_ATTRIBUTES=aws.hostedin.environment=EC2,service.name=sample-remote-application-${var.test_id} \\",
+      "nohup java -jar remote-service.jar &> nohup.out &",
+
+      # The application needs time to come up and reach a steady state, this should not take longer than 30 seconds
+      "sleep 30"
+    ]
+  }
+
+  depends_on = [aws_instance.remote_service_instance]
+}