diff --git a/.github/workflows/python-ec2-genai-test.yml b/.github/workflows/python-ec2-genai-test.yml new file mode 100644 index 000000000..7a8d14116 --- /dev/null +++ b/.github/workflows/python-ec2-genai-test.yml @@ -0,0 +1,191 @@ +## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +## SPDX-License-Identifier: Apache-2.0 + +name: Python EC2 LangChain Service Deployment +on: + workflow_call: + inputs: + caller-workflow-name: + required: true + type: string + cpu-architecture: + description: "Permitted values: x86_64 or arm64" + required: false + type: string + default: "x86_64" + staging-wheel-name: + required: false + default: 'aws-opentelemetry-distro' + type: string + +permissions: + id-token: write + contents: read + +env: + E2E_TEST_AWS_REGION: 'us-west-2' + # E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }} + # E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }} + E2E_TEST_ACCOUNT_ID: 571600841604 + ADOT_WHEEL_NAME: ${{ inputs.staging-wheel-name }} + E2E_TEST_ROLE_NAME: github + METRIC_NAMESPACE: genesis + LOG_GROUP_NAME: test/genesis + TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE} + SAMPLE_APP_ZIP: s3://sigv4perfresults/langchain-service.zip + +jobs: + python-ec2-adot-genai: + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v4 + + # - name: Set Get ADOT Wheel command environment variable + # run: | + # if [ "${{ github.event.repository.name }}" = "aws-otel-python-instrumentation" ]; then + # # Reusing the adot-main-build-staging-jar bucket to store the python wheel file + # echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV + # elif [ "${{ env.OTEL_SOURCE }}" == "pypi" ]; then + # echo GET_ADOT_WHEEL_COMMAND="sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV + # else + # latest_release_version=$(curl -sL https://github.com/aws-observability/aws-otel-python-instrumentation/releases/latest | grep -oP '/releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -n 1) + # echo "The latest version is $latest_release_version" + # echo GET_ADOT_WHEEL_COMMAND="wget -O ${{ env.ADOT_WHEEL_NAME }} https://github.com/aws-observability/aws-otel-python-instrumentation/releases/latest/download/aws_opentelemetry_distro-$latest_release_version-py3-none-any.whl \ + # && sudo python${{ env.PYTHON_VERSION }} -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV + # fi + + - name: Initiate Gradlew Daemon + uses: ./.github/workflows/actions/execute_and_retry + continue-on-error: true + with: + command: "./gradlew :validator:build" + cleanup: "./gradlew clean" + max_retry: 3 + sleep_time: 60 + + - name: Generate testing id + run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}-${RANDOM}" >> $GITHUB_ENV + + - name: Generate XRay and W3C trace ID + run: | + ID_1="$(printf '%08x' $(date +%s))" + ID_2="$(openssl rand -hex 12)" + W3C_TRACE_ID="${ID_1}${ID_2}" + XRAY_TRACE_ID="1-${ID_1}-${ID_2}" + echo "XRAY_TRACE_ID=${XRAY_TRACE_ID}" >> $GITHUB_ENV + echo "W3C_TRACE_ID=${W3C_TRACE_ID}" >> $GITHUB_ENV + echo "Generated XRay Trace ID: ${XRAY_TRACE_ID}" + echo "Generated W3C Trace ID: ${W3C_TRACE_ID}" + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }} + aws-region: ${{ env.E2E_TEST_AWS_REGION }} + + - name: Set up terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg" + post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list + && sudo apt update && sudo apt install terraform' + + - name: Initiate Terraform + uses: ./.github/workflows/actions/execute_and_retry + with: + command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2/adot-genai && terraform init && terraform validate" + cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl" + max_retry: 6 + + - name: Deploy service via terraform + working-directory: terraform/python/ec2/adot-genai + run: | + terraform apply -auto-approve \ + -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="service_zip_url=${{ env.SAMPLE_APP_ZIP }}" + + - name: Get deployment info + working-directory: terraform/python/ec2/adot-genai + run: | + echo "INSTANCE_IP=$(terraform output langchain_service_public_ip)" >> $GITHUB_ENV + echo "INSTANCE_ID=$(terraform output langchain_service_instance_id)" >> $GITHUB_ENV + + - name: Wait for Gen AI Chatbot service to be ready + run: | + echo "Waiting for service to be ready at http://${{ env.INSTANCE_IP }}:8000" + for i in {1..60}; do + if curl -f -s "http://${{ env.INSTANCE_IP }}:8000/health" > /dev/null 2>&1; then + echo "Service is ready!" + break + fi + echo "Attempt $i: Service not ready yet, waiting 10 seconds..." + sleep 10 + done + + # Final check + if ! curl -f -s "http://${{ env.INSTANCE_IP }}:8000/health" > /dev/null 2>&1; then + echo "Service failed to become ready after 10 minutes" + exit 1 + fi + + - name: Generate traffic + run: | + cd sample-apps/traffic-generator/genai + chmod +x generate_traffic.sh + export SERVER_URL="http://${{ env.INSTANCE_IP }}:8000" + export NUM_REQUESTS="5" + export DELAY_SECONDS="5" + export TIMEOUT="30" + export TRACE_ID="Root=${XRAY_TRACE_ID};Parent=$(openssl rand -hex 8);Sampled=1" + ./generate_traffic.sh + + - name: Validate generated logs + run: ./gradlew validator:run --args='-c python/ec2/adot-genai/log-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }} + --trace-id ${{ env.W3C_TRACE_ID }}' + + - name: Validate generated traces + if: (success() || failure()) && !cancelled() + run: ./gradlew validator:run --args='-c python/ec2/adot-genai/trace-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }} + --trace-id ${{ env.XRAY_TRACE_ID }}' + + - name: Wait for metrics to be published + if: (success() || failure()) && !cancelled() + run: | + echo "Waiting 60 seconds to ensure EMF metrics are published to CloudWatch" + sleep 60 + + - name: Validate generated metrics + if: (success() || failure()) && !cancelled() + run: ./gradlew validator:run --args='-c python/ec2/adot-genai/metric-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.INSTANCE_IP }}:8000 + --region ${{ env.E2E_TEST_AWS_REGION }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --service-name langchain-traceloop-app + --instance-id ${{ env.INSTANCE_ID }}' + + - name: Cleanup + if: always() + continue-on-error: true + working-directory: terraform/python/ec2/adot-genai + run: | + terraform destroy -auto-approve \ + -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="service_zip_url=${{ env.SAMPLE_APP_ZIP }}" diff --git a/sample-apps/python/genai_service/ec2-requirements.txt b/sample-apps/python/genai_service/ec2-requirements.txt new file mode 100644 index 000000000..9b270e251 --- /dev/null +++ b/sample-apps/python/genai_service/ec2-requirements.txt @@ -0,0 +1,15 @@ +langchain +langchain-community +langchain_aws +opentelemetry-sdk +openinference-instrumentation-langchain +opentelemetry-api +opentelemetry-semantic-conventions +python-dotenv +openlit +botocore +setuptools +boto3 +aws_opentelemetry_distro_genai_beta +fastapi +uvicorn[standard] \ No newline at end of file diff --git a/sample-apps/python/genai_service/server.py b/sample-apps/python/genai_service/server.py new file mode 100644 index 000000000..28572c7b7 --- /dev/null +++ b/sample-apps/python/genai_service/server.py @@ -0,0 +1,114 @@ +import os +from typing import Dict, List +from dotenv import load_dotenv +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from langchain_aws import ChatBedrock +from langchain.prompts import ChatPromptTemplate +from langchain.chains import LLMChain +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from openinference.instrumentation.langchain import LangChainInstrumentor + +# Load environment variables +load_dotenv() + +# Set up OpenTelemetry with BOTH exporters +tracer_provider = TracerProvider() + +# Add Console exporter +console_exporter = ConsoleSpanExporter() +console_processor = BatchSpanProcessor(console_exporter) +tracer_provider.add_span_processor(console_processor) + +# Add OTLP exporter +otlp_exporter = OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces") +otlp_processor = BatchSpanProcessor(otlp_exporter) +tracer_provider.add_span_processor(otlp_processor) + +# Set as global provider +trace.set_tracer_provider(tracer_provider) + +# Instrument LangChain with OpenInference +LangChainInstrumentor().instrument(tracer_provider=tracer_provider) + +# Initialize FastAPI app +app = FastAPI(title="LangChain Bedrock OpenInference API", version="1.0.0") + +# Initialize the LLM with AWS Bedrock +llm = ChatBedrock( + model_id="anthropic.claude-3-haiku-20240307-v1:0", + model_kwargs={ + "temperature": 0.7, + "max_tokens": 500 + }, + region_name=os.getenv("AWS_DEFAULT_REGION", "us-west-2") +) + +# Create a prompt template +prompt = ChatPromptTemplate.from_template( + "You are a helpful assistant. The user says: {input}. Provide a helpful response." +) + +# Create a chain +chain = LLMChain(llm=llm, prompt=prompt) + +# Request models +class ChatRequest(BaseModel): + message: str + +class BatchChatRequest(BaseModel): + messages: List[str] + +class ChatResponse(BaseModel): + response: str + +class BatchChatResponse(BaseModel): + responses: List[Dict[str, str]] + +# Sample prompts for testing +SAMPLE_PROMPTS = [ + "What is the capital of France?", + "How do I make a cup of coffee?", + "What are the benefits of exercise?", + "Explain quantum computing in simple terms", + "What's the best way to learn programming?" +] + +@app.get("/") +async def root(): + return { + "message": "LangChain Bedrock OpenInference API is running!", + "endpoints": { + "/ai-chat": "Single message chat endpoint", + "/hello": "Simple hello endpoint" + } + } + +@app.post("/ai-chat", response_model=ChatResponse) +async def chat(request: ChatRequest): + """ + Chat endpoint that processes a single user message through AWS Bedrock + """ + try: + # Process the input through the chain + result = await chain.ainvoke({"input": request.message}) + return ChatResponse(response=result["text"]) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/health") +async def health(): + """Health check endpoint""" + return {"status": "healthy", "llm": "AWS Bedrock Claude 3 Haiku"} + +if __name__ == "__main__": + import uvicorn + print("Starting FastAPI server with AWS Bedrock and OpenInference instrumentation...") + print("Make sure AWS credentials are configured") + print("Server will run on http://localhost:8000") + print("API docs available at http://localhost:8000/docs") + + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/sample-apps/traffic-generator/genai/generate_traffic.sh b/sample-apps/traffic-generator/genai/generate_traffic.sh new file mode 100644 index 000000000..6d2e2666a --- /dev/null +++ b/sample-apps/traffic-generator/genai/generate_traffic.sh @@ -0,0 +1,132 @@ +#!/bin/bash + +# Configuration +SERVER_URL="${SERVER_URL:-http://localhost:8000}" +ENDPOINT="${SERVER_URL}/ai-chat" +DELAY_SECONDS="${DELAY_SECONDS:-3600}" # Default 1 hour (3600 seconds) between requests +NUM_REQUESTS="${NUM_REQUESTS:-0}" # 0 means infinite +TIMEOUT="${TIMEOUT:-30}" # Request timeout in seconds + +# Color codes for output +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Array of sample messages +MESSAGES=( + "What is the weather like today?" + "Tell me a joke" + "How do I make a cup of coffee?" + "What are the benefits of exercise?" + "Explain quantum computing in simple terms" + "What's the capital of France?" + "How do I learn programming?" + "What are some healthy breakfast ideas?" + "Tell me about artificial intelligence" + "How can I improve my productivity?" + "What's the difference between a list and a tuple in Python?" + "Explain the concept of microservices" + "What are some best practices for API design?" + "How does machine learning work?" + "What's the purpose of unit testing?" +) + +# Function to send a request +send_request() { + local message="$1" + local request_num="$2" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + echo -e "${YELLOW}[$timestamp] Request #$request_num${NC}" + echo "Message: \"$message\"" + + # Use environment variables or defaults for headers + local trace_id_header="${TRACE_ID:-Root=1-5759e988-bd862e3fe1be46a994272793;Parent=53995c3f42cd8ad8;Sampled=1}" + + echo "Using Trace ID: $trace_id_header" + + # Send request with timeout + response=$(curl -s -X POST "$ENDPOINT" \ + -H "Content-Type: application/json" \ + -H "X-Amzn-Trace-Id: $trace_id_header" \ + -d "{\"message\": \"$message\"}" \ + -m "$TIMEOUT" \ + -w "\nHTTP_STATUS:%{http_code}\nTIME_TOTAL:%{time_total}") + + # Extract HTTP status and response time + http_status=$(echo "$response" | grep "HTTP_STATUS:" | cut -d: -f2) + time_total=$(echo "$response" | grep "TIME_TOTAL:" | cut -d: -f2) + body=$(echo "$response" | sed '/HTTP_STATUS:/d' | sed '/TIME_TOTAL:/d') + + if [ "$http_status" = "200" ]; then + echo -e "${GREEN}✓ Success${NC} (${time_total}s)" + echo "Response: $body" + else + echo -e "${RED}✗ Error: HTTP $http_status${NC}" + if [ -n "$body" ]; then + echo "Response: $body" + fi + fi + echo "---" +} + +# Trap Ctrl+C to exit gracefully +trap 'echo -e "\n${YELLOW}Traffic generation stopped by user${NC}"; exit 0' INT + +# Main execution +echo -e "${GREEN}Starting traffic generation to $ENDPOINT${NC}" +echo "Configuration:" +echo " - Delay between requests: ${DELAY_SECONDS}s" +echo " - Request timeout: ${TIMEOUT}s" +echo " - Number of requests: ${NUM_REQUESTS} (0 = infinite)" +echo " - Requests per minute: ~$((60 / DELAY_SECONDS))" +echo -e "${YELLOW}Press Ctrl+C to stop${NC}" +echo "==================================" + +# Check if server is reachable +echo "Checking server health..." +health_check=$(curl -s -o /dev/null -w "%{http_code}" "$SERVER_URL/health" -m 5) +if [ "$health_check" != "200" ]; then + echo -e "${RED}Warning: Server health check failed (HTTP $health_check)${NC}" + echo "Make sure the server is running at $SERVER_URL" + read -p "Continue anyway? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + exit 1 + fi +fi + +count=0 +start_time=$(date +%s) + +while true; do + # Select a random message + random_index=$((RANDOM % ${#MESSAGES[@]})) + message="${MESSAGES[$random_index]}" + + # Increment counter + count=$((count + 1)) + + # Send the request + send_request "$message" "$count" + + # Check if we've reached the limit + if [ "$NUM_REQUESTS" -gt 0 ] && [ "$count" -ge "$NUM_REQUESTS" ]; then + end_time=$(date +%s) + duration=$((end_time - start_time)) + echo -e "${GREEN}Completed $count requests in ${duration}s${NC}" + break + fi + + # Show progress every 10 requests + if [ $((count % 10)) -eq 0 ]; then + current_time=$(date +%s) + elapsed=$((current_time - start_time)) + rate=$(echo "scale=2; $count / $elapsed * 60" | bc 2>/dev/null || echo "N/A") + echo -e "${YELLOW}Progress: $count requests sent, Rate: ${rate} req/min${NC}" + fi + + # Wait before next request + sleep "$DELAY_SECONDS" +done \ No newline at end of file diff --git a/terraform/python/ec2/adot-genai/main.tf b/terraform/python/ec2/adot-genai/main.tf new file mode 100644 index 000000000..a789da78f --- /dev/null +++ b/terraform/python/ec2/adot-genai/main.tf @@ -0,0 +1,129 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } +} + +provider "aws" { + region = var.aws_region +} + +resource "aws_default_vpc" "default" { + tags = { + Name = "Default VPC" + } +} + +resource "tls_private_key" "ssh_key" { + algorithm = "RSA" + rsa_bits = 4096 +} + +resource "aws_key_pair" "aws_ssh_key" { + key_name = "instance_key-${var.test_id}" + public_key = tls_private_key.ssh_key.public_key_openssh +} + +locals { + ssh_key_name = aws_key_pair.aws_ssh_key.key_name + private_key_content = tls_private_key.ssh_key.private_key_pem +} + +data "aws_ami" "ami" { + owners = ["amazon"] + most_recent = true + filter { + name = "name" + values = ["al20*-ami-minimal-*-${var.cpu_architecture}"] + } + filter { + name = "state" + values = ["available"] + } + filter { + name = "architecture" + values = [var.cpu_architecture] + } + filter { + name = "image-type" + values = ["machine"] + } + + filter { + name = "root-device-name" + values = ["/dev/xvda"] + } + + filter { + name = "root-device-type" + values = ["ebs"] + } + + filter { + name = "virtualization-type" + values = ["hvm"] + } +} + +resource "aws_instance" "main_service_instance" { + ami = data.aws_ami.ami.id + instance_type = var.cpu_architecture == "x86_64" ? "t3.medium" : "t4g.medium" + key_name = local.ssh_key_name + iam_instance_profile = "APP_SIGNALS_EC2_TEST_ROLE" + vpc_security_group_ids = [aws_default_vpc.default.default_security_group_id] + associate_public_ip_address = true + instance_initiated_shutdown_behavior = "terminate" + + metadata_options { + http_tokens = "required" + } + + root_block_device { + volume_size = 5 + } + + user_data = base64encode(<<-EOF +#!/bin/bash +yum update -y +yum install -y python3.12 python3.12-pip unzip + +mkdir -p /app +cd /app +aws s3 cp ${var.service_zip_url} langchain-service.zip +unzip langchain-service.zip + +# Having issues installing dependencies from ec2-requirements.txt as these dependencies are quite large and cause timeouts/memory issues on EC2, manually installing instead +python3.12 -m pip install fastapi uvicorn[standard] --no-cache-dir +python3.12 -m pip install boto3 botocore setuptools --no-cache-dir +python3.12 -m pip install opentelemetry-api opentelemetry-sdk opentelemetry-semantic-conventions --no-cache-dir +python3.12 -m pip install langchain langchain-community langchain_aws --no-cache-dir +python3.12 -m pip install python-dotenv openlit --no-cache-dir +python3.12 -m pip install openinference-instrumentation-langchain aws_opentelemetry_distro_genai_beta --no-cache-dir + +export AWS_REGION=${var.aws_region} +export OTEL_PROPAGATORS=tracecontext,xray,baggage +export OTEL_PYTHON_DISTRO=aws_distro +export OTEL_PYTHON_CONFIGURATOR=aws_configurator +export OTEL_EXPORTER_OTLP_LOGS_HEADERS="x-aws-log-group=test/genesis,x-aws-log-stream=default,x-aws-metric-namespace=genesis" +export OTEL_RESOURCE_ATTRIBUTES="service.name=langchain-traceloop-app" +export AGENT_OBSERVABILITY_ENABLED="true" + +nohup opentelemetry-instrument python3.12 server.py > /var/log/langchain-service.log 2>&1 & +EOF + ) + + tags = { + Name = "langchain-service-${var.test_id}" + } +} + +output "langchain_service_instance_id" { + value = aws_instance.main_service_instance.id +} + +output "langchain_service_public_ip" { + value = aws_instance.main_service_instance.public_ip +} \ No newline at end of file diff --git a/terraform/python/ec2/adot-genai/variables.tf b/terraform/python/ec2/adot-genai/variables.tf new file mode 100644 index 000000000..364ddb2bd --- /dev/null +++ b/terraform/python/ec2/adot-genai/variables.tf @@ -0,0 +1,38 @@ +# ------------------------------------------------------------------------ +# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# ------------------------------------------------------------------------- + +variable "aws_region" { + default = "us-west-2" +} + +variable "test_id" { + default = "dummy-123" +} + +variable "service_zip_url" { + description = "S3 URL for the service zip file" +} + +variable "language_version" { + default = "3.12" +} + +variable "cpu_architecture" { + default = "x86_64" +} + +variable "user" { + default = "ec2-user" +} \ No newline at end of file diff --git a/validator/src/main/java/com/amazon/aoc/App.java b/validator/src/main/java/com/amazon/aoc/App.java index 5074eccf4..674cfb2ef 100644 --- a/validator/src/main/java/com/amazon/aoc/App.java +++ b/validator/src/main/java/com/amazon/aoc/App.java @@ -157,6 +157,9 @@ public class App implements Callable { defaultValue = "defaultDnsName") private String privateDnsName; + @CommandLine.Option(names = {"--trace-id"}) + private String traceId; + private static final String TEST_CASE_DIM_KEY = "testcase"; private static final String CANARY_NAMESPACE = "Otel/Canary"; private static final String CANARY_METRIC_NAME = "Success"; @@ -196,6 +199,7 @@ public Integer call() throws Exception { context.setInstanceAmi(this.instanceAmi); context.setInstanceId(this.instanceId); context.setPrivateDnsName(this.privateDnsName); + context.setTraceId(this.traceId); log.info(context); diff --git a/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java b/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java index dc3427945..5a206444f 100644 --- a/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java +++ b/validator/src/main/java/com/amazon/aoc/fileconfigs/PredefinedExpectedTemplate.java @@ -246,6 +246,11 @@ public enum PredefinedExpectedTemplate implements FileConfig { /** Python EC2 ADOT SigV4 Log Exporter Test Case Validation */ PYTHON_EC2_ADOT_OTLP_LOG("/expected-data-template/python/ec2/adot-aws-otlp/application-log.mustache"), + /** Python EC2 ADOT Gen AI Test Case Validation */ + PYTHON_EC2_ADOT_GENAI_LOG("/expected-data-template/python/ec2/adot-genai/genai-log.mustache"), + PYTHON_EC2_ADOT_GENAI_TRACE("/expected-data-template/python/ec2/adot-genai/genai-trace.mustache"), + PYTHON_EC2_ADOT_GENAI_METRIC("/expected-data-template/python/ec2/adot-genai/genai-metric.mustache"), + /** Python K8S Test Case Validations */ PYTHON_K8S_OUTGOING_HTTP_CALL_LOG("/expected-data-template/python/k8s/outgoing-http-call-log.mustache"), PYTHON_K8S_OUTGOING_HTTP_CALL_METRIC("/expected-data-template/python/k8s/outgoing-http-call-metric.mustache"), diff --git a/validator/src/main/java/com/amazon/aoc/models/Context.java b/validator/src/main/java/com/amazon/aoc/models/Context.java index 6e607591a..6f5db130c 100644 --- a/validator/src/main/java/com/amazon/aoc/models/Context.java +++ b/validator/src/main/java/com/amazon/aoc/models/Context.java @@ -67,6 +67,8 @@ public class Context { private String privateDnsName; + private String traceId; + private ECSContext ecsContext; private CloudWatchContext cloudWatchContext; diff --git a/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java b/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java index 49af8d396..dec8d1dda 100644 --- a/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java +++ b/validator/src/main/java/com/amazon/aoc/validators/CWLogValidator.java @@ -27,6 +27,8 @@ import com.github.wnameless.json.flattener.FlattenMode; import com.github.wnameless.json.flattener.JsonFlattener; import com.github.wnameless.json.flattener.JsonifyArrayList; + +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -68,7 +70,23 @@ public void validate() throws Exception { Map actualLog; if (isAwsOtlpLog(expectedAttributes)) { - actualLog = this.getActualAwsOtlpLog(); + String otlpLogFilterPattern = String.format( + "{ ($.resource.attributes.['service.name'] = \"%s\") && ($.body = \"This is a custom log for validation testing\") }", + context.getServiceName() + ); + String addTraceIdFilter = (context.getTraceId() != null ? "&& ($.traceId = \"" + context.getTraceId() + "\") " : ""); + String genAILogFilterPattern = String.format( + "{ ($.resource.attributes.['service.name'] = \"%s\") " + + "&& ($.body.output.messages[0].role = \"assistant\") " + + "&& ($.body.input.messages[0].role = \"user\") " + + "&& ($.body.output.messages[1] NOT EXISTS) " + + "&& ($.body.input.messages[1] NOT EXISTS) " + + "%s" + + "}", + context.getServiceName(), + addTraceIdFilter + ); + actualLog = this.getActualAwsOtlpLog(Arrays.asList(otlpLogFilterPattern, genAILogFilterPattern)); } else { String operation = (String) expectedAttributes.get("Operation"); String remoteService = (String) expectedAttributes.get("RemoteService"); @@ -153,9 +171,12 @@ private JsonifyArrayList> getExpectedAttributes() throws Exc private boolean isAwsOtlpLog(Map expectedAttributes) { // OTLP SigV4 logs have 'body' as a top-level attribute - return expectedAttributes.containsKey("body") && - expectedAttributes.containsKey("severityNumber") && - expectedAttributes.containsKey("severityText"); + boolean hasBodyKey = expectedAttributes.keySet().stream() + .anyMatch(key -> key.startsWith("body")); + + return expectedAttributes.containsKey("severityNumber") && + expectedAttributes.containsKey("severityText") && + hasBodyKey; } private Map getActualLog( @@ -234,25 +255,33 @@ private Map getActualOtelSpanLog(String operation, String remote return JsonFlattener.flattenAsMap(retrievedLogs.get(0).getMessage()); } - private Map getActualAwsOtlpLog() throws Exception { - String filterPattern= String.format( - "{ ($.resource.attributes.['service.name'] = \"%s\") && ($.body = \"This is a custom log for validation testing\") }", - context.getServiceName() - ); - log.info("Filter Pattern for OTLP Log Search: " + filterPattern); + private Map getActualAwsOtlpLog(List filterPatterns) throws Exception { + log.info("Filter patterns {}", filterPatterns); - List retrievedLogs = - this.cloudWatchService.filterLogs( - context.getLogGroup(), - filterPattern, - System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(5), - 10); + List retrievedLogs = null; + + for (String pattern : filterPatterns) { + log.info("Attempting filter Pattern for OTLP Log Search: {}", pattern); + + retrievedLogs = this.cloudWatchService.filterLogs( + context.getLogGroup(), + pattern, + System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(5), + 10); + + if (retrievedLogs != null && !retrievedLogs.isEmpty()) { + log.info("Found logs for filter pattern {}", pattern); + break; + } + } if (retrievedLogs == null || retrievedLogs.isEmpty()) { - throw new BaseException(ExceptionCode.EMPTY_LIST); + throw new BaseException(ExceptionCode.EMPTY_LIST); } - return JsonFlattener.flattenAsMap(retrievedLogs.get(0).getMessage()); + return new JsonFlattener(retrievedLogs.get(0).getMessage()) + .withFlattenMode(FlattenMode.KEEP_ARRAYS) + .flattenAsMap(); } @Override diff --git a/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java b/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java index 3553bc1a8..2c5b01fbb 100644 --- a/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java +++ b/validator/src/main/java/com/amazon/aoc/validators/CWMetricValidator.java @@ -91,6 +91,14 @@ public void validate() throws Exception { RetryHelper.retry( maxRetryCount, () -> { + + // Special handling for Genesis path - just check if any metrics exists in namespace + // since ADOT will just capture any OTel Metrics emitted from the instrumentation library used + // and convert them into EMF metrics, it's impossible to create a validation template for this. + if (validationConfig.getHttpPath().contains("ai-chat")) { + validateAnyMetricExists(); + return; + } // We will query the Service, RemoteService, and RemoteTarget dimensions to ensure we // get all metrics from all aggregations, specifically the [RemoteService] aggregation. List serviceNames = @@ -210,6 +218,17 @@ private void compareMetricLists(List expectedMetricList, List ac matchAny.stream().findAny().get(), actualMetricSnapshot)); } } + + private void validateAnyMetricExists() throws Exception { + // This will grab all metrics from last 3 hours + // See: https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_ListMetrics.html + List allMetricsInNamespace = cloudWatchService.listMetrics(context.getMetricNamespace(), null, null, null); + log.info("Found {} metrics in namespace {}", allMetricsInNamespace.size(), context.getMetricNamespace()); + if (allMetricsInNamespace.isEmpty()) { + throw new BaseException(ExceptionCode.EXPECTED_METRIC_NOT_FOUND, "No metrics found in namespace: " + context.getMetricNamespace()); + } + log.info("validation is passed for path {}", validationConfig.getHttpPath()); + } private List listMetricFromCloudWatch( CloudWatchService cloudWatchService, diff --git a/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java b/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java index 21b762b9e..fa16dff4b 100644 --- a/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java +++ b/validator/src/main/java/com/amazon/aoc/validators/TraceValidator.java @@ -147,15 +147,15 @@ private Map getTrace() throws Exception { validationConfig.getHttpMethod().toUpperCase(), validationConfig.getHttpPath())); } + + if (validationConfig.getHttpPath().contains("ai-chat")) { + return this.getTraceById(Collections.singletonList(context.getTraceId())); + } + log.info("Trace Filter: {}", traceFilter); List retrieveTraceLists = xrayService.searchTraces(traceFilter); List traceIdLists = Collections.singletonList(retrieveTraceLists.get(0).getId()); - List retrievedTraceList = xrayService.listTraceByIds(traceIdLists); - - if (retrievedTraceList == null || retrievedTraceList.isEmpty()) { - throw new BaseException(ExceptionCode.EMPTY_LIST); - } - return this.flattenDocument(retrievedTraceList.get(0).getSegments()); + return getTraceById(traceIdLists); } private Map flattenDocument(List segmentList) { @@ -190,6 +190,14 @@ private Map flattenDocument(List segmentList) { return JsonFlattener.flattenAsMap(segmentsJson.toString()); } + private Map getTraceById(List traceIdLists) throws Exception { + List retrievedTraceList = xrayService.listTraceByIds(traceIdLists); + if (retrievedTraceList == null || retrievedTraceList.isEmpty()) { + throw new BaseException(ExceptionCode.EMPTY_LIST); + } + return this.flattenDocument(retrievedTraceList.get(0).getSegments()); + } + // This method will get the stored traces private Map getStoredTrace() throws Exception { Map flattenedJsonMapForStoredTraces = null; diff --git a/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-log.mustache b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-log.mustache new file mode 100644 index 000000000..1eb79a202 --- /dev/null +++ b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-log.mustache @@ -0,0 +1,36 @@ +[{ + "resource": { + "attributes": { + "aws.local.service": "{{serviceName}}", + "aws.service.type": "gen_ai_agent", + "service.name": "{{serviceName}}" + } + }, + "scope": { + "name": "openinference.instrumentation.langchain" + }, + "severityNumber": "^[0-9]+$", + "severityText": ".*", + "body": { + "output": { + "messages": [ + { + "content": "^.+$", + "role": "assistant" + } + ] + }, + "input": { + "messages": [ + { + "content": "^.+$", + "role": "user" + } + ] + } + }, + "attributes": { + "event.name": "openinference.instrumentation.langchain" + }, + "traceId": "{{traceId}}" +}] \ No newline at end of file diff --git a/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-metric.mustache b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-metric.mustache new file mode 100644 index 000000000..04d120344 --- /dev/null +++ b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-metric.mustache @@ -0,0 +1,4 @@ +- + metricName: ANY_VALUE + namespace: {{metricNamespace}} + dimensions: [] \ No newline at end of file diff --git a/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-trace.mustache b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-trace.mustache new file mode 100644 index 000000000..bbad8e1ae --- /dev/null +++ b/validator/src/main/resources/expected-data-template/python/ec2/adot-genai/genai-trace.mustache @@ -0,0 +1,20 @@ +[{ + "name": "^{{serviceName}}$", + "trace_id": "^{{traceId}}$", + "http": { + "request": { + "url": "^.*/ai-chat$", + "method": "^POST$" + } + }, + "aws": { + "service.type": "^gen_ai_agent$" + }, + "annotations": { + "aws.local.service": "^{{serviceName}}$", + "aws.local.operation": "^POST /ai-chat$" + }, + "metadata": { + "service.name": "^{{serviceName}}$" + } +}] \ No newline at end of file diff --git a/validator/src/main/resources/validations/python/ec2/adot-genai/log-validation.yml b/validator/src/main/resources/validations/python/ec2/adot-genai/log-validation.yml new file mode 100644 index 000000000..92c64fd06 --- /dev/null +++ b/validator/src/main/resources/validations/python/ec2/adot-genai/log-validation.yml @@ -0,0 +1,6 @@ +- + validationType: "cw-log" + httpPath: "ai-chat" + httpMethod: "post" + callingType: "http" + expectedLogStructureTemplate: "PYTHON_EC2_ADOT_GENAI_LOG" \ No newline at end of file diff --git a/validator/src/main/resources/validations/python/ec2/adot-genai/metric-validation.yml b/validator/src/main/resources/validations/python/ec2/adot-genai/metric-validation.yml new file mode 100644 index 000000000..2fd94b30a --- /dev/null +++ b/validator/src/main/resources/validations/python/ec2/adot-genai/metric-validation.yml @@ -0,0 +1,6 @@ +- + validationType: "cw-metric" + httpPath: "ai-chat" + httpMethod: "post" + callingType: "http" + expectedMetricTemplate: "PYTHON_EC2_ADOT_GENAI_METRIC" \ No newline at end of file diff --git a/validator/src/main/resources/validations/python/ec2/adot-genai/trace-validation.yml b/validator/src/main/resources/validations/python/ec2/adot-genai/trace-validation.yml new file mode 100644 index 000000000..34a3573c4 --- /dev/null +++ b/validator/src/main/resources/validations/python/ec2/adot-genai/trace-validation.yml @@ -0,0 +1,6 @@ +- + validationType: "trace" + httpPath: "ai-chat" + httpMethod: "post" + callingType: "http" + expectedTraceTemplate: "PYTHON_EC2_ADOT_GENAI_TRACE" \ No newline at end of file