diff --git a/resources/aws/README.md b/resources/aws/README.md index 1bb168ca36..e20610ead6 100644 --- a/resources/aws/README.md +++ b/resources/aws/README.md @@ -30,7 +30,12 @@ require 'opentelemetry/sdk' require 'opentelemetry/resource/detector' OpenTelemetry::SDK.configure do |c| - c.resource = OpenTelemetry::Resource::Detector::AWS.detect + # Specify which AWS resource detectors to use + c.resource = OpenTelemetry::Resource::Detector::AWS.detect([:ec2, :ecs]) + + # Or use just one detector + c.resource = OpenTelemetry::Resource::Detector::AWS.detect([:ec2]) + c.resource = OpenTelemetry::Resource::Detector::AWS.detect([:ecs]) end ``` @@ -52,7 +57,25 @@ Populates `cloud` and `host` for processes running on Amazon EC2, including abst | `host.name` | Value of hostname from `/latest/meta-data/hostname` request | | `host.type` | Value of `instanceType` from `/latest/dynamic/instance-identity/document` request | -Additional AWS platforms (ECS, EKS, Lambda) will be supported in future versions. +### AWS ECS Detector + + +Populates `cloud`, `container`, and AWS ECS-specific attributes for processes running on Amazon ECS. +| Resource Attribute | Description | +|--------------------|-------------| +| `cloud.platform` | The cloud platform. In this context, it's always "aws_ecs" | +| `cloud.provider` | The cloud provider. In this context, it's always "aws" | +| `container.id` | The container ID from the `/proc/self/cgroup` file | +| `container.name` | The hostname of the container | +| `aws.ecs.container.arn` | The hostname of the container | +| `aws.ecs.cluster.arn` | The ARN of the ECS cluster | +| `aws.ecs.launchtype` | The launch type for the ECS task (e.g., "fargate" or "ec2") | +| `aws.ecs.task.arn` | The ARN of the ECS task | +| `aws.log.group.names` | The CloudWatch log group names (if awslogs driver is used) | +| `aws.log.stream.names` | The CloudWatch log stream names (if awslogs driver is used) | +| `aws.log.stream.arns` | The CloudWatch log stream ARNs (if awslogs driver is used) | + +Additional AWS platforms (EKS, Lambda) will be supported in future versions. ## License diff --git a/resources/aws/lib/opentelemetry/resource/detector/aws.rb b/resources/aws/lib/opentelemetry/resource/detector/aws.rb index ac5060b07c..c8c0a421bb 100644 --- a/resources/aws/lib/opentelemetry/resource/detector/aws.rb +++ b/resources/aws/lib/opentelemetry/resource/detector/aws.rb @@ -5,6 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 require 'opentelemetry/resource/detector/aws/ec2' +require 'opentelemetry/resource/detector/aws/ecs' module OpenTelemetry module Resource @@ -13,12 +14,31 @@ module Detector module AWS extend self - def detect - # This will be a composite of all the AWS platform detectors - EC2.detect + RESOURCE = OpenTelemetry::SDK::Resources::Resource - # For now, return the EC2 resource directly - # In the future, we'll implement detection for EC2, ECS, EKS, etc. + # Get resources from specified AWS resource detectors + # + # @param detectors [Array] List of detectors to use (e.g., :ec2) + # @return [OpenTelemetry::SDK::Resources::Resource] The detected AWS resources + def detect(detectors = []) + return RESOURCE.create({}) if detectors.empty? + + resources = detectors.map do |detector| + case detector + when :ec2 + EC2.detect + when :ecs + ECS.detect + else + OpenTelemetry.logger.warn("Unknown AWS resource detector: #{detector}") + OpenTelemetry::SDK::Resources::Resource.create({}) + end + end + + # Merge all resources into a single resource + resources.reduce(OpenTelemetry::SDK::Resources::Resource.create({})) do |merged, resource| + merged.merge(resource) + end end end end diff --git a/resources/aws/lib/opentelemetry/resource/detector/aws/ecs.rb b/resources/aws/lib/opentelemetry/resource/detector/aws/ecs.rb new file mode 100644 index 0000000000..665f9fff98 --- /dev/null +++ b/resources/aws/lib/opentelemetry/resource/detector/aws/ecs.rb @@ -0,0 +1,172 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'net/http' +require 'json' +require 'socket' +require 'opentelemetry/common' + +module OpenTelemetry + module Resource + module Detector + module AWS + # ECS contains detect class method for determining the ECS resource attributes + module ECS + extend self + + # Container ID length from cgroup file + CONTAINER_ID_LENGTH = 64 + + # HTTP request timeout in seconds + HTTP_TIMEOUT = 5 + + # Create a constant for resource semantic conventions + RESOURCE = OpenTelemetry::SemanticConventions::Resource + + def detect + # Return empty resource if not running on ECS + metadata_uri = ENV.fetch('ECS_CONTAINER_METADATA_URI', nil) + metadata_uri_v4 = ENV.fetch('ECS_CONTAINER_METADATA_URI_V4', nil) + + return OpenTelemetry::SDK::Resources::Resource.create({}) if metadata_uri.nil? && metadata_uri_v4.nil? + + resource_attributes = {} + container_id = fetch_container_id + + # Base ECS resource attributes + resource_attributes[RESOURCE::CLOUD_PROVIDER] = 'aws' + resource_attributes[RESOURCE::CLOUD_PLATFORM] = 'aws_ecs' + resource_attributes[RESOURCE::CONTAINER_NAME] = Socket.gethostname + resource_attributes[RESOURCE::CONTAINER_ID] = container_id unless container_id.empty? + + # If v4 endpoint is not available, return basic resource + return OpenTelemetry::SDK::Resources::Resource.create(resource_attributes) if metadata_uri_v4.nil? + + begin + # Fetch container and task metadata + container_metadata = JSON.parse(http_get(metadata_uri_v4.to_s)) + task_metadata = JSON.parse(http_get("#{metadata_uri_v4}/task")) + + task_arn = task_metadata['TaskARN'] + base_arn = task_arn[0..task_arn.rindex(':') - 1] + + cluster = task_metadata['Cluster'] + cluster_arn = cluster.start_with?('arn:') ? cluster : "#{base_arn}:cluster/#{cluster}" + + # Set ECS-specific attributes + resource_attributes[RESOURCE::AWS_ECS_CONTAINER_ARN] = container_metadata['ContainerARN'] + resource_attributes[RESOURCE::AWS_ECS_CLUSTER_ARN] = cluster_arn + resource_attributes[RESOURCE::AWS_ECS_LAUNCHTYPE] = task_metadata['LaunchType'].downcase + resource_attributes[RESOURCE::AWS_ECS_TASK_ARN] = task_arn + resource_attributes[RESOURCE::AWS_ECS_TASK_FAMILY] = task_metadata['Family'] + resource_attributes[RESOURCE::AWS_ECS_TASK_REVISION] = task_metadata['Revision'] + + # Add logging attributes if awslogs is used + logs_attributes = get_logs_resource(container_metadata) + resource_attributes.merge!(logs_attributes) + rescue StandardError => e + OpenTelemetry.logger.debug("ECS resource detection failed: #{e.message}") + return OpenTelemetry::SDK::Resources::Resource.create({}) + end + + # Filter out nil or empty values + resource_attributes.delete_if { |_key, value| value.nil? || value.empty? } + OpenTelemetry::SDK::Resources::Resource.create(resource_attributes) + end + + private + + # Fetches container ID from /proc/self/cgroup file + # + # @return [String] The container ID or empty string if not found + def fetch_container_id + begin + File.open('/proc/self/cgroup', 'r') do |file| + file.each_line do |line| + line = line.strip + # Look for container ID (64 chars) at the end of the line + return line[-CONTAINER_ID_LENGTH..-1] if line.length > CONTAINER_ID_LENGTH + end + end + rescue Errno::ENOENT => e + OpenTelemetry.logger.debug("Failed to get container ID on ECS: #{e.message}") + end + + '' + end + + # Extracting logging-related resource attributes + # + # @param container_metadata [Hash] Container metadata from ECS metadata endpoint + # @returhn [Hash] Resource attributes for logging configuration + def get_logs_resource(container_metadata) + log_attributes = {} + + if container_metadata['LogDriver'] == 'awslogs' + log_options = container_metadata['LogOptions'] + + if log_options + logs_region = log_options['awslogs-region'] + logs_group_name = log_options['awslogs-group'] + logs_stream_name = log_options['awslogs-stream'] + + container_arn = container_metadata['ContainerARN'] + + # Parse region from ARN if not specified in log options + if logs_region.nil? || logs_region.empty? + region_match = container_arn.match(/arn:aws:ecs:([^:]+):.*/) + logs_region = region_match[1] if region_match + end + + # Parse account ID from ARN + account_match = container_arn.match(/arn:aws:ecs:[^:]+:([^:]+):.*/) + aws_account = account_match[1] if account_match + + logs_group_arn = nil + logs_stream_arn = nil + + if logs_region && aws_account + logs_group_arn = "arn:aws:logs:#{logs_region}:#{aws_account}:log-group:#{logs_group_name}" if logs_group_name + + logs_stream_arn = "arn:aws:logs:#{logs_region}:#{aws_account}:log-group:#{logs_group_name}:log-stream:#{logs_stream_name}" if logs_stream_name && logs_group_name + end + + log_attributes[RESOURCE::AWS_LOG_GROUP_NAMES] = [logs_group_name].compact + log_attributes[RESOURCE::AWS_LOG_GROUP_ARNS] = [logs_group_arn].compact + log_attributes[RESOURCE::AWS_LOG_STREAM_NAMES] = [logs_stream_name].compact + log_attributes[RESOURCE::AWS_LOG_STREAM_ARNS] = [logs_stream_arn].compact + else + OpenTelemetry.logger.debug("The metadata endpoint v4 has returned 'awslogs' as 'LogDriver', but there is no 'LogOptions' data") + end + end + + log_attributes + end + + # Makes an HTTP GET request to the specified URL + # + # @param url [String] The URL to request + # @return [String] The response body + def http_get(url) + uri = URI.parse(url) + request = Net::HTTP::Get.new(uri) + + http = Net::HTTP.new(uri.host, uri.port) + http.open_timeout = HTTP_TIMEOUT + http.read_timeout = HTTP_TIMEOUT + + OpenTelemetry::Common::Utilities.untraced do + response = http.request(request) + raise "HTTP request failed with status #{response.code}" unless response.is_a?(Net::HTTPSuccess) + + response.body + end + end + end + end + end + end +end diff --git a/resources/aws/test/opentelemetry/resource/detector/aws/ecs_test.rb b/resources/aws/test/opentelemetry/resource/detector/aws/ecs_test.rb new file mode 100644 index 0000000000..7794c20615 --- /dev/null +++ b/resources/aws/test/opentelemetry/resource/detector/aws/ecs_test.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +# Copyright The OpenTelemetry Authors +# +# SPDX-License-Identifier: Apache-2.0 + +require 'test_helper' + +describe OpenTelemetry::Resource::Detector::AWS::ECS do + let(:detector) { OpenTelemetry::Resource::Detector::AWS::ECS } + + describe '.detect' do + let(:metadata_uri) { 'http://169.254.170.2/v3' } + let(:metadata_uri_v4) { 'http://169.254.170.2/v4' } + let(:hostname) { 'test-container' } + + let(:container_metadata) do + { + 'ContainerARN' => 'arn:aws:ecs:us-west-2:123456789012:container/container-id', + 'LogDriver' => 'awslogs', + 'LogOptions' => { + 'awslogs-region' => 'us-west-2', + 'awslogs-group' => 'my-log-group', + 'awslogs-stream' => 'my-log-stream' + } + } + end + + let(:task_metadata) do + { + 'Cluster' => 'my-cluster', + 'TaskARN' => 'arn:aws:ecs:us-west-2:123456789012:task/task-id', + 'Family' => 'my-task-family', + 'Revision' => '1', + 'LaunchType' => 'FARGATE' + } + end + + before do + # Stub environment variables, hostname and File operations + @original_env = ENV.to_hash + ENV.clear + + # Initialize WebMock + WebMock.disable_net_connect! + end + + after do + # Restore original environment + ENV.replace(@original_env) + WebMock.allow_net_connect! + end + + it 'returns empty resource when not running on ECS' do + resource = detector.detect + _(resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource) + _(resource.attribute_enumerator.to_h).must_equal({}) + end + + describe 'when running on ECS with metadata endpoint v4' do + before do + ENV['ECS_CONTAINER_METADATA_URI_V4'] = metadata_uri_v4 + + # Stub container metadata endpoint + stub_request(:get, metadata_uri_v4) + .to_return(status: 200, body: container_metadata.to_json) + + # Stub task metadata endpoint + stub_request(:get, "#{metadata_uri_v4}/task") + .to_return(status: 200, body: task_metadata.to_json) + end + + it 'detects ECS resources' do + # Stub the fetch_container_id method directly rather than trying to stub File + detector.stub :fetch_container_id, '0123456789abcdef' * 4 do + Socket.stub :gethostname, hostname do + resource = detector.detect + + _(resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource) + attributes = resource.attribute_enumerator.to_h + + # Check basic attributes + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PROVIDER]).must_equal('aws') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PLATFORM]).must_equal('aws_ecs') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CONTAINER_NAME]).must_equal(hostname) + _(attributes[OpenTelemetry::SemanticConventions::Resource::CONTAINER_ID]).must_equal('0123456789abcdef' * 4) + + # Check ECS-specific attributes + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_CONTAINER_ARN]).must_equal(container_metadata['ContainerARN']) + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_CLUSTER_ARN]).must_equal('arn:aws:ecs:us-west-2:123456789012:cluster/my-cluster') + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_LAUNCHTYPE]).must_equal('fargate') + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_TASK_ARN]).must_equal(task_metadata['TaskARN']) + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_TASK_FAMILY]).must_equal(task_metadata['Family']) + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_TASK_REVISION]).must_equal(task_metadata['Revision']) + + # Check log attributes + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_GROUP_NAMES]).must_equal(['my-log-group']) + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_GROUP_ARNS]).must_equal(['arn:aws:logs:us-west-2:123456789012:log-group:my-log-group']) + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_STREAM_NAMES]).must_equal(['my-log-stream']) + _(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_STREAM_ARNS]).must_equal(['arn:aws:logs:us-west-2:123456789012:log-group:my-log-group:log-stream:my-log-stream']) + end + end + end + end + + describe 'when metadata endpoint fails' do + before do + ENV['ECS_CONTAINER_METADATA_URI_V4'] = metadata_uri_v4 + + # Stub metadata endpoint to fail + stub_request(:get, metadata_uri_v4) + .to_return(status: 500, body: 'Server Error') + end + + it 'returns empty resource on error' do + resource = detector.detect + _(resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource) + _(resource.attribute_enumerator.to_h).must_equal({}) + end + end + end +end diff --git a/resources/aws/test/opentelemetry/resource/detector/aws_test.rb b/resources/aws/test/opentelemetry/resource/detector/aws_test.rb index 549e55a436..85cb65ce76 100644 --- a/resources/aws/test/opentelemetry/resource/detector/aws_test.rb +++ b/resources/aws/test/opentelemetry/resource/detector/aws_test.rb @@ -21,19 +21,157 @@ stub_request(:get, 'http://169.254.169.254/latest/dynamic/instance-identity/document') .with(headers: { 'Accept' => '*/*' }) .to_return(status: 404, body: 'Not Found') + + # Clear environment variables for ECS + @original_env = ENV.to_hash + ENV.delete('ECS_CONTAINER_METADATA_URI') + ENV.delete('ECS_CONTAINER_METADATA_URI_V4') end after do WebMock.allow_net_connect! + ENV.replace(@original_env) + end + + def assert_detection_result(detectors) + resource = detector.detect(detectors) + attributes = resource.attribute_enumerator.to_h + + _(resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource) + _(attributes).must_equal({}) + end + + it 'returns an empty resource with no detectors' do + assert_detection_result([]) + end + + it 'returns an empty resource when EC2 detection fails' do + assert_detection_result([:ec2]) + end + + it 'returns an empty resource when ECS detection fails' do + assert_detection_result([:ecs]) + end + + it 'returns an empty resource with unknown detector' do + assert_detection_result([:unknown]) + end + + it 'returns an empty resource with multiple detectors when all fail' do + assert_detection_result(%i[ec2 ecs unknown]) end - let(:detected_resource) { detector.detect } - let(:detected_resource_attributes) { detected_resource.attribute_enumerator.to_h } - let(:expected_resource_attributes) { {} } + describe 'with successful EC2 detection' do + let(:token) { 'ec2-token-value' } + let(:identity_document) do + { + 'instanceId' => 'i-1234567890abcdef0', + 'instanceType' => 'm5.xlarge', + 'accountId' => '123456789012', + 'region' => 'us-west-2', + 'availabilityZone' => 'us-west-2b' + } + end + let(:hostname) { 'ip-10-0-0-1.ec2.internal' } + + before do + # Stub successful token request (IMDSv2) + stub_request(:put, 'http://169.254.169.254/latest/api/token') + .to_return(status: 200, body: token) + + # Stub successful identity document request + stub_request(:get, 'http://169.254.169.254/latest/dynamic/instance-identity/document') + .to_return(status: 200, body: identity_document.to_json) + + # Stub successful hostname request + stub_request(:get, 'http://169.254.169.254/latest/meta-data/hostname') + .with(headers: { 'X-aws-ec2-metadata-token' => token }) + .to_return(status: 200, body: hostname) + end + + it 'detects EC2 resources when specified' do + resource = detector.detect([:ec2]) + attributes = resource.attribute_enumerator.to_h + + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PROVIDER]).must_equal('aws') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PLATFORM]).must_equal('aws_ec2') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_ACCOUNT_ID]).must_equal('123456789012') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_REGION]).must_equal('us-west-2') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_AVAILABILITY_ZONE]).must_equal('us-west-2b') + _(attributes[OpenTelemetry::SemanticConventions::Resource::HOST_ID]).must_equal('i-1234567890abcdef0') + _(attributes[OpenTelemetry::SemanticConventions::Resource::HOST_TYPE]).must_equal('m5.xlarge') + _(attributes[OpenTelemetry::SemanticConventions::Resource::HOST_NAME]).must_equal(hostname) + end + + describe 'with succesefful ECS detection' do + let(:metadata_uri_v4) { 'http://169.254.170.2/v4' } + let(:container_metadata) do + { + 'ContainerARN' => 'arn:aws:ecs:us-west-2:123456789012:container/container-id', + 'LogDriver' => 'awslogs', + 'LogOptions' => { + 'awslogs-region' => 'us-west-2', + 'awslogs-group' => 'my-log-group', + 'awslogs-stream' => 'my-log-stream' + } + } + end + + let(:task_metadata) do + { + 'Cluster' => 'my-cluster', + 'TaskARN' => 'arn:aws:ecs:us-west-2:123456789012:task/task-id', + 'Family' => 'my-task-family', + 'Revision' => '1', + 'LaunchType' => 'FARGATE' + } + end + + before do + ENV['ECS_CONTAINER_METADATA_URI_V4'] = metadata_uri_v4 + + # Stub container metadata endpoint + stub_request(:get, metadata_uri_v4) + .to_return(status: 200, body: container_metadata.to_json) + + # Stub task metadata endpoint + stub_request(:get, "#{metadata_uri_v4}/task") + .to_return(status: 200, body: task_metadata.to_json) + end + + it 'detects ECS resources when specified' do + # Properly stub the fetch_container_id method + OpenTelemetry::Resource::Detector::AWS::ECS.stub :fetch_container_id, '0123456789abcdef' * 4 do + # Also stub the hostname method + Socket.stub :gethostname, 'test-container' do + resource = detector.detect([:ecs]) + attributes = resource.attribute_enumerator.to_h + + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PROVIDER]).must_equal('aws') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PLATFORM]).must_equal('aws_ecs') + end + end + end + + it 'returns combined resources when multiple detectors are used' do + # Stub the container ID and hostname methods + OpenTelemetry::Resource::Detector::AWS::ECS.stub :fetch_container_id, '0123456789abcdef' * 4 do + Socket.stub :gethostname, 'test-container' do + # Mock EC2 detector to return a simple resource + ec2_resource = OpenTelemetry::SDK::Resources::Resource.create({ 'ec2.instance.id' => 'i-1234567890abcdef0' }) + OpenTelemetry::Resource::Detector::AWS::EC2.stub :detect, ec2_resource do + resource = detector.detect(%i[ec2 ecs]) + attributes = resource.attribute_enumerator.to_h - it 'returns an empty resource' do - _(detected_resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource) - _(detected_resource_attributes).must_equal(expected_resource_attributes) + # Should include attributes from both detectors + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PROVIDER]).must_equal('aws') + _(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PLATFORM]).must_equal('aws_ecs') + _(attributes['ec2.instance.id']).must_equal('i-1234567890abcdef0') + end + end + end + end + end end end end