Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions resources/aws/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ require 'opentelemetry/sdk'
require 'opentelemetry/resource/detector'

OpenTelemetry::SDK.configure do |c|
c.resource = OpenTelemetry::Resource::Detector::AWS.detect
# Specify which AWS resource detectors to use
c.resource = OpenTelemetry::Resource::Detector::AWS.detect([:ec2, :ecs])

# Or use just one detector
c.resource = OpenTelemetry::Resource::Detector::AWS.detect([:ec2])
c.resource = OpenTelemetry::Resource::Detector::AWS.detect([:ecs])
end
```

Expand All @@ -52,7 +57,25 @@ Populates `cloud` and `host` for processes running on Amazon EC2, including abst
| `host.name` | Value of hostname from `/latest/meta-data/hostname` request |
| `host.type` | Value of `instanceType` from `/latest/dynamic/instance-identity/document` request |

Additional AWS platforms (ECS, EKS, Lambda) will be supported in future versions.
### AWS ECS Detector

<!-- cspell:ignore launchtype awslogs -->
Populates `cloud`, `container`, and AWS ECS-specific attributes for processes running on Amazon ECS.
| Resource Attribute | Description |
|--------------------|-------------|
| `cloud.platform` | The cloud platform. In this context, it's always "aws_ecs" |
| `cloud.provider` | The cloud provider. In this context, it's always "aws" |
| `container.id` | The container ID from the `/proc/self/cgroup` file |
| `container.name` | The hostname of the container |
| `aws.ecs.container.arn` | The hostname of the container |
| `aws.ecs.cluster.arn` | The ARN of the ECS cluster |
| `aws.ecs.launchtype` | The launch type for the ECS task (e.g., "fargate" or "ec2") |
| `aws.ecs.task.arn` | The ARN of the ECS task |
| `aws.log.group.names` | The CloudWatch log group names (if awslogs driver is used) |
| `aws.log.stream.names` | The CloudWatch log stream names (if awslogs driver is used) |
| `aws.log.stream.arns` | The CloudWatch log stream ARNs (if awslogs driver is used) |

Additional AWS platforms (EKS, Lambda) will be supported in future versions.

## License

Expand Down
30 changes: 25 additions & 5 deletions resources/aws/lib/opentelemetry/resource/detector/aws.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# SPDX-License-Identifier: Apache-2.0

require 'opentelemetry/resource/detector/aws/ec2'
require 'opentelemetry/resource/detector/aws/ecs'

module OpenTelemetry
module Resource
Expand All @@ -13,12 +14,31 @@ module Detector
module AWS
extend self

def detect
# This will be a composite of all the AWS platform detectors
EC2.detect
RESOURCE = OpenTelemetry::SDK::Resources::Resource

# For now, return the EC2 resource directly
# In the future, we'll implement detection for EC2, ECS, EKS, etc.
# Get resources from specified AWS resource detectors
#
# @param detectors [Array<Symbol>] List of detectors to use (e.g., :ec2)
# @return [OpenTelemetry::SDK::Resources::Resource] The detected AWS resources
def detect(detectors = [])
return RESOURCE.create({}) if detectors.empty?

resources = detectors.map do |detector|
case detector
when :ec2
EC2.detect
when :ecs
ECS.detect
else
OpenTelemetry.logger.warn("Unknown AWS resource detector: #{detector}")
OpenTelemetry::SDK::Resources::Resource.create({})
end
end

# Merge all resources into a single resource
resources.reduce(OpenTelemetry::SDK::Resources::Resource.create({})) do |merged, resource|
merged.merge(resource)
end
end
end
end
Expand Down
172 changes: 172 additions & 0 deletions resources/aws/lib/opentelemetry/resource/detector/aws/ecs.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# frozen_string_literal: true

# Copyright The OpenTelemetry Authors
#
# SPDX-License-Identifier: Apache-2.0

require 'net/http'
require 'json'
require 'socket'
require 'opentelemetry/common'

module OpenTelemetry
module Resource
module Detector
module AWS
# ECS contains detect class method for determining the ECS resource attributes
module ECS
extend self

# Container ID length from cgroup file
CONTAINER_ID_LENGTH = 64

# HTTP request timeout in seconds
HTTP_TIMEOUT = 5

# Create a constant for resource semantic conventions
RESOURCE = OpenTelemetry::SemanticConventions::Resource

def detect
# Return empty resource if not running on ECS
metadata_uri = ENV.fetch('ECS_CONTAINER_METADATA_URI', nil)
metadata_uri_v4 = ENV.fetch('ECS_CONTAINER_METADATA_URI_V4', nil)

return OpenTelemetry::SDK::Resources::Resource.create({}) if metadata_uri.nil? && metadata_uri_v4.nil?

resource_attributes = {}
container_id = fetch_container_id

# Base ECS resource attributes
resource_attributes[RESOURCE::CLOUD_PROVIDER] = 'aws'
resource_attributes[RESOURCE::CLOUD_PLATFORM] = 'aws_ecs'
resource_attributes[RESOURCE::CONTAINER_NAME] = Socket.gethostname
resource_attributes[RESOURCE::CONTAINER_ID] = container_id unless container_id.empty?

# If v4 endpoint is not available, return basic resource
return OpenTelemetry::SDK::Resources::Resource.create(resource_attributes) if metadata_uri_v4.nil?

begin
# Fetch container and task metadata
container_metadata = JSON.parse(http_get(metadata_uri_v4.to_s))
task_metadata = JSON.parse(http_get("#{metadata_uri_v4}/task"))

task_arn = task_metadata['TaskARN']
base_arn = task_arn[0..task_arn.rindex(':') - 1]

cluster = task_metadata['Cluster']
cluster_arn = cluster.start_with?('arn:') ? cluster : "#{base_arn}:cluster/#{cluster}"

# Set ECS-specific attributes
resource_attributes[RESOURCE::AWS_ECS_CONTAINER_ARN] = container_metadata['ContainerARN']
resource_attributes[RESOURCE::AWS_ECS_CLUSTER_ARN] = cluster_arn
resource_attributes[RESOURCE::AWS_ECS_LAUNCHTYPE] = task_metadata['LaunchType'].downcase
resource_attributes[RESOURCE::AWS_ECS_TASK_ARN] = task_arn
resource_attributes[RESOURCE::AWS_ECS_TASK_FAMILY] = task_metadata['Family']
resource_attributes[RESOURCE::AWS_ECS_TASK_REVISION] = task_metadata['Revision']

# Add logging attributes if awslogs is used
logs_attributes = get_logs_resource(container_metadata)
resource_attributes.merge!(logs_attributes)
rescue StandardError => e
OpenTelemetry.logger.debug("ECS resource detection failed: #{e.message}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about using the OpenTelemetry.handle_error API for the rescues in this file rather than a debug-level log?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Kayla - the suggestion sounds good to me. I'll update the error handling to use this API instead.

I don't know of any AWS ECS experts in the upstream community. However, I can reach out to an ECS expert from my team and have them help take a look.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds great, thank you!

return OpenTelemetry::SDK::Resources::Resource.create({})
end

# Filter out nil or empty values
resource_attributes.delete_if { |_key, value| value.nil? || value.empty? }
OpenTelemetry::SDK::Resources::Resource.create(resource_attributes)
end

private

# Fetches container ID from /proc/self/cgroup file
#
# @return [String] The container ID or empty string if not found
def fetch_container_id
begin
File.open('/proc/self/cgroup', 'r') do |file|
file.each_line do |line|
line = line.strip
# Look for container ID (64 chars) at the end of the line
return line[-CONTAINER_ID_LENGTH..-1] if line.length > CONTAINER_ID_LENGTH
end
end
rescue Errno::ENOENT => e
OpenTelemetry.logger.debug("Failed to get container ID on ECS: #{e.message}")
end

''
end

# Extracting logging-related resource attributes
#
# @param container_metadata [Hash] Container metadata from ECS metadata endpoint
# @returhn [Hash] Resource attributes for logging configuration
def get_logs_resource(container_metadata)
log_attributes = {}

if container_metadata['LogDriver'] == 'awslogs'
log_options = container_metadata['LogOptions']

if log_options
logs_region = log_options['awslogs-region']
logs_group_name = log_options['awslogs-group']
logs_stream_name = log_options['awslogs-stream']

container_arn = container_metadata['ContainerARN']

# Parse region from ARN if not specified in log options
if logs_region.nil? || logs_region.empty?
region_match = container_arn.match(/arn:aws:ecs:([^:]+):.*/)
logs_region = region_match[1] if region_match
end

# Parse account ID from ARN
account_match = container_arn.match(/arn:aws:ecs:[^:]+:([^:]+):.*/)
aws_account = account_match[1] if account_match

logs_group_arn = nil
logs_stream_arn = nil

if logs_region && aws_account
logs_group_arn = "arn:aws:logs:#{logs_region}:#{aws_account}:log-group:#{logs_group_name}" if logs_group_name

logs_stream_arn = "arn:aws:logs:#{logs_region}:#{aws_account}:log-group:#{logs_group_name}:log-stream:#{logs_stream_name}" if logs_stream_name && logs_group_name
end

log_attributes[RESOURCE::AWS_LOG_GROUP_NAMES] = [logs_group_name].compact
log_attributes[RESOURCE::AWS_LOG_GROUP_ARNS] = [logs_group_arn].compact
log_attributes[RESOURCE::AWS_LOG_STREAM_NAMES] = [logs_stream_name].compact
log_attributes[RESOURCE::AWS_LOG_STREAM_ARNS] = [logs_stream_arn].compact
else
OpenTelemetry.logger.debug("The metadata endpoint v4 has returned 'awslogs' as 'LogDriver', but there is no 'LogOptions' data")
end
end

log_attributes
end

# Makes an HTTP GET request to the specified URL
#
# @param url [String] The URL to request
# @return [String] The response body
def http_get(url)
uri = URI.parse(url)
request = Net::HTTP::Get.new(uri)

http = Net::HTTP.new(uri.host, uri.port)
http.open_timeout = HTTP_TIMEOUT
http.read_timeout = HTTP_TIMEOUT

OpenTelemetry::Common::Utilities.untraced do
response = http.request(request)
raise "HTTP request failed with status #{response.code}" unless response.is_a?(Net::HTTPSuccess)

response.body
end
end
end
end
end
end
end
122 changes: 122 additions & 0 deletions resources/aws/test/opentelemetry/resource/detector/aws/ecs_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# frozen_string_literal: true

# Copyright The OpenTelemetry Authors
#
# SPDX-License-Identifier: Apache-2.0

require 'test_helper'

describe OpenTelemetry::Resource::Detector::AWS::ECS do
let(:detector) { OpenTelemetry::Resource::Detector::AWS::ECS }

describe '.detect' do
let(:metadata_uri) { 'http://169.254.170.2/v3' }
let(:metadata_uri_v4) { 'http://169.254.170.2/v4' }
let(:hostname) { 'test-container' }

let(:container_metadata) do
{
'ContainerARN' => 'arn:aws:ecs:us-west-2:123456789012:container/container-id',
'LogDriver' => 'awslogs',
'LogOptions' => {
'awslogs-region' => 'us-west-2',
'awslogs-group' => 'my-log-group',
'awslogs-stream' => 'my-log-stream'
}
}
end

let(:task_metadata) do
{
'Cluster' => 'my-cluster',
'TaskARN' => 'arn:aws:ecs:us-west-2:123456789012:task/task-id',
'Family' => 'my-task-family',
'Revision' => '1',
'LaunchType' => 'FARGATE'
}
end

before do
# Stub environment variables, hostname and File operations
@original_env = ENV.to_hash
ENV.clear

# Initialize WebMock
WebMock.disable_net_connect!
end

after do
# Restore original environment
ENV.replace(@original_env)
WebMock.allow_net_connect!
end

it 'returns empty resource when not running on ECS' do
resource = detector.detect
_(resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource)
_(resource.attribute_enumerator.to_h).must_equal({})
end

describe 'when running on ECS with metadata endpoint v4' do
before do
ENV['ECS_CONTAINER_METADATA_URI_V4'] = metadata_uri_v4

# Stub container metadata endpoint
stub_request(:get, metadata_uri_v4)
.to_return(status: 200, body: container_metadata.to_json)

# Stub task metadata endpoint
stub_request(:get, "#{metadata_uri_v4}/task")
.to_return(status: 200, body: task_metadata.to_json)
end

it 'detects ECS resources' do
# Stub the fetch_container_id method directly rather than trying to stub File
detector.stub :fetch_container_id, '0123456789abcdef' * 4 do
Socket.stub :gethostname, hostname do
resource = detector.detect

_(resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource)
attributes = resource.attribute_enumerator.to_h

# Check basic attributes
_(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PROVIDER]).must_equal('aws')
_(attributes[OpenTelemetry::SemanticConventions::Resource::CLOUD_PLATFORM]).must_equal('aws_ecs')
_(attributes[OpenTelemetry::SemanticConventions::Resource::CONTAINER_NAME]).must_equal(hostname)
_(attributes[OpenTelemetry::SemanticConventions::Resource::CONTAINER_ID]).must_equal('0123456789abcdef' * 4)

# Check ECS-specific attributes
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_CONTAINER_ARN]).must_equal(container_metadata['ContainerARN'])
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_CLUSTER_ARN]).must_equal('arn:aws:ecs:us-west-2:123456789012:cluster/my-cluster')
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_LAUNCHTYPE]).must_equal('fargate')
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_TASK_ARN]).must_equal(task_metadata['TaskARN'])
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_TASK_FAMILY]).must_equal(task_metadata['Family'])
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_ECS_TASK_REVISION]).must_equal(task_metadata['Revision'])

# Check log attributes
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_GROUP_NAMES]).must_equal(['my-log-group'])
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_GROUP_ARNS]).must_equal(['arn:aws:logs:us-west-2:123456789012:log-group:my-log-group'])
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_STREAM_NAMES]).must_equal(['my-log-stream'])
_(attributes[OpenTelemetry::SemanticConventions::Resource::AWS_LOG_STREAM_ARNS]).must_equal(['arn:aws:logs:us-west-2:123456789012:log-group:my-log-group:log-stream:my-log-stream'])
end
end
end
end

describe 'when metadata endpoint fails' do
before do
ENV['ECS_CONTAINER_METADATA_URI_V4'] = metadata_uri_v4

# Stub metadata endpoint to fail
stub_request(:get, metadata_uri_v4)
.to_return(status: 500, body: 'Server Error')
end

it 'returns empty resource on error' do
resource = detector.detect
_(resource).must_be_instance_of(OpenTelemetry::SDK::Resources::Resource)
_(resource.attribute_enumerator.to_h).must_equal({})
end
end
end
end
Loading
Loading