Skip to content

Commit 27a870f

Browse files
TimDiekmann and Copilot authored
H-5328: Add Pyroscope continuous profiling infrastructure to AWS observability stack (#7787)
Co-authored-by: Copilot <[email protected]>
1 parent 8fc23ed commit 27a870f

File tree

28 files changed

+955
-14
lines changed

28 files changed

+955
-14
lines changed

infra/terraform/hash/observability/alloy/config.tf

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,15 @@ resource "aws_s3_object" "alloy_config" {
77

88
# Grafana Alloy configuration
99
content = templatefile("${path.module}/templates/alloy-config.alloy.tpl", {
10-
region = var.region
11-
mimir_http_dns = var.mimir_http_dns
12-
mimir_http_port = var.mimir_http_port
10+
environment = terraform.workspace
11+
region = var.region
12+
profile_port_internal = local.profile_port_internal
13+
profile_port_external = local.profile_port_external
14+
mimir_http_dns = var.mimir_http_dns
15+
mimir_http_port = var.mimir_http_port
16+
pyroscope_http_dns = var.pyroscope_http_dns
17+
pyroscope_http_port = var.pyroscope_http_port
18+
1319
})
1420

1521
content_type = "text/plain"

infra/terraform/hash/observability/alloy/main.tf

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@ locals {
55
prefix = "${var.prefix}-${local.service_name}"
66

77
# Port definitions for Alloy
8-
http_port = 5000 # Alloy HTTP API for metrics scraping
8+
http_port = 5000 # Alloy HTTP API for metrics scraping
9+
profile_port_internal = 4040
10+
profile_port_external = 4042
911

1012
# Port names for Service Connect
11-
http_port_name = "${local.service_name}-http" # HTTP API
13+
http_port_name = "${local.service_name}-http" # HTTP API
14+
profile_port_name = "${local.service_name}-profile"
1215

1316
# DNS names for Service Connect
14-
http_port_dns = "${local.http_port_name}.${var.service_discovery_namespace_name}"
17+
http_port_dns = "${local.http_port_name}.${var.service_discovery_namespace_name}"
18+
profile_port_dns = "${local.profile_port_name}.${var.service_discovery_namespace_name}"
1519
}

infra/terraform/hash/observability/alloy/outputs.tf

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,15 @@ output "http_port" {
22
description = "Port number for Grafana Alloy HTTP API"
33
value = local.http_port
44
}
5+
output "profile_port_internal" {
6+
description = "Port number for the Grafana Alloy internal profile ingestion endpoint"
7+
value = local.profile_port_internal
8+
}
59

10+
output "profile_port_external" {
11+
description = "Port number for the Grafana Alloy external profile ingestion endpoint"
12+
value = local.profile_port_external
13+
}
614
output "http_port_name" {
715
description = "Port name for Service Connect"
816
value = local.http_port_name
@@ -12,3 +20,17 @@ output "http_port_dns" {
1220
description = "Service Connect DNS name for Grafana Alloy metrics endpoint"
1321
value = local.http_port_dns
1422
}
23+
24+
# Target groups for load balancer attachment
25+
26+
# Internal target groups (service-to-service communication)
27+
output "profile_internal_target_group_arn" {
28+
description = "Internal profile target group ARN"
29+
value = aws_lb_target_group.profile_internal.arn
30+
}
31+
32+
# External target groups (client applications)
33+
output "profile_external_target_group_arn" {
34+
description = "External profile target group ARN"
35+
value = aws_lb_target_group.profile_external.arn
36+
}

infra/terraform/hash/observability/alloy/security_groups.tf

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,22 @@ resource "aws_security_group" "alloy" {
1313
cidr_blocks = [var.vpc.cidr_block]
1414
}
1515

16+
# Allow inbound HTTP for profile ingestion
17+
ingress {
18+
from_port = local.profile_port_internal
19+
to_port = local.profile_port_internal
20+
protocol = "tcp"
21+
description = "Grafana Alloy profile ingestion endpoint"
22+
cidr_blocks = [var.vpc.cidr_block]
23+
}
24+
ingress {
25+
from_port = local.profile_port_external
26+
to_port = local.profile_port_external
27+
protocol = "tcp"
28+
description = "Grafana Alloy profile ingestion endpoint"
29+
cidr_blocks = [var.vpc.cidr_block]
30+
}
31+
1632
# Allow outbound HTTPS for CloudWatch API calls and S3 config download
1733
egress {
1834
from_port = 443
@@ -48,6 +64,15 @@ resource "aws_security_group" "alloy" {
4864
cidr_blocks = [var.vpc.cidr_block]
4965
}
5066

67+
# Allow outbound HTTP to Pyroscope for profile forwarding
68+
egress {
69+
from_port = var.pyroscope_http_port
70+
to_port = var.pyroscope_http_port
71+
protocol = "tcp"
72+
description = "HTTP to Pyroscope for profile forwarding"
73+
cidr_blocks = [var.vpc.cidr_block]
74+
}
75+
5176
tags = {
5277
Name = "${local.prefix}-sg"
5378
Purpose = "Grafana Alloy security group"

infra/terraform/hash/observability/alloy/service.tf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,22 @@ resource "aws_ecs_task_definition" "alloy" {
9191
], var.ssl_config.environment_vars)
9292

9393
portMappings = [
94+
# Internal ports with names for Service Connect
9495
{
9596
name = local.http_port_name
9697
containerPort = local.http_port
9798
protocol = "tcp"
99+
},
100+
{
101+
name = local.profile_port_name
102+
containerPort = local.profile_port_internal
103+
protocol = "tcp"
104+
},
105+
106+
# External ports without names (ALB only)
107+
{
108+
containerPort = local.profile_port_external
109+
protocol = "tcp"
98110
}
99111
]
100112

@@ -162,6 +174,28 @@ resource "aws_ecs_service" "alloy" {
162174
port = local.http_port
163175
}
164176
}
177+
178+
service {
179+
port_name = local.profile_port_name
180+
181+
client_alias {
182+
port = local.profile_port_internal
183+
}
184+
}
185+
}
186+
187+
# Internal ALB target groups
188+
load_balancer {
189+
target_group_arn = aws_lb_target_group.profile_internal.arn
190+
container_name = "alloy"
191+
container_port = local.profile_port_internal
192+
}
193+
194+
# External ALB target groups
195+
load_balancer {
196+
target_group_arn = aws_lb_target_group.profile_external.arn
197+
container_name = "alloy"
198+
container_port = local.profile_port_external
165199
}
166200

167201
tags = {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Internal HTTP target group for profiles
2+
resource "aws_lb_target_group" "profile_internal" {
3+
name = "${var.prefix}-profile-int"
4+
port = local.profile_port_internal
5+
protocol = "HTTP"
6+
target_type = "ip"
7+
vpc_id = var.vpc.id
8+
9+
health_check {
10+
enabled = true
11+
healthy_threshold = 2
12+
interval = 30
13+
matcher = "200"
14+
path = "/"
15+
port = tostring(local.http_port)
16+
protocol = "HTTP"
17+
timeout = 5
18+
unhealthy_threshold = 2
19+
}
20+
21+
tags = {
22+
Name = "${var.prefix}-profile-int"
23+
Purpose = "Grafana Alloy internal profile target group"
24+
}
25+
}
26+
27+
# External HTTP target group for profiles
28+
resource "aws_lb_target_group" "profile_external" {
29+
name = "${var.prefix}-profile-ext"
30+
port = local.profile_port_external
31+
protocol = "HTTP"
32+
target_type = "ip"
33+
vpc_id = var.vpc.id
34+
35+
health_check {
36+
enabled = true
37+
healthy_threshold = 2
38+
interval = 30
39+
matcher = "200"
40+
path = "/"
41+
port = tostring(local.http_port)
42+
protocol = "HTTP"
43+
timeout = 5
44+
unhealthy_threshold = 2
45+
}
46+
47+
tags = {
48+
Name = "${var.prefix}-profile-ext"
49+
Purpose = "Grafana Alloy external profile target group"
50+
}
51+
}

infra/terraform/hash/observability/alloy/templates/alloy-config.alloy.tpl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,41 @@ otelcol.exporter.otlphttp "mimir" {
275275
}
276276
}
277277
}
278+
279+
pyroscope.receive_http "profiles_internal" {
280+
http {
281+
listen_address = "0.0.0.0"
282+
listen_port = ${profile_port_internal}
283+
}
284+
285+
forward_to = [pyroscope.write.pyroscope_internal.receiver]
286+
}
287+
288+
pyroscope.write "pyroscope_internal" {
289+
endpoint {
290+
url = "http://${pyroscope_http_dns}:${pyroscope_http_port}"
291+
}
292+
293+
external_labels = {
294+
"env" = "${environment}",
295+
}
296+
}
297+
298+
pyroscope.receive_http "profiles_external" {
299+
http {
300+
listen_address = "0.0.0.0"
301+
listen_port = ${profile_port_external}
302+
}
303+
304+
forward_to = [pyroscope.write.pyroscope_external.receiver]
305+
}
306+
307+
pyroscope.write "pyroscope_external" {
308+
endpoint {
309+
url = "http://${pyroscope_http_dns}:${pyroscope_http_port}"
310+
}
311+
312+
external_labels = {
313+
"env" = "external",
314+
}
315+
}

infra/terraform/hash/observability/alloy/variables.tf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,13 @@ variable "mimir_http_port" {
5454
type = number
5555
description = "Mimir HTTP API port number"
5656
}
57+
58+
variable "pyroscope_http_dns" {
59+
type = string
60+
description = "Pyroscope HTTP API DNS name for profile forwarding"
61+
}
62+
63+
variable "pyroscope_http_port" {
64+
type = number
65+
description = "Pyroscope HTTP API port number"
66+
}

infra/terraform/hash/observability/grafana/config.tf

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
# Configuration hash for task definition versioning
44
locals {
55
config_hash = sha256(jsonencode({
6-
grafana_config = aws_s3_object.grafana_config.content
7-
tempo_datasource = aws_s3_object.grafana_tempo_datasource.content
8-
loki_datasource = aws_s3_object.grafana_loki_datasource.content
9-
mimi_datasource = aws_s3_object.grafana_mimir_datasource.content
6+
grafana_config = aws_s3_object.grafana_config.content
7+
tempo_datasource = aws_s3_object.grafana_tempo_datasource.content
8+
loki_datasource = aws_s3_object.grafana_loki_datasource.content
9+
mimir_datasource = aws_s3_object.grafana_mimir_datasource.content
10+
pyroscope_datasource = aws_s3_object.grafana_pyroscope_datasource.content
1011
}))
1112
}
1213

@@ -79,3 +80,19 @@ resource "aws_s3_object" "grafana_mimir_datasource" {
7980
Service = "grafana"
8081
}
8182
}
83+
84+
# Pyroscope datasource provisioning
85+
resource "aws_s3_object" "grafana_pyroscope_datasource" {
86+
bucket = var.config_bucket.id
87+
key = "grafana/provisioning/datasources/pyroscope.yaml"
88+
content = templatefile("${path.module}/templates/provisioning/datasources/pyroscope.yaml.tpl", {
89+
pyroscope_http_dns = var.pyroscope_http_dns
90+
pyroscope_http_port = var.pyroscope_http_port
91+
})
92+
content_type = "application/x-yaml"
93+
94+
tags = {
95+
Purpose = "Grafana Pyroscope Datasource"
96+
Service = "grafana"
97+
}
98+
}

infra/terraform/hash/observability/grafana/security_groups.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,15 @@ resource "aws_security_group" "grafana" {
5858
cidr_blocks = [var.vpc.cidr_block]
5959
}
6060

61+
# Allow outbound HTTP for Pyroscope API
62+
egress {
63+
from_port = var.pyroscope_http_port
64+
to_port = var.pyroscope_http_port
65+
protocol = "tcp"
66+
description = "Pyroscope API access"
67+
cidr_blocks = [var.vpc.cidr_block]
68+
}
69+
6170
# Allow outbound PostgreSQL
6271
egress {
6372
from_port = var.grafana_database_port

0 commit comments

Comments
 (0)