
Commit 36ebae1

feat: add redpanda buffering layer with split ingress/egress otel collectors

1 parent 08f22ec

File tree: 8 files changed, +439 −124 lines

deployment/index.ts

Lines changed: 6 additions & 1 deletion

@@ -20,6 +20,7 @@ import { deployPostgres } from './services/postgres';
 import { deployProxy } from './services/proxy';
 import { deployPublicGraphQLAPIGateway } from './services/public-graphql-api-gateway';
 import { deployRedis } from './services/redis';
+import { deployRedpanda } from './services/redpanda';
 import { deployS3, deployS3AuditLog, deployS3Mirror } from './services/s3';
 import { deploySchema } from './services/schema';
 import { configureSentry } from './services/sentry';
@@ -78,6 +79,7 @@ const clickhouse = deployClickhouse();
 const postgres = deployPostgres();
 const redis = deployRedis({ environment });
 const kafka = deployKafka();
+const redpanda = deployRedpanda();
 const s3 = deployS3();
 const s3Mirror = deployS3Mirror();
 const s3AuditLog = deployS3AuditLog();
@@ -284,6 +286,7 @@ const otelCollector = deployOTELCollector({
   graphql,
   dbMigrations,
   clickhouse,
+  redpanda,
   image: docker.factory.getImageId('otel-collector', imagesTag),
   docker,
 });
@@ -344,5 +347,7 @@ export const schemaApiServiceId = schema.service.id;
 export const webhooksApiServiceId = webhooks.service.id;

 export const appId = app.deployment.id;
-export const otelCollectorId = otelCollector.deployment.id;
+export const otelCollectorIngressId = otelCollector.ingress.deployment.id;
+export const otelCollectorEgressId = otelCollector.egress.deployment.id;
+export const redpandaStatefulSetId = redpanda.statefulSet.id;
 export const publicIp = proxy.get()!.status.loadBalancer.ingress[0].ip;

deployment/services/otel-collector.ts

Lines changed: 47 additions & 4 deletions

@@ -5,6 +5,7 @@ import { DbMigrations } from './db-migrations';
 import { Docker } from './docker';
 import { Environment } from './environment';
 import { GraphQL } from './graphql';
+import { Redpanda } from './redpanda';

 export type OTELCollector = ReturnType<typeof deployOTELCollector>;

@@ -15,9 +16,13 @@ export function deployOTELCollector(args: {
   clickhouse: Clickhouse;
   dbMigrations: DbMigrations;
   graphql: GraphQL;
+  redpanda: Redpanda;
 }) {
-  return new ServiceDeployment(
-    'otel-collector',
+  const kafkaBroker = args.redpanda.brokerEndpoint;
+
+  // Ingress: OTLP -> Redpanda
+  const ingress = new ServiceDeployment(
+    'otel-collector-ingress',
     {
       image: args.image,
       imagePullSecret: args.docker.secret,
@@ -26,6 +31,7 @@ export function deployOTELCollector(args: {
       HIVE_OTEL_AUTH_ENDPOINT: serviceLocalEndpoint(args.graphql.service).apply(
         value => value + '/otel-auth',
       ),
+      KAFKA_BROKER: kafkaBroker,
     },
     /**
      * We are using the healthcheck extension.
@@ -40,11 +46,40 @@ export function deployOTELCollector(args: {
       pdb: true,
       availabilityOnEveryNode: true,
       port: 4318,
-      memoryLimit: args.environment.podsConfig.tracingCollector.memoryLimit,
+      memoryLimit: '512Mi',
       autoScaling: {
         maxReplicas: args.environment.podsConfig.tracingCollector.maxReplicas,
         cpu: {
-          limit: args.environment.podsConfig.tracingCollector.cpuLimit,
+          limit: '500m',
+          cpuAverageToScale: 80,
+        },
+      },
+    },
+    [args.dbMigrations],
+  ).deploy();
+
+  // Egress: Redpanda -> ClickHouse
+  const egress = new ServiceDeployment(
+    'otel-collector-egress',
+    {
+      image: args.image,
+      imagePullSecret: args.docker.secret,
+      env: {
+        ...args.environment.envVars,
+        KAFKA_BROKER: kafkaBroker,
+      },
+      probePort: 13133,
+      readinessProbe: '/',
+      livenessProbe: '/',
+      startupProbe: '/',
+      exposesMetrics: true,
+      replicas: args.environment.podsConfig.tracingCollector.maxReplicas,
+      pdb: true,
+      memoryLimit: '512Mi',
+      autoScaling: {
+        maxReplicas: args.environment.podsConfig.tracingCollector.maxReplicas,
+        cpu: {
+          limit: '500m',
           cpuAverageToScale: 80,
         },
       },
@@ -57,4 +92,12 @@ export function deployOTELCollector(args: {
     .withSecret('CLICKHOUSE_PASSWORD', args.clickhouse.secret, 'password')
     .withSecret('CLICKHOUSE_PROTOCOL', args.clickhouse.secret, 'protocol')
     .deploy();
+
+  return {
+    ingress,
+    egress,
+    // For backward compatibility, expose ingress as the main deployment
+    deployment: ingress.deployment,
+    service: ingress.service,
+  };
 }
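
Note: both halves of the split run the same collector image; only the deployment wiring is shown in this diff, not the runtime pipeline configuration. As a minimal sketch, the ingress collector's config could look like the following, assuming the stock OTLP receiver and Kafka exporter and the KAFKA_BROKER variable injected above (the topic name matches the Job below; protocol_version is an assumption):

receivers:
  otlp:
    protocols:
      http:
        endpoint: 0.0.0.0:4318    # matches the service port above

processors:
  batch: {}

exporters:
  kafka:
    brokers:
      - ${env:KAFKA_BROKER}       # redpanda-client:29092, injected by the deployment
    topic: otel-traces
    encoding: otlp_proto
    protocol_version: 2.0.0       # assumed value

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [kafka]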

deployment/services/redpanda.ts

Lines changed: 211 additions & 0 deletions (new file)

@@ -0,0 +1,211 @@
+import * as k8s from '@pulumi/kubernetes';
+import * as pulumi from '@pulumi/pulumi';
+
+export type Redpanda = ReturnType<typeof deployRedpanda>;
+
+export function deployRedpanda() {
+  const redpandaConfig = new pulumi.Config('redpanda');
+  const replicas = redpandaConfig.getNumber('replicas') || 3;
+  const storageSize = redpandaConfig.get('storageSize') || '20Gi';
+  const memoryLimit = redpandaConfig.get('memoryLimit') || '2Gi';
+  const cpuLimit = redpandaConfig.get('cpuLimit') || '1000m';
+
+  const labels = { app: 'redpanda' };
+
+  // StatefulSet for Redpanda
+  const statefulSet = new k8s.apps.v1.StatefulSet('redpanda', {
+    metadata: {
+      name: 'redpanda',
+    },
+    spec: {
+      serviceName: 'redpanda',
+      replicas,
+      selector: {
+        matchLabels: labels,
+      },
+      template: {
+        metadata: {
+          labels,
+        },
+        spec: {
+          containers: [
+            {
+              name: 'redpanda',
+              image: 'redpandadata/redpanda:v25.2.11',
+              resources: {
+                limits: {
+                  cpu: cpuLimit,
+                  memory: memoryLimit,
+                },
+                requests: {
+                  cpu: '500m',
+                  memory: '1Gi',
+                },
+              },
+              command: [
+                '/bin/bash',
+                '-c',
+                `
+                set -e
+                POD_ORDINAL=\${HOSTNAME##*-}
+                INTERNAL_ADVERTISE="redpanda-\${POD_ORDINAL}.redpanda.default.svc.cluster.local"
+
+                # Build seeds list for cluster formation
+                SEEDS=""
+                for i in $(seq 0 $((${replicas} - 1))); do
+                  if [ $i -ne 0 ]; then
+                    SEEDS="$SEEDS,"
+                  fi
+                  SEEDS="$SEEDS{\\\"node_id\\\":$i,\\\"host\\\":\\\"redpanda-$i.redpanda.default.svc.cluster.local\\\",\\\"port\\\":33145}"
+                done
+
+                /usr/bin/redpanda start \\
+                  --node-id \${POD_ORDINAL} \\
+                  --smp 1 \\
+                  --memory 1G \\
+                  --reserve-memory 0M \\
+                  --overprovisioned \\
+                  --kafka-addr PLAINTEXT://0.0.0.0:29092 \\
+                  --advertise-kafka-addr PLAINTEXT://\${INTERNAL_ADVERTISE}:29092 \\
+                  --pandaproxy-addr 0.0.0.0:8082 \\
+                  --advertise-pandaproxy-addr \${INTERNAL_ADVERTISE}:8082 \\
+                  --rpc-addr 0.0.0.0:33145 \\
+                  --advertise-rpc-addr \${INTERNAL_ADVERTISE}:33145 \\
+                  --seeds "[$SEEDS]" \\
+                  --set redpanda.enable_idempotence=true \\
+                  --set redpanda.enable_transactions=true \\
+                  --set redpanda.default_topic_replications=1 \\
+                  --set redpanda.auto_create_topics_enabled=false
+                `,
+              ],
+              ports: [
+                { containerPort: 29092, name: 'kafka' },
+                { containerPort: 8082, name: 'http' },
+                { containerPort: 33145, name: 'rpc' },
+                { containerPort: 9644, name: 'admin' },
+              ],
+              volumeMounts: [
+                {
+                  name: 'datadir',
+                  mountPath: '/var/lib/redpanda/data',
+                },
+              ],
+              livenessProbe: {
+                httpGet: {
+                  path: '/v1/status/ready',
+                  port: 9644 as any,
+                },
+                initialDelaySeconds: 30,
+                periodSeconds: 10,
+              },
+              readinessProbe: {
+                httpGet: {
+                  path: '/v1/status/ready',
+                  port: 9644 as any,
+                },
+                initialDelaySeconds: 10,
+                periodSeconds: 5,
+              },
+            },
+          ],
+        },
+      },
+      volumeClaimTemplates: [
+        {
+          metadata: {
+            name: 'datadir',
+          },
+          spec: {
+            accessModes: ['ReadWriteOnce'],
+            resources: {
+              requests: {
+                storage: storageSize,
+              },
+            },
+          },
+        },
+      ],
+    },
+  });
+
+  // Headless Service for StatefulSet (used for internal cluster communication)
+  const headlessService = new k8s.core.v1.Service('redpanda-headless', {
+    metadata: {
+      name: 'redpanda',
+    },
+    spec: {
+      clusterIP: 'None',
+      selector: labels,
+      ports: [
+        { name: 'kafka', port: 29092, targetPort: 29092 as any },
+        { name: 'http', port: 8082, targetPort: 8082 as any },
+        { name: 'rpc', port: 33145, targetPort: 33145 as any },
+        { name: 'admin', port: 9644, targetPort: 9644 as any },
+      ],
+    },
+  });
+
+  // ClusterIP Service for clients (load balances across all pods)
+  const clientService = new k8s.core.v1.Service('redpanda-client-service', {
+    metadata: {
+      name: 'redpanda-client',
+    },
+    spec: {
+      type: 'ClusterIP',
+      selector: labels,
+      ports: [
+        { name: 'kafka', port: 29092, targetPort: 29092 as any },
+        { name: 'http', port: 8082, targetPort: 8082 as any },
+      ],
+    },
+  });
+
+  // Create the otel-traces topic (6 partitions, replication factor 1)
+  const topicCreationJob = new k8s.batch.v1.Job('redpanda-topic-creation', {
+    metadata: {
+      name: 'redpanda-topic-creation',
+    },
+    spec: {
+      template: {
+        spec: {
+          restartPolicy: 'OnFailure',
+          containers: [
+            {
+              name: 'rpk',
+              image: 'redpandadata/redpanda:v25.2.11',
+              command: [
+                '/bin/bash',
+                '-c',
+                `
+                # Wait for Redpanda to be ready
+                until rpk cluster health --brokers redpanda-0.redpanda:29092 2>/dev/null | grep -q 'Healthy'; do
+                  echo "Waiting for Redpanda cluster..."
+                  sleep 5
+                done
+
+                # Create topic with partitioning only (replication factor 1)
+                rpk topic create otel-traces \\
+                  --brokers redpanda-0.redpanda:29092 \\
+                  --replicas 1 \\
+                  --partitions 6 \\
+                  --config retention.ms=86400000 \\
+                  --config compression.type=snappy \\
+                  || echo "Topic may already exist"
+                `,
+              ],
+            },
+          ],
+        },
+      },
+    },
+  }, { dependsOn: [statefulSet, headlessService] });
+
+  return {
+    statefulSet,
+    headlessService,
+    clientService,
+    topicCreationJob,
+    // Client service endpoint - auto-discovers all brokers
+    brokerEndpoint: 'redpanda-client:29092',
+  };
+}
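
With the default replicas = 3, the seed-building loop in the startup script expands (once shell and JSON escaping are resolved) to the following static bootstrap list, which every broker receives via --seeds:

[
  { "node_id": 0, "host": "redpanda-0.redpanda.default.svc.cluster.local", "port": 33145 },
  { "node_id": 1, "host": "redpanda-1.redpanda.default.svc.cluster.local", "port": 33145 },
  { "node_id": 2, "host": "redpanda-2.redpanda.default.svc.cluster.local", "port": 33145 }
]

Giving all pods, including redpanda-0, the same seed list means cluster formation does not depend on pod startup order.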

docker/configs/otel-collector/builder-config.yaml

Lines changed: 4 additions & 3 deletions

@@ -6,6 +6,8 @@ dist:

 receivers:
   - gomod: go.opentelemetry.io/collector/receiver/otlpreceiver v0.122.0
+  - gomod:
+      github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.122.0

 processors:
   - gomod: go.opentelemetry.io/collector/processor/batchprocessor v0.122.0
@@ -20,6 +22,8 @@ exporters:
   - gomod: go.opentelemetry.io/collector/exporter/debugexporter v0.122.0
   - gomod:
       github.com/open-telemetry/opentelemetry-collector-contrib/exporter/clickhouseexporter v0.122.0
+  - gomod:
+      github.com/open-telemetry/opentelemetry-collector-contrib/exporter/kafkaexporter v0.122.0

 extensions:
   - gomod:
@@ -29,9 +33,6 @@ extensions:
       github.com/open-telemetry/opentelemetry-collector-contrib/extension/pprofextension
      v0.122.0
   - gomod: go.opentelemetry.io/collector/extension/zpagesextension v0.122.0
-  - gomod:
-      github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage
-      v0.122.0
   - gomod: github.com/graphql-hive/console/docker/configs/otel-collector/extension-hiveauth v0.0.0
     path: ./extension-hiveauth
     name: hiveauthextension # when using local extensions, package name is required, otherwise you get "missing import path"
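
The builder manifest only determines which components are compiled into the collector binary; the runtime configuration that wires them into pipelines is not part of this commit. A plausible sketch of the egress side, assuming the stock Kafka receiver and ClickHouse exporter plus the KAFKA_BROKER variable from the deployment code (the group_id and the CLICKHOUSE_* variable names besides CLICKHOUSE_PASSWORD and CLICKHOUSE_PROTOCOL are assumptions):

extensions:
  health_check:
    endpoint: 0.0.0.0:13133       # matches probePort in the egress ServiceDeployment

receivers:
  kafka:
    brokers:
      - ${env:KAFKA_BROKER}
    topic: otel-traces
    encoding: otlp_proto
    group_id: otel-collector-egress   # assumed consumer-group name

processors:
  batch: {}

exporters:
  clickhouse:
    endpoint: ${env:CLICKHOUSE_PROTOCOL}://${env:CLICKHOUSE_HOST}:${env:CLICKHOUSE_PORT}   # host/port variable names assumed
    password: ${env:CLICKHOUSE_PASSWORD}

service:
  extensions: [health_check]
  pipelines:
    traces:
      receivers: [kafka]
      processors: [batch]
      exporters: [clickhouse]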
