Skip to content

Commit c6cce42

Browse files
committed
Add trino to CDK
1 parent 4867a18 commit c6cce42

File tree

2 files changed

+146
-29
lines changed

2 files changed

+146
-29
lines changed

benchmarks/cdk/lib/cdk-stack.ts

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import * as cr from 'aws-cdk-lib/custom-resources';
77
import { Construct } from 'constructs';
88
import * as path from 'path';
99
import { execSync } from 'child_process';
10+
import { trinoAfterDeployCommands, trinoUserDataCommands } from "./trino";
1011

1112
// Directory three levels above the directory that contains this file.
const ROOT = path.join(__dirname, '../../..')
1213

@@ -18,7 +19,7 @@ interface CdkStackProps extends StackProps {
1819
}
1920

2021
export class CdkStack extends Stack {
21-
constructor (scope: Construct, id: string, props: CdkStackProps) {
22+
constructor(scope: Construct, id: string, props: CdkStackProps) {
2223
super(scope, id, props);
2324

2425
const { config } = props;
@@ -122,7 +123,8 @@ EOF`,
122123
// Enable and start the service
123124
'systemctl daemon-reload',
124125
'systemctl enable worker',
125-
'systemctl start worker'
126+
'systemctl start worker',
127+
...trinoUserDataCommands(i)
126128
);
127129

128130
const instance = new ec2.Instance(this, `BenchmarkInstance${i}`, {
@@ -161,33 +163,50 @@ sudo journalctl -u worker.service -f -o cat
161163
});
162164

163165
// Custom resource to restart worker service on every deploy
164-
const restartWorker = new cr.AwsCustomResource(this, 'RestartWorkerService', {
165-
onUpdate: {
166-
service: 'SSM',
167-
action: 'sendCommand',
168-
parameters: {
169-
DocumentName: 'AWS-RunShellScript',
170-
InstanceIds: instances.map(inst => inst.instanceId),
171-
Parameters: {
172-
commands: [
173-
`aws s3 cp s3://${workerBinary.s3BucketName}/${workerBinary.s3ObjectKey} /usr/local/bin/worker`,
174-
'chmod +x /usr/local/bin/worker',
175-
'systemctl restart worker',
176-
],
177-
},
166+
sendCommandsUnconditionally(this, 'RestartWorkerService', instances, [
167+
`aws s3 cp s3://${workerBinary.s3BucketName}/${workerBinary.s3ObjectKey} /usr/local/bin/worker`,
168+
'chmod +x /usr/local/bin/worker',
169+
'systemctl restart worker',
170+
])
171+
172+
// Start coordinator first
173+
sendCommandsUnconditionally(this, 'RestartTrinoCoordinator', [instances[0]], [
174+
'systemctl start trino',
175+
])
176+
177+
// Then start workers (they will discover the coordinator)
178+
sendCommandsUnconditionally(this, 'RestartTrinoWorkers', instances.slice(1), trinoAfterDeployCommands(this.region))
179+
}
180+
}
181+
182+
/**
 * Registers an `AwsCustomResource` that calls SSM `sendCommand` to run
 * `commands` through the `AWS-RunShellScript` document on `instances`.
 *
 * The physical resource id embeds `Date.now()`, so it differs on every synth;
 * that is what makes the command go out "unconditionally" on each deployment
 * rather than only when the command list changes.
 *
 * Every error code returned by the SDK call is ignored
 * (`ignoreErrorCodesMatching: '.*'`), so a failed send does not fail the
 * deployment.
 *
 * NOTE(review): `sendCommand` submits the command and returns; it presumably
 * does not wait for the script to finish on the instances, so a dependency
 * between two of these resources orders the submissions only - confirm against
 * the SSM API reference before relying on stricter ordering.
 *
 * @param construct Scope in which the custom resource is created.
 * @param name Construct id; also used as the prefix of the physical resource id.
 * @param instances Instances whose ids are passed as `InstanceIds`. The custom
 *   resource depends on all of them, so they are created first.
 * @param commands Shell commands handed to `AWS-RunShellScript`.
 * @returns The created custom resource, so callers can order several command
 *   resources with `node.addDependency`.
 */
function sendCommandsUnconditionally(
  construct: Construct,
  name: string,
  instances: ec2.Instance[],
  commands: string[]
): cr.AwsCustomResource {
  const cmd = new cr.AwsCustomResource(construct, name, {
    onUpdate: {
      service: 'SSM',
      action: 'sendCommand',
      parameters: {
        DocumentName: 'AWS-RunShellScript',
        InstanceIds: instances.map(inst => inst.instanceId),
        Parameters: {
          commands
        },
      },
      // A fresh id on every synth makes CloudFormation see a change each deploy.
      physicalResourceId: cr.PhysicalResourceId.of(`${name}-${Date.now()}`),
      ignoreErrorCodesMatching: '.*',
    },
    policy: cr.AwsCustomResourcePolicy.fromStatements([
      new iam.PolicyStatement({
        actions: ['ssm:SendCommand'],
        resources: ['*'],
      }),
    ]),
  });

  // Ensure instances are created before the command is sent
  cmd.node.addDependency(...instances)

  return cmd;
}

benchmarks/cdk/lib/trino.ts

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/** Trino release installed when the caller does not request a specific one. */
const DEFAULT_TRINO_VERSION = '461';

/** Maven Central directory that hosts the Trino server and CLI artifacts. */
const TRINO_MAVEN_BASE_URL = 'https://repo1.maven.org/maven2/io/trino';

/**
 * Builds a shell command that writes `lines` to `filePath` via a heredoc.
 * The delimiter is quoted, so the shell performs no expansion on the content.
 */
function trinoWriteFileCommand(filePath: string, lines: readonly string[]): string {
  return [`cat > ${filePath} << 'TRINO_EOF'`, ...lines, 'TRINO_EOF'].join('\n');
}

/**
 * Returns the shell commands that install and configure Trino on one instance.
 *
 * The commands install Java, unpack the Trino server under /opt/trino-server,
 * write node.properties, jvm.config, config.properties and a Hive catalog,
 * download the Trino CLI, register a systemd unit, and finally enable and
 * start the service.
 *
 * @param instanceIndex Zero-based index of the instance. Index 0 is configured
 *   as the coordinator; every other index is configured as a worker. The index
 *   is also used to build the Trino `node.id`.
 * @param trinoVersion Trino release to download (server tarball and CLI jar).
 *   Defaults to 461. The JDK package is fixed at Java 22 and is not derived
 *   from this value, so other releases may need a different JDK.
 * @returns The commands, in the order they must run.
 */
export function trinoUserDataCommands(
  instanceIndex: number,
  trinoVersion = DEFAULT_TRINO_VERSION
): string[] {
  const isCoordinator = instanceIndex === 0;
  const serverDir = `trino-server-${trinoVersion}`;

  // Workers get a placeholder discovery URI here; trinoAfterDeployCommands
  // rewrites config.properties with the coordinator address after deploy.
  const configLines = isCoordinator
    ? [
        'coordinator=true',
        'node-scheduler.include-coordinator=true',
        'http-server.http.port=8080',
        'discovery.uri=http://localhost:8080',
      ]
    : [
        'coordinator=false',
        'http-server.http.port=8080',
        'discovery.uri=http://localhost:8080',
      ];

  return [
    // Install Java 22 for Trino (Trino 461 requires Java 22+)
    'yum install -y java-22-amazon-corretto-headless python',

    // Download and install Trino
    'cd /opt',
    `curl -L -o trino-server.tar.gz ${TRINO_MAVEN_BASE_URL}/trino-server/${trinoVersion}/${serverDir}.tar.gz`,
    'tar -xzf trino-server.tar.gz',
    `mv ${serverDir} trino-server`,
    'rm trino-server.tar.gz',

    // Create Trino directories
    'mkdir -p /var/trino/data',
    'mkdir -p /opt/trino-server/etc/catalog',

    // Configure Trino node properties
    trinoWriteFileCommand('/opt/trino-server/etc/node.properties', [
      'node.environment=benchmark',
      `node.id=instance-${instanceIndex}`,
      'node.data-dir=/var/trino/data',
    ]),

    // Configure Trino JVM settings (minimal - using conservative 8GB heap)
    trinoWriteFileCommand('/opt/trino-server/etc/jvm.config', [
      '-server',
      '-Xmx8G',
      '-XX:+UseG1GC',
      '-XX:G1HeapRegionSize=32M',
      '-XX:+ExplicitGCInvokesConcurrent',
      '-XX:+HeapDumpOnOutOfMemoryError',
      '-XX:+ExitOnOutOfMemoryError',
      '-Djdk.attach.allowAttachSelf=true',
    ]),

    // Configure Trino config.properties (coordinator or worker variant)
    trinoWriteFileCommand('/opt/trino-server/etc/config.properties', configLines),

    // Configure Hive catalog backed by a file-based metastore
    trinoWriteFileCommand('/opt/trino-server/etc/catalog/hive.properties', [
      'connector.name=hive',
      'hive.metastore=file',
      'hive.metastore.catalog.dir=/var/trino/metastore',
    ]),

    // Download Trino CLI
    `curl -L -o /usr/local/bin/trino ${TRINO_MAVEN_BASE_URL}/trino-cli/${trinoVersion}/trino-cli-${trinoVersion}-executable.jar`,
    'chmod +x /usr/local/bin/trino',

    // Create Trino systemd service
    trinoWriteFileCommand('/etc/systemd/system/trino.service', [
      '[Unit]',
      'Description=Trino Server',
      'After=network.target',
      '',
      '[Service]',
      'Type=forking',
      'ExecStart=/opt/trino-server/bin/launcher start',
      'ExecStop=/opt/trino-server/bin/launcher stop',
      'Restart=on-failure',
      'User=root',
      'WorkingDirectory=/opt/trino-server',
      '',
      '[Install]',
      'WantedBy=multi-user.target',
    ]),

    // Enable Trino at boot and start it right away with the config written above
    'systemctl daemon-reload',
    'systemctl enable trino',
    'systemctl start trino',
  ];
}
87+
88+
/**
 * Returns the shell commands that point a Trino worker at the coordinator and
 * restart it.
 *
 * The first command is a multi-line script: it looks up the private IP of the
 * running instance tagged `Name=instance-0` with the AWS CLI, then rewrites
 * /opt/trino-server/etc/config.properties as a worker config whose
 * `discovery.uri` targets that IP. The second command restarts the service.
 *
 * If the lookup yields nothing (empty output, or the literal `None` that the
 * CLI prints in text mode for a null result), the script exits non-zero
 * before touching the config, instead of writing an unusable discovery URI.
 *
 * @param region AWS region passed to `aws ec2 describe-instances`.
 * @returns The commands, in the order they must run.
 */
export function trinoAfterDeployCommands(region: string): string[] {
  const reconfigureWorker = [
    `COORDINATOR_IP=$(aws ec2 describe-instances --region ${region} --filters "Name=tag:Name,Values=instance-0" "Name=instance-state-name,Values=running" --query "Reservations[0].Instances[0].PrivateIpAddress" --output text)`,
    // Bail out rather than write "http://None:8080" (or an empty host).
    'if [ -z "$COORDINATOR_IP" ] || [ "$COORDINATOR_IP" = "None" ]; then',
    '  echo "Could not resolve the Trino coordinator private IP" >&2',
    '  exit 1',
    'fi',
    // Unquoted heredoc delimiter: the shell expands COORDINATOR_IP on the instance.
    'cat > /opt/trino-server/etc/config.properties << TRINO_EOF',
    'coordinator=false',
    'http-server.http.port=8080',
    // Plain (non-template) string on purpose: the placeholder is for the shell, not JS.
    'discovery.uri=http://${COORDINATOR_IP}:8080',
    'TRINO_EOF',
  ].join('\n');

  return [
    reconfigureWorker,
    'systemctl restart trino',
  ];
}

0 commit comments

Comments
 (0)