Skip to content

Commit 00d7292

Browse files
committed
feat(helm): Improve modularity and add Presto support refinements
- Replace `query_engine` conditions with modular flags in Helm templates.
- Refactor Presto configurations to use dedicated flags and scripts.
- Enhance worker and reducer node scheduling logic.
- Update documentation to reflect Presto integration and better configuration flexibility.
1 parent d389542 commit 00d7292

27 files changed

+179
-101
lines changed

docs/src/user-docs/guides-k8s-deployment.md

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -323,12 +323,21 @@ clpConfig:
323323
storage_engine: "clp-s"
324324
query_engine: "presto"
325325
326+
# Disable the clp-s query pipeline since Presto replaces it.
327+
# NOTE: The API server currently depends on the clp-s query pipeline and does not work with
328+
# Presto. Keep the clp-s query pipeline enabled if you need the API server; otherwise, disable it.
329+
api_server: null
330+
query_scheduler: null
331+
query_worker: null
332+
reducer: null
333+
326334
# Disable results cache retention since the Presto integration doesn't yet support garbage
327335
# collection of search results.
328336
results_cache:
329337
retention_period: null
330338
331339
presto:
340+
port: 30889
332341
coordinator:
333342
logging_level: "INFO"
334343
query_max_memory_gb: 1
@@ -386,6 +395,9 @@ To run compression workers, query workers, and reducers in separate node pools:
386395

387396
# Label query nodes
388397
kubectl label nodes node3 node4 yscope.io/nodeType=query
398+
399+
# Label Presto nodes (if using Presto as the query engine)
400+
kubectl label nodes node5 node6 yscope.io/nodeType=presto
389401
```
390402

391403
2. Configure scheduling:
@@ -399,19 +411,27 @@ To run compression workers, query workers, and reducers in separate node pools:
399411
replicas: 2
400412
scheduling:
401413
nodeSelector:
402-
yscope.io/nodeType: compression
414+
yscope.io/nodeType: "compression"
403415
404416
queryWorker:
405417
replicas: 2
406418
scheduling:
407419
nodeSelector:
408-
yscope.io/nodeType: query
420+
yscope.io/nodeType: "query"
409421
410422
reducer:
411423
replicas: 2
412424
scheduling:
413425
nodeSelector:
414-
yscope.io/nodeType: query
426+
yscope.io/nodeType: "query"
427+
428+
# If using Presto as the query engine, configure prestoWorker instead of
429+
# queryWorker and reducer.
430+
prestoWorker:
431+
replicas: 2
432+
scheduling:
433+
nodeSelector:
434+
yscope.io/nodeType: "presto"
415435
```
416436

417437
3. Install:
@@ -441,7 +461,7 @@ To run all worker types in the same node pool:
441461
replicas: 2
442462
scheduling:
443463
nodeSelector:
444-
yscope.io/nodeType: compute
464+
yscope.io/nodeType: "compute"
445465
topologySpreadConstraints:
446466
- maxSkew: 1
447467
topologyKey: "kubernetes.io/hostname"
@@ -454,13 +474,21 @@ To run all worker types in the same node pool:
454474
replicas: 2
455475
scheduling:
456476
nodeSelector:
457-
yscope.io/nodeType: compute
477+
yscope.io/nodeType: "compute"
458478
459479
reducer:
460480
replicas: 2
461481
scheduling:
462482
nodeSelector:
463-
yscope.io/nodeType: compute
483+
yscope.io/nodeType: "compute"
484+
485+
# If using Presto as the query engine, configure prestoWorker instead of
486+
# queryWorker and reducer.
487+
prestoWorker:
488+
replicas: 2
489+
scheduling:
490+
nodeSelector:
491+
yscope.io/nodeType: "compute"
464492
```
465493

466494
3. Install:

docs/src/user-docs/guides-using-presto.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,28 @@ When deploying CLP on Kubernetes using Helm, Presto can be enabled by setting th
4343
package:
4444
query_engine: "presto"
4545
46+
# Disable the clp-s query pipeline since Presto replaces it.
47+
# NOTE: The API server currently depends on the clp-s query pipeline and does not work
48+
# with Presto. Keep the clp-s query pipeline enabled if you need the API server; otherwise, disable it.
49+
api_server: null
50+
query_scheduler: null
51+
query_worker: null
52+
reducer: null
53+
4654
# Disable results cache retention since the Presto integration doesn't yet support
4755
# garbage collection of search results.
4856
results_cache:
4957
retention_period: null
5058
5159
presto:
60+
port: 30889
61+
coordinator:
62+
logging_level: "INFO"
63+
query_max_memory_gb: 1
64+
query_max_memory_per_node_gb: 1
65+
worker:
66+
query_memory_gb: 4
67+
system_memory_gb: 8
5268
# Split filter config for the Presto CLP connector. For each dataset, add a filter entry.
5369
# Replace <dataset> with the dataset name (use "default" if you didn't specify one when
5470
# compressing) and <timestamp-key> with the timestamp key used during compression.

tools/deployment/package-helm/.set-up-common.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ nodes:
8282
- containerPort: 30800
8383
hostPort: 30800
8484
protocol: TCP
85+
- containerPort: 30889
86+
hostPort: 30889
87+
protocol: TCP
8588
EOF
8689

8790
for ((i = 0; i < num_workers; i++)); do

tools/deployment/package-helm/set-up-multi-dedicated-test.sh

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@ CLP_HOME="${CLP_HOME:-/tmp/clp}"
1010
CLUSTER_NAME="${CLUSTER_NAME:-clp-test}"
1111
NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}"
1212
NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}"
13+
NUM_PRESTO_NODES="${NUM_PRESTO_NODES:-2}"
1314
COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}"
1415
QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}"
1516
REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}"
17+
PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}"
1618

1719
# shellcheck source=.set-up-common.sh
1820
source "${script_dir}/.set-up-common.sh"
@@ -21,14 +23,16 @@ echo "=== Multi-node setup with dedicated worker nodes ==="
2123
echo "Cluster: ${CLUSTER_NAME}"
2224
echo "Compression nodes: ${NUM_COMPRESSION_NODES}"
2325
echo "Query nodes: ${NUM_QUERY_NODES}"
26+
echo "Presto nodes: ${NUM_PRESTO_NODES}"
2427
echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}"
2528
echo "Query workers: ${QUERY_WORKER_REPLICAS}"
2629
echo "Reducers: ${REDUCER_REPLICAS}"
30+
echo "Presto workers: ${PRESTO_WORKER_REPLICAS}"
2731
echo ""
2832

2933
prepare_environment "${CLUSTER_NAME}"
3034

31-
total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))
35+
total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES + NUM_PRESTO_NODES))
3236

3337
echo "Creating kind cluster..."
3438
generate_kind_config "${total_workers}" | kind create cluster --name "${CLUSTER_NAME}" --config=-
@@ -43,11 +47,18 @@ for ((i = 0; i < NUM_COMPRESSION_NODES; i++)); do
4347
done
4448

4549
# Label query nodes
46-
for ((i = NUM_COMPRESSION_NODES; i < total_workers; i++)); do
50+
query_end=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))
51+
for ((i = NUM_COMPRESSION_NODES; i < query_end; i++)); do
4752
echo "Labeling ${worker_nodes[$i]} as query node"
4853
kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=query --overwrite
4954
done
5055

56+
# Label Presto nodes
57+
for ((i = query_end; i < total_workers; i++)); do
58+
echo "Labeling ${worker_nodes[$i]} as presto node"
59+
kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=presto --overwrite
60+
done
61+
5162
echo "Installing Helm chart..."
5263
helm uninstall test --ignore-not-found
5364
sleep 2
@@ -57,6 +68,9 @@ helm install test "${script_dir}" \
5768
--set "compressionWorker.scheduling.nodeSelector.yscope\.io/nodeType=compression" \
5869
--set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \
5970
--set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" \
60-
--set "reducer.replicas=${REDUCER_REPLICAS}"
71+
--set "reducer.replicas=${REDUCER_REPLICAS}" \
72+
--set "reducer.scheduling.nodeSelector.yscope\.io/nodeType=query" \
73+
--set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}" \
74+
--set "prestoWorker.scheduling.nodeSelector.yscope\.io/nodeType=presto"
6175

6276
wait_for_cluster_ready

tools/deployment/package-helm/set-up-multi-shared-test.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ NUM_WORKER_NODES="${NUM_WORKER_NODES:-2}"
1212
COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}"
1313
QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}"
1414
REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}"
15+
PRESTO_WORKER_REPLICAS="${PRESTO_WORKER_REPLICAS:-2}"
1516

1617
# shellcheck source=.set-up-common.sh
1718
source "${script_dir}/.set-up-common.sh"
@@ -22,6 +23,7 @@ echo "Worker nodes: ${NUM_WORKER_NODES}"
2223
echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}"
2324
echo "Query workers: ${QUERY_WORKER_REPLICAS}"
2425
echo "Reducers: ${REDUCER_REPLICAS}"
26+
echo "Presto workers: ${PRESTO_WORKER_REPLICAS}"
2527
echo ""
2628

2729
prepare_environment "${CLUSTER_NAME}"
@@ -36,6 +38,7 @@ helm install test "${script_dir}" \
3638
--set "distributedDeployment=true" \
3739
--set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \
3840
--set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \
39-
--set "reducer.replicas=${REDUCER_REPLICAS}"
41+
--set "reducer.replicas=${REDUCER_REPLICAS}" \
42+
--set "prestoWorker.replicas=${PRESTO_WORKER_REPLICAS}"
4043

4144
wait_for_cluster_ready

tools/deployment/package-helm/templates/api-server-deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }}
1+
{{- if .Values.clpConfig.api_server }}
22
apiVersion: "apps/v1"
33
kind: "Deployment"
44
metadata:

tools/deployment/package-helm/templates/api-server-logs-pv.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }}
1+
{{- if .Values.clpConfig.api_server }}
22
{{- include "clp.createStaticPv" (dict
33
"root" .
44
"component_category" "api-server"

tools/deployment/package-helm/templates/api-server-logs-pvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }}
1+
{{- if .Values.clpConfig.api_server }}
22
{{- include "clp.createPvc" (dict
33
"root" .
44
"component_category" "api-server"

tools/deployment/package-helm/templates/api-server-service.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
{{- if and .Values.clpConfig.api_server (ne .Values.clpConfig.package.query_engine "presto") }}
1+
{{- if .Values.clpConfig.api_server }}
22
apiVersion: "v1"
33
kind: "Service"
44
metadata:

tools/deployment/package-helm/templates/configmap.yaml

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,18 @@ data:
129129
package:
130130
query_engine: {{ .Values.clpConfig.package.query_engine | quote }}
131131
storage_engine: {{ .Values.clpConfig.package.storage_engine | quote }}
132+
{{- with .Values.clpConfig.query_scheduler }}
132133
query_scheduler:
133-
host: "{{ include "clp.fullname" . }}-query-scheduler"
134-
jobs_poll_delay: {{ .Values.clpConfig.query_scheduler.jobs_poll_delay }}
135-
logging_level: {{ .Values.clpConfig.query_scheduler.logging_level | quote }}
136-
num_archives_to_search_per_sub_job: {{
137-
.Values.clpConfig.query_scheduler.num_archives_to_search_per_sub_job | int }}
134+
host: "{{ include "clp.fullname" $ }}-query-scheduler"
135+
jobs_poll_delay: {{ .jobs_poll_delay }}
136+
logging_level: {{ .logging_level | quote }}
137+
num_archives_to_search_per_sub_job: {{ .num_archives_to_search_per_sub_job | int }}
138138
port: 7000
139+
{{- end }}
140+
{{- with .Values.clpConfig.query_worker }}
139141
query_worker:
140-
logging_level: {{ .Values.clpConfig.query_worker.logging_level | quote }}
142+
logging_level: {{ .logging_level | quote }}
143+
{{- end }}
141144
queue:
142145
host: "{{ include "clp.fullname" . }}-queue"
143146
port: 5672
@@ -146,11 +149,13 @@ data:
146149
host: "{{ include "clp.fullname" . }}-redis"
147150
port: 6379
148151
query_backend_database: {{ .Values.clpConfig.redis.query_backend_database | int }}
152+
{{- with .Values.clpConfig.reducer }}
149153
reducer:
150154
base_port: 14009
151-
host: "{{ include "clp.fullname" . }}-reducer"
152-
logging_level: {{ .Values.clpConfig.reducer.logging_level | quote }}
153-
upsert_interval: {{ .Values.clpConfig.reducer.upsert_interval | int }}
155+
host: "{{ include "clp.fullname" $ }}-reducer"
156+
logging_level: {{ .logging_level | quote }}
157+
upsert_interval: {{ .upsert_interval | int }}
158+
{{- end }}
154159
results_cache:
155160
db_name: {{ .Values.clpConfig.results_cache.db_name | quote }}
156161
host: "{{ include "clp.fullname" . }}-results-cache"
@@ -219,10 +224,10 @@ data:
219224
{{- else }}
220225
mcp_server: null
221226
{{- end }}
222-
{{- if eq .Values.clpConfig.package.query_engine "presto" }}
227+
{{- if .Values.clpConfig.presto }}
223228
presto:
224229
host: "{{ include "clp.fullname" . }}-presto-coordinator"
225-
port: 8080
230+
port: 8889
226231
{{- else }}
227232
presto: null
228233
{{- end }}
@@ -328,16 +333,15 @@ data:
328333
{{ .Values.clpConfig.archive_output.target_segment_size | int }},
329334
"ClpQueryEngine": {{ .Values.clpConfig.package.query_engine | quote }},
330335
"ClpStorageEngine": {{ .Values.clpConfig.package.storage_engine | quote }},
331-
{{- if eq .Values.clpConfig.package.query_engine "presto" }}
336+
{{- if .Values.clpConfig.presto }}
332337
"PrestoHost": "{{ include "clp.fullname" . }}-presto-coordinator",
333-
"PrestoPort": 8080
338+
"PrestoPort": 8889
334339
{{- else }}
335340
"PrestoHost": null,
336341
"PrestoPort": null
337342
{{- end }}
338343
}
339344
340-
{{- if eq .Values.clpConfig.package.query_engine "presto" }}
341345
{{- with .Values.clpConfig.presto }}
342346
presto-coordinator-catalog-clp.properties: |
343347
connector.name=clp
@@ -354,11 +358,11 @@ data:
354358
presto-coordinator-config-config.properties: |
355359
coordinator=true
356360
node-scheduler.include-coordinator=false
357-
http-server.http.port=8080
361+
http-server.http.port=8889
358362
query.max-memory={{ .coordinator.query_max_memory_gb }}GB
359363
query.max-memory-per-node={{ .coordinator.query_max_memory_per_node_gb }}GB
360364
discovery-server.enabled=true
361-
discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8080
365+
discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8889
362366
optimizer.optimize-hash-generation=false
363367
regex-library=RE2J
364368
use-alternative-function-signatures=true
@@ -400,8 +404,8 @@ data:
400404
{{- end }}{{/* with $.Values.clpConfig.archive_output.storage */}}
401405
402406
presto-worker-config-config.properties: |
403-
discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8080
404-
http-server.http.port=8080
407+
discovery.uri=http://{{ include "clp.fullname" $ }}-presto-coordinator:8889
408+
http-server.http.port=8889
405409
query-memory-gb={{ .worker.query_memory_gb }}
406410
shutdown-onset-sec=1
407411
system-memory-gb={{ .worker.system_memory_gb }}
@@ -414,5 +418,23 @@ data:
414418
415419
presto-worker-config-velox.properties: |
416420
mutable-config=true
421+
422+
presto-worker-setup-configs.sh: |
423+
#!/bin/sh
424+
set -e
425+
426+
CONFIG_TEMPLATE_DIR="/etc/presto-config"
427+
PRESTO_CONFIG_DIR="/opt/presto-server/etc"
428+
429+
cp "${CONFIG_TEMPLATE_DIR}/presto-worker-config-config.properties" "${PRESTO_CONFIG_DIR}/config.properties"
430+
cp "${CONFIG_TEMPLATE_DIR}/presto-worker-config-node.properties" "${PRESTO_CONFIG_DIR}/node.properties"
431+
432+
# Query coordinator for version and append to config
433+
DISCOVERY_URI="http://{{ include "clp.fullname" $ }}-presto-coordinator:8889"
434+
PRESTO_VERSION=$(wget -q -O - "${DISCOVERY_URI}/v1/info" | jq -r '.version')
435+
echo "presto.version=${PRESTO_VERSION}" >> "${PRESTO_CONFIG_DIR}/config.properties"
436+
437+
# Set node identity from hostname
438+
echo "node.internal-address=$(hostname -i)" >> "${PRESTO_CONFIG_DIR}/node.properties"
439+
echo "node.id=$(hostname)" >> "${PRESTO_CONFIG_DIR}/node.properties"
417440
{{- end }}{{/* with .Values.clpConfig.presto */}}
418-
{{- end }}{{/* if eq .Values.clpConfig.package.query_engine "presto" */}}

0 commit comments

Comments
 (0)