#!/bin/bash
set -e
set -o pipefail

# Test script for HyperDX deployment
#
# Every setting below can be overridden from the environment; the value after
# ":-" is the default used when the variable is unset or empty.
NAMESPACE=${NAMESPACE:-default}
RELEASE_NAME=${RELEASE_NAME:-hyperdx-test}
CHART_NAME=${CHART_NAME:-clickstack}
TIMEOUT=${TIMEOUT:-300}
CLICKHOUSE_SERVICE=${CLICKHOUSE_SERVICE:-$RELEASE_NAME-$CHART_NAME-clickhouse-clickhouse-headless}
CLICKHOUSE_SECRET_NAME=${CLICKHOUSE_SECRET_NAME:-clickstack-secret}
CLICKHOUSE_HTTP_USER=${CLICKHOUSE_HTTP_USER:-app}
CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE:-default}
CLICKHOUSE_TRACE_TABLE=${CLICKHOUSE_TRACE_TABLE:-otel_traces}
CLICKHOUSE_LOG_TABLE=${CLICKHOUSE_LOG_TABLE:-otel_logs}
INGESTION_POLL_INTERVAL=${INGESTION_POLL_INTERVAL:-5}
OTEL_TELEMETRYGEN_IMAGE=${OTEL_TELEMETRYGEN_IMAGE:-ghcr.io/open-telemetry/opentelemetry-collector-contrib/telemetrygen:latest}
OTEL_SIGNAL_COUNT=${OTEL_SIGNAL_COUNT:-20}

# Bookkeeping for background port-forwards (PIDs and their log files) and the
# ClickHouse password, which is filled in later from the Kubernetes secret.
PORT_FORWARD_PIDS=()
PORT_FORWARD_LOGS=()
CLICKHOUSE_HTTP_PASSWORD=""

echo "Starting HyperDX tests..."
echo "Release: $RELEASE_NAME"
echo "Chart: $CHART_NAME"
echo "Namespace: $NAMESPACE"
1428
#######################################
# Stop every tracked port-forward and delete its log file.
# Globals:   PORT_FORWARD_PIDS (read, reset), PORT_FORWARD_LOGS (read, reset)
# Arguments: none
# Returns:   always 0; every step is best-effort so it is safe in an EXIT trap.
#######################################
cleanup_port_forwards() {
  local pid=""
  local log_file=""

  for pid in "${PORT_FORWARD_PIDS[@]}"; do
    # Only signal processes that are still alive.
    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
      kill "$pid" 2>/dev/null || true
      # wait fails harmlessly when the PID is not a child of this shell.
      wait "$pid" 2>/dev/null || true
    fi
  done

  for log_file in "${PORT_FORWARD_LOGS[@]}"; do
    [ -n "$log_file" ] || continue
    rm -f -- "$log_file" 2>/dev/null || true
  done

  # Forget what was cleaned so a second invocation is a no-op.
  PORT_FORWARD_PIDS=()
  PORT_FORWARD_LOGS=()
}

trap cleanup_port_forwards EXIT
46+
1547wait_for_service () {
1648 local url=$1
1749 local name=$2
@@ -39,6 +71,7 @@ check_endpoint() {
3971 local url=$1
4072 local expected_code=$2
4173 local desc=$3
74+ local code=" "
4275
4376 echo " Checking $desc ..."
4477
@@ -53,6 +86,148 @@ check_endpoint() {
5386 fi
5487}
5588
#######################################
# Start `kubectl port-forward` in the background and print its PID.
# Globals:   NAMESPACE (read), TMPDIR (read, optional),
#            PORT_FORWARD_PIDS / PORT_FORWARD_LOGS (appended)
# Arguments: $1 - resource to forward (e.g. service/foo)
#            $2 - local port
#            $3 - remote port
#            $4 - short name used in messages and in the log file name
# Outputs:   PID on stdout; progress and errors on stderr
# Returns:   0 if the process is still alive after the grace period, else 1
#
# NOTE: when this is called as pid=$(start_port_forward ...), the array appends
# happen in the command-substitution subshell and are not visible to the
# caller; the caller must track the returned PID itself.
#######################################
start_port_forward() {
  local resource=$1
  local local_port=$2
  local remote_port=$3
  local name=$4
  local log_file=""
  local pid=""

  # The X's must be the last characters of the template to be portable:
  # BSD/macOS mktemp does not randomise a template with a trailing suffix.
  # Checked explicitly because errexit is cleared inside $(...).
  if ! log_file=$(mktemp "${TMPDIR:-/tmp}/${name}.XXXXXX"); then
    echo "ERROR: Could not create log file for port-forward $name" >&2
    return 1
  fi

  echo "Starting port-forward for $name (${resource} ${local_port}:${remote_port})..." >&2
  # stdout/stderr go to the log file so a capturing caller is not blocked.
  kubectl port-forward "$resource" "${local_port}:${remote_port}" -n "$NAMESPACE" >"$log_file" 2>&1 &
  pid=$!

  PORT_FORWARD_PIDS+=("$pid")
  PORT_FORWARD_LOGS+=("$log_file")

  # Give kubectl a moment to either bind the port or fail.
  sleep 3
  if ! kill -0 "$pid" 2>/dev/null; then
    echo "ERROR: Failed to start port-forward for $name" >&2
    sed -n '1,120p' "$log_file" >&2 || true
    wait "$pid" 2>/dev/null || true
    # The log has been shown; do not leave it behind.
    rm -f -- "$log_file"
    return 1
  fi

  echo "$pid"
}
114+
#######################################
# Stop a single port-forward process if it is still running.
# Arguments: $1 - PID to stop (an empty or missing value is a no-op)
# Returns:   always 0; kill/wait failures are deliberately ignored.
#######################################
stop_port_forward() {
  local pid=${1:-}

  if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
    kill "$pid" 2>/dev/null || true
    # wait fails harmlessly when the PID is not a child of this shell.
    wait "$pid" 2>/dev/null || true
  fi
}
123+
#######################################
# Print the decoded value of one key of a Kubernetes secret.
# Globals:   NAMESPACE (read)
# Arguments: $1 - secret name
#            $2 - key inside the secret's .data map
# Outputs:   decoded value on stdout
#######################################
get_secret_value() {
  local secret_name=$1
  local key_name=$2
  # Dots inside a key (e.g. "tls.crt") must be escaped, otherwise kubectl's
  # JSONPath treats them as path separators.
  local escaped_key=${key_name//./\\.}

  kubectl get secret "$secret_name" -n "$NAMESPACE" -o "jsonpath={.data.${escaped_key}}" \
    | base64 --decode
}
130+
#######################################
# Run one SQL statement against ClickHouse's HTTP interface on localhost:8123.
# Globals:   CLICKHOUSE_HTTP_USER, CLICKHOUSE_HTTP_PASSWORD, CLICKHOUSE_DATABASE
#            (read); CLICKHOUSE_QUERY_TIMEOUT (read, optional, seconds)
# Arguments: $1 - SQL text, sent as the request body
# Outputs:   response body on stdout
# Returns:   curl's exit status (non-zero on HTTP errors because of --fail)
#######################################
run_clickhouse_query() {
  local sql=$1

  # --max-time bounds a stalled connection so that polling callers can still
  # honour their own timeouts.
  curl -sS --fail \
    --max-time "${CLICKHOUSE_QUERY_TIMEOUT:-30}" \
    -u "${CLICKHOUSE_HTTP_USER}:${CLICKHOUSE_HTTP_PASSWORD}" \
    --data-binary "$sql" \
    "http://localhost:8123/?database=${CLICKHOUSE_DATABASE}"
}
139+
#######################################
# Print the row count of a table in CLICKHOUSE_DATABASE.
# Globals:   CLICKHOUSE_DATABASE (read)
# Arguments: $1 - table name
# Outputs:   the count on stdout; diagnostics on stderr so that callers
#            capturing stdout only ever see a number
# Returns:   0 on success, 1 if the query fails or the reply is not a number
#######################################
get_table_count() {
  local table=$1
  local count=""

  # Checked explicitly: errexit is not reliable inside functions that are
  # called from conditions or command substitutions.
  if ! count=$(run_clickhouse_query "SELECT count() FROM \`${CLICKHOUSE_DATABASE}\`.\`${table}\`;"); then
    echo "ERROR: Count query failed for table ${table}" >&2
    return 1
  fi

  # Strip all whitespace (ClickHouse terminates the reply with a newline).
  count=${count//[[:space:]]/}

  if [[ ! "$count" =~ ^[0-9]+$ ]]; then
    echo "ERROR: Non-numeric count for table ${table}: ${count}" >&2
    return 1
  fi

  echo "$count"
}
154+
#######################################
# Poll until a table can be counted, then print that count.
# Globals:   CLICKHOUSE_DATABASE, INGESTION_POLL_INTERVAL (read)
# Arguments: $1 - table name
#            $2 - timeout in seconds
# Outputs:   the row count on stdout; the timeout error on stderr, because
#            callers capture stdout as the baseline and would otherwise
#            swallow the message
# Returns:   0 once a numeric count was read, 1 on timeout
#######################################
wait_for_table_queryable() {
  local table=$1
  local timeout_seconds=$2
  local start_time=0
  local now=0
  local count=""

  start_time=$(date +%s)
  while true; do
    # Failures are expected while the table does not exist yet.
    count=$(get_table_count "$table" 2>/dev/null || true)
    if [[ "$count" =~ ^[0-9]+$ ]]; then
      echo "$count"
      return 0
    fi

    now=$(date +%s)
    if [ $((now - start_time)) -ge "$timeout_seconds" ]; then
      echo "ERROR: Timed out waiting for table ${CLICKHOUSE_DATABASE}.${table} to become queryable" >&2
      return 1
    fi

    sleep "$INGESTION_POLL_INTERVAL"
  done
}
179+
#######################################
# Poll a table until its row count exceeds a baseline.
# Globals:   CLICKHOUSE_DATABASE, INGESTION_POLL_INTERVAL (read)
# Arguments: $1 - table name
#            $2 - baseline row count (non-negative integer)
#            $3 - timeout in seconds
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 when new rows are seen, 1 on timeout or invalid baseline
#######################################
wait_for_table_count_increase() {
  local table=$1
  local baseline_count=$2
  local timeout_seconds=$3
  local start_time=0
  local now=0
  local current_count=""

  # A non-numeric baseline would make every comparison below error out.
  if [[ ! "$baseline_count" =~ ^[0-9]+$ ]]; then
    echo "ERROR: Invalid baseline count for ${CLICKHOUSE_DATABASE}.${table}: ${baseline_count}" >&2
    return 1
  fi

  start_time=$(date +%s)
  while true; do
    # A failed query is treated as "no data yet" and simply retried.
    current_count=$(get_table_count "$table" 2>/dev/null || true)
    if [[ "$current_count" =~ ^[0-9]+$ ]]; then
      echo "Current count for ${CLICKHOUSE_DATABASE}.${table}: ${current_count} (baseline ${baseline_count})"
      if [ "$current_count" -gt "$baseline_count" ]; then
        echo "Detected new rows in ${CLICKHOUSE_DATABASE}.${table}"
        return 0
      fi
    fi

    now=$(date +%s)
    if [ $((now - start_time)) -ge "$timeout_seconds" ]; then
      echo "ERROR: Timed out waiting for row increase in ${CLICKHOUSE_DATABASE}.${table}" >&2
      return 1
    fi

    sleep "$INGESTION_POLL_INTERVAL"
  done
}
208+
#######################################
# Send synthetic telemetry to the collector at localhost:4318 using the
# telemetrygen container image.
# Globals:   OTEL_TELEMETRYGEN_IMAGE (read)
# Arguments: $1 - telemetrygen subcommand / signal (e.g. traces, logs)
#            $2 - name of the count flag for that signal (e.g. --traces)
#            $3 - number of items to send
#            $4 - run identifier, embedded in the log body for "logs"
# Returns:   exit status of `docker run`
#######################################
send_telemetrygen_signal() {
  local signal=$1
  local count_flag=$2
  local count=$3
  local run_id=$4
  local body_arg=()

  # Only the logs signal gets a --body argument.
  if [ "$signal" = "logs" ]; then
    body_arg=(--body "clickstack smoke test log ${run_id}")
  fi

  echo "Sending ${signal} to OTEL collector over OTLP HTTP..."
  # --network host lets the container reach the port-forward on localhost.
  # The ${arr[@]+...} form expands to nothing for an empty array, even on
  # older bash versions running with nounset.
  docker run --rm --network host "$OTEL_TELEMETRYGEN_IMAGE" "$signal" \
    --otlp-http \
    --otlp-endpoint "localhost:4318" \
    --otlp-insecure \
    "$count_flag" "$count" \
    --rate 5 \
    --service "clickstack-smoke-test" \
    ${body_arg[@]+"${body_arg[@]}"}
}
230+
# Check pods: block until every pod of the release reports Ready, or fail
# after TIMEOUT seconds.
echo "Checking pod status..."
kubectl wait --for=condition=Ready pods \
  -l "app.kubernetes.io/instance=$RELEASE_NAME" \
  --timeout="${TIMEOUT}s" \
  -n "$NAMESPACE"
@@ -62,26 +237,24 @@ kubectl get pods -l app.kubernetes.io/instance=$RELEASE_NAME -n $NAMESPACE
62237
# Test UI
echo "Testing HyperDX UI..."
pf_pid=$(start_port_forward "service/$RELEASE_NAME-$CHART_NAME-app" "3000" "3000" "hyperdx-ui")
# start_port_forward ran in a command-substitution subshell, so record the PID
# here in the parent shell where the EXIT cleanup trap can see it.
PORT_FORWARD_PIDS+=("$pf_pid")
sleep 2

wait_for_service "http://localhost:3000" "HyperDX UI"
check_endpoint "http://localhost:3000" "200" "UI"

stop_port_forward "$pf_pid"
sleep 2

# Test OTEL collector metrics endpoint
echo "Testing OTEL collector metrics endpoint..."
metrics_pf_pid=$(start_port_forward "service/$RELEASE_NAME-otel-collector" "8888" "8888" "otel-metrics")
# Same as above: make the PID visible to the EXIT cleanup trap.
PORT_FORWARD_PIDS+=("$metrics_pf_pid")
sleep 2

wait_for_service "http://localhost:8888/metrics" "OTEL Metrics"
check_endpoint "http://localhost:8888/metrics" "200" "OTEL Metrics endpoint"

stop_port_forward "$metrics_pf_pid"
sleep 2
86259
87260# Verify OTEL Collector Deployment is Available
@@ -105,11 +278,62 @@ else
105278 exit 1
106279fi
107280
# Verify OTEL data ingestion to ClickHouse
echo "Verifying OTEL ingestion into ClickHouse..."

# Fail fast: telemetrygen is run through docker, so check for it before
# starting port-forwards or spending up to TIMEOUT seconds waiting on tables.
if ! command -v docker >/dev/null 2>&1; then
  echo "ERROR: docker is required to run telemetrygen for OTEL ingestion checks" >&2
  exit 1
fi

otlp_http_pf_pid=$(start_port_forward "service/$RELEASE_NAME-otel-collector" "4318" "4318" "otel-http")
# start_port_forward ran in a command-substitution subshell, so record the PID
# here in the parent shell where the EXIT cleanup trap can see it.
PORT_FORWARD_PIDS+=("$otlp_http_pf_pid")
clickhouse_pf_pid=$(start_port_forward "service/$CLICKHOUSE_SERVICE" "8123" "8123" "clickhouse-http")
PORT_FORWARD_PIDS+=("$clickhouse_pf_pid")

CLICKHOUSE_HTTP_PASSWORD=$(get_secret_value "$CLICKHOUSE_SECRET_NAME" "CLICKHOUSE_APP_PASSWORD")
if [ -z "${CLICKHOUSE_HTTP_PASSWORD:-}" ]; then
  echo "ERROR: Could not read CLICKHOUSE_APP_PASSWORD from secret ${CLICKHOUSE_SECRET_NAME}" >&2
  exit 1
fi

# Row counts before sending anything; ingestion is proven by an increase.
trace_baseline=$(wait_for_table_queryable "$CLICKHOUSE_TRACE_TABLE" "$TIMEOUT")
log_baseline=$(wait_for_table_queryable "$CLICKHOUSE_LOG_TABLE" "$TIMEOUT")
echo "Baseline count ${CLICKHOUSE_DATABASE}.${CLICKHOUSE_TRACE_TABLE}: ${trace_baseline}"
echo "Baseline count ${CLICKHOUSE_DATABASE}.${CLICKHOUSE_LOG_TABLE}: ${log_baseline}"

run_id=$(date +%s)
send_telemetrygen_signal "traces" "--traces" "$OTEL_SIGNAL_COUNT" "$run_id"
send_telemetrygen_signal "logs" "--logs" "$OTEL_SIGNAL_COUNT" "$run_id"

echo "Waiting for traces/logs to land in ClickHouse..."

wait_for_table_count_increase "$CLICKHOUSE_TRACE_TABLE" "$trace_baseline" "$TIMEOUT"
wait_for_table_count_increase "$CLICKHOUSE_LOG_TABLE" "$log_baseline" "$TIMEOUT"

stop_port_forward "$otlp_http_pf_pid"
stop_port_forward "$clickhouse_pf_pid"
313+
# Verify app works end-to-end with default connection (register + search)
echo "Running Playwright e2e test..."
ui_pf_pid=$(start_port_forward "service/$RELEASE_NAME-$CHART_NAME-app" "3000" "3000" "hyperdx-ui-e2e")
# start_port_forward ran in a command-substitution subshell, so record the PID
# here in the parent shell where the EXIT cleanup trap can see it.
PORT_FORWARD_PIDS+=("$ui_pf_pid")
sleep 2
wait_for_service "http://localhost:3000" "HyperDX UI (e2e)"

# The e2e project lives in the e2e/ directory next to this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
(
  # Subshell keeps the directory change local to the Playwright run.
  cd "$SCRIPT_DIR/e2e"
  npm install
  npx playwright install --with-deps chromium
  npx playwright test
)

stop_port_forward "$ui_pf_pid"
329+
# Final summary; only reached when every check above succeeded (set -e).
echo ""
echo "All smoke tests passed"
echo "- All pods running"
echo "- HyperDX UI responding"
echo "- OTEL Collector metrics accessible"
echo "- OTEL Collector Deployment available"
echo "- ClickHouseCluster reconciled (Ready)"
echo "- MongoDBCommunity reconciled (Running)"
echo "- OTEL traces and logs persisted to ClickHouse"
echo "- App registers user and displays logs via default connection"