66 - main
77
88jobs :
9- benchmark :
9+ run- benchmark :
1010 runs-on : ubuntu-latest
1111 steps :
1212 - name : Checkout code
@@ -33,14 +33,263 @@ jobs:
3333 mkdir -p /tmp/artifacts/
3434 ARTIFACT_PATH=/tmp/artifacts make test-benchmark
3535
36- - name : Compare with baseline
36+ - name : Convert Benchmark Output to Prometheus Metrics
3737 run : |
38- go install golang.org/x/perf/cmd/benchstat@latest
39- benchstat benchmarks/baseline.txt /tmp/artifacts/new.txt | tee /tmp/artifacts/output
38+ mkdir -p /tmp/artifacts/prometheus/
39+ echo "RUN_ID=${{ github.run_id }}"
40+ export RUN_ID=${{ github.run_id }}
41+ cat << 'EOF' > benchmark_to_prometheus.py
42+ import sys
43+ import re
44+ import os
4045
41- - name : Upload benchmark results
def parse_benchmark_output(benchmark_output):
    """Translate Go-style benchmark result lines into Prometheus samples.

    Each line shaped like
    ``BenchmarkName-8  100  12345 ns/op  678 B/op  9 allocs/op`` produces
    three samples named ``benchmark_<name>_ns``, ``benchmark_<name>_allocs``
    and ``benchmark_<name>_mem_bytes`` (``<name>`` lower-cased). Lines that do
    not match are ignored.

    Args:
        benchmark_output: Raw benchmark text, one result per line.

    Returns:
        The samples joined with newlines; an empty string when no line matched.
    """
    # The "-<procs>" suffix is optional and ns/op may be fractional, so that
    # single-proc runs and very fast benchmarks are not silently dropped.
    bench_line = re.compile(
        r"Benchmark([\w\d]+)(?:-\d+)?\s+\d+\s+(\d+(?:\.\d+)?)\s+ns/op"
        r"\s+([\d]+)\s+B/op\s+([\d]+)\s+allocs/op"
    )
    # Label every sample with the GitHub Actions run id so that metrics from
    # different runs cannot overwrite each other.
    run_id = os.getenv('RUN_ID')

    metrics = []
    sample_round = 0  # index of the matched line; keeps repeated benchmarks distinct
    for line in benchmark_output.split("\n"):
        match = bench_line.match(line)
        if not match:
            continue

        benchmark_name = match.group(1).lower()
        time_ns, memory_bytes, allocs = match.group(2, 3, 4)
        labels = f"{{run_id=\"{run_id}\", round=\"{sample_round}\"}}"

        metrics.append(f"benchmark_{benchmark_name}_ns {labels} {time_ns}")
        metrics.append(f"benchmark_{benchmark_name}_allocs {labels} {allocs}")
        metrics.append(f"benchmark_{benchmark_name}_mem_bytes {labels} {memory_bytes}")
        sample_round += 1

    return "\n".join(metrics)
64+
if __name__ == "__main__":
    # Filter mode: benchmark text arrives on stdin, Prometheus samples leave on stdout.
    print(parse_benchmark_output(sys.stdin.read()))
69+ EOF
70+
71+ cat /tmp/artifacts/new.txt | python3 benchmark_to_prometheus.py | tee /tmp/artifacts/prometheus/metrics.txt
72+
73+ # - name: Compare with baseline
74+ # run: |
75+ # go install golang.org/x/perf/cmd/benchstat@latest
76+ # benchstat benchmarks/baseline.txt /tmp/artifacts/new.txt | tee /tmp/artifacts/output
77+
78+ - name : Upload Benchmark Metrics
4279 uses : actions/upload-artifact@v4
4380 with :
44- name : benchmark-artifacts
45- path : /tmp/artifacts/
81+ name : benchmark-metrics
82+ path : /tmp/artifacts/prometheus/
83+
84+ run-prometheus :
85+ needs : run-benchmark
86+ runs-on : ubuntu-latest
87+ steps :
88+ - name : Checkout code
89+ uses : actions/checkout@v4
90+ with :
91+ fetch-depth : 0
92+
# ToDo: use GitHub REST API to download artifact across repos
# Fetch the snapshot uploaded by an earlier run so that history can be restored.
# The download is best-effort: the restore step already tolerates a missing
# snapshot, and failing here would make the very first run impossible because
# the artifact is only produced at the end of this job.
- name: Download Prometheus Snapshot
  run: |
    echo "Recent workflow runs:"
    gh run list --repo operator-framework/operator-controller --limit 5
    if ! gh run download --repo operator-framework/operator-controller --name prometheus-snapshot --dir .; then
      echo "::warning::No prometheus-snapshot artifact could be downloaded; continuing without history."
    fi
    ls -lh ./
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
102+
103+ # #this step is invalid if download the artifacts in a different job
104+ # - name: Download Prometheus Snapshot2
105+ # uses: actions/download-artifact@v4
106+ # with:
107+ # name: prometheus-snapshot
108+ # path: ./
109+ # env:
110+ # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
111+
112+ - name : Download Benchmark Metrics
113+ uses : actions/download-artifact@v4
114+ with :
115+ name : benchmark-metrics
116+ path : ./
117+
- name: Get Host IP
  run: |
    # Take the address that follows the "src" keyword instead of a fixed
    # column: the field position shifts when the route has no "via" hop.
    HOST_IP=$(ip route get 1 | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
    if [[ -z "$HOST_IP" ]]; then
      echo "Could not determine the host IP from 'ip route get 1'" >&2
      exit 1
    fi
    # Export for later steps (the Prometheus scrape target uses it).
    echo "HOST_IP=$HOST_IP" | tee -a "$GITHUB_ENV"
121+
122+ # localhost doesn't work, use host IP directly
123+ - name : Set Up Prometheus Config
124+ run : |
125+ echo "HOST_IP is $HOST_IP"
126+ cat << EOF > prometheus.yml
127+ global:
128+ scrape_interval: 5s
129+ scrape_configs:
130+ - job_name: 'benchmark_metrics'
131+ static_configs:
132+ - targets: ['$HOST_IP:9000']
133+ EOF
134+ mkdir -p ${{ github.workspace }}/prometheus-data
135+ sudo chown -R 65534:65534 ${{ github.workspace }}/prometheus-data
136+ sudo chmod -R 777 ${{ github.workspace }}/prometheus-data
137+
# Unpack a previously downloaded snapshot archive from the workspace into
# prometheus-data/snapshots. A zip archive is preferred, a tar.gz is the
# fallback; when neither file exists the step only prints a warning and succeeds.
# NOTE(review): the archive is created from the *contents* of one snapshot
# directory and is unpacked here directly under prometheus-data/snapshots,
# while Prometheus is started with its storage path at prometheus-data.
# Confirm that Prometheus actually loads the restored data from this location.
138+ - name : Extract and Restore Prometheus Snapshot
139+ run : |
140+ SNAPSHOT_ZIP="${{ github.workspace }}/prometheus-snapshot.zip"
141+ SNAPSHOT_TAR="${{ github.workspace }}/prometheus_snapshot.tar.gz"
142+ SNAPSHOT_DIR="${{ github.workspace }}/prometheus-data/snapshots"
143+
144+ mkdir -p "$SNAPSHOT_DIR"
145+
146+ if [[ -f "$SNAPSHOT_ZIP" ]]; then
147+ echo "📦 Detected ZIP archive: $SNAPSHOT_ZIP"
148+ unzip -o "$SNAPSHOT_ZIP" -d "$SNAPSHOT_DIR"
149+ echo "✅ Successfully extracted ZIP snapshot."
150+ elif [[ -f "$SNAPSHOT_TAR" ]]; then
151+ echo "📦 Detected TAR archive: $SNAPSHOT_TAR"
152+ tar -xzf "$SNAPSHOT_TAR" -C "$SNAPSHOT_DIR"
153+ echo "✅ Successfully extracted TAR snapshot."
154+ else
155+ echo "⚠️ WARNING: No snapshot file found. Skipping extraction."
156+ fi
157+
158+ - name : Run Prometheus
159+ run : |
160+ docker run -d --name prometheus -p 9090:9090 \
161+ --user=root \
162+ -v ${{ github.workspace }}/prometheus.yml:/etc/prometheus/prometheus.yml \
163+ -v ${{ github.workspace }}/prometheus-data:/prometheus \
164+ prom/prometheus --config.file=/etc/prometheus/prometheus.yml \
165+ --storage.tsdb.path=/prometheus \
166+ --storage.tsdb.retention.time=1h \
167+ --web.enable-admin-api
168+
169+ - name : Wait for Prometheus to start
170+ run : sleep 10
171+
- name: Check Prometheus is running
  run: |
    set -e
    # -f makes curl exit non-zero on an HTTP error status; without it any
    # response from the readiness endpoint, including an error, passes.
    curl -sf http://localhost:9090/-/ready || (docker logs prometheus && exit 1)
46176
177+ - name : Start HTTP Server to Expose Metrics
178+ run : |
179+ cat << 'EOF' > server.py
180+ from http.server import SimpleHTTPRequestHandler, HTTPServer
181+
class MetricsHandler(SimpleHTTPRequestHandler):
    """Serve the benchmark metrics file on ``/metrics``; 404 for anything else."""

    def do_GET(self):
        """Answer a GET request.

        ``/metrics`` returns the bytes of ``metrics.txt`` from the current
        working directory. If that file cannot be read the response is 500.
        Every other path is answered with an empty 404.
        """
        if self.path != "/metrics":
            self.send_response(404)
            self.end_headers()
            return

        # Read the file *before* committing to a status line: once a 200 has
        # been sent, a missing file would look like a successful empty scrape.
        try:
            with open("metrics.txt", "rb") as f:
                payload = f.read()
        except OSError:
            self.send_error(500, "metrics.txt is not readable")
            return

        self.send_response(200)
        # Content type of the Prometheus text exposition format.
        self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)
193+
if __name__ == "__main__":
    # Listen on every interface so the Prometheus container can reach the
    # host-side server; port 9000 is the scrape target in prometheus.yml.
    listen_on = ('0.0.0.0', 9000)
    httpd = HTTPServer(listen_on, MetricsHandler)
    print("Serving on port 9000...")
    httpd.serve_forever()
198+ EOF
199+
200+ nohup python3 server.py &
201+
202+ - name : Wait for Prometheus to Collect Data
203+ run : sleep 30
204+
- name: Check Prometheus targets page
  run: |
    set -euo pipefail

    # Reachability of the targets page. "|| true" keeps a connection failure
    # from aborting the step before the diagnostic below is printed.
    http_status=$(curl -o /dev/null -s -w "%{http_code}" http://localhost:9090/targets || true)
    if [ "$http_status" -ne 200 ]; then
      echo "Error: Prometheus targets page is not reachable. Status code: $http_status"
      exit 1
    fi
    echo "Prometheus targets page is reachable."

    # Collect every non-empty lastError from the targets API. Filtering inside
    # jq keeps the check correct when more than one target is active.
    error=$(curl -sf http://localhost:9090/api/v1/targets \
      | jq -r '.data.activeTargets[] | .lastError // empty | select(. != "")')
    if [ -n "$error" ]; then
      echo "Error: Prometheus target has an error: $error"
      exit 1
    fi
    echo "No errors found in Prometheus targets."
232+
233+ # - name: Debug via SSH
234+ # uses: mxschmitt/action-tmate@v3
235+
- name: Check Benchmark Metrics Against Threshold
  run: |
    set -euo pipefail

    MAX_TIME_NS=1200000000 # 1.2s
    MAX_ALLOCS=4000
    MAX_MEM_BYTES=450000

    # Print max(<metric>) as reported by Prometheus. Fails when the query
    # returns no numeric sample: an empty result would otherwise reach bc as
    # "null", which bc evaluates as 0, and every threshold would pass.
    query_max() {
      local metric="$1" value
      value=$(curl -sf "http://localhost:9090/api/v1/query?query=max(${metric})" \
        | jq -r '.data.result[0].value[1] // empty')
      if ! [[ "$value" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
        echo "❌ ERROR: No numeric value for ${metric} (got '${value}')" >&2
        return 1
      fi
      echo "$value"
    }

    # Query Prometheus Metrics, get the max value
    time_ns=$(query_max benchmark_createclustercatalog_ns)
    allocs=$(query_max benchmark_createclustercatalog_allocs)
    mem_bytes=$(query_max benchmark_createclustercatalog_mem_bytes)

    echo "⏳ Benchmark Execution Time: $time_ns ns"
    echo "🛠️ Memory Allocations: $allocs"
    echo "💾 Memory Usage: $mem_bytes bytes"

    # threshold checking
    if (( $(echo "$time_ns > $MAX_TIME_NS" | bc -l) )); then
      echo "❌ ERROR: Execution time exceeds threshold!"
      exit 1
    fi

    if (( $(echo "$allocs > $MAX_ALLOCS" | bc -l) )); then
      echo "❌ ERROR: Too many memory allocations!"
      exit 1
    fi

    if (( $(echo "$mem_bytes > $MAX_MEM_BYTES" | bc -l) )); then
      echo "❌ ERROR: Memory usage exceeds threshold!"
      exit 1
    fi

    echo "✅ All benchmarks passed within threshold!"
268+
- name: Trigger Prometheus Snapshot
  run: |
    set -e
    # -f turns an HTTP error from the admin API into a non-zero exit so the
    # fallback (dump container logs, fail the step) actually runs; -sS hides
    # the progress meter but still prints curl's own error message.
    curl -fsS -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot || (docker logs prometheus && exit 1)
273+
274+ - name : Find and Upload Prometheus Snapshot
275+ run : |
276+ SNAPSHOT_PATH=$(ls -td ${{ github.workspace }}/prometheus-data/snapshots/* 2>/dev/null | head -1 || echo "")
277+ if [[ -z "$SNAPSHOT_PATH" ]]; then
278+ echo "❌ No Prometheus snapshot found!"
279+ docker logs prometheus
280+ exit 1
281+ fi
282+
283+ echo "✅ Prometheus snapshot stored in: $SNAPSHOT_PATH"
284+ tar -czf $GITHUB_WORKSPACE/prometheus_snapshot.tar.gz -C "$SNAPSHOT_PATH" .
285+
286+
287+ - name : Stop Prometheus
288+ run : docker stop prometheus
289+
290+ - name : Upload Prometheus Snapshot
291+ uses : actions/upload-artifact@v4
292+ with :
293+ name : prometheus-snapshot
294+ path : prometheus_snapshot.tar.gz
295+
0 commit comments