Skip to content

Commit 663e850

Browse files
committed
benchmark optimization
1 parent 2f54a99 commit 663e850

File tree

5 files changed

+358
-336
lines changed

5 files changed

+358
-336
lines changed

.env

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@ LATEST_ONLY=true
4343
# -----------------------------------------------------------------------------
4444
# Pod 6: Inference Benchmark Settings
4545
# -----------------------------------------------------------------------------
46-
BENCHMARK_SAMPLE_SIZE=10000
46+
# Duration in seconds of the sustained throughput test per model (CPU and GPU)
47+
BENCHMARK_DURATION=60
48+
49+
# Batch size for inference (records per batch)
50+
BENCHMARK_BATCH_SIZE=10000
4751

4852
# -----------------------------------------------------------------------------
4953
# S3 Configuration (TODO - FlashBlade S3 Endpoint)

Makefile

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ help:
3030
@echo " make build Build all containers"
3131
@echo " make pipeline Run full pipeline (pods 1-3)"
3232
@echo " make inference Start inference server (pod 4)"
33-
@echo " make benchmark Run CPU vs GPU inference benchmark (pod 6)"
33+
@echo " make benchmark Run sustained throughput benchmark (pod 6)"
3434
@echo " make test Test inference endpoint"
3535
@echo " make stop Stop all containers"
3636
@echo " make clean-data Remove generated data"
@@ -48,7 +48,11 @@ help:
4848
@echo " FB_PREP=$(FB_PREP)"
4949
@echo " FA_MODEL_REPO=$(FA_MODEL_REPO)"
5050
@echo " DURATION_SECONDS=$(DURATION_SECONDS)s NUM_WORKERS=$(NUM_WORKERS)"
51-
@echo " BENCHMARK_SAMPLE_SIZE=$(BENCHMARK_SAMPLE_SIZE)"
51+
@echo ""
52+
@echo "Benchmark options:"
53+
@echo " BENCHMARK_DURATION=$(BENCHMARK_DURATION)s"
54+
@echo " BENCHMARK_BATCH_SIZE=$(BENCHMARK_BATCH_SIZE)"
55+
@echo " Example: make benchmark BENCHMARK_DURATION=120 BENCHMARK_BATCH_SIZE=50000"
5256

5357
# Verify environment and paths
5458
env-check:
@@ -143,13 +147,18 @@ inference:
143147
@echo " gRPC: localhost:8001"
144148
@echo " Metrics: http://localhost:8002"
145149

146-
# Run inference benchmark (CPU vs GPU comparison)
150+
# Benchmark settings
151+
BENCHMARK_DURATION ?= 60
152+
BENCHMARK_BATCH_SIZE ?= 10000
153+
154+
# Run sustained throughput benchmark (CPU vs GPU)
147155
benchmark:
148156
@echo ""
149157
@echo "=========================================="
150-
@echo "Inference Benchmark: CPU vs GPU"
158+
@echo "Sustained Throughput Benchmark: CPU vs GPU"
151159
@echo "=========================================="
152-
@echo "Sample size: $(BENCHMARK_SAMPLE_SIZE) records"
160+
@echo "Duration: $(BENCHMARK_DURATION)s per model"
161+
@echo "Batch size: $(BENCHMARK_BATCH_SIZE) records"
153162
@echo ""
154163
@if [ ! -d "$(FB_DATA)" ] || [ -z "$$(ls -A $(FB_DATA)/run_* 2>/dev/null)" ]; then \
155164
echo "ERROR: No data found at $(FB_DATA)/run_*/"; \
@@ -175,25 +184,28 @@ benchmark:
175184
sleep 3; \
176185
done
177186
@echo ""
178-
docker compose run --rm benchmark
187+
DURATION_SECONDS=$(BENCHMARK_DURATION) BATCH_SIZE=$(BENCHMARK_BATCH_SIZE) docker compose run --rm benchmark
179188
@echo ""
180189
@echo "Benchmark complete!"
181190

182191
# Run benchmark without Triton (CPU only)
183192
benchmark-cpu:
184193
@echo ""
185194
@echo "=========================================="
186-
@echo "Inference Benchmark: CPU Only"
195+
@echo "Sustained Throughput Benchmark: CPU Only"
187196
@echo "=========================================="
197+
@echo "Duration: $(BENCHMARK_DURATION)s"
198+
@echo "Batch size: $(BENCHMARK_BATCH_SIZE) records"
199+
@echo ""
188200
@if [ ! -d "$(FB_DATA)" ] || [ -z "$$(ls -A $(FB_DATA)/run_* 2>/dev/null)" ]; then \
189201
echo "ERROR: No data found at $(FB_DATA)/run_*/"; \
190202
exit 1; \
191203
fi
192-
@if [ ! -d "$(FA_MODEL_REPO)/fraud_xgboost" ]; then \
193-
echo "ERROR: Model not found at $(FA_MODEL_REPO)/fraud_xgboost/"; \
204+
@if [ ! -d "$(FA_MODEL_REPO)/fraud_xgboost" ] && [ ! -d "$(FA_MODEL_REPO)/fraud_xgboost_gpu" ] && [ ! -d "$(FA_MODEL_REPO)/fraud_xgboost_cpu" ]; then \
205+
echo "ERROR: Model not found at $(FA_MODEL_REPO)/"; \
194206
exit 1; \
195207
fi
196-
docker compose run --rm -e TRITON_URL=http://localhost:9999 benchmark
208+
DURATION_SECONDS=$(BENCHMARK_DURATION) BATCH_SIZE=$(BENCHMARK_BATCH_SIZE) docker compose run --rm -e TRITON_URL=http://localhost:9999 benchmark
197209

198210
# Test inference
199211
test:

docker-compose.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,9 @@ services:
155155

156156
# ============================================================================
157157
# Pod 6: Inference Benchmark
158-
# Compares CPU (XGBoost direct) vs GPU (Triton) inference performance
158+
# Sustained throughput test comparing CPU (XGBoost) vs GPU (Triton)
159159
# Input: ${FB_DATA}/run_*/*.parquet (raw data from Pod 1)
160-
# ${FA_MODEL_REPO}/fraud_xgboost/ (model from Pod 3)
160+
# ${FA_MODEL_REPO}/fraud_xgboost*/ (model from Pod 3)
161161
# Output: Console performance comparison
162162
# ============================================================================
163163
benchmark:
@@ -172,7 +172,8 @@ services:
172172
- DATA_DIR=/data/input
173173
- MODEL_DIR=/data/models
174174
- TRITON_URL=http://inference:8000
175-
- SAMPLE_SIZE=${BENCHMARK_SAMPLE_SIZE:-10000}
175+
- DURATION_SECONDS=${DURATION_SECONDS:-60}
176+
- BATCH_SIZE=${BATCH_SIZE:-10000}
176177
depends_on:
177178
inference:
178179
condition: service_healthy

pods/benchmark/Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# Pod 6: Inference Benchmark
2-
# Compares CPU (XGBoost) vs GPU (Triton) inference performance
1+
# Pod 6: Sustained Throughput Benchmark
2+
# Compares CPU (XGBoost) vs GPU (Triton) inference over DURATION_SECONDS (default 60 seconds)
33

44
FROM python:3.11-slim
55

@@ -19,7 +19,8 @@ COPY benchmark.py .
1919
ENV DATA_DIR=/data/input
2020
ENV MODEL_DIR=/data/models
2121
ENV TRITON_URL=http://inference:8000
22-
ENV SAMPLE_SIZE=10000
22+
ENV DURATION_SECONDS=60
23+
ENV BATCH_SIZE=10000
2324
ENV PYTHONUNBUFFERED=1
2425

2526
CMD ["python", "benchmark.py"]

0 commit comments

Comments
 (0)