3030 @echo " make build Build all containers"
3131 @echo " make pipeline Run full pipeline (pods 1-3)"
3232 @echo " make inference Start inference server (pod 4)"
33- @echo " make benchmark Run sustained throughput benchmark (pod 6)"
33+ @echo " make benchmark Run FlashArray model reload stress test (pod 6)"
34+ @echo " make benchmark-io Run pure I/O stress test (no inference)"
3435 @echo " make test Test inference endpoint"
3536 @echo " make stop Stop all containers"
3637 @echo " make clean-data Remove generated data"
@@ -41,19 +42,19 @@ help:
4142 @echo " make pod1 Run data generator"
4243 @echo " make pod2 Run feature engineering (CPU vs GPU comparison)"
4344 @echo " make pod3 Run model training"
44- @echo " make pod6 Run inference benchmark (requires pod1 data + pod3 model) "
45+ @echo " make pod6 Run FlashArray stress test "
4546 @echo " "
4647 @echo " Configuration (from .env):"
4748 @echo " FB_DATA=$( FB_DATA) "
4849 @echo " FB_PREP=$( FB_PREP) "
4950 @echo " FA_MODEL_REPO=$( FA_MODEL_REPO) "
5051 @echo " DURATION_SECONDS=$( DURATION_SECONDS) s NUM_WORKERS=$( NUM_WORKERS) "
5152 @echo " "
52- @echo " Benchmark options:"
53+ @echo " Benchmark options (FlashArray stress test) :"
5354 @echo " BENCHMARK_DURATION=$( BENCHMARK_DURATION) s"
54- @echo " BENCHMARK_BATCH_SIZE =$( BENCHMARK_BATCH_SIZE ) "
55- @echo " BENCHMARK_WORKERS =$( BENCHMARK_WORKERS ) (concurrent GPU workers )"
56- @echo " Example: make benchmark BENCHMARK_DURATION=120 BENCHMARK_WORKERS=16 "
55+ @echo " BENCHMARK_WORKERS =$( BENCHMARK_WORKERS ) (concurrent model loaders )"
56+ @echo " BENCHMARK_INFERENCE =$( BENCHMARK_INFERENCE ) "
57+ @echo " Example: make benchmark BENCHMARK_DURATION=120 BENCHMARK_WORKERS=32 "
5758
5859# Verify environment and paths
5960env-check :
@@ -150,65 +151,48 @@ inference:
150151
151152# Benchmark settings
152153BENCHMARK_DURATION ?= 60
153- BENCHMARK_BATCH_SIZE ?= 10000
154154BENCHMARK_WORKERS ?= 8
155+ BENCHMARK_INFERENCE ?= true
156+ BENCHMARK_BATCH_SIZE ?= 1000
155157
156- # Run sustained throughput benchmark (CPU vs GPU)
158+ # Run FlashArray model reload stress test
157159benchmark :
158160 @echo " "
159161 @echo " =========================================="
160- @echo " Sustained Throughput Benchmark: CPU vs GPU "
162+ @echo " FlashArray Model Reload Stress Test "
161163 @echo " =========================================="
162- @echo " Duration: $( BENCHMARK_DURATION) s per model "
163- @echo " Batch size : $( BENCHMARK_BATCH_SIZE ) records "
164- @echo " GPU workers: $( BENCHMARK_WORKERS ) concurrent (gRPC )"
164+ @echo " Duration: $( BENCHMARK_DURATION) s per test "
165+ @echo " Workers : $( BENCHMARK_WORKERS ) concurrent "
166+ @echo " Run inference: $( BENCHMARK_INFERENCE ) "
165167 @echo " "
166- @if [ ! -d " $( FB_DATA) " ] || [ -z " $$ (ls -A $( FB_DATA) /run_* 2>/dev/null)" ]; then \
167- echo " ERROR: No data found at $( FB_DATA) /run_*/" ; \
168- echo " Run 'make pod1' or 'make pipeline' first to generate data." ; \
169- exit 1; \
170- fi
171168 @if [ ! -d " $( FA_MODEL_REPO) /fraud_xgboost" ] && [ ! -d " $( FA_MODEL_REPO) /fraud_xgboost_gpu" ] && [ ! -d " $( FA_MODEL_REPO) /fraud_xgboost_cpu" ]; then \
172169 echo " ERROR: Model not found at $( FA_MODEL_REPO) /" ; \
173170 echo " Expected: fraud_xgboost, fraud_xgboost_gpu, or fraud_xgboost_cpu" ; \
174171 echo " Run 'make pod3' or 'make pipeline' first to train the model." ; \
175172 exit 1; \
176173 fi
177174 @ls -d $(FA_MODEL_REPO ) /fraud_xgboost* 2> /dev/null | head -1 | xargs -I{} echo " Found model: {}"
178- @echo " Starting Triton server if not running..."
179- @docker compose up -d inference
180- @echo " Waiting for Triton to be ready..."
181- @for i in 1 2 3 4 5 6 7 8 9 10; do \
182- if curl -s http://localhost:8000/v2/health/ready > /dev/null 2>&1 ; then \
183- echo " Triton ready!" ; \
184- break ; \
185- fi ; \
186- echo " Waiting... ($$ i/10)" ; \
187- sleep 3; \
188- done
189175 @echo " "
190- BENCHMARK_DURATION=$(BENCHMARK_DURATION ) BENCHMARK_BATCH_SIZE =$(BENCHMARK_BATCH_SIZE ) BENCHMARK_WORKERS =$(BENCHMARK_WORKERS ) docker compose run --rm benchmark
176+ BENCHMARK_DURATION=$(BENCHMARK_DURATION ) BENCHMARK_WORKERS =$(BENCHMARK_WORKERS ) BENCHMARK_INFERENCE =$(BENCHMARK_INFERENCE ) BENCHMARK_BATCH_SIZE= $( BENCHMARK_BATCH_SIZE ) docker compose run --rm benchmark
191177 @echo " "
192- @echo " Benchmark complete!"
178+ @echo " Stress test complete!"
179+ @echo " Check Grafana for FlashArray I/O metrics"
193180
194- # Run benchmark without Triton (CPU only )
195- benchmark-cpu :
181+ # Run FlashArray stress test without inference (pure I/O )
182+ benchmark-io :
196183 @echo " "
197184 @echo " =========================================="
198- @echo " Sustained Throughput Benchmark: CPU Only "
185+ @echo " FlashArray Pure I/O Stress Test "
199186 @echo " =========================================="
200- @echo " Duration: $( BENCHMARK_DURATION) s"
201- @echo " Batch size: $( BENCHMARK_BATCH_SIZE) records"
187+ @echo " Duration: $( BENCHMARK_DURATION) s per test"
188+ @echo " Workers: $( BENCHMARK_WORKERS) concurrent"
189+ @echo " Run inference: false (pure model load I/O)"
202190 @echo " "
203- @if [ ! -d " $( FB_DATA) " ] || [ -z " $$ (ls -A $( FB_DATA) /run_* 2>/dev/null)" ]; then \
204- echo " ERROR: No data found at $( FB_DATA) /run_*/" ; \
205- exit 1; \
206- fi
207191 @if [ ! -d " $( FA_MODEL_REPO) /fraud_xgboost" ] && [ ! -d " $( FA_MODEL_REPO) /fraud_xgboost_gpu" ] && [ ! -d " $( FA_MODEL_REPO) /fraud_xgboost_cpu" ]; then \
208192 echo " ERROR: Model not found at $( FA_MODEL_REPO) /" ; \
209193 exit 1; \
210194 fi
211- BENCHMARK_DURATION=$(BENCHMARK_DURATION ) BENCHMARK_BATCH_SIZE =$(BENCHMARK_BATCH_SIZE ) docker compose run --rm -e TRITON_URL=http://localhost:9999 benchmark
195+ BENCHMARK_DURATION=$(BENCHMARK_DURATION ) BENCHMARK_WORKERS =$(BENCHMARK_WORKERS ) BENCHMARK_INFERENCE=false docker compose run --rm benchmark
212196
213197# Test inference
214198test :
0 commit comments