# Financial Fraud Detection Pipeline
# Pure Storage FlashBlade/FlashArray + NVIDIA GPU Demo

# Every target here is a command, not a file — declare them all phony
# (pod1-3 and demo were missing; a stray file with one of those names
# would silently make its target "up to date").
.PHONY: help build pipeline pod1 pod2 pod3 inference test stop clean-data clean-all demo

# Default paths (override with environment variables, e.g. FB_DATA=/mnt/... make pipeline)
FB_DATA ?= /mnt/fsaai-shared/ebiser/fraud-data
FB_PREP ?= /mnt/fsaai-shared/ebiser/prep-output
MODEL_REPO ?= ./model_repository

# Pipeline settings
DURATION ?= 60
NUM_WORKERS ?= 128
MAX_FILES ?= 100
# Self-documenting usage summary. NOTE: $( FB_DATA) in the pasted source
# referenced a variable literally named " FB_DATA" (leading space) and
# would always expand empty — fixed to $(FB_DATA) etc.
help:
	@echo "Financial Fraud Detection Pipeline"
	@echo ""
	@echo "Usage:"
	@echo "  make build       Build all containers"
	@echo "  make pipeline    Run full pipeline (pods 1-4)"
	@echo "  make inference   Start inference server"
	@echo "  make test        Test inference endpoint"
	@echo "  make stop        Stop all containers"
	@echo "  make clean-data  Remove generated data"
	@echo "  make clean-all   Full cleanup (data + images)"
	@echo ""
	@echo "Individual pods:"
	@echo "  make pod1        Run data generator"
	@echo "  make pod2        Run feature engineering"
	@echo "  make pod3        Run model training"
	@echo ""
	@echo "Configuration (set via environment):"
	@echo "  FB_DATA=$(FB_DATA)"
	@echo "  FB_PREP=$(FB_PREP)"
	@echo "  MODEL_REPO=$(MODEL_REPO)"
	@echo "  DURATION=$(DURATION)s NUM_WORKERS=$(NUM_WORKERS)"
# Build all container images defined in docker-compose.yml.
build:
	@echo "Building all containers..."
	docker compose build
# Full pipeline: data generation -> feature engineering -> model training.
# Depends on `build` so images are always up to date before running.
# Each stage runs as a one-shot container (`run --rm`); settings are
# passed through as container environment variables.
pipeline: build
	@echo ""
	@echo "=========================================="
	@echo " Starting Full Pipeline"
	@echo "=========================================="
	@echo ""
	@echo "[1/3] Data Generation ($(DURATION)s)..."
	docker compose run --rm \
		-e DURATION_SECONDS=$(DURATION) \
		-e NUM_WORKERS=$(NUM_WORKERS) \
		data-gather
	@echo ""
	@echo "[2/3] Feature Engineering..."
	docker compose run --rm \
		-e MAX_FILES_PER_RUN=$(MAX_FILES) \
		data-prep
	@echo ""
	@echo "[3/3] Model Training..."
	docker compose run --rm model-build
	@echo ""
	@echo "=========================================="
	@echo " Pipeline Complete!"
	@echo "=========================================="
	@echo "Model saved to: $(MODEL_REPO)"
	@echo "Start inference: make inference"
# Individual pods (run one pipeline stage in isolation)

# Pod 1: synthetic transaction data generator.
pod1:
	docker compose run --rm \
		-e DURATION_SECONDS=$(DURATION) \
		-e NUM_WORKERS=$(NUM_WORKERS) \
		data-gather
# Pod 2: feature engineering over the generated data.
pod2:
	docker compose run --rm \
		-e MAX_FILES_PER_RUN=$(MAX_FILES) \
		data-prep
# Pod 3: model training.
pod3:
	docker compose run --rm model-build
# Start the Triton Inference Server in the background and probe readiness.
# The curl probe is deliberately best-effort (`|| echo`): the server may
# still be loading models after the fixed 10 s wait, and that is not an
# error for this target.
inference:
	@echo "Starting Triton Inference Server..."
	docker compose up -d inference
	@echo "Waiting for server to be ready..."
	@sleep 10
	@curl -s http://localhost:8000/v2/health/ready && echo "Server ready!" || echo "Server starting..."
	@echo ""
	@echo "Endpoints:"
	@echo "  HTTP:    http://localhost:8000"
	@echo "  gRPC:    localhost:8001"
	@echo "  Metrics: http://localhost:8002"
# Test inference: send one 21-feature sample to the Triton KServe v2
# HTTP endpoint and pretty-print the JSON response.
test:
	@echo "Testing inference endpoint..."
	@curl -s -X POST http://localhost:8000/v2/models/fraud_xgboost/infer \
		-H "Content-Type: application/json" \
		-d '{"inputs": [{"name": "input__0", "shape": [1, 21], "datatype": "FP32", "data": [100.0, 35.0, -90.0, 50000, 1704067200, 35.1, -90.1, 12345, 30301, 4.6, 0.5, 12, 3, 0, 0, 10.5, 1, 10, 1, 10.8, 3]}]}' \
		| python3 -m json.tool
	@echo ""
# Stop all containers (keeps images and generated data).
stop:
	docker compose down
# Clean generated data (preserves images).
# Guard the rm -rf paths: if FB_DATA/FB_PREP were overridden to empty,
# the recipe would otherwise expand to `rm -rf /run_*` etc.
# Note: the pasted source had `$(FB_PREP ) /features_* .parquet` — the
# stray spaces made those root-relative globs; fixed to single paths.
clean-data:
	$(if $(strip $(FB_DATA)),,$(error FB_DATA is empty; refusing to rm -rf))
	$(if $(strip $(FB_PREP)),,$(error FB_PREP is empty; refusing to rm -rf))
	@echo "Cleaning generated data..."
	rm -rf $(FB_DATA)/run_*
	rm -rf $(FB_PREP)/features_*.parquet
	rm -rf $(FB_PREP)/metadata_*.json
	rm -f $(FB_PREP)/.prep_state.json
	rm -rf $(MODEL_REPO)
	@echo "Data cleaned"
# Full cleanup: stop containers, delete data, then remove images,
# volumes, and the builder cache.
clean-all: stop clean-data
	@echo "Removing Docker images..."
	docker compose down --rmi all -v
	docker builder prune -f
	@echo "Full cleanup complete"
# Quick demo: run the full pipeline with a 1-minute data-generation
# window and reduced parallelism. Uses $(MAKE) (not bare `make`) so
# -j/-n flags and the jobserver propagate to the sub-make.
demo:
	@echo "Running quick demo (1 minute data generation)..."
	$(MAKE) pipeline DURATION=60 NUM_WORKERS=64 MAX_FILES=50