Skip to content

Commit 4743608

Browse files
committed
cleanup, optimization
1 parent bd4a239 commit 4743608

File tree

12 files changed

+865
-1156
lines changed

12 files changed

+865
-1156
lines changed

.gitignore

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
# Pipeline outputs (parquet/csv/pickle artifacts written by the pods)
*.parquet
*.csv
*.pkl

# Model artifacts
model_repository/

# State files
.prep_state.json

# Python
__pycache__/
*.pyc
*.pyo
.venv/
venv/

# IDE
.vscode/
.idea/

# OS
.DS_Store
Thumbs.db

# Environment
.env
*.env.local

Makefile

Lines changed: 115 additions & 41 deletions
Original file line numberDiff line numberDiff line change
# Financial Fraud Detection Pipeline
# Pure Storage FlashBlade/FlashArray + NVIDIA GPU Demo

# All command-style targets are phony so a stray file with the same name can
# never shadow them (pod1/pod2/pod3/demo were missing from the original list).
.PHONY: help build pipeline pod1 pod2 pod3 demo clean-data clean-all test inference stop

# Default paths (override with environment variables)
FB_DATA ?= /mnt/fsaai-shared/ebiser/fraud-data
FB_PREP ?= /mnt/fsaai-shared/ebiser/prep-output
MODEL_REPO ?= ./model_repository

# Pipeline settings
DURATION ?= 60
NUM_WORKERS ?= 128
MAX_FILES ?= 100

help:
	@echo "Financial Fraud Detection Pipeline"
	@echo ""
	@echo "Usage:"
	@echo "  make build       Build all containers"
	@echo "  make pipeline    Run full pipeline (pods 1-3)"
	@echo "  make inference   Start inference server"
	@echo "  make test        Test inference endpoint"
	@echo "  make demo        Quick 1-minute end-to-end run"
	@echo "  make stop        Stop all containers"
	@echo "  make clean-data  Remove generated data"
	@echo "  make clean-all   Full cleanup (data + images)"
	@echo ""
	@echo "Individual pods:"
	@echo "  make pod1  Run data generator"
	@echo "  make pod2  Run feature engineering"
	@echo "  make pod3  Run model training"
	@echo ""
	@echo "Configuration (set via environment):"
	@echo "  FB_DATA=$(FB_DATA)"
	@echo "  FB_PREP=$(FB_PREP)"
	@echo "  MODEL_REPO=$(MODEL_REPO)"
	@echo "  DURATION=$(DURATION)s NUM_WORKERS=$(NUM_WORKERS)"

build:
	@echo "Building all containers..."
	docker compose build

# Full pipeline: data generation -> feature engineering -> model training.
# Each stage is a one-shot container; --rm discards it when the stage exits,
# and any non-zero exit aborts the pipeline at that stage.
pipeline: build
	@echo ""
	@echo "=========================================="
	@echo "Starting Full Pipeline"
	@echo "=========================================="
	@echo ""
	@echo "[1/3] Data Generation ($(DURATION)s)..."
	docker compose run --rm \
		-e DURATION_SECONDS=$(DURATION) \
		-e NUM_WORKERS=$(NUM_WORKERS) \
		data-gather
	@echo ""
	@echo "[2/3] Feature Engineering..."
	docker compose run --rm \
		-e MAX_FILES_PER_RUN=$(MAX_FILES) \
		data-prep
	@echo ""
	@echo "[3/3] Model Training..."
	docker compose run --rm model-build
	@echo ""
	@echo "=========================================="
	@echo "Pipeline Complete!"
	@echo "=========================================="
	@echo "Model saved to: $(MODEL_REPO)"
	@echo "Start inference: make inference"

# Individual pods — the same containers the pipeline runs, invocable standalone.

# Pod 1: synthetic transaction generator
pod1:
	docker compose run --rm \
		-e DURATION_SECONDS=$(DURATION) \
		-e NUM_WORKERS=$(NUM_WORKERS) \
		data-gather

# Pod 2: feature engineering over generated files
pod2:
	docker compose run --rm \
		-e MAX_FILES_PER_RUN=$(MAX_FILES) \
		data-prep

# Pod 3: model training
pod3:
	docker compose run --rm model-build

# Start the Triton inference server detached and probe readiness once.
# The curl is best-effort: a slow start prints "Server starting..." rather
# than failing the target.
inference:
	@echo "Starting Triton Inference Server..."
	docker compose up -d inference
	@echo "Waiting for server to be ready..."
	@sleep 10
	@curl -s http://localhost:8000/v2/health/ready && echo " Server ready!" || echo " Server starting..."
	@echo ""
	@echo "Endpoints:"
	@echo "  HTTP:    http://localhost:8000"
	@echo "  gRPC:    localhost:8001"
	@echo "  Metrics: http://localhost:8002"

# Send one hand-built 21-feature row to the fraud_xgboost model and
# pretty-print the JSON reply.
test:
	@echo "Testing inference endpoint..."
	@curl -s -X POST http://localhost:8000/v2/models/fraud_xgboost/infer \
		-H "Content-Type: application/json" \
		-d '{"inputs": [{"name": "input__0", "shape": [1, 21], "datatype": "FP32", "data": [100.0, 35.0, -90.0, 50000, 1704067200, 35.1, -90.1, 12345, 30301, 4.6, 0.5, 12, 3, 0, 0, 10.5, 1, 10, 1, 10.8, 3]}]}' \
		| python3 -m json.tool
	@echo ""

# Stop all containers
stop:
	docker compose down

# Clean generated data (preserves images).
# Guard: abort if any path variable expands to empty, so the rm -rf globs can
# never resolve to filesystem-root paths such as /run_*.
clean-data:
	$(if $(strip $(FB_DATA)),,$(error FB_DATA is empty - refusing to clean))
	$(if $(strip $(FB_PREP)),,$(error FB_PREP is empty - refusing to clean))
	$(if $(strip $(MODEL_REPO)),,$(error MODEL_REPO is empty - refusing to clean))
	@echo "Cleaning generated data..."
	rm -rf $(FB_DATA)/run_*
	rm -rf $(FB_PREP)/features_*.parquet
	rm -rf $(FB_PREP)/metadata_*.json
	rm -rf $(FB_PREP)/.prep_state.json
	rm -rf $(MODEL_REPO)
	@echo "Data cleaned"

# Full cleanup: stop containers, remove data, then images and build cache
clean-all: stop clean-data
	@echo "Removing Docker images..."
	docker compose down --rmi all -v
	docker builder prune -f
	@echo "Full cleanup complete"

# Quick demo (1 minute); $(MAKE) (not bare `make`) propagates -j/-n flags
# to the recursive invocation.
demo:
	@echo "Running quick demo (1 minute data generation)..."
	$(MAKE) pipeline DURATION=60 NUM_WORKERS=64 MAX_FILES=50

0 commit comments

Comments
 (0)