This guide provides step-by-step instructions for deploying RuVector's globally distributed streaming system capable of handling 500 million concurrent learning streams with burst capacity up to 25 billion.
Target Infrastructure: Google Cloud Platform (GCP) Architecture: Multi-region Cloud Run with global load balancing Deployment Time: 4-6 hours for initial setup
- Prerequisites
- Phase 1: Initial Setup
- Phase 2: Core Infrastructure
- Phase 3: Multi-Region Deployment
- Phase 4: Load Balancing & CDN
- Phase 5: Monitoring & Alerting
- Phase 6: Validation & Testing
- Operations
- Troubleshooting
# Install gcloud CLI
curl https://sdk.cloud.google.com | bash
exec -l $SHELL
# Install Terraform
wget https://releases.hashicorp.com/terraform/1.6.0/terraform_1.6.0_linux_amd64.zip
unzip terraform_1.6.0_linux_amd64.zip
sudo mv terraform /usr/local/bin/
# Install Node.js 18+
curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
sudo apt-get install -y nodejs
# Install Rust (for building ruvector core)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source $HOME/.cargo/env
# Install Docker
sudo apt-get update
sudo apt-get install -y docker.io
sudo usermod -aG docker $USER
# Install K6 (for load testing)
sudo gpg -k
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
sudo apt-get update
sudo apt-get install k6# Set project variables
export PROJECT_ID="your-project-id"
export PROJECT_NUMBER=$(gcloud projects describe $PROJECT_ID --format="value(projectNumber)")
export BILLING_ACCOUNT="your-billing-account-id"
# Authenticate
gcloud auth login
gcloud auth application-default login
# Set default project
gcloud config set project $PROJECT_ID
# Enable billing
gcloud billing projects link $PROJECT_ID --billing-account=$BILLING_ACCOUNT# Enable all required GCP APIs
gcloud services enable \
run.googleapis.com \
compute.googleapis.com \
sql-component.googleapis.com \
sqladmin.googleapis.com \
redis.googleapis.com \
servicenetworking.googleapis.com \
vpcaccess.googleapis.com \
cloudscheduler.googleapis.com \
cloudtasks.googleapis.com \
pubsub.googleapis.com \
monitoring.googleapis.com \
logging.googleapis.com \
cloudtrace.googleapis.com \
cloudbuild.googleapis.com \
artifactregistry.googleapis.com \
secretmanager.googleapis.com \
cloudresourcemanager.googleapis.com \
iamcredentials.googleapis.com \
cloudfunctions.googleapis.com \
networkconnectivity.googleapis.com# Create service accounts
gcloud iam service-accounts create ruvector-cloudrun \
--display-name="RuVector Cloud Run Service Account"
gcloud iam service-accounts create ruvector-deployer \
--display-name="RuVector CI/CD Deployer"
# Grant necessary permissions
export CLOUDRUN_SA="ruvector-cloudrun@${PROJECT_ID}.iam.gserviceaccount.com"
export DEPLOYER_SA="ruvector-deployer@${PROJECT_ID}.iam.gserviceaccount.com"
# Cloud Run permissions
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${CLOUDRUN_SA}" \
--role="roles/cloudsql.client"
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${CLOUDRUN_SA}" \
--role="roles/redis.editor"
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${CLOUDRUN_SA}" \
--role="roles/pubsub.publisher"
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${CLOUDRUN_SA}" \
--role="roles/secretmanager.secretAccessor"
# Deployer permissions
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${DEPLOYER_SA}" \
--role="roles/run.admin"
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${DEPLOYER_SA}" \
--role="roles/iam.serviceAccountUser"
gcloud projects add-iam-policy-binding $PROJECT_ID \
--member="serviceAccount:${DEPLOYER_SA}" \
--role="roles/cloudbuild.builds.editor"# Create budget (adjust amounts as needed)
gcloud billing budgets create \
--billing-account=$BILLING_ACCOUNT \
--display-name="RuVector Monthly Budget" \
--budget-amount=500000 \
--threshold-rule=percent=50 \
--threshold-rule=percent=80 \
--threshold-rule=percent=100 \
--threshold-rule=percent=120cd /home/user
git clone https://github.com/ruvnet/ruvector.git
cd ruvector# Build ruvector core
cargo build --release
# Build Node.js bindings
cd crates/ruvector-node
npm install
npm run build
cd ../..# Create terraform variables file
cd /home/user/ruvector/src/burst-scaling/terraform
cat > terraform.tfvars <<EOF
project_id = "${PROJECT_ID}"
project_number = "${PROJECT_NUMBER}"
regions = [
{
name = "us-central1"
display_name = "Iowa (US Central)"
priority = 10
capacity = 80000000
cost_per_hour = 0.50
min_instances = 10
max_instances = 800
},
{
name = "europe-west1"
display_name = "Belgium (Europe West)"
priority = 9
capacity = 80000000
cost_per_hour = 0.55
min_instances = 10
max_instances = 800
},
{
name = "asia-east1"
display_name = "Taiwan (Asia East)"
priority = 8
capacity = 80000000
cost_per_hour = 0.60
min_instances = 10
max_instances = 800
}
]
service_config = {
cpu = "4"
memory = "16Gi"
concurrency = 100
timeout_seconds = 300
max_instances_per_region = 1000
}
scaling_config = {
enable_adaptive_scaling = true
target_cpu_utilization = 0.70
target_memory_utilization = 0.75
target_concurrency = 0.80
scale_down_delay = "300s"
min_scale_down_rate = 2
max_scale_up_rate = 10
}
budget_limits = {
hourly_limit_usd = 10000
daily_limit_usd = 200000
monthly_limit_usd = 5000000
alert_threshold = 0.80
}
# Customize with your settings
redis_memory_gb = 128
database_tier = "db-custom-4-16384"
enable_cdn = true
enable_armor = true
# Notification channels (replace with your values)
slack_webhook_url = "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
email_addresses = ["ops@yourcompany.com"]
EOFcd /home/user/ruvector/src/burst-scaling/terraform
# Initialize Terraform
terraform init
# Review plan
terraform plan -out=tfplan
# Apply infrastructure
terraform apply tfplanThis will create:
- VPC network with subnets
- Cloud SQL PostgreSQL instance with replicas
- Memorystore Redis instances per region
- Cloud Storage buckets
- Pub/Sub topics
- Secrets Manager secrets
- IAM bindings
- Monitoring policies
Expected time: 30-45 minutes
# Get Cloud SQL connection name
export SQL_CONNECTION=$(terraform output -raw sql_connection_name)
# Connect to database
gcloud sql connect ruvector-db --user=postgres --quiet
# Run initialization script
\i /home/user/ruvector/src/cloud-architecture/init-database.sql
# Exit psql
\qDatabase Schema Created:
vectorstable with metadataagentstable for coordinationtaskstable for distributed worksync_queuefor cross-region sync- Indexes and partitions
# Store database password
echo -n "your-secure-password" | gcloud secrets create db-password \
--data-file=- \
--replication-policy="automatic"
# Store Redis connection strings (per region)
for region in us-central1 europe-west1 asia-east1; do
REDIS_HOST=$(terraform output -json redis_hosts | jq -r ".${region}")
echo "redis://${REDIS_HOST}:6379" | gcloud secrets create redis-connection-${region} \
--data-file=- \
--replication-policy="automatic"
done
# Store API keys and other secrets
echo -n "your-api-key" | gcloud secrets create api-key \
--data-file=- \
--replication-policy="automatic"cd /home/user/ruvector
# Build multi-arch image
docker buildx create --use
docker buildx build --platform linux/amd64,linux/arm64 \
-t gcr.io/${PROJECT_ID}/ruvector-streaming:latest \
-f src/cloud-run/Dockerfile \
--push .cd /home/user/ruvector/src/cloud-run
# Use Cloud Build for multi-region deployment
gcloud builds submit --config=cloudbuild.yamlThis deploys to:
- us-central1
- us-east1
- us-west1
- europe-west1
- europe-west3
- europe-north1
- asia-east1
- asia-southeast1
- asia-northeast1
Deployment strategy: Canary (10% → 50% → 100%)
Expected time: 20-30 minutes
# List all deployments
gcloud run services list --platform managed
# Test each region
for region in us-central1 europe-west1 asia-east1; do
URL=$(gcloud run services describe ruvector-streaming \
--region=$region \
--format='value(status.url)')
curl -f "$URL/health" && echo "✓ $region healthy" || echo "✗ $region failed"
done# This is handled by Terraform, but verify:
gcloud compute backend-services list
# Should show backend services for each region# Create URL map
gcloud compute url-maps create ruvector-lb \
--default-backend-bucket=ruvector-static-assets
# Add backend service
gcloud compute url-maps add-path-matcher ruvector-lb \
--path-matcher-name=ruvector-matcher \
--default-service=ruvector-backend-service \
--new-hosts='*.ruvector.io'
# Create target HTTPS proxy
gcloud compute target-https-proxies create ruvector-https-proxy \
--url-map=ruvector-lb \
--ssl-certificates=ruvector-ssl-cert
# Create global forwarding rule
gcloud compute forwarding-rules create ruvector-https-rule \
--global \
--target-https-proxy=ruvector-https-proxy \
--ports=443# Enable CDN on backend service
gcloud compute backend-services update ruvector-backend-service \
--enable-cdn \
--cache-mode=CACHE_ALL_STATIC \
--default-ttl=3600 \
--max-ttl=86400 \
--client-ttl=3600 \
--global# Get load balancer IP
LB_IP=$(gcloud compute addresses describe ruvector-global-ip \
--global \
--format='value(address)')
echo "Configure your DNS:"
echo "A record: ruvector.io -> $LB_IP"
echo "A record: *.ruvector.io -> $LB_IP"Manually configure in your DNS provider:
ruvector.ioA record →$LB_IP*.ruvector.ioA record →$LB_IP
# Create managed SSL certificate (auto-renewal)
gcloud compute ssl-certificates create ruvector-ssl-cert \
--domains=ruvector.io,api.ruvector.io,*.ruvector.io \
--global
# Wait for certificate provisioning (can take 15-30 minutes)
gcloud compute ssl-certificates listcd /home/user/ruvector/src/burst-scaling
# Create dashboard
gcloud monitoring dashboards create \
--config-from-file=monitoring-dashboard.jsonDashboard includes:
- Connection counts per region
- Latency percentiles
- Error rates
- Resource utilization
- Cost tracking
# High latency alert
gcloud alpha monitoring policies create \
--notification-channels=CHANNEL_ID \
--display-name="High P99 Latency" \
--condition-display-name="P99 > 50ms" \
--condition-threshold-value=0.050 \
--condition-threshold-duration=60s \
--aggregation-alignment-period=60s
# High error rate alert
gcloud alpha monitoring policies create \
--notification-channels=CHANNEL_ID \
--display-name="High Error Rate" \
--condition-display-name="Errors > 1%" \
--condition-threshold-value=0.01 \
--condition-threshold-duration=300s
# Region unhealthy alert
gcloud alpha monitoring policies create \
--notification-channels=CHANNEL_ID \
--display-name="Region Unhealthy" \
--condition-display-name="Health Check Failed" \
--condition-threshold-value=1 \
--condition-threshold-duration=180s# Create custom metrics from logs
gcloud logging metrics create error_rate \
--description="Application error rate" \
--log-filter='resource.type="cloud_run_revision"
severity>=ERROR'
gcloud logging metrics create connection_count \
--description="Active connection count" \
--log-filter='resource.type="cloud_run_revision"
jsonPayload.event="connection_established"' \
--value-extractor='EXTRACT(jsonPayload.connection_id)'cd /home/user/ruvector/benchmarks
# Run quick validation (2 hours)
npm run test:quick
# Expected output:
# ✓ Baseline load test passed
# ✓ Single region test passed
# ✓ Basic failover test passed
# ✓ Mixed workload test passed# Run baseline 500M concurrent test (4 hours)
npm run scenario:baseline-500m
# Monitor progress
npm run dashboard
# Open http://localhost:8000/visualization-dashboard.htmlSuccess criteria:
- P99 latency < 50ms
- P50 latency < 10ms
- Error rate < 0.1%
- All regions healthy
# Run 10x burst test (2 hours)
npm run scenario:product-launch-10x
# This will spike to 5B concurrentSuccess criteria:
- System survives without crash
- P99 latency < 100ms
- Auto-scaling completes within 60s
- Error rate < 2%
# Run regional failover test (1 hour)
npm run scenario:region-failover
# This will simulate region failureSuccess criteria:
- Failover completes within 60s
- Connection loss < 5%
- No cascading failures
#!/bin/bash
# Save as: /home/user/ruvector/scripts/daily-check.sh
# Check service health
echo "=== Service Health ==="
for region in us-central1 europe-west1 asia-east1; do
gcloud run services describe ruvector-streaming \
--region=$region \
--format='value(status.conditions[0].status)' | \
grep -q "True" && echo "✓ $region" || echo "✗ $region UNHEALTHY"
done
# Check error rates (last 24h)
echo -e "\n=== Error Rates (24h) ==="
gcloud logging read 'resource.type="cloud_run_revision" severity>=ERROR' \
--limit=10 \
--format=json | jq -r '.[].jsonPayload.message'
# Check costs (yesterday)
echo -e "\n=== Cost (Yesterday) ==="
# Requires BigQuery billing export
# bq query --use_legacy_sql=false "SELECT SUM(cost) FROM billing.gcp_billing_export WHERE DATE(usage_start_time) = CURRENT_DATE() - 1"
# Check capacity
echo -e "\n=== Capacity ==="
gcloud run services describe ruvector-streaming \
--region=us-central1 \
--format='value(spec.template.spec.containerConcurrency,status.observedGeneration)'# Manually scale up for planned event
gcloud run services update ruvector-streaming \
--region=us-central1 \
--min-instances=100 \
--max-instances=1000
# Manually scale down after event
gcloud run services update ruvector-streaming \
--region=us-central1 \
--min-instances=10 \
--max-instances=500
# Or use the burst predictor
cd /home/user/ruvector/src/burst-scaling
node dist/burst-predictor.js --event "Product Launch" --time "2025-12-01T10:00:00Z"# Generate weekly performance report
cd /home/user/ruvector/benchmarks
npm run report -- --period "last-7-days" --format pdf
# Review metrics:
# - Average latency trends
# - Error rate trends
# - Cost per million queries
# - Capacity utilization# Identify idle resources
gcloud run services list --format='table(
metadata.name,
metadata.namespace,
status.url,
status.traffic[0].percent
)' | grep "0%"
# Review committed use discounts
gcloud compute commitments list
# Check for underutilized databases
gcloud sql instances list --format='table(
name,
region,
settings.tier,
state
)' | grep RUNNABLE# Analyze growth trends
# Review last 3 months of connection counts
# Project next month's capacity needs
# Request quota increases if needed
# Request quota increase
gcloud compute project-info describe --project=$PROJECT_ID
gcloud compute regions describe us-central1 --format='value(quotas)'
# Submit increase request if needed
gcloud compute project-info add-metadata \
--metadata=quotas='{"CPUS":"10000","DISKS_TOTAL_GB":"100000"}'# Update container images
cd /home/user/ruvector
git pull origin main
docker build -t gcr.io/${PROJECT_ID}/ruvector-streaming:latest .
docker push gcr.io/${PROJECT_ID}/ruvector-streaming:latest
# Rolling update
gcloud run services update ruvector-streaming \
--image=gcr.io/${PROJECT_ID}/ruvector-streaming:latest \
--region=us-central1
# Verify update
gcloud run revisions list --service=ruvector-streaming --region=us-central1Diagnosis:
# Check database connections
gcloud sql operations list --instance=ruvector-db --limit=10
# Check Redis hit rates
gcloud redis instances describe ruvector-cache-us-central1 \
--region=us-central1 \
--format='value(metadata.stats.hitRate)'
# Check Cloud Run cold starts
gcloud run services describe ruvector-streaming \
--region=us-central1 \
--format='value(status.traffic[0].latestRevision)'Solutions:
- Increase min instances to reduce cold starts
- Increase Redis memory or optimize cache keys
- Add read replicas to database
- Enable connection pooling
- Review slow queries in database
Diagnosis:
# Check error types
gcloud logging read 'resource.type="cloud_run_revision" severity>=ERROR' \
--limit=100 \
--format=json | jq -r '.[] | .jsonPayload.error_type' | sort | uniq -c
# Check recent deployments
gcloud run revisions list --service=ruvector-streaming --region=us-central1 --limit=5Solutions:
- Rollback to previous revision if recent deploy
- Check database connection pool exhaustion
- Verify API rate limits not exceeded
- Check for memory leaks (restart instances)
- Review error logs for patterns
Diagnosis:
# Check scaling metrics
gcloud monitoring time-series list \
--filter='metric.type="run.googleapis.com/container/instance_count"' \
--interval-start-time="2025-01-01T00:00:00Z" \
--interval-end-time="2025-01-02T00:00:00Z"
# Check quotas
gcloud compute project-info describe --project=$PROJECT_ID | grep -A 5 "CPUS"Solutions:
- Request quota increase if limits hit
- Check budget caps (may block scaling)
- Verify IAM permissions for auto-scaler
- Review scaling policies (min/max instances)
- Check for regional capacity issues
Diagnosis:
# Check health checks
gcloud compute health-checks describe ruvector-health-check
# Check backend service health
gcloud compute backend-services get-health ruvector-backend-service --global
# Check load balancer configuration
gcloud compute url-maps describe ruvector-lbSolutions:
- Verify health check endpoints responding
- Check firewall rules allow health checks
- Verify backend services configured correctly
- Check DNS propagation
- Review load balancer logs
Diagnosis:
# Check current spend
gcloud billing accounts list
# Identify expensive resources
gcloud compute instances list --format='table(name,zone,machineType,status)'
gcloud sql instances list --format='table(name,region,tier,status)'
gcloud redis instances list --format='table(name,region,tier,memorySizeGb)'Solutions:
- Scale down min instances in low-traffic regions
- Reduce Redis memory size if underutilized
- Downgrade database tier if CPU/memory low
- Enable more aggressive CDN caching
- Review and delete unused resources
# List revisions
gcloud run revisions list --service=ruvector-streaming --region=us-central1
# Rollback to previous revision
PREVIOUS_REVISION=$(gcloud run revisions list \
--service=ruvector-streaming \
--region=us-central1 \
--format='value(metadata.name)' \
--limit=2 | tail -n1)
gcloud run services update-traffic ruvector-streaming \
--region=us-central1 \
--to-revisions=$PREVIOUS_REVISION=100cd /home/user/ruvector/src/burst-scaling/terraform
# Revert to previous state
terraform state pull > current-state.tfstate
terraform state push previous-state.tfstate
terraform apply -auto-approve# Disable all traffic to service
gcloud run services update ruvector-streaming \
--region=us-central1 \
--max-instances=0
# Or delete service entirely
gcloud run services delete ruvector-streaming --region=us-central1 --quiet- One-time setup: ~$100 (testing, quota requests, etc.)
- Cloud Run: $2.4M ($0.0048 per connection)
- Cloud SQL: $150K (3 regions, read replicas)
- Redis: $45K (3 regions, 128GB each)
- Load Balancer + CDN: $80K
- Networking: $50K
- Monitoring + Logging: $20K
- Storage: $5K
- Total: ~$2.75M/month (optimized)
- Cloud Run: ~$80K
- Database: ~$2K (connection spikes)
- Redis: ~$500 (included in monthly)
- Networking: ~$5K
- Total: ~$88K per event
- Use committed use discounts (30% savings)
- Enable auto-scaling to scale down when idle
- Increase CDN cache hit rate to reduce backend load
- Use preemptible instances for non-critical workloads
- Regularly review and delete unused resources
- Complete Initial Deployment (Phases 1-5)
- Run Validation Tests (Phase 6)
- Schedule Load Tests (Baseline, then burst)
- Set Up Monitoring Dashboard
- Configure Alert Policies
- Create Runbook (Already created:
/home/user/ruvector/src/burst-scaling/RUNBOOK.md) - Train Team on Operations
- Plan First Production Event (Start small, scale up)
- Iterate and Optimize (Based on real traffic)
- Architecture Overview
- Scaling Strategy
- Infrastructure Design
- Load Test Scenarios
- Operations Runbook
- Benchmarking Guide
- GCP Cloud Run Docs
- GCP Load Balancing Docs
For issues or questions:
- GitHub Issues: https://github.com/ruvnet/ruvector/issues
- Email: ops@ruvector.io
- Slack: #ruvector-ops
Document Version: 1.0 Last Updated: 2025-11-20 Deployment Status: Ready for Production