-
Notifications
You must be signed in to change notification settings - Fork 355
Open
Description
#!/bin/bash
################################################################################
# Document AI Challenge Lab - Automated Deployment Script
# Fixed Version with Task 4 Eventarc/Pub/Sub permission handling
#
# This script automates all 5 tasks:
# TASK 1: Enable APIs and download source files
# TASK 2: Create Document AI processor
# TASK 3: Create Cloud Storage buckets and BigQuery dataset
# TASK 4: Deploy Cloud Run Gen2 function with proper IAM permissions
# TASK 5: Test with sample invoices
#
# Inputs (read interactively from stdin):
#   PROCESSOR_NAME - display name given to the Document AI processor
#   REGION         - location used for the buckets and the function
################################################################################
echo ""
echo "================================"
echo "Document AI Challenge Lab Setup"
echo "================================"
echo ""
# Prompt user to input configuration.
# -r keeps backslashes in the answers literal instead of treating them as
# escape characters.
read -r -p "Enter PROCESSOR_NAME: " PROCESSOR_NAME
read -r -p "Enter REGION (e.g., us-east1): " REGION
# Both values are interpolated into resource names and command-line flags by
# the later tasks, so an empty answer (or EOF on stdin) is rejected up front
# instead of producing malformed commands further down.
if [[ -z "$PROCESSOR_NAME" || -z "$REGION" ]]; then
  echo "✗ PROCESSOR_NAME and REGION must both be provided" >&2
  exit 1
fi
echo ""
echo "Configuration:"
echo " PROCESSOR_NAME: $PROCESSOR_NAME"
echo " REGION: $REGION"
echo ""
################################################################################
# TASK 1: Enable APIs and download source files
#
# Reads:   REGION
# Exports: BUCKET_LOCATION, PROJECT_ID
# Exits non-zero when the project cannot be resolved, the APIs cannot be
# enabled, or the lab source files cannot be downloaded.
################################################################################
echo "========== TASK 1: Setup APIs and Download Files =========="
export BUCKET_LOCATION="$REGION"
# Assign first, export second: `export VAR=$(cmd)` reports the status of
# `export` and would hide a failing gcloud call.
PROJECT_ID=$(gcloud config get-value core/project)
export PROJECT_ID
if [[ -z "$PROJECT_ID" ]]; then
  echo "✗ No active gcloud project is configured" >&2
  exit 1
fi
echo "Enabling required APIs..."
# gcloud accepts several service names in one invocation, so the whole list is
# enabled with a single call.
required_apis=(
  documentai.googleapis.com
  cloudfunctions.googleapis.com
  cloudbuild.googleapis.com
  geocoding-backend.googleapis.com
  eventarc.googleapis.com
  run.googleapis.com
)
if ! gcloud services enable "${required_apis[@]}"; then
  echo "✗ Failed to enable required APIs" >&2
  exit 1
fi
echo "Creating document-ai-challenge directory and downloading source files..."
mkdir -p ~/document-ai-challenge
# The wildcard is quoted so that gsutil, not the local shell, expands it.
if ! gsutil -m cp -r 'gs://spls/gsp367/*' ~/document-ai-challenge/; then
  echo "✗ Failed to download lab source files" >&2
  exit 1
fi
echo "✓ Task 1 completed"
echo ""
################################################################################
# TASK 2: Create Document AI Processor
#
# Reads: PROCESSOR_NAME, PROJECT_ID
# Sends a create request for a FORM_PARSER_PROCESSOR under locations/us and
# exits non-zero when no token is available or the API rejects the request.
################################################################################
echo "========== TASK 2: Create Document AI Processor =========="
ACCESS_TOKEN=$(gcloud auth application-default print-access-token)
if [[ -z "$ACCESS_TOKEN" ]]; then
  echo "✗ Could not obtain an access token" >&2
  exit 1
fi
echo "Creating FORM_PARSER_PROCESSOR: $PROCESSOR_NAME..."
# Escape backslashes and double quotes so a processor name containing them
# still yields a valid JSON string.
processor_name_json=${PROCESSOR_NAME//\\/\\\\}
processor_name_json=${processor_name_json//\"/\\\"}
request_body=$(printf '{"display_name": "%s", "type": "FORM_PARSER_PROCESSOR"}' \
  "$processor_name_json")
# -w appends the HTTP status on its own final line, which lets the response
# body and the status be separated without a temporary file.
create_response=$(curl -sS -X POST \
  -H "Authorization: Bearer $ACCESS_TOKEN" \
  -H "Content-Type: application/json" \
  -d "$request_body" \
  -w '\n%{http_code}' \
  "https://documentai.googleapis.com/v1/projects/$PROJECT_ID/locations/us/processors")
create_status=${create_response##*$'\n'}
create_body=${create_response%$'\n'*}
printf '%s\n' "$create_body"
case "$create_status" in
  2??)
    ;;
  409)
    # A conflict is treated as non-fatal so that a rerun can continue with the
    # processor that is already there.
    echo "Processor creation reported a conflict (HTTP 409); continuing"
    ;;
  *)
    echo "✗ Processor creation failed (HTTP $create_status)" >&2
    exit 1
    ;;
esac
echo ""
echo "✓ Task 2 completed"
echo ""
################################################################################
# TASK 3: Create Storage Buckets and BigQuery Dataset
#
# Reads: BUCKET_LOCATION, PROJECT_ID
# Side effect: leaves the working directory at
# ~/document-ai-challenge/scripts/table-schema/.
################################################################################
echo "========== TASK 3: Create Cloud Storage Buckets and BigQuery Dataset =========="
echo "Creating Cloud Storage buckets..."
# One bucket per pipeline stage. A creation error is reported by gsutil itself
# and, as before, does not stop the script.
for bucket_suffix in input-invoices output-invoices archived-invoices; do
  gsutil mb -c standard -l "${BUCKET_LOCATION}" -b on \
    "gs://${PROJECT_ID}-${bucket_suffix}"
done
echo "Creating BigQuery dataset..."
bq --location="US" mk -d \
  --description "Form Parser Results" \
  "${PROJECT_ID}:invoice_parser_results"
echo "Creating BigQuery tables..."
# The schema files below are given as relative paths, so a failed cd must stop
# the script rather than let bq look for them in the wrong directory.
cd ~/document-ai-challenge/scripts/table-schema/ || {
  echo "✗ Cannot enter ~/document-ai-challenge/scripts/table-schema/" >&2
  exit 1
}
bq mk --table \
  invoice_parser_results.doc_ai_extracted_entities \
  doc_ai_extracted_entities.json
bq mk --table \
  invoice_parser_results.geocode_details \
  geocode_details.json
echo "✓ Task 3 completed"
echo ""
################################################################################
# TASK 4: Deploy Cloud Run Gen2 Function with Eventarc Trigger
#
# Reads:   REGION, PROCESSOR_NAME
# Sets:    PROJECT_ID (exported), PROJECT_NUMBER, SERVICE_ACCOUNT,
#          CLOUD_FUNCTION_LOCATION (exported), PROCESSOR_ID (exported),
#          PARSER_LOCATION
# Exits non-zero when the scripts directory, the project, the processor ID, or
# the final deployment cannot be obtained.
################################################################################
echo "========== TASK 4: Deploy Cloud Run Function =========="
# --source and --env-vars-file below are relative to this directory.
cd ~/document-ai-challenge/scripts || {
  echo "✗ Cannot enter ~/document-ai-challenge/scripts" >&2
  exit 1
}
# Assign first, export second, so a failing gcloud call is not masked by the
# exit status of `export`.
PROJECT_ID=$(gcloud config get-value core/project)
export PROJECT_ID
PROJECT_NUMBER=$(gcloud projects describe "$PROJECT_ID" --format='value(projectNumber)')
if [[ -z "$PROJECT_ID" || -z "$PROJECT_NUMBER" ]]; then
  echo "✗ Could not resolve PROJECT_ID / PROJECT_NUMBER" >&2
  exit 1
fi
echo "PROJECT_ID: $PROJECT_ID"
echo "PROJECT_NUMBER: $PROJECT_NUMBER"
echo ""
compute_service_account="${PROJECT_NUMBER}-compute@developer.gserviceaccount.com"
# Location the processor is created in and listed from (locations/us).
docai_location="us"

# Deploys (or redeploys) process-invoices with the flags common to both
# deployments in this task.
# Globals:   CLOUD_FUNCTION_LOCATION, PROJECT_ID, compute_service_account (read)
# Arguments: extra gcloud flags, appended verbatim
# Returns:   the exit status of gcloud
deploy_process_invoices() {
  gcloud functions deploy process-invoices \
    --gen2 \
    --region="${CLOUD_FUNCTION_LOCATION}" \
    --entry-point=process_invoice \
    --runtime=python39 \
    --service-account="${compute_service_account}" \
    --source=cloud-functions/process-invoices \
    --timeout=400 \
    --trigger-resource="gs://${PROJECT_ID}-input-invoices" \
    --trigger-event=google.storage.object.finalize \
    --allow-unauthenticated \
    "$@"
}

# Step 1: Grant Artifact Registry Reader role
# Deliberately best effort: stderr is discarded and a failure is ignored.
echo "Step 1: Granting Artifact Registry Reader role..."
gcloud projects add-iam-policy-binding "$PROJECT_ID" \
  --member="serviceAccount:${compute_service_account}" \
  --role="roles/artifactregistry.reader" 2>/dev/null || true
# Step 2: Grant Pub/Sub Publisher to Cloud Storage Service Agent
echo "Step 2: Granting Pub/Sub Publisher to Cloud Storage Service Agent..."
SERVICE_ACCOUNT=$(gcloud storage service-agent --project="$PROJECT_ID")
echo "Cloud Storage Service Account: $SERVICE_ACCOUNT"
gcloud projects add-iam-policy-binding "$PROJECT_ID" \
  --member="serviceAccount:$SERVICE_ACCOUNT" \
  --role="roles/pubsub.publisher"
# Step 3: Give the new IAM bindings time to take effect before deploying
echo "Step 3: Waiting for IAM propagation (60 seconds)..."
sleep 60
export CLOUD_FUNCTION_LOCATION="$REGION"
echo "CLOUD_FUNCTION_LOCATION: $CLOUD_FUNCTION_LOCATION"
echo ""
# Step 4: Deploy Cloud Run Gen2 Function
echo "Step 4: Deploying Cloud Run Gen2 function..."
if deploy_process_invoices \
  --env-vars-file=cloud-functions/process-invoices/.env.yaml; then
  echo "✓ Cloud Run function deployed"
else
  # Step 6 deploys again with the same base flags, so this is only a warning.
  echo "⚠ Initial deployment failed; it is attempted again in Step 6" >&2
fi
echo ""
# Step 5: Extract PROCESSOR_ID from Document AI API
echo "Step 5: Extracting PROCESSOR_ID from Document AI API..."
sleep 10
processor_list=$(curl -s -X GET \
  -H "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
  -H "Content-Type: application/json" \
  "https://documentai.googleapis.com/v1/projects/$PROJECT_ID/locations/${docai_location}/processors")
# Prefer the processor whose displayName equals PROCESSOR_NAME; fall back to the
# first processor listed (the previous behaviour) when no display name matches.
# NOTE(review): relies on each processor's "name" line preceding its
# "displayName" line in the response — confirm against a real response.
PROCESSOR_ID=$(printf '%s\n' "$processor_list" | awk -v want="$PROCESSOR_NAME" '
  /"name":/ && /\/processors\// {
    id = $0
    sub(/.*\/processors\//, "", id)
    sub(/["\/].*/, "", id)
    current = id
    if (first == "") first = current
  }
  /"displayName":/ {
    label = $0
    sub(/^[^:]*:[ \t]*"/, "", label)
    sub(/"[ \t]*,?[ \t\r]*$/, "", label)
    if (label == want && current != "") { print current; found = 1; exit }
  }
  END { if (!found && first != "") print first }
')
if [[ -z "$PROCESSOR_ID" ]]; then
  echo "✗ Failed to extract PROCESSOR_ID" >&2
  exit 1
fi
export PROCESSOR_ID
echo "PROCESSOR_ID: $PROCESSOR_ID"
echo ""
# PARSER_LOCATION is the Document AI location of the processor. The processor
# is created and listed under locations/us, so the compute REGION (for example
# us-east1) must not be used here.
# NOTE(review): assumes the function uses PARSER_LOCATION to address the
# processor — confirm against cloud-functions/process-invoices.
PARSER_LOCATION="$docai_location"
echo "PARSER_LOCATION: $PARSER_LOCATION"
echo ""
# Step 6: Update Cloud Run function with environment variables
echo "Step 6: Updating Cloud Run function with environment variables..."
sleep 10
if ! deploy_process_invoices \
  --update-env-vars="PROCESSOR_ID=${PROCESSOR_ID},PARSER_LOCATION=${PARSER_LOCATION},PROJECT_ID=${PROJECT_ID}"; then
  echo "✗ Failed to deploy process-invoices with updated environment variables" >&2
  exit 1
fi
echo "✓ Cloud Run function environment variables updated"
echo ""
# Step 7: Verify deployment
echo "Step 7: Verifying Cloud Run function deployment..."
gcloud functions describe process-invoices \
  --gen2 \
  --region="${CLOUD_FUNCTION_LOCATION}" \
  --format='table(name,state,runtime,trigger.type)'
echo "✓ Task 4 completed"
echo ""
################################################################################
# TASK 5: Test and Validate End-to-End Solution
#
# Copies the sample invoices into the input bucket and exits non-zero when the
# copy fails, so the success lines below are only printed after a real upload.
################################################################################
echo "========== TASK 5: Test and Validate Solution =========="
# Assign first, export second, so a failing gcloud call is not masked by the
# exit status of `export`.
PROJECT_ID=$(gcloud config get-value core/project)
export PROJECT_ID
echo "Uploading test invoices to input bucket..."
# The wildcard is quoted so that gsutil, not the local shell, expands it.
if ! gsutil -m cp 'gs://cloud-training/gsp367/invoices/*' \
  "gs://${PROJECT_ID}-input-invoices/"; then
  echo "✗ Failed to upload test invoices to gs://${PROJECT_ID}-input-invoices/" >&2
  exit 1
fi
echo ""
echo "✓ Test invoices uploaded to gs://${PROJECT_ID}-input-invoices/"
echo ""
echo "✓ Cloud Run function will automatically process invoices when they are uploaded"
echo ""
################################################################################
# Summary
################################################################################
#######################################
# Prints the end-of-run report: function details, the environment variables
# passed to the function, and follow-up commands.
# Globals:   CLOUD_FUNCTION_LOCATION, PROJECT_ID, PROCESSOR_ID,
#            PARSER_LOCATION (all read)
# Outputs:   the report on stdout
#######################################
print_summary() {
  local function_state
  # Report the state gcloud returns instead of a hard-coded "ACTIVE"; when the
  # lookup fails or returns nothing the status is shown as UNKNOWN.
  function_state=$(gcloud functions describe process-invoices \
    --gen2 \
    --region="${CLOUD_FUNCTION_LOCATION}" \
    --format='value(state)' 2>/dev/null) || function_state=""

  echo "========== DEPLOYMENT COMPLETE =========="
  echo ""
  echo "✅ All tasks completed successfully!"
  echo ""
  echo "Function Details:"
  echo " Function Name: process-invoices"
  echo " Region: ${CLOUD_FUNCTION_LOCATION}"
  echo " Runtime: python39"
  echo " Trigger: Cloud Storage (${PROJECT_ID}-input-invoices)"
  echo " Status: ${function_state:-UNKNOWN}"
  echo ""
  echo "Environment Variables:"
  echo " PROCESSOR_ID: ${PROCESSOR_ID}"
  echo " PARSER_LOCATION: ${PARSER_LOCATION}"
  echo " PROJECT_ID: ${PROJECT_ID}"
  echo ""
  echo "Next Steps:"
  echo "1. Monitor function execution in Cloud Logs:"
  echo " gcloud functions logs read process-invoices --gen2 --region=${CLOUD_FUNCTION_LOCATION} --limit=50"
  echo ""
  echo "2. Check BigQuery for processed results:"
  echo " bq query --nouse_legacy_sql 'SELECT * FROM \`${PROJECT_ID}.invoice_parser_results.doc_ai_extracted_entities\` LIMIT 10'"
  echo ""
  echo "3. View processed invoices in output bucket:"
  echo " gsutil ls gs://${PROJECT_ID}-output-invoices/"
  echo ""
}

print_summary
Metadata
Metadata
Assignees
Labels
No labels