set -euo pipefail

echo "=== Search Index Configuration Script Start ==="
echo "Storage account: $MAIN_STORAGE_ACCOUNT_NAME"
echo "Search service: $SEARCH_SERVICE_NAME"
echo "Repository URL: $GITHUB_REPO_URL"

# Wait for RBAC role assignments to propagate (Azure can take time to apply new permissions)
echo "=== Waiting for RBAC permissions to propagate ==="
sleep 30
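# Optional, illustrative sanity check (assumes AZURE_CLIENT_ID is the client ID of
# the managed identity running this script): list its role assignments so the log
# shows whether the expected roles are visible yet. Kept non-fatal, since the fixed
# sleep above remains the fallback for propagation delays.
az role assignment list --assignee "$AZURE_CLIENT_ID" --output table || echo "Role assignment listing failed (RBAC may still be propagating)"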

# Verify the main storage account exists
az storage account show --name "$MAIN_STORAGE_ACCOUNT_NAME" --resource-group "$RESOURCE_GROUP_NAME" --output table

# Set up the Python environment
python3 -m venv /tmp/venv && source /tmp/venv/bin/activate
pip install --upgrade pip

# Download only the necessary scripts
mkdir -p /tmp/scripts && cd /tmp/scripts
az storage blob download-batch --destination . --source scripts --account-name "$SCRIPT_STORAGE_ACCOUNT_NAME" --auth-mode login
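# Illustrative, non-fatal check: confirm the expected src/search entry points were
# downloaded before the script changes into that directory below.
ls /tmp/scripts/src/search || echo "Expected src/search directory not found in downloaded scripts"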

# Step 1: Fetch data files from source to local directory
echo "=== Step 1: Fetching data files from $DATA_SOURCE_TYPE source ==="

# First install requirements from the src/search directory, where fetch_data.py is located
cd /tmp/scripts/src/search
pip install -r requirements.txt

# Create local data directory
mkdir -p /tmp/local_data

# Fetch data using fetch_data.py
python fetch_data.py \
    --source_type "$DATA_SOURCE_TYPE" \
    --source_url "$DATA_SOURCE_URL" \
    --source_path "$DATA_SOURCE_PATH" \
    --output_dir "/tmp/local_data" \
    --file_pattern "$DATA_FILE_PATTERN"
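# Illustrative check: report how many files were fetched so an empty result is
# visible in the log before the upload step runs.
echo "Fetched $(find /tmp/local_data -type f | wc -l) file(s) into /tmp/local_data"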

# Step 2: Upload fetched data files to the main storage account
echo "=== Step 2: Uploading data files to main storage account ==="
echo "Debug: MAIN_STORAGE_ACCOUNT_NAME = $MAIN_STORAGE_ACCOUNT_NAME"
echo "Debug: DATA_CONTAINER_NAME = $DATA_CONTAINER_NAME"
echo "Debug: AZURE_CLIENT_ID = $AZURE_CLIENT_ID"

# Verify that managed identity authentication works
echo "=== Testing Azure authentication ==="
az account show
echo "=== Testing storage account access ==="
az storage account show --name "$MAIN_STORAGE_ACCOUNT_NAME" --resource-group "$RESOURCE_GROUP_NAME" --output table
echo "=== Testing storage container list access ==="
az storage container list --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login --output table || echo "Container list failed"
echo "=== Testing whether the data container exists ==="
az storage container exists --name "$DATA_CONTAINER_NAME" --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login || echo "Container exists check failed"

# Create the container if it doesn't already exist (this should succeed if RBAC is configured correctly)
echo "=== Testing container creation with Azure CLI ==="
az storage container create --name "$DATA_CONTAINER_NAME" --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login || echo "Container creation failed"

# Run upload_data.py from the src/search directory (the current directory from Step 1)
python upload_data.py \
    --storage_account_name "$MAIN_STORAGE_ACCOUNT_NAME" \
    --container_name "$DATA_CONTAINER_NAME" \
    --data_path "/tmp/local_data" \
    --file_pattern "$DATA_FILE_PATTERN"
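# Illustrative, non-fatal verification: list the uploaded blobs so the log confirms
# the data landed in the container before indexing begins.
az storage blob list --container-name "$DATA_CONTAINER_NAME" --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login --output table || echo "Blob listing failed"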

# Step 3: Configure the search index
echo "=== Step 3: Configuring search index ==="
python index_utils.py \
    --aisearch_name "$SEARCH_SERVICE_NAME" \
    --base_index_name "$BASE_INDEX_NAME" \
    --openai_api_base "$OPENAI_ENDPOINT" \
    --subscription_id "$SUBSCRIPTION_ID" \
    --resource_group_name "$RESOURCE_GROUP_NAME" \
    --storage_name "$MAIN_STORAGE_ACCOUNT_NAME" \
    --container_name "$DATA_CONTAINER_NAME" \
    --client_id "$AZURE_CLIENT_ID"

echo "=== Search index configuration completed successfully ==="