Commit f2f9bf6

feat: add search index configuration script with data fetching and uploading functionality
1 parent 0b48f94 commit f2f9bf6

1 file changed: 80 additions, 0 deletions

@@ -0,0 +1,80 @@
set -euo pipefail

echo "=== Search Index Configuration Script Start ==="
echo "Storage account: $MAIN_STORAGE_ACCOUNT_NAME"
echo "Search service: $SEARCH_SERVICE_NAME"
echo "Repository URL: $GITHUB_REPO_URL"

# Wait for RBAC permissions to propagate (Azure role assignments can take a while to take effect)
echo "=== Waiting for RBAC permissions to propagate ==="
sleep 30
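
# Illustrative sketch (an assumption, not a guaranteed pattern): after the fixed
# sleep, poll until the role assignment is actually usable before proceeding;
# the retry count and interval here are arbitrary.
for rbac_attempt in 1 2 3 4 5; do
    if az storage container list --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login --output none 2>/dev/null; then
        echo "RBAC data-plane access confirmed (attempt $rbac_attempt)"
        break
    fi
    echo "RBAC not effective yet (attempt $rbac_attempt), retrying in 15s..."
    sleep 15
done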

# Verify main storage account exists
az storage account show --name "$MAIN_STORAGE_ACCOUNT_NAME" --resource-group "$RESOURCE_GROUP_NAME" --output table

# Set up the Python environment
python3 -m venv /tmp/venv && source /tmp/venv/bin/activate
pip install --upgrade pip

# Download only the necessary scripts
mkdir -p /tmp/scripts && cd /tmp/scripts
az storage blob download-batch --destination . --source scripts --account-name "$SCRIPT_STORAGE_ACCOUNT_NAME" --auth-mode login
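
# Illustrative sanity check (an assumption, not part of the committed script):
# fail fast if the entry points used below did not download.
for required in src/search/requirements.txt src/search/fetch_data.py src/search/upload_data.py src/search/index_utils.py; do
    [ -f "/tmp/scripts/$required" ] || { echo "Missing required script: $required"; exit 1; }
done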

# Step 1: Fetch data files from source to local directory
echo "=== Step 1: Fetching data files from $DATA_SOURCE_TYPE source ==="

# First install requirements from the src/search directory, where fetch_data.py is located
cd /tmp/scripts/src/search
pip install -r requirements.txt

# Create the local data directory
mkdir -p /tmp/local_data

# Fetch data using fetch_data.py
python fetch_data.py \
    --source_type "$DATA_SOURCE_TYPE" \
    --source_url "$DATA_SOURCE_URL" \
    --source_path "$DATA_SOURCE_PATH" \
    --output_dir "/tmp/local_data" \
    --file_pattern "$DATA_FILE_PATTERN"
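
# Illustrative check (an assumption): make sure the fetch produced at least one
# matching file before moving on to the upload step.
fetched_count=$(find /tmp/local_data -type f -name "$DATA_FILE_PATTERN" | wc -l)
echo "Fetched $fetched_count file(s) matching $DATA_FILE_PATTERN"
if [ "$fetched_count" -eq 0 ]; then
    echo "No data files were fetched; aborting before upload"
    exit 1
fi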

# Step 2: Upload fetched data files to main storage account
echo "=== Step 2: Uploading data files to main storage account ==="
echo "Debug: MAIN_STORAGE_ACCOUNT_NAME = $MAIN_STORAGE_ACCOUNT_NAME"
echo "Debug: DATA_CONTAINER_NAME = $DATA_CONTAINER_NAME"
echo "Debug: AZURE_CLIENT_ID = $AZURE_CLIENT_ID"

# Verify that managed identity authentication works
echo "=== Testing Azure authentication ==="
az account show
echo "=== Testing storage account access ==="
az storage account show --name "$MAIN_STORAGE_ACCOUNT_NAME" --resource-group "$RESOURCE_GROUP_NAME" --output table
echo "=== Testing storage container list access ==="
az storage container list --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login --output table || echo "Container list failed"
echo "=== Testing that the specific container exists ==="
az storage container exists --name "$DATA_CONTAINER_NAME" --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login || echo "Container exists check failed"

# Create the container if it doesn't already exist (this should succeed if RBAC is correct)
echo "=== Testing container creation with Azure CLI ==="
az storage container create --name "$DATA_CONTAINER_NAME" --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login || echo "Container creation failed"

# Run upload_data.py from the correct directory
python upload_data.py \
    --storage_account_name "$MAIN_STORAGE_ACCOUNT_NAME" \
    --container_name "$DATA_CONTAINER_NAME" \
    --data_path "/tmp/local_data" \
    --file_pattern "$DATA_FILE_PATTERN"
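
# Illustrative follow-up (an assumption): list what actually landed in the
# container so the upload result is visible in the deployment logs.
echo "=== Blobs now in $DATA_CONTAINER_NAME ==="
az storage blob list --container-name "$DATA_CONTAINER_NAME" --account-name "$MAIN_STORAGE_ACCOUNT_NAME" --auth-mode login --query "[].name" --output tsv || echo "Blob listing failed"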

# Step 3: Configure search index
echo "=== Step 3: Configuring search index ==="
python index_utils.py \
    --aisearch_name "$SEARCH_SERVICE_NAME" \
    --base_index_name "$BASE_INDEX_NAME" \
    --openai_api_base "$OPENAI_ENDPOINT" \
    --subscription_id "$SUBSCRIPTION_ID" \
    --resource_group_name "$RESOURCE_GROUP_NAME" \
    --storage_name "$MAIN_STORAGE_ACCOUNT_NAME" \
    --container_name "$DATA_CONTAINER_NAME" \
    --client_id "$AZURE_CLIENT_ID"
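
# Illustrative verification (assumptions: the deployment identity can read the
# search admin keys, and api-version 2023-11-01 is available on the service):
# list the index names on the service before declaring success.
search_admin_key=$(az search admin-key show --service-name "$SEARCH_SERVICE_NAME" --resource-group "$RESOURCE_GROUP_NAME" --query primaryKey --output tsv)
curl -s -H "api-key: $search_admin_key" \
    "https://$SEARCH_SERVICE_NAME.search.windows.net/indexes?api-version=2023-11-01&\$select=name" \
    || echo "Index listing failed"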

echo "=== Search index configuration completed successfully ==="
