Skip to content

Commit bf02e6a

Browse files
committed
Fix(gcloud): Correct service account email format and improve creation
This commit addresses issues in the create-dpgce script related to service account handling, particularly for domain-scoped projects.
- lib/env.sh:
  - Correctly formats the service account email (GSA) for domain-scoped projects (e.g., c9h.org:project-id) by including the domain in the email address (e.g., sa-<cluster-name>@project-id.c9h.org.iam.gserviceaccount.com).
- lib/shared-functions.sh:
  - Replaced `gcloud iam service-accounts describe` with `list --filter` for a more reliable existence check, as `describe` was failing to find existing service accounts.
  - Added a retry loop for `gcloud projects add-iam-policy-binding` to handle potential IAM propagation delays after service account creation.
  - Ensured the script exits if role bindings fail after multiple retries.
  - Cleaned up role binding logic into a loop.
These changes ensure the script can reliably create and configure the necessary service account and its IAM roles, unblocking cluster creation.
1 parent cb0ced6 commit bf02e6a

File tree

2 files changed

+72
-28
lines changed

2 files changed

+72
-28
lines changed

gcloud/lib/env.sh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,16 @@ export DATAPROC_IMAGE_VERSION="${IMAGE_VERSION}"
5757
#export INIT_ACTIONS_ROOT="gs://goog-dataproc-initialization-actions-${REGION}"
5858
export AUTOSCALING_POLICY_NAME=aspolicy-${CLUSTER_NAME}
5959
export SA_NAME=sa-${CLUSTER_NAME}
60-
export GSA=${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com
60+
61+
if [[ "${PROJECT_ID}" == *":"* ]]; then
62+
# Domain-scoped project
63+
DOMAIN=$(echo "${PROJECT_ID}" | cut -d':' -f1)
64+
PROJECT_NAME=$(echo "${PROJECT_ID}" | cut -d':' -f2)
65+
export GSA="${SA_NAME}@${PROJECT_NAME}.${DOMAIN}.iam.gserviceaccount.com"
66+
else
67+
# Regular project
68+
export GSA="${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com"
69+
fi
6170

6271
export INIT_ACTIONS_ROOT="gs://${BUCKET}/dataproc-initialization-actions"
6372
export YARN_DOCKER_IMAGE="gcr.io/${PROJECT_ID}/${USER}/cudatest-ubuntu18:latest"

gcloud/lib/shared-functions.sh

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ gcloud beta billing projects \
581581
582582
once you have credentials to run the above command,
583583
584-
Press enter >
584+
Press enter >
585585
"
586586
read
587587

@@ -838,37 +838,72 @@ function delete_phs_cluster() {
838838

839839
function create_service_account() {
840840
set -x
841-
if gcloud iam service-accounts describe "${GSA}" > /dev/null ; then
842-
echo "service account ${SA_NAME} already exists"
843-
return 0 ; fi
844841

845-
gcloud iam service-accounts create "${SA_NAME}" \
846-
--description="Service account for use with cluster ${CLUSTER_NAME}" \
847-
--display-name="${SA_NAME}"
842+
# Check whether the service account already exists
843+
echo "Checking for service account ${GSA}..."
844+
# Use list with a filter on the full service account email (describe proved unreliable here)
845+
SA_EXISTS=$(gcloud iam service-accounts list \
846+
--project="${PROJECT_ID}" \
847+
--filter="email=${GSA}" \
848+
--format="value(email)")
848849

849-
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
850-
--member="serviceAccount:${GSA}" \
851-
--role=roles/dataproc.worker
852-
853-
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
854-
--member="serviceAccount:${GSA}" \
855-
--role=roles/storage.objectCreator
856-
857-
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
858-
--member="serviceAccount:${GSA}" \
859-
--role=roles/storage.objectViewer
850+
if [[ -n "${SA_EXISTS}" ]]; then
851+
echo "Service account ${GSA} already exists."
852+
else
853+
echo "Service account ${GSA} not found, attempting to create..."
854+
if ! gcloud iam service-accounts create "${SA_NAME}" \
855+
--project="${PROJECT_ID}" \
856+
--description="Service account for use with cluster ${CLUSTER_NAME}" \
857+
--display-name="${SA_NAME}"; then
858+
echo "ERROR: Failed to create service account ${SA_NAME}."
859+
exit 1
860+
fi
861+
echo "Service account ${GSA} created successfully."
862+
echo "Waiting 10s for IAM propagation..."
863+
sleep 10
864+
fi
860865

861-
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
862-
--member="serviceAccount:${GSA}" \
863-
--role=roles/secretmanager.secretAccessor
866+
# Bind roles to the service account
867+
ROLES=(
868+
roles/dataproc.worker
869+
roles/bigquery.dataEditor
870+
roles/storage.objectCreator
871+
roles/storage.objectViewer
872+
roles/secretmanager.secretAccessor
873+
roles/compute.viewer
874+
roles/compute.instanceAdmin.v1
875+
)
876+
877+
for role in "${ROLES[@]}"; do
878+
echo "Binding ${role} to ${GSA}..."
879+
MAX_RETRIES=5
880+
RETRY_COUNT=0
881+
SLEEP_TIME=10
882+
while [[ ${RETRY_COUNT} -lt ${MAX_RETRIES} ]]; do
883+
# Capture output and error
884+
BIND_OUTPUT=$(gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
885+
--member="serviceAccount:${GSA}" \
886+
--role="${role}" --condition=None 2>&1)
887+
BIND_EXIT_CODE=$?
888+
889+
if [[ ${BIND_EXIT_CODE} -eq 0 ]]; then
890+
echo "${role} bound successfully to ${GSA}."
891+
break # Exit the while loop on success
892+
fi
864893

865-
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
866-
--member="serviceAccount:${GSA}" \
867-
--role=roles/compute.viewer
894+
RETRY_COUNT=$((RETRY_COUNT + 1))
895+
echo "Attempt ${RETRY_COUNT}/${MAX_RETRIES} failed for ${role}."
896+
echo "Error: ${BIND_OUTPUT}"
868897

869-
gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
870-
--member="serviceAccount:${GSA}" \
871-
--role=roles/compute.instanceAdmin.v1
898+
if [[ ${RETRY_COUNT} -lt ${MAX_RETRIES} ]]; then
899+
echo "Retrying in ${SLEEP_TIME} seconds..."
900+
sleep ${SLEEP_TIME}
901+
else
902+
echo "Failed to bind ${role} to ${GSA} after ${MAX_RETRIES} attempts."
903+
exit 1
904+
fi
905+
done
906+
done
872907

873908
gcloud iam service-accounts add-iam-policy-binding "${GSA}" \
874909
--member="serviceAccount:${GSA}" \

0 commit comments

Comments
 (0)