diff --git a/api/v1alpha1/cloudstorage_types.go b/api/v1alpha1/cloudstorage_types.go index 5ea1b3d612..e4dfc4cce6 100644 --- a/api/v1alpha1/cloudstorage_types.go +++ b/api/v1alpha1/cloudstorage_types.go @@ -42,8 +42,11 @@ type CloudStorageSpec struct { // region for the bucket to be in, will be us-east-1 if not set. Region string `json:"region,omitempty"` // provider is the provider of the cloud storage - // +kubebuilder:validation:Enum=aws + // +kubebuilder:validation:Enum=aws;azure;gcp Provider CloudStorageProvider `json:"provider"` + // config is provider-specific configuration options + // +kubebuilder:validation:Optional + Config map[string]string `json:"config,omitempty"` // https://pkg.go.dev/github.com/Azure/azure-sdk-for-go/sdk/storage/azblob@v0.2.0#section-readme // azure blob primary endpoint diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 46bc5a374b..3ae753e3ac 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -237,6 +237,13 @@ func (in *CloudStorageSpec) DeepCopyInto(out *CloudStorageSpec) { (*out)[key] = val } } + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CloudStorageSpec. diff --git a/bundle/manifests/oadp.openshift.io_cloudstorages.yaml b/bundle/manifests/oadp.openshift.io_cloudstorages.yaml index ac4390b8a0..2120b01a56 100644 --- a/bundle/manifests/oadp.openshift.io_cloudstorages.yaml +++ b/bundle/manifests/oadp.openshift.io_cloudstorages.yaml @@ -39,6 +39,11 @@ spec: type: object spec: properties: + config: + additionalProperties: + type: string + description: config is provider-specific configuration options + type: object creationSecret: description: creationSecret is the secret that is needed to be used while creating the bucket. @@ -75,6 +80,8 @@ spec: description: provider is the provider of the cloud storage enum: - aws + - azure + - gcp type: string region: description: region for the bucket to be in, will be us-east-1 if diff --git a/config/crd/bases/oadp.openshift.io_cloudstorages.yaml b/config/crd/bases/oadp.openshift.io_cloudstorages.yaml index 7679c2a04c..2b017dae48 100644 --- a/config/crd/bases/oadp.openshift.io_cloudstorages.yaml +++ b/config/crd/bases/oadp.openshift.io_cloudstorages.yaml @@ -39,6 +39,11 @@ spec: type: object spec: properties: + config: + additionalProperties: + type: string + description: config is provider-specific configuration options + type: object creationSecret: description: creationSecret is the secret that is needed to be used while creating the bucket. 
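The new `spec.config` map is the hook for provider-specific settings; in this PR the Azure bucket client reads a `storageAccount` key from it when the storage account name is not supplied through the credential secret (see `getStorageAccountName` later in the diff). Below is a minimal sketch of a CloudStorage resource exercising the widened provider enum and the new field; the resource name, namespace, and secret name are illustrative.

[source,yaml]
----
apiVersion: oadp.openshift.io/v1alpha1
kind: CloudStorage
metadata:
  name: example-azure-container        # illustrative name
  namespace: openshift-adp
spec:
  name: velero-backups                 # blob container to create
  provider: azure                      # enum now accepts aws, azure, gcp
  creationSecret:                      # secret consulted while creating the container
    name: cloud-credentials-azure
    key: azurekey
  config:                              # new provider-specific options map
    storageAccount: examplestorageacct # fallback storage account name read by the Azure client
----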
@@ -75,6 +80,8 @@ spec: description: provider is the provider of the cloud storage enum: - aws + - azure + - gcp type: string region: description: region for the bucket to be in, will be us-east-1 if diff --git a/docs/config/aws/oadp-aws-sts-cloud-authentication.adoc b/docs/config/aws/oadp-aws-sts-cloud-authentication.adoc new file mode 100644 index 0000000000..a09b888cb1 --- /dev/null +++ b/docs/config/aws/oadp-aws-sts-cloud-authentication.adoc @@ -0,0 +1,536 @@ +// Module included in the following assembly: +// +// * backup_and_restore/application_backup_and_restore/installing/installing-oadp-aws.adoc + +:_mod-docs-content-type: PROCEDURE +[id="oadp-aws-sts-cloud-authentication_{context}"] += Configuring AWS STS Manual Mode for OADP + +AWS Security Token Service (STS) manual mode is the recommended way to access AWS S3 for OADP (OpenShift API for Data Protection) operations. This approach uses the OpenShift cluster's signed ServiceAccount tokens that are automatically refreshed hourly and exchanged for temporary AWS credentials via an AWS IAM OpenID Connect (OIDC) identity provider, eliminating the need for long-term static AWS credentials. + +[NOTE] +==== +This configuration requires `credentialsMode: Manual` during cluster installation and uses the Cloud Credential Operator utility (`ccoctl`) to set up the STS infrastructure, including ServiceAccount signing keys, S3 buckets or CloudFront distributions for OIDC configuration, and IAM roles for each component. +==== + +.Prerequisites + +* You have an OpenShift cluster installed on Amazon Web Services with STS manual mode configured. +* You have the AWS CLI (`aws`) installed and configured. +* You have access to the OpenShift cluster as a user with `cluster-admin` privileges. +* You have an AWS account with appropriate permissions. + +.Procedure + +. Set up environment variables for your configuration: ++ +[source,bash] +---- +# Get cluster information +export API_URL=$(oc whoami --show-server) +export CLUSTER_NAME=$(echo "$API_URL" | sed 's|https://api\.||' | sed 's|\..*||') +export CLUSTER_BASE_DOMAIN=$(oc get dns cluster -o jsonpath='{.spec.baseDomain}') + +# Get AWS information +export AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) +export AWS_REGION=$(aws configure get region) + +# Set names for OADP resources +export OIDC_BUCKET_NAME="${CLUSTER_NAME}-oidc" +export VELERO_BUCKET_NAME="${CLUSTER_NAME}-velero-backups" +export ROLE_NAME="openshift-adp-controller-manager" +---- + +. Create an S3 bucket for Velero backups: ++ +[source,bash] +---- +# Create backup storage bucket +aws s3 mb s3://${VELERO_BUCKET_NAME} --region ${AWS_REGION} + +# Enable versioning for backup integrity +aws s3api put-bucket-versioning \ + --bucket ${VELERO_BUCKET_NAME} \ + --versioning-configuration Status=Enabled + +---- + +. Get the OIDC issuer URL from your OpenShift cluster: ++ +[source,bash] +---- +export SERVICE_ACCOUNT_ISSUER=$(oc get authentication.config.openshift.io cluster -o json | jq -r .spec.serviceAccountIssuer) +echo "OIDC Issuer: $SERVICE_ACCOUNT_ISSUER" + +# Extract the issuer hostname for AWS IAM +export OIDC_ISSUER_HOST=$(echo "$SERVICE_ACCOUNT_ISSUER" | sed 's|https://||') +---- + +. 
Create IAM OIDC Identity Provider: ++ +[source,bash] +---- +# Get the OIDC thumbprint (this is typically the same for all OpenShift clusters) +export OIDC_THUMBPRINT=$(echo | openssl s_client -servername ${OIDC_ISSUER_HOST} -connect ${OIDC_ISSUER_HOST}:443 2>/dev/null | openssl x509 -fingerprint -noout -sha1 | sed 's/://g' | awk -F= '{print tolower($2)}') + +# Create OIDC Identity Provider +aws iam create-open-id-connect-provider \ + --url "${SERVICE_ACCOUNT_ISSUER}" \ + --client-id-list "openshift" "sts.amazonaws.com" \ + --thumbprint-list "${OIDC_THUMBPRINT}" + +# Get the OIDC Provider ARN +export OIDC_PROVIDER_ARN=$(aws iam list-open-id-connect-providers --query "OpenIDConnectProviderList[?ends_with(Arn, '${OIDC_ISSUER_HOST}')].Arn" --output text) +echo "OIDC Provider ARN: $OIDC_PROVIDER_ARN" +---- + +. Create IAM role for OADP with trust policy: ++ +[source,bash] +---- +# Create trust policy for the OADP role +cat > /tmp/trust-policy.json << EOF +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "${OIDC_PROVIDER_ARN}" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "${OIDC_ISSUER_HOST}:sub": [ + "system:serviceaccount:openshift-adp:velero", + "system:serviceaccount:openshift-adp:openshift-adp-controller-manager" + ], + "${OIDC_ISSUER_HOST}:aud": "openshift" + } + } + } + ] +} +EOF + +# Create the IAM role +aws iam create-role \ + --role-name ${ROLE_NAME} \ + --assume-role-policy-document file:///tmp/trust-policy.json \ + --description "Role for OpenShift OADP components" + +rm -f /tmp/trust-policy.json +---- + +. Attach the required policies to the IAM role: ++ +[source,bash] +---- +# Create permission policy for OADP operations +cat > /tmp/permission-policy.json << EOF +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:DeleteObject", + "s3:PutObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts" + ], + "Resource": [ + "arn:aws:s3:::${VELERO_BUCKET_NAME}/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "s3:ListBucket", + "s3:GetBucketVersioning", + "s3:ListBucketVersions", + "s3:ListBucketMultipartUploads", + "s3:GetBucketNotification", + "s3:PutBucketNotification" + ], + "Resource": [ + "arn:aws:s3:::${VELERO_BUCKET_NAME}" + ] + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DescribeVolumes", + "ec2:DescribeSnapshots", + "ec2:CreateTags", + "ec2:CreateVolume", + "ec2:CreateSnapshot", + "ec2:DeleteSnapshot" + ], + "Resource": "*" + } + ] +} +EOF + +# Create and attach the policy +aws iam put-role-policy \ + --role-name ${ROLE_NAME} \ + --policy-name OADPPolicy \ + --policy-document file:///tmp/permission-policy.json + +rm -f /tmp/permission-policy.json + +# Get the role ARN +export ROLE_ARN=$(aws iam get-role --role-name ${ROLE_NAME} --query Role.Arn --output text) +echo "Role ARN: $ROLE_ARN" +---- + +. Create the OADP namespace if it doesn't exist: ++ +[source,bash] +---- +oc create namespace openshift-adp +---- + +. 
Annotate the service accounts to use AWS STS: ++ +[source,bash] +---- +oc annotate serviceaccount velero -n openshift-adp \ + eks.amazonaws.com/role-arn="${ROLE_ARN}" --overwrite + +oc annotate serviceaccount openshift-adp-controller-manager -n openshift-adp \ + eks.amazonaws.com/role-arn="${ROLE_ARN}" --overwrite +---- + +[id="oadp-aws-cloud-storage-api_{context}"] +== Alternative: Using Cloud Storage API for Automated Bucket Management + +Instead of manually creating S3 buckets, you can use the OADP Cloud Storage API to automatically manage bucket creation and configuration. This approach requires OADP operator version with Cloud Storage API support. + +.Prerequisites for Cloud Storage API + +* OADP operator with Cloud Storage API functionality enabled +* The same AWS STS configuration as above + +.Procedure for Cloud Storage API + +. Create a CloudStorage resource instead of manually creating buckets: ++ +[source,yaml] +---- +cat < trust-policy.json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_ENDPOINT}" + }, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "${OIDC_ENDPOINT}:sub": [ + "system:serviceaccount:openshift-adp:velero", + "system:serviceaccount:openshift-adp:openshift-adp-controller-manager" + ] + } + } + } + ] +} +EOF + +# Create IAM role with ROSA-specific tags +aws iam create-role \ + --role-name ${ROLE_NAME} \ + --assume-role-policy-document file://trust-policy.json \ + --tags Key=red-hat-clustertype,Value=rosa \ + Key=red-hat-managed,Value=true + +echo "Created IAM role: ${ROLE_NAME}" +---- + +. Create IAM policy with enhanced S3 permissions: ++ +[source,bash] +---- +# Create enhanced IAM policy for OADP operations +cat < oadp-policy.json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:CreateBucket", + "s3:DeleteBucket", + "s3:GetBucketLocation", + "s3:GetBucketTagging", + "s3:ListAllMyBuckets", + "s3:ListBucket", + "s3:PutBucketTagging", + "s3:GetBucketVersioning", + "s3:PutBucketVersioning", + "s3:GetBucketAcl", + "s3:PutBucketAcl", + "s3:GetBucketCORS", + "s3:PutBucketCORS", + "s3:DeleteBucketCORS", + "s3:GetBucketWebsite", + "s3:PutBucketWebsite", + "s3:DeleteBucketWebsite", + "s3:GetBucketVersioning", + "s3:PutBucketVersioning", + "s3:GetBucketAcl", + "s3:PutBucketAcl", + "s3:GetBucketCORS", + "s3:PutBucketCORS", + "s3:DeleteBucketCORS", + "s3:GetBucketWebsite", + "s3:PutBucketWebsite", + "s3:DeleteBucketWebsite", + "s3:GetBucketLocation", + "s3:GetBucketRequestPayment", + "s3:GetBucketLogging", + "s3:GetBucketNotification", + "s3:GetBucketPolicy", + "s3:DeleteBucketPolicy", + "s3:GetBucketPolicyStatus", + "s3:PutBucketPolicy", + "s3:GetBucketPublicAccessBlock", + "s3:PutBucketPublicAccessBlock", + "s3:GetBucketObjectLockConfiguration", + "s3:PutBucketObjectLockConfiguration", + "s3:GetBucketTagging", + "s3:PutBucketTagging", + "s3:GetObject", + "s3:GetObjectAcl", + "s3:GetObjectAttributes", + "s3:GetObjectLegalHold", + "s3:GetObjectRetention", + "s3:GetObjectTagging", + "s3:GetObjectVersion", + "s3:PutObject", + "s3:PutObjectAcl", + "s3:PutObjectLegalHold", + "s3:PutObjectRetention", + "s3:PutObjectTagging", + "s3:RestoreObject", + "s3:DeleteObject", + "s3:DeleteObjectVersion", + "s3:PutLifecycleConfiguration", + "s3:GetLifecycleConfiguration", + "s3:DeleteLifecycleConfiguration" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DescribeVolumes", + 
"ec2:DescribeSnapshots", + "ec2:CreateTags", + "ec2:DescribeInstances", + "ec2:CreateSnapshot", + "ec2:DeleteSnapshot" + ], + "Resource": "*" + } + ] +} +EOF + +# Attach policy to the IAM role +aws iam put-role-policy \ + --role-name ${ROLE_NAME} \ + --policy-name openshift-oadp-aws-cloud-credentials \ + --policy-document file://oadp-policy.json + +echo "Created and attached IAM policy to role: ${ROLE_NAME}" +---- + +. Create AWS credentials file and OADP namespace: ++ +[source,bash] +---- +# Create temporary directory for credentials +export SCRATCH="/tmp/oadp-rosa-setup" +mkdir -p ${SCRATCH} + +# Create AWS credentials file using ROSA STS format +cat < ${SCRATCH}/credentials +[default] +role_arn = ${ROLE_ARN} +web_identity_token_file = /var/run/secrets/openshift/serviceaccount/token +region = ${AWS_REGION} +EOF + +# Create OADP namespace +oc create namespace openshift-adp + +# Create secret with AWS credentials +oc -n openshift-adp create secret generic cloud-credentials-aws \ + --from-file=credentials=${SCRATCH}/credentials + +# Verify secret creation +oc -n openshift-adp get secret cloud-credentials-aws + +echo "Created credentials secret in openshift-adp namespace" +---- + +. Create S3 bucket with ROSA-specific tagging: ++ +[source,bash] +---- +# Create backup storage bucket +aws s3 mb s3://${VELERO_BUCKET_NAME} --region ${AWS_REGION} + +# Apply ROSA-specific tags to the bucket +aws s3api put-bucket-tagging \ + --bucket ${VELERO_BUCKET_NAME} \ + --tagging TagSet='[ + { + "Key": "red-hat-clustertype", + "Value": "rosa" + }, + { + "Key": "red-hat-managed", + "Value": "true" + }, + { + "Key": "cluster", + "Value": "'${CLUSTER_NAME}'" + }, + { + "Key": "component", + "Value": "oadp" + } + ]' + +# Enable versioning for backup integrity +aws s3api put-bucket-versioning \ + --bucket ${VELERO_BUCKET_NAME} \ + --versioning-configuration Status=Enabled + +# Block public access for security +aws s3api put-public-access-block \ + --bucket ${VELERO_BUCKET_NAME} \ + --public-access-block-configuration \ + BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true + +echo "Created and configured S3 bucket: ${VELERO_BUCKET_NAME}" +---- + +[id="oadp-rosa-cloud-storage-api_{context}"] +== Alternative: Using Cloud Storage API for Automated Bucket Management + +The OADP Cloud Storage API can automatically manage S3 bucket creation and configuration for ROSA STS clusters. + +.Prerequisites for Cloud Storage API + +* OADP operator with Cloud Storage API functionality enabled +* The same ROSA STS configuration as above + +.Procedure for Cloud Storage API + +. Create a CloudStorage resource for ROSA: ++ +[source,yaml] +---- +cat </dev/null || echo "Credentials file location may vary" +---- + +. 
Test backup functionality with ROSA-specific workload: ++ +[source,bash] +---- +# Create test namespace +oc create namespace test-backup-rosa + +# Create ROSA-compatible deployment +cat << EOF | oc apply -f - +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rosa-test-app + namespace: test-backup-rosa + labels: + app: rosa-test-app +spec: + replicas: 2 + selector: + matchLabels: + app: rosa-test-app + template: + metadata: + labels: + app: rosa-test-app + spec: + containers: + - name: nginx + image: registry.redhat.io/ubi8/nginx-120:latest + ports: + - containerPort: 8080 + protocol: TCP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault +--- +apiVersion: v1 +kind: Service +metadata: + name: rosa-test-app + namespace: test-backup-rosa +spec: + selector: + app: rosa-test-app + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP +EOF + +# Wait for deployment +oc wait --for=condition=available deployment/rosa-test-app -n test-backup-rosa --timeout=300s + +# Create backup +velero backup create rosa-sts-test --include-namespaces=test-backup-rosa +---- + +. Monitor backup status: ++ +[source,bash] +---- +velero backup describe rosa-sts-test --details + +# Verify backup files in S3 +aws s3 ls s3://${VELERO_BUCKET_NAME}/velero/backups/ --recursive +---- + +[NOTE] +==== +ROSA STS clusters automatically provide the OIDC identity provider configuration and trust relationships required for STS token exchange. The credential files use `web_identity_token_file` pointing to OpenShift service account tokens that are automatically rotated by the platform. +==== + +[IMPORTANT] +==== +* **Restic is unsupported** in ROSA environments - use Kopia instead for file system backups +* **Data Mover** is not currently supported in ROSA clusters - use native AWS S3 tools for cross-region operations +* Ensure your ROSA cluster has appropriate AWS service quotas for EBS snapshots and S3 operations +* Only specific storage classes are supported: `gp3-csi`, `gp2-csi`, `gp3`, `gp2` +==== + +.Troubleshooting + +If you encounter issues with OADP on ROSA STS: + +* Verify ROSA cluster status and OIDC configuration: ++ +[source,bash] +---- +rosa describe cluster -c ${CLUSTER_NAME} +rosa logs install -c ${CLUSTER_NAME} --tail 50 +---- + +* Check IAM role configuration: ++ +[source,bash] +---- +# Extract role name from ARN for validation +ROLE_NAME=$(echo ${ROLE_ARN} | awk -F'/' '{print $NF}') + +# Check role exists and trust policy +aws iam get-role --role-name ${ROLE_NAME} +aws iam list-role-policies --role-name ${ROLE_NAME} +aws iam list-attached-role-policies --role-name ${ROLE_NAME} +---- + +* Validate OIDC provider configuration: ++ +[source,bash] +---- +# List OIDC providers in the account +aws iam list-open-id-connect-providers + +# Get OIDC endpoint from cluster for validation +OIDC_ENDPOINT=$(oc get authentication.config.openshift.io cluster -o json | jq -r .spec.serviceAccountIssuer) +echo "Cluster OIDC Endpoint: $OIDC_ENDPOINT" + +# Validate the OIDC provider exists +aws iam get-open-id-connect-provider --open-id-connect-provider-arn arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_ENDPOINT#https://} +---- + +* Review OADP operator logs for ROSA-specific issues: ++ +[source,bash] +---- +oc logs -n openshift-adp deployment/oadp-operator-controller-manager | grep -i rosa +oc logs -n openshift-adp deployment/velero | grep -i sts 
+---- + +* Test S3 bucket access and credentials: ++ +[source,bash] +---- +# Test bucket access with current credentials +aws s3 ls s3://${VELERO_BUCKET_NAME}/ + +# Check credentials secret in OpenShift +oc get secret cloud-credentials-aws -n openshift-adp -o jsonpath='{.data.credentials}' | base64 -d + +# Test STS authentication +aws sts get-caller-identity +---- + +* Check ROSA-specific networking considerations: ++ +[source,bash] +---- +# Verify outbound connectivity for S3 and STS services +oc debug node/$(oc get nodes -o jsonpath='{.items[0].metadata.name}') -- chroot /host curl -I https://s3.${AWS_REGION}.amazonaws.com +oc debug node/$(oc get nodes -o jsonpath='{.items[0].metadata.name}') -- chroot /host curl -I https://sts.${AWS_REGION}.amazonaws.com +---- + +== Cleanup Procedures + +To remove OADP and associated AWS resources: + +. Remove OADP operator and resources: ++ +[source,bash] +---- +# Delete DataProtectionApplication +oc delete dpa --all -n openshift-adp + +# Delete CloudStorage resources if used +oc delete cloudstorage --all -n openshift-adp + +# Delete OADP namespace +oc delete namespace openshift-adp + +# Remove OADP operator subscription +oc delete subscription oadp-operator -n openshift-operators 2>/dev/null || true + +# Remove operator CSV +oc delete csv $(oc get csv -n openshift-operators | grep oadp | awk '{print $1}') -n openshift-operators 2>/dev/null || true +---- + +. Clean up AWS resources: ++ +[source,bash] +---- +# Delete S3 bucket contents and bucket +aws s3 rm s3://${VELERO_BUCKET_NAME} --recursive 2>/dev/null || true +aws s3 rb s3://${VELERO_BUCKET_NAME} 2>/dev/null || echo "Bucket ${VELERO_BUCKET_NAME} not found or already deleted" + +# Remove IAM role policy +aws iam delete-role-policy \ + --role-name ${ROLE_NAME} \ + --policy-name openshift-oadp-aws-cloud-credentials 2>/dev/null || echo "Policy not found" + +# Delete IAM role +aws iam delete-role --role-name ${ROLE_NAME} 2>/dev/null || echo "Role ${ROLE_NAME} not found" + +# Clean up temporary files +rm -f trust-policy.json oadp-policy.json +rm -rf ${SCRATCH} + +echo "OADP cleanup completed" +---- + +[IMPORTANT] +==== +* Verify all backups are no longer needed before deleting the S3 bucket +* The IAM role and OIDC provider are cluster-specific and should only be deleted if the cluster is being decommissioned +* Consider backing up any important data before running cleanup procedures +==== \ No newline at end of file diff --git a/docs/config/azure/oadp-azure-sts-cloud-authentication.adoc b/docs/config/azure/oadp-azure-sts-cloud-authentication.adoc new file mode 100644 index 0000000000..fb682b26af --- /dev/null +++ b/docs/config/azure/oadp-azure-sts-cloud-authentication.adoc @@ -0,0 +1,509 @@ +// Module included in the following assembly: +// +// * backup_and_restore/application_backup_and_restore/installing/installing-oadp-azure.adoc + +:_mod-docs-content-type: PROCEDURE +[id="oadp-azure-entra-workload-id-authentication_{context}"] += Configuring Microsoft Entra Workload ID for OADP + +Microsoft Entra Workload ID is the recommended way to access Azure Storage for OADP (OpenShift Application Data Protection) operations. This approach uses the OpenShift cluster's signed Kubernetes ServiceAccount tokens, which are automatically rotated hourly and exchanged for Azure AD access tokens, eliminating the need for long-term client secrets. 
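+
+As a rough orientation, the credential that OADP ends up consuming in this mode is a secret whose `azurekey` entry carries only identifiers (no client secret), because the short-lived federated token is read from the projected ServiceAccount token file at runtime. The secret name and values below are illustrative; the label and key names match what the operator's Azure bucket client looks for.
+
+[source,yaml]
+----
+apiVersion: v1
+kind: Secret
+metadata:
+  name: cloud-credentials-azure            # illustrative name
+  namespace: openshift-adp
+  labels:
+    oadp.openshift.io/secret-type: sts-credentials
+stringData:
+  azurekey: |
+    AZURE_SUBSCRIPTION_ID=<subscription-id>
+    AZURE_TENANT_ID=<tenant-id>
+    AZURE_CLIENT_ID=<client-id-of-the-managed-identity>
+    AZURE_CLOUD_NAME=AzurePublicCloud
+----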
+ +[NOTE] +==== +This configuration requires `credentialsMode: Manual` during cluster installation and uses the Cloud Credential Operator (`ccoctl`) to set up the workload identity infrastructure, including OIDC issuer configuration and User-Assigned Managed Identities. +==== + +.Prerequisites + +* You have an OpenShift cluster installed on Microsoft Azure with Microsoft Entra Workload ID configured. +* You have the Azure CLI (`az`) installed and configured. +* You have access to the OpenShift cluster as a user with `cluster-admin` privileges. +* You have an Azure subscription with appropriate permissions. + +[NOTE] +==== +If your OpenShift cluster was not originally installed with Microsoft Entra Workload ID, you can enable short-term credentials after installation. This post-installation configuration is supported specifically for Azure clusters. +==== + +[id="post-install-enable-workload-id_{context}"] +== Enabling Microsoft Entra Workload ID After Installation + +If your cluster was installed with long-term credentials, you can switch to Microsoft Entra Workload ID authentication after installation. This is only supported on Azure. + +.Prerequisites for post-installation configuration + +* An existing OpenShift cluster on Azure using long-term credentials +* Cluster administrator access +* Azure CLI with appropriate permissions + +.Procedure to enable Microsoft Entra Workload ID post-installation + +. Update the cluster authentication configuration to enable token authentication: ++ +[source,bash] +---- +# Enable short-term credential support +oc patch authentication.config.openshift.io cluster \ + --type='merge' \ + --patch='{"spec":{"type":"IntegratedOAuth","oauthMetadata":{"name":"oauth-openshift"}}}' + +# Verify the service account issuer is configured +oc get authentication.config.openshift.io cluster -o jsonpath='{.spec.serviceAccountIssuer}' +---- + +. Follow the standard Microsoft Entra Workload ID configuration procedure below to complete the setup. + +[IMPORTANT] +==== +After enabling Microsoft Entra Workload ID on an existing cluster, you must update all cluster components that use cloud credentials, including OADP, to use the new authentication method. +==== + +.Procedure + +. Set up environment variables for your configuration: ++ +[source,bash] +---- +# Get cluster information +export API_URL=$(oc whoami --show-server) +export CLUSTER_NAME=$(echo "$API_URL" | sed 's|https://api\.||' | sed 's|\..*||') +export CLUSTER_RESOURCE_GROUP="${CLUSTER_NAME}-rg" + +# Get Azure information +export AZURE_SUBSCRIPTION_ID=$(az account show --query id -o tsv) +export AZURE_TENANT_ID=$(az account show --query tenantId -o tsv) + +# Set names for resources +export IDENTITY_NAME="velero" +export APP_NAME="velero-${CLUSTER_NAME}" +export STORAGE_ACCOUNT_NAME=$(echo "velero${CLUSTER_NAME}" | tr -d '-' | tr '[:upper:]' '[:lower:]' | cut -c1-24) +export CONTAINER_NAME="velero" +---- + +. Create an Azure Managed Identity for OADP: ++ +[source,bash] +---- +# Create managed identity +az identity create \ + --subscription "$AZURE_SUBSCRIPTION_ID" \ + --resource-group "$CLUSTER_RESOURCE_GROUP" \ + --name "$IDENTITY_NAME" + +# Get identity details +export IDENTITY_CLIENT_ID=$(az identity show -g "$CLUSTER_RESOURCE_GROUP" -n "$IDENTITY_NAME" --query clientId -o tsv) +export IDENTITY_PRINCIPAL_ID=$(az identity show -g "$CLUSTER_RESOURCE_GROUP" -n "$IDENTITY_NAME" --query principalId -o tsv) +---- + +. 
Grant the required Azure roles to the managed identity: ++ +[source,bash] +---- +# Get subscription ID for role assignments +export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + +# Required roles for OADP operations +REQUIRED_ROLES=( + "Contributor" + "Storage Blob Data Contributor" + "Disk Snapshot Contributor" +) + +for role in "${REQUIRED_ROLES[@]}"; do + echo "Assigning role: $role" + az role assignment create \ + --assignee "$IDENTITY_PRINCIPAL_ID" \ + --role "$role" \ + --scope "/subscriptions/$SUBSCRIPTION_ID" +done +---- + +. Create an Azure Storage Account and container: ++ +[source,bash] +---- +# Create storage account +az storage account create \ + --name "$STORAGE_ACCOUNT_NAME" \ + --resource-group "$CLUSTER_RESOURCE_GROUP" \ + --location "$(az group show -n $CLUSTER_RESOURCE_GROUP --query location -o tsv)" \ + --sku Standard_LRS \ + --kind StorageV2 + +# Create container for backups +az storage container create \ + --name "$CONTAINER_NAME" \ + --account-name "$STORAGE_ACCOUNT_NAME" \ + --auth-mode login +---- + +. Get the OIDC issuer URL from your OpenShift cluster: ++ +[source,bash] +---- +export SERVICE_ACCOUNT_ISSUER=$(oc get authentication.config.openshift.io cluster -o json | jq -r .spec.serviceAccountIssuer) +echo "OIDC Issuer: $SERVICE_ACCOUNT_ISSUER" +---- + +. Configure Microsoft Entra Workload ID Federation: ++ +[source,bash] +---- +# Create federated identity credential for Velero service account +az identity federated-credential create \ + --name "velero-federated-credential" \ + --identity-name "$IDENTITY_NAME" \ + --resource-group "$CLUSTER_RESOURCE_GROUP" \ + --issuer "$SERVICE_ACCOUNT_ISSUER" \ + --subject "system:serviceaccount:openshift-adp:velero" + +# Create federated identity credential for OADP controller manager +az identity federated-credential create \ + --name "oadp-controller-federated-credential" \ + --identity-name "$IDENTITY_NAME" \ + --resource-group "$CLUSTER_RESOURCE_GROUP" \ + --issuer "$SERVICE_ACCOUNT_ISSUER" \ + --subject "system:serviceaccount:openshift-adp:openshift-adp-controller-manager" +---- + +. Create the OADP namespace if it doesn't exist: ++ +[source,bash] +---- +oc create namespace openshift-adp +---- + +. Annotate the service accounts to use Microsoft Entra Workload ID: ++ +[source,bash] +---- +oc annotate serviceaccount velero -n openshift-adp \ + azure.workload.identity/client-id="$IDENTITY_CLIENT_ID" --overwrite + +oc annotate serviceaccount openshift-adp-controller-manager -n openshift-adp \ + azure.workload.identity/client-id="$IDENTITY_CLIENT_ID" --overwrite +---- + +[id="oadp-azure-cloud-storage-api_{context}"] +== Alternative: Using Cloud Storage API for Automated Container Management + +Instead of manually creating storage containers, you can use the OADP Cloud Storage API to automatically manage container creation and configuration. This approach requires OADP operator version with Cloud Storage API support. + +.Prerequisites for Cloud Storage API + +* OADP operator with Cloud Storage API functionality enabled +* The same Microsoft Entra Workload ID configuration as above + +.Procedure for Cloud Storage API + +. 
Create a CloudStorage resource instead of manually creating containers: ++ +[source,yaml] +---- +cat < 0 { + if err := a.validateAndConvertTags(); err != nil { + return false, fmt.Errorf("invalid tags: %w", err) + } + // Convert tags to metadata (which requires pointer values) + metadata := make(map[string]*string) + for k, v := range a.bucket.Spec.Tags { + metadata[k] = to.Ptr(v) + } + createOptions.Metadata = metadata + } + + _, err = containerClient.Create(ctx, createOptions) + if err != nil { + // Use bloberror package for Azure-specific error handling (following Velero pattern) + if bloberror.HasCode(err, bloberror.ContainerAlreadyExists) { + return true, nil // Idempotent behavior + } + + var respErr *azcore.ResponseError + if errors.As(err, &respErr) { + switch respErr.ErrorCode { + case string(bloberror.AuthenticationFailed): + return false, fmt.Errorf("authentication failed: check credentials") + case "AuthorizationFailed": + return false, fmt.Errorf("authorization failed: check permissions") + case string(bloberror.InvalidResourceName): + return false, fmt.Errorf("invalid container name: %s", containerName) + case "AccountNotFound": + return false, fmt.Errorf("storage account not found") + } + } + return false, fmt.Errorf("failed to create container: %w", err) + } + + return true, nil +} + +// Delete removes the container (idempotent operation) +func (a *azureBucketClient) Delete() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + containerName := a.bucket.Spec.Name + if err := validateContainerName(containerName); err != nil { + return false, fmt.Errorf("invalid container name: %w", err) + } + + azureClient, err := a.createAzureClient() + if err != nil { + return false, fmt.Errorf("failed to create Azure client: %w", err) + } + + containerClient := azureClient.NewContainerClient(containerName) + + _, err = containerClient.Delete(ctx, nil) + if err != nil { + // Use bloberror package for Azure-specific error handling (following Velero pattern) + if bloberror.HasCode(err, bloberror.ContainerNotFound) { + return true, nil // Idempotent behavior + } + + var respErr *azcore.ResponseError + if errors.As(err, &respErr) { + switch respErr.ErrorCode { + case string(bloberror.AuthenticationFailed): + return false, fmt.Errorf("authentication failed: check credentials") + case "AuthorizationFailed": + return false, fmt.Errorf("authorization failed: check permissions") + case "AccountNotFound": + // Storage account doesn't exist - treat as successful deletion (idempotent) + return true, nil + } + } + return false, fmt.Errorf("failed to delete container: %w", err) + } + + return true, nil +} + +// createAzureClient creates an Azure blob service client with appropriate authentication +// +// Unlike AWS and GCP providers which use getCredentialFromCloudStorageSecret to create +// temporary credential files, Azure handles credentials directly without file intermediaries. +// +// This approach is designed to be compatible with Velero's Azure credential handling +// without requiring upstream changes to Velero. Velero expects Azure credentials as: +// https://github.com/vmware-tanzu/velero/blob/main/pkg/util/azure/credential.go +// - Environment variables (for Workload Identity) +// - Direct secret values (for storage keys and service principals) +// +// By handling credentials directly in our Azure implementation, we maintain compatibility +// with Velero's expectations while avoiding the complexity of temporary credential files. 
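+//
+// As a rough map, the secret keys each path consults are (names follow the
+// checks in hasWorkloadIdentityCredentials and hasServicePrincipalCredentials
+// below; values are illustrative):
+//   - AZURE_STORAGE_ACCOUNT_ACCESS_KEY                          -> storage account key
+//   - azurekey in an STS-labelled secret, or AZURE_TENANT_ID +
+//     AZURE_CLIENT_ID alongside AZURE_FEDERATED_TOKEN_FILE      -> workload identity
+//   - AZURE_TENANT_ID + AZURE_CLIENT_ID + AZURE_CLIENT_SECRET   -> service principal
+//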
+// The Azure SDK supports creating credentials programmatically from values, making the +// file-based approach unnecessary. +// +// The three authentication methods supported are: +// 1. Storage Account Key - uses NewSharedKeyCredential +// 2. Workload Identity (federated tokens) - uses NewWorkloadIdentityCredential +// 3. Service Principal - uses NewClientSecretCredential +func (a *azureBucketClient) createAzureClient() (azureServiceClient, error) { + secret, err := a.getSecret() + if err != nil { + return nil, fmt.Errorf("failed to get secret: %w", err) + } + + storageAccountName, err := getStorageAccountName(a.bucket, secret) + if err != nil { + return nil, err + } + + if err := validateStorageAccountName(storageAccountName); err != nil { + return nil, fmt.Errorf("invalid storage account name: %w", err) + } + + serviceURL := fmt.Sprintf("https://%s.blob.core.windows.net/", storageAccountName) + + // Use factory if provided (for testing) + if a.clientFactory != nil { + // Try storage account key authentication first (higher priority) + if accessKey, ok := secret.Data["AZURE_STORAGE_ACCOUNT_ACCESS_KEY"]; ok && len(accessKey) > 0 { + credential, err := azblob.NewSharedKeyCredential(storageAccountName, string(accessKey)) + if err != nil { + return nil, fmt.Errorf("failed to create shared key credential: %w", err) + } + return a.clientFactory(serviceURL, nil, credential) + } + + // For other auth methods, we'll pass the token credential + var tokenCred azcore.TokenCredential + var err error + + if a.hasWorkloadIdentityCredentials(secret) { + tokenCred, err = a.createWorkloadIdentityCredential(secret) + } else if a.hasServicePrincipalCredentials(secret) { + tokenCred, err = a.createServicePrincipalCredential(secret) + } else { + tokenCred, err = azidentity.NewDefaultAzureCredential(nil) + } + + if err != nil { + return nil, err + } + + return a.clientFactory(serviceURL, tokenCred, nil) + } + + // Default implementation (production code) + // Try storage account key authentication first (higher priority) + if accessKey, ok := secret.Data["AZURE_STORAGE_ACCOUNT_ACCESS_KEY"]; ok && len(accessKey) > 0 { + credential, err := azblob.NewSharedKeyCredential(storageAccountName, string(accessKey)) + if err != nil { + return nil, fmt.Errorf("failed to create shared key credential: %w", err) + } + + azClient, err := azblob.NewClientWithSharedKeyCredential(serviceURL, credential, nil) + if err != nil { + return nil, fmt.Errorf("failed to create Azure client with shared key: %w", err) + } + + return &realAzureServiceClient{client: azClient}, nil + } + + // Check if Azure Workload Identity is configured (federated tokens) + if a.hasWorkloadIdentityCredentials(secret) { + credential, err := a.createWorkloadIdentityCredential(secret) + if err != nil { + return nil, fmt.Errorf("failed to create workload identity credential: %w", err) + } + + azClient, err := azblob.NewClient(serviceURL, credential, nil) + if err != nil { + return nil, fmt.Errorf("failed to create Azure client with workload identity: %w", err) + } + + return &realAzureServiceClient{client: azClient}, nil + } + + // Fall back to service principal authentication + if a.hasServicePrincipalCredentials(secret) { + credential, err := a.createServicePrincipalCredential(secret) + if err != nil { + return nil, fmt.Errorf("failed to create service principal credential: %w", err) + } + + azClient, err := azblob.NewClient(serviceURL, credential, nil) + if err != nil { + return nil, fmt.Errorf("failed to create Azure client with service principal: %w", 
err) + } + + return &realAzureServiceClient{client: azClient}, nil + } + + // Try DefaultAzureCredential as last resort (supports various auth methods including managed identity) + credential, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, fmt.Errorf("failed to create default Azure credential: %w", err) + } + + azClient, err := azblob.NewClient(serviceURL, credential, nil) + if err != nil { + return nil, fmt.Errorf("failed to create Azure client with default credential: %w", err) + } + + return &realAzureServiceClient{client: azClient}, nil +} + +// hasWorkloadIdentityCredentials checks if the secret contains workload identity credentials +func (a *azureBucketClient) hasWorkloadIdentityCredentials(secret *corev1.Secret) bool { + // Check if this is an STS-type secret created by OADP operator + if labels, ok := secret.Labels["oadp.openshift.io/secret-type"]; ok && labels == "sts-credentials" { + // For Azure STS secrets, check if it has the azurekey field + if azureKey, ok := secret.Data["azurekey"]; ok && len(azureKey) > 0 { + // Parse the azurekey to ensure it has the required fields + keyStr := string(azureKey) + return strings.Contains(keyStr, "AZURE_CLIENT_ID") && + strings.Contains(keyStr, "AZURE_TENANT_ID") && + strings.Contains(keyStr, "AZURE_SUBSCRIPTION_ID") + } + } + + // Check if AZURE_FEDERATED_TOKEN_FILE is in secret data + if _, ok := secret.Data["AZURE_FEDERATED_TOKEN_FILE"]; ok { + // Check if required fields are in the secret + requiredFields := []string{"AZURE_TENANT_ID", "AZURE_CLIENT_ID"} + for _, field := range requiredFields { + if _, ok := secret.Data[field]; !ok { + return false + } + } + return true + } + + // Also check for environment variable (for backward compatibility) + if os.Getenv("AZURE_FEDERATED_TOKEN_FILE") != "" { + // Check if required fields are in the secret + requiredFields := []string{"AZURE_TENANT_ID", "AZURE_CLIENT_ID"} + for _, field := range requiredFields { + if _, ok := secret.Data[field]; !ok { + return false + } + } + return true + } + return false +} + +// createWorkloadIdentityCredential creates a workload identity credential for federated tokens +func (a *azureBucketClient) createWorkloadIdentityCredential(secret *corev1.Secret) (azcore.TokenCredential, error) { + var tenantID, clientID, tokenFile string + + // First try to parse from azurekey field (STS secret format) + if azureKey, ok := secret.Data["azurekey"]; ok && len(azureKey) > 0 { + // Parse the environment variable format + keyStr := string(azureKey) + lines := strings.Split(keyStr, "\n") + envVars := make(map[string]string) + + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" { + continue + } + parts := strings.SplitN(line, "=", 2) + if len(parts) == 2 { + envVars[parts[0]] = parts[1] + } + } + + tenantID = envVars["AZURE_TENANT_ID"] + clientID = envVars["AZURE_CLIENT_ID"] + + if tenantID == "" || clientID == "" { + return nil, fmt.Errorf("missing AZURE_TENANT_ID or AZURE_CLIENT_ID in azurekey") + } + } else { + // Fall back to individual fields + tenantID = string(secret.Data["AZURE_TENANT_ID"]) + clientID = string(secret.Data["AZURE_CLIENT_ID"]) + } + + // Check for federated token file in order of priority + // 1. From secret data + if tokenFileData, ok := secret.Data["AZURE_FEDERATED_TOKEN_FILE"]; ok && len(tokenFileData) > 0 { + tokenFile = string(tokenFileData) + } else if envTokenFile := os.Getenv("AZURE_FEDERATED_TOKEN_FILE"); envTokenFile != "" { + // 2. 
From environment variable + tokenFile = envTokenFile + } else { + // 3. Default for OpenShift environments + tokenFile = "/var/run/secrets/openshift/serviceaccount/token" + } + + // WorkloadIdentityCredential handles federated tokens automatically + options := &azidentity.WorkloadIdentityCredentialOptions{ + ClientID: clientID, + TenantID: tenantID, + TokenFilePath: tokenFile, + } + + credential, err := azidentity.NewWorkloadIdentityCredential(options) + if err != nil { + return nil, fmt.Errorf("failed to create workload identity credential: %w", err) + } + + return credential, nil +} + +// hasServicePrincipalCredentials checks if the secret contains service principal credentials +func (a *azureBucketClient) hasServicePrincipalCredentials(secret *corev1.Secret) bool { + requiredFields := []string{"AZURE_TENANT_ID", "AZURE_CLIENT_ID", "AZURE_CLIENT_SECRET"} + for _, field := range requiredFields { + if _, ok := secret.Data[field]; !ok { + return false + } + } + return true +} + +// createServicePrincipalCredential creates a service principal credential +func (a *azureBucketClient) createServicePrincipalCredential(secret *corev1.Secret) (azcore.TokenCredential, error) { + tenantID := string(secret.Data["AZURE_TENANT_ID"]) + clientID := string(secret.Data["AZURE_CLIENT_ID"]) + clientSecret := string(secret.Data["AZURE_CLIENT_SECRET"]) + + credential, err := azidentity.NewClientSecretCredential(tenantID, clientID, clientSecret, nil) + if err != nil { + return nil, fmt.Errorf("failed to create client secret credential: %w", err) + } + + return credential, nil +} + +// validateAndConvertTags validates tags meet Azure requirements +func (a *azureBucketClient) validateAndConvertTags() error { + if len(a.bucket.Spec.Tags) > 50 { + return fmt.Errorf("too many tags: Azure supports maximum 50 tags per resource") + } + + for key, value := range a.bucket.Spec.Tags { + if len(key) == 0 || len(key) > 512 { + return fmt.Errorf("tag name '%s' must be between 1 and 512 characters", key) + } + if len(value) > 256 { + return fmt.Errorf("tag value for '%s' must be 256 characters or less", key) + } + } + + return nil +} + +// getSecret retrieves the secret referenced in the CloudStorage spec +func (a *azureBucketClient) getSecret() (*corev1.Secret, error) { + secret := &corev1.Secret{} + secretKey := client.ObjectKey{ + Namespace: a.bucket.Namespace, + Name: a.bucket.Spec.CreationSecret.Name, + } + + if err := a.client.Get(context.Background(), secretKey, secret); err != nil { + return nil, fmt.Errorf("failed to get secret %s/%s: %w", + a.bucket.Namespace, a.bucket.Spec.CreationSecret.Name, err) + } + + return secret, nil +} + +// validateContainerName validates Azure container naming rules +func validateContainerName(name string) error { + if len(name) < 3 || len(name) > 63 { + return fmt.Errorf("container name must be between 3 and 63 characters") + } + + // Must start with letter or number + if !regexp.MustCompile(`^[a-z0-9]`).MatchString(name) { + return fmt.Errorf("container name must start with a letter or number") + } + + // Can only contain lowercase letters, numbers, and hyphens + if !regexp.MustCompile(`^[a-z0-9-]+$`).MatchString(name) { + return fmt.Errorf("container name can only contain lowercase letters, numbers, and hyphens") + } + + // Cannot have consecutive hyphens + if strings.Contains(name, "--") { + return fmt.Errorf("container name cannot contain consecutive hyphens") + } + + return nil +} + +// validateStorageAccountName validates Azure storage account naming rules +func 
validateStorageAccountName(name string) error { + if len(name) < 3 || len(name) > 24 { + return fmt.Errorf("storage account name must be between 3 and 24 characters") + } + + // Can only contain lowercase letters and numbers + if !regexp.MustCompile(`^[a-z0-9]+$`).MatchString(name) { + return fmt.Errorf("storage account name can only contain lowercase letters and numbers") + } + + return nil +} + +func getStorageAccountName(bucket v1alpha1.CloudStorage, secret *corev1.Secret) (string, error) { + // Priority 1: From secret - check standard field first + if accountName, ok := secret.Data["AZURE_STORAGE_ACCOUNT"]; ok && len(accountName) > 0 { + return string(accountName), nil + } + + // Priority 2: Parse from azurekey field (STS secret format) + if azureKey, ok := secret.Data["azurekey"]; ok && len(azureKey) > 0 { + // Parse the environment variable format + keyStr := string(azureKey) + lines := strings.Split(keyStr, "\n") + + for _, line := range lines { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "AZURE_STORAGE_ACCOUNT=") { + parts := strings.SplitN(line, "=", 2) + if len(parts) == 2 && parts[1] != "" { + return parts[1], nil + } + } + } + } + + // Priority 3: From CloudStorage config + if bucket.Spec.Config != nil { + if accountName, ok := bucket.Spec.Config["storageAccount"]; ok && accountName != "" { + return accountName, nil + } + } + + return "", fmt.Errorf("storage account name not found in secret (AZURE_STORAGE_ACCOUNT), azurekey, or config") +} + +// isRetryableError determines if an Azure error should be retried +// Note: Some error codes like "AuthorizationFailed", "AccountNotFound", and "ServiceUnavailable" +// are not defined in the bloberror package constants but are actual error codes returned by Azure +func isRetryableError(err error) bool { + // Use bloberror package for specific Azure Storage errors (following Velero pattern) + if bloberror.HasCode(err, bloberror.ContainerNotFound) || + bloberror.HasCode(err, bloberror.ContainerAlreadyExists) { + return false // Don't retry these specific storage errors + } + + var respErr *azcore.ResponseError + if errors.As(err, &respErr) { + switch respErr.StatusCode { + case http.StatusTooManyRequests: // Too Many Requests + return true + case http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout: // Server errors + return true + case http.StatusUnauthorized, http.StatusForbidden: // Authentication/Authorization failures + return false + case http.StatusBadRequest: // Bad Request (invalid names, etc.) 
+ return false + case http.StatusNotFound: // Not Found + return false + } + + // Check specific error codes + switch respErr.ErrorCode { + case string(bloberror.InternalError), string(bloberror.ServerBusy): + return true + case string(bloberror.AuthenticationFailed): + return false + case string(bloberror.InvalidResourceName): + return false + // Note: "ServiceUnavailable", "AuthorizationFailed", and "AccountNotFound" + // are not defined in bloberror constants, keeping as strings + case "ServiceUnavailable", "AuthorizationFailed", "AccountNotFound": + return false + } + } + + // Network-related errors are typically retryable + if strings.Contains(err.Error(), "timeout") || + strings.Contains(err.Error(), "connection") || + strings.Contains(err.Error(), "network") { + return true + } + + return false +} diff --git a/pkg/bucket/azure_test.go b/pkg/bucket/azure_test.go new file mode 100644 index 0000000000..0554e19039 --- /dev/null +++ b/pkg/bucket/azure_test.go @@ -0,0 +1,607 @@ +package bucket + +import ( + "context" + "fmt" + "strings" + "testing" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openshift/oadp-operator/api/v1alpha1" +) + +func TestValidateContainerName(t *testing.T) { + tests := []struct { + name string + input string + expectError bool + }{ + {"valid name", "mycontainer", false}, + {"valid with numbers", "container123", false}, + {"valid with hyphens", "my-container", false}, + {"too short", "ab", true}, + {"too long", strings.Repeat("a", 64), true}, + {"starts with hyphen", "-container", true}, + {"consecutive hyphens", "my--container", true}, + {"uppercase letters", "MyContainer", true}, + {"special characters", "my_container", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateContainerName(tt.input) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestValidateStorageAccountName(t *testing.T) { + tests := []struct { + name string + input string + expectError bool + }{ + {"valid name", "mystorageaccount", false}, + {"valid with numbers", "storage123", false}, + {"too short", "ab", true}, + {"too long", strings.Repeat("a", 25), true}, + {"uppercase letters", "MyStorage", true}, + {"special characters", "my-storage", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateStorageAccountName(tt.input) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestGetStorageAccountName(t *testing.T) { + tests := []struct { + name string + secretData map[string][]byte + config map[string]string + expectError bool + expected string + }{ + { + name: "valid account from secret", + secretData: map[string][]byte{ + "AZURE_STORAGE_ACCOUNT": []byte("mystorageaccount"), + }, + expectError: false, + expected: "mystorageaccount", + }, + { + name: "valid account from azurekey", + secretData: map[string][]byte{ + "azurekey": []byte(` +AZURE_SUBSCRIPTION_ID=12345678-1234-1234-1234-123456789012 +AZURE_STORAGE_ACCOUNT=azurekeystorageaccount +AZURE_TENANT_ID=87654321-4321-4321-4321-210987654321 +`), + }, + expectError: false, + expected: "azurekeystorageaccount", + }, + { + name: "valid account from 
config", + secretData: map[string][]byte{}, + config: map[string]string{ + "storageAccount": "configstorageaccount", + }, + expectError: false, + expected: "configstorageaccount", + }, + { + name: "priority: secret over azurekey", + secretData: map[string][]byte{ + "AZURE_STORAGE_ACCOUNT": []byte("secretaccount"), + "azurekey": []byte("AZURE_STORAGE_ACCOUNT=azurekeyaccount"), + }, + expectError: false, + expected: "secretaccount", + }, + { + name: "priority: azurekey over config", + secretData: map[string][]byte{ + "azurekey": []byte("AZURE_STORAGE_ACCOUNT=azurekeyaccount"), + }, + config: map[string]string{ + "storageAccount": "configaccount", + }, + expectError: false, + expected: "azurekeyaccount", + }, + { + name: "missing account name", + secretData: map[string][]byte{}, + expectError: true, + }, + { + name: "empty account name in secret", + secretData: map[string][]byte{ + "AZURE_STORAGE_ACCOUNT": []byte(""), + }, + expectError: true, + }, + { + name: "azurekey without storage account", + secretData: map[string][]byte{ + "azurekey": []byte(` +AZURE_SUBSCRIPTION_ID=12345678-1234-1234-1234-123456789012 +AZURE_TENANT_ID=87654321-4321-4321-4321-210987654321 +`), + }, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + bucket := v1alpha1.CloudStorage{ + Spec: v1alpha1.CloudStorageSpec{ + Config: tt.config, + }, + } + secret := &corev1.Secret{Data: tt.secretData} + + result, err := getStorageAccountName(bucket, secret) + + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expected, result) + } + }) + } +} + +func TestValidateAndConvertTags(t *testing.T) { + azureClient := &azureBucketClient{ + bucket: v1alpha1.CloudStorage{ + Spec: v1alpha1.CloudStorageSpec{ + Tags: map[string]string{ + "environment": "production", + "team": "backup", + }, + }, + }, + } + + err := azureClient.validateAndConvertTags() + assert.NoError(t, err) + + // Test too many tags + manyTags := make(map[string]string) + for i := 0; i < 51; i++ { + manyTags[fmt.Sprintf("key%d", i)] = "value" + } + azureClient.bucket.Spec.Tags = manyTags + + err = azureClient.validateAndConvertTags() + assert.Error(t, err) + assert.Contains(t, err.Error(), "too many tags") + + // Test tag name too long + azureClient.bucket.Spec.Tags = map[string]string{ + strings.Repeat("a", 513): "value", + } + err = azureClient.validateAndConvertTags() + assert.Error(t, err) + assert.Contains(t, err.Error(), "must be between 1 and 512 characters") + + // Test tag value too long + azureClient.bucket.Spec.Tags = map[string]string{ + "key": strings.Repeat("v", 257), + } + err = azureClient.validateAndConvertTags() + assert.Error(t, err) + assert.Contains(t, err.Error(), "must be 256 characters or less") +} + +func TestIsRetryableError(t *testing.T) { + // Test retryable errors + retryableErr := &azcore.ResponseError{ + StatusCode: 503, + ErrorCode: "ServiceUnavailable", + } + assert.True(t, isRetryableError(retryableErr)) + + // Test Too Many Requests + tooManyRequestsErr := &azcore.ResponseError{ + StatusCode: 429, + ErrorCode: "TooManyRequests", + } + assert.True(t, isRetryableError(tooManyRequestsErr)) + + // Test server errors + serverErr := &azcore.ResponseError{ + StatusCode: 500, + ErrorCode: "InternalError", + } + assert.True(t, isRetryableError(serverErr)) + + // Test non-retryable errors + nonRetryableErr := &azcore.ResponseError{ + StatusCode: 401, + ErrorCode: "AuthenticationFailed", + } + assert.False(t, isRetryableError(nonRetryableErr)) + + // 
Test authorization error + authzErr := &azcore.ResponseError{ + StatusCode: 403, + ErrorCode: "AuthorizationFailed", + } + assert.False(t, isRetryableError(authzErr)) + + // Test bad request + badRequestErr := &azcore.ResponseError{ + StatusCode: 400, + ErrorCode: "InvalidResourceName", + } + assert.False(t, isRetryableError(badRequestErr)) + + // Test not found + notFoundErr := &azcore.ResponseError{ + StatusCode: 404, + ErrorCode: "AccountNotFound", + } + assert.False(t, isRetryableError(notFoundErr)) + + // Test network-related errors + networkErr := fmt.Errorf("connection timeout") + assert.True(t, isRetryableError(networkErr)) + + networkErr2 := fmt.Errorf("network unreachable") + assert.True(t, isRetryableError(networkErr2)) +} + +func TestHasWorkloadIdentityCredentials(t *testing.T) { + azureClient := &azureBucketClient{} + + tests := []struct { + name string + secretData map[string][]byte + secretLabels map[string]string + expected bool + }{ + { + name: "valid STS secret with azurekey", + secretData: map[string][]byte{ + "azurekey": []byte(` +AZURE_SUBSCRIPTION_ID=12345678-1234-1234-1234-123456789012 +AZURE_TENANT_ID=87654321-4321-4321-4321-210987654321 +AZURE_CLIENT_ID=abcdef12-3456-7890-abcd-ef1234567890 +AZURE_CLOUD_NAME=AzurePublicCloud +`), + }, + secretLabels: map[string]string{ + "oadp.openshift.io/secret-type": "sts-credentials", + }, + expected: true, + }, + { + name: "STS secret missing required fields", + secretData: map[string][]byte{ + "azurekey": []byte(` +AZURE_SUBSCRIPTION_ID=12345678-1234-1234-1234-123456789012 +AZURE_CLOUD_NAME=AzurePublicCloud +`), + }, + secretLabels: map[string]string{ + "oadp.openshift.io/secret-type": "sts-credentials", + }, + expected: false, + }, + { + name: "STS secret without label", + secretData: map[string][]byte{ + "azurekey": []byte(` +AZURE_SUBSCRIPTION_ID=12345678-1234-1234-1234-123456789012 +AZURE_TENANT_ID=87654321-4321-4321-4321-210987654321 +AZURE_CLIENT_ID=abcdef12-3456-7890-abcd-ef1234567890 +`), + }, + expected: false, + }, + { + name: "secret with AZURE_FEDERATED_TOKEN_FILE", + secretData: map[string][]byte{ + "AZURE_TENANT_ID": []byte("tenant-id"), + "AZURE_CLIENT_ID": []byte("client-id"), + "AZURE_FEDERATED_TOKEN_FILE": []byte("/var/run/secrets/openshift/serviceaccount/token"), + }, + expected: true, + }, + { + name: "secret with AZURE_FEDERATED_TOKEN_FILE missing tenant", + secretData: map[string][]byte{ + "AZURE_CLIENT_ID": []byte("client-id"), + "AZURE_FEDERATED_TOKEN_FILE": []byte("/var/run/secrets/openshift/serviceaccount/token"), + }, + expected: false, + }, + { + name: "empty secret", + secretData: map[string][]byte{}, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Labels: tt.secretLabels, + }, + Data: tt.secretData, + } + result := azureClient.hasWorkloadIdentityCredentials(secret) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestHasServicePrincipalCredentials(t *testing.T) { + azureClient := &azureBucketClient{} + + tests := []struct { + name string + secretData map[string][]byte + expected bool + }{ + { + name: "valid service principal credentials", + secretData: map[string][]byte{ + "AZURE_TENANT_ID": []byte("tenant-id"), + "AZURE_CLIENT_ID": []byte("client-id"), + "AZURE_CLIENT_SECRET": []byte("client-secret"), + }, + expected: true, + }, + { + name: "missing tenant ID", + secretData: map[string][]byte{ + "AZURE_CLIENT_ID": []byte("client-id"), + "AZURE_CLIENT_SECRET": 
[]byte("client-secret"), + }, + expected: false, + }, + { + name: "missing client ID", + secretData: map[string][]byte{ + "AZURE_TENANT_ID": []byte("tenant-id"), + "AZURE_CLIENT_SECRET": []byte("client-secret"), + }, + expected: false, + }, + { + name: "missing client secret", + secretData: map[string][]byte{ + "AZURE_TENANT_ID": []byte("tenant-id"), + "AZURE_CLIENT_ID": []byte("client-id"), + }, + expected: false, + }, + { + name: "empty secret", + secretData: map[string][]byte{}, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + secret := &corev1.Secret{Data: tt.secretData} + result := azureClient.hasServicePrincipalCredentials(secret) + assert.Equal(t, tt.expected, result) + }) + } +} + +// Mock implementations for testing + +type mockAzureServiceClient struct { + containerClient azureContainerClient +} + +func (m *mockAzureServiceClient) NewContainerClient(containerName string) azureContainerClient { + return m.containerClient +} + +type mockAzureContainerClient struct { + getPropertiesErr error + createErr error + deleteErr error +} + +func (m *mockAzureContainerClient) GetProperties(ctx context.Context, options *container.GetPropertiesOptions) (container.GetPropertiesResponse, error) { + return container.GetPropertiesResponse{}, m.getPropertiesErr +} + +func (m *mockAzureContainerClient) Create(ctx context.Context, options *container.CreateOptions) (container.CreateResponse, error) { + return container.CreateResponse{}, m.createErr +} + +func (m *mockAzureContainerClient) Delete(ctx context.Context, options *container.DeleteOptions) (container.DeleteResponse, error) { + return container.DeleteResponse{}, m.deleteErr +} + +// TestAzureBucketClient_Delete tests the Delete method with various error scenarios +func TestAzureBucketClient_Delete(t *testing.T) { + tests := []struct { + name string + containerName string + deleteError error + expectedResult bool + expectedError bool + errorContains string + }{ + { + name: "successful deletion", + containerName: "test-container", + deleteError: nil, + expectedResult: true, + expectedError: false, + }, + { + name: "invalid container name", + containerName: "INVALID_NAME", + expectedResult: false, + expectedError: true, + errorContains: "invalid container name", + }, + { + name: "container not found - idempotent", + containerName: "test-container", + deleteError: &azcore.ResponseError{ + StatusCode: 404, + ErrorCode: "ContainerNotFound", + }, + expectedResult: true, + expectedError: false, + }, + { + name: "account not found - idempotent", + containerName: "test-container", + deleteError: &azcore.ResponseError{ + StatusCode: 404, + ErrorCode: "AccountNotFound", + }, + expectedResult: true, + expectedError: false, + }, + { + name: "authentication failed", + containerName: "test-container", + deleteError: &azcore.ResponseError{ + StatusCode: 401, + ErrorCode: "AuthenticationFailed", + }, + expectedResult: false, + expectedError: true, + errorContains: "authentication failed", + }, + { + name: "authorization failed", + containerName: "test-container", + deleteError: &azcore.ResponseError{ + StatusCode: 403, + ErrorCode: "AuthorizationFailed", + }, + expectedResult: false, + expectedError: true, + errorContains: "authorization failed", + }, + { + name: "generic error", + containerName: "test-container", + deleteError: fmt.Errorf("network error"), + expectedResult: false, + expectedError: true, + errorContains: "failed to delete container", + }, + } + + for _, tt := range tests { + t.Run(tt.name, 
func(t *testing.T) { + // Create mock container client + mockContainerClient := &mockAzureContainerClient{ + deleteErr: tt.deleteError, + } + + // Create mock service client + mockServiceClient := &mockAzureServiceClient{ + containerClient: mockContainerClient, + } + + // Create test secret + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-secret", + Namespace: "test-namespace", + }, + Data: map[string][]byte{ + "AZURE_STORAGE_ACCOUNT": []byte("teststorageaccount"), + }, + } + + // Create mock k8s client + mockK8sClient := &mockK8sClient{ + secret: secret, + } + + // Create azureBucketClient with factory + client := &azureBucketClient{ + bucket: v1alpha1.CloudStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cloudstorage", + Namespace: "test-namespace", + }, + Spec: v1alpha1.CloudStorageSpec{ + Name: tt.containerName, + Provider: v1alpha1.AzureBucketProvider, + CreationSecret: corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test-secret", + }, + Key: "azurekey", + }, + }, + }, + client: mockK8sClient, + clientFactory: func(serviceURL string, credential azcore.TokenCredential, sharedKey *azblob.SharedKeyCredential) (azureServiceClient, error) { + return mockServiceClient, nil + }, + } + + // Test Delete method + result, err := client.Delete() + + // Verify results + assert.Equal(t, tt.expectedResult, result) + + if tt.expectedError { + assert.Error(t, err) + if tt.errorContains != "" { + assert.Contains(t, err.Error(), tt.errorContains) + } + } else { + assert.NoError(t, err) + } + }) + } +} + +// mockK8sClient is a mock implementation of the Kubernetes client +type mockK8sClient struct { + client.Client + secret *corev1.Secret +} + +func (m *mockK8sClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if secret, ok := obj.(*corev1.Secret); ok { + *secret = *m.secret + return nil + } + return fmt.Errorf("object not found") +} diff --git a/pkg/bucket/client.go b/pkg/bucket/client.go index 3988bab166..4fcd0623fc 100644 --- a/pkg/bucket/client.go +++ b/pkg/bucket/client.go @@ -2,7 +2,6 @@ package bucket import ( "context" - "errors" "fmt" "os" "path/filepath" @@ -27,15 +26,18 @@ type Client interface { Exists() (bool, error) Create() (bool, error) Delete() (bool, error) - ForceCredentialRefresh() error } func NewClient(b v1alpha1.CloudStorage, c client.Client) (Client, error) { switch b.Spec.Provider { case v1alpha1.AWSBucketProvider: return &awsBucketClient{bucket: b, client: c}, nil + case v1alpha1.AzureBucketProvider: + return &azureBucketClient{bucket: b, client: c}, nil + case v1alpha1.GCPBucketProvider: + return &gcpBucketClient{bucket: b, client: c}, nil default: - return nil, fmt.Errorf("unable to determine bucket client") + return nil, fmt.Errorf("unsupported bucket provider: %s", b.Spec.Provider) } } @@ -58,14 +60,7 @@ func getCredentialFromCloudStorageSecret(a client.Client, cloudStorage v1alpha1. return "", err } - stsSecret, err := stsflow.STSStandardizedFlow() - if err != nil { - // Log the error for debugging purposes - fmt.Printf("Error in STSStandardizedFlow: %v\n", err) - // Optionally, return the error if it is critical - return "", err - } - if stsSecret != "" { + if stsSecret, err := stsflow.STSStandardizedFlow(); err == nil && stsSecret != "" { err := a.Get(context.TODO(), types.NamespacedName{ Name: stsSecret, Namespace: cloudStorage.Namespace, @@ -100,17 +95,31 @@ func getCredentialFromCloudStorageSecret(a client.Client, cloudStorage v1alpha1. 
} func SharedCredentialsFileFromSecret(secret *corev1.Secret) (string, error) { - if len(secret.Data["credentials"]) == 0 { - return "", errors.New("invalid secret for aws credentials") + // Check for AWS credentials key + if credData, exists := secret.Data[stsflow.AWSSecretCredentialsKey]; exists && len(credData) > 0 { + f, err := os.CreateTemp("", "cloud-credentials-aws-") + if err != nil { + return "", err + } + defer f.Close() + if _, err := f.Write(credData); err != nil { + return "", err + } + return f.Name(), nil } - f, err := os.CreateTemp("", "aws-shared-credentials") - if err != nil { - return "", err - } - defer f.Close() - if _, err := f.Write(secret.Data["credentials"]); err != nil { - return "", err + // Check for GCP service account JSON key + if serviceAccountData, exists := secret.Data[stsflow.GcpSecretJSONKey]; exists && len(serviceAccountData) > 0 { + f, err := os.CreateTemp("", "cloud-credentials-gcp-") + if err != nil { + return "", err + } + defer f.Close() + if _, err := f.Write(serviceAccountData); err != nil { + return "", err + } + return f.Name(), nil } - return f.Name(), nil + + return "", fmt.Errorf("invalid secret: missing %s key (for AWS) or %s key (for GCP)", stsflow.AWSSecretCredentialsKey, stsflow.GcpSecretJSONKey) } diff --git a/pkg/bucket/client_test.go b/pkg/bucket/client_test.go index d4efbeb47f..4700df8ab7 100644 --- a/pkg/bucket/client_test.go +++ b/pkg/bucket/client_test.go @@ -1,10 +1,16 @@ package bucket_test import ( + "os" + "strings" "testing" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + oadpv1alpha1 "github.com/openshift/oadp-operator/api/v1alpha1" "github.com/openshift/oadp-operator/pkg/bucket" + "github.com/openshift/oadp-operator/pkg/credentials/stsflow" ) func TestNewClient(t *testing.T) { @@ -51,3 +57,168 @@ func TestNewClient(t *testing.T) { }) } } + +func TestSharedCredentialsFileFromSecret(t *testing.T) { + tests := []struct { + name string + secret *corev1.Secret + wantErr bool + errContains string + filePrefix string + validateContent func(t *testing.T, content []byte) + }{ + { + name: "AWS credentials", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-aws-secret", + Namespace: "test-ns", + }, + Data: map[string][]byte{ + stsflow.AWSSecretCredentialsKey: []byte(`[default] +aws_access_key_id = AKIAIOSFODNN7EXAMPLE +aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`), + }, + }, + wantErr: false, + filePrefix: "cloud-credentials-aws-", + validateContent: func(t *testing.T, content []byte) { + if !strings.Contains(string(content), "aws_access_key_id") { + t.Errorf("Expected AWS credentials content, got: %s", string(content)) + } + }, + }, + { + name: "GCP service account JSON", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-gcp-secret", + Namespace: "test-ns", + }, + Data: map[string][]byte{ + stsflow.GcpSecretJSONKey: []byte(`{ + "type": "service_account", + "project_id": "test-project", + "private_key_id": "key-id", + "private_key": "-----BEGIN RSA PRIVATE KEY-----\ntest\n-----END RSA PRIVATE KEY-----\n", + "client_email": "test@test-project.iam.gserviceaccount.com", + "client_id": "123456789", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token" +}`), + }, + }, + wantErr: false, + filePrefix: "cloud-credentials-gcp-", + validateContent: func(t *testing.T, content []byte) { + if !strings.Contains(string(content), "service_account") { + t.Errorf("Expected GCP service account 
content, got: %s", string(content)) + } + }, + }, + { + name: "Empty credentials data for AWS", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-empty-aws", + Namespace: "test-ns", + }, + Data: map[string][]byte{ + stsflow.AWSSecretCredentialsKey: {}, + }, + }, + wantErr: true, + errContains: "invalid secret", + }, + { + name: "Empty service account data for GCP", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-empty-gcp", + Namespace: "test-ns", + }, + Data: map[string][]byte{ + stsflow.GcpSecretJSONKey: {}, + }, + }, + wantErr: true, + errContains: "invalid secret", + }, + { + name: "No recognized keys", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-invalid", + Namespace: "test-ns", + }, + Data: map[string][]byte{ + "unknown-key": []byte("some data"), + }, + }, + wantErr: true, + errContains: "invalid secret: missing credentials key (for AWS) or service_account.json key (for GCP)", + }, + { + name: "Both AWS and GCP keys present (AWS takes precedence)", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-both", + Namespace: "test-ns", + }, + Data: map[string][]byte{ + stsflow.AWSSecretCredentialsKey: []byte("aws-creds"), + stsflow.GcpSecretJSONKey: []byte("gcp-creds"), + }, + }, + wantErr: false, + filePrefix: "cloud-credentials-aws-", + validateContent: func(t *testing.T, content []byte) { + if string(content) != "aws-creds" { + t.Errorf("Expected AWS credentials to take precedence, got: %s", string(content)) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filename, err := bucket.SharedCredentialsFileFromSecret(tt.secret) + + if tt.wantErr { + if err == nil { + t.Errorf("Expected error but got none") + } else if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) { + t.Errorf("Expected error containing '%s', got: %v", tt.errContains, err) + } + return + } + + if err != nil { + t.Errorf("Unexpected error: %v", err) + return + } + + // Verify file was created + if _, err := os.Stat(filename); os.IsNotExist(err) { + t.Errorf("Expected file to be created at %s, but it doesn't exist", filename) + return + } + + // Verify file prefix + if tt.filePrefix != "" && !strings.Contains(filename, tt.filePrefix) { + t.Errorf("Expected filename to contain prefix '%s', got: %s", tt.filePrefix, filename) + } + + // Read and validate file content + content, err := os.ReadFile(filename) + if err != nil { + t.Errorf("Failed to read created file: %v", err) + } else if tt.validateContent != nil { + tt.validateContent(t, content) + } + + // Clean up + os.Remove(filename) + }) + } +} diff --git a/pkg/bucket/gcp.go b/pkg/bucket/gcp.go new file mode 100644 index 0000000000..d315eff168 --- /dev/null +++ b/pkg/bucket/gcp.go @@ -0,0 +1,552 @@ +package bucket + +import ( + "context" + "encoding/json" + "fmt" + "math/rand" + "net/http" + "os" + "regexp" + "strings" + "time" + + "cloud.google.com/go/storage" + "google.golang.org/api/googleapi" + "google.golang.org/api/iterator" + "google.golang.org/api/option" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openshift/oadp-operator/api/v1alpha1" +) + +type gcpBucketClient struct { + bucket v1alpha1.CloudStorage + client client.Client +} + +func (g gcpBucketClient) Exists() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + gcsClient, _, err := g.getGCSClient() + if err != nil { + return false, err + } + defer gcsClient.Close() + + bucket := 
gcsClient.Bucket(g.bucket.Spec.Name) + _, err = bucket.Attrs(ctx) + if err != nil { + if err == storage.ErrBucketNotExist { + return false, nil + } + // Return true for permission errors - unable to determine if bucket exists + return true, fmt.Errorf("unable to determine bucket %v status: %v", g.bucket.Spec.Name, err) + } + + // Tag bucket if it exists + err = g.tagBucket(gcsClient) + if err != nil { + return true, err + } + + return true, nil +} + +func (g gcpBucketClient) Create() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + // Validate bucket name + if err := validateBucketName(g.bucket.Spec.Name); err != nil { + return false, err + } + + gcsClient, projectID, err := g.getGCSClient() + if err != nil { + return false, err + } + defer gcsClient.Close() + + bucket := gcsClient.Bucket(g.bucket.Spec.Name) + + // Prepare bucket attributes + attrs := &storage.BucketAttrs{ + Location: g.getGCPLocation(), + Labels: g.convertTagsToLabels(), + } + + // Set storage class if configured + if g.bucket.Spec.Config != nil { + if storageClass, ok := g.bucket.Spec.Config["storageClass"]; ok && storageClass != "" { + attrs.StorageClass = storageClass + } + } + + // Create bucket with retry logic + err = withGCSRetry(func() error { + return bucket.Create(ctx, projectID, attrs) + }, defaultGCSRetryConfig) + + if err != nil { + err = handleGCSError(err, "create", g.bucket.Spec.Name) + if err == nil { + // Bucket already exists - idempotent behavior + return true, nil + } + return false, err + } + + return true, nil +} + +func (g gcpBucketClient) Delete() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), 300*time.Second) // Longer timeout for object deletion + defer cancel() + + gcsClient, _, err := g.getGCSClient() + if err != nil { + return false, err + } + defer gcsClient.Close() + + bucket := gcsClient.Bucket(g.bucket.Spec.Name) + + // Check if bucket exists first + _, err = bucket.Attrs(ctx) + if err != nil { + if err == storage.ErrBucketNotExist { + // Bucket doesn't exist - idempotent behavior + return true, nil + } + return false, err + } + + // Delete all objects in bucket (GCS requires empty bucket) + err = g.deleteObjectsConcurrently(ctx, bucket) + if err != nil { + return false, err + } + + // Delete the bucket with retry logic + err = withGCSRetry(func() error { + return bucket.Delete(ctx) + }, defaultGCSRetryConfig) + + if err != nil { + err = handleGCSError(err, "delete", g.bucket.Spec.Name) + if err == nil { + // Bucket already deleted - idempotent behavior + return true, nil + } + return false, err + } + + return true, nil +} + +// getGCSClient creates a GCS client with authentication +// Supports both traditional service account keys and GCP Workload Identity Federation (WIF) +func (g gcpBucketClient) getGCSClient() (*storage.Client, string, error) { + ctx := context.Background() + + // Get credential file from secret + // This could be either a service account key or WIF external account credentials + credFile, err := getCredentialFromCloudStorageSecret(g.client, g.bucket) + if err != nil { + return nil, "", err + } + + // Parse credentials to extract project ID + // Handles both service account and WIF credential formats + projectID, err := g.extractProjectID(credFile) + if err != nil { + return nil, "", err + } + + // Create GCS client with credentials + // The GCS client automatically handles both service account and WIF credentials + gcsClient, err := storage.NewClient(ctx, 
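+	// credFile may hold either credential shape; both are accepted by the option below:
+	//   {"type": "service_account", "project_id": "...", ...}   (long-lived key)
+	//   {"type": "external_account", "audience": "...", ...}    (Workload Identity Federation)
+	// The fragments above are illustrative only; see serviceAccountKey and externalAccountKey below.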
option.WithCredentialsFile(credFile)) + if err != nil { + return nil, "", err + } + + return gcsClient, projectID, nil +} + +// serviceAccountKey represents the structure of a service account JSON file +type serviceAccountKey struct { + Type string `json:"type"` + ProjectID string `json:"project_id"` + PrivateKeyID string `json:"private_key_id"` + PrivateKey string `json:"private_key"` + ClientEmail string `json:"client_email"` + ClientID string `json:"client_id"` + AuthURI string `json:"auth_uri"` + TokenURI string `json:"token_uri"` +} + +// externalAccountKey represents the structure of a GCP WIF external account JSON file +type externalAccountKey struct { + Type string `json:"type"` + Audience string `json:"audience"` + SubjectTokenType string `json:"subject_token_type"` + TokenURL string `json:"token_url"` + ServiceAccountImpersonationURL string `json:"service_account_impersonation_url"` + CredentialSource struct { + File string `json:"file"` + Format struct { + Type string `json:"type"` + } `json:"format"` + } `json:"credential_source"` +} + +// extractProjectID parses the credential JSON file to extract project ID +func (g gcpBucketClient) extractProjectID(credentialFile string) (string, error) { + data, err := os.ReadFile(credentialFile) + if err != nil { + return "", fmt.Errorf("error reading credential file: %v", err) + } + + // First, check the type of credentials + var typeCheck struct { + Type string `json:"type"` + } + if err := json.Unmarshal(data, &typeCheck); err != nil { + return "", fmt.Errorf("error parsing credential JSON: %v", err) + } + + switch typeCheck.Type { + case "service_account": + // Traditional service account key + var sa serviceAccountKey + if err := json.Unmarshal(data, &sa); err != nil { + return "", fmt.Errorf("error parsing service account JSON: %v", err) + } + if sa.ProjectID == "" { + return "", fmt.Errorf("project_id not found in service account key") + } + return sa.ProjectID, nil + + case "external_account": + // GCP WIF credentials - need to extract project ID from service account email + var ea externalAccountKey + if err := json.Unmarshal(data, &ea); err != nil { + return "", fmt.Errorf("error parsing external account JSON: %v", err) + } + + // Extract project ID from the service account impersonation URL + // URL format: https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/SERVICE_ACCOUNT_EMAIL:generateAccessToken + // Where SERVICE_ACCOUNT_EMAIL is like: my-sa@PROJECT_ID.iam.gserviceaccount.com + projectID, err := g.extractProjectIDFromWIF(ea.ServiceAccountImpersonationURL) + if err != nil { + // If we can't extract from URL, check if project ID is in config + if g.bucket.Spec.Config != nil { + if pid, ok := g.bucket.Spec.Config["projectID"]; ok && pid != "" { + return pid, nil + } + } + // Try environment variable as last resort + if pid := os.Getenv("GOOGLE_CLOUD_PROJECT"); pid != "" { + return pid, nil + } + if pid := os.Getenv("GCP_PROJECT"); pid != "" { + return pid, nil + } + return "", fmt.Errorf("unable to determine project ID from WIF credentials: %v", err) + } + return projectID, nil + + default: + return "", fmt.Errorf("unsupported credential type: %s", typeCheck.Type) + } +} + +// extractProjectIDFromWIF extracts project ID from WIF service account impersonation URL +func (g gcpBucketClient) extractProjectIDFromWIF(impersonationURL string) (string, error) { + // Extract service account email from URL + // URL format: 
https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/SERVICE_ACCOUNT_EMAIL:generateAccessToken + + prefix := "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/" + suffix := ":generateAccessToken" + + if !strings.HasPrefix(impersonationURL, prefix) || !strings.HasSuffix(impersonationURL, suffix) { + return "", fmt.Errorf("invalid service account impersonation URL format") + } + + // Extract the service account email + start := len(prefix) + end := len(impersonationURL) - len(suffix) + if start >= end { + return "", fmt.Errorf("invalid service account impersonation URL") + } + + serviceAccountEmail := impersonationURL[start:end] + + // Extract project ID from service account email + // Format: service-account-name@PROJECT_ID.iam.gserviceaccount.com + parts := strings.Split(serviceAccountEmail, "@") + if len(parts) != 2 { + return "", fmt.Errorf("invalid service account email format") + } + + domainParts := strings.Split(parts[1], ".") + if len(domainParts) < 3 || domainParts[1] != "iam" || domainParts[2] != "gserviceaccount" { + return "", fmt.Errorf("invalid service account email domain") + } + + return domainParts[0], nil +} + +// convertTagsToLabels converts CloudStorage tags to GCS labels +func (g gcpBucketClient) convertTagsToLabels() map[string]string { + labels := make(map[string]string) + + for key, value := range g.bucket.Spec.Tags { + // Convert to lowercase and replace invalid characters + labelKey := strings.ToLower(key) + labelKey = regexp.MustCompile(`[^a-z0-9_-]`).ReplaceAllString(labelKey, "_") + + labelValue := strings.ToLower(value) + labelValue = regexp.MustCompile(`[^a-z0-9_-]`).ReplaceAllString(labelValue, "_") + + // Truncate to GCP limits + if len(labelKey) > 63 { + labelKey = labelKey[:63] + } + if len(labelValue) > 63 { + labelValue = labelValue[:63] + } + + labels[labelKey] = labelValue + } + + return labels +} + +// tagBucket applies tags to an existing bucket +func (g gcpBucketClient) tagBucket(gcsClient *storage.Client) error { + ctx := context.Background() + bucket := gcsClient.Bucket(g.bucket.Spec.Name) + + // Update labels + newLabels := g.convertTagsToLabels() + + // Update bucket with new labels + attrsToUpdate := storage.BucketAttrsToUpdate{} + for key, value := range newLabels { + attrsToUpdate.SetLabel(key, value) + } + + _, err := bucket.Update(ctx, attrsToUpdate) + if err != nil { + return fmt.Errorf("error updating bucket labels: %v", err) + } + + return nil +} + +// validateBucketName validates GCS bucket naming rules +func validateBucketName(name string) error { + if len(name) < 3 || len(name) > 63 { + return fmt.Errorf("bucket name must be between 3 and 63 characters") + } + + // Must start and end with letter or number + if !regexp.MustCompile(`^[a-z0-9].*[a-z0-9]$`).MatchString(name) { + return fmt.Errorf("bucket name must start and end with a letter or number") + } + + // Can contain lowercase letters, numbers, hyphens, underscores, and dots + if !regexp.MustCompile(`^[a-z0-9._-]+$`).MatchString(name) { + return fmt.Errorf("bucket name can only contain lowercase letters, numbers, hyphens, underscores, and dots") + } + + // Cannot have consecutive dots + if strings.Contains(name, "..") { + return fmt.Errorf("bucket name cannot contain consecutive dots") + } + + // Cannot be IP address format + if regexp.MustCompile(`^\d+\.\d+\.\d+\.\d+$`).MatchString(name) { + return fmt.Errorf("bucket name cannot be in IP address format") + } + + return nil +} + +// getGCPLocation returns the GCP location for bucket creation 
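+// For illustration (a hypothetical spec, not part of this change), a CloudStorage
+// requesting a Nearline bucket in europe-west1 would map onto the bucket as follows:
+//
+//	cs := v1alpha1.CloudStorage{
+//		Spec: v1alpha1.CloudStorageSpec{
+//			Name:     "my-backups",                                  // bucket name
+//			Provider: v1alpha1.GCPBucketProvider,
+//			Region:   "europe-west1",                                // becomes BucketAttrs.Location
+//			Config:   map[string]string{"storageClass": "NEARLINE"}, // becomes BucketAttrs.StorageClass
+//		},
+//	}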
+func (g gcpBucketClient) getGCPLocation() string { + region := g.bucket.Spec.Region + if region == "" { + return "us-central1" // Default GCP region + } + return region +} + +// deleteObjectsConcurrently deletes objects in parallel for faster bucket cleanup +func (g gcpBucketClient) deleteObjectsConcurrently(ctx context.Context, bucket *storage.BucketHandle) error { + const maxWorkers = 10 + const batchSize = 100 + + objects := make(chan string, batchSize) + errors := make(chan error, maxWorkers) + + // Start workers + for i := 0; i < maxWorkers; i++ { + go func() { + for objName := range objects { + if err := bucket.Object(objName).Delete(ctx); err != nil { + errors <- fmt.Errorf("error deleting object %s: %v", objName, err) + return + } + } + errors <- nil + }() + } + + // List and queue objects for deletion + it := bucket.Objects(ctx, nil) + go func() { + defer close(objects) + for { + objAttrs, err := it.Next() + if err == iterator.Done { + break + } + if err != nil { + errors <- fmt.Errorf("error listing objects: %v", err) + return + } + objects <- objAttrs.Name + } + }() + + // Wait for all workers to complete + for i := 0; i < maxWorkers; i++ { + if err := <-errors; err != nil { + return err + } + } + + return nil +} + +// gcsRetryConfig defines retry behavior for GCS operations +type gcsRetryConfig struct { + maxRetries int + initialDelay time.Duration + maxDelay time.Duration +} + +var defaultGCSRetryConfig = gcsRetryConfig{ + maxRetries: 5, + initialDelay: 1 * time.Second, + maxDelay: 32 * time.Second, +} + +// withGCSRetry executes a function with exponential backoff retry logic +func withGCSRetry(operation func() error, config gcsRetryConfig) error { + var lastErr error + delay := config.initialDelay + + for attempt := 0; attempt <= config.maxRetries; attempt++ { + if attempt > 0 { + // Add jitter to prevent thundering herd + jitter := time.Duration(rand.Int63n(int64(delay / 4))) + time.Sleep(delay + jitter) + + // Exponential backoff + delay = time.Duration(float64(delay) * 2.0) + if delay > config.maxDelay { + delay = config.maxDelay + } + } + + err := operation() + if err == nil { + return nil + } + + lastErr = err + + // Check if error is retryable + if !isGCSRetryableError(err) { + return err + } + } + + return fmt.Errorf("operation failed after %d retries: %w", config.maxRetries, lastErr) +} + +// isGCSRetryableError determines if a GCS error should be retried +func isGCSRetryableError(err error) bool { + if err == nil { + return false + } + + if gerr, ok := err.(*googleapi.Error); ok { + switch gerr.Code { + case http.StatusTooManyRequests: // Too Many Requests + return true + case http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, http.StatusGatewayTimeout: // Server errors + return true + case http.StatusUnauthorized, http.StatusForbidden: // Authentication/Authorization failures + return false + case http.StatusBadRequest: // Bad Request (invalid names, etc.) 
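+		// A 400 means the request itself is malformed (for example an invalid bucket
+		// name), so repeating the identical call cannot succeed; fail fast instead.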
+ return false + case http.StatusNotFound: // Not Found + return false + case http.StatusConflict: // Conflict (bucket exists) + return false + } + } + + // Network-related errors are typically retryable + if strings.Contains(err.Error(), "timeout") || + strings.Contains(err.Error(), "connection") || + strings.Contains(err.Error(), "network") { + return true + } + + return false +} + +// handleGCSError provides consistent error handling for GCS operations +func handleGCSError(err error, operation string, bucketName string) error { + if err == storage.ErrBucketNotExist { + if operation == "exists" || operation == "delete" { + return nil // Expected for these operations + } + return fmt.Errorf("bucket '%s' not found", bucketName) + } + + if gerr, ok := err.(*googleapi.Error); ok { + switch gerr.Code { + case http.StatusConflict: // Conflict - Bucket already exists + if operation == "create" { + return nil // Idempotent behavior + } + return fmt.Errorf("bucket '%s' already exists globally", bucketName) + case http.StatusNotFound: // Not Found + if operation == "exists" || operation == "delete" { + return nil // Expected for these operations + } + return fmt.Errorf("bucket '%s' not found", bucketName) + case http.StatusUnauthorized: // Unauthenticated + return fmt.Errorf("authentication failed: check service account key") + case http.StatusForbidden: // Permission Denied + return fmt.Errorf("permission denied: check service account permissions for project") + case http.StatusTooManyRequests: // Too Many Requests + return fmt.Errorf("rate limit exceeded: too many requests") + case http.StatusBadRequest: // Bad Request + return fmt.Errorf("invalid request: check bucket name and configuration") + default: + return fmt.Errorf("gcs error during %s: %s (HTTP %d): %v", + operation, gerr.Message, gerr.Code, gerr) + } + } + + return fmt.Errorf("failed to %s bucket '%s': %w", operation, bucketName, err) +} diff --git a/pkg/bucket/gcp_test.go b/pkg/bucket/gcp_test.go new file mode 100644 index 0000000000..616313456e --- /dev/null +++ b/pkg/bucket/gcp_test.go @@ -0,0 +1,410 @@ +package bucket + +import ( + "encoding/json" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/api/googleapi" + + "github.com/openshift/oadp-operator/api/v1alpha1" +) + +func TestValidateBucketName(t *testing.T) { + tests := []struct { + name string + input string + expectError bool + }{ + {"valid name", "my-bucket-123", false}, + {"valid with dots", "my.bucket.123", false}, + {"valid with underscores", "my_bucket_123", false}, + {"too short", "ab", true}, + {"too long", strings.Repeat("a", 64), true}, + {"starts with number", "123bucket", false}, + {"ends with hyphen", "bucket-", true}, + {"consecutive dots", "bucket..name", true}, + {"ip address format", "192.168.1.1", true}, + {"uppercase letters", "MyBucket", true}, + {"special characters", "bucket@name", true}, + {"starts with hyphen", "-bucket", true}, + {"ends with dot", "bucket.", true}, + {"valid minimum length", "abc", false}, + {"valid maximum length", strings.Repeat("a", 63), false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateBucketName(tt.input) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestExtractProjectID(t *testing.T) { + // Create temporary service account file + sa := serviceAccountKey{ + Type: "service_account", + ProjectID: "test-project-123", + ClientEmail: 
"test@test-project-123.iam.gserviceaccount.com", + ClientID: "123456789", + PrivateKey: "-----BEGIN RSA PRIVATE KEY-----\ntest\n-----END RSA PRIVATE KEY-----\n", + AuthURI: "https://accounts.google.com/o/oauth2/auth", + TokenURI: "https://oauth2.googleapis.com/token", + } + + data, err := json.Marshal(sa) + require.NoError(t, err) + + tmpFile, err := os.CreateTemp("", "test-sa-*.json") + require.NoError(t, err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.Write(data) + require.NoError(t, err) + tmpFile.Close() + + client := gcpBucketClient{} + projectID, err := client.extractProjectID(tmpFile.Name()) + + assert.NoError(t, err) + assert.Equal(t, "test-project-123", projectID) + + // Test missing project ID + saNoProject := serviceAccountKey{ + Type: "service_account", + ClientEmail: "test@test-project-123.iam.gserviceaccount.com", + } + + data, err = json.Marshal(saNoProject) + require.NoError(t, err) + + tmpFile2, err := os.CreateTemp("", "test-sa-no-project-*.json") + require.NoError(t, err) + defer os.Remove(tmpFile2.Name()) + + _, err = tmpFile2.Write(data) + require.NoError(t, err) + tmpFile2.Close() + + _, err = client.extractProjectID(tmpFile2.Name()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "project_id not found") + + // Test invalid JSON + tmpFile3, err := os.CreateTemp("", "test-sa-invalid-*.json") + require.NoError(t, err) + defer os.Remove(tmpFile3.Name()) + + _, err = tmpFile3.Write([]byte("invalid json")) + require.NoError(t, err) + tmpFile3.Close() + + _, err = client.extractProjectID(tmpFile3.Name()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "error parsing credential JSON") + + // Test non-existent file + _, err = client.extractProjectID("/non/existent/file.json") + assert.Error(t, err) + assert.Contains(t, err.Error(), "error reading credential file") +} + +func TestConvertTagsToLabels(t *testing.T) { + tests := []struct { + name string + tags map[string]string + expected map[string]string + }{ + { + name: "normal tags", + tags: map[string]string{ + "environment": "production", + "team": "backend", + }, + expected: map[string]string{ + "environment": "production", + "team": "backend", + }, + }, + { + name: "tags with uppercase and special characters", + tags: map[string]string{ + "Environment": "Production", + "Team-Name": "Backend_API", + "Cost.Center": "IT-Ops", + "Project@2024": "MyProject", + "Version#1.2.3": "Release", + }, + expected: map[string]string{ + "environment": "production", + "team-name": "backend_api", + "cost_center": "it-ops", + "project_2024": "myproject", + "version_1_2_3": "release", + }, + }, + { + name: "tags with long keys and values", + tags: map[string]string{ + strings.Repeat("a", 70): strings.Repeat("b", 70), + }, + expected: map[string]string{ + strings.Repeat("a", 63): strings.Repeat("b", 63), + }, + }, + { + name: "empty tags", + tags: map[string]string{}, + expected: map[string]string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := gcpBucketClient{ + bucket: v1alpha1.CloudStorage{ + Spec: v1alpha1.CloudStorageSpec{ + Tags: tt.tags, + }, + }, + } + + labels := client.convertTagsToLabels() + assert.Equal(t, tt.expected, labels) + }) + } +} + +func TestGetGCPLocation(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"us-central1", "us-central1"}, // GCP region + {"europe-west1", "europe-west1"}, // GCP region + {"asia-southeast1", "asia-southeast1"}, // GCP region + {"", "us-central1"}, // Default + {"any-region", "any-region"}, // 
Pass through + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + client := gcpBucketClient{ + bucket: v1alpha1.CloudStorage{ + Spec: v1alpha1.CloudStorageSpec{ + Region: tt.input, + }, + }, + } + result := client.getGCPLocation() + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestIsGCSRetryableError(t *testing.T) { + tests := []struct { + name string + err error + retryable bool + }{ + { + name: "rate limit error", + err: &googleapi.Error{ + Code: 429, + Message: "Too Many Requests", + }, + retryable: true, + }, + { + name: "server error 500", + err: &googleapi.Error{ + Code: 500, + Message: "Internal Server Error", + }, + retryable: true, + }, + { + name: "server error 503", + err: &googleapi.Error{ + Code: 503, + Message: "Service Unavailable", + }, + retryable: true, + }, + { + name: "gateway timeout", + err: &googleapi.Error{ + Code: 504, + Message: "Gateway Timeout", + }, + retryable: true, + }, + { + name: "authentication failed", + err: &googleapi.Error{ + Code: 401, + Message: "Unauthenticated", + }, + retryable: false, + }, + { + name: "permission denied", + err: &googleapi.Error{ + Code: 403, + Message: "Permission Denied", + }, + retryable: false, + }, + { + name: "bad request", + err: &googleapi.Error{ + Code: 400, + Message: "Bad Request", + }, + retryable: false, + }, + { + name: "not found", + err: &googleapi.Error{ + Code: 404, + Message: "Not Found", + }, + retryable: false, + }, + { + name: "conflict", + err: &googleapi.Error{ + Code: 409, + Message: "Conflict", + }, + retryable: false, + }, + { + name: "timeout error string", + err: assert.AnError, + retryable: false, // Our mock error doesn't contain "timeout" + }, + { + name: "nil error", + err: nil, + retryable: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isGCSRetryableError(tt.err) + assert.Equal(t, tt.retryable, result) + }) + } +} + +func TestHandleGCSError(t *testing.T) { + tests := []struct { + name string + err error + operation string + bucketName string + expectedError string + shouldBeNil bool + }{ + { + name: "bucket already exists on create", + err: &googleapi.Error{Code: 409, Message: "Bucket already exists"}, + operation: "create", + bucketName: "test-bucket", + shouldBeNil: true, + }, + { + name: "bucket not found on delete", + err: &googleapi.Error{Code: 404, Message: "Not Found"}, + operation: "delete", + bucketName: "test-bucket", + shouldBeNil: true, + }, + { + name: "bucket not found on exists", + err: &googleapi.Error{Code: 404, Message: "Not Found"}, + operation: "exists", + bucketName: "test-bucket", + shouldBeNil: true, + }, + { + name: "authentication failed", + err: &googleapi.Error{Code: 401, Message: "Unauthenticated"}, + operation: "create", + bucketName: "test-bucket", + expectedError: "authentication failed: check service account key", + }, + { + name: "permission denied", + err: &googleapi.Error{Code: 403, Message: "Permission Denied"}, + operation: "create", + bucketName: "test-bucket", + expectedError: "permission denied: check service account permissions for project", + }, + { + name: "rate limit exceeded", + err: &googleapi.Error{Code: 429, Message: "Too Many Requests"}, + operation: "create", + bucketName: "test-bucket", + expectedError: "rate limit exceeded: too many requests", + }, + { + name: "bad request", + err: &googleapi.Error{Code: 400, Message: "Bad Request"}, + operation: "create", + bucketName: "test-bucket", + expectedError: "invalid request: check bucket name and configuration", + 
}, + { + name: "other error", + err: &googleapi.Error{Code: 500, Message: "Internal Server Error"}, + operation: "create", + bucketName: "test-bucket", + expectedError: "gcs error during create: Internal Server Error (HTTP 500)", + }, + { + name: "conflict on non-create operation", + err: &googleapi.Error{Code: 409, Message: "Conflict"}, + operation: "delete", + bucketName: "test-bucket", + expectedError: "bucket 'test-bucket' already exists globally", + }, + { + name: "generic error", + err: assert.AnError, + operation: "create", + bucketName: "test-bucket", + expectedError: "failed to create bucket 'test-bucket':", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := handleGCSError(tt.err, tt.operation, tt.bucketName) + if tt.shouldBeNil { + assert.Nil(t, err) + } else { + assert.NotNil(t, err) + assert.Contains(t, err.Error(), tt.expectedError) + } + }) + } +} + +func TestGCSRetryConfig(t *testing.T) { + // Test that the default retry config is properly set + assert.Equal(t, 5, defaultGCSRetryConfig.maxRetries) + assert.Equal(t, float64(1), defaultGCSRetryConfig.initialDelay.Seconds()) + assert.Equal(t, float64(32), defaultGCSRetryConfig.maxDelay.Seconds()) +} diff --git a/pkg/credentials/stsflow/stsflow.go b/pkg/credentials/stsflow/stsflow.go index 8b6abd544a..a3c63f16b7 100644 --- a/pkg/credentials/stsflow/stsflow.go +++ b/pkg/credentials/stsflow/stsflow.go @@ -40,7 +40,6 @@ const ( // Azure workload identity secret name AzureWorkloadIdentitySecretName = "azure-workload-identity-env" - // Cloud Provider Secret Keys - standard key names for cloud credentials AzureClientID = "azure_client_id" AzureClientSecret = "azure_client_secret" @@ -51,6 +50,9 @@ const ( AzureTenantID = "azure_tenant_id" AzureFederatedTokenFile = "azure_federated_token_file" + // AWS Secret key name + AWSSecretCredentialsKey = "credentials" + // GCP Secret key name GcpSecretJSONKey = "service_account.json" @@ -121,7 +123,7 @@ func STSStandardizedFlow() (string, error) { func CreateOrUpdateSTSAWSSecret(setupLog logr.Logger, roleARN string, secretNS string, kubeconf *rest.Config) error { // AWS STS credentials format return CreateOrUpdateSTSSecret(setupLog, VeleroAWSSecretName, map[string]string{ - "credentials": fmt.Sprintf(`[default] + AWSSecretCredentialsKey: fmt.Sprintf(`[default] sts_regional_endpoints = regional role_arn = %s web_identity_token_file = %s`, roleARN, WebIdentityTokenPath), @@ -188,7 +190,6 @@ AZURE_CLOUD_NAME=AzurePublicCloud return nil } - func CreateOrUpdateSTSSecret(setupLog logr.Logger, secretName string, credStringData map[string]string, secretNS string, kubeconf *rest.Config) error { clientInstance, err := client.New(kubeconf, client.Options{}) if err != nil { @@ -318,6 +319,11 @@ func AnnotateVeleroServiceAccountForAzureWithClient(setupLog logr.Logger, client if sa.Annotations == nil { sa.Annotations = make(map[string]string) } + // Note: This annotation is not strictly necessary according to Azure workload identity documentation. + // The annotation instructs the Workload Identity webhook to inject the AZURE_CLIENT_ID environment variable. + // Since we're manually setting the environment variable in the deployment, this is just a precaution. 
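+	// For illustration only (service account name and namespace assumed, not created
+	// by this change), applying it by hand would look like:
+	//   kubectl -n openshift-adp annotate serviceaccount velero \
+	//     azure.workload.identity/client-id=<AZURE_CLIENT_ID>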
+ // See: https://azure.github.io/azure-workload-identity/docs/topics/service-account-labels-and-annotations.html#service-account + // sa.Annotations["azure.workload.identity/client-id"] = clientID // Apply the patch if err := clientInstance.Patch(context.Background(), sa, client.MergeFrom(originalSA)); err != nil { diff --git a/pkg/credentials/stsflow/stsflow_test.go b/pkg/credentials/stsflow/stsflow_test.go index f86e4335a0..599567813e 100644 --- a/pkg/credentials/stsflow/stsflow_test.go +++ b/pkg/credentials/stsflow/stsflow_test.go @@ -629,6 +629,8 @@ AZURE_CLOUD_NAME=AzurePublicCloud Namespace: testNamespace, }, saResult) assert.NoError(t, err) + // Annotation is commented out in implementation, so we shouldn't check for it + // assert.Equal(t, clientID, saResult.Annotations["azure.workload.identity/client-id"]) }) t.Run("Azure secret creation continues even if service account annotation fails", func(t *testing.T) {