diff --git a/.github/workflows/test_gcp_integration.yaml b/.github/workflows/test_gcp_integration.yaml index 71bb6c0791..7520cadf1a 100644 --- a/.github/workflows/test_gcp_integration.yaml +++ b/.github/workflows/test_gcp_integration.yaml @@ -73,7 +73,6 @@ jobs: - name: 'Authenticate to GCP' uses: 'google-github-actions/auth@v1' with: - token_format: access_token workload_identity_provider: ${{ env.GCP_WORKFLOW_PROVIDER }} service_account: ${{ env.GCP_SERVICE_ACCOUNT }} diff --git a/pyproject.toml b/pyproject.toml index e7b0629b34..37d209c45c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,12 @@ dependencies = [ "bcrypt==4.0.1", "boto3==1.34.63", "cloudflare==2.11.7", + "google-auth==2.31.0", + "google-cloud-compute==1.19.1", + "google-cloud-container==2.49.0", + "google-cloud-iam==2.15.1", + "google-cloud-storage==2.18.0", + "grpc-google-iam-v1==0.13.1", "jinja2", "kubernetes==27.2.0", "pluggy==1.3.0", diff --git a/src/_nebari/provider/cicd/github.py b/src/_nebari/provider/cicd/github.py index 2563af6ad9..0c9003ecdd 100644 --- a/src/_nebari/provider/cicd/github.py +++ b/src/_nebari/provider/cicd/github.py @@ -201,16 +201,6 @@ def setup_python_step(): ) -def setup_gcloud(): - return GHA_job_step( - name="Setup gcloud", - uses="google-github-actions/auth@v1", - with_={ - "credentials_json": "${{ secrets.GOOGLE_CREDENTIALS }}", - }, - ) - - def install_nebari_step(nebari_version): return GHA_job_step(name="Install Nebari", run=pip_install_nebari(nebari_version)) @@ -226,9 +216,6 @@ def gen_nebari_ops(config): step3 = install_nebari_step(config.nebari_version) gha_steps = [step1, step2, step3] - if config.provider == schema.ProviderEnum.gcp: - gha_steps.append(setup_gcloud()) - for step in config.ci_cd.before_script: gha_steps.append(GHA_job_step(**step)) diff --git a/src/_nebari/provider/cloud/google_cloud.py b/src/_nebari/provider/cloud/google_cloud.py index 67d0ebad7a..6b54e40e9d 100644 --- a/src/_nebari/provider/cloud/google_cloud.py +++ b/src/_nebari/provider/cloud/google_cloud.py @@ -1,7 +1,11 @@ import functools import json -import subprocess -from typing import Dict, List, Set +import os +from typing import List, Set + +import google.api_core.exceptions +from google.auth import load_credentials_from_dict, load_credentials_from_file +from google.cloud import compute_v1, container_v1, iam_admin_v1, storage from _nebari.constants import GCP_ENV_DOCS from _nebari.provider.cloud.commons import filter_by_highest_supported_k8s_version @@ -15,232 +19,144 @@ def check_credentials() -> None: @functools.lru_cache() -def projects() -> Dict[str, str]: - """Return a dict of available projects.""" +def load_credentials(): check_credentials() - output = subprocess.check_output( - ["gcloud", "projects", "list", "--format=json(name,projectId)"] - ) - data = json.loads(output) - return {_["name"]: _["projectId"] for _ in data} + credentials = os.environ["GOOGLE_CREDENTIALS"] + project_id = os.environ["PROJECT_ID"] + + # Scopes need to be explicitly defined when using workload identity + # federation. 
+    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+
+    # GOOGLE_CREDENTIALS may hold either a path to a credentials file or the
+    # JSON payload itself (e.g. when injected from a GHA secret), so work out
+    # which form was provided before loading it.
+    if credentials.endswith(".json"):
+        loaded_credentials, _ = load_credentials_from_file(credentials, scopes=scopes)
+    else:
+        loaded_credentials, _ = load_credentials_from_dict(
+            json.loads(credentials), scopes=scopes
+        )
+
+    return loaded_credentials, project_id


 @functools.lru_cache()
 def regions() -> Set[str]:
-    """Return a set of available regions."""
-    check_credentials()
-    output = subprocess.check_output(
-        ["gcloud", "compute", "regions", "list", "--format=json(name)"]
-    )
-    data = json.loads(output)
-    return {_["name"] for _ in data}
+    """Return a set of available regions."""
+    credentials, project_id = load_credentials()
+    client = compute_v1.RegionsClient(credentials=credentials)
+    response = client.list(project=project_id)

-
-@functools.lru_cache()
-def zones(project: str, region: str) -> Dict[str, str]:
-    """Return a dict of available zones."""
-    check_credentials()
-    output = subprocess.check_output(
-        ["gcloud", "compute", "zones", "list", "--project", project, "--format=json"]
-    )
-    data = json.loads(output.decode("utf-8"))
-    return {_["description"]: _["name"] for _ in data if _["name"].startswith(region)}
+    return {region.name for region in response}


 @functools.lru_cache()
 def kubernetes_versions(region: str) -> List[str]:
     """Return list of available kubernetes supported by cloud provider. Sorted from oldest to latest."""
-    check_credentials()
-    output = subprocess.check_output(
-        [
-            "gcloud",
-            "container",
-            "get-server-config",
-            "--region",
-            region,
-            "--format=json",
-        ]
+    credentials, project_id = load_credentials()
+    client = container_v1.ClusterManagerClient(credentials=credentials)
+    response = client.get_server_config(
+        name=f"projects/{project_id}/locations/{region}"
     )
-    data = json.loads(output.decode("utf-8"))
-    supported_kubernetes_versions = sorted([_ for _ in data["validMasterVersions"]])
-    return filter_by_highest_supported_k8s_version(supported_kubernetes_versions)
-
-
-@functools.lru_cache()
-def instances(project: str) -> Dict[str, str]:
-    """Return a dict of available instances."""
-    check_credentials()
-    output = subprocess.check_output(
-        [
-            "gcloud",
-            "compute",
-            "machine-types",
-            "list",
-            "--project",
-            project,
-            "--format=json",
-        ]
-    )
-    data = json.loads(output.decode("utf-8"))
-    return {_["description"]: _["name"] for _ in data}
-
+    supported_kubernetes_versions = sorted(response.valid_master_versions)

-def activated_services() -> Set[str]:
-    """Return a list of activated services."""
-    check_credentials()
-    output = subprocess.check_output(
-        [
-            "gcloud",
-            "services",
-            "list",
-            "--enabled",
-            "--format=json(config.title)",
-        ]
-    )
-    data = json.loads(output)
-    return {service["config"]["title"] for service in data}
+    return filter_by_highest_supported_k8s_version(supported_kubernetes_versions)


-def cluster_exists(cluster_name: str, project_id: str, region: str) -> bool:
+def cluster_exists(cluster_name: str, region: str) -> bool:
     """Check if a GKE cluster exists."""
+    credentials, project_id = load_credentials()
+    client = container_v1.ClusterManagerClient(credentials=credentials)
+
     try:
-        subprocess.check_output(
-            [
-                "gcloud",
-                "container",
-                "clusters",
-                "describe",
-                cluster_name,
-                "--project",
-                project_id,
-                "--region",
-                region,
-            ]
+        client.get_cluster(
name=f"projects/{project_id}/locations/{region}/clusters/{cluster_name}" ) - return True - except subprocess.CalledProcessError: + except google.api_core.exceptions.NotFound: return False + return True -def bucket_exists(bucket_name: str, project_id: str) -> bool: +def bucket_exists(bucket_name: str) -> bool: """Check if a storage bucket exists.""" + credentials, _ = load_credentials() + client = storage.Client(credentials=credentials) + try: - print(f"Checking if bucket {bucket_name} exists in project {project_id}.") - subprocess.check_output( - [ - "gsutil", - "ls", - f"gs://{bucket_name}/", - "-p", - project_id, - ] - ) - return True - except subprocess.CalledProcessError: + client.get_bucket(bucket_name) + except google.api_core.exceptions.NotFound: return False + return True -def service_account_exists(service_account_name: str, project_id: str) -> bool: +def service_account_exists(service_account_name: str) -> bool: """Check if a service account exists.""" + credentials, project_id = load_credentials() + client = iam_admin_v1.IAMClient(credentials=credentials) + + service_account_path = client.service_account_path(project_id, service_account_name) try: - subprocess.check_output( - [ - "gcloud", - "iam", - "service-accounts", - "describe", - service_account_name, - "--project", - project_id, - ] - ) - return True - except subprocess.CalledProcessError: + client.get_service_account(name=service_account_path) + except google.api_core.exceptions.NotFound: return False + return True -def delete_cluster(cluster_name: str, project_id: str, region: str): +def delete_cluster(cluster_name: str, region: str): """Delete a GKE cluster if it exists.""" - check_credentials() - - if not cluster_exists(cluster_name, project_id, region): + credentials, project_id = load_credentials() + if not cluster_exists(cluster_name, region): print( f"Cluster {cluster_name} does not exist in project {project_id}, region {region}. Exiting gracefully." ) return + client = container_v1.ClusterManagerClient(credentials=credentials) try: - subprocess.check_call( - [ - "gcloud", - "container", - "clusters", - "delete", - cluster_name, - "--project", - project_id, - "--region", - region, - "--quiet", - ] + client.delete_cluster( + name=f"projects/{project_id}/locations/{region}/clusters/{cluster_name}" ) print(f"Successfully deleted cluster {cluster_name}.") - except subprocess.CalledProcessError as e: - print(f"Failed to delete cluster {cluster_name}. Error: {e}") + except google.api_core.exceptions.GoogleAPIError as e: + print(f"Failed to delete bucket {bucket_name}. Error: {e}") -def delete_storage_bucket(bucket_name: str, project_id: str): +def delete_storage_bucket(bucket_name: str): """Delete a storage bucket if it exists.""" - check_credentials() + credentials, project_id = load_credentials() - if not bucket_exists(bucket_name, project_id): + if not bucket_exists(bucket_name): print( f"Bucket {bucket_name} does not exist in project {project_id}. Exiting gracefully." ) return + client = storage.Client(credentials=credentials) + bucket = client.get_bucket(bucket_name) try: - subprocess.check_call( - [ - "gsutil", - "-m", - "rm", - "-r", - f"gs://{bucket_name}", - "-p", - project_id, - ] - ) + bucket.delete(force=True) print(f"Successfully deleted bucket {bucket_name}.") - except subprocess.CalledProcessError as e: + except google.api_core.exceptions.GoogleAPIError as e: print(f"Failed to delete bucket {bucket_name}. 
Error: {e}") -def delete_service_account(service_account_name: str, project_id: str): +def delete_service_account(service_account_name: str): """Delete a service account if it exists.""" - check_credentials() + credentials, project_id = load_credentials() - if not service_account_exists(service_account_name, project_id): + if not service_account_exists(service_account_name): print( f"Service account {service_account_name} does not exist in project {project_id}. Exiting gracefully." ) return + client = iam_admin_v1.IAMClient(credentials=credentials) + service_account_path = client.service_account_path(project_id, service_account_name) try: - subprocess.check_call( - [ - "gcloud", - "iam", - "service-accounts", - "delete", - service_account_name, - "--quiet", - "--project", - project_id, - ] - ) + client.delete_service_account(name=service_account_path) print(f"Successfully deleted service account {service_account_name}.") - except subprocess.CalledProcessError as e: + except google.api_core.exceptions.GoogleAPIError as e: print(f"Failed to delete service account {service_account_name}. Error: {e}") @@ -257,43 +173,6 @@ def gcp_cleanup(config: schema.Main): f"{project_name}-{namespace}@{project_id}.iam.gserviceaccount.com" ) - delete_cluster(cluster_name, project_id, region) - delete_storage_bucket(bucket_name, project_id) - delete_service_account(service_account_name, project_id) - - -def check_missing_service() -> None: - """Check if all required services are activated.""" - required = { - "Compute Engine API", - "Kubernetes Engine API", - "Cloud Monitoring API", - "Cloud Autoscaling API", - "Identity and Access Management (IAM) API", - "Cloud Resource Manager API", - } - activated = activated_services() - common = required.intersection(activated) - missing = required.difference(common) - if missing: - raise ValueError( - f"""Missing required services: {missing}\n - Please see the documentation for more information: {GCP_ENV_DOCS}""" - ) - - -# Getting pricing data could come from here -# https://cloudpricingcalculator.appspot.com/static/data/pricelist.json - - -### PYDANTIC VALIDATORS ### - - -def validate_region(region: str) -> str: - """Validate the GCP region is valid.""" - available_regions = regions() - if region not in available_regions: - raise ValueError( - f"Region {region} is not one of available regions {available_regions}" - ) - return region + delete_cluster(cluster_name, region) + delete_storage_bucket(bucket_name) + delete_service_account(service_account_name) diff --git a/src/_nebari/stages/infrastructure/__init__.py b/src/_nebari/stages/infrastructure/__init__.py index 0820940f20..498b2c9b1d 100644 --- a/src/_nebari/stages/infrastructure/__init__.py +++ b/src/_nebari/stages/infrastructure/__init__.py @@ -356,7 +356,6 @@ class GoogleCloudPlatformProvider(schema.Base): @model_validator(mode="before") @classmethod def _check_input(cls, data: Any) -> Any: - google_cloud.check_credentials() available_regions = google_cloud.regions() if data["region"] not in available_regions: raise ValueError( diff --git a/tests/tests_unit/test_provider.py b/tests/tests_unit/test_provider.py deleted file mode 100644 index 3c4f35a1d0..0000000000 --- a/tests/tests_unit/test_provider.py +++ /dev/null @@ -1,54 +0,0 @@ -from contextlib import nullcontext - -import pytest - -from _nebari.provider.cloud.google_cloud import check_missing_service - - -@pytest.mark.parametrize( - "activated_services, exception", - [ - ( - { - "Compute Engine API", - "Kubernetes Engine API", - "Cloud Monitoring API", - 
"Cloud Autoscaling API", - "Identity and Access Management (IAM) API", - "Cloud Resource Manager API", - }, - nullcontext(), - ), - ( - { - "Compute Engine API", - "Kubernetes Engine API", - "Cloud Monitoring API", - "Cloud Autoscaling API", - "Identity and Access Management (IAM) API", - "Cloud Resource Manager API", - "Cloud SQL Admin API", - }, - nullcontext(), - ), - ( - { - "Compute Engine API", - "Kubernetes Engine API", - "Cloud Monitoring API", - "Cloud Autoscaling API", - "Cloud SQL Admin API", - }, - pytest.raises(ValueError, match=r"Missing required services:.*"), - ), - ], -) -def test_gcp_missing_service(monkeypatch, activated_services, exception): - def mock_return(): - return activated_services - - monkeypatch.setattr( - "_nebari.provider.cloud.google_cloud.activated_services", mock_return - ) - with exception: - check_missing_service()