From dc9f4a55e7dbe5bd0e17ca2d77c19c4f626d26d7 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 09:44:35 +0100 Subject: [PATCH 01/15] Google Cloud Bucket upload fails Fixes #257 --- .github/workflows/pr_code_changes.yaml | 6 ++++++ test_upload.py | 8 ++++++++ 2 files changed, 14 insertions(+) create mode 100644 test_upload.py diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index 1e21595f..c3ac4d6d 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -26,6 +26,10 @@ jobs: uses: actions/checkout@v2 - name: Install uv uses: astral-sh/setup-uv@v5 + - uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" + service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - name: Set up Python uses: actions/setup-python@v2 @@ -34,6 +38,8 @@ jobs: - name: Install package run: uv pip install -e .[dev] --system + - name: Test data upload + run: python test_upload.py - name: Download data inputs run: make download env: diff --git a/test_upload.py b/test_upload.py new file mode 100644 index 00000000..23341d1f --- /dev/null +++ b/test_upload.py @@ -0,0 +1,8 @@ + +from google.cloud import storage + +storage_client = storage.Client() +bucket = storage_client.bucket("policyengine-us-data") +blob = "README.md" +blob = bucket.blob(blob) +blob.upload_from_filename("README.md") \ No newline at end of file From 0e8a4fa65061a3116d2b0bc946a5fb0491062871 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 09:57:18 +0100 Subject: [PATCH 02/15] No-change --- policyengine_us_data/storage/upload_completed_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py index 72c59a3d..39211369 100644 --- a/policyengine_us_data/storage/upload_completed_datasets.py +++ b/policyengine_us_data/storage/upload_completed_datasets.py @@ -7,7 +7,7 @@ from policyengine_us_data.utils.huggingface import upload from google.cloud import storage - + def upload_datasets(): storage_client = storage.Client() bucket = storage_client.bucket("policyengine-us-data") From d5d9cbddee34512b00565475c65dc9e4c8fe2438 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 09:59:43 +0100 Subject: [PATCH 03/15] Add perms --- .github/workflows/pr_code_changes.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index c3ac4d6d..96f421b2 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -20,6 +20,10 @@ jobs: with: args: ". -l 79 --check" Test: + permissions: + contents: "read" + # Required to auth against gcp + id-token: "write" runs-on: ubuntu-latest steps: - name: Checkout repo From 6ff8e02de855b3c43bdd198e9a642369bfce240f Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 10:26:07 +0100 Subject: [PATCH 04/15] Use GCP creds --- .github/workflows/pr_code_changes.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index 96f421b2..c18233b4 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -32,8 +32,7 @@ jobs: uses: astral-sh/setup-uv@v5 - uses: "google-github-actions/auth@v2" with: - workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" - service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" + credentials_json: "${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}" - name: Set up Python uses: actions/setup-python@v2 From 635d7f01ef3c17ccbc53b865e1c784e3bf1cb8c8 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 10:34:54 +0100 Subject: [PATCH 05/15] Move GCP auth code to main action --- .github/workflows/code_changes.yaml | 3 +-- .github/workflows/pr_code_changes.yaml | 5 ----- policyengine_us_data/storage/upload_completed_datasets.py | 2 +- test_upload.py | 8 -------- 4 files changed, 2 insertions(+), 16 deletions(-) delete mode 100644 test_upload.py diff --git a/.github/workflows/code_changes.yaml b/.github/workflows/code_changes.yaml index a15ae9fc..784c357d 100644 --- a/.github/workflows/code_changes.yaml +++ b/.github/workflows/code_changes.yaml @@ -38,8 +38,7 @@ jobs: python-version: '3.11' - uses: "google-github-actions/auth@v2" with: - workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" - service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" + credentials_json: "${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}" - name: Install package run: uv pip install -e .[dev] --system diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index c18233b4..5b618f07 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -30,9 +30,6 @@ jobs: uses: actions/checkout@v2 - name: Install uv uses: astral-sh/setup-uv@v5 - - uses: "google-github-actions/auth@v2" - with: - credentials_json: "${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}" - name: Set up Python uses: actions/setup-python@v2 @@ -41,8 +38,6 @@ jobs: - name: Install package run: uv pip install -e .[dev] --system - - name: Test data upload - run: python test_upload.py - name: Download data inputs run: make download env: diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py index 39211369..72c59a3d 100644 --- a/policyengine_us_data/storage/upload_completed_datasets.py +++ b/policyengine_us_data/storage/upload_completed_datasets.py @@ -7,7 +7,7 @@ from policyengine_us_data.utils.huggingface import upload from google.cloud import storage - + def upload_datasets(): storage_client = storage.Client() bucket = storage_client.bucket("policyengine-us-data") diff --git a/test_upload.py b/test_upload.py deleted file mode 100644 index 23341d1f..00000000 --- a/test_upload.py +++ /dev/null @@ -1,8 +0,0 @@ - -from google.cloud import storage - -storage_client = storage.Client() -bucket = storage_client.bucket("policyengine-us-data") -blob = "README.md" -blob = bucket.blob(blob) -blob.upload_from_filename("README.md") \ No newline at end of file From 14697210879668112a9f57ac906949324f0ddfd9 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 10:42:57 +0100 Subject: [PATCH 06/15] Try WIF again --- .github/workflows/pr_code_changes.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index 5b618f07..96f421b2 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -30,6 +30,10 @@ jobs: uses: actions/checkout@v2 - name: Install uv uses: astral-sh/setup-uv@v5 + - uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" + service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - name: Set up Python uses: actions/setup-python@v2 @@ -38,6 +42,8 @@ jobs: - name: Install package run: uv pip install -e .[dev] --system + - name: Test data upload + run: python test_upload.py - name: Download data inputs run: make download env: From 9bdbae2dd21d267abb9cb4aee49b88b422383e8a Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 10:43:40 +0100 Subject: [PATCH 07/15] No-change --- policyengine_us_data/storage/download_private_prerequisites.py | 1 + 1 file changed, 1 insertion(+) diff --git a/policyengine_us_data/storage/download_private_prerequisites.py b/policyengine_us_data/storage/download_private_prerequisites.py index c86e7189..0a430e46 100644 --- a/policyengine_us_data/storage/download_private_prerequisites.py +++ b/policyengine_us_data/storage/download_private_prerequisites.py @@ -15,3 +15,4 @@ local_folder=FOLDER, version=None, ) + \ No newline at end of file From b68668ba37beee894ee9541b26608db063e74782 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 10:53:41 +0100 Subject: [PATCH 08/15] Re-add test script --- test_upload.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 test_upload.py diff --git a/test_upload.py b/test_upload.py new file mode 100644 index 00000000..849a426a --- /dev/null +++ b/test_upload.py @@ -0,0 +1,8 @@ + +from google.cloud import storage + +storage_client = storage.Client() +bucket = storage_client.bucket("policyengine-us-data") +blob = "README.md" +blob = bucket.blob(blob) +blob.upload_from_filename("README.md") From f2c5d00e2ace71e6b2ba226819b446fac2253b73 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 10:53:49 +0100 Subject: [PATCH 09/15] No-change --- policyengine_us_data/storage/download_private_prerequisites.py | 1 - 1 file changed, 1 deletion(-) diff --git a/policyengine_us_data/storage/download_private_prerequisites.py b/policyengine_us_data/storage/download_private_prerequisites.py index 0a430e46..c86e7189 100644 --- a/policyengine_us_data/storage/download_private_prerequisites.py +++ b/policyengine_us_data/storage/download_private_prerequisites.py @@ -15,4 +15,3 @@ local_folder=FOLDER, version=None, ) - \ No newline at end of file From 7ba731abf352af153b4c2cf85a99626c5c62a240 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 10:54:33 +0100 Subject: [PATCH 10/15] No-change --- policyengine_us_data/utils/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/policyengine_us_data/utils/github.py b/policyengine_us_data/utils/github.py index 007a8d68..e810c282 100644 --- a/policyengine_us_data/utils/github.py +++ b/policyengine_us_data/utils/github.py @@ -10,7 +10,7 @@ "Authorization": f"token {os.environ.get('POLICYENGINE_US_DATA_GITHUB_TOKEN')}", } - + def get_asset_url( org: str, repo: str, release_tag: str, file_name: str ) -> str: From b0e72ac2ca2f545d3c3a9fd78d266df4ed6a286f Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 11:26:35 +0100 Subject: [PATCH 11/15] Add google auth step --- pyproject.toml | 1 + test_upload.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7f0c0c37..765adf77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "microimpute", "pip-system-certs", "google-cloud-storage", + "google-auth", ] [project.optional-dependencies] diff --git a/test_upload.py b/test_upload.py index 849a426a..fc0c7047 100644 --- a/test_upload.py +++ b/test_upload.py @@ -1,7 +1,10 @@ from google.cloud import storage +import google.auth -storage_client = storage.Client() +credentials, project_id = google.auth.default() + +storage_client = storage.Client(credentials=credentials, project=project_id) bucket = storage_client.bucket("policyengine-us-data") blob = "README.md" blob = bucket.blob(blob) From 9161ea924e8455fe7cb73a372d55296c02040950 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 11:30:16 +0100 Subject: [PATCH 12/15] Move GCP code back to prod config --- .github/workflows/code_changes.yaml | 3 ++- .github/workflows/pr_code_changes.yaml | 6 ------ .../storage/upload_completed_datasets.py | 5 ++++- test_upload.py | 11 ----------- 4 files changed, 6 insertions(+), 19 deletions(-) delete mode 100644 test_upload.py diff --git a/.github/workflows/code_changes.yaml b/.github/workflows/code_changes.yaml index 784c357d..a15ae9fc 100644 --- a/.github/workflows/code_changes.yaml +++ b/.github/workflows/code_changes.yaml @@ -38,7 +38,8 @@ jobs: python-version: '3.11' - uses: "google-github-actions/auth@v2" with: - credentials_json: "${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}" + workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" + service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - name: Install package run: uv pip install -e .[dev] --system diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index 96f421b2..5b618f07 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -30,10 +30,6 @@ jobs: uses: actions/checkout@v2 - name: Install uv uses: astral-sh/setup-uv@v5 - - uses: "google-github-actions/auth@v2" - with: - workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" - service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - name: Set up Python uses: actions/setup-python@v2 @@ -42,8 +38,6 @@ jobs: - name: Install package run: uv pip install -e .[dev] --system - - name: Test data upload - run: python test_upload.py - name: Download data inputs run: make download env: diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py index 72c59a3d..09137b3d 100644 --- a/policyengine_us_data/storage/upload_completed_datasets.py +++ b/policyengine_us_data/storage/upload_completed_datasets.py @@ -6,10 +6,13 @@ from policyengine_us_data.storage import STORAGE_FOLDER from policyengine_us_data.utils.huggingface import upload from google.cloud import storage +import google.auth + def upload_datasets(): - storage_client = storage.Client() + credentials, project_id = google.auth.default() + storage_client = storage.Client(credentials=credentials, project=project_id) bucket = storage_client.bucket("policyengine-us-data") datasets_to_upload = [ diff --git a/test_upload.py b/test_upload.py deleted file mode 100644 index fc0c7047..00000000 --- a/test_upload.py +++ /dev/null @@ -1,11 +0,0 @@ - -from google.cloud import storage -import google.auth - -credentials, project_id = google.auth.default() - -storage_client = storage.Client(credentials=credentials, project=project_id) -bucket = storage_client.bucket("policyengine-us-data") -blob = "README.md" -blob = bucket.blob(blob) -blob.upload_from_filename("README.md") From b236a4843cee4401ba2cf6bd509077c95b1ca7bc Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 11:30:37 +0100 Subject: [PATCH 13/15] Versioning --- changelog_entry.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..12d1b8ce 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - GCP uploads use permissions correctly From d19331f34e2c07d5d7571e49f649ba3a487e3b83 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 11:30:58 +0100 Subject: [PATCH 14/15] Format --- policyengine_us_data/storage/upload_completed_datasets.py | 5 +++-- policyengine_us_data/utils/github.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py index 09137b3d..84f267d8 100644 --- a/policyengine_us_data/storage/upload_completed_datasets.py +++ b/policyengine_us_data/storage/upload_completed_datasets.py @@ -9,10 +9,11 @@ import google.auth - def upload_datasets(): credentials, project_id = google.auth.default() - storage_client = storage.Client(credentials=credentials, project=project_id) + storage_client = storage.Client( + credentials=credentials, project=project_id + ) bucket = storage_client.bucket("policyengine-us-data") datasets_to_upload = [ diff --git a/policyengine_us_data/utils/github.py b/policyengine_us_data/utils/github.py index e810c282..007a8d68 100644 --- a/policyengine_us_data/utils/github.py +++ b/policyengine_us_data/utils/github.py @@ -10,7 +10,7 @@ "Authorization": f"token {os.environ.get('POLICYENGINE_US_DATA_GITHUB_TOKEN')}", } - + def get_asset_url( org: str, repo: str, release_tag: str, file_name: str ) -> str: From 8ad05cd67ae6dcf1febc4d907731bb3c4a5bf55d Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Tue, 20 May 2025 11:31:46 +0100 Subject: [PATCH 15/15] Remove unnecessary permissions --- .github/workflows/pr_code_changes.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index 5b618f07..1e21595f 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -20,10 +20,6 @@ jobs: with: args: ". -l 79 --check" Test: - permissions: - contents: "read" - # Required to auth against gcp - id-token: "write" runs-on: ubuntu-latest steps: - name: Checkout repo