Skip to content

Commit e7d330c

Browse files
committed
try setting up impl permissions
1 parent 82d8862 commit e7d330c

File tree

3 files changed

+84
-13
lines changed

3 files changed

+84
-13
lines changed

.github/workflows/deploy_semver_udf.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,11 @@ jobs:
2626
with:
2727
working-dir: ./semver_udf
2828
script: |
29-
dbt build
29+
dbt build
30+
31+
- name: permission schemas
32+
uses: ./.github/actions/run_in_venv
33+
with:
34+
working-dir: ./semver_udf
35+
script: |
36+
python published_permissions.py

.github/workflows/pipeline.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,16 @@ jobs:
3232
needs:
3333
- refresh-semver-udf
3434

35-
refresh_vulns:
36-
uses: ./.github/workflows/deploy_vulns.yml
37-
permissions:
38-
contents: read
39-
id-token: write
40-
actions: read
41-
pages: write
42-
secrets: inherit
43-
needs:
44-
- refresh-semver-udf
45-
- refresh-safety
46-
- refresh-pypi
35+
# refresh_vulns:
36+
# uses: ./.github/workflows/deploy_vulns.yml
37+
# permissions:
38+
# contents: read
39+
# id-token: write
40+
# actions: read
41+
# pages: write
42+
# secrets: inherit
43+
# needs:
44+
# - refresh-semver-udf
45+
# - refresh-safety
46+
# - refresh-pypi
4747

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import json
2+
import pathlib
3+
4+
from google.cloud import bigquery
5+
6+
def is_duplicate_dataset_access_entry(e1, e2):
    """Return True when two BigQuery access entries describe the same
    authorized dataset.

    Only the ``dataset`` portion of each entry's API representation is
    compared; serializing with ``sort_keys=True`` makes the comparison
    insensitive to key ordering.
    """
    def _canonical(entry):
        dataset_part = entry.to_api_repr().get('dataset')
        return json.dumps(dataset_part, sort_keys=True)

    return _canonical(e1) == _canonical(e2)
11+
12+
if __name__ == '__main__':
    # Read the dbt manifest (produced by `dbt build`) to discover every
    # model's database (GCP project) and schema (BigQuery dataset).
    manifest_path = pathlib.Path.cwd() / 'target' / 'manifest.json'
    manifest = json.loads(manifest_path.read_text(encoding='utf-8'))

    databases = set()
    schemas = set()
    for node in manifest['nodes'].values():
        if node['resource_type'] == 'model':
            schemas.add(node['schema'])
            databases.add(node['database'])

    # The authorized-dataset entries below assume a single project; bail out
    # loudly (also covers an empty manifest) and say what we actually found.
    if len(databases) != 1:
        raise ValueError(
            f"Expected exactly one database in manifest, found {sorted(databases)!r}. "
            "This script supports only one database."
        )

    project_id = next(iter(databases))
    # Schemas ending in `_impl` are private implementation datasets; all
    # others are published datasets whose views need read access into them.
    published_schemas = {schema for schema in schemas if not schema.endswith('_impl')}
    impl_schemas = schemas.difference(published_schemas)

    client = bigquery.Client()

    # One "authorized dataset" access entry per published schema, restricted
    # to views (targetTypes == ['VIEWS']).
    dataset_access_entries = [
        bigquery.AccessEntry(
            entity_type='dataset',
            entity_id={
                'dataset': {
                    'projectId': project_id,
                    'datasetId': schema,
                },
                'targetTypes': ['VIEWS'],
            },
        )
        for schema in published_schemas
    ]

    for schema in impl_schemas:
        # Qualify the dataset with the manifest's project so we do not
        # silently resolve against the client's default project when the two
        # differ.
        dataset = client.get_dataset(f'{project_id}.{schema}')
        current_access_entries = dataset.access_entries
        final_access_entries = current_access_entries.copy()

        # Append only the published-dataset entries that are not already
        # present, so re-runs are idempotent.
        for published_entry in dataset_access_entries:
            already_present = any(
                is_duplicate_dataset_access_entry(current_entry, published_entry)
                for current_entry in current_access_entries
            )
            if not already_present:
                print(f'Adding new access entry to dataset {schema}: {published_entry}')
                final_access_entries.append(published_entry)

        dataset.access_entries = final_access_entries
        # Update only the access_entries field to avoid clobbering concurrent
        # changes to other dataset properties.
        client.update_dataset(dataset, ['access_entries'])
64+

0 commit comments

Comments
 (0)