Commit 017a391

Add support for datasets in other projects (#8)
1 parent 5bbd867 commit 017a391

2 files changed: +32 additions, -11 deletions

main.py

Lines changed: 8 additions & 1 deletion

@@ -19,7 +19,12 @@
     arrow.Arrow, lambda o: o.format("YYYY-MM-DD HH:mm:ss ZZ")
 )
 
+DEFAULT_PROJECT = os.environ.get("GCP_PROJECT", "the-psf")
 # Multiple datasets can be specified by separating them with whitespace
+# Datasets in other projects can be referenced by using the full dataset id:
+# <project_id>.<dataset_name>
+# If only the dataset name is provided (no separating period) the
+# DEFAULT_PROJECT will be used as the project ID.
 DATASETS = os.environ.get("BIGQUERY_DATASET", "").strip().split()
 SIMPLE_TABLE = os.environ.get("BIGQUERY_SIMPLE_TABLE")
 DOWNLOAD_TABLE = os.environ.get("BIGQUERY_DOWNLOAD_TABLE")
@@ -87,7 +92,9 @@ def process_fastly_log(data, context):
     job_config.ignore_unknown_values = True
 
     for DATASET in DATASETS:
-        dataset_ref = bigquery_client.dataset(DATASET)
+        dataset_ref = bigquery.dataset.DatasetReference.from_string(
+            DATASET, default_project=DEFAULT_PROJECT
+        )
         if download_lines > 0:
             load_job = bigquery_client.load_table_from_file(
                 download_results_file,
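
For context, a minimal sketch (not part of the commit) of how the new DEFAULT_PROJECT fallback behaves: google.cloud.bigquery's DatasetReference.from_string keeps an explicit "<project_id>.<dataset_name>" prefix and only falls back to default_project for bare dataset names. The environment values and dataset names below are hypothetical.

import os

from google.cloud import bigquery

# Hypothetical environment, mirroring the variables read in main.py
os.environ.setdefault("GCP_PROJECT", "my-default-project")
os.environ.setdefault("BIGQUERY_DATASET", "local_dataset other-project.remote_dataset")

DEFAULT_PROJECT = os.environ.get("GCP_PROJECT", "the-psf")
DATASETS = os.environ.get("BIGQUERY_DATASET", "").strip().split()

for dataset in DATASETS:
    # "other-project.remote_dataset" keeps its explicit project id;
    # "local_dataset" resolves against DEFAULT_PROJECT.
    ref = bigquery.dataset.DatasetReference.from_string(
        dataset, default_project=DEFAULT_PROJECT
    )
    print(ref.project, ref.dataset_id)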

test_function.py

Lines changed: 24 additions & 10 deletions

@@ -6,19 +6,26 @@
 
 import main
 
+GCP_PROJECT = "my-gcp-project"
 BIGQUERY_DATASET = "my-bigquery-dataset"
 BIGQUERY_SIMPLE_TABLE = "my-simple-table"
 BIGQUERY_DOWNLOAD_TABLE = "my-download-table"
 RESULT_BUCKET = "my-result-bucket"
 
 
 @pytest.mark.parametrize(
-    "bigquery_dataset, expected_dataset_calls",
+    "bigquery_dataset, expected_from_string_calls",
     [
-        ("my-bigquery-dataset", [pretend.call("my-bigquery-dataset")]),
+        (
+            "my-bigquery-dataset",
+            [pretend.call("my-bigquery-dataset", default_project=GCP_PROJECT)],
+        ),
         (
             "my-bigquery-dataset some-other-dataset",
-            [pretend.call("my-bigquery-dataset"), pretend.call("some-other-dataset")],
+            [
+                pretend.call("my-bigquery-dataset", default_project=GCP_PROJECT),
+                pretend.call("some-other-dataset", default_project=GCP_PROJECT),
+            ],
         ),
     ],
 )
@@ -45,8 +52,9 @@ def test_function(
     table_name,
     expected,
     bigquery_dataset,
-    expected_dataset_calls,
+    expected_from_string_calls,
 ):
+    monkeypatch.setenv("GCP_PROJECT", GCP_PROJECT)
     monkeypatch.setenv("BIGQUERY_DATASET", bigquery_dataset)
     monkeypatch.setenv("BIGQUERY_SIMPLE_TABLE", BIGQUERY_SIMPLE_TABLE)
     monkeypatch.setenv("BIGQUERY_DOWNLOAD_TABLE", BIGQUERY_DOWNLOAD_TABLE)
@@ -82,17 +90,21 @@ def _load_table_from_file(fh, *a, **kw):
         return load_job_stub
 
     bigquery_client_stub = pretend.stub(
-        dataset=pretend.call_recorder(lambda a: dataset_stub),
         load_table_from_file=pretend.call_recorder(_load_table_from_file),
     )
     job_config_stub = pretend.stub()
+    dataset_reference_stub = pretend.stub(
+        from_string=pretend.call_recorder(lambda *a, **kw: dataset_stub)
+    )
+
     monkeypatch.setattr(
         main,
         "bigquery",
         pretend.stub(
             Client=lambda: bigquery_client_stub,
             LoadJobConfig=lambda: job_config_stub,
             SourceFormat=pretend.stub(NEWLINE_DELIMITED_JSON=pretend.stub()),
+            dataset=pretend.stub(DatasetReference=dataset_reference_stub),
         ),
     )
 
@@ -106,9 +118,9 @@ def _load_table_from_file(fh, *a, **kw):
 
     assert storage_client_stub.bucket.calls == [pretend.call("my-bucket")] + [
         pretend.call(RESULT_BUCKET),
-    ] * len(expected_dataset_calls)
+    ] * len(expected_from_string_calls)
     assert bucket_stub.get_blob.calls == [pretend.call(log_filename)]
-    assert bigquery_client_stub.dataset.calls == expected_dataset_calls
+    assert dataset_reference_stub.from_string.calls == expected_from_string_calls
     assert bigquery_client_stub.load_table_from_file.calls == [
         pretend.call(
             bigquery_client_stub.load_table_from_file.calls[0].args[0],  # shh
@@ -118,10 +130,12 @@ def _load_table_from_file(fh, *a, **kw):
             job_config=job_config_stub,
             rewind=True,
         )
-    ] * len(expected_dataset_calls)
+    ] * len(expected_from_string_calls)
     assert dataset_stub.table.calls == [pretend.call(table_name)] * len(
-        expected_dataset_calls
+        expected_from_string_calls
     )
     assert blob_stub.delete.calls == [pretend.call()]
-    assert load_job_stub.result.calls == [pretend.call()] * len(expected_dataset_calls)
+    assert load_job_stub.result.calls == [pretend.call()] * len(
+        expected_from_string_calls
+    )
     assert load_job_stub._result == expected
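
As a side note, a minimal sketch (not part of the commit) of the pretend stubbing pattern this test relies on: pretend.call_recorder wraps a callable and records every invocation as pretend.call objects on its .calls attribute, which is what the new dataset_reference_stub.from_string assertions compare against. The names and values below are hypothetical stand-ins.

import pretend

# Stand-ins mirroring the test's stubs (names and values are hypothetical)
dataset_stub = pretend.stub(table=pretend.call_recorder(lambda name: pretend.stub()))
dataset_reference_stub = pretend.stub(
    from_string=pretend.call_recorder(lambda *a, **kw: dataset_stub)
)

# Code under test would call through the stubbed bigquery module...
ref = dataset_reference_stub.from_string(
    "some-other-dataset", default_project="my-gcp-project"
)

# ...and the test asserts on the recorded calls afterwards.
assert dataset_reference_stub.from_string.calls == [
    pretend.call("some-other-dataset", default_project="my-gcp-project")
]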
