Skip to content

Commit 90a0791

Browse files
authored
Limit to MAX_BLOBS_PER_RUN (#58)
1 parent 33880cc commit 90a0791

File tree

2 files changed: +10 additions, -5 deletions.

main.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
DATASETS = os.environ.get("BIGQUERY_DATASET", "").strip().split()
3232
SIMPLE_TABLE = os.environ.get("BIGQUERY_SIMPLE_TABLE")
3333
DOWNLOAD_TABLE = os.environ.get("BIGQUERY_DOWNLOAD_TABLE")
34+
MAX_BLOBS_PER_RUN = 5000 # Cannot exceed 10,000
3435

3536
prefix = {Simple.__name__: "simple_requests", Download.__name__: "file_downloads"}
3637

@@ -138,12 +139,16 @@ def load_processed_files_into_bigquery(event, context):
138139

139140
# Get the processed files we're loading
140141
download_prefix = f"{folder}/downloads-"
141-
download_source_blobs = bucket.list_blobs(prefix=download_prefix)
142+
download_source_blobs = bucket.list_blobs(
143+
prefix=download_prefix, max_results=MAX_BLOBS_PER_RUN
144+
)
142145
download_source_uris = [
143146
f"gs://{blob.bucket.name}/{blob.name}" for blob in download_source_blobs
144147
]
145148
simple_prefix = f"{folder}/simple-"
146-
simple_source_blobs = bucket.list_blobs(prefix=simple_prefix)
149+
simple_source_blobs = bucket.list_blobs(
150+
prefix=simple_prefix, max_results=MAX_BLOBS_PER_RUN
151+
)
147152
simple_source_uris = [
148153
f"gs://{blob.bucket.name}/{blob.name}" for blob in simple_source_blobs
149154
]

test_functions.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ def test_load_processed_files_into_bigquery(
151151

152152
blob_stub = pretend.stub(name="blobname", bucket=bucket)
153153

154-
def _generate_blob_list(prefix):
154+
def _generate_blob_list(prefix, max_results):
155155
blob_list = [blob_stub]
156156
blob_lists[prefix] = blob_list
157157
return blob_list
@@ -203,8 +203,8 @@ def _generate_blob_list(prefix):
203203
pretend.call(RESULT_BUCKET),
204204
]
205205
assert bucket_stub.list_blobs.calls == [
206-
pretend.call(prefix=f"processed/{partition}/downloads-"),
207-
pretend.call(prefix=f"processed/{partition}/simple-"),
206+
pretend.call(prefix=f"processed/{partition}/downloads-", max_results=5000),
207+
pretend.call(prefix=f"processed/{partition}/simple-", max_results=5000),
208208
]
209209
assert (
210210
load_job_stub.result.calls

0 commit comments

Comments
 (0)