@@ -115,24 +115,32 @@ def upload_from_file(self, file_handler, rewind=False):
     ],
 )
 @pytest.mark.parametrize(
-    "blobs, expected_load_jobs, expected_delete_calls",
+    "blobs, simple_fetch_current, expected_load_jobs, expected_delete_calls",
     [
-        ({"simple": [], "downloads": ["blob0", "blob1", "blob2"]}, 1, 3),
-        ({"simple": ["blob0", "blob1", "blob2"], "downloads": []}, 1, 3),
+        ({"simple": [], "downloads": ["blob0", "blob1", "blob2"]}, True, 1, 3),
+        ({"simple": ["blob0", "blob1", "blob2"], "downloads": []}, True, 1, 3),
         (
             {
                 "simple": ["blob0", "blob1", "blob2"],
                 "downloads": ["blob0", "blob1", "blob2"],
             },
+            True,
             2,
             6,
         ),
+        (
+            {"simple": ["pastblob0", "pastblob1"], "downloads": ["blob0", "blob1"]},
+            False,
+            2,
+            4,
+        ),
     ],
 )
 def test_load_processed_files_into_bigquery(
     monkeypatch,
     bigquery_dataset,
     blobs,
+    simple_fetch_current,
     expected_load_jobs,
     expected_delete_calls,
 ):
@@ -150,11 +158,22 @@ def test_load_processed_files_into_bigquery(
         name="blobname", bucket=bucket, delete=pretend.call_recorder(lambda: None)
     )
 
+    past_partition = (datetime.datetime.utcnow() - datetime.timedelta(days=1)).strftime(
+        "%Y%m%d"
+    )
+    partition = datetime.datetime.utcnow().strftime("%Y%m%d")
+
     def _generate_blob_list(prefix, max_results):
         if "simple" in prefix:
-            _blobs = blobs["simple"]
+            if past_partition in prefix:
+                _blobs = [b for b in blobs["simple"] if b.startswith("past")]
+            else:
+                _blobs = blobs["simple"]
         elif "downloads" in prefix:
-            _blobs = blobs["downloads"]
+            if past_partition in prefix:
+                _blobs = [b for b in blobs["downloads"] if b.startswith("past")]
+            else:
+                _blobs = blobs["downloads"]
         else:
             _blobs = []
         blob_list = [blob_stub for b in _blobs]
@@ -204,17 +223,21 @@ def fake_batch(*a, **kw):
 
     event = {}
     context = pretend.stub()
-    partition = datetime.datetime.utcnow().strftime("%Y%m%d")
 
     main.load_processed_files_into_bigquery(event, context)
 
     assert storage_client_stub.bucket.calls == [
         pretend.call(RESULT_BUCKET),
     ]
-    assert bucket_stub.list_blobs.calls == [
+    expected_list_blob_calls = [
+        pretend.call(prefix=f"processed/{past_partition}/downloads-", max_results=1000),
         pretend.call(prefix=f"processed/{partition}/downloads-", max_results=1000),
+        pretend.call(prefix=f"processed/{past_partition}/simple-", max_results=1000),
         pretend.call(prefix=f"processed/{partition}/simple-", max_results=1000),
     ]
+    if not simple_fetch_current:
+        expected_list_blob_calls = expected_list_blob_calls[:3]
+    assert bucket_stub.list_blobs.calls == expected_list_blob_calls
     assert (
         load_job_stub.result.calls
         == [pretend.call()] * len(bigquery_dataset.split()) * expected_load_jobs
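The list_blobs prefixes asserted above follow the partition scheme the test itself computes: a previous-day and a current-day YYYYMMDD partition for each result type. A minimal standalone sketch of that prefix construction, assuming the same "processed/<partition>/<type>-" layout the assertions use (the helper name is hypothetical):

import datetime

def expected_prefixes(file_types=("downloads", "simple")):
    # Hypothetical helper mirroring the prefixes asserted in the test:
    # for each result type, the previous day's partition is listed before
    # the current day's, matching the order of expected_list_blob_calls.
    now = datetime.datetime.utcnow()
    past_partition = (now - datetime.timedelta(days=1)).strftime("%Y%m%d")
    partition = now.strftime("%Y%m%d")
    return [
        f"processed/{p}/{file_type}-"
        for file_type in file_types
        for p in (past_partition, partition)
    ]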