11# coding: utf-8
22
3- import pytest
3+ import os
44import textwrap
5- from six import StringIO
65
6+ import pytest
7+ from fixtures import BaseIntegrationTestCase
78from pycrunch .shoji import as_entity
9+ from six import StringIO
810
911from scrunch .mutable_dataset import get_mutable_dataset
10- from fixtures import BaseIntegrationTestCase
12+
13+ PROJECT_ID = os .environ .get ("SCRUNCH_PROJECT_ID" )
1114
1215
1316class TestBackFill (BaseIntegrationTestCase ):
1417 def _prepare_ds (self , values ):
15- ds = self .site .datasets .create (
16- as_entity ({"name" : "test_backfill_values" })).refresh ()
18+ ds_data = {"name" : "test_backfill_values" }
19+ if PROJECT_ID :
20+ ds_data ["project" ] = f"/projects/{ PROJECT_ID } /"
21+ # NOTE(review): presumably a sample SCRUNCH_PROJECT_ID value (5c0d0727f0ee424bab69cfb9f0a47507) - confirm or remove
22+ ds = self .site .datasets .create (as_entity (ds_data )).refresh ()
1723 # We need a numeric PK
1824 pk = ds .variables .create (
1925 as_entity (
@@ -140,17 +146,23 @@ def _prepare_ds(self, values):
140146 return ds
141147
142148 def test_backfill_values (self ):
143- ds = self ._prepare_ds ({
144- "pk" : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
145- "cat1" : [1 , 2 , 3 , - 1 , - 1 , - 1 , 1 , 2 , 3 , 1 ],
146- "cat2" : [11 , 22 , 33 , - 1 , - 1 , - 1 , 11 , 22 , 33 , 11 ],
147- "cat3" : [1 , 2 , 3 , - 1 , - 1 , - 1 , 1 , 2 , 3 , 1 ],
148- })
149- csv_file = StringIO (textwrap .dedent ("""pk,cat1,cat2
149+ ds = self ._prepare_ds (
150+ {
151+ "pk" : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
152+ "cat1" : [1 , 2 , 3 , - 1 , - 1 , - 1 , 1 , 2 , 3 , 1 ],
153+ "cat2" : [11 , 22 , 33 , - 1 , - 1 , - 1 , 11 , 22 , 33 , 11 ],
154+ "cat3" : [1 , 2 , 3 , - 1 , - 1 , - 1 , 1 , 2 , 3 , 1 ],
155+ }
156+ )
157+ csv_file = StringIO (
158+ textwrap .dedent (
159+ """pk,cat1,cat2
150160 4,1,22
151161 5,2,33
152162 6,3,11
153- """ ))
163+ """
164+ )
165+ )
154166 scrunch_dataset = get_mutable_dataset (ds .body .id , self .site )
155167
156168 rows_expr = "pk >= 4 and pk <=6"
@@ -167,30 +179,39 @@ def test_backfill_values(self):
167179 ds .delete ()
168180
169181 def test_backfill_on_subvars (self ):
170- ds = self ._prepare_ds ({
171- "pk" : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
172- "cat1" : [1 , 2 , 3 , - 1 , - 1 , - 1 , 1 , 2 , 3 , 1 ],
173- "cat2" : [11 , 22 , 33 , - 1 , - 1 , - 1 , 11 , 22 , 33 , 11 ],
174- "cat3" : [2 , 3 , 1 , - 1 , - 1 , - 1 , 2 , 3 , 1 , 2 ]
175- })
182+ ds = self ._prepare_ds (
183+ {
184+ "pk" : [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 ],
185+ "cat1" : [1 , 2 , 3 , - 1 , - 1 , - 1 , 1 , 2 , 3 , 1 ],
186+ "cat2" : [11 , 22 , 33 , - 1 , - 1 , - 1 , 11 , 22 , 33 , 11 ],
187+ "cat3" : [2 , 3 , 1 , - 1 , - 1 , - 1 , 2 , 3 , 1 , 2 ],
188+ }
189+ )
176190 vars = ds .variables .by ("alias" )
177- array = ds .variables .create (as_entity ({
178- "name" : "array" ,
179- "alias" : "array" ,
180- "type" : "categorical_array" ,
181- "subvariables" : [vars ["cat1" ].entity_url , vars ["cat3" ].entity_url ],
182- })).refresh ()
183-
184- csv_file = StringIO (textwrap .dedent ("""pk,cat1,cat3
191+ array = ds .variables .create (
192+ as_entity (
193+ {
194+ "name" : "array" ,
195+ "alias" : "array" ,
196+ "type" : "categorical_array" ,
197+ "subvariables" : [vars ["cat1" ].entity_url , vars ["cat3" ].entity_url ],
198+ }
199+ )
200+ ).refresh ()
201+
202+ csv_file = StringIO (
203+ textwrap .dedent (
204+ """pk,cat1,cat3
185205 4,1,2
186206 5,2,3
187207 6,3,1
188- """ ))
208+ """
209+ )
210+ )
189211 scrunch_dataset = get_mutable_dataset (ds .body .id , self .site )
190212
191213 rows_expr = "pk >= 4 and pk <=6"
192- scrunch_dataset .backfill_from_csv (["cat1" , "cat3" ], "pk" , csv_file ,
193- rows_expr )
214+ scrunch_dataset .backfill_from_csv (["cat1" , "cat3" ], "pk" , csv_file , rows_expr )
194215
195216 data = ds .follow ("table" , "limit=10" )["data" ]
196217 assert data [array .body ["id" ]] == [
@@ -203,34 +224,48 @@ def test_backfill_on_subvars(self):
203224 [1 , 2 ],
204225 [2 , 3 ],
205226 [3 , 1 ],
206- [1 , 2 ]
227+ [1 , 2 ],
207228 ]
208229
209230 ds .delete ()
210231
211232 def test_backfill_on_subvars_full_row (self ):
212- ds = self ._prepare_ds ({
213- "pk" : [1 , 2 , 3 , 4 , 5 ],
214- "cat1" : [1 , 2 , 3 , - 1 , - 1 ],
215- "cat2" : [11 , 22 , 33 , - 1 , - 1 ],
216- "cat3" : [2 , 3 , 1 , - 1 , - 1 ]
217- })
233+ ds = self ._prepare_ds (
234+ {
235+ "pk" : [1 , 2 , 3 , 4 , 5 ],
236+ "cat1" : [1 , 2 , 3 , - 1 , - 1 ],
237+ "cat2" : [11 , 22 , 33 , - 1 , - 1 ],
238+ "cat3" : [2 , 3 , 1 , - 1 , - 1 ],
239+ }
240+ )
218241 vars = ds .variables .by ("alias" )
219- subvars = [vars ["cat1" ].entity_url , vars ["cat2" ].entity_url , vars ["cat3" ].entity_url ]
220- array = ds .variables .create (as_entity ({
221- "name" : "array" ,
222- "alias" : "array" ,
223- "type" : "categorical_array" ,
224- "subvariables" : subvars ,
225- })).refresh ()
226-
227- csv_file = StringIO (textwrap .dedent ("""pk,cat1,cat3
242+ subvars = [
243+ vars ["cat1" ].entity_url ,
244+ vars ["cat2" ].entity_url ,
245+ vars ["cat3" ].entity_url ,
246+ ]
247+ array = ds .variables .create (
248+ as_entity (
249+ {
250+ "name" : "array" ,
251+ "alias" : "array" ,
252+ "type" : "categorical_array" ,
253+ "subvariables" : subvars ,
254+ }
255+ )
256+ ).refresh ()
257+
258+ csv_file = StringIO (
259+ textwrap .dedent (
260+ """pk,cat1,cat3
228261 1,1,2
229262 2,2,3
230263 3,3,1
231264 4,2,3
232265 5,2,1
233- """ ))
266+ """
267+ )
268+ )
234269 scrunch_dataset = get_mutable_dataset (ds .body .id , self .site )
235270
236271 # Not including a row_filter, same as passing None
@@ -241,27 +276,32 @@ def test_backfill_on_subvars_full_row(self):
241276 [2 , 2 , 3 ],
242277 [3 , 3 , 1 ],
243278 [2 , {"?" : - 1 }, 3 ],
244- [2 , {"?" : - 1 }, 1 ]
279+ [2 , {"?" : - 1 }, 1 ],
245280 ]
246281
247282 ds .delete ()
248283
249284 def test_backfill_on_non_missing (self ):
250- ds = self ._prepare_ds ({
251- "pk" : [1 , 2 , 3 , 4 , 5 ],
252- "cat1" : [1 , 2 , 3 , 3 , 3 ],
253- "cat2" : [11 , 22 , 33 , 11 , 22 ],
254- "cat3" : [1 , 1 , 1 , 1 , 1 ]
255- })
256- csv_file = StringIO (textwrap .dedent ("""pk,cat1,cat3
285+ ds = self ._prepare_ds (
286+ {
287+ "pk" : [1 , 2 , 3 , 4 , 5 ],
288+ "cat1" : [1 , 2 , 3 , 3 , 3 ],
289+ "cat2" : [11 , 22 , 33 , 11 , 22 ],
290+ "cat3" : [1 , 1 , 1 , 1 , 1 ],
291+ }
292+ )
293+ csv_file = StringIO (
294+ textwrap .dedent (
295+ """pk,cat1,cat3
257296 4,1,2
258297 5,2,3
259- """ ))
298+ """
299+ )
300+ )
260301 scrunch_dataset = get_mutable_dataset (ds .body .id , self .site )
261302
262303 rows_expr = "pk >= 4 and pk <=5"
263- scrunch_dataset .backfill_from_csv (["cat1" , "cat3" ], "pk" , csv_file ,
264- rows_expr )
304+ scrunch_dataset .backfill_from_csv (["cat1" , "cat3" ], "pk" , csv_file , rows_expr )
265305
266306 vars = ds .variables .by ("alias" )
267307 data = ds .follow ("table" , "limit=10" )["data" ]
@@ -278,15 +318,20 @@ def test_bad_csv(self):
278318 "cat3" : [1 , - 1 , 3 , - 1 ],
279319 }
280320 ds = self ._prepare_ds (original_data )
281- csv_file = StringIO (textwrap .dedent ("""pk,BOGUS,BAD
321+ csv_file = StringIO (
322+ textwrap .dedent (
323+ """pk,BOGUS,BAD
282324 2,1,22
283- """ ))
325+ """
326+ )
327+ )
284328 scrunch_dataset = get_mutable_dataset (ds .body .id , self .site )
285329
286330 rows_expr = "pk == 2"
287331 with pytest .raises (ValueError ) as err :
288- scrunch_dataset .backfill_from_csv (["cat1" , "cat2" ], "pk" , csv_file ,
289- rows_expr )
332+ scrunch_dataset .backfill_from_csv (
333+ ["cat1" , "cat2" ], "pk" , csv_file , rows_expr
334+ )
290335 assert err .value .args [0 ].startswith ("Invalid data provided: Expected column " )
291336
292337 # Verify that the backfill didn't proceed
@@ -301,23 +346,28 @@ def test_bad_csv(self):
301346 ds .delete ()
302347
303348 def test_with_exclusion_filter (self ):
304- ds = self ._prepare_ds ({
305- "pk" : [1 , 2 , 3 , 4 , 5 ],
306- "cat1" : [1 , 2 , 3 , 3 , 3 ],
307- "cat2" : [11 , 11 , 11 , 11 , 11 ],
308- "cat3" : [1 , 1 , 1 , 1 , 1 ]
309- })
310- csv_file = StringIO (textwrap .dedent ("""pk,cat1,cat3
349+ ds = self ._prepare_ds (
350+ {
351+ "pk" : [1 , 2 , 3 , 4 , 5 ],
352+ "cat1" : [1 , 2 , 3 , 3 , 3 ],
353+ "cat2" : [11 , 11 , 11 , 11 , 11 ],
354+ "cat3" : [1 , 1 , 1 , 1 , 1 ],
355+ }
356+ )
357+ csv_file = StringIO (
358+ textwrap .dedent (
359+ """pk,cat1,cat3
311360 4,1,2
312361 5,2,3
313- """ ))
362+ """
363+ )
364+ )
314365 scrunch_dataset = get_mutable_dataset (ds .body .id , self .site )
315366
316367 excl = "pk == 4"
317368 scrunch_dataset .exclude (excl )
318369 rows_expr = "pk in [4, 5]"
319- scrunch_dataset .backfill_from_csv (["cat1" , "cat3" ], "pk" , csv_file ,
320- rows_expr )
370+ scrunch_dataset .backfill_from_csv (["cat1" , "cat3" ], "pk" , csv_file , rows_expr )
321371
322372 # Exclusion gets set after backfilling
323373 assert scrunch_dataset .get_exclusion () == excl
@@ -331,15 +381,17 @@ def test_with_exclusion_filter(self):
331381 ds .delete ()
332382
333383 def test_too_big_file (self ):
334- ds = self ._prepare_ds ({
335- "pk" : [1 , 2 , 3 , 4 , 5 ],
336- "cat1" : [1 , 2 , 3 , 3 , 3 ],
337- "cat2" : [11 , 11 , 11 , 11 , 11 ],
338- "cat3" : [1 , 1 , 1 , 1 , 1 ]
339- })
384+ ds = self ._prepare_ds (
385+ {
386+ "pk" : [1 , 2 , 3 , 4 , 5 ],
387+ "cat1" : [1 , 2 , 3 , 3 , 3 ],
388+ "cat2" : [11 , 11 , 11 , 11 , 11 ],
389+ "cat3" : [1 , 1 , 1 , 1 , 1 ],
390+ }
391+ )
340392 scrunch_dataset = get_mutable_dataset (ds .body .id , self .site )
341393
342- size_200MB = 200 * 2 ** 20
394+ size_200MB = 200 * 2 ** 20
343395 csv_file = StringIO ("x" * size_200MB )
344396 with pytest .raises (ValueError ) as err :
345397 scrunch_dataset .backfill_from_csv (["cat1" ], "pk" , csv_file , None )
0 commit comments