
Commit 39dc1cb

[#188658814]: Py3.11 integration
1 parent: b9b977a

File tree

6 files changed: +1816 -1009 lines changed


integration/test_backfill.py

Lines changed: 131 additions & 79 deletions
@@ -1,19 +1,25 @@
 # coding: utf-8
 
-import pytest
+import os
 import textwrap
-from six import StringIO
 
+import pytest
+from fixtures import BaseIntegrationTestCase
 from pycrunch.shoji import as_entity
+from six import StringIO
 
 from scrunch.mutable_dataset import get_mutable_dataset
-from fixtures import BaseIntegrationTestCase
+
+PROJECT_ID = os.environ.get("SCRUNCH_PROJECT_ID")
 
 
 class TestBackFill(BaseIntegrationTestCase):
     def _prepare_ds(self, values):
-        ds = self.site.datasets.create(
-            as_entity({"name": "test_backfill_values"})).refresh()
+        ds_data = {"name": "test_backfill_values"}
+        if PROJECT_ID:
+            ds_data["project"] = f"/projects/{PROJECT_ID}/"
+            # 5c0d0727f0ee424bab69cfb9f0a47507
+        ds = self.site.datasets.create(as_entity(ds_data)).refresh()
         # We need a numeric PK
         pk = ds.variables.create(
             as_entity(
@@ -140,17 +146,23 @@ def _prepare_ds(self, values):
         return ds
 
     def test_backfill_values(self):
-        ds = self._prepare_ds({
-            "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
-            "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1],
-            "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11],
-            "cat3": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1],
-        })
-        csv_file = StringIO(textwrap.dedent("""pk,cat1,cat2
+        ds = self._prepare_ds(
+            {
+                "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1],
+                "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11],
+                "cat3": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1],
+            }
+        )
+        csv_file = StringIO(
+            textwrap.dedent(
+                """pk,cat1,cat2
 4,1,22
 5,2,33
 6,3,11
-"""))
+"""
+            )
+        )
         scrunch_dataset = get_mutable_dataset(ds.body.id, self.site)
 
         rows_expr = "pk >= 4 and pk <=6"
@@ -167,30 +179,39 @@ def test_backfill_values(self):
         ds.delete()
 
     def test_backfill_on_subvars(self):
-        ds = self._prepare_ds({
-            "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
-            "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1],
-            "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11],
-            "cat3": [2, 3, 1, -1, -1, -1, 2, 3, 1, 2]
-        })
+        ds = self._prepare_ds(
+            {
+                "pk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+                "cat1": [1, 2, 3, -1, -1, -1, 1, 2, 3, 1],
+                "cat2": [11, 22, 33, -1, -1, -1, 11, 22, 33, 11],
+                "cat3": [2, 3, 1, -1, -1, -1, 2, 3, 1, 2],
+            }
+        )
         vars = ds.variables.by("alias")
-        array = ds.variables.create(as_entity({
-            "name": "array",
-            "alias": "array",
-            "type": "categorical_array",
-            "subvariables": [vars["cat1"].entity_url, vars["cat3"].entity_url],
-        })).refresh()
-
-        csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3
+        array = ds.variables.create(
+            as_entity(
+                {
+                    "name": "array",
+                    "alias": "array",
+                    "type": "categorical_array",
+                    "subvariables": [vars["cat1"].entity_url, vars["cat3"].entity_url],
+                }
+            )
+        ).refresh()
+
+        csv_file = StringIO(
+            textwrap.dedent(
+                """pk,cat1,cat3
 4,1,2
 5,2,3
 6,3,1
-"""))
+"""
+            )
+        )
         scrunch_dataset = get_mutable_dataset(ds.body.id, self.site)
 
         rows_expr = "pk >= 4 and pk <=6"
-        scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file,
-                                          rows_expr)
+        scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, rows_expr)
 
         data = ds.follow("table", "limit=10")["data"]
         assert data[array.body["id"]] == [
@@ -203,34 +224,48 @@ def test_backfill_on_subvars(self):
             [1, 2],
             [2, 3],
             [3, 1],
-            [1, 2]
+            [1, 2],
         ]
 
         ds.delete()
 
     def test_backfill_on_subvars_full_row(self):
-        ds = self._prepare_ds({
-            "pk": [1, 2, 3, 4, 5],
-            "cat1": [1, 2, 3, -1, -1],
-            "cat2": [11, 22, 33, -1, -1],
-            "cat3": [2, 3, 1, -1, -1]
-        })
+        ds = self._prepare_ds(
+            {
+                "pk": [1, 2, 3, 4, 5],
+                "cat1": [1, 2, 3, -1, -1],
+                "cat2": [11, 22, 33, -1, -1],
+                "cat3": [2, 3, 1, -1, -1],
+            }
+        )
         vars = ds.variables.by("alias")
-        subvars = [vars["cat1"].entity_url, vars["cat2"].entity_url, vars["cat3"].entity_url]
-        array = ds.variables.create(as_entity({
-            "name": "array",
-            "alias": "array",
-            "type": "categorical_array",
-            "subvariables": subvars,
-        })).refresh()
-
-        csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3
+        subvars = [
+            vars["cat1"].entity_url,
+            vars["cat2"].entity_url,
+            vars["cat3"].entity_url,
+        ]
+        array = ds.variables.create(
+            as_entity(
+                {
+                    "name": "array",
+                    "alias": "array",
+                    "type": "categorical_array",
+                    "subvariables": subvars,
+                }
+            )
+        ).refresh()
+
+        csv_file = StringIO(
+            textwrap.dedent(
+                """pk,cat1,cat3
 1,1,2
 2,2,3
 3,3,1
 4,2,3
 5,2,1
-"""))
+"""
+            )
+        )
         scrunch_dataset = get_mutable_dataset(ds.body.id, self.site)
 
         # Not including a row_filter, same as passing None
@@ -241,27 +276,32 @@ def test_backfill_on_subvars_full_row(self):
             [2, 2, 3],
             [3, 3, 1],
             [2, {"?": -1}, 3],
-            [2, {"?": -1}, 1]
+            [2, {"?": -1}, 1],
         ]
 
         ds.delete()
 
     def test_backfill_on_non_missing(self):
-        ds = self._prepare_ds({
-            "pk": [1, 2, 3, 4, 5],
-            "cat1": [1, 2, 3, 3, 3],
-            "cat2": [11, 22, 33, 11, 22],
-            "cat3": [1, 1, 1, 1, 1]
-        })
-        csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3
+        ds = self._prepare_ds(
+            {
+                "pk": [1, 2, 3, 4, 5],
+                "cat1": [1, 2, 3, 3, 3],
+                "cat2": [11, 22, 33, 11, 22],
+                "cat3": [1, 1, 1, 1, 1],
+            }
+        )
+        csv_file = StringIO(
+            textwrap.dedent(
+                """pk,cat1,cat3
 4,1,2
 5,2,3
-"""))
+"""
+            )
+        )
         scrunch_dataset = get_mutable_dataset(ds.body.id, self.site)
 
         rows_expr = "pk >= 4 and pk <=5"
-        scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file,
-                                          rows_expr)
+        scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, rows_expr)
 
         vars = ds.variables.by("alias")
         data = ds.follow("table", "limit=10")["data"]
@@ -278,15 +318,20 @@ def test_bad_csv(self):
             "cat3": [1, -1, 3, -1],
         }
         ds = self._prepare_ds(original_data)
-        csv_file = StringIO(textwrap.dedent("""pk,BOGUS,BAD
+        csv_file = StringIO(
+            textwrap.dedent(
+                """pk,BOGUS,BAD
 2,1,22
-"""))
+"""
+            )
+        )
         scrunch_dataset = get_mutable_dataset(ds.body.id, self.site)
 
         rows_expr = "pk == 2"
         with pytest.raises(ValueError) as err:
-            scrunch_dataset.backfill_from_csv(["cat1", "cat2"], "pk", csv_file,
-                                              rows_expr)
+            scrunch_dataset.backfill_from_csv(
+                ["cat1", "cat2"], "pk", csv_file, rows_expr
+            )
         assert err.value.args[0].startswith("Invalid data provided: Expected column ")
 
         # Verify that the backfill didn't proceed
@@ -301,23 +346,28 @@ def test_bad_csv(self):
         ds.delete()
 
     def test_with_exclusion_filter(self):
-        ds = self._prepare_ds({
-            "pk": [1, 2, 3, 4, 5],
-            "cat1": [1, 2, 3, 3, 3],
-            "cat2": [11, 11, 11, 11, 11],
-            "cat3": [1, 1, 1, 1, 1]
-        })
-        csv_file = StringIO(textwrap.dedent("""pk,cat1,cat3
+        ds = self._prepare_ds(
+            {
+                "pk": [1, 2, 3, 4, 5],
+                "cat1": [1, 2, 3, 3, 3],
+                "cat2": [11, 11, 11, 11, 11],
+                "cat3": [1, 1, 1, 1, 1],
+            }
+        )
+        csv_file = StringIO(
+            textwrap.dedent(
+                """pk,cat1,cat3
 4,1,2
 5,2,3
-"""))
+"""
+            )
+        )
         scrunch_dataset = get_mutable_dataset(ds.body.id, self.site)
 
         excl = "pk == 4"
         scrunch_dataset.exclude(excl)
         rows_expr = "pk in [4, 5]"
-        scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file,
-                                          rows_expr)
+        scrunch_dataset.backfill_from_csv(["cat1", "cat3"], "pk", csv_file, rows_expr)
 
         # Exclusion gets set after backfilling
         assert scrunch_dataset.get_exclusion() == excl
@@ -331,15 +381,17 @@ def test_with_exclusion_filter(self):
         ds.delete()
 
     def test_too_big_file(self):
-        ds = self._prepare_ds({
-            "pk": [1, 2, 3, 4, 5],
-            "cat1": [1, 2, 3, 3, 3],
-            "cat2": [11, 11, 11, 11, 11],
-            "cat3": [1, 1, 1, 1, 1]
-        })
+        ds = self._prepare_ds(
+            {
+                "pk": [1, 2, 3, 4, 5],
+                "cat1": [1, 2, 3, 3, 3],
+                "cat2": [11, 11, 11, 11, 11],
+                "cat3": [1, 1, 1, 1, 1],
+            }
+        )
         scrunch_dataset = get_mutable_dataset(ds.body.id, self.site)
 
-        size_200MB = 200 * 2 ** 20
+        size_200MB = 200 * 2**20
         csv_file = StringIO("x" * size_200MB)
         with pytest.raises(ValueError) as err:
             scrunch_dataset.backfill_from_csv(["cat1"], "pk", csv_file, None)
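
Note for anyone running this file against the change: the new PROJECT_ID hook is read once at import time, so SCRUNCH_PROJECT_ID must already be set when pytest imports integration/test_backfill.py. A minimal sketch of that usage follows; the project id value and the pytest invocation are placeholders/assumptions, not part of this commit.

# Sketch only: exercising the SCRUNCH_PROJECT_ID hook introduced in this commit.
import os
import subprocess

# Must be in the environment before the test module is imported, because the
# module reads it into PROJECT_ID at import time.
os.environ["SCRUNCH_PROJECT_ID"] = "your-project-id"  # placeholder value

# With the variable set, _prepare_ds() adds "project": "/projects/<id>/" to the
# dataset payload, so the test datasets are created inside that project.
subprocess.run(["pytest", "integration/test_backfill.py"], check=True)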

0 commit comments
