Skip to content

Commit 793f0f1

Browse files
authored
fix: restrict chunk size to 10MB per quota (#18438)
1 parent 3e6e607 commit 793f0f1

File tree

2 files changed

+23
-8
lines changed

2 files changed

+23
-8
lines changed

tests/unit/search/test_tasks.py

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ def project_docs(db):
243243
class TestError(Exception):
244244
pass
245245

246-
def parallel_bulk(client, iterable, index=None):
246+
def parallel_bulk(client, iterable, index=None, max_chunk_bytes=None):
247247
assert client is es_client
248248
assert iterable is docs
249249
assert index == "warehouse-cbcbcbcbcb"
@@ -303,15 +303,19 @@ def project_docs(db):
303303
)
304304
monkeypatch.setattr(warehouse.search.tasks, "SearchLock", NotLock)
305305

306-
parallel_bulk = pretend.call_recorder(lambda client, iterable, index: [None])
306+
parallel_bulk = pretend.call_recorder(
307+
lambda client, iterable, index, max_chunk_bytes: [None]
308+
)
307309
monkeypatch.setattr(warehouse.search.tasks, "parallel_bulk", parallel_bulk)
308310

309311
monkeypatch.setattr(os, "urandom", lambda n: b"\xcb" * n)
310312

311313
reindex(task, db_request)
312314

313315
assert parallel_bulk.calls == [
314-
pretend.call(es_client, docs, index="warehouse-cbcbcbcbcb")
316+
pretend.call(
317+
es_client, docs, index="warehouse-cbcbcbcbcb", max_chunk_bytes=10485760
318+
)
315319
]
316320
assert es_client.indices.create.calls == [
317321
pretend.call(
@@ -367,15 +371,19 @@ def project_docs(db):
367371
)
368372
monkeypatch.setattr(warehouse.search.tasks, "SearchLock", NotLock)
369373

370-
parallel_bulk = pretend.call_recorder(lambda client, iterable, index: [None])
374+
parallel_bulk = pretend.call_recorder(
375+
lambda client, iterable, index, max_chunk_bytes: [None]
376+
)
371377
monkeypatch.setattr(warehouse.search.tasks, "parallel_bulk", parallel_bulk)
372378

373379
monkeypatch.setattr(os, "urandom", lambda n: b"\xcb" * n)
374380

375381
reindex(task, db_request)
376382

377383
assert parallel_bulk.calls == [
378-
pretend.call(es_client, docs, index="warehouse-cbcbcbcbcb")
384+
pretend.call(
385+
es_client, docs, index="warehouse-cbcbcbcbcb", max_chunk_bytes=10485760
386+
)
379387
]
380388
assert es_client.indices.create.calls == [
381389
pretend.call(
@@ -432,7 +440,9 @@ def project_docs(db):
432440
)
433441
monkeypatch.setattr(warehouse.search.tasks, "SearchLock", NotLock)
434442

435-
parallel_bulk = pretend.call_recorder(lambda client, iterable, index: [None])
443+
parallel_bulk = pretend.call_recorder(
444+
lambda client, iterable, index, max_chunk_bytes: [None]
445+
)
436446
monkeypatch.setattr(warehouse.search.tasks, "parallel_bulk", parallel_bulk)
437447

438448
monkeypatch.setattr(os, "urandom", lambda n: b"\xcb" * n)
@@ -455,7 +465,9 @@ def project_docs(db):
455465
]
456466

457467
assert parallel_bulk.calls == [
458-
pretend.call(es_client, docs, index="warehouse-cbcbcbcbcb")
468+
pretend.call(
469+
es_client, docs, index="warehouse-cbcbcbcbcb", max_chunk_bytes=10485760
470+
)
459471
]
460472
assert es_client.indices.create.calls == [
461473
pretend.call(

warehouse/search/tasks.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,10 @@ def reindex(self, request):
165165
request.db.execute(text("SET statement_timeout = '600s'"))
166166

167167
for _ in parallel_bulk(
168-
client, _project_docs(request.db), index=new_index_name
168+
client,
169+
_project_docs(request.db),
170+
index=new_index_name,
171+
max_chunk_bytes=10 * 1024 * 1024, # 10MB, per OpenSearch defaults
169172
):
170173
pass
171174
except: # noqa

0 commit comments

Comments
 (0)