Skip to content

Commit f0c574a

Browse files
authored
fix(search): limit documents in bulk load chunk (#18441)
1 parent 2599304 commit f0c574a

File tree

2 files changed

+22
-7
lines changed

2 files changed

+22
-7
lines changed

tests/unit/search/test_tasks.py

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,9 @@ def project_docs(db):
243243
class TestError(Exception):
244244
pass
245245

246-
def parallel_bulk(client, iterable, index=None, max_chunk_bytes=None):
246+
def parallel_bulk(
247+
client, iterable, index=None, chunk_size=None, max_chunk_bytes=None
248+
):
247249
assert client is es_client
248250
assert iterable is docs
249251
assert index == "warehouse-cbcbcbcbcb"
@@ -304,7 +306,7 @@ def project_docs(db):
304306
monkeypatch.setattr(warehouse.search.tasks, "SearchLock", NotLock)
305307

306308
parallel_bulk = pretend.call_recorder(
307-
lambda client, iterable, index, max_chunk_bytes: [None]
309+
lambda client, iterable, index, chunk_size, max_chunk_bytes: [None]
308310
)
309311
monkeypatch.setattr(warehouse.search.tasks, "parallel_bulk", parallel_bulk)
310312

@@ -314,7 +316,11 @@ def project_docs(db):
314316

315317
assert parallel_bulk.calls == [
316318
pretend.call(
317-
es_client, docs, index="warehouse-cbcbcbcbcb", max_chunk_bytes=10485760
319+
es_client,
320+
docs,
321+
index="warehouse-cbcbcbcbcb",
322+
chunk_size=100,
323+
max_chunk_bytes=10485760,
318324
)
319325
]
320326
assert es_client.indices.create.calls == [
@@ -372,7 +378,7 @@ def project_docs(db):
372378
monkeypatch.setattr(warehouse.search.tasks, "SearchLock", NotLock)
373379

374380
parallel_bulk = pretend.call_recorder(
375-
lambda client, iterable, index, max_chunk_bytes: [None]
381+
lambda client, iterable, index, chunk_size, max_chunk_bytes: [None]
376382
)
377383
monkeypatch.setattr(warehouse.search.tasks, "parallel_bulk", parallel_bulk)
378384

@@ -382,7 +388,11 @@ def project_docs(db):
382388

383389
assert parallel_bulk.calls == [
384390
pretend.call(
385-
es_client, docs, index="warehouse-cbcbcbcbcb", max_chunk_bytes=10485760
391+
es_client,
392+
docs,
393+
index="warehouse-cbcbcbcbcb",
394+
chunk_size=100,
395+
max_chunk_bytes=10485760,
386396
)
387397
]
388398
assert es_client.indices.create.calls == [
@@ -441,7 +451,7 @@ def project_docs(db):
441451
monkeypatch.setattr(warehouse.search.tasks, "SearchLock", NotLock)
442452

443453
parallel_bulk = pretend.call_recorder(
444-
lambda client, iterable, index, max_chunk_bytes: [None]
454+
lambda client, iterable, index, chunk_size, max_chunk_bytes: [None]
445455
)
446456
monkeypatch.setattr(warehouse.search.tasks, "parallel_bulk", parallel_bulk)
447457

@@ -466,7 +476,11 @@ def project_docs(db):
466476

467477
assert parallel_bulk.calls == [
468478
pretend.call(
469-
es_client, docs, index="warehouse-cbcbcbcbcb", max_chunk_bytes=10485760
479+
es_client,
480+
docs,
481+
index="warehouse-cbcbcbcbcb",
482+
chunk_size=100,
483+
max_chunk_bytes=10485760,
470484
)
471485
]
472486
assert es_client.indices.create.calls == [

warehouse/search/tasks.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,7 @@ def reindex(self, request):
168168
client,
169169
_project_docs(request.db),
170170
index=new_index_name,
171+
chunk_size=100,
171172
max_chunk_bytes=10 * 1024 * 1024, # 10MB, per OpenSearch defaults
172173
):
173174
pass

0 commit comments

Comments
 (0)