Skip to content

Commit 8f2db53

Browse files
authored
Merge pull request #15 from sanders41/batch
Improving auto batch performance
2 parents 3ddfc5e + 6914d9e commit 8f2db53

File tree

2 files changed

+24
-18
lines changed

2 files changed

+24
-18
lines changed

meilisearch_python_async/index.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,26 +1169,32 @@ async def _batch(documents: list[dict], batch_size: int) -> AsyncGenerator[list[
11691169
async def _generate_auto_batches(
    documents: list[dict[str, Any]], max_payload_size: int
) -> AsyncGenerator[list[dict], None]:
    """Yield lists of documents whose JSON payload stays under ``max_payload_size``.

    JSON serialization and size measurement are offloaded to the default
    executor via ``run_in_executor`` so large payloads do not block the
    event loop.

    Args:
        documents: The documents to split into batches.
        max_payload_size: Maximum allowed payload size in bytes (compared
            against ``sys.getsizeof`` of the serialized JSON string).

    Yields:
        Batches of documents, each serializing to under ``max_payload_size``.

    Raises:
        PayloadTooLarge: If a single document on its own exceeds
            ``max_payload_size``.
    """
    loop = get_running_loop()

    # Fast path: if all the documents together fit under the limit, yield
    # them as one batch and skip the per-document bookkeeping entirely.
    all_json_str = await loop.run_in_executor(None, partial(json.dumps, documents))
    all_size = await loop.run_in_executor(None, partial(getsizeof, all_json_str))
    if all_size < max_payload_size:
        yield documents
    else:
        batch: list[dict[str, Any]] = []
        for doc in documents:
            doc_json_str = await loop.run_in_executor(None, partial(json.dumps, doc))
            doc_size = await loop.run_in_executor(None, partial(getsizeof, doc_json_str))
            if doc_size > max_payload_size:
                raise PayloadTooLarge(
                    f"Payload size {doc_size} is greater than the maximum payload size of {max_payload_size}"
                )
            batch.append(doc)
            batch_json_str = await loop.run_in_executor(None, partial(json.dumps, batch))
            batch_size = await loop.run_in_executor(None, partial(getsizeof, batch_json_str))
            if batch_size >= max_payload_size:
                batch.pop()
                # Guard: serializing a single document inside a list can push
                # it over the limit even when the document alone fits, leaving
                # the batch empty after the pop — don't yield an empty batch.
                if batch:
                    yield batch
                # Rebind instead of batch.clear(): the consumer may still hold
                # a reference to the list just yielded, and clearing it in
                # place would empty the batch the caller is processing.
                batch = [doc]
        if batch:
            yield batch
11921198

11931199
@staticmethod
11941200
def _iso_to_date_time(iso_date: Optional[datetime | str]) -> Optional[datetime]:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "meilisearch-python-async"
3-
version = "0.11.0"
3+
version = "0.11.1"
44
description = "A Python async client for the MeiliSearch API"
55
authors = ["Paul Sanders <[email protected]>"]
66
license = "MIT"

0 commit comments

Comments
 (0)