@@ -1169,26 +1169,32 @@ async def _batch(documents: list[dict], batch_size: int) -> AsyncGenerator[list[
11691169 async def _generate_auto_batches (
11701170 documents : list [dict [str , Any ]], max_payload_size : int
11711171 ) -> AsyncGenerator [list [dict ], None ]:
1172- batch = []
11731172 loop = get_running_loop ()
11741173
1175- for doc in documents :
1176- doc_json_str = loop .run_in_executor (None , partial (json .dumps , doc ))
1177- doc_size = await loop .run_in_executor (None , partial (getsizeof , doc_json_str ))
1178- if doc_size > max_payload_size :
1179- raise PayloadTooLarge (
1180- f"Payload size { doc_size } is greater than the maximum payload size of { max_payload_size } "
1181- )
1182- batch . append ( doc )
1183- batch_json_str = await loop .run_in_executor (None , partial (json .dumps , batch ))
1184- batch_size = await loop .run_in_executor (None , partial (getsizeof , batch_json_str ))
1185- if batch_size >= max_payload_size :
1186- batch . pop ()
1187- yield batch
1188- batch . clear ( )
1174+ # Check the size of all documents together if it is below the max size yield it all at onece
1175+ doc_json_str = await loop .run_in_executor (None , partial (json .dumps , documents ))
1176+ doc_size = await loop .run_in_executor (None , partial (getsizeof , doc_json_str ))
1177+ if doc_size < max_payload_size :
1178+ yield documents
1179+ else :
1180+ batch = []
1181+ for doc in documents :
1182+ doc_json_str = await loop .run_in_executor (None , partial (json .dumps , doc ))
1183+ doc_size = await loop .run_in_executor (None , partial (getsizeof , doc_json_str ))
1184+ if doc_size > max_payload_size :
1185+ raise PayloadTooLarge (
1186+ f"Payload size { doc_size } is greater than the maximum payload size of { max_payload_size } "
1187+ )
11891188 batch .append (doc )
1190- if batch :
1191- yield batch
1189+ batch_json_str = await loop .run_in_executor (None , partial (json .dumps , batch ))
1190+ batch_size = await loop .run_in_executor (None , partial (getsizeof , batch_json_str ))
1191+ if batch_size >= max_payload_size :
1192+ batch .pop ()
1193+ yield batch
1194+ batch .clear ()
1195+ batch .append (doc )
1196+ if batch :
1197+ yield batch
11921198
11931199 @staticmethod
11941200 def _iso_to_date_time (iso_date : Optional [datetime | str ]) -> Optional [datetime ]:
0 commit comments