1616# under the License.
1717
1818import logging
19+ import queue
1920import time
2021from enum import Enum
2122from operator import methodcaller
22- from queue import Queue
23+ from threading import Thread
2324from typing import (
2425 Any ,
2526 Callable ,
4849
class BulkMeta(Enum):
    """Control markers that travel through the bulk pipeline in place of an action.

    These values are not documents; they are signals interpreted by the
    chunking code.
    """

    # Fed to the chunker in place of an action (e.g. when no new action arrived
    # in time) so that whatever is currently buffered can be emitted.
    flush = 1
    # Internal end-of-stream sentinel placed on the hand-off queue by the
    # background reader once the source iterable is exhausted or has failed.
    done = 2
5153
5254
# Public alias so callers can request a flush without referencing the enum.
BULK_FLUSH = BulkMeta.flush

# --- Plain actions (no control markers) -------------------------------------
# A single action in any of its raw forms.
_TYPE_BULK_ACTION = Union[bytes, str, Dict[str, Any]]
# The header ("action line") half of an expanded action.
_TYPE_BULK_ACTION_HEADER = Dict[str, Any]
# The optional body half of an expanded action.
_TYPE_BULK_ACTION_BODY = Union[None, bytes, Dict[str, Any]]
# An expanded action: (header, body).
_TYPE_BULK_ACTION_HEADER_AND_BODY = Tuple[
    _TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY
]

# --- Actions that may also be a BulkMeta control marker ----------------------
_TYPE_BULK_ACTION_WITH_META = Union[bytes, str, Dict[str, Any], BulkMeta]
_TYPE_BULK_ACTION_HEADER_WITH_META = Union[Dict[str, Any], BulkMeta]
# Either a real (header, body) pair, or a (marker, payload) pair.
_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY = Union[
    Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
    Tuple[BulkMeta, Any],
]
6270
6371
@@ -71,9 +79,6 @@ def expand_action(data: _TYPE_BULK_ACTION) -> _TYPE_BULK_ACTION_HEADER_AND_BODY:
7179 if isinstance (data , (bytes , str )):
7280 return {"index" : {}}, to_bytes (data , "utf-8" )
7381
74- if isinstance (data , BulkMeta ):
75- return data , {}
76-
7782 # make sure we don't alter the action
7883 data = data .copy ()
7984 op_type : str = data .pop ("_op_type" , "index" )
@@ -151,7 +156,9 @@ def __init__(
151156 ] = []
152157
153158 def feed (
154- self , action : _TYPE_BULK_ACTION_HEADER_AND_META , data : _TYPE_BULK_ACTION_BODY
159+ self ,
160+ action : _TYPE_BULK_ACTION_HEADER_WITH_META ,
161+ data : _TYPE_BULK_ACTION_BODY ,
155162 ) -> Optional [
156163 Tuple [
157164 List [
@@ -224,9 +231,10 @@ def flush(
224231
225232
def _chunk_actions(
    actions: Iterable[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY],
    chunk_size: int,
    max_chunk_bytes: int,
    flush_after_seconds: Optional[float],
    serializer: Serializer,
) -> Iterable[
    # NOTE(review): the tail of this return annotation is elided in the diff
    # view this block was recovered from; it is reconstructed from the matching
    # annotations used by the callers -- confirm against the real file.
    Tuple[
        List[
            Union[
                Tuple[_TYPE_BULK_ACTION_HEADER],
                Tuple[_TYPE_BULK_ACTION_HEADER, _TYPE_BULK_ACTION_BODY],
            ]
        ],
        List[bytes],
    ]
]:
    """Group expanded actions into chunks using an ``_ActionChunker``.

    :arg actions: iterable of ``(header, body)`` pairs; the header may instead
        be a :class:`BulkMeta` marker, which is handed to the chunker as-is.
    :arg chunk_size: forwarded to ``_ActionChunker``.
    :arg max_chunk_bytes: forwarded to ``_ActionChunker``.
    :arg flush_after_seconds: when falsy, ``actions`` is consumed directly in
        the calling thread. Otherwise ``actions`` is read by a background
        thread, and whenever no item arrives within this many seconds a
        ``BulkMeta.flush`` is fed to the chunker; the consumer then waits
        without a timeout for the next item.
    :arg serializer: forwarded to ``_ActionChunker``.

    Yields every truthy value returned by ``chunker.feed`` and, at the end,
    by ``chunker.flush``.

    Any exception raised while iterating ``actions`` in the background thread
    is re-raised in the calling thread.
    """
    chunker = _ActionChunker(
        chunk_size=chunk_size, max_chunk_bytes=max_chunk_bytes, serializer=serializer
    )

    if not flush_after_seconds:
        for action, data in actions:
            ret = chunker.feed(action, data)
            if ret:
                yield ret
    else:
        # Local import: only this branch needs it, and the module-level import
        # block only brings in ``Thread``.
        from threading import Event

        # NOTE(review): the queue is unbounded, so the reader thread may run
        # arbitrarily far ahead of the consumer -- consider whether a bound is
        # wanted for very large inputs.
        item_queue: queue.Queue[_TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY] = (
            queue.Queue()
        )
        # Set once the consumer side is finished (normally or not) so the
        # reader stops pulling from ``actions``.
        stop_requested = Event()

        def get_items() -> None:
            """Copy ``actions`` onto the queue, then post the ``done`` sentinel."""
            error: Optional[BaseException] = None
            try:
                for item in actions:
                    item_queue.put(item)
                    if stop_requested.is_set():
                        break
            except BaseException as exc:
                # Forwarded to the consumer, which re-raises it in its thread.
                error = exc
            item_queue.put((BulkMeta.done, error))

        # daemon=True: if the consumer goes away while ``actions`` is blocked
        # waiting for input, this thread must not keep the interpreter alive.
        item_getter_job = Thread(target=get_items, daemon=True)
        item_getter_job.start()

        try:
            timeout: Optional[float] = flush_after_seconds
            while True:
                try:
                    action, data = item_queue.get(timeout=timeout)
                    timeout = flush_after_seconds
                except queue.Empty:
                    # Nothing arrived in time: ask the chunker to flush, then
                    # block indefinitely until the next item shows up.
                    action, data = BulkMeta.flush, None
                    timeout = None

                if action is BulkMeta.done:
                    if isinstance(data, BaseException):
                        raise data
                    break
                ret = chunker.feed(action, data)
                if ret:
                    yield ret
        finally:
            # Runs on normal completion, on an exception, and when the
            # generator is closed early (GeneratorExit is raised at ``yield``).
            stop_requested.set()

        # Only reached after the ``done`` sentinel was received, so the reader
        # has nothing left to do and this returns promptly.
        item_getter_job.join()

    ret = chunker.flush()
    if ret:
        yield ret
@@ -376,9 +422,10 @@ def _process_bulk_chunk(
376422
377423def streaming_bulk (
378424 client : Elasticsearch ,
379- actions : Iterable [_TYPE_BULK_ACTION ],
425+ actions : Iterable [_TYPE_BULK_ACTION_WITH_META ],
380426 chunk_size : int = 500 ,
381427 max_chunk_bytes : int = 100 * 1024 * 1024 ,
428+ flush_after_seconds : Optional [float ] = None ,
382429 raise_on_error : bool = True ,
383430 expand_action_callback : Callable [
384431 [_TYPE_BULK_ACTION ], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -440,6 +487,13 @@ def streaming_bulk(
440487
441488 serializer = client .transport .serializers .get_serializer ("application/json" )
442489
490+ def expand_action_with_meta (
491+ data : _TYPE_BULK_ACTION_WITH_META ,
492+ ) -> _TYPE_BULK_ACTION_HEADER_WITH_META_AND_BODY :
493+ if isinstance (data , BulkMeta ):
494+ return data , None
495+ return expand_action_callback (data )
496+
443497 bulk_data : List [
444498 Union [
445499 Tuple [_TYPE_BULK_ACTION_HEADER ],
@@ -448,9 +502,10 @@ def streaming_bulk(
448502 ]
449503 bulk_actions : List [bytes ]
450504 for bulk_data , bulk_actions in _chunk_actions (
451- map (expand_action_callback , actions ),
505+ map (expand_action_with_meta , actions ),
452506 chunk_size ,
453507 max_chunk_bytes ,
508+ flush_after_seconds ,
454509 serializer ,
455510 ):
456511 for attempt in range (max_retries + 1 ):
@@ -572,6 +627,7 @@ def parallel_bulk(
572627 thread_count : int = 4 ,
573628 chunk_size : int = 500 ,
574629 max_chunk_bytes : int = 100 * 1024 * 1024 ,
630+ flush_after_seconds : Optional [float ] = None ,
575631 queue_size : int = 4 ,
576632 expand_action_callback : Callable [
577633 [_TYPE_BULK_ACTION ], _TYPE_BULK_ACTION_HEADER_AND_BODY
@@ -611,7 +667,7 @@ def _setup_queues(self) -> None:
611667 super ()._setup_queues () # type: ignore[misc]
612668 # The queue must be at least the size of the number of threads to
613669 # prevent hanging when inserting sentinel values during teardown.
614- self ._inqueue : Queue [
670+ self ._inqueue : queue . Queue [
615671 Tuple [
616672 List [
617673 Union [
@@ -620,7 +676,7 @@ def _setup_queues(self) -> None:
620676 ],
621677 List [bytes ],
622678 ]
623- ] = Queue (max (queue_size , thread_count ))
679+ ] = queue . Queue (max (queue_size , thread_count ))
624680 self ._quick_put = self ._inqueue .put
625681
626682 with client ._otel .helpers_span ("helpers.parallel_bulk" ) as otel_span :
@@ -640,7 +696,11 @@ def _setup_queues(self) -> None:
640696 )
641697 ),
642698 _chunk_actions (
643- expanded_actions , chunk_size , max_chunk_bytes , serializer
699+ expanded_actions ,
700+ chunk_size ,
701+ max_chunk_bytes ,
702+ flush_after_seconds ,
703+ serializer ,
644704 ),
645705 ):
646706 yield from result
0 commit comments