File tree: 2 files changed, +25 -4 lines changed
Original file line number Diff line number Diff line change
11
11
import pyarrow
12
12
import pymongo
13
13
from bson import BSON , Int64 , ObjectId
14
- from pymongoarrow .api import Schema , find_arrow_all , find_numpy_all , find_pandas_all
14
+ from pymongoarrow .api import (
15
+ Schema ,
16
+ find_arrow_all ,
17
+ find_numpy_all ,
18
+ find_pandas_all ,
19
+ write ,
20
+ )
15
21
16
22
assert pymongo .has_c ()
17
23
25
31
dtypes = {}
26
32
schemas = {}
27
33
raw_bsons = {}
34
+ arrow_tables = {}
28
35
29
36
30
37
def _setup ():
@@ -82,6 +89,8 @@ def _setup():
82
89
83
90
raw_bsons [SMALL ] = raw_bson_small
84
91
raw_bsons [LARGE ] = raw_bson_large
92
+ arrow_tables [SMALL ] = find_arrow_all (db [collection_names [SMALL ]], {}, schema = schemas [SMALL ])
93
+ arrow_tables [LARGE ] = find_arrow_all (db [collection_names [LARGE ]], {}, schema = schemas [LARGE ])
85
94
86
95
87
96
def _teardown ():
@@ -143,6 +152,17 @@ def to_arrow(use_large):
143
152
find_arrow_all (c , {}, schema = schema )
144
153
145
154
155
+ @bench ("insert_arrow" )
156
+ def insert_arrow (use_large ):
157
+ write (db [collection_names [use_large ]], arrow_tables [use_large ])
158
+
159
+
160
+ @bench ("insert_conventional" )
161
+ def insert_conventional (use_large ):
162
+ tab = arrow_tables [use_large ].to_pylist ()
163
+ db [collection_names [use_large ]].insert_many (tab )
164
+
165
+
146
166
parser = argparse .ArgumentParser (
147
167
formatter_class = argparse .RawTextHelpFormatter ,
148
168
epilog = """
Original file line number Diff line number Diff line change
@@ -279,20 +279,21 @@ def write(collection, tabular):
279
279
"insertedCount" : 0 ,
280
280
}
281
281
tabular_gen = _tabular_generator (tabular )
282
- while cur_offset < len (tabular ):
282
+ tab_size = len (tabular )
283
+ while cur_offset < tab_size :
283
284
cur_size = 0
284
285
cur_batch = []
285
286
i = 0
286
287
while (
287
288
cur_size <= _MAX_MESSAGE_SIZE
288
289
and len (cur_batch ) <= _MAX_WRITE_BATCH_SIZE
289
- and cur_offset + i < len ( tabular )
290
+ and cur_offset + i < tab_size
290
291
):
291
292
enc_tab = RawBSONDocument (
292
293
encode (next (tabular_gen ), codec_options = collection .codec_options )
293
294
)
294
295
cur_batch .append (enc_tab )
295
- cur_size += len (enc_tab )
296
+ cur_size += len (enc_tab . raw )
296
297
i += 1
297
298
try :
298
299
collection .insert_many (cur_batch )
You can’t perform that action at this time.
0 commit comments