Skip to content

Commit 0213b2e

Browse files
authored
ARROW-72 Run performance tests on prototype and evaluate our approach (#68)
1 parent e243c67 commit 0213b2e

File tree

2 files changed

+25 −4 lines changed

2 files changed

+25 −4 lines changed

bindings/python/benchmark.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,13 @@
1111
import pyarrow
1212
import pymongo
1313
from bson import BSON, Int64, ObjectId
14-
from pymongoarrow.api import Schema, find_arrow_all, find_numpy_all, find_pandas_all
14+
from pymongoarrow.api import (
15+
Schema,
16+
find_arrow_all,
17+
find_numpy_all,
18+
find_pandas_all,
19+
write,
20+
)
1521

1622
assert pymongo.has_c()
1723

@@ -25,6 +31,7 @@
2531
dtypes = {}
2632
schemas = {}
2733
raw_bsons = {}
34+
arrow_tables = {}
2835

2936

3037
def _setup():
@@ -82,6 +89,8 @@ def _setup():
8289

8390
raw_bsons[SMALL] = raw_bson_small
8491
raw_bsons[LARGE] = raw_bson_large
92+
arrow_tables[SMALL] = find_arrow_all(db[collection_names[SMALL]], {}, schema=schemas[SMALL])
93+
arrow_tables[LARGE] = find_arrow_all(db[collection_names[LARGE]], {}, schema=schemas[LARGE])
8594

8695

8796
def _teardown():
@@ -143,6 +152,17 @@ def to_arrow(use_large):
143152
find_arrow_all(c, {}, schema=schema)
144153

145154

155+
@bench("insert_arrow")
156+
def insert_arrow(use_large):
157+
write(db[collection_names[use_large]], arrow_tables[use_large])
158+
159+
160+
@bench("insert_conventional")
161+
def insert_conventional(use_large):
162+
tab = arrow_tables[use_large].to_pylist()
163+
db[collection_names[use_large]].insert_many(tab)
164+
165+
146166
parser = argparse.ArgumentParser(
147167
formatter_class=argparse.RawTextHelpFormatter,
148168
epilog="""

bindings/python/pymongoarrow/api.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -279,20 +279,21 @@ def write(collection, tabular):
279279
"insertedCount": 0,
280280
}
281281
tabular_gen = _tabular_generator(tabular)
282-
while cur_offset < len(tabular):
282+
tab_size = len(tabular)
283+
while cur_offset < tab_size:
283284
cur_size = 0
284285
cur_batch = []
285286
i = 0
286287
while (
287288
cur_size <= _MAX_MESSAGE_SIZE
288289
and len(cur_batch) <= _MAX_WRITE_BATCH_SIZE
289-
and cur_offset + i < len(tabular)
290+
and cur_offset + i < tab_size
290291
):
291292
enc_tab = RawBSONDocument(
292293
encode(next(tabular_gen), codec_options=collection.codec_options)
293294
)
294295
cur_batch.append(enc_tab)
295-
cur_size += len(enc_tab)
296+
cur_size += len(enc_tab.raw)
296297
i += 1
297298
try:
298299
collection.insert_many(cur_batch)

0 commit comments

Comments
 (0)