
Commit 60c2d64

ARROW-160 Consolidate benchmarks (#141)
1 parent 46e8c9c commit 60c2d64

4 files changed (+58, -241 lines)

bindings/python/benchmark.py

Lines changed: 0 additions & 241 deletions
This file was deleted.

bindings/python/benchmarks/benchmarks.py

Lines changed: 45 additions & 0 deletions
@@ -17,6 +17,8 @@
 import os
 import string

+import numpy as np
+import pandas as pd
 import pyarrow
 import pymongo
 from bson import BSON
@@ -54,6 +56,8 @@

 large_doc_keys = [c * i for c in string.ascii_lowercase for i in range(1, 101)]
 schemas[SMALL] = Schema({"x": pyarrow.int64(), "y": pyarrow.float64()})
+dtypes[SMALL] = np.dtype([("x", np.int64), ("y", np.float64)])
+dtypes[LARGE] = np.dtype([(k, np.float64) for k in large_doc_keys])
 schemas[LARGE] = Schema({k: pyarrow.float64() for k in large_doc_keys})
 large = db[collection_names[LARGE]]
 large.drop()
@@ -95,3 +99,44 @@ def time_insert_pandas(self):

     def time_insert_numpy(self):
         write(db[collection_names[CUR_SIZE]], numpy_arrays[CUR_SIZE])
+
+
+class ProfileRead:
+    """
+    A benchmark that times the performance of various kinds
+    of reading MongoDB data.
+    """
+
+    def setup(self):
+        db[collection_names[CUR_SIZE]].drop()
+
+    def time_conventional_ndarray(self):
+        collection = db[collection_names[CUR_SIZE]]
+        cursor = collection.find()
+        dtype = dtypes[CUR_SIZE]
+
+        if CUR_SIZE == LARGE:
+            np.array([tuple(doc[k] for k in large_doc_keys) for doc in cursor], dtype=dtype)
+        else:
+            np.array([(doc["x"], doc["y"]) for doc in cursor], dtype=dtype)
+
+    def time_to_numpy(self):
+        c = db[collection_names[CUR_SIZE]]
+        schema = schemas[CUR_SIZE]
+        find_numpy_all(c, {}, schema=schema)
+
+    def time_conventional_pandas(self):
+        collection = db[collection_names[CUR_SIZE]]
+        _ = dtypes[CUR_SIZE]
+        cursor = collection.find(projection={"_id": 0})
+        _ = pd.DataFrame(list(cursor))
+
+    def time_to_pandas(self):
+        c = db[collection_names[CUR_SIZE]]
+        schema = schemas[CUR_SIZE]
+        find_pandas_all(c, {}, schema=schema)
+
+    def time_to_arrow(self):
+        c = db[collection_names[CUR_SIZE]]
+        schema = schemas[CUR_SIZE]
+        find_arrow_all(c, {}, schema=schema)
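
For context, the new ProfileRead benchmarks contrast conventional cursor-based reads with PyMongoArrow's find_numpy_all, find_pandas_all, and find_arrow_all helpers, using the schemas and structured dtypes defined above. The sketch below is not part of the commit; it assumes a local mongod on the default port, a made-up "bench.coll" namespace, and that the helpers are imported from pymongoarrow.api.

# Minimal sketch of the two pandas read paths that ProfileRead times.
# Assumptions: local mongod on the default port; "bench.coll" is hypothetical.
import pandas as pd
import pyarrow
import pymongo
from pymongoarrow.api import Schema, find_pandas_all

coll = pymongo.MongoClient().bench.coll
schema = Schema({"x": pyarrow.int64(), "y": pyarrow.float64()})

# Conventional path: pull every document through the cursor, then build a DataFrame.
conventional = pd.DataFrame(list(coll.find(projection={"_id": 0})))

# PyMongoArrow path: decode the server's responses directly against the schema.
df = find_pandas_all(coll, {}, schema=schema)
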
bindings/python/docs/source/developer/benchmarks.rst

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+Running Benchmarks
+==================
+
+.. highlight:: bash
+
+System Requirements
+-------------------
+
+To run the benchmarks, you need the `asv <https://pypi.org/project/asv/>`_ package,
+which can then be invoked like so::
+
+  $ asv run --strict -E existing
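
The benchmark module relies on asv's discovery conventions: asv imports the modules in the configured benchmark directory, calls setup() before each benchmark, and times every method whose name starts with time_. A minimal, illustrative example follows; the ProfileExample class is made up and not part of this commit.

# Illustrative asv benchmark class (hypothetical; not part of this commit).
# asv runs setup() before each timed method, excludes it from the measurement,
# and reports the wall-clock time of every method prefixed with "time_".
class ProfileExample:
    def setup(self):
        # Build fixtures here; this work is not counted in the benchmark time.
        self.data = list(range(100_000))

    def time_sum(self):
        # asv reports how long this call takes.
        sum(self.data)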

bindings/python/docs/source/developer/index.rst

Lines changed: 1 addition & 0 deletions
@@ -7,3 +7,4 @@ Technical guide for contributors to PyMongoArrow.
    :maxdepth: 1

    installation
+   benchmarks
