Skip to content

Commit 25a8832

Browse files
authored
ARROW-169 Add a benchmark with extension types (#155)
1 parent 2dc13fa commit 25a8832

File tree

2 files changed

+46
-9
lines changed

2 files changed

+46
-9
lines changed

bindings/python/benchmarks/benchmarks.py

Lines changed: 45 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
import abc
15-
import collections
1615
import math
1716
import os
1817
from abc import ABC
@@ -21,14 +20,15 @@
2120
import pandas as pd
2221
import pyarrow
2322
import pymongo
24-
from bson import BSON
23+
from bson import BSON, Binary, Decimal128
2524
from pymongoarrow.api import (
2625
Schema,
2726
find_arrow_all,
2827
find_numpy_all,
2928
find_pandas_all,
3029
write,
3130
)
31+
from pymongoarrow.types import BinaryType, Decimal128Type
3232

3333
N_DOCS = int(os.environ.get("N_DOCS"))
3434
assert pymongo.has_c()
@@ -163,7 +163,7 @@ class ProfileReadArray(Read):
163163
def setup_cache(self):
164164
coll = db.benchmark
165165
coll.drop()
166-
base_dict = collections.OrderedDict(
166+
base_dict = dict(
167167
[("x", 1), ("y", math.pi), ("emb", [math.pi for _ in range(EMBEDDED_OBJECT_SIZE)])]
168168
)
169169
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
@@ -208,7 +208,7 @@ class ProfileReadDocument(Read):
208208
def setup_cache(self):
209209
coll = db.benchmark
210210
coll.drop()
211-
base_dict = collections.OrderedDict(
211+
base_dict = dict(
212212
[
213213
("x", 1),
214214
("y", math.pi),
@@ -250,7 +250,7 @@ class ProfileReadSmall(Read):
250250
def setup_cache(self):
251251
coll = db.benchmark
252252
coll.drop()
253-
base_dict = collections.OrderedDict(
253+
base_dict = dict(
254254
[
255255
("x", 1),
256256
("y", math.pi),
@@ -272,7 +272,44 @@ def setup_cache(self):
272272
coll = db.benchmark
273273
coll.drop()
274274

275-
base_dict = collections.OrderedDict([(k, math.pi) for k in self.large_doc_keys])
275+
base_dict = dict([(k, math.pi) for k in self.large_doc_keys])
276+
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
277+
print(
278+
"%d docs, %dk each with %d keys"
279+
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
280+
)
281+
282+
283+
class ProfileReadExtensionSmall(Read):
284+
schema = Schema({"x": Decimal128Type(), "y": BinaryType(10)})
285+
dtypes = np.dtype(np.dtype([("x", np.object_), ("y", np.object_)]))
286+
287+
def setup_cache(self):
288+
coll = db.benchmark
289+
coll.drop()
290+
base_dict = dict(
291+
[
292+
("x", Decimal128("1")),
293+
("y", Binary(b"1234", 10)),
294+
]
295+
)
296+
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
297+
print(
298+
"%d docs, %dk each with %d keys"
299+
% (N_DOCS, len(BSON.encode(base_dict)) // 1024, len(base_dict))
300+
)
301+
302+
303+
class ProfileReadExtensionLarge(Read):
304+
large_doc_keys = [f"{i}" for i in range(LARGE_DOC_SIZE)]
305+
schema = Schema({k: Decimal128Type() for k in large_doc_keys})
306+
dtypes = np.dtype([(k, np.object_) for k in large_doc_keys])
307+
308+
def setup_cache(self):
309+
coll = db.benchmark
310+
coll.drop()
311+
312+
base_dict = dict([(k, Decimal128(k)) for k in self.large_doc_keys])
276313
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
277314
print(
278315
"%d docs, %dk each with %d keys"
@@ -291,7 +328,7 @@ class ProfileInsertSmall(Insert):
291328
def setup_cache(self):
292329
coll = db.benchmark
293330
coll.drop()
294-
base_dict = collections.OrderedDict([("x", 1), ("y", math.pi)])
331+
base_dict = dict([("x", 1), ("y", math.pi)])
295332
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
296333
print(
297334
"%d docs, %dk each with %d keys"
@@ -310,7 +347,7 @@ class ProfileInsertLarge(Insert):
310347
def setup_cache(self):
311348
coll = db.benchmark
312349
coll.drop()
313-
base_dict = collections.OrderedDict([(k, math.pi) for k in self.large_doc_keys])
350+
base_dict = dict([(k, math.pi) for k in self.large_doc_keys])
314351
coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
315352
print(
316353
"%d docs, %dk each with %d keys"

bindings/python/docs/source/developer/benchmarks.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,4 +9,4 @@ System Requirements
99
To run the benchmarks, you need the `asv <https://pypi.org/project/asv/>`_ package,
1010
which can then be invoked like so::
1111

12-
$ asv run --strict -E existing
12+
$ asv run --strict --python=`which python`

0 commit comments

Comments
 (0)