|
17 | 17 | import os
|
18 | 18 | import string
|
19 | 19 |
|
| 20 | +import numpy as np |
| 21 | +import pandas as pd |
20 | 22 | import pyarrow
|
21 | 23 | import pymongo
|
22 | 24 | from bson import BSON
|
|
54 | 56 |
|
55 | 57 | large_doc_keys = [c * i for c in string.ascii_lowercase for i in range(1, 101)]
|
56 | 58 | schemas[SMALL] = Schema({"x": pyarrow.int64(), "y": pyarrow.float64()})
|
| 59 | +dtypes[SMALL] = np.dtype([("x", np.int64), ("y", np.float64)]) |
| 60 | +dtypes[LARGE] = np.dtype([(k, np.float64) for k in large_doc_keys]) |
57 | 61 | schemas[LARGE] = Schema({k: pyarrow.float64() for k in large_doc_keys})
|
58 | 62 | large = db[collection_names[LARGE]]
|
59 | 63 | large.drop()
|
@@ -95,3 +99,44 @@ def time_insert_pandas(self):
|
95 | 99 |
|
def time_insert_numpy(self):
    """Time writing the NumPy arrays for the current size into its collection."""
    target = db[collection_names[CUR_SIZE]]
    payload = numpy_arrays[CUR_SIZE]
    write(target, payload)
|
| 102 | + |
| 103 | + |
class ProfileRead:
    """
    A benchmark that times the performance of various kinds
    of reading MongoDB data.

    Each ``time_*`` method reads the collection selected by ``CUR_SIZE``
    and discards the result; only the elapsed time is of interest. The
    ``time_conventional_*`` methods build the result by iterating a
    PyMongo cursor in Python, while the ``time_to_*`` methods delegate to
    the ``find_*_all`` helpers using the schema registered for the
    current size.
    """

    def setup(self):
        """Drop the collection for the current benchmark size."""
        # NOTE(review): nothing visible in this class repopulates the
        # collection after the drop, so the read benchmarks below may be
        # timing queries against an empty collection -- confirm that the
        # documents are inserted elsewhere before these methods run.
        db[collection_names[CUR_SIZE]].drop()

    def time_conventional_ndarray(self):
        """Time building a structured ndarray from a plain cursor."""
        collection = db[collection_names[CUR_SIZE]]
        cursor = collection.find()
        dtype = dtypes[CUR_SIZE]

        # The two branches are kept separate on purpose: the small case
        # builds each row from literal keys, so the work being timed for
        # it is not changed by the generic key loop used for large docs.
        if CUR_SIZE == LARGE:
            np.array(
                [tuple(doc[k] for k in large_doc_keys) for doc in cursor],
                dtype=dtype,
            )
        else:
            np.array([(doc["x"], doc["y"]) for doc in cursor], dtype=dtype)

    def time_to_numpy(self):
        """Time ``find_numpy_all`` with the schema for the current size."""
        c = db[collection_names[CUR_SIZE]]
        schema = schemas[CUR_SIZE]
        find_numpy_all(c, {}, schema=schema)

    def time_conventional_pandas(self):
        """Time building a DataFrame from a plain cursor without ``_id``."""
        collection = db[collection_names[CUR_SIZE]]
        cursor = collection.find(projection={"_id": 0})
        # Only the construction cost matters; the frame itself is discarded.
        pd.DataFrame(list(cursor))

    def time_to_pandas(self):
        """Time ``find_pandas_all`` with the schema for the current size."""
        c = db[collection_names[CUR_SIZE]]
        schema = schemas[CUR_SIZE]
        find_pandas_all(c, {}, schema=schema)

    def time_to_arrow(self):
        """Time ``find_arrow_all`` with the schema for the current size."""
        c = db[collection_names[CUR_SIZE]]
        schema = schemas[CUR_SIZE]
        find_arrow_all(c, {}, schema=schema)
0 commit comments