|
36 | 36 | db = pymongo.MongoClient().pymongoarrow_test
|
37 | 37 |
|
38 | 38 | LARGE_DOC_SIZE = 50
|
# Number of values (array) or key/value pairs (document) in each embedded object.
EMBEDDED_OBJECT_SIZE = 64
39 | 42 |
|
40 | 43 |
|
41 | 44 | # We have to use ABCs because ASV doesn't support any other way of skipping tests.
|
@@ -121,12 +124,14 @@ def setup(self):
|
121 | 124 | coll = db.benchmark
|
122 | 125 | coll.drop()
|
123 | 126 | base_dict = collections.OrderedDict(
|
124 |
| - [("x", 1), ("y", math.pi), ("emb", [math.pi for _ in range(64)])] |
| 127 | + [("x", 1), ("y", math.pi), ("emb", [math.pi for _ in range(EMBEDDED_OBJECT_SIZE)])] |
125 | 128 | )
|
126 |
| - schema_dict = {"x": pyarrow.int64(), "y": pyarrow.float64()} |
127 |
| - dtypes_list = np.dtype([("x", np.int64), ("y", np.float64)]) |
| 129 | + schema_dict = { |
| 130 | + "x": pyarrow.int64(), |
| 131 | + "y": pyarrow.float64(), |
| 132 | + "emb": pyarrow.list_(pyarrow.float64()), |
| 133 | + } |
128 | 134 | self.schema = Schema(schema_dict)
|
129 |
| - self.dtypes = np.dtype(dtypes_list) |
130 | 135 | coll.insert_many([base_dict.copy() for _ in range(N_DOCS)])
|
131 | 136 | print(
|
132 | 137 | "%d docs, %dk each with %d keys"
|
@@ -155,6 +160,53 @@ def time_conventional_pandas(self):
|
155 | 160 | pass
|
156 | 161 |
|
157 | 162 |
|
class ProfileReadDocument(Read):
    """Read benchmarks over documents whose ``emb`` field is an embedded document.

    Every inserted document has three keys: ``x`` (the integer 1), ``y``
    (``math.pi``) and ``emb``, a sub-document holding
    ``EMBEDDED_OBJECT_SIZE`` float fields named ``a0``, ``a1``, ...
    """

    def setup(self):
        """Recreate ``db.benchmark`` and build the matching schema.

        Drops the collection, stores the schema on ``self.schema``, inserts
        ``N_DOCS`` copies of the template document and prints a one-line
        summary of the document count, encoded size and key count.
        """
        collection = db.benchmark
        collection.drop()

        # The same field names drive both the document and the struct schema,
        # so the two always line up.
        field_names = [f"a{index}" for index in range(EMBEDDED_OBJECT_SIZE)]
        embedded_doc = {name: math.pi for name in field_names}
        template = collections.OrderedDict(
            (("x", 1), ("y", math.pi), ("emb", embedded_doc))
        )

        embedded_type = pyarrow.struct(
            [pyarrow.field(name, pyarrow.float64()) for name in field_names]
        )
        self.schema = Schema(
            {
                "x": pyarrow.int64(),
                "y": pyarrow.float64(),
                "emb": embedded_type,
            }
        )

        documents = [template.copy() for _ in range(N_DOCS)]
        collection.insert_many(documents)

        encoded_kib = len(BSON.encode(template)) // 1024
        key_count = len(template)
        print("%d docs, %dk each with %d keys" % (N_DOCS, encoded_kib, key_count))

    # Needed because the naive methods don't always convert nested objects.
    @staticmethod
    def exercise_table(table):
        """Walk every value of every column in ``table``.

        Values that are ``pyarrow.StructScalar`` instances are expanded into
        a list of their field values; all other values are taken as-is. The
        nested lists are built and then discarded; nothing is returned.
        """
        _ = [
            [
                list(value.values()) if isinstance(value, pyarrow.StructScalar) else value
                for value in column
            ]
            for column in table.columns
        ]

    # The benchmarks below are overridden with empty bodies so they are skipped:
    # NumPy/Pandas do not work with nested documents.
    def time_to_numpy(self):
        pass

    def time_to_pandas(self):
        pass

    def time_conventional_ndarray(self):
        pass

    def time_conventional_pandas(self):
        pass
| 208 | + |
| 209 | + |
158 | 210 | class ProfileReadSmall(Read):
|
159 | 211 | schema = None
|
160 | 212 | dtypes = None
|
|
0 commit comments