18
18
19
19
import numpy as np
20
20
import pandas as pd
21
- import pyarrow
21
+ import pyarrow as pa
22
22
import pymongo
23
23
from bson import BSON , Binary , Decimal128
24
+
24
25
from pymongoarrow .api import (
25
26
Schema ,
26
27
find_arrow_all ,
31
32
from pymongoarrow .types import BinaryType , Decimal128Type
32
33
33
34
# Number of documents the benchmarks work with.  It has to be supplied by the
# caller through the N_DOCS environment variable; fail with a message naming
# the variable instead of the opaque ``int(None)`` TypeError.
_n_docs_raw = os.environ.get("N_DOCS")
if _n_docs_raw is None:
    raise RuntimeError(
        "The N_DOCS environment variable must be set to the number of "
        "documents to benchmark with."
    )
N_DOCS = int(_n_docs_raw)

# Refuse to run the benchmarks when PyMongo's C extensions are not available.
assert pymongo.has_c()  # noqa: S101

# Database used by every benchmark in this module.
db = pymongo.MongoClient().pymongoarrow_test

# Number of "a<i>" keys generated for the large-document benchmarks.
LARGE_DOC_SIZE = 20
@@ -49,7 +50,11 @@ class Insert(ABC):
49
50
50
51
# The setup sometimes times out, so the limit is deliberately large.
timeout = 100_000
number = 1
# (min repeat, max repeat, time limit); sampling stops once the time limit is reached.
repeat = (1, 10, 30.0)
rounds = 1
54
59
55
60
@abc .abstractmethod
@@ -90,15 +95,19 @@ class Read(ABC):
90
95
91
96
# The setup sometimes times out, so the limit is deliberately large.
timeout = 100_000
number = 3
# (min repeat, max repeat, time limit); sampling stops once the time limit is reached.
repeat = (1, 10, 30.0)
rounds = 1
95
104
96
105
@abc .abstractmethod
97
106
def setup (self ):
98
107
raise NotImplementedError
99
108
100
109
# We need this because the naive methods don't always convert nested objects.
101
- @staticmethod
110
+ @staticmethod # noqa: B027
102
111
def exercise_table (table ):
103
112
pass
104
113
@@ -107,7 +116,10 @@ def time_conventional_ndarray(self):
107
116
cursor = collection .find (projection = {"_id" : 0 })
108
117
dtype = self .dtypes
109
118
if "Large" in type (self ).__name__ :
110
- np .array ([tuple (doc [k ] for k in self .large_doc_keys ) for doc in cursor ], dtype = dtype )
119
+ np .array (
120
+ [tuple (doc [k ] for k in self .large_doc_keys ) for doc in cursor ],
121
+ dtype = dtype ,
122
+ )
111
123
else :
112
124
np .array ([(doc ["x" ], doc ["y" ]) for doc in cursor ], dtype = dtype )
113
125
@@ -132,7 +144,7 @@ def time_to_arrow(self):
132
144
def time_conventional_arrow(self):
    """Time the plain PyMongo route: fetch all documents, then build an Arrow table."""
    # Materialise the whole cursor first (without ``_id``), as the
    # conventional approach would.
    documents = list(db.benchmark.find({}, projection={"_id": 0}))
    arrow_table = pa.Table.from_pylist(documents)
    self.exercise_table(arrow_table)
137
149
138
150
def peakmem_to_numpy (self ):
@@ -154,17 +166,21 @@ def peakmem_conventional_arrow(self):
154
166
class ProfileReadArray (Read ):
155
167
schema = Schema (
156
168
{
157
- "x" : pyarrow .int64 (),
158
- "y" : pyarrow .float64 (),
159
- "emb" : pyarrow .list_ (pyarrow .float64 ()),
169
+ "x" : pa .int64 (),
170
+ "y" : pa .float64 (),
171
+ "emb" : pa .list_ (pa .float64 ()),
160
172
}
161
173
)
162
174
163
175
def setup (self ):
164
176
coll = db .benchmark
165
177
coll .drop ()
166
178
base_dict = dict (
167
- [("x" , 1 ), ("y" , math .pi ), ("emb" , [math .pi for _ in range (EMBEDDED_OBJECT_SIZE )])]
179
+ [
180
+ ("x" , 1 ),
181
+ ("y" , math .pi ),
182
+ ("emb" , [math .pi for _ in range (EMBEDDED_OBJECT_SIZE )]),
183
+ ]
168
184
)
169
185
coll .insert_many ([base_dict .copy () for _ in range (N_DOCS )])
170
186
print (
@@ -176,7 +192,7 @@ def setup(self):
176
192
@staticmethod
def exercise_table(table):
    """Visit every cell of ``table``, unpacking list cells element by element.

    The nested lists that get built are thrown away; only the traversal
    itself matters.
    """

    def expand(cell):
        # A list cell is unpacked into a Python list of its elements;
        # any other cell is passed through unchanged.
        if isinstance(cell, pa.ListScalar):
            return list(cell.values)
        return cell

    _ = [[expand(cell) for cell in column] for column in table.columns]
182
198
@@ -197,10 +213,10 @@ def time_conventional_pandas(self):
197
213
class ProfileReadDocument (Read ):
198
214
schema = Schema (
199
215
{
200
- "x" : pyarrow .int64 (),
201
- "y" : pyarrow .float64 (),
202
- "emb" : pyarrow .struct (
203
- [pyarrow .field (f"a{ i } " , pyarrow .float64 ()) for i in range (EMBEDDED_OBJECT_SIZE )]
216
+ "x" : pa .int64 (),
217
+ "y" : pa .float64 (),
218
+ "emb" : pa .struct (
219
+ [pa .field (f"a{ i } " , pa .float64 ()) for i in range (EMBEDDED_OBJECT_SIZE )]
204
220
),
205
221
}
206
222
)
@@ -225,7 +241,7 @@ def setup(self):
225
241
@staticmethod
def exercise_table(table):
    """Visit every cell of ``table``, unpacking struct cells value by value.

    The nested lists that get built are thrown away; only the traversal
    itself matters.
    """

    def expand(cell):
        # A struct cell is unpacked into a Python list of its field values;
        # any other cell is passed through unchanged.
        if isinstance(cell, pa.StructScalar):
            return list(cell.values())
        return cell

    _ = [[expand(cell) for cell in column] for column in table.columns]
231
247
@@ -244,7 +260,7 @@ def time_conventional_pandas(self):
244
260
245
261
246
262
class ProfileReadSmall (Read ):
247
- schema = Schema ({"x" : pyarrow .int64 (), "y" : pyarrow .float64 ()})
263
+ schema = Schema ({"x" : pa .int64 (), "y" : pa .float64 ()})
248
264
# Structured dtype for the two benchmark fields: int64 "x" and float64 "y".
# (A single np.dtype() call is enough; wrapping it twice was redundant.)
dtypes = np.dtype([("x", np.int64), ("y", np.float64)])
249
265
250
266
def setup (self ):
@@ -265,7 +281,7 @@ def setup(self):
265
281
266
282
class ProfileReadLarge (Read ):
267
283
large_doc_keys = [f"a{ i } " for i in range (LARGE_DOC_SIZE )]
268
- schema = Schema ({k : pyarrow .float64 () for k in large_doc_keys })
284
+ schema = Schema ({k : pa .float64 () for k in large_doc_keys })
269
285
dtypes = np .dtype ([(k , np .float64 ) for k in large_doc_keys ])
270
286
271
287
def setup (self ):
@@ -333,7 +349,7 @@ def time_insert_conventional(self):
333
349
334
350
class ProfileInsertSmall (Insert ):
335
351
large_doc_keys = [f"a{ i } " for i in range (LARGE_DOC_SIZE )]
336
- schema = Schema ({"x" : pyarrow .int64 (), "y" : pyarrow .float64 ()})
352
+ schema = Schema ({"x" : pa .int64 (), "y" : pa .float64 ()})
337
353
dtypes = np .dtype ([("x" , np .int64 ), ("y" , np .float64 )])
338
354
339
355
def setup (self ):
@@ -352,7 +368,7 @@ def setup(self):
352
368
353
369
class ProfileInsertLarge (Insert ):
354
370
large_doc_keys = [f"a{ i } " for i in range (LARGE_DOC_SIZE )]
355
- schema = Schema ({k : pyarrow .float64 () for k in large_doc_keys })
371
+ schema = Schema ({k : pa .float64 () for k in large_doc_keys })
356
372
dtypes = np .dtype ([(k , np .float64 ) for k in large_doc_keys ])
357
373
358
374
def setup (self ):
0 commit comments