58
58
_MAX_WRITE_BATCH_SIZE = max (100000 , MAX_WRITE_BATCH_SIZE )
59
59
60
60
61
- def find_arrow_all (collection , query , * , schema , ** kwargs ):
61
+ def find_arrow_all (collection , query , * , schema = None , ** kwargs ):
62
62
"""Method that returns the results of a find query as a
63
63
:class:`pyarrow.Table` instance.
64
64
65
65
:Parameters:
66
66
- `collection`: Instance of :class:`~pymongo.collection.Collection`.
67
67
against which to run the ``find`` operation.
68
68
- `query`: A mapping containing the query to use for the find operation.
69
- - `schema`: Instance of :class:`~pymongoarrow.schema.Schema`.
69
+ - `schema` (optional) : Instance of :class:`~pymongoarrow.schema.Schema`.
70
70
71
71
Additional keyword-arguments passed to this method will be passed
72
72
directly to the underlying ``find`` operation.
@@ -84,23 +84,25 @@ def find_arrow_all(collection, query, *, schema, **kwargs):
84
84
stacklevel = 2 ,
85
85
)
86
86
87
- kwargs .setdefault ("projection" , schema ._get_projection ())
87
+ if schema :
88
+ kwargs .setdefault ("projection" , schema ._get_projection ())
89
+
88
90
raw_batch_cursor = collection .find_raw_batches (query , ** kwargs )
89
91
for batch in raw_batch_cursor :
90
92
process_bson_stream (batch , context )
91
93
92
94
return context .finish ()
93
95
94
96
95
- def aggregate_arrow_all (collection , pipeline , * , schema , ** kwargs ):
97
+ def aggregate_arrow_all (collection , pipeline , * , schema = None , ** kwargs ):
96
98
"""Method that returns the results of an aggregation pipeline as a
97
99
:class:`pyarrow.Table` instance.
98
100
99
101
:Parameters:
100
102
- `collection`: Instance of :class:`~pymongo.collection.Collection`.
101
103
against which to run the ``aggregate`` operation.
102
104
- `pipeline`: A list of aggregation pipeline stages.
103
- - `schema`: Instance of :class:`~pymongoarrow.schema.Schema`.
105
+ - `schema` (optional) : Instance of :class:`~pymongoarrow.schema.Schema`.
104
106
105
107
Additional keyword-arguments passed to this method will be passed
106
108
directly to the underlying ``aggregate`` operation.
@@ -143,15 +145,15 @@ def _arrow_to_pandas(arrow_table):
143
145
return arrow_table .to_pandas (split_blocks = True , self_destruct = True )
144
146
145
147
146
- def find_pandas_all (collection , query , * , schema , ** kwargs ):
148
+ def find_pandas_all (collection , query , * , schema = None , ** kwargs ):
147
149
"""Method that returns the results of a find query as a
148
150
:class:`pandas.DataFrame` instance.
149
151
150
152
:Parameters:
151
153
- `collection`: Instance of :class:`~pymongo.collection.Collection`.
152
154
against which to run the ``find`` operation.
153
155
- `query`: A mapping containing the query to use for the find operation.
154
- - `schema`: Instance of :class:`~pymongoarrow.schema.Schema`.
156
+ - `schema` (optional) : Instance of :class:`~pymongoarrow.schema.Schema`.
155
157
156
158
Additional keyword-arguments passed to this method will be passed
157
159
directly to the underlying ``find`` operation.
@@ -162,15 +164,15 @@ def find_pandas_all(collection, query, *, schema, **kwargs):
162
164
return _arrow_to_pandas (find_arrow_all (collection , query , schema = schema , ** kwargs ))
163
165
164
166
165
- def aggregate_pandas_all (collection , pipeline , * , schema , ** kwargs ):
167
+ def aggregate_pandas_all (collection , pipeline , * , schema = None , ** kwargs ):
166
168
"""Method that returns the results of an aggregation pipeline as a
167
169
:class:`pandas.DataFrame` instance.
168
170
169
171
:Parameters:
170
172
- `collection`: Instance of :class:`~pymongo.collection.Collection`.
171
173
against which to run the ``aggregate`` operation.
172
174
- `pipeline`: A list of aggregation pipeline stages.
173
- - `schema`: Instance of :class:`~pymongoarrow.schema.Schema`.
175
+ - `schema` (optional) : Instance of :class:`~pymongoarrow.schema.Schema`.
174
176
175
177
Additional keyword-arguments passed to this method will be passed
176
178
directly to the underlying ``aggregate`` operation.
@@ -181,7 +183,7 @@ def aggregate_pandas_all(collection, pipeline, *, schema, **kwargs):
181
183
return _arrow_to_pandas (aggregate_arrow_all (collection , pipeline , schema = schema , ** kwargs ))
182
184
183
185
184
- def _arrow_to_numpy (arrow_table , schema ):
186
+ def _arrow_to_numpy (arrow_table , schema = None ):
185
187
"""Helper function that converts an Arrow Table to a dictionary
186
188
containing NumPy arrays. The memory buffers backing the given Arrow Table
187
189
may be destroyed after conversion if the resulting Numpy array(s) is not a
@@ -190,6 +192,9 @@ def _arrow_to_numpy(arrow_table, schema):
190
192
See https://arrow.apache.org/docs/python/numpy.html for details.
191
193
"""
192
194
container = {}
195
+ if not schema :
196
+ schema = arrow_table .schema
197
+
193
198
for fname in schema :
194
199
dtype = get_numpy_type (schema .typemap [fname ])
195
200
if dtype == np .str_ :
@@ -199,7 +204,7 @@ def _arrow_to_numpy(arrow_table, schema):
199
204
return container
200
205
201
206
202
- def find_numpy_all (collection , query , * , schema , ** kwargs ):
207
+ def find_numpy_all (collection , query , * , schema = None , ** kwargs ):
203
208
"""Method that returns the results of a find query as a
204
209
:class:`dict` instance whose keys are field names and values are
205
210
:class:`~numpy.ndarray` instances bearing the appropriate dtype.
@@ -208,7 +213,7 @@ def find_numpy_all(collection, query, *, schema, **kwargs):
208
213
- `collection`: Instance of :class:`~pymongo.collection.Collection`.
209
214
against which to run the ``find`` operation.
210
215
- `query`: A mapping containing the query to use for the find operation.
211
- - `schema`: Instance of :class:`~pymongoarrow.schema.Schema`.
216
+ - `schema` (optional) : Instance of :class:`~pymongoarrow.schema.Schema`.
212
217
213
218
Additional keyword-arguments passed to this method will be passed
214
219
directly to the underlying ``find`` operation.
@@ -228,7 +233,7 @@ def find_numpy_all(collection, query, *, schema, **kwargs):
228
233
return _arrow_to_numpy (find_arrow_all (collection , query , schema = schema , ** kwargs ), schema )
229
234
230
235
231
- def aggregate_numpy_all (collection , pipeline , * , schema , ** kwargs ):
236
+ def aggregate_numpy_all (collection , pipeline , * , schema = None , ** kwargs ):
232
237
"""Method that returns the results of an aggregation pipeline as a
233
238
:class:`dict` instance whose keys are field names and values are
234
239
:class:`~numpy.ndarray` instances bearing the appropriate dtype.
@@ -237,7 +242,7 @@ def aggregate_numpy_all(collection, pipeline, *, schema, **kwargs):
237
242
- `collection`: Instance of :class:`~pymongo.collection.Collection`.
238
243
against which to run the ``aggregate`` operation.
239
244
- `pipeline`: A list of aggregation pipeline stages.
240
- - `schema`: Instance of :class:`~pymongoarrow.schema.Schema`.
245
+ - `schema` (optional) : Instance of :class:`~pymongoarrow.schema.Schema`.
241
246
242
247
Additional keyword-arguments passed to this method will be passed
243
248
directly to the underlying ``aggregate`` operation.
0 commit comments