@@ -6,10 +6,8 @@ use arrow::{
6
6
} ;
7
7
use bson:: doc;
8
8
use chrono:: { DateTime , NaiveDateTime , Utc } ;
9
- use mongodb:: {
10
- options:: { ClientOptions , StreamAddress } ,
11
- Client ,
12
- } ;
9
+ use mongodb:: options:: { ClientOptions , StreamAddress } ;
10
+ use mongodb:: sync:: Client ;
13
11
14
12
/// Configuration for the MongoDB writer
15
13
pub struct WriterConfig < ' a > {
@@ -71,7 +69,7 @@ impl Writer {
71
69
. database ( config. database )
72
70
. collection ( config. collection )
73
71
. drop ( None ) ;
74
- if let Err ( _ ) = drop {
72
+ if drop . is_err ( ) {
75
73
println ! ( "Collection does not exist, and was not dropped" ) ;
76
74
}
77
75
}
@@ -105,7 +103,7 @@ impl Writer {
105
103
}
106
104
107
105
/// MongoDB supports only a subset of the Apache Arrow data types; check whether the schema can be written
108
- fn check_supported_schema ( fields : & Vec < Field > , coerce_types : bool ) -> Result < ( ) , ( ) > {
106
+ fn check_supported_schema ( fields : & [ Field ] , coerce_types : bool ) -> Result < ( ) , ( ) > {
109
107
for field in fields {
110
108
let t = field. data_type ( ) ;
111
109
match t {
@@ -131,16 +129,19 @@ impl Writer {
131
129
| DataType :: Float32
132
130
| DataType :: Float64
133
131
| DataType :: Utf8
132
+ | DataType :: LargeUtf8
134
133
| DataType :: Timestamp ( _, _) => {
135
134
// data types supported without coercion
136
135
}
137
136
DataType :: Float16 => {
138
137
eprintln ! ( "Float16 arrays not supported" ) ;
139
138
return Err ( ( ) ) ;
140
139
}
141
- DataType :: List ( data_type) | DataType :: FixedSizeList ( data_type, _) => {
140
+ DataType :: List ( data_type)
141
+ | DataType :: LargeList ( data_type)
142
+ | DataType :: FixedSizeList ( data_type, _) => {
142
143
Writer :: check_supported_schema (
143
- & vec ! [ Field :: new( field. name( ) . as_str( ) , * data_type. clone( ) , false ) ] ,
144
+ & [ Field :: new ( field. name ( ) . as_str ( ) , * data_type. clone ( ) , false ) ] ,
144
145
coerce_types,
145
146
) ?;
146
147
}
@@ -152,13 +153,26 @@ impl Writer {
152
153
| DataType :: Duration ( _)
153
154
| DataType :: Interval ( _)
154
155
| DataType :: Binary
156
+ | DataType :: LargeBinary
155
157
| DataType :: FixedSizeBinary ( _) => {
156
158
eprintln ! ( "Data type {:?} is not supported" , t) ;
157
159
return Err ( ( ) ) ;
158
160
}
161
+ DataType :: Null => {
162
+ eprintln ! ( "Data type {:?} is not supported" , t) ;
163
+ return Err ( ( ) ) ;
164
+ }
165
+ DataType :: Union ( _) => {
166
+ eprintln ! ( "Data type {:?} is not supported" , t) ;
167
+ return Err ( ( ) ) ;
168
+ }
169
+ DataType :: Dictionary ( _, _) => {
170
+ eprintln ! ( "Data type {:?} is not supported" , t) ;
171
+ return Err ( ( ) ) ;
172
+ }
159
173
}
160
174
}
161
- return Ok ( ( ) ) ;
175
+ Ok ( ( ) )
162
176
}
163
177
}
164
178
@@ -179,6 +193,7 @@ impl From<&RecordBatch> for Documents {
179
193
. as_any ( )
180
194
. downcast_ref :: < BooleanArray > ( )
181
195
. expect ( "Unable to unwrap array" ) ;
196
+ #[ allow( clippy:: needless_range_loop) ]
182
197
for i in 0 ..len {
183
198
if !array. is_null ( i) {
184
199
documents[ i] . insert ( field. name ( ) , array. value ( i) ) ;
@@ -196,6 +211,7 @@ impl From<&RecordBatch> for Documents {
196
211
. as_any ( )
197
212
. downcast_ref :: < Int32Array > ( )
198
213
. expect ( "Unable to unwrap array" ) ;
214
+ #[ allow( clippy:: needless_range_loop) ]
199
215
for i in 0 ..len {
200
216
if !array. is_null ( i) {
201
217
documents[ i] . insert ( field. name ( ) , array. value ( i) ) ;
@@ -208,6 +224,7 @@ impl From<&RecordBatch> for Documents {
208
224
. as_any ( )
209
225
. downcast_ref :: < Int64Array > ( )
210
226
. expect ( "Unable to unwrap array" ) ;
227
+ #[ allow( clippy:: needless_range_loop) ]
211
228
for i in 0 ..len {
212
229
if !array. is_null ( i) {
213
230
documents[ i] . insert ( field. name ( ) , array. value ( i) ) ;
@@ -220,6 +237,7 @@ impl From<&RecordBatch> for Documents {
220
237
. as_any ( )
221
238
. downcast_ref :: < Float32Array > ( )
222
239
. expect ( "Unable to unwrap array" ) ;
240
+ #[ allow( clippy:: needless_range_loop) ]
223
241
for i in 0 ..len {
224
242
if !array. is_null ( i) {
225
243
documents[ i] . insert ( field. name ( ) , array. value ( i) ) ;
@@ -231,6 +249,7 @@ impl From<&RecordBatch> for Documents {
231
249
. as_any ( )
232
250
. downcast_ref :: < Float64Array > ( )
233
251
. expect ( "Unable to unwrap array" ) ;
252
+ #[ allow( clippy:: needless_range_loop) ]
234
253
for i in 0 ..len {
235
254
if !array. is_null ( i) {
236
255
documents[ i] . insert ( field. name ( ) , array. value ( i) ) ;
@@ -244,12 +263,13 @@ impl From<&RecordBatch> for Documents {
244
263
. as_any ( )
245
264
. downcast_ref :: < TimestampMillisecondArray > ( )
246
265
. expect ( "Unable to unwrap array" ) ;
266
+ #[ allow( clippy:: needless_range_loop) ]
247
267
for i in 0 ..len {
248
268
if !array. is_null ( i) {
249
269
let value = array. value ( i) ;
250
270
documents[ i] . insert (
251
271
field. name ( ) ,
252
- bson:: Bson :: UtcDatetime ( DateTime :: < Utc > :: from_utc (
272
+ bson:: Bson :: DateTime ( DateTime :: < Utc > :: from_utc (
253
273
NaiveDateTime :: from_timestamp ( value / 1000 , 0 ) ,
254
274
Utc ,
255
275
) ) ,
@@ -268,6 +288,7 @@ impl From<&RecordBatch> for Documents {
268
288
. as_any ( )
269
289
. downcast_ref :: < StringArray > ( )
270
290
. expect ( "Unable to unwrap array" ) ;
291
+ #[ allow( clippy:: needless_range_loop) ]
271
292
for i in 0 ..len {
272
293
if !array. is_null ( i) {
273
294
documents[ i] . insert ( field. name ( ) , array. value ( i) ) ;
@@ -279,7 +300,7 @@ impl From<&RecordBatch> for Documents {
279
300
panic ! ( "Write support for lists not yet implemented" )
280
301
}
281
302
DataType :: Struct ( _) => panic ! ( "Write support for structs not yet implemented" ) ,
282
- t @ _ => panic ! ( "Encountered unwritable data type {:?}" , t) ,
303
+ t => panic ! ( "Encountered unwritable data type {:?}" , t) ,
283
304
} ) ;
284
305
285
306
Self ( documents)
@@ -334,7 +355,7 @@ mod tests {
334
355
let writer = Writer :: try_new ( & writer_config, schema) ?;
335
356
336
357
// read from a collection and write to another
337
- while let Ok ( Some ( batch) ) = reader. next ( ) {
358
+ while let Ok ( Some ( batch) ) = reader. next_batch ( ) {
338
359
writer. write ( & batch) ?
339
360
}
340
361
Ok ( ( ) )
0 commit comments