Skip to content

Commit 17d2fec

Browse files
committed
update dependencies
closes #4
1 parent f90d5b5 commit 17d2fec

File tree

3 files changed

+47
-28
lines changed

3 files changed

+47
-28
lines changed

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ readme = "README.md"
1111
repository = "https://github.com/TheDataEngine/mongodb-arrow-connector/"
1212

1313
[dependencies]
14-
bson = "0.14"
15-
mongodb = "0.9.2"
16-
arrow = "0.16.0"
14+
bson = "1.0"
15+
mongodb = { version = "1.0", default-features = false, features = ["sync"] }
16+
arrow = "1.0"
1717
chrono = "0.4"

src/reader.rs

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@ use arrow::{
66
record_batch::{RecordBatch, RecordBatchReader},
77
};
88
use bson::{doc, Bson};
9-
use mongodb::{
10-
options::{AggregateOptions, ClientOptions, StreamAddress},
11-
Client,
12-
};
9+
use mongodb::options::{AggregateOptions, ClientOptions, StreamAddress};
10+
use mongodb::sync::Client;
1311

1412
/// Configuration for the MongoDB reader
1513
pub struct ReaderConfig<'a> {
@@ -74,11 +72,11 @@ impl Reader {
7472
}
7573

7674
/// Read the next record batch
77-
pub fn next(&mut self) -> Result<Option<RecordBatch>, ()> {
75+
pub fn next_batch(&mut self) -> Result<Option<RecordBatch>, ()> {
7876
let mut criteria = doc! {};
7977
let mut project = doc! {};
8078
for field in self.schema.fields() {
81-
project.insert(field.name(), bson::Bson::I32(1));
79+
project.insert(field.name(), bson::Bson::Int32(1));
8280
}
8381
criteria.insert("$project", project);
8482
let coll = self
@@ -108,7 +106,7 @@ impl Reader {
108106
}
109107

110108
let docs_len = docs.len();
111-
self.current_index = self.current_index + docs_len;
109+
self.current_index += docs_len;
112110
if docs_len == 0 {
113111
return Ok(None);
114112
}
@@ -135,11 +133,11 @@ impl Reader {
135133
TimeUnit::Millisecond => builder
136134
.field_builder::<TimestampMillisecondBuilder>(i)
137135
.unwrap(),
138-
t @ _ => panic!("Timestamp arrays can only be read as milliseconds, found {:?}. \nPlease read as milliseconds then cast to desired resolution.", t)
136+
t => panic!("Timestamp arrays can only be read as milliseconds, found {:?}. \nPlease read as milliseconds then cast to desired resolution.", t)
139137
};
140138
for v in 0..docs_len {
141139
let doc: &_ = docs.get(v).unwrap();
142-
match doc.get_utc_datetime(field.name()) {
140+
match doc.get_datetime(field.name()) {
143141
Ok(val) => field_builder.append_value(val.timestamp_millis()).unwrap(),
144142
Err(_) => field_builder.append_null().unwrap(),
145143
};
@@ -194,7 +192,7 @@ impl Reader {
194192
}
195193
DataType::List(_dtype) => panic!("Creating lists not yet implemented"),
196194
DataType::Struct(_fields) => panic!("Creating nested structs not yet implemented"),
197-
t @ _ => panic!("Data type {:?} not supported when reading from MongoDB", t),
195+
t => panic!("Data type {:?} not supported when reading from MongoDB", t),
198196
}
199197
}
200198
// append true to all struct records
@@ -206,11 +204,11 @@ impl Reader {
206204
}
207205

208206
impl RecordBatchReader for Reader {
209-
fn schema(&mut self) -> Arc<Schema> {
207+
fn schema(&self) -> Arc<Schema> {
210208
Arc::new(self.schema.clone())
211209
}
212210
fn next_batch(&mut self) -> arrow::error::Result<Option<RecordBatch>> {
213-
self.next().map_err(|_| {
211+
self.next_batch().map_err(|_| {
214212
arrow::error::ArrowError::IoError("Unable to read next batch from MongoDB".to_string())
215213
})
216214
}
@@ -259,7 +257,7 @@ mod tests {
259257
// write results to CSV as the schema would allow
260258
let file = File::create("./target/debug/delays.csv").unwrap();
261259
let mut writer = csv::Writer::new(file);
262-
while let Ok(Some(batch)) = reader.next() {
260+
while let Ok(Some(batch)) = reader.next_batch() {
263261
writer.write(&batch).unwrap();
264262
}
265263
Ok(())

src/writer.rs

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@ use arrow::{
66
};
77
use bson::doc;
88
use chrono::{DateTime, NaiveDateTime, Utc};
9-
use mongodb::{
10-
options::{ClientOptions, StreamAddress},
11-
Client,
12-
};
9+
use mongodb::options::{ClientOptions, StreamAddress};
10+
use mongodb::sync::Client;
1311

1412
/// Configuration for the MongoDB writer
1513
pub struct WriterConfig<'a> {
@@ -71,7 +69,7 @@ impl Writer {
7169
.database(config.database)
7270
.collection(config.collection)
7371
.drop(None);
74-
if let Err(_) = drop {
72+
if drop.is_err() {
7573
println!("Collection does not exist, and was not dropped");
7674
}
7775
}
@@ -105,7 +103,7 @@ impl Writer {
105103
}
106104

107105
/// MongoDB supports a subset of Apache Arrow supported types, check if schema can be written
108-
fn check_supported_schema(fields: &Vec<Field>, coerce_types: bool) -> Result<(), ()> {
106+
fn check_supported_schema(fields: &[Field], coerce_types: bool) -> Result<(), ()> {
109107
for field in fields {
110108
let t = field.data_type();
111109
match t {
@@ -131,16 +129,19 @@ impl Writer {
131129
| DataType::Float32
132130
| DataType::Float64
133131
| DataType::Utf8
132+
| DataType::LargeUtf8
134133
| DataType::Timestamp(_, _) => {
135134
// data types supported without coercion
136135
}
137136
DataType::Float16 => {
138137
eprintln!("Float16 arrays not supported");
139138
return Err(());
140139
}
141-
DataType::List(data_type) | DataType::FixedSizeList(data_type, _) => {
140+
DataType::List(data_type)
141+
| DataType::LargeList(data_type)
142+
| DataType::FixedSizeList(data_type, _) => {
142143
Writer::check_supported_schema(
143-
&vec![Field::new(field.name().as_str(), *data_type.clone(), false)],
144+
&[Field::new(field.name().as_str(), *data_type.clone(), false)],
144145
coerce_types,
145146
)?;
146147
}
@@ -152,13 +153,26 @@ impl Writer {
152153
| DataType::Duration(_)
153154
| DataType::Interval(_)
154155
| DataType::Binary
156+
| DataType::LargeBinary
155157
| DataType::FixedSizeBinary(_) => {
156158
eprintln!("Data type {:?} is not supported", t);
157159
return Err(());
158160
}
161+
DataType::Null => {
162+
eprintln!("Data type {:?} is not supported", t);
163+
return Err(());
164+
}
165+
DataType::Union(_) => {
166+
eprintln!("Data type {:?} is not supported", t);
167+
return Err(());
168+
}
169+
DataType::Dictionary(_, _) => {
170+
eprintln!("Data type {:?} is not supported", t);
171+
return Err(());
172+
}
159173
}
160174
}
161-
return Ok(());
175+
Ok(())
162176
}
163177
}
164178

@@ -179,6 +193,7 @@ impl From<&RecordBatch> for Documents {
179193
.as_any()
180194
.downcast_ref::<BooleanArray>()
181195
.expect("Unable to unwrap array");
196+
#[allow(clippy::needless_range_loop)]
182197
for i in 0..len {
183198
if !array.is_null(i) {
184199
documents[i].insert(field.name(), array.value(i));
@@ -196,6 +211,7 @@ impl From<&RecordBatch> for Documents {
196211
.as_any()
197212
.downcast_ref::<Int32Array>()
198213
.expect("Unable to unwrap array");
214+
#[allow(clippy::needless_range_loop)]
199215
for i in 0..len {
200216
if !array.is_null(i) {
201217
documents[i].insert(field.name(), array.value(i));
@@ -208,6 +224,7 @@ impl From<&RecordBatch> for Documents {
208224
.as_any()
209225
.downcast_ref::<Int64Array>()
210226
.expect("Unable to unwrap array");
227+
#[allow(clippy::needless_range_loop)]
211228
for i in 0..len {
212229
if !array.is_null(i) {
213230
documents[i].insert(field.name(), array.value(i));
@@ -220,6 +237,7 @@ impl From<&RecordBatch> for Documents {
220237
.as_any()
221238
.downcast_ref::<Float32Array>()
222239
.expect("Unable to unwrap array");
240+
#[allow(clippy::needless_range_loop)]
223241
for i in 0..len {
224242
if !array.is_null(i) {
225243
documents[i].insert(field.name(), array.value(i));
@@ -231,6 +249,7 @@ impl From<&RecordBatch> for Documents {
231249
.as_any()
232250
.downcast_ref::<Float64Array>()
233251
.expect("Unable to unwrap array");
252+
#[allow(clippy::needless_range_loop)]
234253
for i in 0..len {
235254
if !array.is_null(i) {
236255
documents[i].insert(field.name(), array.value(i));
@@ -244,12 +263,13 @@ impl From<&RecordBatch> for Documents {
244263
.as_any()
245264
.downcast_ref::<TimestampMillisecondArray>()
246265
.expect("Unable to unwrap array");
266+
#[allow(clippy::needless_range_loop)]
247267
for i in 0..len {
248268
if !array.is_null(i) {
249269
let value = array.value(i);
250270
documents[i].insert(
251271
field.name(),
252-
bson::Bson::UtcDatetime(DateTime::<Utc>::from_utc(
272+
bson::Bson::DateTime(DateTime::<Utc>::from_utc(
253273
NaiveDateTime::from_timestamp(value / 1000, 0),
254274
Utc,
255275
)),
@@ -268,6 +288,7 @@ impl From<&RecordBatch> for Documents {
268288
.as_any()
269289
.downcast_ref::<StringArray>()
270290
.expect("Unable to unwrap array");
291+
#[allow(clippy::needless_range_loop)]
271292
for i in 0..len {
272293
if !array.is_null(i) {
273294
documents[i].insert(field.name(), array.value(i));
@@ -279,7 +300,7 @@ impl From<&RecordBatch> for Documents {
279300
panic!("Write support for lists not yet implemented")
280301
}
281302
DataType::Struct(_) => panic!("Write support for structs not yet implemented"),
282-
t @ _ => panic!("Encountered unwritable data type {:?}", t),
303+
t => panic!("Encountered unwritable data type {:?}", t),
283304
});
284305

285306
Self(documents)
@@ -334,7 +355,7 @@ mod tests {
334355
let writer = Writer::try_new(&writer_config, schema)?;
335356

336357
// read from a collection and write to another
337-
while let Ok(Some(batch)) = reader.next() {
358+
while let Ok(Some(batch)) = reader.next_batch() {
338359
writer.write(&batch)?
339360
}
340361
Ok(())

0 commit comments

Comments (0)