Skip to content
1 change: 0 additions & 1 deletion arrow-flight/src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,6 @@ impl FlightDataDecoder {
&state.schema,
&mut state.dictionaries_by_field,
&message.version(),
false,
)
.map_err(|e| {
FlightError::DecodeError(format!("Error decoding ipc dictionary: {e}"))
Expand Down
1 change: 0 additions & 1 deletion arrow-flight/src/sql/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,6 @@ pub fn arrow_data_from_flight_data(
&dictionaries_by_field,
None,
&ipc_message.version(),
false,
)?;
Ok(ArrowFlightData::RecordBatch(record_batch))
}
Expand Down
1 change: 0 additions & 1 deletion arrow-flight/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ pub fn flight_data_to_arrow_batch(
dictionaries_by_id,
None,
&message.version(),
false,
)
})?
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,6 @@ async fn receive_batch_flight_data(
&schema,
dictionaries_by_id,
&message.version(),
false,
)
.expect("Error reading dictionary");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,6 @@ async fn record_batch_from_message(
dictionaries_by_id,
None,
&message.version(),
false,
);

arrow_batch_result
Expand All @@ -331,7 +330,6 @@ async fn dictionary_from_message(
&schema_ref,
dictionaries_by_id,
&message.version(),
false,
);
dictionary_batch_result
.map_err(|e| Status::internal(format!("Could not convert to Dictionary: {e:?}")))
Expand Down
47 changes: 38 additions & 9 deletions arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,35 @@ impl<'a> ArrayReader<'a> {
}
}

/// Creates a [`RecordBatch`] from binary IPC data using the flatbuffer
/// `crate::RecordBatch` indexes and the provided `schema`.
///
/// Array data in `buf` is NOT required to be aligned: this delegates to the
/// internal reader with `require_alignment = false`, which allocates a new
/// aligned buffer and copies the data whenever array data in `buf` is not
/// properly aligned (properly aligned array data remains zero-copy).
///
/// * `dictionaries_by_id` — previously-read dictionaries, keyed by dictionary id
/// * `projection` — optional column indices to read; `None` reads all columns
/// * `metadata` — the IPC metadata version of the message
pub fn read_record_batch(
    buf: &Buffer,
    batch: crate::RecordBatch,
    schema: SchemaRef,
    dictionaries_by_id: &HashMap<i64, ArrayRef>,
    projection: Option<&[usize]>,
    metadata: &MetadataVersion,
) -> Result<RecordBatch, ArrowError> {
    read_record_batch2(
        buf,
        batch,
        schema,
        dictionaries_by_id,
        projection,
        metadata,
        false,
    )
}

/// Reads a dictionary batch from `buf` and the provided metadata,
/// updating `dictionaries_by_id` with the resulting dictionary values.
///
/// Array data in `buf` is NOT required to be aligned: this delegates to the
/// internal reader with `require_alignment = false`, which copies any
/// improperly aligned array data into a newly allocated aligned buffer
/// (properly aligned array data remains zero-copy).
pub fn read_dictionary(
    buf: &Buffer,
    batch: crate::DictionaryBatch,
    schema: &Schema,
    dictionaries_by_id: &mut HashMap<i64, ArrayRef>,
    metadata: &MetadataVersion,
) -> Result<(), ArrowError> {
    read_dictionary2(buf, batch, schema, dictionaries_by_id, metadata, false)
}

/// Creates a record batch from binary data using the `crate::RecordBatch` indexes and the `Schema`.
///
/// If `require_alignment` is true, this function will return an error if any array data in the
Expand All @@ -495,7 +524,7 @@ impl<'a> ArrayReader<'a> {
/// and copy over the data if any array data in the input `buf` is not properly aligned.
/// (Properly aligned array data will remain zero-copy.)
/// Under the hood it will use [`arrow_data::ArrayDataBuilder::build_aligned`] to construct [`arrow_data::ArrayData`].
pub fn read_record_batch(
fn read_record_batch2(
buf: &Buffer,
batch: crate::RecordBatch,
schema: SchemaRef,
Expand Down Expand Up @@ -564,7 +593,7 @@ pub fn read_record_batch(

/// Read the dictionary from the buffer and provided metadata,
/// updating the `dictionaries_by_id` with the resulting dictionary
pub fn read_dictionary(
fn read_dictionary2(
buf: &Buffer,
batch: crate::DictionaryBatch,
schema: &Schema,
Expand Down Expand Up @@ -593,7 +622,7 @@ pub fn read_dictionary(
let value = value_type.as_ref().clone();
let schema = Schema::new(vec![Field::new("", value, true)]);
// Read a single column
let record_batch = read_record_batch(
let record_batch = read_record_batch2(
buf,
batch.data().unwrap(),
Arc::new(schema),
Expand Down Expand Up @@ -781,7 +810,7 @@ impl FileDecoder {
match message.header_type() {
crate::MessageHeader::DictionaryBatch => {
let batch = message.header_as_dictionary_batch().unwrap();
read_dictionary(
read_dictionary2(
&buf.slice(block.metaDataLength() as _),
batch,
&self.schema,
Expand Down Expand Up @@ -812,7 +841,7 @@ impl FileDecoder {
ArrowError::IpcError("Unable to read IPC message as record batch".to_string())
})?;
// read the block that makes up the record batch into a buffer
read_record_batch(
read_record_batch2(
&buf.slice(block.metaDataLength() as _),
batch,
self.schema.clone(),
Expand Down Expand Up @@ -1255,7 +1284,7 @@ impl<R: Read> StreamReader<R> {
let mut buf = MutableBuffer::from_len_zeroed(message.bodyLength() as usize);
self.reader.read_exact(&mut buf)?;

read_record_batch(
read_record_batch2(
&buf.into(),
batch,
self.schema(),
Expand All @@ -1276,7 +1305,7 @@ impl<R: Read> StreamReader<R> {
let mut buf = MutableBuffer::from_len_zeroed(message.bodyLength() as usize);
self.reader.read_exact(&mut buf)?;

read_dictionary(
read_dictionary2(
&buf.into(),
batch,
&self.schema,
Expand Down Expand Up @@ -2048,7 +2077,7 @@ mod tests {
assert_ne!(b.as_ptr().align_offset(8), 0);

let ipc_batch = message.header_as_record_batch().unwrap();
let roundtrip = read_record_batch(
let roundtrip = read_record_batch2(
&b,
ipc_batch,
batch.schema(),
Expand Down Expand Up @@ -2085,7 +2114,7 @@ mod tests {
assert_ne!(b.as_ptr().align_offset(8), 0);

let ipc_batch = message.header_as_record_batch().unwrap();
let result = read_record_batch(
let result = read_record_batch2(
&b,
ipc_batch,
batch.schema(),
Expand Down
29 changes: 24 additions & 5 deletions arrow-ipc/src/reader/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use arrow_buffer::{Buffer, MutableBuffer};
use arrow_schema::{ArrowError, SchemaRef};

use crate::convert::MessageBuffer;
use crate::reader::{read_dictionary, read_record_batch};
use crate::reader::{read_dictionary2, read_record_batch2};
use crate::{MessageHeader, CONTINUATION_MARKER};

/// A low-level interface for reading [`RecordBatch`] data from a stream of bytes
Expand All @@ -40,6 +40,8 @@ pub struct StreamDecoder {
state: DecoderState,
/// A scratch buffer when a read is split across multiple `Buffer`
buf: MutableBuffer,
/// Whether or not array data in input buffers are required to be aligned
require_alignment: bool,
}

#[derive(Debug)]
Expand Down Expand Up @@ -83,6 +85,23 @@ impl StreamDecoder {
Self::default()
}

/// Specifies whether or not array data in input buffers is required to be properly aligned.
///
/// If `require_alignment` is true, this decoder will return an error if any array data in the
/// input `buf` is not properly aligned.
/// Under the hood it will use [`arrow_data::ArrayDataBuilder::build`] to construct
/// [`arrow_data::ArrayData`].
///
/// If `require_alignment` is false (the default), this decoder will automatically allocate a
/// new aligned buffer and copy over the data if any array data in the input `buf` is not
/// properly aligned. (Properly aligned array data will remain zero-copy.)
/// Under the hood it will use [`arrow_data::ArrayDataBuilder::build_aligned`] to construct
/// [`arrow_data::ArrayData`].
pub fn with_require_alignment(mut self, require_alignment: bool) -> Self {
self.require_alignment = require_alignment;
self
}

/// Try to read the next [`RecordBatch`] from the provided [`Buffer`]
///
/// [`Buffer::advance`] will be called on `buffer` for any consumed bytes.
Expand Down Expand Up @@ -192,14 +211,14 @@ impl StreamDecoder {
let schema = self.schema.clone().ok_or_else(|| {
ArrowError::IpcError("Missing schema".to_string())
})?;
let batch = read_record_batch(
let batch = read_record_batch2(
&body,
batch,
schema,
&self.dictionaries,
None,
&version,
false,
self.require_alignment,
)?;
self.state = DecoderState::default();
return Ok(Some(batch));
Expand All @@ -209,13 +228,13 @@ impl StreamDecoder {
let schema = self.schema.as_deref().ok_or_else(|| {
ArrowError::IpcError("Missing schema".to_string())
})?;
read_dictionary(
read_dictionary2(
&body,
dictionary,
schema,
&mut self.dictionaries,
&version,
false,
self.require_alignment,
)?;
self.state = DecoderState::default();
}
Expand Down