
Commit 3ddf772

sfc-gh-stakeda authored and ankit-bhatnagar167 committed

SNOW-100191: Make pyarrow an optional dependency; the Arrow C++ library's .so files are now bundled internally.

1 parent e5495fd · commit 3ddf772

12 files changed (+151 −54 lines)
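The Python-side mechanics are visible throughout the diffs below: pyarrow imports move inside try/except blocks, and pyarrow is only demanded when the pandas fetch path actually needs it. A generic sketch of that optional-dependency pattern (illustration only, not the connector's exact code):

```python
# Optional-dependency pattern: the module still imports when pyarrow is
# missing; pyarrow-only features check for it at call time instead of
# failing at import time.
try:
    import pyarrow
except ImportError:
    pyarrow = None

def require_pyarrow():
    # mirrors the spirit of cursor.check_can_use_panadas() below
    if pyarrow is None:
        raise RuntimeError(
            "pyarrow package is missing. Install using pip if the platform is supported.")
```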

arrow_iterator.pyx

Lines changed: 99 additions & 9 deletions
```diff
@@ -7,6 +7,11 @@
 
 from logging import getLogger
 from cpython.ref cimport PyObject
+from libc.stdint cimport *
+from libcpp cimport bool as c_bool
+from libcpp.memory cimport shared_ptr
+from libcpp.string cimport string as c_string
+from libcpp.vector cimport vector
 
 logger = getLogger(__name__)
 
@@ -26,12 +31,87 @@ cdef extern from "cpp/ArrowIterator/CArrowIterator.hpp" namespace "sf":
 
 cdef extern from "cpp/ArrowIterator/CArrowChunkIterator.hpp" namespace "sf":
     cdef cppclass CArrowChunkIterator(CArrowIterator):
-        CArrowChunkIterator(PyObject* context, PyObject* batches) except +
+        CArrowChunkIterator(PyObject* context, vector[shared_ptr[CRecordBatch]]* batches) except +
 
 
 cdef extern from "cpp/ArrowIterator/CArrowTableIterator.hpp" namespace "sf":
     cdef cppclass CArrowTableIterator(CArrowIterator):
-        CArrowTableIterator(PyObject* context, PyObject* batches) except +
+        CArrowTableIterator(PyObject* context, vector[shared_ptr[CRecordBatch]]* batches) except +
+
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+    cdef cppclass CStatus "arrow::Status":
+        CStatus()
+
+        c_string ToString()
+        c_string message()
+
+        c_bool ok()
+        c_bool IsIOError()
+        c_bool IsOutOfMemory()
+        c_bool IsInvalid()
+        c_bool IsKeyError()
+        c_bool IsNotImplemented()
+        c_bool IsTypeError()
+        c_bool IsCapacityError()
+        c_bool IsIndexError()
+        c_bool IsSerializationError()
+
+
+    cdef cppclass CBuffer" arrow::Buffer":
+        CBuffer(const uint8_t* data, int64_t size)
+
+    cdef cppclass CRecordBatch" arrow::RecordBatch"
+
+    cdef cppclass CRecordBatchReader" arrow::RecordBatchReader":
+        CStatus ReadNext(shared_ptr[CRecordBatch]* batch)
+
+
+cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil:
+    cdef cppclass CRecordBatchStreamReader \
+            " arrow::ipc::RecordBatchStreamReader"(CRecordBatchReader):
+        @staticmethod
+        CStatus Open(const InputStream* stream,
+                     shared_ptr[CRecordBatchReader]* out)
+
+
+cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil:
+    enum FileMode" arrow::io::FileMode::type":
+        FileMode_READ" arrow::io::FileMode::READ"
+        FileMode_WRITE" arrow::io::FileMode::WRITE"
+        FileMode_READWRITE" arrow::io::FileMode::READWRITE"
+
+    cdef cppclass FileInterface:
+        CStatus Close()
+        CStatus Tell(int64_t* position)
+        FileMode mode()
+        c_bool closed()
+
+    cdef cppclass Readable:
+        # put overload under a different name to avoid cython bug with multiple
+        # layers of inheritance
+        CStatus ReadBuffer" Read"(int64_t nbytes, shared_ptr[CBuffer]* out)
+        CStatus Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out)
+
+    cdef cppclass InputStream(FileInterface, Readable):
+        pass
+
+    cdef cppclass Seekable:
+        CStatus Seek(int64_t position)
+
+    cdef cppclass RandomAccessFile(InputStream, Seekable):
+        CStatus GetSize(int64_t* size)
+
+        CStatus ReadAt(int64_t position, int64_t nbytes,
+                       int64_t* bytes_read, uint8_t* buffer)
+        CStatus ReadAt(int64_t position, int64_t nbytes,
+                       shared_ptr[CBuffer]* out)
+        c_bool supports_zero_copy()
+
+
+cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
+    cdef cppclass PyReadableFile(RandomAccessFile):
+        PyReadableFile(object fo)
 
 
 cdef class EmptyPyArrowIterator:
@@ -53,12 +133,22 @@ cdef class PyArrowIterator(EmptyPyArrowIterator):
     cdef CArrowIterator* cIterator
     cdef str unit
    cdef PyObject* cret
-    cdef list batches
+    cdef vector[shared_ptr[CRecordBatch]] batches
+
+    def __cinit__(self, object py_inputstream, object arrow_context):
+        cdef shared_ptr[InputStream] input_stream
+        cdef shared_ptr[CRecordBatchReader] reader
+        cdef shared_ptr[CRecordBatch] record_batch
+        input_stream.reset(new PyReadableFile(py_inputstream))
+        CRecordBatchStreamReader.Open(input_stream.get(), &reader)
+        while True:
+            reader.get().ReadNext(&record_batch)
+
+            if record_batch.get() is NULL:
+                break
+
+            self.batches.push_back(record_batch)
 
-    def __cinit__(self, object arrow_stream_reader, object arrow_context):
-        self.batches = []
-        for rb in arrow_stream_reader:
-            self.batches.append(rb)
         self.context = arrow_context
         self.cIterator = NULL
         self.unit = ''
@@ -85,8 +175,8 @@ cdef class PyArrowIterator(EmptyPyArrowIterator):
         if iter_unit != ROW_UNIT and iter_unit != TABLE_UNIT:
             raise NotImplementedError
         elif iter_unit == ROW_UNIT:
-            self.cIterator = new CArrowChunkIterator(<PyObject*>self.context, <PyObject*>self.batches)
+            self.cIterator = new CArrowChunkIterator(<PyObject*>self.context, &self.batches)
        elif iter_unit == TABLE_UNIT:
-            self.cIterator = new CArrowTableIterator(<PyObject*>self.context, <PyObject*>self.batches)
+            self.cIterator = new CArrowTableIterator(<PyObject*>self.context, &self.batches)
         self.unit = iter_unit
 
```
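The new `__cinit__` drains the IPC stream on the C++ side: the Python file-like object is wrapped in an `arrow::py::PyReadableFile`, opened with `RecordBatchStreamReader.Open`, and read batch by batch until `ReadNext` yields a null batch. A rough pure-Python equivalent using pyarrow's public API, for illustration only (the point of the change is that this path no longer goes through pyarrow):

```python
import pyarrow as pa

def drain_batches(py_inputstream):
    # open_stream plays the role of CRecordBatchStreamReader.Open
    reader = pa.ipc.open_stream(py_inputstream)
    batches = []
    while True:
        try:
            # read_next_batch ~ ReadNext(&record_batch); StopIteration
            # corresponds to the NULL batch that ends the C++ loop
            batches.append(reader.read_next_batch())
        except StopIteration:
            break
    return batches
```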

arrow_result.pyx

Lines changed: 3 additions & 4 deletions
```diff
@@ -6,14 +6,14 @@
 # cython: language_level=3
 
 from base64 import b64decode
+import io
 from logging import getLogger
 from .telemetry import TelemetryField
 from .time_util import get_time_millis
 try:
-    from pyarrow.ipc import open_stream
-    from pyarrow import concat_tables
     from .arrow_iterator import PyArrowIterator, EmptyPyArrowIterator, ROW_UNIT, TABLE_UNIT, EMPTY_UNIT
     from .arrow_context import ArrowConverterContext
+    from pyarrow import concat_tables
 except ImportError:
     pass
 
@@ -52,9 +52,8 @@ cdef class ArrowResult:
 
         if rowset_b64:
             arrow_bytes = b64decode(rowset_b64)
-            arrow_reader = open_stream(arrow_bytes)
             self._arrow_context = ArrowConverterContext(self._connection._session_parameters)
-            self._current_chunk_row = PyArrowIterator(arrow_reader, self._arrow_context)
+            self._current_chunk_row = PyArrowIterator(io.BytesIO(arrow_bytes), self._arrow_context)
         else:
             self._current_chunk_row = EmptyPyArrowIterator(None, None)
         self._iter_unit = EMPTY_UNIT
```
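Since `PyArrowIterator` now expects a file-like object rather than a pyarrow reader, the base64-decoded rowset is simply wrapped in `io.BytesIO`. A self-contained round trip using today's pyarrow API, with a locally built IPC stream standing in for the server's `rowset_b64` payload (names here are illustrative):

```python
import io
from base64 import b64encode, b64decode
import pyarrow as pa

# Build a tiny Arrow IPC stream and base64-encode it, mimicking the payload
# that ArrowResult receives from the server.
batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["c1"])
sink = io.BytesIO()
writer = pa.ipc.new_stream(sink, batch.schema)
writer.write_batch(batch)
writer.close()
rowset_b64 = b64encode(sink.getvalue())

# What the new code path does: decode and hand over the raw binary stream.
arrow_bytes = b64decode(rowset_b64)
stream = io.BytesIO(arrow_bytes)  # this is what PyArrowIterator now consumes
print(pa.ipc.open_stream(stream).read_all())
```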

chunk_downloader.py

Lines changed: 1 addition & 3 deletions
```diff
@@ -19,7 +19,6 @@
 from gzip import GzipFile
 
 try:
-    from pyarrow.ipc import open_stream
     from .arrow_iterator import PyArrowIterator
     from .arrow_context import ArrowConverterContext
 except ImportError:
@@ -331,6 +330,5 @@ def __init__(self, meta, connection):
     """
     def to_iterator(self, raw_data_fd, download_time):
         gzip_decoder = GzipFile(fileobj=raw_data_fd, mode='r')
-        reader = open_stream(gzip_decoder)
-        it = PyArrowIterator(reader, self._arrow_context)
+        it = PyArrowIterator(gzip_decoder, self._arrow_context)
         return it
```
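`GzipFile` is itself file-like, so the downloaded chunk can feed the iterator directly, with decompression happening lazily as the reader pulls bytes. A small demonstration of the same shape, assuming the payload is a gzipped Arrow IPC stream (fabricated locally here):

```python
import gzip
import io
import pyarrow as pa

# Fabricate a gzipped IPC stream standing in for a downloaded result chunk.
batch = pa.RecordBatch.from_arrays([pa.array(["a", "b"])], names=["c1"])
raw = io.BytesIO()
with pa.ipc.new_stream(raw, batch.schema) as w:
    w.write_batch(batch)
raw_data_fd = io.BytesIO(gzip.compress(raw.getvalue()))

# to_iterator's new shape: pass the lazily-decompressing file object straight in.
gzip_decoder = gzip.GzipFile(fileobj=raw_data_fd, mode='r')
print(pa.ipc.open_stream(gzip_decoder).read_all())
```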

cpp/ArrowIterator/CArrowChunkIterator.cpp

Lines changed: 5 additions & 5 deletions
```diff
@@ -17,11 +17,11 @@
 namespace sf
 {
 
-CArrowChunkIterator::CArrowChunkIterator(PyObject* context, PyObject* batches)
+CArrowChunkIterator::CArrowChunkIterator(PyObject* context, std::vector<std::shared_ptr<arrow::RecordBatch>> *batches)
 : CArrowIterator(batches), m_latestReturnedRow(nullptr), m_context(context)
 {
-  m_batchCount = m_cRecordBatches.size();
-  m_columnCount = m_batchCount > 0 ? m_cRecordBatches[0]->num_columns() : 0;
+  m_batchCount = m_cRecordBatches->size();
+  m_columnCount = m_batchCount > 0 ? (*m_cRecordBatches)[0]->num_columns() : 0;
   m_currentBatchIndex = -1;
   m_rowIndexInBatch = -1;
   m_rowCountInBatch = 0;
@@ -50,7 +50,7 @@ PyObject* CArrowChunkIterator::next()
     if (m_currentBatchIndex < m_batchCount)
     {
       m_rowIndexInBatch = 0;
-      m_rowCountInBatch = m_cRecordBatches[m_currentBatchIndex]->num_rows();
+      m_rowCountInBatch = (*m_cRecordBatches)[m_currentBatchIndex]->num_rows();
       this->initColumnConverters();
       if (py::checkPyError())
       {
@@ -90,7 +90,7 @@ void CArrowChunkIterator::initColumnConverters()
 {
   m_currentBatchConverters.clear();
   std::shared_ptr<arrow::RecordBatch> currentBatch =
-      m_cRecordBatches[m_currentBatchIndex];
+      (*m_cRecordBatches)[m_currentBatchIndex];
   std::shared_ptr<arrow::Schema> schema = currentBatch->schema();
   for (int i = 0; i < currentBatch->num_columns(); i++)
   {
```

cpp/ArrowIterator/CArrowChunkIterator.hpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -22,7 +22,7 @@ class CArrowChunkIterator : public CArrowIterator
   /**
   * Constructor
   */
-  CArrowChunkIterator(PyObject* context, PyObject* batches);
+  CArrowChunkIterator(PyObject* context, std::vector<std::shared_ptr<arrow::RecordBatch>> *);
 
   /**
   * Desctructor
```

cpp/ArrowIterator/CArrowIterator.cpp

Lines changed: 3 additions & 10 deletions
```diff
@@ -9,17 +9,10 @@ namespace sf
 
 Logger CArrowIterator::logger("snowflake.connector.CArrowIterator");
 
-CArrowIterator::CArrowIterator(PyObject* batches)
+CArrowIterator::CArrowIterator(std::vector<std::shared_ptr<arrow::RecordBatch>>* batches) :
+m_cRecordBatches(batches)
 {
-  int pyListSize = PyList_Size(batches);
-  logger.debug("Arrow BatchSize: %d", pyListSize);
-
-  for (int i=0; i<pyListSize; i++)
-  {
-    std::shared_ptr<arrow::RecordBatch> cRecordBatch;
-    arrow::Status status = arrow::py::unwrap_record_batch(PyList_GetItem(batches, i), &cRecordBatch);
-    m_cRecordBatches.push_back(cRecordBatch);
-  }
+  logger.debug("Arrow BatchSize: %d", batches->size());
 }
 
 }
```

cpp/ArrowIterator/CArrowIterator.hpp

Lines changed: 2 additions & 2 deletions
```diff
@@ -24,7 +24,7 @@ namespace sf
 class CArrowIterator
 {
 public:
-  CArrowIterator(PyObject *batches);
+  CArrowIterator(std::vector<std::shared_ptr<arrow::RecordBatch>> * batches);
 
   virtual ~CArrowIterator() = default;
 
@@ -35,7 +35,7 @@
 
 protected:
   /** list of all record batch in current chunk */
-  std::vector<std::shared_ptr<arrow::RecordBatch>> m_cRecordBatches;
+  std::vector<std::shared_ptr<arrow::RecordBatch>> *m_cRecordBatches;
 
   static Logger logger;
 };
```

cpp/ArrowIterator/CArrowTableIterator.cpp

Lines changed: 7 additions & 7 deletions
```diff
@@ -26,9 +26,9 @@ namespace sf
 void CArrowTableIterator::reconstructRecordBatches()
 {
   // Type conversion, the code needs to be optimized
-  for (unsigned int batchIdx = 0; batchIdx < m_cRecordBatches.size(); batchIdx++)
+  for (unsigned int batchIdx = 0; batchIdx < m_cRecordBatches->size(); batchIdx++)
   {
-    std::shared_ptr<arrow::RecordBatch> currentBatch = m_cRecordBatches[batchIdx];
+    std::shared_ptr<arrow::RecordBatch> currentBatch = (*m_cRecordBatches)[batchIdx];
     std::shared_ptr<arrow::Schema> schema = currentBatch->schema();
     for (int colIdx = 0; colIdx < currentBatch->num_columns(); colIdx++)
     {
@@ -127,7 +127,7 @@ void CArrowTableIterator::reconstructRecordBatches()
   }
 }
 
-CArrowTableIterator::CArrowTableIterator(PyObject* context, PyObject* batches)
+CArrowTableIterator::CArrowTableIterator(PyObject* context, std::vector<std::shared_ptr<arrow::RecordBatch>>* batches)
 : CArrowIterator(batches), m_context(context), m_pyTableObjRef(nullptr)
 {
   PyObject* tz = PyObject_GetAttrString(m_context, "_timezone");
@@ -156,7 +156,7 @@ arrow::Status CArrowTableIterator::replaceColumn(
   const std::shared_ptr<arrow::Array>& newColumn)
 {
   // replace the targeted column
-  std::shared_ptr<arrow::RecordBatch> currentBatch = m_cRecordBatches[batchIdx];
+  std::shared_ptr<arrow::RecordBatch> currentBatch = (*m_cRecordBatches)[batchIdx];
   arrow::Status ret = currentBatch->AddColumn(colIdx+1, newField, newColumn, &currentBatch);
   if(!ret.ok())
   {
@@ -167,7 +167,7 @@ arrow::Status CArrowTableIterator::replaceColumn(
   {
     return ret;
   }
-  m_cRecordBatches[batchIdx] = currentBatch;
+  (*m_cRecordBatches)[batchIdx] = currentBatch;
   return ret;
 }
 
@@ -842,10 +842,10 @@ void CArrowTableIterator::convertTimestampTZColumn(
 bool CArrowTableIterator::convertRecordBatchesToTable()
 {
   // only do conversion once and there exist some record batches
-  if (!m_cTable && !m_cRecordBatches.empty())
+  if (!m_cTable && !m_cRecordBatches->empty())
   {
     reconstructRecordBatches();
-    arrow::Table::FromRecordBatches(m_cRecordBatches, &m_cTable);
+    arrow::Table::FromRecordBatches(*m_cRecordBatches, &m_cTable);
     return true;
   }
   return false;
```
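`arrow::Table::FromRecordBatches` now takes the dereferenced vector instead of a member copy; the operation itself is unchanged. Its pyarrow analogue, shown only to illustrate the batches-to-table conversion:

```python
import pyarrow as pa

batches = [
    pa.RecordBatch.from_arrays([pa.array([1, 2])], names=["c1"]),
    pa.RecordBatch.from_arrays([pa.array([3, 4])], names=["c1"]),
]
# Table.from_batches ~ arrow::Table::FromRecordBatches: zero-copy
# concatenation of batches sharing a schema into one logical table.
table = pa.Table.from_batches(batches)
assert table.num_rows == 4
```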

cpp/ArrowIterator/CArrowTableIterator.hpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -22,7 +22,7 @@ class CArrowTableIterator : public CArrowIterator
   /**
   * Constructor
   */
-  CArrowTableIterator(PyObject* context, PyObject* batches);
+  CArrowTableIterator(PyObject* context, std::vector<std::shared_ptr<arrow::RecordBatch>>* batches);
 
   /**
   * Destructor
```

cursor.py

Lines changed: 22 additions & 8 deletions
```diff
@@ -609,7 +609,7 @@ def _init_result_and_meta(self, data, use_ijson=False):
                                        column[u'nullable']))
 
         if self._query_result_format == 'arrow':
-            self.check_pyarrow_resultset()
+            self.check_can_use_arrow_resultset()
             self._result = ArrowResult(data, self)
         else:
             self._result = self._json_result_class(data, self, use_ijson)
@@ -628,21 +628,15 @@ def _init_result_and_meta(self, data, use_ijson=False):
         else:
             self._total_rowcount += updated_rows
 
-    def check_pyarrow_resultset(self):
+    def check_can_use_arrow_resultset(self):
         global CAN_USE_ARROW_RESULT
-        global pyarrow
 
         if not CAN_USE_ARROW_RESULT:
             if self._connection.application == 'SnowSQL':
                 msg = (
                     "Currently SnowSQL doesn't support the result set in Apache Arrow format."
                 )
                 errno = ER_NO_PYARROW_SNOWSQL
-            elif pyarrow is None:
-                msg = (
-                    "pyarrow package is missing. Install using pip if the platform is supported."
-                )
-                errno = ER_NO_PYARROW
             else:
                 msg = (
                     "The result set in Apache Arrow format is not supported for the platform."
@@ -658,6 +652,24 @@ def check_pyarrow_resultset(self):
                 }
             )
 
+    def check_can_use_panadas(self):
+        global pyarrow
+
+        if pyarrow is None:
+            msg = (
+                "pyarrow package is missing. Install using pip if the platform is supported."
+            )
+            errno = ER_NO_PYARROW
+
+            Error.errorhandler_wrapper(
+                self.connection, self,
+                ProgrammingError,
+                {
+                    u'msg': msg,
+                    u'errno': errno,
+                }
+            )
+
     def query_result(self, qid, _use_ijson=False):
         url = '/queries/{qid}/result'.format(qid=qid)
         ret = self._connection.rest.request(url=url, method='get')
@@ -695,6 +707,7 @@ def fetch_pandas_batches(self, **kwargs):
         Fetch a single Arrow Table
         @param kwargs: will be passed to pyarrow.Table.to_pandas() method
         """
+        self.check_can_use_panadas()
         if self._query_result_format != 'arrow': # TODO: or pandas isn't imported
             raise NotSupportedError
         for df in self._result._fetch_pandas_batches(**kwargs):
@@ -705,6 +718,7 @@ def fetch_pandas_all(self, **kwargs):
         Fetch Pandas dataframes in batch, where 'batch' refers to Snowflake Chunk
         @param kwargs: will be passed to pyarrow.Table.to_pandas() method
         """
+        self.check_can_use_panadas()
         if self._query_result_format != 'arrow':
             raise NotSupportedError
         return self._result._fetch_pandas_all(**kwargs)
```
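The net effect of the split checks, from a user's perspective: Arrow-format result sets work without pyarrow (the bundled C++ library handles them), while the pandas fetch methods still require it. A hypothetical usage sketch; the connection parameters are placeholders:

```python
import snowflake.connector

cnx = snowflake.connector.connect(account="...", user="...", password="...")
cur = cnx.cursor()
cur.execute("SELECT 1 AS n")
rows = cur.fetchall()        # fine even when pyarrow is not installed
df = cur.fetch_pandas_all()  # raises ProgrammingError (ER_NO_PYARROW) without pyarrow
cnx.close()
```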
