|
| 1 | +# |
| 2 | +# Copyright (c) 2012-2019 Snowflake Computing Inc. All rights reserved. |
| 3 | +# |
| 4 | + |
| 5 | +from decimal import Context |
| 6 | +from logging import getLogger |
| 7 | +from datetime import datetime, timedelta, date |
| 8 | + |
# Module-level logger, named after this module.
logger = getLogger(__name__)

# Naive datetime for the Unix epoch (1970-01-01 00:00:00); used as the base
# when a date has to be computed with timedelta arithmetic.
ZERO_EPOCH = datetime(1970, 1, 1)
| 12 | + |
cdef class ArrowChunkIterator:
    """
    Row iterator over the record batches of an Arrow stream.

    Every column of every record batch is wrapped in a converter when the
    iterator is constructed; rows are then produced one at a time as lists
    of Python native values.
    """

    cdef:
        list _batches
        int _column_count
        int _batch_count
        int _batch_index
        int _index_in_batch
        int _row_count_in_batch
        list _current_batch

    def __init__(self, arrow_stream_reader, meta):
        """
        Build one list of column converters per record batch
        :param arrow_stream_reader: iterable yielding record batches; each batch
            must expose a ``columns`` sequence
        :param meta: per-column metadata, indexed by column position
        """
        self._batches = []
        for record_batch in arrow_stream_reader:
            self._batches.append([
                ColumnConverter.init_converter(column, meta[index])
                for index, column in enumerate(record_batch.columns)
            ])

        # A stream may legitimately contain no batches at all (empty result).
        self._column_count = len(self._batches[0]) if self._batches else 0
        self._batch_count = len(self._batches)
        self._batch_index = -1
        self._index_in_batch = -1
        self._row_count_in_batch = 0
        self._current_batch = None

    def __iter__(self):
        # Makes the object usable directly in ``for`` loops and ``list()``.
        return self

    def next(self):
        # Python 2 style alias for __next__.
        return self.__next__()

    def __next__(self):
        """
        Return the next row as a list of Python native values
        :raises StopIteration: when every batch has been consumed
        """
        self._index_in_batch += 1
        if self._index_in_batch < self._row_count_in_batch:
            return self._return_row()

        # Current batch is exhausted: advance to the next batch that actually
        # has rows, skipping batches with zero rows or zero columns.
        while self._batch_index + 1 < self._batch_count:
            self._batch_index += 1
            self._current_batch = self._batches[self._batch_index]
            self._index_in_batch = 0
            if self._current_batch:
                self._row_count_in_batch = self._current_batch[0].row_count()
            else:
                self._row_count_in_batch = 0
            if self._row_count_in_batch > 0:
                return self._return_row()

        raise StopIteration

    cdef _return_row(self):
        # Convert the value of every column at the current row position.
        row = []
        for col in self._current_batch:
            row.append(col.to_python_native(self._index_in_batch))

        return row
| 62 | + |
| 63 | + |
cdef class ColumnConverter:
    # Base converter: turns the values of one arrow column into python native data types

    cdef object _arrow_column_array
    cdef object _meta

    def __init__(self, arrow_column_array, meta):
        """
        Keep the arrow column and its metadata for later per-row conversion
        :param arrow_column_array: arrow array
        :param meta: column metadata, which is a tuple with same form as cursor.description
        """
        self._meta = meta
        self._arrow_column_array = arrow_column_array

    def to_python_native(self, index):
        """
        Convert a single value of the column
        :param index: position of the value inside the arrow array
        :return: result of ``as_py()`` on the element at that position
        """
        cell = self._arrow_column_array[index]
        return cell.as_py()

    def row_count(self):
        """
        :return: number of values held by the arrow array
        """
        return len(self._arrow_column_array)

    @staticmethod
    def init_converter(column_array, meta):
        """
        Pick the converter class matching the column type and instantiate it
        :param column_array: arrow array
        :param meta: column metadata; index 1 is type code
        :return: FixedColumnConverter for 'FIXED' columns, ColumnConverter otherwise
        """
        converter_cls = FixedColumnConverter if meta[1] == 'FIXED' else ColumnConverter
        return converter_cls(column_array, meta)
| 92 | + |
cdef class FixedColumnConverter(ColumnConverter):
    """
    Converter for 'FIXED' columns.

    Columns with scale 0 are returned as whatever ``as_py()`` yields; columns
    with a non-zero scale are returned as ``decimal.Decimal``.
    """
    cdef int _scale
    cdef object _convert_method
    # Extension types have no instance __dict__, so this attribute has to be
    # declared here before it can be assigned in __init__.
    cdef object _decimal_ctx

    def __init__(self, arrow_column_array, meta):
        """
        :param arrow_column_array: arrow array
        :param meta: column metadata, a tuple with same form as cursor.description
            (index 4 is precision, index 5 is scale)
        """
        super().__init__(arrow_column_array, meta)
        self._scale = meta[5]
        if self._scale == 0:
            self._convert_method = self._to_int
        else:
            # meta is a tuple, so precision is read by position, not by name.
            self._decimal_ctx = Context(prec=meta[4])
            self._convert_method = self._to_decimal

    def to_python_native(self, index):
        """
        Convert a single value using the method selected at construction time
        :param index: position of the value inside the arrow array
        """
        val = self._arrow_column_array[index]
        return self._convert_method(val)

    def _to_int(self, val):
        # Scale 0: the arrow value is used as-is.
        return val.as_py()

    def _to_decimal(self, val):
        """
        Apply the column scale to the value and return it as a Decimal
        :param val: arrow scalar
        :return: Decimal, or None when the arrow value is null
        """
        # NOTE(review): assumes the arrow value is the unscaled integer and
        # that the scale only lives in the column metadata - confirm against
        # the server's arrow encoding.
        unscaled = val.as_py()
        if unscaled is None:
            return None
        ctx = self._decimal_ctx
        return ctx.create_decimal(unscaled).scaleb(-self._scale, ctx)
| 115 | + |
cdef class DateColumnConverter(ColumnConverter):
    """
    Converter producing ``datetime.date`` values.

    NOTE(review): assumes the arrow column holds integer day counts relative
    to 1970-01-01 - confirm against the server's arrow encoding.
    """

    def __init__(self, arrow_column_array, meta):
        super().__init__(arrow_column_array, meta)

    def to_python_native(self, index):
        """
        Convert a single value of the column into a date
        :param index: position of the value inside the arrow array
        :return: datetime.date, or None when the arrow value is null
        """
        days = self._arrow_column_array[index].as_py()
        if days is None:
            return None
        try:
            return datetime.utcfromtimestamp(days * 86400).date()
        except (OSError, OverflowError, ValueError) as e:
            # utcfromtimestamp rejects timestamps outside the platform's
            # supported range (the exception type varies by platform), so
            # fall back to plain timedelta arithmetic from the epoch.
            logger.debug("Failed to convert: %s", e)
            ts = ZERO_EPOCH + timedelta(days=days)
            return date(ts.year, ts.month, ts.day)
0 commit comments