|
23 | 23 | from nanoarrow._lib import CArrayView, CArrowType |
24 | 24 | from nanoarrow.c_array_stream import c_array_stream |
25 | 25 | from nanoarrow.c_schema import c_schema, c_schema_view |
| 26 | +from nanoarrow.schema import Schema |
26 | 27 |
|
27 | 28 |
|
28 | 29 | def iter_py(obj, schema=None) -> Iterable: |
@@ -130,47 +131,22 @@ class UnregisteredExtensionWarning(UserWarning): |
130 | 131 |
|
131 | 132 |
|
132 | 133 | class ArrayViewBaseIterator: |
133 | | - """Base class for iterators that use an internal ArrowArrayView |
| 134 | + """Base class for iterators and visitors that use an internal ArrowArrayView |
134 | 135 | as the basis for conversion to Python objects. Intended for internal use. |
135 | 136 | """ |
136 | 137 |
|
137 | | - @classmethod |
138 | | - def get_iterator(cls, obj, schema=None): |
139 | | - with c_array_stream(obj, schema=schema) as stream: |
140 | | - iterator = cls(stream._get_cached_schema()) |
141 | | - for array in stream: |
142 | | - iterator._set_array(array) |
143 | | - yield from iterator._iter_chunk(0, len(array)) |
144 | | - |
145 | | - def __init__(self, schema, *, _array_view=None): |
| 138 | + def __init__(self, schema, *, array_view=None): |
146 | 139 | self._schema = c_schema(schema) |
147 | 140 | self._schema_view = c_schema_view(schema) |
148 | 141 |
|
149 | | - if _array_view is None: |
| 142 | + if array_view is None: |
150 | 143 | self._array_view = CArrayView.from_schema(self._schema) |
151 | 144 | else: |
152 | | - self._array_view = _array_view |
153 | | - |
154 | | - self._children = list( |
155 | | - map(self._make_child, self._schema.children, self._array_view.children) |
156 | | - ) |
157 | | - |
158 | | - if self._schema.dictionary is None: |
159 | | - self._dictionary = None |
160 | | - else: |
161 | | - self._dictionary = self._make_child( |
162 | | - self._schema.dictionary, self._array_view.dictionary |
163 | | - ) |
164 | | - |
165 | | - def _make_child(self, schema, array_view): |
166 | | - return type(self)(schema, _array_view=array_view) |
167 | | - |
168 | | - def _iter_chunk(self, offset, length) -> Iterable: |
169 | | - yield self._array_view |
| 145 | + self._array_view = array_view |
170 | 146 |
|
171 | 147 | @cached_property |
172 | | - def _child_names(self): |
173 | | - return [child.name for child in self._schema.children] |
| 148 | + def schema(self) -> Schema: |
| 149 | + return Schema(self._schema) |
174 | 150 |
|
175 | 151 | @cached_property |
176 | 152 | def _object_label(self): |
@@ -199,7 +175,41 @@ class PyIterator(ArrayViewBaseIterator): |
199 | 175 | Intended for internal use. |
200 | 176 | """ |
201 | 177 |
|
| 178 | + @classmethod |
| 179 | + def get_iterator(cls, obj, schema=None): |
| 180 | + with c_array_stream(obj, schema=schema) as stream: |
| 181 | + iterator = cls(stream._get_cached_schema()) |
| 182 | + for array in stream: |
| 183 | + iterator._set_array(array) |
| 184 | + yield from iterator |
| 185 | + |
| 186 | + def __init__(self, schema, *, array_view=None): |
| 187 | + super().__init__(schema, array_view=array_view) |
| 188 | + |
| 189 | + self._children = list( |
| 190 | + map(self._make_child, self._schema.children, self._array_view.children) |
| 191 | + ) |
| 192 | + |
| 193 | + if self._schema.dictionary is None: |
| 194 | + self._dictionary = None |
| 195 | + else: |
| 196 | + self._dictionary = self._make_child( |
| 197 | + self._schema.dictionary, self._array_view.dictionary |
| 198 | + ) |
| 199 | + |
| 200 | + def _make_child(self, schema, array_view): |
| 201 | + return type(self)(schema, array_view=array_view) |
| 202 | + |
| 203 | + @cached_property |
| 204 | + def _child_names(self): |
| 205 | + return [child.name for child in self._schema.children] |
| 206 | + |
| 207 | + def __iter__(self): |
| 208 | + """Iterate over all elements in the current chunk""" |
| 209 | + return self._iter_chunk(0, len(self._array_view)) |
| 210 | + |
202 | 211 | def _iter_chunk(self, offset, length): |
| 212 | + """Iterate over all elements in a slice of the current chunk""" |
203 | 213 | # Check for an extension type first since this isn't reflected by |
204 | 214 | # self._schema_view.type_id. Currently we just return the storage |
205 | 215 | # iterator with a warning for extension types. |
@@ -480,16 +490,16 @@ class RowTupleIterator(PyIterator): |
480 | 490 | Intended for internal use. |
481 | 491 | """ |
482 | 492 |
|
483 | | - def __init__(self, schema, *, _array_view=None): |
484 | | - super().__init__(schema, _array_view=_array_view) |
| 493 | + def __init__(self, schema, *, array_view=None): |
| 494 | + super().__init__(schema, array_view=array_view) |
485 | 495 | if self._schema_view.type != "struct": |
486 | 496 | raise TypeError( |
487 | 497 | "RowTupleIterator can only iterate over struct arrays " |
488 | 498 | f"(got '{self._schema_view.type}')" |
489 | 499 | ) |
490 | 500 |
|
491 | 501 | def _make_child(self, schema, array_view): |
492 | | - return PyIterator(schema, _array_view=array_view) |
| 502 | + return PyIterator(schema, array_view=array_view) |
493 | 503 |
|
494 | 504 | def _iter_chunk(self, offset, length): |
495 | 505 | return self._struct_tuple_iter(offset, length) |
|
0 commit comments