Skip to content

Commit fb32feb

Browse files
committed
0.0.190
1 parent 039a01c commit fb32feb

File tree

3 files changed

+182
-1
lines changed

3 files changed

+182
-1
lines changed

orso/schema.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ def from_arrow(cls, arrow_field, mappable_as_binary: bool = False) -> "FlatColum
228228

229229
# Fetch the native type mapping from Arrow to Python native types
230230
native_type = arrow_type_map(arrow_field.type)
231+
element_type = None
231232
# Initialize variables to hold optional decimal properties
232233
scale: Optional[int] = None
233234
precision: Optional[int] = None
@@ -238,6 +239,9 @@ def from_arrow(cls, arrow_field, mappable_as_binary: bool = False) -> "FlatColum
238239
precision = native_type.precision # type:ignore
239240
elif mappable_as_binary and native_type == dict:
240241
field_type = OrsoTypes.BLOB
242+
elif native_type == list:
243+
field_type = OrsoTypes.ARRAY
244+
element_type = PYTHON_TO_ORSO_MAP.get(arrow_type_map(arrow_field.type.value_type))
241245
else:
242246
# Fall back to the generic mapping
243247
field_type = PYTHON_TO_ORSO_MAP.get(native_type, None)
@@ -247,6 +251,7 @@ def from_arrow(cls, arrow_field, mappable_as_binary: bool = False) -> "FlatColum
247251
return FlatColumn(
248252
name=str(arrow_field.name),
249253
type=field_type,
254+
element_type=element_type,
250255
nullable=arrow_field.nullable,
251256
scale=scale,
252257
precision=precision,
@@ -302,6 +307,12 @@ def arrow_field(self):
302307
}
303308
# fmt: on
304309

310+
if self.type == OrsoTypes.ARRAY:
311+
return pyarrow.field(
312+
name=self.name,
313+
type=pyarrow.list_(type_map.get(self.element_type, pyarrow.string())),
314+
)
315+
305316
return pyarrow.field(name=self.name, type=type_map.get(self.type, pyarrow.string()))
306317

307318
def to_json(self) -> str:

orso/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
__version__: str = "0.0.189"
13+
__version__: str = "0.0.190"
1414
__author__: str = "@joocer"

tests/test_schema_arrow.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
import os
2+
import sys
3+
4+
import pytest
5+
6+
sys.path.insert(1, os.path.join(sys.path[0], ".."))
7+
8+
import pyarrow
9+
from orso.types import OrsoTypes
10+
11+
from orso.schema import FlatColumn
12+
13+
14+
def test_column_to_field():
15+
column = FlatColumn(name="test", type=OrsoTypes.VARCHAR)
16+
arrow_field = column.arrow_field
17+
assert arrow_field.type == pyarrow.string()
18+
19+
column = FlatColumn(name="test", type=OrsoTypes.INTEGER)
20+
arrow_field = column.arrow_field
21+
assert arrow_field.type == pyarrow.int64()
22+
23+
column = FlatColumn(name="test", type=OrsoTypes.DOUBLE)
24+
arrow_field = column.arrow_field
25+
assert arrow_field.type == pyarrow.float64()
26+
27+
column = FlatColumn(name="test", type=OrsoTypes.BOOLEAN)
28+
arrow_field = column.arrow_field
29+
assert arrow_field.type == pyarrow.bool_()
30+
31+
column = FlatColumn(name="test", type=OrsoTypes.TIMESTAMP)
32+
arrow_field = column.arrow_field
33+
assert arrow_field.type == pyarrow.timestamp("us"), arrow_field.type
34+
35+
column = FlatColumn(name="test", type=OrsoTypes.DATE)
36+
arrow_field = column.arrow_field
37+
assert arrow_field.type == pyarrow.date64(), arrow_field.type
38+
39+
column = FlatColumn(name="test", type=OrsoTypes.BLOB)
40+
arrow_field = column.arrow_field
41+
assert arrow_field.type == pyarrow.binary()
42+
43+
column = FlatColumn(name="test", type=OrsoTypes.DECIMAL)
44+
arrow_field = column.arrow_field
45+
assert arrow_field.type == pyarrow.decimal128(28, 21), arrow_field.type
46+
47+
def test_array_column_to_field():
48+
column = FlatColumn(name="test", type=OrsoTypes.ARRAY, element_type=OrsoTypes.VARCHAR)
49+
arrow_field = column.arrow_field
50+
assert arrow_field.type == pyarrow.list_(pyarrow.string())
51+
52+
column = FlatColumn(name="test", type=OrsoTypes.ARRAY, element_type=OrsoTypes.INTEGER)
53+
arrow_field = column.arrow_field
54+
assert arrow_field.type == pyarrow.list_(pyarrow.int64())
55+
56+
column = FlatColumn(name="test", type=OrsoTypes.ARRAY, element_type=OrsoTypes.DOUBLE)
57+
arrow_field = column.arrow_field
58+
assert arrow_field.type == pyarrow.list_(pyarrow.float64())
59+
60+
column = FlatColumn(name="test", type=OrsoTypes.ARRAY, element_type=OrsoTypes.BOOLEAN)
61+
arrow_field = column.arrow_field
62+
assert arrow_field.type == pyarrow.list_(pyarrow.bool_())
63+
64+
column = FlatColumn(name="test", type=OrsoTypes.ARRAY, element_type=OrsoTypes.TIMESTAMP)
65+
arrow_field = column.arrow_field
66+
assert arrow_field.type == pyarrow.list_(pyarrow.timestamp("us"))
67+
68+
column = FlatColumn(name="test", type=OrsoTypes.ARRAY, element_type=OrsoTypes.BLOB)
69+
arrow_field = column.arrow_field
70+
assert arrow_field.type == pyarrow.list_(pyarrow.binary())
71+
72+
73+
def test_column_to_field_name():
74+
column = FlatColumn(name="test", type="ARRAY<VARCHAR>")
75+
arrow_field = column.arrow_field
76+
assert arrow_field.type == pyarrow.list_(pyarrow.string())
77+
78+
column = FlatColumn(name="test", type="ARRAY<INTEGER>")
79+
arrow_field = column.arrow_field
80+
assert arrow_field.type == pyarrow.list_(pyarrow.int64())
81+
82+
column = FlatColumn(name="test", type="ARRAY<DOUBLE>")
83+
arrow_field = column.arrow_field
84+
assert arrow_field.type == pyarrow.list_(pyarrow.float64())
85+
86+
column = FlatColumn(name="test", type="ARRAY<BOOLEAN>")
87+
arrow_field = column.arrow_field
88+
assert arrow_field.type == pyarrow.list_(pyarrow.bool_())
89+
90+
column = FlatColumn(name="test", type="ARRAY<TIMESTAMP>")
91+
arrow_field = column.arrow_field
92+
assert arrow_field.type == pyarrow.list_(pyarrow.timestamp("us"))
93+
94+
column = FlatColumn(name="test", type="ARRAY<BLOB>")
95+
arrow_field = column.arrow_field
96+
assert arrow_field.type == pyarrow.list_(pyarrow.binary())
97+
98+
def test_field_to_column():
99+
arrow_field = pyarrow.field("test", pyarrow.list_(pyarrow.int64()))
100+
column = FlatColumn.from_arrow(arrow_field)
101+
assert column.name == "test"
102+
assert column.type == OrsoTypes.ARRAY
103+
assert column.element_type == OrsoTypes.INTEGER, column.element_type
104+
105+
arrow_field = pyarrow.field("test", pyarrow.string())
106+
column = FlatColumn.from_arrow(arrow_field)
107+
assert column.name == "test"
108+
assert column.type == OrsoTypes.VARCHAR
109+
assert column.element_type is None
110+
111+
arrow_field = pyarrow.field("test", pyarrow.int64())
112+
column = FlatColumn.from_arrow(arrow_field)
113+
assert column.name == "test"
114+
assert column.type == OrsoTypes.INTEGER
115+
assert column.element_type is None
116+
117+
arrow_field = pyarrow.field("test", pyarrow.float64())
118+
column = FlatColumn.from_arrow(arrow_field)
119+
assert column.name == "test"
120+
assert column.type == OrsoTypes.DOUBLE
121+
assert column.element_type is None
122+
123+
arrow_field = pyarrow.field("test", pyarrow.bool_())
124+
column = FlatColumn.from_arrow(arrow_field)
125+
assert column.name == "test"
126+
assert column.type == OrsoTypes.BOOLEAN
127+
assert column.element_type is None
128+
129+
arrow_field = pyarrow.field("test", pyarrow.timestamp("us"))
130+
column = FlatColumn.from_arrow(arrow_field)
131+
assert column.name == "test"
132+
assert column.type == OrsoTypes.TIMESTAMP
133+
assert column.element_type is None
134+
135+
arrow_field = pyarrow.field("test", pyarrow.date32())
136+
column = FlatColumn.from_arrow(arrow_field)
137+
assert column.name == "test"
138+
assert column.type == OrsoTypes.DATE, column.type
139+
assert column.element_type is None
140+
141+
arrow_field = pyarrow.field("test", pyarrow.binary())
142+
column = FlatColumn.from_arrow(arrow_field)
143+
assert column.name == "test"
144+
assert column.type == OrsoTypes.BLOB
145+
assert column.element_type is None
146+
147+
arrow_field = pyarrow.field("test", pyarrow.decimal128(28, 21))
148+
column = FlatColumn.from_arrow(arrow_field)
149+
assert column.name == "test"
150+
assert column.type == OrsoTypes.DECIMAL
151+
assert column.element_type is None
152+
153+
arrow_field = pyarrow.field("test", pyarrow.list_(pyarrow.string()))
154+
column = FlatColumn.from_arrow(arrow_field)
155+
assert column.name == "test"
156+
assert column.type == OrsoTypes.ARRAY
157+
assert column.element_type == OrsoTypes.VARCHAR
158+
159+
arrow_field = pyarrow.field("test", pyarrow.list_(pyarrow.binary()))
160+
column = FlatColumn.from_arrow(arrow_field)
161+
assert column.name == "test"
162+
assert column.type == OrsoTypes.ARRAY
163+
assert column.element_type == OrsoTypes.BLOB
164+
165+
166+
167+
if __name__ == "__main__": # pragma: nocover
168+
from tests import run_tests
169+
170+
run_tests()

0 commit comments

Comments
 (0)