Skip to content

Commit a7bd7c2

Browse files
authored
fix: Add support for pyarrow type large_string (#2663)
* Add support for pyarrow type large_string
* Update _data_types.py
1 parent 2cbe133 commit a7bd7c2

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

awswrangler/_data_types.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def pyarrow2athena( # noqa: PLR0911,PLR0912
4040
return "double"
4141
if pa.types.is_boolean(dtype):
4242
return "boolean"
43-
if pa.types.is_string(dtype):
43+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
4444
return "string"
4545
if pa.types.is_timestamp(dtype):
4646
return "timestamp"
@@ -87,7 +87,7 @@ def pyarrow2redshift( # noqa: PLR0911,PLR0912
8787
return "FLOAT8"
8888
if pa.types.is_boolean(dtype):
8989
return "BOOL"
90-
if pa.types.is_string(dtype):
90+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
9191
return string_type
9292
if pa.types.is_timestamp(dtype):
9393
return "TIMESTAMP"
@@ -132,7 +132,7 @@ def pyarrow2mysql( # noqa: PLR0911,PLR0912
132132
return "DOUBLE PRECISION"
133133
if pa.types.is_boolean(dtype):
134134
return "BOOLEAN"
135-
if pa.types.is_string(dtype):
135+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
136136
return string_type
137137
if pa.types.is_timestamp(dtype):
138138
return "TIMESTAMP"
@@ -167,7 +167,7 @@ def pyarrow2oracle( # noqa: PLR0911
167167
return "BINARY_DOUBLE"
168168
if pa.types.is_boolean(dtype):
169169
return "NUMBER(3)"
170-
if pa.types.is_string(dtype):
170+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
171171
return string_type
172172
if pa.types.is_timestamp(dtype):
173173
return "TIMESTAMP"
@@ -202,7 +202,7 @@ def pyarrow2postgresql( # noqa: PLR0911
202202
return "FLOAT8"
203203
if pa.types.is_boolean(dtype):
204204
return "BOOL"
205-
if pa.types.is_string(dtype):
205+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
206206
return string_type
207207
if pa.types.is_timestamp(dtype):
208208
return "TIMESTAMP"
@@ -237,7 +237,7 @@ def pyarrow2sqlserver( # noqa: PLR0911
237237
return "FLOAT"
238238
if pa.types.is_boolean(dtype):
239239
return "BIT"
240-
if pa.types.is_string(dtype):
240+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
241241
return string_type
242242
if pa.types.is_timestamp(dtype):
243243
return "DATETIME2"
@@ -270,7 +270,7 @@ def pyarrow2timestream(dtype: pa.DataType) -> str: # noqa: PLR0911
270270
return "DOUBLE"
271271
if pa.types.is_boolean(dtype):
272272
return "BOOLEAN"
273-
if pa.types.is_string(dtype):
273+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
274274
return "VARCHAR"
275275
if pa.types.is_date(dtype):
276276
return "DATE"
@@ -470,7 +470,7 @@ def pyarrow2pandas_extension( # noqa: PLR0911
470470
return pd.UInt64Dtype()
471471
if pa.types.is_boolean(dtype):
472472
return pd.BooleanDtype()
473-
if pa.types.is_string(dtype):
473+
if pa.types.is_string(dtype) or pa.types.is_large_string(dtype):
474474
return pd.StringDtype()
475475
return None
476476

0 commit comments

Comments
 (0)