Skip to content

Commit 5295edb

Browse files
authored
Update parquet.py
1 parent 6821d15 commit 5295edb

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

pandas/io/parquet.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,19 @@ def write(
186186
from_pandas_kwargs["preserve_index"] = index
187187

188188
table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
189+
if any(isinstance(dtype,pd.StringDtype) for dtype in df.dtypes):
190+
string_dtypes={
191+
col:str(dtype.storage)
192+
for col,dtype in df.dtypes.items()
193+
if isinstance(dtype,pd.StringDtype)
194+
}
195+
metadata = table.schema.metadata or{}
196+
for col,storage in string_dtypes.items():
197+
key=f"pandas_string_dtype_{col}".encode()
198+
val= storage.encode()
199+
metadata[key]= val
200+
table= table.replace_schema_metadata(metadata)
201+
189202

190203
if df.attrs:
191204
df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
@@ -255,6 +268,16 @@ def read(
255268
)
256269
try:
257270
pa_table = self.api.parquet.read_table(
271+
metadata = pa_table.schema.metadata
272+
string_dtypes = {}
273+
if metadata:
274+
for key, value in metadata.items():
275+
if key.startswith(b"pandas_string_dtype_"):
276+
col_name = key.replace(b"pandas_string_dtype_", b"").decode()
277+
string_dtypes[col_name] = value.decode()
278+
279+
280+
258281
path_or_handle,
259282
columns=columns,
260283
filesystem=filesystem,

0 commit comments

Comments
 (0)