diff --git a/README.md b/README.md
index ebab2e6016850..45a400e423b5e 100644
--- a/README.md
+++ b/README.md
@@ -188,3 +188,12 @@ As contributors and maintainers to this project, you are expected to abide by pa
[Go to Top](#table-of-contents)
+Neva Aydın
+
+Heba Walid Awad
+
+Zeynep Genel
+
+Gül Akkoca
+
+Berat Nevcanoğlu
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 6a5a83088e986..1803c06d564e3 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -184,8 +184,21 @@ def write(
from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
if index is not None:
from_pandas_kwargs["preserve_index"] = index
-
+        # Convert to Arrow; StringDtype storage is attached to the metadata below.
table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
+        # Persist each StringDtype column's storage backend in the schema
+        # metadata so the reader can restore the exact dtype.
+        string_dtypes = {
+            col: str(dtype.storage)
+            for col, dtype in df.dtypes.items()
+            if isinstance(dtype, pd.StringDtype)
+        }
+        if string_dtypes:
+            metadata = dict(table.schema.metadata or {})
+            for col, storage in string_dtypes.items():
+                metadata[f"pandas_string_dtype_{col}".encode()] = storage.encode()
+            table = table.replace_schema_metadata(metadata)
+
if df.attrs:
df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
@@ -254,13 +267,35 @@ def read(
mode="rb",
)
try:
- pa_table = self.api.parquet.read_table(
+ pa_table = self.api.parquet.read_table(
path_or_handle,
columns=columns,
filesystem=filesystem,
filters=filters,
**kwargs,
)
+
+            # Read back the per-column StringDtype storage saved at write time.
+            prefix = b"pandas_string_dtype_"
+            string_dtypes = {
+                key[len(prefix) :].decode(): value.decode()
+                for key, value in (pa_table.schema.metadata or {}).items()
+                if key.startswith(prefix)
+            }
+            if string_dtypes:
+                # Copy so the caller's dict is untouched; chain to their mapper.
+                to_pandas_kwargs = dict(to_pandas_kwargs or {})
+                fallback = to_pandas_kwargs.get("types_mapper")
+
+                def types_mapper(pa_type):
+                    # Mapping is per Arrow type, so all its columns must agree.
+                    fields = [f for f in pa_table.schema if f.type == pa_type]
+                    storages = {string_dtypes.get(f.name) for f in fields}
+                    if len(storages) == 1 and None not in storages:
+                        return pd.StringDtype(storage=storages.pop())
+                    return fallback(pa_type) if fallback else None
+
+                to_pandas_kwargs["types_mapper"] = types_mapper
with catch_warnings():
filterwarnings(
"ignore",
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 78f39b649cb9a..ce77bfa6f36d9 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -718,6 +718,26 @@ def test_basic_subset_columns(self, pa, df_full):
expected=df[["string", "int"]],
read_kwargs={"columns": ["string", "int"]},
)
+
+    @pytest.mark.parametrize("string_storage", ["pyarrow", "python"])
+    def test_parquet_stringdtype_roundtrip(self, tmp_path, pa, string_storage):
+        # The storage backend ("python" / "pyarrow") of a StringDtype column
+        # must survive a write/read cycle through a parquet file.
+        import pandas as pd
+        from pandas.testing import assert_frame_equal
+
+        expected_dtype = pd.StringDtype(storage=string_storage)
+        df = pd.DataFrame({"a": pd.Series(["x", "y", "z"], dtype=expected_dtype)})
+
+        file_path = tmp_path / "stringdtype.parquet"
+        df.to_parquet(file_path, engine="pyarrow")
+        result = pd.read_parquet(file_path, engine="pyarrow")
+
+        assert result["a"].dtype == expected_dtype, (
+            f"Dtype mismatch: got {result['a'].dtype}, expected {expected_dtype}"
+        )
+        assert_frame_equal(result, df)
+
def test_to_bytes_without_path_or_buf_provided(self, pa, df_full):
# GH 37105