pandas-dev · GulAkkoca · Mar 28, 2025 · Mar 28, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/README.md b/README.md
@@ -188,3 +188,12 @@ As contributors and maintainers to this project, you are expected to abide by pa
 <hr>
 
 [Go to Top](#table-of-contents)
+Neva Aydın
+
+Heba Walid Awad
+
+Zeynep Genel
+
+Gül Akkoca
+
+Berat Nevcanoğlu
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -184,8 +184,21 @@ def write(
         from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
         if index is not None:
             from_pandas_kwargs["preserve_index"] = index
-
+        # Added section: store StringDtype storage in the table metadata.
         table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
+        # Persist each StringDtype column's storage so it can be restored on read.
+        string_dtypes = {
+            col: str(dtype.storage)
+            for col, dtype in df.dtypes.items()
+            if isinstance(dtype, pd.StringDtype)
+        }
+        if string_dtypes:
+            metadata = dict(table.schema.metadata or {})
+            for col, storage in string_dtypes.items():
+                key = f"pandas_string_dtype_{col}".encode()
+                metadata[key] = storage.encode()
+            table = table.replace_schema_metadata(metadata)
+
 
         if df.attrs:
             df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
@@ -254,13 +267,35 @@ def read(
             mode="rb",
         )
         try:
-            pa_table = self.api.parquet.read_table(
+            pa_table = self.api.parquet.read_table( 
                 path_or_handle,
                 columns=columns,
                 filesystem=filesystem,
                 filters=filters,
                 **kwargs,
             )
+
+            # Collect the per-column StringDtype storage recorded by `write`
+            # under the "pandas_string_dtype_<column>" schema metadata keys.
+            prefix = b"pandas_string_dtype_"
+            string_dtypes = {
+                key[len(prefix) :].decode(): value.decode()
+                for key, value in (pa_table.schema.metadata or {}).items()
+                if key.startswith(prefix)
+            }
+
+            def types_mapper(pa_type):
+                # First column of this Arrow type with a recorded storage wins.
+                for field in pa_table.schema:
+                    if field.type == pa_type and field.name in string_dtypes:
+                        return pd.StringDtype(storage=string_dtypes[field.name])
+                return None  # fall back to the default mapping
+
+            if string_dtypes:
+                # Copy so the caller's dict is not mutated; keep a caller-
+                # supplied mapper if one was passed.
+                to_pandas_kwargs = dict(to_pandas_kwargs or {})
+                to_pandas_kwargs.setdefault("types_mapper", types_mapper)
             with catch_warnings():
                 filterwarnings(
                     "ignore",

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -718,6 +718,26 @@ def test_basic_subset_columns(self, pa, df_full):
             expected=df[["string", "int"]],
             read_kwargs={"columns": ["string", "int"]},
         )
+    # Added: round-trip test for StringDtype storage.
+    @pytest.mark.parametrize("string_storage", ["pyarrow", "python"])
+    def test_parquet_stringdtype_roundtrip(self, tmp_path, pa, string_storage):
+        """Check that StringDtype storage survives a parquet round trip."""
+        import pandas as pd
+        from pandas.testing import assert_frame_equal
+
+        expected_dtype = pd.StringDtype(storage=string_storage)
+        df = pd.DataFrame({"a": pd.Series(["x", "y", "z"], dtype=expected_dtype)})
+
+        file_path = tmp_path / "stringdtype.parquet"
+        df.to_parquet(file_path, engine="pyarrow")
+        result = pd.read_parquet(file_path, engine="pyarrow")
+
+        # Check the dtype explicitly so a storage mismatch gives a clear message.
+        assert result["a"].dtype == expected_dtype, (
+            f"Dtype mismatch: got {result['a'].dtype}, expected {expected_dtype}"
+        )
+        assert_frame_equal(result, df)
+
 
     def test_to_bytes_without_path_or_buf_provided(self, pa, df_full):
         # GH 37105