scikit-hep · Copilot · Nov 5, 2025 · Nov 5, 2025 · Nov 5, 2025 · Nov 5, 2025
diff --git a/src/awkward/operations/ak_to_dataframe.py b/src/awkward/operations/ak_to_dataframe.py
@@ -234,22 +234,14 @@ def recurse(layout, row_arrays, col_names):
         else:
             columns = pandas.MultiIndex.from_tuples([col_names])
 
-        # Before filling, ensure dtype is wide enough for "nan" fill value. Need
-        # at least three characters / bytes for 'nan' or b'nan', respectively.
+        # Pandas stores None in object columns, so masked strings/bytestrings become
+        # object arrays holding None instead of a "nan" fill that mimics real data.
         if numpy.ma.is_masked(column):
-            if np.issubdtype(column.dtype, np.str_):
-                char_width = column.dtype.itemsize // 4
-                if char_width < 3:
-                    column = column.astype(np.dtype(("U", 3)))
-            elif np.issubdtype(column.dtype, np.bytes_):
-                byte_width = column.dtype.itemsize
-                if byte_width < 3:
-                    column = column.astype(np.dtype(("S", 3)))
-        # Pandas can't handle masked strings
-        if np.issubdtype(column.dtype, np.str_):
-            column = numpy.ma.filled(column, "nan")
-        elif np.issubdtype(column.dtype, np.bytes_):
-            column = numpy.ma.filled(column, b"nan")
+            if np.issubdtype(column.dtype, np.str_) or np.issubdtype(
+                column.dtype, np.bytes_
+            ):
+                # Convert masked string/bytestring arrays to object arrays with None
+                column = numpy.where(column.mask, None, column.data).astype(object)
 
         if (
             last_row_arrays is not None

diff --git a/tests/test_0331_pandas_indexedarray.py b/tests/test_0331_pandas_indexedarray.py
@@ -182,5 +182,5 @@ def test_union_to_record():
         df_unionarray2["z"].values, [np.nan, np.nan, 999, np.nan, np.nan]
     )
     np.testing.assert_array_equal(
-        df_unionarray2["values"].values, ["nan", "one", "nan", "two", "nan"]
+        df_unionarray2["values"].values, [None, "one", None, "two", None]
     )
diff --git a/tests/test_3692_to_dataframe_masked_string_dtype_resize.py b/tests/test_3692_to_dataframe_masked_string_dtype_resize.py
@@ -18,13 +18,13 @@ def test_masked_string_array_with_option():
     # Convert to dataframe - should not raise an error about dtype width
     df = ak.operations.to_dataframe(array_str)
 
-    # Verify the conversion worked and "nan" appears for None values
+    # Verify the conversion worked and None values are preserved
     assert df["values"].values[0] == "a"
-    assert df["values"].values[1] == "nan"
+    assert df["values"].values[1] is None
     assert df["values"].values[2] == "c"
     assert df["values"].values[3] == "d"
     assert df["values"].values[4] == "e"
-    assert df["values"].values[5] == "nan"
+    assert df["values"].values[5] is None
 
 
 def test_masked_bytestring_array_with_option():
@@ -35,13 +35,13 @@ def test_masked_bytestring_array_with_option():
     # Convert to dataframe - should not raise an error about dtype width
     df = ak.operations.to_dataframe(array_bytes)
 
-    # Verify the conversion worked and b"nan" appears for None values
+    # Verify the conversion worked and None values are preserved
     assert df["values"].values[0] == b"x"
-    assert df["values"].values[1] == b"nan"
+    assert df["values"].values[1] is None
     assert df["values"].values[2] == b"z"
     assert df["values"].values[3] == b"a"
     assert df["values"].values[4] == b"b"
-    assert df["values"].values[5] == b"nan"
+    assert df["values"].values[5] is None
 
 
 def test_union_with_narrow_strings():
@@ -55,14 +55,14 @@ def test_union_with_narrow_strings():
 
     # Verify conversion works without dtype errors
     assert len(df) == 3
-    # The x column should have "nan" for missing values
-    assert df["x"].values[1] == "nan"
+    # The x column should have None for missing values
+    assert df["x"].values[1] is None
 
 
 def test_single_char_strings_with_none():
     """Test very short strings with None values."""
     # Single character strings with None values
-    # This is the edge case that needs dtype resizing
+    # This tests that even very short strings work correctly
     array = ak.Array([["a", "b"], [None, "c"], ["d", None]])
 
     df = ak.operations.to_dataframe(array)
@@ -73,8 +73,8 @@ def test_single_char_strings_with_none():
     assert "b" in values
     assert "c" in values
     assert "d" in values
-    # Check that "nan" string appears for None values
-    assert values.count("nan") == 2
+    # Check that None appears for None values
+    assert values.count(None) == 2
 
 
 def test_single_byte_bytestrings_with_none():
@@ -90,19 +90,19 @@ def test_single_byte_bytestrings_with_none():
     assert b"b" in values
     assert b"c" in values
     assert b"d" in values
-    # Check that b"nan" appears for None values
-    assert values.count(b"nan") == 2
+    # Check that None appears for None values
+    assert values.count(None) == 2
 
 
 def test_two_char_strings_with_none():
     """Test two-character strings with None values (edge case)."""
-    # Two character strings - exactly the edge case where "nan" (3 chars) won't fit
+    # Two-character strings are narrower than the old 3-character "nan" fill value
     array = ak.Array([["ab", None], [None, "cd"]])
 
     df = ak.operations.to_dataframe(array)
 
     values = df["values"].values.tolist()
     assert "ab" in values
     assert "cd" in values
-    # "nan" should appear (3 characters)
-    assert values.count("nan") == 2
+    # Both missing entries must come back as None
+    assert values.count(None) == 2
diff --git a/tests/test_3713_to_dataframe_none_vs_nan_string.py b/tests/test_3713_to_dataframe_none_vs_nan_string.py
@@ -0,0 +1,74 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+import pytest
+
+import awkward as ak
+
+pandas = pytest.importorskip("pandas")
+
+
+def test_none_vs_nan_string():
+    """Check that None and the literal string "nan" remain distinct."""
+    # One missing entry followed by the literal text "nan"
+    frame = ak.to_dataframe(ak.Array([None, "nan"]))
+    column = frame.to_dict()["values"]
+    missing, literal = column[0], column[1]
+
+    # The missing entry survives as None rather than being filled with "nan"
+    assert missing is None
+    assert literal == "nan"
+    # The two entries must not compare equal
+    assert missing != literal
+
+
+def test_none_vs_nan_bytestring():
+    """Check that None and the literal bytestring b"nan" remain distinct."""
+    # One missing entry followed by the literal bytes b"nan"
+    frame = ak.to_dataframe(ak.Array([None, b"nan"]))
+    column = frame.to_dict()["values"]
+    missing, literal = column[0], column[1]
+
+    # The missing entry survives as None rather than being filled with b"nan"
+    assert missing is None
+    assert literal == b"nan"
+    # The two entries must not compare equal
+    assert missing != literal
+
+
+def test_nested_list_with_none_and_nan_string():
+    """Check a nested string array that mixes None with the literal "nan"."""
+    nested = ak.Array([["a", None, "nan"], ["b", "nan", None]])
+    flat = ak.to_dataframe(nested)["values"].values
+
+    # Walk the column in order: None must stay None, "nan" must stay text
+    expected = ["a", None, "nan", "b", "nan", None]
+    for position, want in enumerate(expected):
+        got = flat[position]
+        if want is None:
+            assert got is None
+        else:
+            assert got == want
+
+
+def test_record_with_none_and_nan_string():
+    """Check record fields that mix None with the literal "nan"."""
+    rows = [
+        {"x": "value", "y": None},
+        {"x": "nan", "y": "another"},
+        {"x": None, "y": "nan"},
+    ]
+    frame = ak.to_dataframe(ak.Array(rows))
+    x_column = frame["x"].values
+    y_column = frame["y"].values
+
+    # Field "x": text, the literal "nan", then a missing entry
+    assert x_column[0] == "value"
+    assert x_column[1] == "nan"
+    assert x_column[2] is None
+
+    # Field "y": a missing entry, text, then the literal "nan"
+    assert y_column[0] is None
+    assert y_column[1] == "another"
+    assert y_column[2] == "nan"