Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ MultiIndex
I/O
^^^
- :meth:`DataFrame.to_excel` was storing decimals as strings instead of numbers (:issue:`49598`)
- :meth:`DataFrame.to_stata` no longer raises a ``TypeError('encoding without a string argument')`` when exporting a column containing both long strings (Stata strL) and :class:`pd.NA` values (:issue:`23633`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this to v3.0.0.rst?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks

-

Period
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3196,8 +3196,8 @@ def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]:
for o, (idx, row) in enumerate(selected.iterrows()):
for j, (col, v) in enumerate(col_index):
val = row[col]
# Allow columns with mixed str and None (GH 23633)
val = "" if val is None else val
# Allow columns with mixed str and None or pd.NA (GH 23633)
val = "" if (val is None) or isna(val) else val
key = gso_table.get(val, None)
if key is None:
# Stata prefers human numbers
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2587,3 +2587,17 @@ def test_many_strl(temp_file, version):
lbls = ["".join(v) for v in itertools.product(*([string.ascii_letters] * 3))]
value_labels = {"col": {i: lbls[i] for i in range(n)}}
df.to_stata(temp_file, value_labels=value_labels, version=version)


@pytest.mark.parametrize("version", [117, 118, 119, None])
def test_strl_missings(temp_file, version):
    """Smoke test: exporting strl data mixed with None and pd.NA succeeds.

    Regression test for GH 23633. The test passes as long as
    ``to_stata`` completes without raising for each parametrized version.
    """
    long_text = "string" * 500
    # One real string value followed by both kinds of missing marker.
    records = [
        {"str1": long_text, "number": 0},
        {"str1": None, "number": 1},
        {"str1": pd.NA, "number": 1},
    ]
    frame = DataFrame(records)
    frame.to_stata(temp_file, version=version)
Loading