Skip to content

Commit ea08043

Browse files
authored
Update test_preserve_leading_zeros.py
1 parent b766c15 commit ea08043

File tree

1 file changed

+46
-23
lines changed

1 file changed

+46
-23
lines changed
Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,71 @@
1-
import pytest
21
from io import StringIO
3-
import pandas._testing as tm
2+
3+
import pytest
44

55

6-
def test_leading_zeros_preserved_with_dtype_str(all_parsers, request):
    """
    Ensure that all parser engines preserve leading zeros when dtype=str is passed.

    The data is read with ``sep="|"`` and ``dtype=str``; the test then checks
    that the numeric-looking values in ``col2`` and ``col4`` keep their
    leading zeros, that every column has ``object`` dtype, and that the
    resulting shape and column names match the input.

    Parameters
    ----------
    all_parsers
        Parser fixture; the test reads its ``engine`` attribute (when present)
        and calls its ``read_csv`` method.
    request
        Built-in pytest ``request`` fixture, used to attach the xfail marker
        once the engine of the current parametrization is known.
    """
    parser = all_parsers
    engine_name = getattr(parser, "engine", "unknown")

    # A fixture value only exists inside the test call. Referencing
    # ``all_parsers`` in a module-level decorator raises NameError at
    # collection time, so the conditional xfail is applied here instead.
    if engine_name == "pyarrow":
        request.applymarker(
            pytest.mark.xfail(
                reason="pyarrow engine strips leading zeros even with dtype=str",
            )
        )

    data = """col1|col2|col3|col4
AB|000388907|abc|0150
CD|101044572|def|0150
EF|000023607|ghi|0205
GH|100102040|jkl|0205"""

    result = parser.read_csv(
        StringIO(data),
        sep="|",
        dtype=str,
    )

    # Verify leading zeros are preserved in col2 and col4
    zero_padded_cells = [
        (0, "col2", "000388907"),
        (2, "col2", "000023607"),
        (0, "col4", "0150"),
        (2, "col4", "0205"),
    ]
    for row, column, expected_value in zero_padded_cells:
        actual_value = result.loc[row, column]
        assert actual_value == expected_value, (
            f"Engine {engine_name}: Leading zeros lost in {column}, "
            f"row {row}. Got: {actual_value}"
        )

    # Verify all columns are string type
    expected_columns = ["col1", "col2", "col3", "col4"]
    for column in expected_columns:
        column_dtype = result.dtypes[column]
        assert column_dtype == "object", (
            f"Engine {engine_name}: {column} should be string type, "
            f"got {column_dtype}"
        )

    # Verify shape
    assert result.shape == (4, 4), (
        f"Engine {engine_name}: Expected shape (4, 4), got {result.shape}"
    )

    # Verify column names
    assert list(result.columns) == expected_columns, (
        f"Engine {engine_name}: Expected columns {expected_columns}, "
        f"got {list(result.columns)}"
    )

0 commit comments

Comments
 (0)