|
1 | | -import pytest |
2 | 1 | from io import StringIO |
3 | | -import pandas._testing as tm |
| 2 | + |
| 3 | +import pytest |
4 | 4 |
|
5 | 5 |
|
6 | | -@pytest.mark.xfail(reason="Leading zeros preservation may not work consistently across all engines") |
| 6 | +@pytest.mark.xfail( |
| 7 | + condition=getattr(all_parsers, "engine", "") == "pyarrow", |
| 8 | + reason="pyarrow engine strips leading zeros even with dtype=str", |
| 9 | +) |
7 | 10 | def test_leading_zeros_preserved_with_dtype_str(all_parsers): |
8 | 11 | """ |
9 | 12 | Ensure that all parser engines preserve leading zeros when dtype=str is passed. |
10 | | - |
11 | | - This test verifies that when dtype=str is specified, leading zeros in |
| 13 | +
|
| 14 | + This test verifies that when dtype=str is specified, leading zeros in |
12 | 15 | numeric-looking strings are preserved across all available parser engines. |
13 | 16 | """ |
14 | 17 | parser = all_parsers |
15 | | - engine_name = getattr(parser, 'engine', 'unknown') |
16 | | - |
| 18 | + engine_name = getattr(parser, "engine", "unknown") |
| 19 | + |
17 | 20 | data = """col1|col2|col3|col4 |
18 | 21 | AB|000388907|abc|0150 |
19 | 22 | CD|101044572|def|0150 |
20 | 23 | EF|000023607|ghi|0205 |
21 | 24 | GH|100102040|jkl|0205""" |
22 | | - |
| 25 | + |
23 | 26 | result = parser.read_csv( |
24 | 27 | StringIO(data), |
25 | 28 | sep="|", |
26 | 29 | dtype=str, |
27 | 30 | ) |
28 | | - |
| 31 | + |
29 | 32 | # Verify leading zeros are preserved in col2 |
30 | | - assert result.loc[0, "col2"] == "000388907", f"Engine {engine_name}: Leading zeros lost in col2, row 0. Got: {result.loc[0, 'col2']}" |
31 | | - assert result.loc[2, "col2"] == "000023607", f"Engine {engine_name}: Leading zeros lost in col2, row 2. Got: {result.loc[2, 'col2']}" |
32 | | - |
| 33 | + assert result.loc[0, "col2"] == "000388907", ( |
| 34 | + f"Engine {engine_name}: Leading zeros lost in col2, row 0. Got: {result.loc[0, 'col2']}" |
| 35 | + ) |
| 36 | + assert result.loc[2, "col2"] == "000023607", ( |
| 37 | + f"Engine {engine_name}: Leading zeros lost in col2, row 2. Got: {result.loc[2, 'col2']}" |
| 38 | + ) |
| 39 | + |
33 | 40 | # Verify leading zeros are preserved in col4 |
34 | | - assert result.loc[0, "col4"] == "0150", f"Engine {engine_name}: Leading zeros lost in col4, row 0. Got: {result.loc[0, 'col4']}" |
35 | | - assert result.loc[2, "col4"] == "0205", f"Engine {engine_name}: Leading zeros lost in col4, row 2. Got: {result.loc[2, 'col4']}" |
36 | | - |
| 41 | + assert result.loc[0, "col4"] == "0150", ( |
| 42 | + f"Engine {engine_name}: Leading zeros lost in col4, row 0. Got: {result.loc[0, 'col4']}" |
| 43 | + ) |
| 44 | + assert result.loc[2, "col4"] == "0205", ( |
| 45 | + f"Engine {engine_name}: Leading zeros lost in col4, row 2. Got: {result.loc[2, 'col4']}" |
| 46 | + ) |
| 47 | + |
37 | 48 | # Verify all columns are string type |
38 | | - assert result.dtypes["col1"] == "object", f"Engine {engine_name}: col1 should be string type, got {result.dtypes['col1']}" |
39 | | - assert result.dtypes["col2"] == "object", f"Engine {engine_name}: col2 should be string type, got {result.dtypes['col2']}" |
40 | | - assert result.dtypes["col3"] == "object", f"Engine {engine_name}: col3 should be string type, got {result.dtypes['col3']}" |
41 | | - assert result.dtypes["col4"] == "object", f"Engine {engine_name}: col4 should be string type, got {result.dtypes['col4']}" |
42 | | - |
| 49 | + assert result.dtypes["col1"] == "object", ( |
| 50 | + f"Engine {engine_name}: col1 should be string type, got {result.dtypes['col1']}" |
| 51 | + ) |
| 52 | + assert result.dtypes["col2"] == "object", ( |
| 53 | + f"Engine {engine_name}: col2 should be string type, got {result.dtypes['col2']}" |
| 54 | + ) |
| 55 | + assert result.dtypes["col3"] == "object", ( |
| 56 | + f"Engine {engine_name}: col3 should be string type, got {result.dtypes['col3']}" |
| 57 | + ) |
| 58 | + assert result.dtypes["col4"] == "object", ( |
| 59 | + f"Engine {engine_name}: col4 should be string type, got {result.dtypes['col4']}" |
| 60 | + ) |
| 61 | + |
43 | 62 | # Verify shape |
44 | | - assert result.shape == (4, 4), f"Engine {engine_name}: Expected shape (4, 4), got {result.shape}" |
45 | | - |
| 63 | + assert result.shape == (4, 4), ( |
| 64 | + f"Engine {engine_name}: Expected shape (4, 4), got {result.shape}" |
| 65 | + ) |
| 66 | + |
46 | 67 | # Verify column names |
47 | 68 | expected_columns = ["col1", "col2", "col3", "col4"] |
48 | | - assert list(result.columns) == expected_columns, f"Engine {engine_name}: Expected columns {expected_columns}, got {list(result.columns)}" |
| 69 | + assert list(result.columns) == expected_columns, ( |
| 70 | + f"Engine {engine_name}: Expected columns {expected_columns}, got {list(result.columns)}" |
| 71 | + ) |
0 commit comments