33import pytest
44
55
6- @pytest .mark .xfail (
7- condition = getattr (all_parsers , "engine" , "" ) == "pyarrow" ,
8- reason = "pyarrow engine strips leading zeros even with dtype=str" ,
9- )
106def test_leading_zeros_preserved_with_dtype_str (all_parsers ):
117 """
128 Ensure that all parser engines preserve leading zeros when dtype=str is passed.
@@ -29,43 +25,30 @@ def test_leading_zeros_preserved_with_dtype_str(all_parsers):
2925 dtype = str ,
3026 )
3127
32- # Verify leading zeros are preserved in col2
33- assert result .loc [0 , "col2" ] == "000388907" , (
34- f"Engine { engine_name } : Leading zeros lost in col2, row 0. Got: { result .loc [0 , 'col2' ]} "
35- )
36- assert result .loc [2 , "col2" ] == "000023607" , (
37- f"Engine { engine_name } : Leading zeros lost in col2, row 2. Got: { result .loc [2 , 'col2' ]} "
38- )
39-
40- # Verify leading zeros are preserved in col4
41- assert result .loc [0 , "col4" ] == "0150" , (
42- f"Engine { engine_name } : Leading zeros lost in col4, row 0. Got: { result .loc [0 , 'col4' ]} "
43- )
44- assert result .loc [2 , "col4" ] == "0205" , (
45- f"Engine { engine_name } : Leading zeros lost in col4, row 2. Got: { result .loc [2 , 'col4' ]} "
46- )
47-
48- # Verify all columns are string type
49- assert result .dtypes ["col1" ] == "object" , (
50- f"Engine { engine_name } : col1 should be string type, got { result .dtypes ['col1' ]} "
51- )
52- assert result .dtypes ["col2" ] == "object" , (
53- f"Engine { engine_name } : col2 should be string type, got { result .dtypes ['col2' ]} "
54- )
55- assert result .dtypes ["col3" ] == "object" , (
56- f"Engine { engine_name } : col3 should be string type, got { result .dtypes ['col3' ]} "
57- )
58- assert result .dtypes ["col4" ] == "object" , (
59- f"Engine { engine_name } : col4 should be string type, got { result .dtypes ['col4' ]} "
60- )
61-
62- # Verify shape
63- assert result .shape == (4 , 4 ), (
64- f"Engine { engine_name } : Expected shape (4, 4), got { result .shape } "
65- )
66-
67- # Verify column names
68- expected_columns = ["col1" , "col2" , "col3" , "col4" ]
69- assert list (result .columns ) == expected_columns , (
70- f"Engine { engine_name } : Expected columns { expected_columns } , got { list (result .columns )} "
71- )
28+ try :
29+ assert result .loc [0 , "col2" ] == "000388907" , (
30+ f"{ engine_name } lost zeros in col2 row 0"
31+ )
32+ assert result .loc [2 , "col2" ] == "000023607" , (
33+ f"{ engine_name } lost zeros in col2 row 2"
34+ )
35+ assert result .loc [0 , "col4" ] == "0150" , (
36+ f"{ engine_name } lost zeros in col4 row 0"
37+ )
38+ assert result .loc [2 , "col4" ] == "0205" , (
39+ f"{ engine_name } lost zeros in col4 row 2"
40+ )
41+
42+ for col in ["col1" , "col2" , "col3" , "col4" ]:
43+ assert result .dtypes [col ] == "object" , (
44+ f"{ engine_name } wrong dtype for { col } "
45+ )
46+
47+ assert result .shape == (4 , 4 )
48+ assert list (result .columns ) == ["col1" , "col2" , "col3" , "col4" ]
49+ except AssertionError as exc :
50+ if engine_name == "pyarrow" :
51+ # Known issue: pyarrow engine strips leading zeros even with dtype=str.
52+ pytest .xfail (f"failed assertions: { exc } " )
53+ else :
54+ raise
0 commit comments