88import numpy as np
99import pytest
1010
11- from pandas ._config import using_string_dtype
12-
1311from pandas .errors import ParserWarning
1412
1513import pandas as pd
2422 "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
2523)
2624
# Reusable marker that attaches the ``pyarrow_xfail`` fixture to a test.
# NOTE(review): the fixture is defined outside this file; presumably it
# xfails the test when the pyarrow engine is in use -- confirm in conftest.
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
26+
2727
2828@pytest .mark .parametrize ("dtype" , [str , object ])
2929@pytest .mark .parametrize ("check_orig" , [True , False ])
@@ -54,7 +54,6 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig, using_infer_string):
5454 tm .assert_frame_equal (result , expected )
5555
5656
57- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
5857@pytest .mark .usefixtures ("pyarrow_xfail" )
5958def test_dtype_per_column (all_parsers ):
6059 parser = all_parsers
@@ -68,7 +67,6 @@ def test_dtype_per_column(all_parsers):
6867 [[1 , "2.5" ], [2 , "3.5" ], [3 , "4.5" ], [4 , "5.5" ]], columns = ["one" , "two" ]
6968 )
7069 expected ["one" ] = expected ["one" ].astype (np .float64 )
71- expected ["two" ] = expected ["two" ].astype (object )
7270
7371 result = parser .read_csv (StringIO (data ), dtype = {"one" : np .float64 , 1 : str })
7472 tm .assert_frame_equal (result , expected )
@@ -598,6 +596,7 @@ def test_string_inference_object_dtype(all_parsers, dtype, using_infer_string):
598596 tm .assert_frame_equal (result , expected )
599597
600598
599+ @xfail_pyarrow
601600def test_accurate_parsing_of_large_integers (all_parsers ):
602601 # GH#52505
603602 data = """SYMBOL,MOMENT,ID,ID_DEAL
@@ -608,7 +607,7 @@ def test_accurate_parsing_of_large_integers(all_parsers):
608607AMZN,20230301181139587,2023552585717889759,2023552585717263360
609608MSFT,20230301181139587,2023552585717889863,2023552585717263361
610609NVDA,20230301181139587,2023552585717889827,2023552585717263361"""
611- orders = pd .read_csv (StringIO (data ), dtype = {"ID_DEAL" : pd .Int64Dtype ()})
610+ orders = all_parsers .read_csv (StringIO (data ), dtype = {"ID_DEAL" : pd .Int64Dtype ()})
612611 assert len (orders .loc [orders ["ID_DEAL" ] == 2023552585717263358 , "ID_DEAL" ]) == 1
613612 assert len (orders .loc [orders ["ID_DEAL" ] == 2023552585717263359 , "ID_DEAL" ]) == 1
614613 assert len (orders .loc [orders ["ID_DEAL" ] == 2023552585717263360 , "ID_DEAL" ]) == 2
@@ -630,3 +629,16 @@ def test_dtypes_with_usecols(all_parsers):
630629 values = ["1" , "4" ]
631630 expected = DataFrame ({"a" : pd .Series (values , dtype = object ), "c" : [3 , 6 ]})
632631 tm .assert_frame_equal (result , expected )
632+
633+
def test_index_col_with_dtype_no_rangeindex(all_parsers):
    """Check that the ``index_col`` column honours the dtype requested for it.

    Two header-less rows are parsed with explicit column names and per-column
    dtypes; the column selected as the index is requested as ``np.uint32``.
    The resulting index must equal a ``uint32`` ``pd.Index`` named
    ``"bin_id"`` holding ``[0, 1]``.
    """
    # NOTE(review): the literal contains a space right after the newline;
    # kept exactly as found -- confirm it is intended and not a paste artifact.
    csv_buffer = StringIO("345.5,519.5,0\n 519.5,726.5,1")
    column_dtypes = {"start": np.float32, "stop": np.float32, "bin_id": np.uint32}

    frame = all_parsers.read_csv(
        csv_buffer,
        header=None,
        names=["start", "stop", "bin_id"],
        dtype=column_dtypes,
        index_col="bin_id",
    )

    # Only the index is under test; the data columns are not inspected.
    tm.assert_index_equal(
        frame.index,
        pd.Index([0, 1], dtype=np.uint32, name="bin_id"),
    )
0 commit comments