@@ -1627,33 +1627,19 @@ def guess_data_type(col: pd.Series) -> pd.Series:
16271627 -------
16281628 Data column with correct dtype
16291629 """
1630- def parse_dates (s ):
1631- """
1632- This is an extremely fast approach to datetime parsing.
1633- For large data, the same dates are often repeated. Rather than
1634- re-parse these, we store all unique dates, parse them, and
1635- use a lookup to convert all dates.
1636- """
1637- try :
1638- dates = {date : pd .to_datetime (date ) for date in s .unique ()}
1639- except ValueError :
1640- return None
1641- return s .map (dates )
1642-
16431630 if pdtypes .is_numeric_dtype (col ):
16441631 unique_values = col .unique ()
16451632 if len (unique_values ) <= 2 and (
16461633 len (np .setdiff1d (unique_values , [0 , 1 ])) == 0
16471634 or len (np .setdiff1d (unique_values , [1 , 2 ])) == 0 ):
16481635 return col .astype ("category" )
16491636 else : # object
1650- # try parse as date - if None not a date
1651- parsed_col = parse_dates (col )
1652- if parsed_col is not None :
1653- return parsed_col
1654- unique_values = col .unique ()
1655- if len (unique_values ) < 100 and len (unique_values ) < len (col )** 0.7 :
1656- return col .astype ("category" )
1637+ try :
1638+ return pd .to_datetime (col )
1639+ except ValueError :
1640+ unique_values = col .unique ()
1641+ if len (unique_values ) < 100 and len (unique_values ) < len (col )** 0.7 :
1642+ return col .astype ("category" )
16571643 return col
16581644
16591645
0 commit comments