1+
2+ import datetime
3+
14import numpy
25
36import data_algebra
@@ -73,7 +76,8 @@ def guess_carried_scalar_type(col):
7376 :return: type of first non-None entry, if any , else type(None)
7477 """
7578 ct = map_type_to_canonical (type (col ))
76- if ct in {str , int , float , bool , type (None ), numpy .int64 , numpy .float64 }:
79+ if ct in {str , int , float , bool , type (None ), numpy .int64 , numpy .float64 ,
80+ datetime .datetime , datetime .date , datetime .timedelta }:
7781 return ct
7882 if len (col ) < 1 :
7983 return type (None )
@@ -85,25 +89,27 @@ def guess_carried_scalar_type(col):
8589
def guess_column_types(d, *, columns=None):
    """
    Guess column types as type of first non-missing value.
    Will not return series types, as some pandas data frames with non-trivial indexing report this type.

    An empty dict is returned when d has no rows or no columns, when the
    requested column list is empty, or when the guess for any single column
    is not a usable type.

    :param d: pandas.DataFrame
    :param columns: list of columns to check, if None all columns are checked
    :return: map of column names to guessed types, empty dict if any column guess fails
    """
    # Nothing to inspect without at least one row and one column.
    if (d.shape[0] <= 0) or (d.shape[1] <= 0):
        return dict()
    if columns is None:
        columns = d.columns.copy()
    # Every requested column must be present in d.
    assert len(set(columns) - set(d.columns)) == 0
    if len(columns) <= 0:
        return dict()
    res = dict()
    for c in columns:
        gt = guess_carried_scalar_type(d[c])
        if (gt is None) or (not isinstance(gt, type)) or str(gt).endswith('.Series\'>'):
            # pandas.concat() poisons types with Series, don't allow that
            return dict()
        res[c] = gt
    return res
108114
109115
@@ -129,7 +135,11 @@ def check_columns_appear_compatible(d_left, d_right, *, columns=None):
129135 assert len (set (columns ) - set (d_left .columns )) == 0
130136 assert len (set (columns ) - set (d_right .columns )) == 0
131137 left_types = data_algebra .util .guess_column_types (d_left , columns = columns )
138+ if (left_types is None ) or (len (left_types ) <= 0 ):
139+ return None
132140 right_types = data_algebra .util .guess_column_types (d_right , columns = columns )
141+ if (right_types is None ) or (len (right_types ) <= 0 ):
142+ return None
133143 mismatches = dict ()
134144 for c in columns :
135145 if not compatible_types ([left_types [c ], right_types [c ]]):
0 commit comments