@@ -145,16 +145,21 @@ <h1 class="modulename">
145145< span class ="sd "> :param col: column or scalar to inspect</ span >
146146< span class ="sd "> :return: type of first non-None entry, if any , else type(None)</ span >
147147< span class ="sd "> """</ span >
148+ < span class ="c1 "> # check for scalars first</ span >
148149 < span class ="n "> ct</ span > < span class ="o "> =</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> ))</ span >
149150 < span class ="k "> if</ span > < span class ="n "> ct</ span > < span class ="ow "> in</ span > < span class ="p "> {</ span > < span class ="nb "> str</ span > < span class ="p "> ,</ span > < span class ="nb "> int</ span > < span class ="p "> ,</ span > < span class ="nb "> float</ span > < span class ="p "> ,</ span > < span class ="nb "> bool</ span > < span class ="p "> ,</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="kc "> None</ span > < span class ="p "> ),</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> int64</ span > < span class ="p "> ,</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> float64</ span > < span class ="p "> ,</ span >
150151 < span class ="n "> datetime</ span > < span class ="o "> .</ span > < span class ="n "> datetime</ span > < span class ="p "> ,</ span > < span class ="n "> datetime</ span > < span class ="o "> .</ span > < span class ="n "> date</ span > < span class ="p "> ,</ span > < span class ="n "> datetime</ span > < span class ="o "> .</ span > < span class ="n "> timedelta</ span > < span class ="p "> }:</ span >
151152 < span class ="k "> return</ span > < span class ="n "> ct</ span >
153+ < span class ="c1 "> # look at a list or Series</ span >
154+ < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> ,</ span > < span class ="n "> data_algebra</ span > < span class ="o "> .</ span > < span class ="n "> default_data_model</ span > < span class ="o "> .</ span > < span class ="n "> pd</ span > < span class ="o "> .</ span > < span class ="n "> core</ span > < span class ="o "> .</ span > < span class ="n "> series</ span > < span class ="o "> .</ span > < span class ="n "> Series</ span > < span class ="p "> ):</ span >
155+ < span class ="n "> col</ span > < span class ="o "> =</ span > < span class ="n "> col</ span > < span class ="o "> .</ span > < span class ="n "> values</ span >
152156 < span class ="k "> if</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> )</ span > < span class ="o "> <</ span > < span class ="mi "> 1</ span > < span class ="p "> :</ span >
153157 < span class ="k "> return</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="kc "> None</ span > < span class ="p "> )</ span >
154- < span class ="n "> idx</ span > < span class ="o "> =</ span > < span class ="n "> col</ span > < span class ="o "> .</ span > < span class ="n "> notna</ span > < span class ="p "> ()</ span > < span class ="o "> .</ span > < span class ="n "> idxmax</ span > < span class ="p "> ()</ span >
155- < span class ="k "> if</ span > < span class ="n "> idx</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> :</ span >
156- < span class ="k "> return</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]))</ span >
157- < span class ="k "> return</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> [</ span > < span class ="n "> idx</ span > < span class ="p "> ]))</ span >
158+ < span class ="n "> good_idx</ span > < span class ="o "> =</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> where</ span > < span class ="p "> (</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> logical_not</ span > < span class ="p "> (</ span > < span class ="n "> data_algebra</ span > < span class ="o "> .</ span > < span class ="n "> default_data_model</ span > < span class ="o "> .</ span > < span class ="n "> pd</ span > < span class ="o "> .</ span > < span class ="n "> isna</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> )))[</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span >
159+ < span class ="n "> test_idx</ span > < span class ="o "> =</ span > < span class ="mi "> 0</ span >
160+ < span class ="k "> if</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> good_idx</ span > < span class ="p "> )</ span > < span class ="o "> ></ span > < span class ="mi "> 0</ span > < span class ="p "> :</ span >
161+ < span class ="n "> test_idx</ span > < span class ="o "> =</ span > < span class ="n "> good_idx</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span >
162+ < span class ="k "> return</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> [</ span > < span class ="n "> test_idx</ span > < span class ="p "> ]))</ span >
158163
159164
160165< span class ="k "> def</ span > < span class ="nf "> guess_column_types</ span > < span class ="p "> (</ span > < span class ="n "> d</ span > < span class ="p "> ,</ span > < span class ="o "> *</ span > < span class ="p "> ,</ span > < span class ="n "> columns</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ):</ span >
@@ -176,7 +181,7 @@ <h1 class="modulename">
176181 < span class ="n "> res</ span > < span class ="o "> =</ span > < span class ="nb "> dict</ span > < span class ="p "> ()</ span >
177182 < span class ="k "> for</ span > < span class ="n "> c</ span > < span class ="ow "> in</ span > < span class ="n "> columns</ span > < span class ="p "> :</ span >
178183 < span class ="n "> gt</ span > < span class ="o "> =</ span > < span class ="n "> guess_carried_scalar_type</ span > < span class ="p "> (</ span > < span class ="n "> d</ span > < span class ="p "> [</ span > < span class ="n "> c</ span > < span class ="p "> ])</ span >
179- < span class ="k "> if</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> )</ span > < span class ="ow "> or</ span > < span class ="p "> (</ span > < span class ="ow "> not</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="p "> ,</ span > < span class ="nb "> type</ span > < span class ="p "> ))</ span > < span class ="ow "> or</ span > < span class ="nb " > str </ span > < span class ="p " > ( </ span > < span class ="n "> gt </ span > < span class ="p " > ) </ span > < span class ="o "> .</ span > < span class ="n "> endswith </ span > < span class ="p " > ( </ span > < span class ="s1 " > '.Series </ span > < span class ="se " > \' </ span > < span class ="s1 " > >' </ span > < span class ="p "> ) :</ span >
184+ < span class ="k "> if</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> )</ span > < span class ="ow "> or</ span > < span class ="p "> (</ span > < span class ="ow "> not</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="p "> ,</ span > < span class ="nb "> type</ span > < span class ="p "> ))</ span > < span class ="ow "> or</ span > < span class ="n " > gt </ span > < span class ="o " > == </ span > < span class ="n "> data_algebra </ span > < span class ="o " > . </ span > < span class ="n " > default_data_model </ span > < span class =" o "> .</ span > < span class ="n "> pd </ span > < span class ="o " > . </ span > < span class ="n " > core </ span > < span class =" o " > . </ span > < span class ="n " > series </ span > < span class ="o " > . </ span > < span class ="n " > Series </ span > < span class =" p "> :</ span >
180185 < span class ="c1 "> # pandas.concat() poisons types with Series, don't allow that</ span >
181186 < span class ="k "> return</ span > < span class ="nb "> dict</ span > < span class ="p "> ()</ span >
182187 < span class ="n "> res</ span > < span class ="p "> [</ span > < span class ="n "> c</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="n "> gt</ span >
@@ -337,16 +342,21 @@ <h1 class="modulename">
337342< span class ="sd "> :param col: column or scalar to inspect</ span >
338343< span class ="sd "> :return: type of first non-None entry, if any , else type(None)</ span >
339344< span class ="sd "> """</ span >
345+ < span class ="c1 "> # check for scalars first</ span >
340346 < span class ="n "> ct</ span > < span class ="o "> =</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> ))</ span >
341347 < span class ="k "> if</ span > < span class ="n "> ct</ span > < span class ="ow "> in</ span > < span class ="p "> {</ span > < span class ="nb "> str</ span > < span class ="p "> ,</ span > < span class ="nb "> int</ span > < span class ="p "> ,</ span > < span class ="nb "> float</ span > < span class ="p "> ,</ span > < span class ="nb "> bool</ span > < span class ="p "> ,</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="kc "> None</ span > < span class ="p "> ),</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> int64</ span > < span class ="p "> ,</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> float64</ span > < span class ="p "> ,</ span >
342348 < span class ="n "> datetime</ span > < span class ="o "> .</ span > < span class ="n "> datetime</ span > < span class ="p "> ,</ span > < span class ="n "> datetime</ span > < span class ="o "> .</ span > < span class ="n "> date</ span > < span class ="p "> ,</ span > < span class ="n "> datetime</ span > < span class ="o "> .</ span > < span class ="n "> timedelta</ span > < span class ="p "> }:</ span >
343349 < span class ="k "> return</ span > < span class ="n "> ct</ span >
350+ < span class ="c1 "> # look at a list or Series</ span >
351+ < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> ,</ span > < span class ="n "> data_algebra</ span > < span class ="o "> .</ span > < span class ="n "> default_data_model</ span > < span class ="o "> .</ span > < span class ="n "> pd</ span > < span class ="o "> .</ span > < span class ="n "> core</ span > < span class ="o "> .</ span > < span class ="n "> series</ span > < span class ="o "> .</ span > < span class ="n "> Series</ span > < span class ="p "> ):</ span >
352+ < span class ="n "> col</ span > < span class ="o "> =</ span > < span class ="n "> col</ span > < span class ="o "> .</ span > < span class ="n "> values</ span >
344353 < span class ="k "> if</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> )</ span > < span class ="o "> <</ span > < span class ="mi "> 1</ span > < span class ="p "> :</ span >
345354 < span class ="k "> return</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="kc "> None</ span > < span class ="p "> )</ span >
346- < span class ="n "> idx</ span > < span class ="o "> =</ span > < span class ="n "> col</ span > < span class ="o "> .</ span > < span class ="n "> notna</ span > < span class ="p "> ()</ span > < span class ="o "> .</ span > < span class ="n "> idxmax</ span > < span class ="p "> ()</ span >
347- < span class ="k "> if</ span > < span class ="n "> idx</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> :</ span >
348- < span class ="k "> return</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]))</ span >
349- < span class ="k "> return</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> [</ span > < span class ="n "> idx</ span > < span class ="p "> ]))</ span >
355+ < span class ="n "> good_idx</ span > < span class ="o "> =</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> where</ span > < span class ="p "> (</ span > < span class ="n "> numpy</ span > < span class ="o "> .</ span > < span class ="n "> logical_not</ span > < span class ="p "> (</ span > < span class ="n "> data_algebra</ span > < span class ="o "> .</ span > < span class ="n "> default_data_model</ span > < span class ="o "> .</ span > < span class ="n "> pd</ span > < span class ="o "> .</ span > < span class ="n "> isna</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> )))[</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span >
356+ < span class ="n "> test_idx</ span > < span class ="o "> =</ span > < span class ="mi "> 0</ span >
357+ < span class ="k "> if</ span > < span class ="nb "> len</ span > < span class ="p "> (</ span > < span class ="n "> good_idx</ span > < span class ="p "> )</ span > < span class ="o "> ></ span > < span class ="mi "> 0</ span > < span class ="p "> :</ span >
358+ < span class ="n "> test_idx</ span > < span class ="o "> =</ span > < span class ="n "> good_idx</ span > < span class ="p "> [</ span > < span class ="mi "> 0</ span > < span class ="p "> ]</ span >
359+ < span class ="k "> return</ span > < span class ="n "> map_type_to_canonical</ span > < span class ="p "> (</ span > < span class ="nb "> type</ span > < span class ="p "> (</ span > < span class ="n "> col</ span > < span class ="p "> [</ span > < span class ="n "> test_idx</ span > < span class ="p "> ]))</ span >
350360</ pre > </ div >
351361
352362 </ details >
@@ -388,7 +398,7 @@ <h1 class="modulename">
388398 < span class ="n "> res</ span > < span class ="o "> =</ span > < span class ="nb "> dict</ span > < span class ="p "> ()</ span >
389399 < span class ="k "> for</ span > < span class ="n "> c</ span > < span class ="ow "> in</ span > < span class ="n "> columns</ span > < span class ="p "> :</ span >
390400 < span class ="n "> gt</ span > < span class ="o "> =</ span > < span class ="n "> guess_carried_scalar_type</ span > < span class ="p "> (</ span > < span class ="n "> d</ span > < span class ="p "> [</ span > < span class ="n "> c</ span > < span class ="p "> ])</ span >
391- < span class ="k "> if</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> )</ span > < span class ="ow "> or</ span > < span class ="p "> (</ span > < span class ="ow "> not</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="p "> ,</ span > < span class ="nb "> type</ span > < span class ="p "> ))</ span > < span class ="ow "> or</ span > < span class ="nb " > str </ span > < span class ="p " > ( </ span > < span class ="n "> gt </ span > < span class ="p " > ) </ span > < span class ="o "> .</ span > < span class ="n "> endswith </ span > < span class ="p " > ( </ span > < span class ="s1 " > '.Series </ span > < span class ="se " > \' </ span > < span class ="s1 " > >' </ span > < span class ="p "> ) :</ span >
401+ < span class ="k "> if</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> )</ span > < span class ="ow "> or</ span > < span class ="p "> (</ span > < span class ="ow "> not</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> gt</ span > < span class ="p "> ,</ span > < span class ="nb "> type</ span > < span class ="p "> ))</ span > < span class ="ow "> or</ span > < span class ="n " > gt </ span > < span class ="o " > == </ span > < span class ="n "> data_algebra </ span > < span class ="o " > . </ span > < span class ="n " > default_data_model </ span > < span class =" o "> .</ span > < span class ="n "> pd </ span > < span class ="o " > . </ span > < span class ="n " > core </ span > < span class =" o " > . </ span > < span class ="n " > series </ span > < span class ="o " > . </ span > < span class ="n " > Series </ span > < span class =" p "> :</ span >
392402 < span class ="c1 "> # pandas.concat() poisons types with Series, don't allow that</ span >
393403 < span class ="k "> return</ span > < span class ="nb "> dict</ span > < span class ="p "> ()</ span >
394404 < span class ="n "> res</ span > < span class ="p "> [</ span > < span class ="n "> c</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="n "> gt</ span >
0 commit comments