88import scipy .sparse
99import sparse as sp
1010import itertools
11+ import inspect
1112from operator import getitem
1213from collections import defaultdict , Counter
1314from sklearn import clone
1415from sklearn .base import TransformerMixin , BaseEstimator
1516from sklearn .linear_model import LassoCV , MultiTaskLassoCV , Lasso , MultiTaskLasso
1617from functools import reduce , wraps
1718from sklearn .utils import check_array , check_X_y
19+ from sklearn .utils .validation import assert_all_finite
1820import warnings
1921from warnings import warn
2022from sklearn .model_selection import KFold , StratifiedKFold , GroupKFold
@@ -512,7 +514,7 @@ def check_inputs(Y, T, X, W=None, multi_output_T=True, multi_output_Y=True):
512514 return Y , T , X , W
513515
514516
def check_input_arrays(*args, validate_len=True, force_all_finite=True):
    """Cast input sequences into numpy arrays.

    Only inputs that are sequence-like (``np.ndim(arg) > 0``) are passed through
    ``check_array``; all other inputs (e.g. scalars or ``None``) are left as is.
    When `validate_len` is True, the converted inputs are also checked for equal
    length along their first dimension.

    Parameters
    ----------
    args : scalar or array-like
        Inputs to be checked.

    validate_len : bool (default=True)
        Whether to check if the input arrays have the same length.

    force_all_finite : bool (default=True)
        Whether to reject inf and nan in input arrays. When False, such values
        are let through, but a warning is issued if any are detected.

    Returns
    -------
    args: array-like
        List of inputs where sequence-like objects have been cast to numpy arrays.

    Raises
    ------
    AssertionError
        If `validate_len` is True and two sequence-like inputs differ in length.
    """
    n = None
    args = list(args)
    for i, arg in enumerate(args):
        if np.ndim(arg) == 0:
            # Not sequence-like: keep the input untouched.
            continue

        new_arg = check_array(arg, dtype=None, ensure_2d=False, accept_sparse=True,
                              force_all_finite=force_all_finite)

        if not force_all_finite:
            # For when checking input values is disabled: do not fail,
            # but still let the caller know that something looks off.
            try:
                assert_all_finite(new_arg)
            except ValueError:
                warnings.warn("Input contains NaN, infinity or a value too large for dtype('float64') "
                              "but input check is disabled. Check the inputs before proceeding.")

        if validate_len:
            m = new_arg.shape[0]
            if n is None:
                # First sequence-like input sets the reference length.
                n = m
            elif m != n:
                # Raised explicitly (rather than via an `assert` statement) so that the
                # check is not stripped when Python runs with optimizations enabled.
                raise AssertionError("Input arrays have incompatible lengths: {} and {}".format(n, m))

        args[i] = new_arg
    return args
547561
548562
@@ -582,6 +596,25 @@ def get_input_columns(X, prefix="X"):
582596 return [f"{ prefix } { i } " for i in range (len_X )]
583597
584598
def get_feature_names_or_default(featurizer, feature_names):
    """Retrieve the output feature names of a featurizer, or build default ones.

    The featurizer's own ``get_feature_names`` method is used when it exists and
    takes either no argument or a single argument (the input feature names).
    Otherwise, the featurizer is applied to a dummy one-row input and default
    names with the ``"feat(X)"`` prefix are generated for the transformed columns.

    Parameters
    ----------
    featurizer : object
        Featurizer, possibly exposing ``get_feature_names`` and/or ``transform``.

    feature_names : list
        Names of the input features; its length sets the width of the dummy input.

    Returns
    -------
    feature_names : list or None
        The transformed feature names, or None if every attempt at retrieving
        them failed (handling is then left to downstream logic).
    """
    if hasattr(featurizer, 'get_feature_names'):
        try:
            # Get number of arguments, some sklearn featurizer don't accept feature_names.
            # For a bound method the count includes `self`.
            arg_no = len(inspect.getfullargspec(featurizer.get_feature_names).args)
        except TypeError:
            # `get_feature_names` cannot be introspected; treat it as atypical
            # and fall back to the default names below instead of failing.
            arg_no = None
        if arg_no == 1:
            return featurizer.get_feature_names()
        elif arg_no == 2:
            return featurizer.get_feature_names(feature_names)
    # Featurizer doesn't have 'get_feature_names' or has atypical 'get_feature_names'
    try:
        # Get feature names using featurizer
        dummy_X = np.ones((1, len(feature_names)))
        transformed = featurizer.transform(dummy_X)
        return get_input_columns(transformed, prefix="feat(X)")
    except Exception:
        # All attempts at retrieving transformed feature names have failed
        # Delegate handling to downstream logic
        return None
616+
617+
585618def check_models (models , n ):
586619 """
587620 Input validation for metalearner models.
0 commit comments