1- import copy
21import numpy as np
32
43
@@ -326,118 +325,4 @@ def _alpha_max(X, y, use_noise_estimate=False):
326325
327326 alpha_max = np .max (np .abs (np .dot (X .T , y )) / (n_samples * sigma_star ))
328327
329- return alpha_max
330-
331-
332- ########################### Data Preprocessing ##########################
def create_X_y(
    X,
    y,
    sampling_with_repetition=True,
    split_percentage=0.8,
    problem_type="regression",
    list_continuous=None,
    random_state=None,
):
    """
    Create a train/validation split of X and y and standardize the result.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples before the splitting process.
    y : ndarray, shape (n_samples, )
        The output samples before the splitting process.
    sampling_with_repetition : bool, default=True
        If True, n_samples training indices are drawn with replacement and
        the samples that were never drawn form the validation set. If False,
        ``floor(split_percentage * n_samples)`` indices are drawn without
        replacement and the remaining samples form the validation set.
    split_percentage : float, default=0.8
        The training/validation cut for the provided data. Only used when
        ``sampling_with_repetition`` is False.
    problem_type : str, default='regression'
        When equal to 'regression' the target is standardized; for any other
        value the target is copied unchanged.
    list_continuous : list or None, default=None
        The column indices of the continuous variables to standardize.
        ``None`` is treated as an empty list (no column of X is scaled).
    random_state : int or None, default=None
        Seed passed to ``np.random.RandomState`` for drawing the indices.

    Returns
    -------
    X_train_scaled : {array-like, sparse matrix} of shape (n_train_samples, n_features)
        The training input samples with scaled continuous variables.
    y_train_scaled : {array-like} of shape (n_train_samples, )
        The training output samples, scaled if ``problem_type`` is
        'regression'.
    X_validation_scaled : {array-like, sparse matrix} of shape (n_validation_samples, n_features)
        The validation input samples with scaled continuous variables.
    y_validation_scaled : {array-like} of shape (n_validation_samples, )
        The validation output samples, scaled if ``problem_type`` is
        'regression'.
    X_scaled : {array-like, sparse matrix} of shape (n_samples, n_features)
        The original input samples with scaled continuous variables.
    y_validation : {array-like} of shape (n_validation_samples, )
        The unscaled output samples restricted to the validation indices.
    scaler_x : Scikit-learn StandardScaler
        The scaler fitted on the continuous variables of the training input
        (left unfitted when ``list_continuous`` is empty).
    scaler_y : Scikit-learn StandardScaler
        The scaler fitted on the training output (left unfitted when
        ``problem_type`` is not 'regression').
    valid_ind : ndarray of int
        The sorted indices of the validation set.
    """
    # The signature defaults to None; normalise it so len() below is safe.
    if list_continuous is None:
        list_continuous = []

    rng = np.random.RandomState(random_state)
    scaler_x, scaler_y = StandardScaler(), StandardScaler()
    n = X.shape[0]

    if sampling_with_repetition:
        train_ind = rng.choice(n, n, replace=True)
    else:
        train_ind = rng.choice(
            n, size=int(np.floor(split_percentage * n)), replace=False
        )
    # Every index that was not drawn for training, in increasing order.
    # setdiff1d keeps an integer dtype even when the result is empty.
    valid_ind = np.setdiff1d(np.arange(n), train_ind)

    X_train, X_validation = X[train_ind], X[valid_ind]
    y_train, y_validation = y[train_ind], y[valid_ind]

    # Scaling X and y
    X_train_scaled = X_train.copy()
    X_validation_scaled = X_validation.copy()
    X_scaled = X.copy()

    if len(list_continuous) > 0:
        # Fit on the training part only, then reuse the same statistics for
        # the validation part and for the full data set.
        X_train_scaled[:, list_continuous] = scaler_x.fit_transform(
            X_train[:, list_continuous]
        )
        X_validation_scaled[:, list_continuous] = scaler_x.transform(
            X_validation[:, list_continuous]
        )
        X_scaled[:, list_continuous] = scaler_x.transform(X[:, list_continuous])

    if problem_type == "regression":
        # NOTE(review): y is handed to the scaler as-is; scikit-learn scalers
        # generally expect 2-D input, so callers presumably pass a column
        # vector -- confirm against call sites.
        y_train_scaled = scaler_y.fit_transform(y_train)
        y_validation_scaled = scaler_y.transform(y_validation)
    else:
        y_train_scaled = y_train.copy()
        y_validation_scaled = y_validation.copy()

    return (
        X_train_scaled,
        y_train_scaled,
        X_validation_scaled,
        y_validation_scaled,
        X_scaled,
        y_validation,
        scaler_x,
        scaler_y,
        valid_ind,
    )
431-
432-
433- def _check_vim_predict_method (method ):
434- """Check if the method is a valid method for variable importance measure
435- prediction"""
436- if method in ["predict" , "predict_proba" , "decision_function" , "transform" ]:
437- return method
438- else :
439- raise ValueError (
440- "The method {} is not a valid method for variable importance measure prediction" .format (
441- method
442- )
443- )
328+ return alpha_max
0 commit comments