Skip to content

Commit fd65543

Browse files
committed
Remove unnecessary function from utils
1 parent 7afc7e5 commit fd65543

File tree

1 file changed

+1
-116
lines changed

1 file changed

+1
-116
lines changed

src/hidimstat/utils.py

Lines changed: 1 addition & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import copy
21
import numpy as np
32

43

@@ -326,118 +325,4 @@ def _alpha_max(X, y, use_noise_estimate=False):
326325

327326
alpha_max = np.max(np.abs(np.dot(X.T, y)) / (n_samples * sigma_star))
328327

329-
return alpha_max
330-
331-
332-
########################### Data Preprocessing ##########################
333-
def create_X_y(
    X,
    y,
    sampling_with_repetition=True,
    split_percentage=0.8,
    problem_type="regression",
    list_continuous=None,
    random_state=None,
):
    """
    Create a train/validation split of the input data X and target y,
    with continuous variables standardized on the training part.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples before the splitting process.
    y : ndarray, shape (n_samples, )
        The output samples before the splitting process.
    sampling_with_repetition : bool, default=True
        If True, draw the training indices by sampling with replacement
        from the full data set; the number of training samples then equals
        the number of instances. If False, draw a ``split_percentage``
        fraction without replacement.
    split_percentage : float, default=0.8
        The training/validation cut for the provided data (only used when
        ``sampling_with_repetition`` is False).
    problem_type : str, default='regression'
        A classification or a regression problem; y is scaled only for
        'regression'.
    list_continuous : list or None, default=None
        The list of (column indices of) continuous variables. None is
        treated as "no continuous variables".
    random_state : int or None, default=None
        Fixing the seeds of the random generator.

    Returns
    -------
    X_train_scaled : {array-like, sparse matrix} of shape (n_train_samples, n_features)
        The training input samples with scaled continuous variables.
    y_train_scaled : {array-like} of shape (n_train_samples, )
        The resampled training output samples, scaled if continuous.
    X_validation_scaled : {array-like, sparse matrix} of shape (n_validation_samples, n_features)
        The validation input samples with scaled continuous variables.
    y_validation_scaled : {array-like} of shape (n_validation_samples, )
        The validation output samples, scaled if continuous.
    X_scaled : {array-like, sparse matrix} of shape (n_samples, n_features)
        The original input samples with scaled continuous variables.
    y_validation : {array-like} of shape (n_validation_samples, )
        The original (unscaled) output samples at the validation indices.
    scaler_x : Scikit-learn StandardScaler
        The standard scaler encoder for the continuous variables of the input.
    scaler_y : Scikit-learn StandardScaler
        The standard scaler encoder for the output if continuous.
    valid_ind : ndarray
        The array of indices of the validation set.
    """
    rng = np.random.RandomState(random_state)
    scaler_x, scaler_y = StandardScaler(), StandardScaler()
    n = X.shape[0]

    # Bug fix: the default list_continuous=None crashed below at
    # len(list_continuous); normalize it to an empty list.
    if list_continuous is None:
        list_continuous = []

    if sampling_with_repetition:
        train_ind = rng.choice(n, n, replace=True)
    else:
        train_ind = rng.choice(
            n, size=int(np.floor(split_percentage * n)), replace=False
        )
    # Validation set = indices never drawn for training (the out-of-bag
    # samples when sampling with repetition). setdiff1d returns the same
    # sorted unique indices as the original O(n^2) membership loop.
    valid_ind = np.setdiff1d(np.arange(n), train_ind)

    X_train, X_validation = X[train_ind], X[valid_ind]
    y_train, y_validation = y[train_ind], y[valid_ind]

    # Scaling X and y: fit the scaler on the training part only, then
    # apply it to validation and to the full data.
    X_train_scaled = X_train.copy()
    X_validation_scaled = X_validation.copy()
    X_scaled = X.copy()

    if len(list_continuous) > 0:
        X_train_scaled[:, list_continuous] = scaler_x.fit_transform(
            X_train[:, list_continuous]
        )
        X_validation_scaled[:, list_continuous] = scaler_x.transform(
            X_validation[:, list_continuous]
        )
        X_scaled[:, list_continuous] = scaler_x.transform(X[:, list_continuous])
    if problem_type == "regression":
        # NOTE(review): StandardScaler expects 2D input; this assumes y
        # arrives as (n_samples, 1) despite the docstring — confirm with
        # callers before changing.
        y_train_scaled = scaler_y.fit_transform(y_train)
        y_validation_scaled = scaler_y.transform(y_validation)
    else:
        y_train_scaled = y_train.copy()
        y_validation_scaled = y_validation.copy()

    return (
        X_train_scaled,
        y_train_scaled,
        X_validation_scaled,
        y_validation_scaled,
        X_scaled,
        y_validation,
        scaler_x,
        scaler_y,
        valid_ind,
    )
431-
432-
433-
def _check_vim_predict_method(method):
434-
"""Check if the method is a valid method for variable importance measure
435-
prediction"""
436-
if method in ["predict", "predict_proba", "decision_function", "transform"]:
437-
return method
438-
else:
439-
raise ValueError(
440-
"The method {} is not a valid method for variable importance measure prediction".format(
441-
method
442-
)
443-
)
328+
return alpha_max

0 commit comments

Comments
 (0)