GenomicBasedRegression/tuning_parameters_NSV.txt at main · genpat-it/GenomicBasedRegression · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
{
# Hyperparameter search space for a feature-selection step followed by a NuSVR model.
# Keys presumably follow scikit-learn's '<step>__<parameter>' convention for pipeline steps
# named 'feature_selection' and 'model' -- confirm against the code that loads this file.
#
# Keep the number of hyperparameter combinations small: in an exhaustive grid search every
# additional value multiplies the number of fits, whatever model or pipeline is used.
# The active (uncommented) entries below yield 2 (k) x 2 (C) x 2 (nu) x 2 (kernel)
# x 2 (gamma) x 2 (tol) = 64 combinations.
#
# WARNING: the alternative sections reuse the same keys. Uncomment only ONE feature-selection
# section and ONE model section at a time: duplicate keys in a dict literal do not raise an
# error, the last occurrence silently replaces the earlier ones.

# --- Feature selection (SelectKBest) ---
# used only to modify selected SKB behavior
'feature_selection__k': [25, 50], # number of top-scoring features to keep
# NOTE: mutual_info_regression is a bare name, not a string, so it must already be in scope
# wherever this dictionary is evaluated.
'feature_selection__score_func': [mutual_info_regression], # score function (avoid chi2 in a regression context, and f_regression because it captures only linear dependencies)

# --- Feature selection (SelectFromModel with lasso) ---
# used only to modify laSFM behavior
#'feature_selection__threshold': [-float('inf')], # disable thresholding to rely solely on max_features
#'feature_selection__max_features': [25, 50], # upper bound on the number of top features selected
#'feature_selection__estimator__alpha': [0.01, 0.1, 1.0], # regularization strength; larger values give sparser models and faster convergence
#'feature_selection__estimator__max_iter': [500], # reduce iterations for faster training
#'feature_selection__estimator__tol': [1e-2], # relax convergence criteria to save time
#'feature_selection__estimator__fit_intercept': [True], # whether to estimate the intercept

# --- Feature selection (SelectFromModel with elasticnet) ---
# used only to modify enSFM behavior
#'feature_selection__threshold': [-float('inf')], # disable thresholding; rank features by importance
#'feature_selection__max_features': [25, 50], # number of top features to keep
#'feature_selection__estimator__alpha': [0.1], # moderate regularization strength
#'feature_selection__estimator__l1_ratio': [0.5], # balanced L1/L2 penalty
#'feature_selection__estimator__max_iter': [300], # max iterations for convergence
#'feature_selection__estimator__tol': [1e-2], # relaxed convergence tolerance

# --- Feature selection (SelectFromModel with ridge) ---
# used only to modify riSFM behavior
#'feature_selection__threshold': [-float('inf')], # disable hard thresholding; rank by |coef|
#'feature_selection__max_features': [25, 50], # keep top-ranked features only
#'feature_selection__estimator__alpha': [0.1, 1.0, 10.0], # regularization strength
#'feature_selection__estimator__max_iter': [1000], # ensure convergence for high-dimensional OHE (presumably one-hot-encoded) inputs
#'feature_selection__estimator__tol': [1e-3, 1e-4], # trade-off between speed and numerical precision
#'feature_selection__estimator__fit_intercept': [True], # fit an intercept term; recommended for regression

# --- Feature selection (SelectFromModel with random forest) ---
# used only to modify rfSFM behavior
#'feature_selection__threshold': [-float('inf')], # disable thresholding; rank all features by importance
#'feature_selection__max_features': [25, 50], # number of top features to keep
#'feature_selection__estimator__n_estimators': [100], # number of trees
#'feature_selection__estimator__max_depth': [10], # shallow trees for speed

# --- Model tuning (NuSVR, reduced grid) ---
# used only to modify NuSVR behavior
# simplified tuning used together with SKB
# NOTE(review): gamma is ignored by the 'linear' kernel, and degree/coef0 are ignored by both
# kernels listed here, so part of this grid refits equivalent models; splitting the grid per
# kernel would avoid that, if the loader accepts a list of dicts -- confirm before changing.
'model__C': [1, 10], # penalty parameter; regularization strength is inversely proportional to C
'model__nu': [0.25, 0.5], # in (0, 1]: upper bound on the fraction of training errors, lower bound on the fraction of support vectors
'model__kernel': ['rbf', 'linear'], # most common kernels
'model__gamma': ['scale', 0.1], # kernel coefficient: library default and a fixed alternative (unused by 'linear')
'model__degree': [3], # polynomial degree; only used by the 'poly' kernel, which is not in the list above
'model__coef0': [0.0], # independent term of the kernel function; only significant for 'poly' and 'sigmoid'
'model__shrinking': [True], # enable the shrinking heuristic for speed
'model__tol': [1e-3, 1e-4], # stopping tolerance: balance speed and convergence
'model__max_iter': [2000], # hard cap on solver iterations

# --- Model tuning (NuSVR, broader alternative grid) ---
# used only to modify NuSVR behavior
# Alternative to the reduced grid above (do not enable both, the keys are identical).
# These values give 4 x 5 x 3 x 3 x 2 = 360 model combinations before multiplying by the
# feature-selection options.
#'model__C': [0.1, 1, 10, 100], # broad range; regularization is inversely proportional to C, so from strong (0.1) to light (100)
#'model__nu': [0.1, 0.25, 0.5, 0.75, 0.9], # compromise between margin and support vectors
#'model__kernel': ['linear', 'rbf', 'poly'], # linear, RBF and polynomial kernels
#'model__gamma': ['scale', 0.01, 0.1], # library default and two fixed alternatives (unused by 'linear')
#'model__degree': [2], # fixed polynomial degree (only used by 'poly')
#'model__coef0': [0.0], # fixed independent term of the 'poly' kernel
#'model__shrinking': [True], # keep enabled to reduce training time
#'model__tol': [1e-3, 1e-4], # stopping tolerance: balance speed and convergence
#'model__max_iter': [2000], # fixed iteration cap
}