Skip to content

Optimisation Techniques

SRIJA DE CHOWDHURY edited this page Jan 4, 2026 · 1 revision

⚡ Optimization Techniques

Advanced Methods to Improve Model Performance

Optimization Performance


🎯 Optimization Overview

📉 Regularization

Prevent overfitting

🔄 Advanced GD

Better convergence

⚖️ Class Balance

Handle imbalance

🎲 Feature Engineering

Better features


1️⃣ Regularization

What is Regularization?

Regularization adds a penalty term to the cost function to prevent overfitting by discouraging complex models.

Without Regularization          With Regularization
       y                              y
       │    ╱╲  ╱╲                   │      ╱
       │   ╱  ╲╱  ╲                  │     ╱
       │  ╱        ╲                 │    ╱
       │ ╱          ╲                │   ╱
       └──────────── x               └──────────── x
    (Overfitting)                  (Good fit)

L2 Regularization (Ridge)

Modified Cost Function:

$$J(θ) = -\frac{1}{m} \sum_{i=1}^{m} [y^{(i)} \log(h_θ(x^{(i)})) + (1-y^{(i)}) \log(1-h_θ(x^{(i)}))] + \frac{λ}{2m} \sum_{j=1}^{n} θ_j^2$$

Implementation:

class LogisticRegressionL2(LogisticRegression):
    """Logistic Regression with L2 (ridge) regularization.

    Adds a penalty (lambda / 2m) * sum(w_j^2) to the binary cross-entropy
    cost, shrinking weights toward zero to reduce overfitting. The bias
    term is deliberately left unregularized.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000,
                 lambda_reg=0.1, verbose=False):
        super().__init__(learning_rate, n_iterations, verbose)
        self.lambda_reg = lambda_reg  # Regularization strength (lambda)

    def _compute_cost(self, y_true, y_pred):
        """Binary cross-entropy plus the L2 penalty on the weights."""
        m = len(y_true)
        epsilon = 1e-15
        # Clip predictions away from exact 0/1 so log() stays finite.
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

        # Binary cross-entropy
        bce_cost = -1/m * np.sum(
            y_true * np.log(y_pred) +
            (1 - y_true) * np.log(1 - y_pred)
        )

        # L2 regularization term (don't regularize bias)
        l2_penalty = (self.lambda_reg / (2 * m)) * np.sum(self.weights ** 2)

        return bce_cost + l2_penalty

    def fit(self, X, y):
        """Train with batch gradient descent and L2-regularized gradients.

        Returns self to allow chaining.
        """
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        # BUG FIX: reset the history so refitting the same instance does not
        # append to the cost curve of a previous fit.
        self.cost_history = []

        for i in range(self.n_iterations):
            # Forward propagation
            z = np.dot(X, self.weights) + self.bias
            predictions = self._sigmoid(z)

            # Compute cost
            cost = self._compute_cost(y, predictions)
            self.cost_history.append(cost)

            # Backward propagation: BCE gradient plus (lambda/m) * w
            dz = predictions - y
            dw = (1/m) * np.dot(X.T, dz) + (self.lambda_reg / m) * self.weights
            db = (1/m) * np.sum(dz)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            if self.verbose and i % 100 == 0:
                print(f"Iteration {i}: Cost = {cost:.4f}")

        return self

# Usage — assumes X_train_scaled / y_train were prepared earlier in this wiki.
model_l2 = LogisticRegressionL2(
    learning_rate=0.01,
    n_iterations=1000,
    lambda_reg=0.1,  # Try different values:  0.001, 0.01, 0.1, 1, 10
    verbose=True
)
model_l2.fit(X_train_scaled, y_train)

L1 Regularization (Lasso)

Modified Cost Function:

$$J(θ) = -\frac{1}{m} \sum_{i=1}^{m} [y^{(i)} \log(h_θ(x^{(i)})) + (1-y^{(i)}) \log(1-h_θ(x^{(i)}))] + \frac{λ}{m} \sum_{j=1}^{n} |θ_j|$$

Implementation:

class LogisticRegressionL1(LogisticRegression):
    """Logistic Regression with L1 (lasso) regularization.

    Adds a penalty (lambda / m) * sum(|w_j|) to the binary cross-entropy
    cost. The L1 subgradient drives many weights exactly to zero, so this
    also acts as implicit feature selection. The bias is not regularized.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000,
                 lambda_reg=0.1, verbose=False):
        super().__init__(learning_rate, n_iterations, verbose)
        self.lambda_reg = lambda_reg  # Regularization strength (lambda)

    def _compute_cost(self, y_true, y_pred):
        """Binary cross-entropy plus the L1 penalty on the weights."""
        m = len(y_true)
        epsilon = 1e-15
        # Clip predictions away from exact 0/1 so log() stays finite.
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

        # Binary cross-entropy
        bce_cost = -1/m * np.sum(
            y_true * np.log(y_pred) +
            (1 - y_true) * np.log(1 - y_pred)
        )

        # L1 regularization term
        l1_penalty = (self.lambda_reg / m) * np.sum(np.abs(self.weights))

        return bce_cost + l1_penalty

    def fit(self, X, y):
        """Train with gradient descent using the L1 subgradient.

        Returns self to allow chaining.
        """
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        # BUG FIX: reset the history so refitting the same instance does not
        # append to the cost curve of a previous fit.
        self.cost_history = []

        for i in range(self.n_iterations):
            # Forward propagation
            z = np.dot(X, self.weights) + self.bias
            predictions = self._sigmoid(z)

            # Compute cost
            cost = self._compute_cost(y, predictions)
            self.cost_history.append(cost)

            # Backward propagation: np.sign(w) is the L1 subgradient (0 at w=0)
            dz = predictions - y
            dw = (1/m) * np.dot(X.T, dz) + (self.lambda_reg / m) * np.sign(self.weights)
            db = (1/m) * np.sum(dz)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            if self.verbose and i % 100 == 0:
                print(f"Iteration {i}: Cost = {cost:.4f}")

        return self

# Usage — larger lambda_reg pushes more weights to exactly zero.
model_l1 = LogisticRegressionL1(
    learning_rate=0.01,
    n_iterations=1000,
    lambda_reg=0.1,
    verbose=True
)
model_l1.fit(X_train_scaled, y_train)

Comparison Table

| Aspect | L1 (Lasso) | L2 (Ridge) |
|:-------|:-----------|:-----------|
| Penalty | Sum of absolute weights | Sum of squared weights |
| Formula | λ Σ\|θⱼ\| | λ Σθⱼ² |
| Effect | Some weights → 0 | All weights → small |
| Use Case | Feature selection | Prevent overfitting |
| Sparsity | Sparse (many zeros) | Dense (no zeros) |

Finding Optimal λ

def find_best_lambda(X_train, y_train, X_val, y_val, lambdas):
    """
    Find the optimal L2 regularization parameter by validation accuracy.

    Trains one LogisticRegressionL2 per candidate lambda, scores it on
    train and validation splits, plots accuracy and the train-validation
    gap versus lambda, and returns the best-performing value.

    Parameters:
    -----------
    X_train, y_train : training data
    X_val, y_val : validation data
    lambdas : list of lambda values to try

    Returns:
    --------
    best_lambda, results (pandas DataFrame with one row per lambda)
    """
    results = []
    # BUG FIX: start below any reachable accuracy so best_lambda is always
    # assigned; the original init of 0 left it None (and later crashed
    # plt.axvline) when every candidate scored 0.0.
    best_score = -np.inf
    best_lambda = None

    for lam in lambdas:
        # Train model
        model = LogisticRegressionL2(
            learning_rate=0.01,
            n_iterations=1000,
            lambda_reg=lam
        )
        model.fit(X_train, y_train)

        # Evaluate on both splits; a large gap indicates overfitting.
        train_score = model.score(X_train, y_train)
        val_score = model.score(X_val, y_val)

        results.append({
            'lambda': lam,
            'train_accuracy': train_score,
            'val_accuracy': val_score,
            'gap': train_score - val_score
        })

        if val_score > best_score:
            best_score = val_score
            best_lambda = lam

        # BUG FIX: the original used ':. 4f' (space inside the precision),
        # an invalid format spec that raises ValueError at runtime.
        print(f"λ={lam: 6.4f} | Train: {train_score:.4f} | Val: {val_score:.4f} | Gap: {train_score - val_score:.4f}")

    # Visualize results
    results_df = pd.DataFrame(results)

    plt.figure(figsize=(12, 5))

    # Left panel: accuracy on both splits over a log-scaled lambda axis.
    plt.subplot(1, 2, 1)
    plt.semilogx(results_df['lambda'], results_df['train_accuracy'],
                 marker='o', label='Train', linewidth=2)
    plt.semilogx(results_df['lambda'], results_df['val_accuracy'],
                 marker='s', label='Validation', linewidth=2)
    plt.axvline(x=best_lambda, color='red', linestyle='--',
                label=f'Best λ = {best_lambda}')
    plt.xlabel('Lambda (λ)', fontweight='bold')
    plt.ylabel('Accuracy', fontweight='bold')
    plt.title('🔍 Regularization Parameter Tuning', fontweight='bold', pad=15)
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: train-validation gap (overfitting indicator).
    plt.subplot(1, 2, 2)
    plt.semilogx(results_df['lambda'], results_df['gap'],
                 marker='D', color='green', linewidth=2)
    plt.axvline(x=best_lambda, color='red', linestyle='--',
                label=f'Best λ = {best_lambda}')
    plt.xlabel('Lambda (λ)', fontweight='bold')
    plt.ylabel('Train-Val Gap', fontweight='bold')
    plt.title('⚖️ Overfitting Gap Analysis', fontweight='bold', pad=15)
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"\n🏆 Best Lambda: {best_lambda}")
    print(f"📊 Best Validation Accuracy: {best_score:.4f}")

    return best_lambda, results_df

# Usage — sweep a log-spaced grid of regularization strengths.
lambdas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
best_lambda, results = find_best_lambda(X_train_scaled, y_train, 
                                        X_val_scaled, y_val, lambdas)

2️⃣ Advanced Gradient Descent Methods

Mini-Batch Gradient Descent

class LogisticRegressionMiniBatch(LogisticRegression):
    """Logistic Regression trained with mini-batch gradient descent.

    Each epoch shuffles the data and updates the parameters once per
    batch of ``batch_size`` samples — a compromise between batch GD
    (smooth but slow) and SGD (fast but noisy).
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000,
                 batch_size=32, verbose=False):
        super().__init__(learning_rate, n_iterations, verbose)
        self.batch_size = batch_size  # Samples per parameter update

    def fit(self, X, y):
        """Train using mini-batch gradient descent.

        Returns self to allow chaining.
        """
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        # BUG FIX: reset the history so refitting the same instance does not
        # append to the cost curve of a previous fit.
        self.cost_history = []

        for i in range(self.n_iterations):
            # Shuffle once per epoch so batch composition varies.
            indices = np.random.permutation(m)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            # Process mini-batches
            for start in range(0, m, self.batch_size):
                end = min(start + self.batch_size, m)
                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]
                batch_size_actual = len(y_batch)  # last batch may be smaller

                # Forward propagation
                z = np.dot(X_batch, self.weights) + self.bias
                predictions = self._sigmoid(z)

                # Backward propagation
                dz = predictions - y_batch
                dw = (1/batch_size_actual) * np.dot(X_batch.T, dz)
                db = (1/batch_size_actual) * np.sum(dz)

                # Update parameters
                self.weights -= self.learning_rate * dw
                self.bias -= self.learning_rate * db

            # Compute full-dataset cost once per epoch for monitoring
            z_full = np.dot(X, self.weights) + self.bias
            pred_full = self._sigmoid(z_full)
            cost = self._compute_cost(y, pred_full)
            self.cost_history.append(cost)

            if self.verbose and i % 100 == 0:
                print(f"Epoch {i}:  Cost = {cost:.4f}")

        return self

# Usage — batch_size=32 updates the weights ~m/32 times per epoch.
model_minibatch = LogisticRegressionMiniBatch(
    learning_rate=0.01,
    n_iterations=1000,
    batch_size=32,
    verbose=True
)
model_minibatch.fit(X_train_scaled, y_train)

Comparison of GD Variants

| Method | Update Frequency | Speed | Memory | Convergence |
|:-------|:-----------------|:------|:-------|:------------|
| Batch GD | Once per epoch | Slow | High | Smooth |
| Stochastic GD | Once per sample | Fast | Low | Noisy |
| Mini-Batch GD | Once per batch | Medium | Medium | Balanced ✅ |

Momentum

class LogisticRegressionMomentum(LogisticRegression):
    """Logistic Regression trained with momentum gradient descent.

    Keeps an exponentially-weighted velocity of past gradient steps and
    moves along it, which damps oscillation and accelerates progress
    along consistently-signed gradient directions.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000,
                 momentum=0.9, verbose=False):
        super().__init__(learning_rate, n_iterations, verbose)
        self.momentum = momentum  # Velocity decay factor (typically 0.9)

    def fit(self, X, y):
        """Train with momentum updates.

        Returns self to allow chaining.
        """
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        # BUG FIX: reset the history so refitting the same instance does not
        # append to the cost curve of a previous fit.
        self.cost_history = []

        # Initialize velocity
        v_w = np.zeros(n)
        v_b = 0

        for i in range(self.n_iterations):
            # Forward propagation
            z = np.dot(X, self.weights) + self.bias
            predictions = self._sigmoid(z)

            # Compute cost
            cost = self._compute_cost(y, predictions)
            self.cost_history.append(cost)

            # Backward propagation
            dz = predictions - y
            dw = (1/m) * np.dot(X.T, dz)
            db = (1/m) * np.sum(dz)

            # Update velocity: decay the old velocity, step against the gradient
            v_w = self.momentum * v_w - self.learning_rate * dw
            v_b = self.momentum * v_b - self.learning_rate * db

            # Update parameters
            self.weights += v_w
            self.bias += v_b

            if self.verbose and i % 100 == 0:
                print(f"Iteration {i}: Cost = {cost:.4f}")

        return self

# Usage — momentum=0.9 retains ~90% of the previous update direction.
model_momentum = LogisticRegressionMomentum(
    learning_rate=0.01,
    n_iterations=1000,
    momentum=0.9,
    verbose=True
)
model_momentum.fit(X_train_scaled, y_train)

Adam Optimizer

class LogisticRegressionAdam(LogisticRegression):
    """Logistic Regression trained with the Adam optimizer.

    Adam keeps exponentially-decayed estimates of the gradient mean
    (1st moment) and uncentered variance (2nd moment), applies bias
    correction for the early steps, and scales each parameter's step
    by the ratio of the two estimates.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000,
                 beta1=0.9, beta2=0.999, epsilon=1e-8, verbose=False):
        super().__init__(learning_rate, n_iterations, verbose)
        self.beta1 = beta1  # Exponential decay rate for 1st moment
        self.beta2 = beta2  # Exponential decay rate for 2nd moment
        self.epsilon = epsilon  # Small constant for numerical stability

    def fit(self, X, y):
        """Train with Adam updates.

        Returns self to allow chaining.
        """
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        # BUG FIX: reset the history so refitting the same instance does not
        # append to the cost curve of a previous fit.
        self.cost_history = []

        # Initialize moment estimates
        m_w = np.zeros(n)  # 1st moment (mean)
        v_w = np.zeros(n)  # 2nd moment (variance)
        m_b = 0
        v_b = 0

        # Start at t=1 so the bias-correction denominators are non-zero.
        for i in range(1, self.n_iterations + 1):
            # Forward propagation
            z = np.dot(X, self.weights) + self.bias
            predictions = self._sigmoid(z)

            # Compute cost
            cost = self._compute_cost(y, predictions)
            self.cost_history.append(cost)

            # Backward propagation
            dz = predictions - y
            dw = (1/m) * np.dot(X.T, dz)
            db = (1/m) * np.sum(dz)

            # Update biased first moment estimate
            m_w = self.beta1 * m_w + (1 - self.beta1) * dw
            m_b = self.beta1 * m_b + (1 - self.beta1) * db

            # Update biased second moment estimate
            v_w = self.beta2 * v_w + (1 - self.beta2) * (dw ** 2)
            v_b = self.beta2 * v_b + (1 - self.beta2) * (db ** 2)

            # Compute bias-corrected moment estimates
            m_w_corrected = m_w / (1 - self.beta1 ** i)
            m_b_corrected = m_b / (1 - self.beta1 ** i)
            v_w_corrected = v_w / (1 - self.beta2 ** i)
            v_b_corrected = v_b / (1 - self.beta2 ** i)

            # Update parameters (per-parameter adaptive step size)
            self.weights -= self.learning_rate * m_w_corrected / (np.sqrt(v_w_corrected) + self.epsilon)
            self.bias -= self.learning_rate * m_b_corrected / (np.sqrt(v_b_corrected) + self.epsilon)

            # i starts at 1, so report 0-based iteration numbers like siblings.
            if self.verbose and (i-1) % 100 == 0:
                print(f"Iteration {i-1}: Cost = {cost:.4f}")

        return self

# Usage — Adam defaults (beta1=0.9, beta2=0.999, epsilon=1e-8) are kept.
model_adam = LogisticRegressionAdam(
    learning_rate=0.01,
    n_iterations=1000,
    verbose=True
)
model_adam.fit(X_train_scaled, y_train)

3️⃣ Learning Rate Scheduling

Step Decay

class LogisticRegressionStepDecay(LogisticRegression):
    """Logistic Regression with a step-decay learning-rate schedule.

    The learning rate is multiplied by ``decay_rate`` every
    ``decay_steps`` iterations:
    lr = initial_lr * decay_rate ** (iteration // decay_steps)
    """

    def __init__(self, initial_lr=0.1, n_iterations=1000,
                 decay_rate=0.5, decay_steps=200, verbose=False):
        super().__init__(initial_lr, n_iterations, verbose)
        self.initial_lr = initial_lr    # Learning rate at iteration 0
        self.decay_rate = decay_rate    # Multiplicative decay factor
        self.decay_steps = decay_steps  # Iterations between decays

    def fit(self, X, y):
        """Train with a step-decayed learning rate.

        Returns self to allow chaining.
        """
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        self.lr_history = []
        # BUG FIX: lr_history was reset per fit but cost_history was not,
        # so refitting appended to the previous cost curve. Reset both.
        self.cost_history = []

        for i in range(self.n_iterations):
            # Update learning rate according to the step schedule.
            self.learning_rate = self.initial_lr * (self.decay_rate ** (i // self.decay_steps))
            self.lr_history.append(self.learning_rate)

            # Forward propagation
            z = np.dot(X, self.weights) + self.bias
            predictions = self._sigmoid(z)

            # Compute cost
            cost = self._compute_cost(y, predictions)
            self.cost_history.append(cost)

            # Backward propagation
            dz = predictions - y
            dw = (1/m) * np.dot(X.T, dz)
            db = (1/m) * np.sum(dz)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            if self.verbose and i % 100 == 0:
                print(f"Iteration {i}: Cost = {cost:.4f}, LR = {self.learning_rate:.6f}")

        return self

    def plot_lr_schedule(self):
        """Plot the learning-rate schedule next to the resulting cost curve."""
        plt.figure(figsize=(12, 5))

        plt.subplot(1, 2, 1)
        plt.plot(self.lr_history, linewidth=2, color='purple')
        plt.xlabel('Iterations', fontweight='bold')
        plt.ylabel('Learning Rate', fontweight='bold')
        plt.title('📉 Learning Rate Schedule', fontweight='bold', pad=15)
        plt.grid(True, alpha=0.3)

        plt.subplot(1, 2, 2)
        plt.plot(self.cost_history, linewidth=2, color='blue')
        plt.xlabel('Iterations', fontweight='bold')
        plt.ylabel('Cost', fontweight='bold')
        plt.title('📊 Cost History', fontweight='bold', pad=15)
        plt.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

# Usage — halve the learning rate every 200 iterations, then plot the schedule.
model_decay = LogisticRegressionStepDecay(
    initial_lr=0.1,
    n_iterations=1000,
    decay_rate=0.5,
    decay_steps=200,
    verbose=True
)
model_decay.fit(X_train_scaled, y_train)
model_decay.plot_lr_schedule()

Exponential Decay

def exponential_decay(initial_lr, iteration, decay_rate):
    """Exponentially decay the learning rate.

    Computes ``initial_lr * exp(-decay_rate * iteration)``, so the rate
    shrinks smoothly toward zero as training progresses.
    """
    decay_factor = np.exp(-decay_rate * iteration)
    return initial_lr * decay_factor

Time-Based Decay

def time_based_decay(initial_lr, iteration, decay_rate):
    """Inverse-time (hyperbolic) learning rate decay.

    Computes ``initial_lr / (1 + decay_rate * iteration)``, so the rate
    falls off proportionally to 1/iteration for large iteration counts.
    """
    denominator = 1 + decay_rate * iteration
    return initial_lr / denominator

4️⃣ Handling Class Imbalance

Class Weights

class LogisticRegressionWeighted(LogisticRegression):
    """Logistic Regression with per-sample class weights.

    With class_weight='balanced', each sample is weighted by
    n_samples / (n_classes * n_samples_in_its_class), so errors on the
    minority class contribute more to both the cost and the gradient.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000,
                 class_weight='balanced', verbose=False):
        super().__init__(learning_rate, n_iterations, verbose)
        self.class_weight = class_weight  # 'balanced' or any other value for uniform weights

    def _compute_class_weights(self, y):
        """Return {class: weight} using the 'balanced' heuristic."""
        unique_classes = np.unique(y)
        n_samples = len(y)
        n_classes = len(unique_classes)

        weights = {}
        for cls in unique_classes:
            n_samples_cls = np.sum(y == cls)
            weights[cls] = n_samples / (n_classes * n_samples_cls)

        return weights

    def fit(self, X, y):
        """Train with class-weighted cost and gradients.

        Returns self to allow chaining.
        """
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        # BUG FIX: reset the history so refitting the same instance does not
        # append to the cost curve of a previous fit.
        self.cost_history = []

        # Compute per-sample weights from the class distribution.
        if self.class_weight == 'balanced':
            class_weights = self._compute_class_weights(y)
            sample_weights = np.array([class_weights[label] for label in y])
        else:
            sample_weights = np.ones(m)

        # FIX: only report when verbose — the original printed unconditionally,
        # contradicting the verbose flag used by every sibling class.
        if self.verbose:
            print(f"Class weights: {class_weights if self.class_weight == 'balanced' else 'None'}")

        for i in range(self.n_iterations):
            # Forward propagation
            z = np.dot(X, self.weights) + self.bias
            predictions = self._sigmoid(z)

            # Compute weighted cost (clipped to keep log() finite)
            epsilon = 1e-15
            predictions_clipped = np.clip(predictions, epsilon, 1 - epsilon)
            cost = -1/m * np.sum(
                sample_weights * (y * np.log(predictions_clipped) + 
                                 (1 - y) * np.log(1 - predictions_clipped))
            )
            self.cost_history.append(cost)

            # Backward propagation: each sample's gradient scaled by its weight
            dz = (predictions - y) * sample_weights
            dw = (1/m) * np.dot(X.T, dz)
            db = (1/m) * np.sum(dz)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            if self.verbose and i % 100 == 0:
                print(f"Iteration {i}:  Cost = {cost:.4f}")

        return self

# Usage — 'balanced' reweights samples inversely to their class frequency.
model_weighted = LogisticRegressionWeighted(
    learning_rate=0.01,
    n_iterations=1000,
    class_weight='balanced',
    verbose=True
)
model_weighted.fit(X_train_scaled, y_train)

SMOTE (Synthetic Minority Over-sampling)

# NOTE(review): imblearn is the third-party "imbalanced-learn" package — installed separately.
from imblearn.over_sampling import SMOTE

# Apply SMOTE: synthesize new minority-class samples until classes are balanced
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

print(f"Original class distribution: {np.bincount(y_train)}")
print(f"Resampled class distribution: {np. bincount(y_train_resampled)}")

# Train on resampled data (only training data is resampled — never the test set)
model = LogisticRegression(learning_rate=0.01, n_iterations=1000)
model.fit(X_train_resampled, y_train_resampled)

5️⃣ Feature Engineering

Polynomial Features

from sklearn.preprocessing import PolynomialFeatures

# Create polynomial features: degree-2 terms plus pairwise interactions
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train_scaled)
# Reuse the transformer fitted on train — never refit on test data.
X_test_poly = poly.transform(X_test_scaled)

print(f"Original features:  {X_train_scaled.shape[1]}")
print(f"Polynomial features: {X_train_poly.shape[1]}")

# Train on polynomial features
model_poly = LogisticRegression(learning_rate=0.01, n_iterations=1000)
model_poly.fit(X_train_poly, y_train)

Feature Selection

from sklearn.feature_selection import SelectKBest, chi2, f_classif

def select_best_features(X_train, y_train, X_test, k=10):
    """Keep only the k highest-scoring features (ANOVA F-test).

    Parameters
    ----------
    X_train, y_train : training data
    X_test : test data
    k : number of features to keep

    Returns
    -------
    X_train_selected, X_test_selected, selected_indices
    """
    # Rank every feature by the ANOVA F-statistic and keep the top k.
    kbest = SelectKBest(score_func=f_classif, k=k)

    X_train_selected = kbest.fit_transform(X_train, y_train)
    X_test_selected = kbest.transform(X_test)

    # Column indices of the surviving features.
    selected_indices = kbest.get_support(indices=True)

    print(f"✅ Selected {k} best features")
    print(f"   Feature indices: {selected_indices}")

    # Bar chart of all scores, with the selected features highlighted in red.
    feature_scores = kbest.scores_

    plt.figure(figsize=(12, 6))
    plt.bar(range(len(feature_scores)), feature_scores, alpha=0.7)
    plt.bar(selected_indices, feature_scores[selected_indices],
            alpha=1.0, color='red', label='Selected')
    plt.xlabel('Feature Index', fontweight='bold')
    plt.ylabel('Score', fontweight='bold')
    plt.title('🔍 Feature Selection Scores', fontweight='bold', pad=15)
    plt.legend()
    plt.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()
    plt.show()

    return X_train_selected, X_test_selected, selected_indices

# Usage — keep the 10 highest-scoring features of the scaled data.
X_train_selected, X_test_selected, indices = select_best_features(
    X_train_scaled, y_train, X_test_scaled, k=10
)

6️⃣ Ensemble Methods

Voting Classifier

class VotingClassifier: 
    """Simple voting ensemble.

    Wraps a list of models sharing the fit/predict(/predict_proba)
    interface; class labels come from a majority vote and probabilities
    from the member average.
    """
    
    def __init__(self, models):
        self.models = models
    
    def fit(self, X, y):
        """Train all models"""
        total = len(self.models)
        for idx, member in enumerate(self.models, start=1):
            print(f"Training model {idx}/{total}...")
            member.fit(X, y)
        return self
    
    def predict(self, X):
        """Majority vote prediction"""
        stacked = np.array([member.predict(X) for member in self.models])
        vote_share = np.mean(stacked, axis=0)
        return np.round(vote_share).astype(int)
    
    def predict_proba(self, X):
        """Average probability"""
        stacked = np.array([member.predict_proba(X) for member in self.models])
        return np.mean(stacked, axis=0)

# Usage — ensemble of three differently-optimized logistic regressions.
models = [
    LogisticRegression(learning_rate=0.01, n_iterations=1000),
    LogisticRegressionL2(learning_rate=0.01, n_iterations=1000, lambda_reg=0.1),
    LogisticRegressionAdam(learning_rate=0.01, n_iterations=1000)
]

ensemble = VotingClassifier(models)
ensemble.fit(X_train_scaled, y_train)
predictions = ensemble.predict(X_test_scaled)

7️⃣ Complete Optimization Pipeline

def optimize_model(X_train, y_train, X_val, y_val):
    """Train six optimization variants and return the best one.

    Trains a baseline, L2-regularized, mini-batch, Adam, LR-decay, and
    class-weighted logistic regression on (X_train, y_train), scores
    each on the validation set, prints/plots a comparison, and returns
    the model with the highest validation accuracy.

    Returns
    -------
    (best_model, results_df) : the winning model and a sorted DataFrame
    of all validation accuracies.
    """
    print("=" * 60)
    print("🚀 STARTING OPTIMIZATION PIPELINE")
    print("=" * 60)
    
    results = []
    
    # 1. Baseline model
    print("\n1️⃣ Training baseline model...")
    model_baseline = LogisticRegression(learning_rate=0.01, n_iterations=1000)
    model_baseline.fit(X_train, y_train)
    baseline_score = model_baseline.score(X_val, y_val)
    results.append({'model': 'Baseline', 'accuracy': baseline_score})
    print(f"   Validation Accuracy: {baseline_score:.4f}")
    
    # 2. L2 Regularization
    print("\n2️⃣ Training with L2 regularization...")
    model_l2 = LogisticRegressionL2(learning_rate=0.01, n_iterations=1000, lambda_reg=0.1)
    model_l2.fit(X_train, y_train)
    l2_score = model_l2.score(X_val, y_val)
    results.append({'model': 'L2 Regularization', 'accuracy': l2_score})
    print(f"   Validation Accuracy: {l2_score:.4f}")
    
    # 3. Mini-batch GD
    print("\n3️⃣ Training with mini-batch gradient descent...")
    model_mb = LogisticRegressionMiniBatch(learning_rate=0.01, n_iterations=1000, batch_size=32)
    model_mb.fit(X_train, y_train)
    mb_score = model_mb.score(X_val, y_val)
    results.append({'model': 'Mini-Batch GD', 'accuracy': mb_score})
    print(f"   Validation Accuracy:  {mb_score:.4f}")
    
    # 4. Adam optimizer
    print("\n4️⃣ Training with Adam optimizer...")
    model_adam = LogisticRegressionAdam(learning_rate=0.01, n_iterations=1000)
    model_adam.fit(X_train, y_train)
    adam_score = model_adam.score(X_val, y_val)
    results.append({'model': 'Adam Optimizer', 'accuracy': adam_score})
    print(f"   Validation Accuracy: {adam_score:.4f}")
    
    # 5. Learning rate decay
    print("\n5️⃣ Training with learning rate decay...")
    model_decay = LogisticRegressionStepDecay(initial_lr=0.1, n_iterations=1000, 
                                               decay_rate=0.5, decay_steps=200)
    model_decay.fit(X_train, y_train)
    decay_score = model_decay.score(X_val, y_val)
    results.append({'model': 'LR Decay', 'accuracy': decay_score})
    print(f"   Validation Accuracy:  {decay_score:.4f}")
    
    # 6. Class weights
    print("\n6️⃣ Training with class weights...")
    model_weighted = LogisticRegressionWeighted(learning_rate=0.01, n_iterations=1000, 
                                                 class_weight='balanced')
    model_weighted.fit(X_train, y_train)
    weighted_score = model_weighted.score(X_val, y_val)
    results.append({'model': 'Class Weights', 'accuracy': weighted_score})
    print(f"   Validation Accuracy: {weighted_score:.4f}")
    
    # Results summary
    print("\n" + "=" * 60)
    print("📊 OPTIMIZATION RESULTS SUMMARY")
    print("=" * 60)
    
    results_df = pd.DataFrame(results)
    results_df = results_df.sort_values('accuracy', ascending=False)
    print(results_df.to_string(index=False))
    
    # Visualize as a horizontal bar chart, one bar per variant
    plt.figure(figsize=(12, 6))
    colors = plt.cm.viridis(np.linspace(0, 1, len(results_df)))
    bars = plt.barh(results_df['model'], results_df['accuracy'], color=colors)
    plt.xlabel('Validation Accuracy', fontweight='bold', fontsize=12)
    plt.ylabel('Model', fontweight='bold', fontsize=12)
    plt.title('🏆 Model Comparison - Optimization Techniques', 
              fontweight='bold', fontsize=14, pad=20)
    plt.grid(True, alpha=0.3, axis='x')
    
    # Add value labels at the end of each bar
    for i, bar in enumerate(bars):
        width = bar.get_width()
        plt.text(width, bar.get_y() + bar.get_height()/2, 
                f'{width:.4f}',
                ha='left', va='center', fontweight='bold', fontsize=10)
    
    plt.tight_layout()
    plt.show()
    
    # Rows are sorted by accuracy descending, so row 0 is the winner.
    best_model_name = results_df.iloc[0]['model']
    best_accuracy = results_df.iloc[0]['accuracy']
    
    print(f"\n🏆 Best Model: {best_model_name}")
    # BUG FIX: the original used ':. 4f' (space inside the precision),
    # an invalid format spec that raises ValueError at runtime.
    print(f"📊 Best Accuracy: {best_accuracy:.4f}")
    
    # Map to actual model
    models_map = {
        'Baseline': model_baseline,
        'L2 Regularization': model_l2,
        'Mini-Batch GD': model_mb,
        'Adam Optimizer': model_adam,
        'LR Decay': model_decay,
        'Class Weights': model_weighted
    }
    
    return models_map[best_model_name], results_df

# Usage — runs the full pipeline; returns the winning model and a results table.
best_model, results = optimize_model(X_train_scaled, y_train, 
                                     X_val_scaled, y_val)

💡 Optimization Best Practices

✅ Do

  • Start with baseline model
  • Try regularization first
  • Use validation set for tuning
  • Monitor both train and val metrics
  • Experiment with learning rates
  • Use Adam for quick convergence
  • Handle class imbalance
  • Document all experiments

❌ Don't

  • Skip baseline comparison
  • Optimize on test set
  • Use single metric only
  • Ignore overfitting
  • Stick to one optimizer
  • Forget about class balance
  • Over-engineer too early
  • Forget to save results

Clone this wiki locally