
Commit 2281a23

Merge pull request #283 from emharsha1812/main
Addition: ML Problems on Logistic Regression, Cross Entropy Loss, Early stopping
2 parents: 873dbf5 + 8a2e184

File tree: 2 files changed (+86, -0 lines)
Lines changed: 38 additions & 0 deletions

## Binary Classification with Logistic Regression

Logistic Regression is a fundamental algorithm for binary classification. Given input features and learned model parameters (weights and bias), your task is to implement the prediction function that computes class probabilities.

### Mathematical Background

The logistic regression model makes predictions using the sigmoid function:

$\sigma(z) = \frac{1}{1 + e^{-z}}$

where $z$ is the linear combination of features and weights plus the bias:

$z = \mathbf{w}^T\mathbf{x} + b = \sum_{i=1}^{n} w_ix_i + b$
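
As a quick worked example with illustrative numbers (not part of the problem statement), take a single sample $\mathbf{x} = (1, 2)$ with $\mathbf{w} = (0.5, -0.25)$ and $b = 0.1$:

```python
import numpy as np

# Illustrative values: x = [1, 2], w = [0.5, -0.25], b = 0.1
x = np.array([1.0, 2.0])
w = np.array([0.5, -0.25])
b = 0.1

z = np.dot(w, x) + b           # 0.5*1 + (-0.25)*2 + 0.1 = 0.1
prob = 1 / (1 + np.exp(-z))    # sigmoid(0.1) ≈ 0.525
print(z, prob)
```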

### Implementation Requirements

Your task is to implement a function that:

- Takes a batch of samples $\mathbf{X}$ (shape: N × D), weights $\mathbf{w}$ (shape: D), and a bias $b$
- Computes $z = \mathbf{X}\mathbf{w} + b$ for all samples
- Applies the sigmoid function to get probabilities
- Returns binary predictions, i.e., 0 or 1, using a threshold of 0.5 (see the sketch after this list)
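
For concreteness, here is a minimal vectorized sketch of these four steps on an illustrative batch; the array values are assumptions chosen for demonstration, not prescribed test data:

```python
import numpy as np

# Illustrative batch: N = 3 samples, D = 2 features
X = np.array([[2.0, 1.0], [0.0, 0.0], [-3.0, -1.0]])
w = np.array([1.0, 1.0])
b = -0.5

z = X @ w + b                    # [ 2.5 -0.5 -4.5]
probs = 1 / (1 + np.exp(-z))     # approx. [0.924 0.378 0.011]
preds = (probs >= 0.5).astype(int)
print(preds)                     # [1 0 0]
```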

### Important Considerations

- Handle numerical stability in the sigmoid computation
- Use efficient vectorized NumPy operations
- Return binary predictions, i.e., zeros and ones

### Hint

To prevent overflow in the exponential calculation of the sigmoid function, use np.clip to limit the values of z:

```python
z = np.clip(z, -500, 500)
```

This ensures numerical stability when dealing with large input values.
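
To see why the clip matters, here is a minimal sketch (values chosen for illustration): without clipping, np.exp overflows float64 for large-magnitude negative z and emits a RuntimeWarning, while the clipped version stays in range:

```python
import numpy as np

z = np.array([-1000.0])

# Naive sigmoid: np.exp(1000) overflows to inf (RuntimeWarning),
# and the result collapses to 0.0 via 1 / (1 + inf)
naive = 1 / (1 + np.exp(-z))

# Clipped sigmoid: exp(500) ≈ 1.4e217 stays within float64 range
z_clipped = np.clip(z, -500, 500)
stable = 1 / (1 + np.exp(-z_clipped))

print(naive, stable)  # [0.] [~7.1e-218]
```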
Lines changed: 48 additions & 0 deletions
```python
import numpy as np

def predict_logistic(X: np.ndarray, weights: np.ndarray, bias: float) -> np.ndarray:
    """Return binary predictions (0 or 1) for a batch of samples X."""
    z = np.dot(X, weights) + bias          # Linear combination for all samples at once
    z = np.clip(z, -500, 500)              # Prevent overflow in exp
    probabilities = 1 / (1 + np.exp(-z))   # Sigmoid
    return (probabilities >= 0.5).astype(int)

def test_predict_logistic():
    # Test case 1: Simple linearly separable case
    X1 = np.array([[1, 1], [2, 2], [-1, -1], [-2, -2]])
    w1 = np.array([1, 1])
    b1 = 0
    expected1 = np.array([1, 1, 0, 0])
    assert np.array_equal(predict_logistic(X1, w1, b1), expected1), "Test case 1 failed"

    # Test case 2: Points on and near the decision boundary (sigmoid(0) = 0.5 maps to 1)
    X2 = np.array([[0, 0], [0.1, 0.1], [-0.1, -0.1]])
    w2 = np.array([1, 1])
    b2 = 0
    expected2 = np.array([1, 1, 0])
    assert np.array_equal(predict_logistic(X2, w2, b2), expected2), "Test case 2 failed"

    # Test case 3: Higher-dimensional input
    X3 = np.array([[1, 2, 3], [-1, -2, -3], [0.5, 1, 1.5]])
    w3 = np.array([0.1, 0.2, 0.3])
    b3 = -1
    expected3 = np.array([1, 0, 0])
    assert np.array_equal(predict_logistic(X3, w3, b3), expected3), "Test case 3 failed"

    # Test case 4: Single feature
    X4 = np.array([[1], [2], [-1], [-2]])
    w4 = np.array([2])
    b4 = 0
    expected4 = np.array([1, 1, 0, 0])
    assert np.array_equal(predict_logistic(X4, w4, b4), expected4), "Test case 4 failed"

    # Test case 5: Numerical stability with large values
    X5 = np.array([[1000, 2000], [-1000, -2000]])
    w5 = np.array([0.1, 0.1])
    b5 = 0
    result5 = predict_logistic(X5, w5, b5)
    assert result5[0] == 1 and result5[1] == 0, "Test case 5 failed"

if __name__ == "__main__":
    test_predict_logistic()
    print("All test cases passed!")
```
