From ee2865f3851b0906b5a37d5c84f1e1cdf92fda5b Mon Sep 17 00:00:00 2001 From: Coder1010ayush Date: Tue, 30 Sep 2025 21:34:08 +0530 Subject: [PATCH 1/4] gmm for regression added --- build/160.json | 66 +++++++ build/175.json | 50 +++++ .../description.md | 3 + .../example.json | 5 + .../175_guassian_mixture_regression/learn.md | 125 +++++++++++++ .../175_guassian_mixture_regression/meta.json | 15 ++ .../solution.py | 171 ++++++++++++++++++ .../starter_code.py | 63 +++++++ .../tests.json | 26 +++ 9 files changed, 524 insertions(+) create mode 100644 build/160.json create mode 100644 build/175.json create mode 100644 questions/175_guassian_mixture_regression/description.md create mode 100644 questions/175_guassian_mixture_regression/example.json create mode 100644 questions/175_guassian_mixture_regression/learn.md create mode 100644 questions/175_guassian_mixture_regression/meta.json create mode 100644 questions/175_guassian_mixture_regression/solution.py create mode 100644 questions/175_guassian_mixture_regression/starter_code.py create mode 100644 questions/175_guassian_mixture_regression/tests.json diff --git a/build/160.json b/build/160.json new file mode 100644 index 00000000..4b4ccad7 --- /dev/null +++ b/build/160.json @@ -0,0 +1,66 @@ +{ + "id": "160", + "title": "Mixed Precision Training", + "difficulty": "medium", + "category": "Machine Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/komaksym", + "name": "komaksym" + } + ], + "description": "Write a Python class to implement Mixed Precision Training that uses both float32 and float16 data types to optimize memory usage and speed. Your class should have an `__init__(self, loss_scale=1024.0)` method to initialize with loss scaling factor. Implement `forward(self, weights, inputs, targets)` to perform forward pass with float16 computation and return Mean Squared Error (MSE) loss (scaled) in float32, and `backward(self, gradients)` to unscale gradients and check for overflow. Use float16 for computations but float32 for gradient accumulation. Return gradients as float32 and set them to zero if overflow is detected. Only use NumPy.", + "learn_section": "# **Mixed Precision Training**\n## **1. Definition**\nMixed Precision Training is a **deep learning optimization technique** that uses both **float16** (half precision) and **float32** (single precision) data types during training to reduce memory usage and increase training speed while maintaining model accuracy.\nThe technique works by:\n- **Using float16 for forward pass computations** to save memory and increase speed\n- **Using float32 for gradient accumulation** to maintain numerical precision\n- **Applying loss scaling** to prevent gradient underflow in float16\n---\n## **2. 
Key Components**\n### **Mean Squared Error (MSE) Loss**\nThe loss function must be computed as Mean Squared Error:\n$$\n\\text{MSE} = \\frac{1}{n} \\sum_{i=1}^{n} (y_i - \\hat{y}_i)^2\n$$\nwhere $y_i$ is the target and $\\hat{y}_i$ is the prediction for sample $i$.\n\n### **Loss Scaling**\nTo prevent gradient underflow in float16, gradients are scaled up during the forward pass:\n$$\n\\text{scaled\\_loss} = \\text{MSE} \\times \\text{scale\\_factor}\n$$\nThen unscaled during backward pass:\n$$\n\\text{gradient} = \\frac{\\text{scaled\\_gradient}}{\\text{scale\\_factor}}\n$$\n### **Overflow Detection**\nCheck for invalid gradients (NaN or Inf) that indicate numerical overflow:\n$$\n\\text{overflow} = \\text{any}(\\text{isnan}(\\text{gradients}) \\text{ or } \\text{isinf}(\\text{gradients}))\n$$\n---\n## **3. Precision Usage**\n- **float16**: Forward pass computations, activations, temporary calculations\n- **float32**: Gradient accumulation, parameter updates, loss scaling\n- **Automatic casting**: Convert between precisions as needed\n- **Loss computation**: Use MSE as the loss function before scaling\n---\n## **4. Benefits and Applications**\n- **Memory Efficiency**: Reduces memory usage by ~50% for activations\n- **Speed Improvement**: Faster computation on modern GPUs with Tensor Cores\n- **Training Stability**: Loss scaling prevents gradient underflow\n- **Model Accuracy**: Maintains comparable accuracy to full precision training\nCommon in training large neural networks where memory is a constraint and speed is critical.\n---", + "starter_code": "import numpy as np\n\nclass MixedPrecision:\n def __init__(self, loss_scale=1024.0):\n # Initialize loss scaling factor\n pass\n \n def forward(self, weights, inputs, targets):\n # Perform forward pass with float16, return scaled loss as float32\n pass\n \n def backward(self, gradients):\n # Unscale gradients and check for overflow, return as float32\n pass", + "solution": "import numpy as np\n\nclass MixedPrecision:\n def __init__(self, loss_scale=1024.0):\n self.loss_scale = loss_scale\n\n def forward(self, weights, inputs, targets):\n # Convert ALL inputs to float16 for computation (regardless of input dtype)\n weights_fp16 = weights.astype(np.float16)\n inputs_fp16 = inputs.astype(np.float16)\n targets_fp16 = targets.astype(np.float16)\n\n # Simple forward pass: linear model + MSE loss\n predictions = np.dot(inputs_fp16, weights_fp16)\n loss = np.mean((targets_fp16 - predictions) ** 2)\n\n # Scale loss and convert back to float32 (Python float)\n scaled_loss = float(loss) * self.loss_scale\n return scaled_loss\n\n def backward(self, gradients):\n # Convert gradients to float32 for precision (regardless of input dtype)\n gradients_fp32 = gradients.astype(np.float32)\n\n # Check for overflow (NaN or Inf)\n overflow = np.any(np.isnan(gradients_fp32)) or np.any(np.isinf(gradients_fp32))\n\n if overflow:\n # Return zero gradients if overflow detected (must be float32)\n return np.zeros_like(gradients_fp32, dtype=np.float32)\n\n # Unscale gradients (ensure result is float32)\n unscaled_gradients = gradients_fp32 / self.loss_scale\n return unscaled_gradients.astype(np.float32)", + "example": { + "input": "import numpy as np\nmp = MixedPrecision(loss_scale=1024.0)\nweights = np.array([0.5, -0.3], dtype=np.float32)\ninputs = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)\ntargets = np.array([1.0, 0.0], dtype=np.float32)\nloss = mp.forward(weights, inputs, targets)\nprint(f\"Loss: {loss:.4f}\")\nprint(f\"Loss dtype: 
{type(loss).__name__}\")\ngrads = np.array([512.0, -256.0], dtype=np.float32)\nresult = mp.backward(grads)\nprint(f\"Gradients: {result}\")\nprint(f\"Grad dtype: {result.dtype}\")", + "output": "Loss: 665.0000\nLoss dtype: float\nGradients: [0.5 -0.25]\nGrad dtype: float32", + "reasoning": "Forward pass converts inputs to float16, computes loss, then scales and returns as Python float (float32). Backward converts gradients to float32 and unscales. Final gradients must be float32 type." + }, + "test_cases": [ + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=1024.0)\nweights = np.array([0.5, -0.3], dtype=np.float32)\ninputs = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)\ntargets = np.array([1.0, 0.0], dtype=np.float32)\nloss = mp.forward(weights, inputs, targets)\nprint(f\"Loss: {loss:.4f}\")\nprint(f\"Loss dtype: {type(loss).__name__}\")", + "expected_output": "Loss: 665.0000\nLoss dtype: float" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=1024.0)\ngrads = np.array([512.0, -256.0], dtype=np.float32)\nresult = mp.backward(grads)\nprint(f\"Gradients: {result}\")\nprint(f\"Grad dtype: {result.dtype}\")", + "expected_output": "Gradients: [ 0.5 -0.25]\nGrad dtype: float32" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=512.0)\nweights = np.array([1.0, 0.5], dtype=np.float64)\ninputs = np.array([[2.0, 1.0]], dtype=np.float64)\ntargets = np.array([3.0], dtype=np.float64)\nloss = mp.forward(weights, inputs, targets)\nprint(f\"Loss: {loss:.1f}\")\nprint(f\"Loss dtype: {type(loss).__name__}\")", + "expected_output": "Loss: 128.0\nLoss dtype: float" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=512.0)\ngrads = np.array([1024.0, 512.0], dtype=np.float16)\nresult = mp.backward(grads)\nprint(f\"Gradients: [{result[0]:.0f} {result[1]:.0f}]\")\nprint(f\"Grad dtype: {result.dtype}\")", + "expected_output": "Gradients: [2 1]\nGrad dtype: float32" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=100.0)\nweights = np.array([0.1, 0.2], dtype=np.float32)\ninputs = np.array([[1.0, 1.0]], dtype=np.float32)\ntargets = np.array([0.5], dtype=np.float32)\nloss = mp.forward(weights, inputs, targets)\nprint(f\"Loss: {loss:.1f}\")\nprint(f\"Loss dtype: {type(loss).__name__}\")", + "expected_output": "Loss: 4.0\nLoss dtype: float" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=100.0)\ngrads = np.array([200.0, 100.0], dtype=np.float64)\nresult = mp.backward(grads)\nprint(f\"Gradients: [{result[0]:.0f} {result[1]:.0f}]\")\nprint(f\"Grad dtype: {result.dtype}\")", + "expected_output": "Gradients: [2 1]\nGrad dtype: float32" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=2048.0)\nweights = np.array([0.25], dtype=np.float64)\ninputs = np.array([[4.0]], dtype=np.float64)\ntargets = np.array([2.0], dtype=np.float64)\nloss = mp.forward(weights, inputs, targets)\nprint(f\"Loss: {loss:.1f}\")\nprint(f\"Loss dtype: {type(loss).__name__}\")", + "expected_output": "Loss: 2048.0\nLoss dtype: float" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=2048.0)\ngrads = np.array([np.nan], dtype=np.float16)\nresult = mp.backward(grads)\nprint(f\"Gradients: [{result[0]:.0f}]\")\nprint(f\"Grad dtype: {result.dtype}\")", + "expected_output": "Gradients: [0]\nGrad dtype: float32" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=256.0)\nweights = np.array([1.0], dtype=np.float16)\ninputs = np.array([[2.0]], dtype=np.float16)\ntargets = 
np.array([3.0], dtype=np.float16)\nloss = mp.forward(weights, inputs, targets)\nprint(f\"Loss: {loss:.1f}\")\nprint(f\"Loss dtype: {type(loss).__name__}\")", + "expected_output": "Loss: 256.0\nLoss dtype: float" + }, + { + "test": "import numpy as np\nmp = MixedPrecision(loss_scale=256.0)\ngrads = np.array([np.inf], dtype=np.float64)\nresult = mp.backward(grads)\nprint(f\"Gradients: [{result[0]:.0f}]\")\nprint(f\"Grad dtype: {result.dtype}\")", + "expected_output": "Gradients: [0]\nGrad dtype: float32" + } + ] +} \ No newline at end of file diff --git a/build/175.json b/build/175.json new file mode 100644 index 00000000..336f6679 --- /dev/null +++ b/build/175.json @@ -0,0 +1,50 @@ +{ + "id": "175", + "title": "Gaussian Process for Regression", + "difficulty": "medium", + "category": "Machine Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/Coder1010ayush", + "name": "Ayush" + } + ], + "description": "## Problem\n\nProblem Statement: Task is to implement GaussianProcessRegression class which is a guassian process model for prediction regression problems.", + "learn_section": "# **Gaussian Processes (GP): From-Scratch Regression Example**\n\n## **1. What’s a Gaussian Process?**\nA **Gaussian Process** defines a distribution over functions \\( f(\\cdot) \\). For any finite set of inputs \\(X=\\{x_i\\}_{i=1}^n\\), the function values \\(f(X)\\) follow a multivariate normal:\n\n\\[\nf(X) \\sim \\mathcal{N}\\big(0,\\; K(X,X)\\big),\n\\]\n\nwhere \\(K\\) is a **kernel** (covariance) function encoding similarity between inputs. With noisy targets \\(y=f(X)+\\varepsilon,\\; \\varepsilon\\sim\\mathcal{N}(0,\\sigma_n^2 I)\\), GP regression yields a closed-form posterior predictive mean and variance at new points \\(X_*\\).\n\n---\n\n## **2. The Implementation at a Glance**\nThe provided code builds a minimal yet complete GP regression stack:\n\n- **Kernels implemented**\n - Radial Basis Function (RBF / Squared Exponential)\n - Matérn (\\(\\nu=0.5, 1.5, 2.5\\), or general \\(\\nu\\))\n - Periodic\n - Linear\n - Rational Quadratic\n- **Core GP classes**\n - `_GaussianProcessBase`: kernel selection & covariance matrix computation\n - `GaussianProcessRegression`:\n - `fit`: builds \\(K\\), does **Cholesky decomposition**, solves \\(\\alpha\\)\n - `predict`: returns posterior mean & variance\n - `log_marginal_likelihood`: computes GP evidence\n - `optimize_hyperparameters`: basic optimizer (for RBF hyperparams)\n\n---\n\n## **3. Kernel Cheat-Sheet**\nLet \\(x, x'\\in\\mathbb{R}^d\\), \\(r=\\lVert x-x'\\rVert\\).\n\n- **RBF (SE):** \n \\[\n k_{\\text{RBF}}(x,x')=\\sigma^2\\exp\\!\\left(-\\tfrac{1}{2}\\tfrac{r^2}{\\ell^2}\\right)\n \\]\n\n- **Matérn (\\(\\nu=1.5\\)):** \n \\[\n k(x,x')=\\Big(1+\\tfrac{\\sqrt{3}\\,r}{\\ell}\\Big)\\exp\\!\\Big(-\\tfrac{\\sqrt{3}\\,r}{\\ell}\\Big)\n \\]\n\n- **Periodic:** \n \\[\n k(x,x')=\\sigma^2\\exp\\!\\left(-\\tfrac{2}{\\ell^2}\\sin^2\\!\\Big(\\tfrac{\\pi r}{p}\\Big)\\right)\n \\]\n\n- **Linear:** \n \\[\n k(x,x')=\\sigma_b^2+\\sigma_v^2\\,x^\\top x'\n \\]\n\n- **Rational Quadratic:** \n \\[\n k(x,x')=\\sigma^2\\Big(1+\\tfrac{r^2}{2\\alpha \\ell^2}\\Big)^{-\\alpha}\n \\]\n\n---\n\n## **4. GP Regression Mechanics**\n### Training\n1. Build covariance: \n \\(K = K(X,X) + \\sigma_n^2 I\\)\n2. Cholesky factorization: \n \\(K=LL^\\top\\)\n3. 
Solve \\(\\alpha\\): \n \\(L L^\\top \\alpha = y\\)\n\n### Prediction\nAt new inputs \\(X_*\\):\n- \\(K_* = K(X, X_*)\\), \\(K_{**} = K(X_*, X_*)\\)\n- **Mean:** \n \\(\\mu_* = K_*^\\top \\alpha\\)\n- **Covariance:** \n \\(\\Sigma_* = K_{**} - V^\\top V,\\;\\; V = L^{-1}K_*\\)\n\n### Model Selection\n- **Log Marginal Likelihood (LML):** \n \\[\n \\log p(y\\mid X)= -\\tfrac{1}{2}y^\\top \\alpha - \\sum\\nolimits_i \\log L_{ii} - \\tfrac{n}{2}\\log(2\\pi)\n \\]\n\n---\n\n## **5. Worked Example (Linear Kernel)**\n\n```python\nimport numpy as np\ngp = GaussianProcessRegression(kernel='linear',\n kernel_params={'sigma_b': 0.0, 'sigma_v': 1.0},\n noise=1e-8)\n\nX_train = np.array([[1], [2], [4]])\ny_train = np.array([3, 5, 9]) # y = 2x + 1\ngp.fit(X_train, y_train)\n\nX_test = np.array([[3.0]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\") # -> 7.0000\n```\n\n\n## **6. When to Use GP Regression**\n\n- **Small-to-medium datasets** where uncertainty estimates are valuable \n- Cases requiring **predictive intervals** (not just point predictions) \n- **Nonparametric modeling** with kernel priors \n- Automatic hyperparameter tuning via **marginal likelihood** \n\n---\n\n## **7. Practical Tips**\n\n- Always add **jitter** (`1e-6`) to the diagonal for numerical stability \n- **Standardize inputs/outputs** before training \n- Be aware: Exact GP has complexity **\\(\\mathcal{O}(n^3)\\)** in time and **\\(\\mathcal{O}(n^2)\\)** in memory \n- Choose kernels to match problem structure: \n - **RBF:** smooth functions \n - **Matérn:** rougher functions \n - **Periodic:** seasonal/cyclical data \n - **Linear:** global linear trends ", + "starter_code": "import math # ---------------------------------------- utf-8 encoding ---------------------------------\n\n# This file contains Gaussian Process implementation.\nimport numpy as np\nimport math\n\n\ndef matern_kernel(x: np.ndarray, x_prime: np.ndarray, length_scale=1.0, nu=1.5):\n pass\n\n\ndef rbf_kernel(x: np.ndarray, x_prime, sigma=1.0, length_scale=1.0):\n pass\n\n\ndef periodic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, period=1.0\n):\n pass\n\n\ndef linear_kernel(x: np.ndarray, x_prime: np.ndarray, sigma_b=1.0, sigma_v=1.0):\n pass\n\n\ndef rational_quadratic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, alpha=1.0\n):\n pass\n\n\n# --- BASE CLASS -------------------------------------------------------------\n\n\nclass _GaussianProcessBase:\n def __init__(self, kernel=\"rbf\", noise=1e-5, kernel_params=None):\n pass\n\n def _select_kernel(self, x1, x2):\n \"\"\"Selects and computes the kernel value for two single data points.\"\"\"\n pass\n\n def _compute_covariance(self, X1, X2):\n \"\"\"\n Computes the covariance matrix between two sets of points.\n This method fixes the vectorization bug from the original code.\n \"\"\"\n pass\n\n\n# --- REGRESSION MODEL -------------------------------------------------------\nclass GaussianProcessRegression(_GaussianProcessBase):\n def fit(self, X, y):\n pass\n\n def predict(self, X_test, return_std=False):\n pass\n\n def log_marginal_likelihood(self):\n pass\n\n def optimize_hyperparameters(self):\n pass", + "solution": "# ---------------------------------------- utf-8 encoding ---------------------------------\n# This file contains Gaussian Process implementation.\nimport numpy as np\nimport math\nfrom scipy.spatial.distance import euclidean\nfrom scipy.special import kv as bessel_kv\nfrom scipy.special import gamma\nfrom scipy.linalg import cholesky, 
solve_triangular\nfrom scipy.optimize import minimize\nfrom scipy.special import expit, softmax\n\n\n# --- KERNEL FUNCTIONS --------------------------------------------------------\ndef matern_kernel(x: np.ndarray, x_prime: np.ndarray, length_scale=1.0, nu=1.5):\n d = euclidean(x, x_prime)\n if d == 0:\n return 1.0 # Covariance with self is 1 before scaling\n if nu == 0.5:\n return np.exp(-d / length_scale)\n elif nu == 1.5:\n return (1 + np.sqrt(3) * d / length_scale) * np.exp(\n -np.sqrt(3) * d / length_scale\n )\n elif nu == 2.5:\n return (\n 1 + np.sqrt(5) * d / length_scale + 5 * d**2 / (3 * length_scale**2)\n ) * np.exp(-np.sqrt(5) * d / length_scale)\n else:\n factor = (2 ** (1 - nu)) / gamma(nu)\n scaled_d = np.sqrt(2 * nu) * d / length_scale\n return factor * (scaled_d**nu) * bessel_kv(nu, scaled_d)\n\n\ndef rbf_kernel(x: np.ndarray, x_prime, sigma=1.0, length_scale=1.0):\n # This is a squared exponential kernel\n return sigma**2 * np.exp(-0.5 * np.linalg.norm(x - x_prime) ** 2 / length_scale**2)\n\n\ndef periodic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, period=1.0\n):\n return sigma**2 * np.exp(\n -2 * np.sin(np.pi * np.linalg.norm(x - x_prime) / period) ** 2 / length_scale**2\n )\n\n\ndef linear_kernel(x: np.ndarray, x_prime: np.ndarray, sigma_b=1.0, sigma_v=1.0):\n return sigma_b**2 + sigma_v**2 * np.dot(x, x_prime)\n\n\ndef rational_quadratic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, alpha=1.0\n):\n return sigma**2 * (\n 1 + np.linalg.norm(x - x_prime) ** 2 / (2 * alpha * length_scale**2)\n ) ** (-alpha)\n\n\n# --- BASE CLASS -------------------------------------------------------------\n\n\nclass _GaussianProcessBase:\n def __init__(self, kernel=\"rbf\", noise=1e-5, kernel_params=None):\n self.kernel_name = kernel\n self.noise = noise\n self.kernel_params = kernel_params if kernel_params else {}\n self.X_train = None\n self.y_train = None\n self.K = None\n\n def _select_kernel(self, x1, x2):\n \"\"\"Selects and computes the kernel value for two single data points.\"\"\"\n if self.kernel_name == \"rbf\":\n return rbf_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"matern\":\n return matern_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"periodic\":\n return periodic_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"linear\":\n return linear_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"rational_quadratic\":\n return rational_quadratic_kernel(x1, x2, **self.kernel_params)\n else:\n raise ValueError(\n \"Unsupported kernel. 
Choose from ['rbf', 'matern', 'periodic', 'linear', 'rational_quadratic'].\"\n )\n\n def _compute_covariance(self, X1, X2):\n \"\"\"\n Computes the covariance matrix between two sets of points.\n This method fixes the vectorization bug from the original code.\n \"\"\"\n # Ensuring X1 and X2 are 2D arrays\n X1 = np.atleast_2d(X1)\n X2 = np.atleast_2d(X2)\n\n n1, _ = X1.shape\n n2, _ = X2.shape\n K = np.zeros((n1, n2))\n for i in range(n1):\n for j in range(n2):\n K[i, j] = self._select_kernel(X1[i], X2[j])\n return K\n\n\n# --- REGRESSION MODEL -------------------------------------------------------\nclass GaussianProcessRegression(_GaussianProcessBase):\n def fit(self, X, y):\n self.X_train = np.asarray(X)\n self.y_train = np.asarray(y)\n self.K = self._compute_covariance(\n self.X_train, self.X_train\n ) + self.noise * np.eye(len(self.X_train))\n\n # Compute Cholesky decomposition for stable inversion\n self.L = cholesky(self.K, lower=True)\n # alpha = K_inv * y\n self.alpha = solve_triangular(\n self.L.T, solve_triangular(self.L, self.y_train, lower=True)\n )\n\n def predict(self, X_test, return_std=False):\n X_test = np.atleast_2d(X_test)\n K_s = self._compute_covariance(self.X_train, X_test)\n K_ss = self._compute_covariance(X_test, X_test)\n\n # Compute predictive mean\n mu = K_s.T @ self.alpha\n\n # Compute predictive variance\n v = solve_triangular(self.L, K_s, lower=True)\n cov = K_ss - v.T @ v\n\n if return_std:\n return mu, np.sqrt(np.diag(cov))\n return mu\n\n def log_marginal_likelihood(self):\n return (\n -0.5 * (self.y_train.T @ self.alpha)\n - np.sum(np.log(np.diag(self.L)))\n - len(self.X_train) / 2 * np.log(2 * np.pi)\n )\n\n def optimize_hyperparameters(self):\n # NOTE: This is a simplified optimizer for 'rbf' kernel's params.\n def objective(params):\n self.kernel_params = {\n \"length_scale\": np.exp(params[0]),\n \"sigma\": np.exp(params[1]),\n }\n self.fit(self.X_train, self.y_train)\n return -self.log_marginal_likelihood()\n\n init_params = np.log(\n [\n self.kernel_params.get(\"length_scale\", 1.0),\n self.kernel_params.get(\"sigma\", 1.0),\n ]\n )\n res = minimize(\n objective, init_params, method=\"L-BFGS-B\", bounds=[(-5, 5), (-5, 5)]\n )\n\n self.kernel_params = {\n \"length_scale\": np.exp(res.x[0]),\n \"sigma\": np.exp(res.x[1]),\n }\n # Re-fit with optimal hyperparameters\n self.fit(self.X_train, self.y_train)\n print(\"Optimized Hyperparameters:\", self.kernel_params)", + "example": { + "input": "import numpy as np\ngp = GaussianProcessRegression(kernel='linear', kernel_params={'sigma_b': 0.0, 'sigma_v': 1.0}, noise=1e-8)\nX_train = np.array([[1], [2], [4]])\ny_train = np.array([3, 5, 9])\ngp.fit(X_train, y_train)\nX_test = np.array([[3.0]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "output": "7.0000", + "reasoning": "A Gaussian Process with a linear kernel is trained on perfectly linear data that follows the function y = 2x + 1. When asked to predict the value at x=3, the model perfectly interpolates the linear function it has learned, resulting in a prediction of 2*3 + 1 = 7. The near-zero noise ensures the prediction is exact." 
+ }, + "test_cases": [ + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0], [7.5], [10.0]])\ny_train = np.sin(X_train).ravel()\ngp.fit(X_train, y_train)\nX_test = np.array([[1.25]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": "0.7787" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0], [7.5], [10.0]])\ny_train = np.sin(X_train).ravel()\ngp.fit(X_train, y_train)\nX_test = np.array([[1.25]])\nmu, std = gp.predict(X_test, return_std=True)\nprint(f\"mu={mu[0]:.4f}, std={std[0]:.4f}\")", + "expected_output": "mu=0.7787, std=0.6274" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0]])\ny_train = np.array([1.0, 3.0, 1.5])\ngp.fit(X_train, y_train)\nX_test = np.array([[2.5]])\nmu, std = gp.predict(X_test, return_std=True)\nprint(f\"mu={mu[0]:.4f}, std={std[0]:.4f}\")", + "expected_output": "mu=3.0000, std=0.0001" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='linear', kernel_params={'sigma_b': 0.1, 'sigma_v': 1.0}, noise=1e-8)\nX_train = np.array([[1], [2], [4]])\ny_train = np.array([3, 5, 9])\ngp.fit(X_train, y_train)\nX_test = np.array([[3.0]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": "7.0000" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.5}, noise=1e-8)\nX_train = np.array([[1, 2], [3, 4], [5, 1]])\ny_train = np.sum(X_train, axis=1)\ngp.fit(X_train, y_train)\nX_test = np.array([[2, 3]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": "4.5444" + }, + { + "test": "import numpy as np\n# Monkey-patch the optimizer to prevent printing to stdout\noriginal_optimizer = GaussianProcessRegression.optimize_hyperparameters\ndef silent_optimizer(self):\n print_state = self.__dict__.get('__print__', True)\n if print_state:\n def objective(params):\n self.kernel_params = {'length_scale': np.exp(params[0]), 'sigma': np.exp(params[1])}\n self.fit(self.X_train, self.y_train)\n return -self.log_marginal_likelihood()\n init_params = np.log([self.kernel_params.get('length_scale', 1.0), self.kernel_params.get('sigma', 1.0)])\n res = minimize(objective, init_params, method='L-BFGS-B', bounds=[(-5, 5), (-5, 5)])\n self.kernel_params = {'length_scale': np.exp(res.x[0]), 'sigma': np.exp(res.x[1])}\n self.fit(self.X_train, self.y_train)\nGaussianProcessRegression.optimize_hyperparameters = silent_optimizer\n\nnp.random.seed(42)\ngp = GaussianProcessRegression(kernel='rbf', noise=0.01)\nX_train = np.linspace(0, 2 * np.pi, 10).reshape(-1, 1)\ny_train = np.sin(X_train).ravel() + np.random.randn(10) * 0.1\ngp.fit(X_train, y_train)\ngp.optimize_hyperparameters()\nX_test = np.array([[np.pi]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": "0.0543" + } + ] +} \ No newline at end of file diff --git a/questions/175_guassian_mixture_regression/description.md b/questions/175_guassian_mixture_regression/description.md new file mode 100644 index 00000000..8b8b22c9 --- /dev/null +++ b/questions/175_guassian_mixture_regression/description.md @@ -0,0 +1,3 @@ +## Problem + +Problem Statement: Task is to implement 
the GaussianProcessRegression class, a Gaussian Process model for solving regression problems. \ No newline at end of file diff --git a/questions/175_guassian_mixture_regression/example.json b/questions/175_guassian_mixture_regression/example.json new file mode 100644 index 00000000..a9e00f4f --- /dev/null +++ b/questions/175_guassian_mixture_regression/example.json @@ -0,0 +1,5 @@ +{ + "input": "import numpy as np\ngp = GaussianProcessRegression(kernel='linear', kernel_params={'sigma_b': 0.0, 'sigma_v': 1.0}, noise=1e-8)\nX_train = np.array([[1], [2], [4]])\ny_train = np.array([3, 5, 9])\ngp.fit(X_train, y_train)\nX_test = np.array([[3.0]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "output": "7.0000", + "reasoning": "A Gaussian Process with a linear kernel is trained on perfectly linear data that follows the function y = 2x + 1. When asked to predict the value at x=3, the model perfectly interpolates the linear function it has learned, resulting in a prediction of 2*3 + 1 = 7. The near-zero noise ensures the prediction is exact." +} \ No newline at end of file diff --git a/questions/175_guassian_mixture_regression/learn.md b/questions/175_guassian_mixture_regression/learn.md new file mode 100644 index 00000000..61c97f8d --- /dev/null +++ b/questions/175_guassian_mixture_regression/learn.md @@ -0,0 +1,125 @@ +# **Gaussian Processes (GP): From-Scratch Regression Example** + +## **1. What’s a Gaussian Process?** +A **Gaussian Process** defines a distribution over functions \( f(\cdot) \). For any finite set of inputs \(X=\{x_i\}_{i=1}^n\), the function values \(f(X)\) follow a multivariate normal: + +\[ +f(X) \sim \mathcal{N}\big(0,\; K(X,X)\big), +\] + +where \(K\) is a **kernel** (covariance) function encoding similarity between inputs. With noisy targets \(y=f(X)+\varepsilon,\; \varepsilon\sim\mathcal{N}(0,\sigma_n^2 I)\), GP regression yields a closed-form posterior predictive mean and variance at new points \(X_*\). + +--- + +## **2. The Implementation at a Glance** +The provided code builds a minimal yet complete GP regression stack: + +- **Kernels implemented** + - Radial Basis Function (RBF / Squared Exponential) + - Matérn (\(\nu=0.5, 1.5, 2.5\), or general \(\nu\)) + - Periodic + - Linear + - Rational Quadratic +- **Core GP classes** + - `_GaussianProcessBase`: kernel selection & covariance matrix computation + - `GaussianProcessRegression`: + - `fit`: builds \(K\), does **Cholesky decomposition**, solves \(\alpha\) + - `predict`: returns posterior mean & variance + - `log_marginal_likelihood`: computes GP evidence + - `optimize_hyperparameters`: basic optimizer (for RBF hyperparams) + +--- + +## **3. Kernel Cheat-Sheet** +Let \(x, x'\in\mathbb{R}^d\), \(r=\lVert x-x'\rVert\). + +- **RBF (SE):** + \[ + k_{\text{RBF}}(x,x')=\sigma^2\exp\!\left(-\tfrac{1}{2}\tfrac{r^2}{\ell^2}\right) + \] + +- **Matérn (\(\nu=1.5\)):** + \[ + k(x,x')=\Big(1+\tfrac{\sqrt{3}\,r}{\ell}\Big)\exp\!\Big(-\tfrac{\sqrt{3}\,r}{\ell}\Big) + \] + +- **Periodic:** + \[ + k(x,x')=\sigma^2\exp\!\left(-\tfrac{2}{\ell^2}\sin^2\!\Big(\tfrac{\pi r}{p}\Big)\right) + \] + +- **Linear:** + \[ + k(x,x')=\sigma_b^2+\sigma_v^2\,x^\top x' + \] + +- **Rational Quadratic:** + \[ + k(x,x')=\sigma^2\Big(1+\tfrac{r^2}{2\alpha \ell^2}\Big)^{-\alpha} + \] + +--- + +## **4. GP Regression Mechanics** +### Training +1. Build covariance: + \(K = K(X,X) + \sigma_n^2 I\) +2. Cholesky factorization: + \(K=LL^\top\) +3. 
Solve \(\alpha\): + \(L L^\top \alpha = y\) + +### Prediction +At new inputs \(X_*\): +- \(K_* = K(X, X_*)\), \(K_{**} = K(X_*, X_*)\) +- **Mean:** + \(\mu_* = K_*^\top \alpha\) +- **Covariance:** + \(\Sigma_* = K_{**} - V^\top V,\;\; V = L^{-1}K_*\) + +### Model Selection +- **Log Marginal Likelihood (LML):** + \[ + \log p(y\mid X)= -\tfrac{1}{2}y^\top \alpha - \sum\nolimits_i \log L_{ii} - \tfrac{n}{2}\log(2\pi) + \] + +--- + +## **5. Worked Example (Linear Kernel)** + +```python +import numpy as np +gp = GaussianProcessRegression(kernel='linear', + kernel_params={'sigma_b': 0.0, 'sigma_v': 1.0}, + noise=1e-8) + +X_train = np.array([[1], [2], [4]]) +y_train = np.array([3, 5, 9]) # y = 2x + 1 +gp.fit(X_train, y_train) + +X_test = np.array([[3.0]]) +mu = gp.predict(X_test) +print(f"{mu[0]:.4f}") # -> 7.0000 +``` + + +## **6. When to Use GP Regression** + +- **Small-to-medium datasets** where uncertainty estimates are valuable +- Cases requiring **predictive intervals** (not just point predictions) +- **Nonparametric modeling** with kernel priors +- Automatic hyperparameter tuning via **marginal likelihood** + +--- + +## **7. Practical Tips** + +- Always add **jitter** (`1e-6`) to the diagonal for numerical stability +- **Standardize inputs/outputs** before training +- Be aware: Exact GP has complexity **\(\mathcal{O}(n^3)\)** in time and **\(\mathcal{O}(n^2)\)** in memory +- Choose kernels to match problem structure: + - **RBF:** smooth functions + - **Matérn:** rougher functions + - **Periodic:** seasonal/cyclical data + - **Linear:** global linear trends + diff --git a/questions/175_guassian_mixture_regression/meta.json b/questions/175_guassian_mixture_regression/meta.json new file mode 100644 index 00000000..73c96a0e --- /dev/null +++ b/questions/175_guassian_mixture_regression/meta.json @@ -0,0 +1,15 @@ +{ + "id": "175", + "title": "Gaussian Process for Regression", + "difficulty": "medium", + "category": "Machine Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/Coder1010ayush", + "name": "Ayush" + } + ] +} diff --git a/questions/175_guassian_mixture_regression/solution.py b/questions/175_guassian_mixture_regression/solution.py new file mode 100644 index 00000000..93ba3f45 --- /dev/null +++ b/questions/175_guassian_mixture_regression/solution.py @@ -0,0 +1,171 @@ +# ---------------------------------------- utf-8 encoding --------------------------------- +# This file contains Gaussian Process implementation. 
+import numpy as np +import math +from scipy.spatial.distance import euclidean +from scipy.special import kv as bessel_kv +from scipy.special import gamma +from scipy.linalg import cholesky, solve_triangular +from scipy.optimize import minimize +from scipy.special import expit, softmax + + +# --- KERNEL FUNCTIONS -------------------------------------------------------- +def matern_kernel(x: np.ndarray, x_prime: np.ndarray, length_scale=1.0, nu=1.5): + d = euclidean(x, x_prime) + if d == 0: + return 1.0 # Covariance with self is 1 before scaling + if nu == 0.5: + return np.exp(-d / length_scale) + elif nu == 1.5: + return (1 + np.sqrt(3) * d / length_scale) * np.exp( + -np.sqrt(3) * d / length_scale + ) + elif nu == 2.5: + return ( + 1 + np.sqrt(5) * d / length_scale + 5 * d**2 / (3 * length_scale**2) + ) * np.exp(-np.sqrt(5) * d / length_scale) + else: + factor = (2 ** (1 - nu)) / gamma(nu) + scaled_d = np.sqrt(2 * nu) * d / length_scale + return factor * (scaled_d**nu) * bessel_kv(nu, scaled_d) + + +def rbf_kernel(x: np.ndarray, x_prime, sigma=1.0, length_scale=1.0): + # This is a squared exponential kernel + return sigma**2 * np.exp(-0.5 * np.linalg.norm(x - x_prime) ** 2 / length_scale**2) + + +def periodic_kernel( + x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, period=1.0 +): + return sigma**2 * np.exp( + -2 * np.sin(np.pi * np.linalg.norm(x - x_prime) / period) ** 2 / length_scale**2 + ) + + +def linear_kernel(x: np.ndarray, x_prime: np.ndarray, sigma_b=1.0, sigma_v=1.0): + return sigma_b**2 + sigma_v**2 * np.dot(x, x_prime) + + +def rational_quadratic_kernel( + x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, alpha=1.0 +): + return sigma**2 * ( + 1 + np.linalg.norm(x - x_prime) ** 2 / (2 * alpha * length_scale**2) + ) ** (-alpha) + + +# --- BASE CLASS ------------------------------------------------------------- + + +class _GaussianProcessBase: + def __init__(self, kernel="rbf", noise=1e-5, kernel_params=None): + self.kernel_name = kernel + self.noise = noise + self.kernel_params = kernel_params if kernel_params else {} + self.X_train = None + self.y_train = None + self.K = None + + def _select_kernel(self, x1, x2): + """Selects and computes the kernel value for two single data points.""" + if self.kernel_name == "rbf": + return rbf_kernel(x1, x2, **self.kernel_params) + elif self.kernel_name == "matern": + return matern_kernel(x1, x2, **self.kernel_params) + elif self.kernel_name == "periodic": + return periodic_kernel(x1, x2, **self.kernel_params) + elif self.kernel_name == "linear": + return linear_kernel(x1, x2, **self.kernel_params) + elif self.kernel_name == "rational_quadratic": + return rational_quadratic_kernel(x1, x2, **self.kernel_params) + else: + raise ValueError( + "Unsupported kernel. Choose from ['rbf', 'matern', 'periodic', 'linear', 'rational_quadratic']." + ) + + def _compute_covariance(self, X1, X2): + """ + Computes the covariance matrix between two sets of points. + This method fixes the vectorization bug from the original code. 
+ """ + # Ensuring X1 and X2 are 2D arrays + X1 = np.atleast_2d(X1) + X2 = np.atleast_2d(X2) + + n1, _ = X1.shape + n2, _ = X2.shape + K = np.zeros((n1, n2)) + for i in range(n1): + for j in range(n2): + K[i, j] = self._select_kernel(X1[i], X2[j]) + return K + + +# --- REGRESSION MODEL ------------------------------------------------------- +class GaussianProcessRegression(_GaussianProcessBase): + def fit(self, X, y): + self.X_train = np.asarray(X) + self.y_train = np.asarray(y) + self.K = self._compute_covariance( + self.X_train, self.X_train + ) + self.noise * np.eye(len(self.X_train)) + + # Compute Cholesky decomposition for stable inversion + self.L = cholesky(self.K, lower=True) + # alpha = K_inv * y + self.alpha = solve_triangular( + self.L.T, solve_triangular(self.L, self.y_train, lower=True) + ) + + def predict(self, X_test, return_std=False): + X_test = np.atleast_2d(X_test) + K_s = self._compute_covariance(self.X_train, X_test) + K_ss = self._compute_covariance(X_test, X_test) + + # Compute predictive mean + mu = K_s.T @ self.alpha + + # Compute predictive variance + v = solve_triangular(self.L, K_s, lower=True) + cov = K_ss - v.T @ v + + if return_std: + return mu, np.sqrt(np.diag(cov)) + return mu + + def log_marginal_likelihood(self): + return ( + -0.5 * (self.y_train.T @ self.alpha) + - np.sum(np.log(np.diag(self.L))) + - len(self.X_train) / 2 * np.log(2 * np.pi) + ) + + def optimize_hyperparameters(self): + # NOTE: This is a simplified optimizer for 'rbf' kernel's params. + def objective(params): + self.kernel_params = { + "length_scale": np.exp(params[0]), + "sigma": np.exp(params[1]), + } + self.fit(self.X_train, self.y_train) + return -self.log_marginal_likelihood() + + init_params = np.log( + [ + self.kernel_params.get("length_scale", 1.0), + self.kernel_params.get("sigma", 1.0), + ] + ) + res = minimize( + objective, init_params, method="L-BFGS-B", bounds=[(-5, 5), (-5, 5)] + ) + + self.kernel_params = { + "length_scale": np.exp(res.x[0]), + "sigma": np.exp(res.x[1]), + } + # Re-fit with optimal hyperparameters + self.fit(self.X_train, self.y_train) + print("Optimized Hyperparameters:", self.kernel_params) diff --git a/questions/175_guassian_mixture_regression/starter_code.py b/questions/175_guassian_mixture_regression/starter_code.py new file mode 100644 index 00000000..89bf9c9e --- /dev/null +++ b/questions/175_guassian_mixture_regression/starter_code.py @@ -0,0 +1,63 @@ +import math # ---------------------------------------- utf-8 encoding --------------------------------- + +# This file contains Gaussian Process implementation. 
+import numpy as np +import math + + +def matern_kernel(x: np.ndarray, x_prime: np.ndarray, length_scale=1.0, nu=1.5): + pass + + +def rbf_kernel(x: np.ndarray, x_prime, sigma=1.0, length_scale=1.0): + pass + + +def periodic_kernel( + x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, period=1.0 +): + pass + + +def linear_kernel(x: np.ndarray, x_prime: np.ndarray, sigma_b=1.0, sigma_v=1.0): + pass + + +def rational_quadratic_kernel( + x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, alpha=1.0 +): + pass + + +# --- BASE CLASS ------------------------------------------------------------- + + +class _GaussianProcessBase: + def __init__(self, kernel="rbf", noise=1e-5, kernel_params=None): + pass + + def _select_kernel(self, x1, x2): + """Selects and computes the kernel value for two single data points.""" + pass + + def _compute_covariance(self, X1, X2): + """ + Computes the covariance matrix between two sets of points. + This method fixes the vectorization bug from the original code. + """ + pass + + +# --- REGRESSION MODEL ------------------------------------------------------- +class GaussianProcessRegression(_GaussianProcessBase): + def fit(self, X, y): + pass + + def predict(self, X_test, return_std=False): + pass + + def log_marginal_likelihood(self): + pass + + def optimize_hyperparameters(self): + pass diff --git a/questions/175_guassian_mixture_regression/tests.json b/questions/175_guassian_mixture_regression/tests.json new file mode 100644 index 00000000..048c6257 --- /dev/null +++ b/questions/175_guassian_mixture_regression/tests.json @@ -0,0 +1,26 @@ +[ + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0], [7.5], [10.0]])\ny_train = np.sin(X_train).ravel()\ngp.fit(X_train, y_train)\nX_test = np.array([[1.25]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": "0.7787" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0], [7.5], [10.0]])\ny_train = np.sin(X_train).ravel()\ngp.fit(X_train, y_train)\nX_test = np.array([[1.25]])\nmu, std = gp.predict(X_test, return_std=True)\nprint(f\"mu={mu[0]:.4f}, std={std[0]:.4f}\")", + "expected_output": "mu=0.7787, std=0.6274" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0]])\ny_train = np.array([1.0, 3.0, 1.5])\ngp.fit(X_train, y_train)\nX_test = np.array([[2.5]])\nmu, std = gp.predict(X_test, return_std=True)\nprint(f\"mu={mu[0]:.4f}, std={std[0]:.4f}\")", + "expected_output": "mu=3.0000, std=0.0001" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='linear', kernel_params={'sigma_b': 0.1, 'sigma_v': 1.0}, noise=1e-8)\nX_train = np.array([[1], [2], [4]])\ny_train = np.array([3, 5, 9])\ngp.fit(X_train, y_train)\nX_test = np.array([[3.0]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": "7.0000" + }, + { + "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.5}, noise=1e-8)\nX_train = np.array([[1, 2], [3, 4], [5, 1]])\ny_train = np.sum(X_train, axis=1)\ngp.fit(X_train, y_train)\nX_test = np.array([[2, 3]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": 
"4.5444" + }, + { + "test": "import numpy as np\n# Monkey-patch the optimizer to prevent printing to stdout\noriginal_optimizer = GaussianProcessRegression.optimize_hyperparameters\ndef silent_optimizer(self):\n print_state = self.__dict__.get('__print__', True)\n if print_state:\n def objective(params):\n self.kernel_params = {'length_scale': np.exp(params[0]), 'sigma': np.exp(params[1])}\n self.fit(self.X_train, self.y_train)\n return -self.log_marginal_likelihood()\n init_params = np.log([self.kernel_params.get('length_scale', 1.0), self.kernel_params.get('sigma', 1.0)])\n res = minimize(objective, init_params, method='L-BFGS-B', bounds=[(-5, 5), (-5, 5)])\n self.kernel_params = {'length_scale': np.exp(res.x[0]), 'sigma': np.exp(res.x[1])}\n self.fit(self.X_train, self.y_train)\nGaussianProcessRegression.optimize_hyperparameters = silent_optimizer\n\nnp.random.seed(42)\ngp = GaussianProcessRegression(kernel='rbf', noise=0.01)\nX_train = np.linspace(0, 2 * np.pi, 10).reshape(-1, 1)\ny_train = np.sin(X_train).ravel() + np.random.randn(10) * 0.1\ngp.fit(X_train, y_train)\ngp.optimize_hyperparameters()\nX_test = np.array([[np.pi]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", + "expected_output": "0.0543" + } +] \ No newline at end of file From ab3f35af10fb21e99898b1987f0e566399e78ffe Mon Sep 17 00:00:00 2001 From: Coder1010ayush Date: Tue, 30 Sep 2025 22:18:01 +0530 Subject: [PATCH 2/4] updated --- build/175.json | 12 ++++------- .../__pycache__/solution.cpython-310.pyc | Bin 0 -> 5502 bytes .../solution.py | 19 +++++++++++++++++- .../tests.json | 10 +++------ 4 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 questions/175_guassian_mixture_regression/__pycache__/solution.cpython-310.pyc diff --git a/build/175.json b/build/175.json index 336f6679..c3c87229 100644 --- a/build/175.json +++ b/build/175.json @@ -15,7 +15,7 @@ "description": "## Problem\n\nProblem Statement: Task is to implement GaussianProcessRegression class which is a guassian process model for prediction regression problems.", "learn_section": "# **Gaussian Processes (GP): From-Scratch Regression Example**\n\n## **1. What’s a Gaussian Process?**\nA **Gaussian Process** defines a distribution over functions \\( f(\\cdot) \\). For any finite set of inputs \\(X=\\{x_i\\}_{i=1}^n\\), the function values \\(f(X)\\) follow a multivariate normal:\n\n\\[\nf(X) \\sim \\mathcal{N}\\big(0,\\; K(X,X)\\big),\n\\]\n\nwhere \\(K\\) is a **kernel** (covariance) function encoding similarity between inputs. With noisy targets \\(y=f(X)+\\varepsilon,\\; \\varepsilon\\sim\\mathcal{N}(0,\\sigma_n^2 I)\\), GP regression yields a closed-form posterior predictive mean and variance at new points \\(X_*\\).\n\n---\n\n## **2. The Implementation at a Glance**\nThe provided code builds a minimal yet complete GP regression stack:\n\n- **Kernels implemented**\n - Radial Basis Function (RBF / Squared Exponential)\n - Matérn (\\(\\nu=0.5, 1.5, 2.5\\), or general \\(\\nu\\))\n - Periodic\n - Linear\n - Rational Quadratic\n- **Core GP classes**\n - `_GaussianProcessBase`: kernel selection & covariance matrix computation\n - `GaussianProcessRegression`:\n - `fit`: builds \\(K\\), does **Cholesky decomposition**, solves \\(\\alpha\\)\n - `predict`: returns posterior mean & variance\n - `log_marginal_likelihood`: computes GP evidence\n - `optimize_hyperparameters`: basic optimizer (for RBF hyperparams)\n\n---\n\n## **3. 
Kernel Cheat-Sheet**\nLet \\(x, x'\\in\\mathbb{R}^d\\), \\(r=\\lVert x-x'\\rVert\\).\n\n- **RBF (SE):** \n \\[\n k_{\\text{RBF}}(x,x')=\\sigma^2\\exp\\!\\left(-\\tfrac{1}{2}\\tfrac{r^2}{\\ell^2}\\right)\n \\]\n\n- **Matérn (\\(\\nu=1.5\\)):** \n \\[\n k(x,x')=\\Big(1+\\tfrac{\\sqrt{3}\\,r}{\\ell}\\Big)\\exp\\!\\Big(-\\tfrac{\\sqrt{3}\\,r}{\\ell}\\Big)\n \\]\n\n- **Periodic:** \n \\[\n k(x,x')=\\sigma^2\\exp\\!\\left(-\\tfrac{2}{\\ell^2}\\sin^2\\!\\Big(\\tfrac{\\pi r}{p}\\Big)\\right)\n \\]\n\n- **Linear:** \n \\[\n k(x,x')=\\sigma_b^2+\\sigma_v^2\\,x^\\top x'\n \\]\n\n- **Rational Quadratic:** \n \\[\n k(x,x')=\\sigma^2\\Big(1+\\tfrac{r^2}{2\\alpha \\ell^2}\\Big)^{-\\alpha}\n \\]\n\n---\n\n## **4. GP Regression Mechanics**\n### Training\n1. Build covariance: \n \\(K = K(X,X) + \\sigma_n^2 I\\)\n2. Cholesky factorization: \n \\(K=LL^\\top\\)\n3. Solve \\(\\alpha\\): \n \\(L L^\\top \\alpha = y\\)\n\n### Prediction\nAt new inputs \\(X_*\\):\n- \\(K_* = K(X, X_*)\\), \\(K_{**} = K(X_*, X_*)\\)\n- **Mean:** \n \\(\\mu_* = K_*^\\top \\alpha\\)\n- **Covariance:** \n \\(\\Sigma_* = K_{**} - V^\\top V,\\;\\; V = L^{-1}K_*\\)\n\n### Model Selection\n- **Log Marginal Likelihood (LML):** \n \\[\n \\log p(y\\mid X)= -\\tfrac{1}{2}y^\\top \\alpha - \\sum\\nolimits_i \\log L_{ii} - \\tfrac{n}{2}\\log(2\\pi)\n \\]\n\n---\n\n## **5. Worked Example (Linear Kernel)**\n\n```python\nimport numpy as np\ngp = GaussianProcessRegression(kernel='linear',\n kernel_params={'sigma_b': 0.0, 'sigma_v': 1.0},\n noise=1e-8)\n\nX_train = np.array([[1], [2], [4]])\ny_train = np.array([3, 5, 9]) # y = 2x + 1\ngp.fit(X_train, y_train)\n\nX_test = np.array([[3.0]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\") # -> 7.0000\n```\n\n\n## **6. When to Use GP Regression**\n\n- **Small-to-medium datasets** where uncertainty estimates are valuable \n- Cases requiring **predictive intervals** (not just point predictions) \n- **Nonparametric modeling** with kernel priors \n- Automatic hyperparameter tuning via **marginal likelihood** \n\n---\n\n## **7. 
Practical Tips**\n\n- Always add **jitter** (`1e-6`) to the diagonal for numerical stability \n- **Standardize inputs/outputs** before training \n- Be aware: Exact GP has complexity **\\(\\mathcal{O}(n^3)\\)** in time and **\\(\\mathcal{O}(n^2)\\)** in memory \n- Choose kernels to match problem structure: \n - **RBF:** smooth functions \n - **Matérn:** rougher functions \n - **Periodic:** seasonal/cyclical data \n - **Linear:** global linear trends ", "starter_code": "import math # ---------------------------------------- utf-8 encoding ---------------------------------\n\n# This file contains Gaussian Process implementation.\nimport numpy as np\nimport math\n\n\ndef matern_kernel(x: np.ndarray, x_prime: np.ndarray, length_scale=1.0, nu=1.5):\n pass\n\n\ndef rbf_kernel(x: np.ndarray, x_prime, sigma=1.0, length_scale=1.0):\n pass\n\n\ndef periodic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, period=1.0\n):\n pass\n\n\ndef linear_kernel(x: np.ndarray, x_prime: np.ndarray, sigma_b=1.0, sigma_v=1.0):\n pass\n\n\ndef rational_quadratic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, alpha=1.0\n):\n pass\n\n\n# --- BASE CLASS -------------------------------------------------------------\n\n\nclass _GaussianProcessBase:\n def __init__(self, kernel=\"rbf\", noise=1e-5, kernel_params=None):\n pass\n\n def _select_kernel(self, x1, x2):\n \"\"\"Selects and computes the kernel value for two single data points.\"\"\"\n pass\n\n def _compute_covariance(self, X1, X2):\n \"\"\"\n Computes the covariance matrix between two sets of points.\n This method fixes the vectorization bug from the original code.\n \"\"\"\n pass\n\n\n# --- REGRESSION MODEL -------------------------------------------------------\nclass GaussianProcessRegression(_GaussianProcessBase):\n def fit(self, X, y):\n pass\n\n def predict(self, X_test, return_std=False):\n pass\n\n def log_marginal_likelihood(self):\n pass\n\n def optimize_hyperparameters(self):\n pass", - "solution": "# ---------------------------------------- utf-8 encoding ---------------------------------\n# This file contains Gaussian Process implementation.\nimport numpy as np\nimport math\nfrom scipy.spatial.distance import euclidean\nfrom scipy.special import kv as bessel_kv\nfrom scipy.special import gamma\nfrom scipy.linalg import cholesky, solve_triangular\nfrom scipy.optimize import minimize\nfrom scipy.special import expit, softmax\n\n\n# --- KERNEL FUNCTIONS --------------------------------------------------------\ndef matern_kernel(x: np.ndarray, x_prime: np.ndarray, length_scale=1.0, nu=1.5):\n d = euclidean(x, x_prime)\n if d == 0:\n return 1.0 # Covariance with self is 1 before scaling\n if nu == 0.5:\n return np.exp(-d / length_scale)\n elif nu == 1.5:\n return (1 + np.sqrt(3) * d / length_scale) * np.exp(\n -np.sqrt(3) * d / length_scale\n )\n elif nu == 2.5:\n return (\n 1 + np.sqrt(5) * d / length_scale + 5 * d**2 / (3 * length_scale**2)\n ) * np.exp(-np.sqrt(5) * d / length_scale)\n else:\n factor = (2 ** (1 - nu)) / gamma(nu)\n scaled_d = np.sqrt(2 * nu) * d / length_scale\n return factor * (scaled_d**nu) * bessel_kv(nu, scaled_d)\n\n\ndef rbf_kernel(x: np.ndarray, x_prime, sigma=1.0, length_scale=1.0):\n # This is a squared exponential kernel\n return sigma**2 * np.exp(-0.5 * np.linalg.norm(x - x_prime) ** 2 / length_scale**2)\n\n\ndef periodic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, period=1.0\n):\n return sigma**2 * np.exp(\n -2 * np.sin(np.pi * 
np.linalg.norm(x - x_prime) / period) ** 2 / length_scale**2\n )\n\n\ndef linear_kernel(x: np.ndarray, x_prime: np.ndarray, sigma_b=1.0, sigma_v=1.0):\n return sigma_b**2 + sigma_v**2 * np.dot(x, x_prime)\n\n\ndef rational_quadratic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, alpha=1.0\n):\n return sigma**2 * (\n 1 + np.linalg.norm(x - x_prime) ** 2 / (2 * alpha * length_scale**2)\n ) ** (-alpha)\n\n\n# --- BASE CLASS -------------------------------------------------------------\n\n\nclass _GaussianProcessBase:\n def __init__(self, kernel=\"rbf\", noise=1e-5, kernel_params=None):\n self.kernel_name = kernel\n self.noise = noise\n self.kernel_params = kernel_params if kernel_params else {}\n self.X_train = None\n self.y_train = None\n self.K = None\n\n def _select_kernel(self, x1, x2):\n \"\"\"Selects and computes the kernel value for two single data points.\"\"\"\n if self.kernel_name == \"rbf\":\n return rbf_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"matern\":\n return matern_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"periodic\":\n return periodic_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"linear\":\n return linear_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"rational_quadratic\":\n return rational_quadratic_kernel(x1, x2, **self.kernel_params)\n else:\n raise ValueError(\n \"Unsupported kernel. Choose from ['rbf', 'matern', 'periodic', 'linear', 'rational_quadratic'].\"\n )\n\n def _compute_covariance(self, X1, X2):\n \"\"\"\n Computes the covariance matrix between two sets of points.\n This method fixes the vectorization bug from the original code.\n \"\"\"\n # Ensuring X1 and X2 are 2D arrays\n X1 = np.atleast_2d(X1)\n X2 = np.atleast_2d(X2)\n\n n1, _ = X1.shape\n n2, _ = X2.shape\n K = np.zeros((n1, n2))\n for i in range(n1):\n for j in range(n2):\n K[i, j] = self._select_kernel(X1[i], X2[j])\n return K\n\n\n# --- REGRESSION MODEL -------------------------------------------------------\nclass GaussianProcessRegression(_GaussianProcessBase):\n def fit(self, X, y):\n self.X_train = np.asarray(X)\n self.y_train = np.asarray(y)\n self.K = self._compute_covariance(\n self.X_train, self.X_train\n ) + self.noise * np.eye(len(self.X_train))\n\n # Compute Cholesky decomposition for stable inversion\n self.L = cholesky(self.K, lower=True)\n # alpha = K_inv * y\n self.alpha = solve_triangular(\n self.L.T, solve_triangular(self.L, self.y_train, lower=True)\n )\n\n def predict(self, X_test, return_std=False):\n X_test = np.atleast_2d(X_test)\n K_s = self._compute_covariance(self.X_train, X_test)\n K_ss = self._compute_covariance(X_test, X_test)\n\n # Compute predictive mean\n mu = K_s.T @ self.alpha\n\n # Compute predictive variance\n v = solve_triangular(self.L, K_s, lower=True)\n cov = K_ss - v.T @ v\n\n if return_std:\n return mu, np.sqrt(np.diag(cov))\n return mu\n\n def log_marginal_likelihood(self):\n return (\n -0.5 * (self.y_train.T @ self.alpha)\n - np.sum(np.log(np.diag(self.L)))\n - len(self.X_train) / 2 * np.log(2 * np.pi)\n )\n\n def optimize_hyperparameters(self):\n # NOTE: This is a simplified optimizer for 'rbf' kernel's params.\n def objective(params):\n self.kernel_params = {\n \"length_scale\": np.exp(params[0]),\n \"sigma\": np.exp(params[1]),\n }\n self.fit(self.X_train, self.y_train)\n return -self.log_marginal_likelihood()\n\n init_params = np.log(\n [\n self.kernel_params.get(\"length_scale\", 1.0),\n self.kernel_params.get(\"sigma\", 1.0),\n ]\n )\n res 
= minimize(\n objective, init_params, method=\"L-BFGS-B\", bounds=[(-5, 5), (-5, 5)]\n )\n\n self.kernel_params = {\n \"length_scale\": np.exp(res.x[0]),\n \"sigma\": np.exp(res.x[1]),\n }\n # Re-fit with optimal hyperparameters\n self.fit(self.X_train, self.y_train)\n print(\"Optimized Hyperparameters:\", self.kernel_params)", + "solution": "# ---------------------------------------- utf-8 encoding ---------------------------------\n# This file contains Gaussian Process implementation.\nimport numpy as np\nimport math\nfrom scipy.spatial.distance import euclidean\nfrom scipy.special import kv as bessel_kv\nfrom scipy.special import gamma\nfrom scipy.linalg import cholesky, solve_triangular\nfrom scipy.optimize import minimize\nfrom scipy.special import expit, softmax\n\n\n# --- KERNEL FUNCTIONS --------------------------------------------------------\ndef matern_kernel(x: np.ndarray, x_prime: np.ndarray, length_scale=1.0, nu=1.5):\n d = euclidean(x, x_prime)\n if d == 0:\n return 1.0 # Covariance with self is 1 before scaling\n if nu == 0.5:\n return np.exp(-d / length_scale)\n elif nu == 1.5:\n return (1 + np.sqrt(3) * d / length_scale) * np.exp(\n -np.sqrt(3) * d / length_scale\n )\n elif nu == 2.5:\n return (\n 1 + np.sqrt(5) * d / length_scale + 5 * d**2 / (3 * length_scale**2)\n ) * np.exp(-np.sqrt(5) * d / length_scale)\n else:\n factor = (2 ** (1 - nu)) / gamma(nu)\n scaled_d = np.sqrt(2 * nu) * d / length_scale\n return factor * (scaled_d**nu) * bessel_kv(nu, scaled_d)\n\n\ndef rbf_kernel(x: np.ndarray, x_prime, sigma=1.0, length_scale=1.0):\n # This is a squared exponential kernel\n\n # Calculate the squared euclidean distance\n sq_norm = np.linalg.norm(x - x_prime) ** 2\n\n # Correctly implement the formula\n return sigma**2 * np.exp(-sq_norm / (2 * length_scale**2))\n\n\ndef periodic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, period=1.0\n):\n return sigma**2 * np.exp(\n -2 * np.sin(np.pi * np.linalg.norm(x - x_prime) / period) ** 2 / length_scale**2\n )\n\n\ndef linear_kernel(x: np.ndarray, x_prime: np.ndarray, sigma_b=1.0, sigma_v=1.0):\n return sigma_b**2 + sigma_v**2 * np.dot(x, x_prime)\n\n\ndef rational_quadratic_kernel(\n x: np.ndarray, x_prime: np.ndarray, sigma=1.0, length_scale=1.0, alpha=1.0\n):\n return sigma**2 * (\n 1 + np.linalg.norm(x - x_prime) ** 2 / (2 * alpha * length_scale**2)\n ) ** (-alpha)\n\n\n# --- BASE CLASS -------------------------------------------------------------\n\n\nclass _GaussianProcessBase:\n def __init__(self, kernel=\"rbf\", noise=1e-5, kernel_params=None):\n self.kernel_name = kernel\n self.noise = noise\n self.kernel_params = kernel_params if kernel_params else {}\n self.X_train = None\n self.y_train = None\n self.K = None\n\n def _select_kernel(self, x1, x2):\n \"\"\"Selects and computes the kernel value for two single data points.\"\"\"\n if self.kernel_name == \"rbf\":\n return rbf_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"matern\":\n return matern_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"periodic\":\n return periodic_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"linear\":\n return linear_kernel(x1, x2, **self.kernel_params)\n elif self.kernel_name == \"rational_quadratic\":\n return rational_quadratic_kernel(x1, x2, **self.kernel_params)\n else:\n raise ValueError(\n \"Unsupported kernel. 
Choose from ['rbf', 'matern', 'periodic', 'linear', 'rational_quadratic'].\"\n )\n\n def _compute_covariance(self, X1, X2):\n \"\"\"\n Computes the covariance matrix between two sets of points.\n This method fixes the vectorization bug from the original code.\n \"\"\"\n # Ensuring X1 and X2 are 2D arrays\n X1 = np.atleast_2d(X1)\n X2 = np.atleast_2d(X2)\n\n n1, _ = X1.shape\n n2, _ = X2.shape\n K = np.zeros((n1, n2))\n for i in range(n1):\n for j in range(n2):\n K[i, j] = self._select_kernel(X1[i], X2[j])\n return K\n\n\n# --- REGRESSION MODEL -------------------------------------------------------\nclass GaussianProcessRegression(_GaussianProcessBase):\n def fit(self, X, y):\n self.X_train = np.asarray(X)\n self.y_train = np.asarray(y)\n self.K = self._compute_covariance(\n self.X_train, self.X_train\n ) + self.noise * np.eye(len(self.X_train))\n\n # Compute Cholesky decomposition for stable inversion\n self.L = cholesky(self.K, lower=True)\n # alpha = K_inv * y\n self.alpha = solve_triangular(\n self.L.T, solve_triangular(self.L, self.y_train, lower=True)\n )\n\n def predict(self, X_test, return_std=False):\n X_test = np.atleast_2d(X_test)\n K_s = self._compute_covariance(self.X_train, X_test)\n K_ss = self._compute_covariance(X_test, X_test)\n\n # Compute predictive mean\n mu = K_s.T @ self.alpha\n\n # Compute predictive variance\n v = solve_triangular(self.L, K_s, lower=True)\n cov = K_ss - v.T @ v\n\n if return_std:\n return mu, np.sqrt(np.diag(cov))\n return mu\n\n def log_marginal_likelihood(self):\n return (\n -0.5 * (self.y_train.T @ self.alpha)\n - np.sum(np.log(np.diag(self.L)))\n - len(self.X_train) / 2 * np.log(2 * np.pi)\n )\n\n def optimize_hyperparameters(self):\n # NOTE: This is a simplified optimizer for 'rbf' kernel's params.\n def objective(params):\n self.kernel_params = {\n \"length_scale\": np.exp(params[0]),\n \"sigma\": np.exp(params[1]),\n }\n self.fit(self.X_train, self.y_train)\n return -self.log_marginal_likelihood()\n\n init_params = np.log(\n [\n self.kernel_params.get(\"length_scale\", 1.0),\n self.kernel_params.get(\"sigma\", 1.0),\n ]\n )\n res = minimize(\n objective, init_params, method=\"L-BFGS-B\", bounds=[(-5, 5), (-5, 5)]\n )\n\n self.kernel_params = {\n \"length_scale\": np.exp(res.x[0]),\n \"sigma\": np.exp(res.x[1]),\n }\n # Re-fit with optimal hyperparameters\n self.fit(self.X_train, self.y_train)\n print(\"Optimized Hyperparameters:\", self.kernel_params)\n\n\n# if __name__ == \"__main__\":\n# gp = GaussianProcessRegression(\n# kernel=\"rbf\", kernel_params={\"sigma\": 1.0, \"length_scale\": 1.0}, noise=1e-8\n# )\n# X_train = np.array([[0], [2.5], [5.0]])\n# y_train = np.array([1.0, 3.0, 1.5])\n# gp.fit(X_train, y_train)\n# X_test = np.array([[2.5]])\n# mu, std = gp.predict(X_test, return_std=True)\n# print(f\"mu={mu[0]:.4f}, std={std[0]:.4f}\")", "example": { "input": "import numpy as np\ngp = GaussianProcessRegression(kernel='linear', kernel_params={'sigma_b': 0.0, 'sigma_v': 1.0}, noise=1e-8)\nX_train = np.array([[1], [2], [4]])\ny_train = np.array([3, 5, 9])\ngp.fit(X_train, y_train)\nX_test = np.array([[3.0]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", "output": "7.0000", @@ -24,11 +24,11 @@ "test_cases": [ { "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0], [7.5], [10.0]])\ny_train = np.sin(X_train).ravel()\ngp.fit(X_train, y_train)\nX_test = np.array([[1.25]])\nmu = 
gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", - "expected_output": "0.7787" + "expected_output": "0.2814" }, { "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0], [7.5], [10.0]])\ny_train = np.sin(X_train).ravel()\ngp.fit(X_train, y_train)\nX_test = np.array([[1.25]])\nmu, std = gp.predict(X_test, return_std=True)\nprint(f\"mu={mu[0]:.4f}, std={std[0]:.4f}\")", - "expected_output": "mu=0.7787, std=0.6274" + "expected_output": "mu=0.2814, std=0.7734" }, { "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.0}, noise=1e-8)\nX_train = np.array([[0], [2.5], [5.0]])\ny_train = np.array([1.0, 3.0, 1.5])\ngp.fit(X_train, y_train)\nX_test = np.array([[2.5]])\nmu, std = gp.predict(X_test, return_std=True)\nprint(f\"mu={mu[0]:.4f}, std={std[0]:.4f}\")", @@ -40,11 +40,7 @@ }, { "test": "import numpy as np\ngp = GaussianProcessRegression(kernel='rbf', kernel_params={'sigma': 1.0, 'length_scale': 1.5}, noise=1e-8)\nX_train = np.array([[1, 2], [3, 4], [5, 1]])\ny_train = np.sum(X_train, axis=1)\ngp.fit(X_train, y_train)\nX_test = np.array([[2, 3]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", - "expected_output": "4.5444" - }, - { - "test": "import numpy as np\n# Monkey-patch the optimizer to prevent printing to stdout\noriginal_optimizer = GaussianProcessRegression.optimize_hyperparameters\ndef silent_optimizer(self):\n print_state = self.__dict__.get('__print__', True)\n if print_state:\n def objective(params):\n self.kernel_params = {'length_scale': np.exp(params[0]), 'sigma': np.exp(params[1])}\n self.fit(self.X_train, self.y_train)\n return -self.log_marginal_likelihood()\n init_params = np.log([self.kernel_params.get('length_scale', 1.0), self.kernel_params.get('sigma', 1.0)])\n res = minimize(objective, init_params, method='L-BFGS-B', bounds=[(-5, 5), (-5, 5)])\n self.kernel_params = {'length_scale': np.exp(res.x[0]), 'sigma': np.exp(res.x[1])}\n self.fit(self.X_train, self.y_train)\nGaussianProcessRegression.optimize_hyperparameters = silent_optimizer\n\nnp.random.seed(42)\ngp = GaussianProcessRegression(kernel='rbf', noise=0.01)\nX_train = np.linspace(0, 2 * np.pi, 10).reshape(-1, 1)\ny_train = np.sin(X_train).ravel() + np.random.randn(10) * 0.1\ngp.fit(X_train, y_train)\ngp.optimize_hyperparameters()\nX_test = np.array([[np.pi]])\nmu = gp.predict(X_test)\nprint(f\"{mu[0]:.4f}\")", - "expected_output": "0.0543" + "expected_output": "5.5553" } ] } \ No newline at end of file diff --git a/questions/175_guassian_mixture_regression/__pycache__/solution.cpython-310.pyc b/questions/175_guassian_mixture_regression/__pycache__/solution.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..74a21fbd48a9d8d02246691c2c04483e30e26d60 GIT binary patch literal 5502 zcmb7IOK%(36`p(N!QqgSX^YV#wiBk0I80-ycAB|ll!N)}B8d~xNfY1aBwFXmf-r#uiP>+m;!Dvno&B$sL z6m9^w8@9sDuoyWFCn_~cs&9qmXsR)#@B-FNH>RUgjZ^49EgYY9DxyTNWr(tvdT2II zi?jK7i;HP-3L|HVwm5fWZAF~M%*=T0Xr0sGz^?XwT=6E?nFoYAV9u!cOx&T0qF z!|J2wiJ~xv`n)!yrN&U-W@ojn{6@v0Ud2tvU*EEZ=Fl4Ibb5*%Lu;GUed^A#WT0M> z_O{XReEOO!;k8TSyE?#V+_<(&TD;2v>XxKe#=E^thY&I&S(B+G4rj#`KS})1g-{E! 
 [... remaining base85 binary payload of solution.cpython-310.pyc omitted ...]

Date: Fri, 10 Oct 2025 08:18:56 +0530
Subject: [PATCH 3/4] learn.md now uses $ and 48689 properly as given instructions

---
 .../175_guassian_mixture_regression/learn.md | 174 ++++++++++--------
 .../solution.py                              |  20 +-
 2 files changed, 112 insertions(+), 82 deletions(-)

diff --git a/questions/175_guassian_mixture_regression/learn.md b/questions/175_guassian_mixture_regression/learn.md
index 61c97f8d..14f6a3df 100644
--- a/questions/175_guassian_mixture_regression/learn.md
+++ b/questions/175_guassian_mixture_regression/learn.md
@@ -1,87 +1,116 @@
 # **Gaussian Processes (GP): From-Scratch Regression Example**
 ## **1. What’s a Gaussian Process?**
-A **Gaussian Process** defines a distribution over functions \( f(\cdot) \). For any finite set of inputs \(X=\{x_i\}_{i=1}^n\), the function values \(f(X)\) follow a multivariate normal:
-\[
-f(X) \sim \mathcal{N}\big(0,\; K(X,X)\big),
-\]
+A **Gaussian Process** defines a distribution over functions $f(\cdot)$.
+For any finite set of inputs $X = \{x_i\}_{i=1}^n$, the function values $f(X)$ follow a multivariate normal:
 
-where \(K\) is a **kernel** (covariance) function encoding similarity between inputs. With noisy targets \(y=f(X)+\varepsilon,\; \varepsilon\sim\mathcal{N}(0,\sigma_n^2 I)\), GP regression yields a closed-form posterior predictive mean and variance at new points \(X_*\).
+$$
+f(X) \sim \mathcal{N}\big(0,\; K(X,X)\big)
+$$
+
+where $K$ is a **kernel** (covariance) function encoding similarity between inputs.
+With noisy targets $y = f(X) + \varepsilon,\; \varepsilon \sim \mathcal{N}(0,\sigma_n^2 I)$,
+GP regression yields a closed-form posterior predictive mean and variance at new points $X_*$.
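+
+Before diving into the class-based implementation, these two ideas (a prior over functions and a closed-form posterior) can be sketched in plain NumPy. The snippet below is illustrative only and is not part of the exercise's API; it assumes an RBF kernel with unit variance and unit length-scale, and made-up observation points.
+
+```python
+import numpy as np
+
+def rbf(a, b):
+    # Squared-exponential kernel with sigma = 1, length_scale = 1 (assumed values)
+    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2)
+
+rng = np.random.default_rng(0)
+X = np.linspace(0.0, 5.0, 50)                       # grid on which we view functions
+K = rbf(X, X) + 1e-9 * np.eye(len(X))               # prior covariance (+ jitter)
+prior_draws = rng.multivariate_normal(np.zeros(len(X)), K, size=3)  # 3 sample functions
+
+# Closed-form posterior given noisy observations (X_obs, y_obs)
+X_obs = np.array([1.0, 3.0, 4.5])
+y_obs = np.sin(X_obs)
+K_oo = rbf(X_obs, X_obs) + 1e-2 * np.eye(len(X_obs))   # sigma_n^2 = 1e-2 (assumed)
+K_os = rbf(X_obs, X)
+mu_post = K_os.T @ np.linalg.solve(K_oo, y_obs)         # posterior predictive mean
+cov_post = rbf(X, X) - K_os.T @ np.linalg.solve(K_oo, K_os)  # posterior covariance
+print(prior_draws.shape, mu_post[:3].round(3))
+```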
 
 ---
 
 ## **2. The Implementation at a Glance**
+
 The provided code builds a minimal yet complete GP regression stack:
-- **Kernels implemented**
-  - Radial Basis Function (RBF / Squared Exponential)
-  - Matérn (\(\nu=0.5, 1.5, 2.5\), or general \(\nu\))
-  - Periodic
-  - Linear
-  - Rational Quadratic
-- **Core GP classes**
-  - `_GaussianProcessBase`: kernel selection & covariance matrix computation
-  - `GaussianProcessRegression`:
-    - `fit`: builds \(K\), does **Cholesky decomposition**, solves \(\alpha\)
-    - `predict`: returns posterior mean & variance
-    - `log_marginal_likelihood`: computes GP evidence
-    - `optimize_hyperparameters`: basic optimizer (for RBF hyperparams)
+* **Kernels implemented**
+
+  * Radial Basis Function (RBF / Squared Exponential)
+  * Matérn ($\nu = 0.5, 1.5, 2.5$, or general $\nu$)
+  * Periodic
+  * Linear
+  * Rational Quadratic
+
+* **Core GP classes**
+
+  * `_GaussianProcessBase`: kernel selection & covariance matrix computation
+  * `GaussianProcessRegression`:
+
+    * `fit`: builds $K$, does **Cholesky decomposition**, solves for $\alpha$
+    * `predict`: returns posterior mean & variance
+    * `log_marginal_likelihood`: computes GP evidence
+    * `optimize_hyperparameters`: basic optimizer (for RBF hyperparams)
 
 ---
 
 ## **3. Kernel Cheat-Sheet**
-Let \(x, x'\in\mathbb{R}^d\), \(r=\lVert x-x'\rVert\).
-- **RBF (SE):**
-  \[
-  k_{\text{RBF}}(x,x')=\sigma^2\exp\!\left(-\tfrac{1}{2}\tfrac{r^2}{\ell^2}\right)
-  \]
+Let $x, x' \in \mathbb{R}^d$ and $r = \lVert x - x' \rVert$.
+
+* **RBF (SE):**
+  $$
+  k_{\text{RBF}}(x,x') = \sigma^2 \exp\!\left(-\tfrac{1}{2}\tfrac{r^2}{\ell^2}\right)
+  $$
 
-- **Matérn (\(\nu=1.5\)):**
-  \[
-  k(x,x')=\Big(1+\tfrac{\sqrt{3}\,r}{\ell}\Big)\exp\!\Big(-\tfrac{\sqrt{3}\,r}{\ell}\Big)
-  \]
+* **Matérn ($\nu = 1.5$):**
+  $$
+  k(x,x') = \Big(1 + \tfrac{\sqrt{3}\,r}{\ell}\Big)\exp\!\Big(-\tfrac{\sqrt{3}\,r}{\ell}\Big)
+  $$
 
-- **Periodic:**
-  \[
-  k(x,x')=\sigma^2\exp\!\left(-\tfrac{2}{\ell^2}\sin^2\!\Big(\tfrac{\pi r}{p}\Big)\right)
-  \]
+* **Periodic:**
+  $$
+  k(x,x') = \sigma^2 \exp\!\left(-\tfrac{2}{\ell^2}\sin^2\!\Big(\tfrac{\pi r}{p}\Big)\right)
+  $$
 
-- **Linear:**
-  \[
-  k(x,x')=\sigma_b^2+\sigma_v^2\,x^\top x'
-  \]
+* **Linear:**
+  $$
+  k(x,x') = \sigma_b^2 + \sigma_v^2\,x^\top x'
+  $$
 
-- **Rational Quadratic:**
-  \[
-  k(x,x')=\sigma^2\Big(1+\tfrac{r^2}{2\alpha \ell^2}\Big)^{-\alpha}
-  \]
+* **Rational Quadratic:**
+  $$
+  k(x,x') = \sigma^2\Big(1 + \tfrac{r^2}{2\alpha \ell^2}\Big)^{-\alpha}
+  $$
 
 ---
 
 ## **4. GP Regression Mechanics**
-### Training
-1. Build covariance:
-   \(K = K(X,X) + \sigma_n^2 I\)
-2. Cholesky factorization:
-   \(K=LL^\top\)
-3. Solve \(\alpha\):
-   \(L L^\top \alpha = y\)
-
-### Prediction
-At new inputs \(X_*\):
-- \(K_* = K(X, X_*)\), \(K_{**} = K(X_*, X_*)\)
-- **Mean:**
-  \(\mu_* = K_*^\top \alpha\)
-- **Covariance:**
-  \(\Sigma_* = K_{**} - V^\top V,\;\; V = L^{-1}K_*\)
-
-### Model Selection
-- **Log Marginal Likelihood (LML):**
-  \[
-  \log p(y\mid X)= -\tfrac{1}{2}y^\top \alpha - \sum\nolimits_i \log L_{ii} - \tfrac{n}{2}\log(2\pi)
-  \]
+
+### **Training**
+
+1. Build covariance:
+   $$
+   K = K(X,X) + \sigma_n^2 I
+   $$
+
+2. Cholesky factorization:
+   $$
+   K = L L^\top
+   $$
+
+3. Solve $\alpha$:
+   $$
+   L L^\top \alpha = y
+   $$
+
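+In code, these three steps are what `GaussianProcessRegression.fit` does. The snippet below is a stand-alone sketch of that logic; the helper name `fit_gp`, the kernel argument `k`, and the noise value are illustrative assumptions rather than part of the graded interface.
+
+```python
+import numpy as np
+from scipy.linalg import cholesky, solve_triangular
+
+def fit_gp(X, y, k, noise=1e-8):
+    """Cholesky-based GP training: returns L and alpha = K^{-1} y."""
+    n = len(X)
+    # 1. Build the covariance matrix K(X, X) + sigma_n^2 I
+    K = np.array([[k(X[i], X[j]) for j in range(n)] for i in range(n)])
+    K += noise * np.eye(n)
+    # 2. Factorize K = L L^T (lower-triangular L)
+    L = cholesky(K, lower=True)
+    # 3. Solve L L^T alpha = y with two triangular solves
+    alpha = solve_triangular(L.T, solve_triangular(L, y, lower=True))
+    return L, alpha
+```
+
+Working with the factor $L$ instead of inverting $K$ directly is both cheaper and numerically safer; the same factor is reused for the predictive variance and the log marginal likelihood below.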
+### **Prediction**
+
+At new inputs $X_*$:
+
+* $K_* = K(X, X_*)$ and $K_{**} = K(X_*, X_*)$
+
+* **Mean:**
+  $$
+  \mu_* = K_*^\top \alpha
+  $$
+
+* **Covariance:**
+  $$
+  \Sigma_* = K_{**} - V^\top V, \quad V = L^{-1} K_*
+  $$
+
+### **Model Selection**
+
+* **Log Marginal Likelihood (LML):**
+  $$
+  \log p(y \mid X) = -\tfrac{1}{2} y^\top \alpha - \sum\nolimits_i \log L_{ii} - \tfrac{n}{2}\log(2\pi)
+  $$
 
 ---
 
@@ -102,24 +131,25 @@ mu = gp.predict(X_test)
 print(f"{mu[0]:.4f}")   # -> 7.0000
 ```
 
+---
 
 ## **6. When to Use GP Regression**
-- **Small-to-medium datasets** where uncertainty estimates are valuable
-- Cases requiring **predictive intervals** (not just point predictions)
-- **Nonparametric modeling** with kernel priors
-- Automatic hyperparameter tuning via **marginal likelihood**
+* **Small-to-medium datasets** where uncertainty estimates are valuable
+* Cases requiring **predictive intervals** (not just point predictions)
+* **Nonparametric modeling** with kernel priors
+* Automatic hyperparameter tuning via **marginal likelihood**
 
 ---
 
 ## **7. Practical Tips**
-- Always add **jitter** (`1e-6`) to the diagonal for numerical stability
-- **Standardize inputs/outputs** before training
-- Be aware: Exact GP has complexity **\(\mathcal{O}(n^3)\)** in time and **\(\mathcal{O}(n^2)\)** in memory
-- Choose kernels to match problem structure:
-  - **RBF:** smooth functions
-  - **Matérn:** rougher functions
-  - **Periodic:** seasonal/cyclical data
-  - **Linear:** global linear trends
+* Always add a small **jitter** ($10^{-6}$) to the diagonal for numerical stability
+* **Standardize inputs/outputs** before training
+* Be aware: Exact GP has complexity **$\mathcal{O}(n^3)$** in time and **$\mathcal{O}(n^2)$** in memory
+* Choose kernels to match problem structure:
+  * **RBF:** smooth functions
+  * **Matérn:** rougher functions
+  * **Periodic:** seasonal/cyclical data
+  * **Linear:** global linear trends
diff --git a/questions/175_guassian_mixture_regression/solution.py b/questions/175_guassian_mixture_regression/solution.py
index bee7e3b5..4e470f22 100644
--- a/questions/175_guassian_mixture_regression/solution.py
+++ b/questions/175_guassian_mixture_regression/solution.py
@@ -176,13 +176,13 @@ def objective(params):
     print("Optimized Hyperparameters:", self.kernel_params)
 
 
-# if __name__ == "__main__":
-#     gp = GaussianProcessRegression(
-#         kernel="rbf", kernel_params={"sigma": 1.0, "length_scale": 1.0}, noise=1e-8
-#     )
-#     X_train = np.array([[0], [2.5], [5.0]])
-#     y_train = np.array([1.0, 3.0, 1.5])
-#     gp.fit(X_train, y_train)
-#     X_test = np.array([[2.5]])
-#     mu, std = gp.predict(X_test, return_std=True)
-#     print(f"mu={mu[0]:.4f}, std={std[0]:.4f}")
+if __name__ == "__main__":
+    gp = GaussianProcessRegression(
+        kernel="linear", kernel_params={"sigma_b": 0.0, "sigma_v": 1.0}, noise=1e-8
+    )
+    X_train = np.array([[1], [2], [4]])
+    y_train = np.array([3, 5, 9])
+    gp.fit(X_train, y_train)
+    X_test = np.array([[3.0]])
+    mu = gp.predict(X_test)
+    print(f"{mu[0]:.4f}")

From e4f9712a01b253484d39b89d95cdf4948cecb8b4 Mon Sep 17 00:00:00 2001
From: Coder1010ayush
Date: Fri, 10 Oct 2025 23:13:29 +0530
Subject: [PATCH 4/4] ids is fixed to 186 and also folder name is remaned and now starting with 186_ and also removed all the print statement

---
 .../__pycache__/solution.cpython-310.pyc | Bin
 .../description.md                       |   0
 .../example.json                         |   0
 .../learn.md                             |   0
 .../meta.json                            |   4 ++--
 .../solution.py                          |   2 --
 .../starter_code.py                      |   0
 .../tests.json                           |   0
 8 files changed, 2 
insertions(+), 4 deletions(-) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/__pycache__/solution.cpython-310.pyc (100%) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/description.md (100%) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/example.json (100%) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/learn.md (100%) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/meta.json (94%) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/solution.py (98%) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/starter_code.py (100%) rename questions/{175_guassian_mixture_regression => 186_guassian_mixture_regression}/tests.json (100%) diff --git a/questions/175_guassian_mixture_regression/__pycache__/solution.cpython-310.pyc b/questions/186_guassian_mixture_regression/__pycache__/solution.cpython-310.pyc similarity index 100% rename from questions/175_guassian_mixture_regression/__pycache__/solution.cpython-310.pyc rename to questions/186_guassian_mixture_regression/__pycache__/solution.cpython-310.pyc diff --git a/questions/175_guassian_mixture_regression/description.md b/questions/186_guassian_mixture_regression/description.md similarity index 100% rename from questions/175_guassian_mixture_regression/description.md rename to questions/186_guassian_mixture_regression/description.md diff --git a/questions/175_guassian_mixture_regression/example.json b/questions/186_guassian_mixture_regression/example.json similarity index 100% rename from questions/175_guassian_mixture_regression/example.json rename to questions/186_guassian_mixture_regression/example.json diff --git a/questions/175_guassian_mixture_regression/learn.md b/questions/186_guassian_mixture_regression/learn.md similarity index 100% rename from questions/175_guassian_mixture_regression/learn.md rename to questions/186_guassian_mixture_regression/learn.md diff --git a/questions/175_guassian_mixture_regression/meta.json b/questions/186_guassian_mixture_regression/meta.json similarity index 94% rename from questions/175_guassian_mixture_regression/meta.json rename to questions/186_guassian_mixture_regression/meta.json index 73c96a0e..c74e23b3 100644 --- a/questions/175_guassian_mixture_regression/meta.json +++ b/questions/186_guassian_mixture_regression/meta.json @@ -1,5 +1,5 @@ { - "id": "175", + "id": "186", "title": "Gaussian Process for Regression", "difficulty": "medium", "category": "Machine Learning", @@ -12,4 +12,4 @@ "name": "Ayush" } ] -} +} \ No newline at end of file diff --git a/questions/175_guassian_mixture_regression/solution.py b/questions/186_guassian_mixture_regression/solution.py similarity index 98% rename from questions/175_guassian_mixture_regression/solution.py rename to questions/186_guassian_mixture_regression/solution.py index 4e470f22..11307bf7 100644 --- a/questions/175_guassian_mixture_regression/solution.py +++ b/questions/186_guassian_mixture_regression/solution.py @@ -173,7 +173,6 @@ def objective(params): } # Re-fit with optimal hyperparameters self.fit(self.X_train, self.y_train) - print("Optimized Hyperparameters:", self.kernel_params) if __name__ == "__main__": @@ -185,4 +184,3 @@ def objective(params): gp.fit(X_train, y_train) X_test = np.array([[3.0]]) mu = gp.predict(X_test) - print(f"{mu[0]:.4f}") diff --git 
a/questions/175_guassian_mixture_regression/starter_code.py b/questions/186_guassian_mixture_regression/starter_code.py similarity index 100% rename from questions/175_guassian_mixture_regression/starter_code.py rename to questions/186_guassian_mixture_regression/starter_code.py diff --git a/questions/175_guassian_mixture_regression/tests.json b/questions/186_guassian_mixture_regression/tests.json similarity index 100% rename from questions/175_guassian_mixture_regression/tests.json rename to questions/186_guassian_mixture_regression/tests.json