From 457a1f34ca74dd58dd7b5399a8064bb13bd930b5 Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 11:32:22 +0530 Subject: [PATCH 1/8] feat-kadane's algo --- dynamic_programming/kadane's_algo.r | 162 ++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 dynamic_programming/kadane's_algo.r diff --git a/dynamic_programming/kadane's_algo.r b/dynamic_programming/kadane's_algo.r new file mode 100644 index 00000000..16b13a54 --- /dev/null +++ b/dynamic_programming/kadane's_algo.r @@ -0,0 +1,162 @@ +# Kadane's Algorithm in R +# +# Finds the contiguous subarray with the largest sum. +# Time Complexity: O(n) +# Space Complexity: O(1) (not counting output subarray) +# +# Applications: +# - Financial time series (max profit window) +# - Signal processing (max energy segment) +# - Pattern detection in sequences +# - As a subroutine in more complex DP/optimization tasks + +kadane <- function(arr) { + #' Kadane's algorithm to find maximum subarray sum and its indices + #' @param arr: Numeric vector (can include negatives and positives) + #' @return: A list with fields: + #' max_sum - numeric: maximum subarray sum + #' start - integer: start index of the subarray (1-based), NA if empty input + #' end - integer: end index of the subarray (1-based), NA if empty input + #' subarray- numeric vector: the subarray that gives max_sum (empty if input empty) + + n <- length(arr) + + # Edge cases + if (n == 0) { + return(list( + max_sum = -Inf, + start = NA_integer_, + end = NA_integer_, + subarray = numeric(0) + )) + } + + # Initialize with first element (handles all-negative arrays correctly) + max_ending_here <- arr[1] + max_so_far <- arr[1] + s <- 1 + start <- 1 + end <- 1 + + if (n >= 2) { + for (i in 2:n) { + # If adding arr[i] to current segment is worse than starting new at arr[i] + if (max_ending_here + arr[i] < arr[i]) { + max_ending_here <- arr[i] + s <- i + } else { + max_ending_here <- max_ending_here + arr[i] + } + + # Update best segment if needed + if (max_ending_here > max_so_far) { + max_so_far <- max_ending_here + start <- s + end <- i + } + } + } + + return(list( + max_sum = max_so_far, + start = as.integer(start), + end = as.integer(end), + subarray = arr[start:end] + )) +} + +# Variant: Kadane that returns also when you want first-occurrence vs. 
any occurrence +kadane_first_occurrence <- function(arr) { + # exactly like kadane() but ties favor earlier segment (current code already does) + kadane(arr) +} + +# Helper to pretty-print results +print_kadane_result <- function(res, arr_name="Array") { + cat("Input:", arr_name, "\n") + if (is.na(res$start)) { + cat("Result: empty input\n\n") + return(invisible(NULL)) + } + cat("Max Subarray Sum:", res$max_sum, "\n") + cat("Start Index:", res$start, " End Index:", res$end, "\n") + cat("Subarray:", paste(res$subarray, collapse = ", "), "\n\n") +} + +# =========================== +# Example Usage & Testing +# =========================== +cat("=== Kadane's Algorithm Tests ===\n\n") + +# Test 1: Mixed positive and negative +arr1 <- c(-2, 1, -3, 4, -1, 2, 1, -5, 4) +res1 <- kadane(arr1) +print_kadane_result(res1, "arr1 (mixed)") + +# Test 2: All positive +arr2 <- c(2, 3, 1, 4) +res2 <- kadane(arr2) +print_kadane_result(res2, "arr2 (all positive)") + +# Test 3: All negative +arr3 <- c(-8, -3, -6, -2, -5, -4) +res3 <- kadane(arr3) +print_kadane_result(res3, "arr3 (all negative)") + +# Test 4: Single element +arr4 <- c(5) +res4 <- kadane(arr4) +print_kadane_result(res4, "arr4 (single element)") + +# Test 5: Empty array +arr5 <- numeric(0) +res5 <- kadane(arr5) +print_kadane_result(res5, "arr5 (empty)") + +# Test 6: Random large array - timing example +set.seed(123) +arr6 <- sample(-100:100, 100000, replace = TRUE) +start_time <- Sys.time() +res6 <- kadane(arr6) +end_time <- Sys.time() +print_kadane_result(res6, "arr6 (large random)") +cat("Elapsed time (seconds):", as.numeric(end_time - start_time, units = "secs"), "\n\n") + +# Optional: function to get maximum circular subarray (Kadane + total sum trick) +kadane_circular <- function(arr) { + #' Finds max subarray sum for circular arrays (wrap-around allowed) + #' If all elements are negative, returns max element (non-wrap). 
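+  #' Approach: take the larger of the ordinary Kadane maximum and the best
+  #' wrap-around sum, which equals total_sum minus the minimum subarray sum.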
+ n <- length(arr) + if (n == 0) return(list(max_sum = -Inf, start = NA, end = NA, subarray = numeric(0))) + + # Standard Kadane for non-circular max + normal <- kadane(arr)$max_sum + + # If all negative, normal already is max element; circular logic would fail + if (all(arr <= 0)) { + return(list(max_sum = normal, start = which.max(arr), end = which.max(arr), subarray = arr[which.max(arr)])) + } + + # Max wrap = total_sum - min_subarray_sum + total_sum <- sum(arr) + + # Find minimum subarray using Kadane on inverted array + inverted <- -arr + min_sub_sum <- kadane(inverted)$max_sum # this is -min_subarray_sum + max_wrap <- total_sum + min_sub_sum # because min_sub_sum is negative of min subarray + + if (max_wrap > normal) { + return(list(max_sum = max_wrap, start = NA, end = NA, subarray = NA)) # indices for wrap-around not computed here + } else { + return(list(max_sum = normal, start = kadane(arr)$start, end = kadane(arr)$end, subarray = kadane(arr)$subarray)) + } +} + +# Example for circular +cat("=== Circular Kadane Example ===\n") +arrc <- c(8, -1, 3, 4) +res_circ <- kadane_circular(arrc) +cat("Input:", paste(arrc, collapse = ", "), "\n") +cat("Max circular subarray sum:", res_circ$max_sum, "\n\n") + +# End of script From d51320b3870decf4c3e905b5672a0a0d9bb92f07 Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 11:51:14 +0530 Subject: [PATCH 2/8] k-Nearest Neighbors --- machine_learning/k-NN.r | 272 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 machine_learning/k-NN.r diff --git a/machine_learning/k-NN.r b/machine_learning/k-NN.r new file mode 100644 index 00000000..d72c7d4d --- /dev/null +++ b/machine_learning/k-NN.r @@ -0,0 +1,272 @@ +# k-Nearest Neighbors implementation in R +# +# Purpose: Simple, readable k-NN from-scratch supporting classification and regression. +# Time Complexity: O(m * n * d) for m test samples, n train samples, d features. +# Space Complexity: O(n * d) for training data + O(m * n) temporarily for distance matrix. 
+# +# Features: +# - Euclidean distance (squared) computed efficiently with matrix ops +# - Supports classification (factor labels) and regression (numeric labels) +# - Optional distance weighting (inverse-distance) +# - Optional normalization (z-score) using training-set params +# - Safe handling of edge cases (k > n, empty data, NAs) +# +# Usage: +# model <- knn_train(train_X, train_y, k=5, weighted=TRUE, normalize=TRUE) +# pred <- knn_predict(model, test_X) +# pred$predictions # vector of predictions +# pred$probs # (classification) matrix of class probabilities (if requested) + +# --------------------------- +# Helpers: z-score normalization +# --------------------------- +zscore_fit <- function(X) { + mu <- colMeans(X, na.rm = TRUE) + sigma <- apply(X, 2, sd, na.rm = TRUE) + sigma[sigma == 0] <- 1.0 # avoid division by zero + list(mu = mu, sigma = sigma) +} + +zscore_transform <- function(X, fit) { + sweep(sweep(X, 2, fit$mu, "-"), 2, fit$sigma, "/") +} + +# --------------------------- +# Training: just store data + normalization params +# --------------------------- +knn_train <- function(X, y, k = 3, weighted = FALSE, normalize = TRUE) { + #' X: numeric matrix or data.frame (n x d) + #' y: factor (classification) or numeric vector (regression) of length n + #' k: number of neighbors + #' weighted: use inverse-distance weighting (TRUE/FALSE) + #' normalize: z-score features using train stats (TRUE/FALSE) + + if (is.data.frame(X)) X <- as.matrix(X) + if (!is.matrix(X)) stop("X must be a matrix or data.frame.") + if (nrow(X) == 0) stop("Training set X is empty.") + if (length(y) != nrow(X)) stop("Length of y must match number of rows in X.") + if (k <= 0) stop("k must be positive integer.") + + k <- as.integer(k) + if (k > nrow(X)) { + warning("k > n (train size). 
Reducing k to n.") + k <- nrow(X) + } + + # remove rows with NA in features or labels + good_idx <- which(apply(X, 1, function(r) !any(is.na(r))) & !is.na(y)) + if (length(good_idx) < nrow(X)) { + warning(sprintf("Removed %d rows with NA from training data.", nrow(X) - length(good_idx))) + X <- X[good_idx, , drop = FALSE] + y <- y[good_idx] + } + + is_classification <- is.factor(y) || is.character(y) + if (is.character(y)) y <- factor(y) + + norm_fit <- NULL + if (normalize) { + norm_fit <- zscore_fit(X) + X <- zscore_transform(X, norm_fit) + } + + list( + X = X, + y = y, + k = k, + weighted = as.logical(weighted), + normalize = as.logical(normalize), + norm_fit = norm_fit, + is_classification = is_classification, + classes = if (is_classification) levels(y) else NULL + ) +} + +# --------------------------- +# Distance computation (efficient) +# --------------------------- +squared_euclidean_distances <- function(A, B) { + #' Compute squared Euclidean distances between rows of A (m x d) and B (n x d) + #' Returns matrix (m x n) where entry (i,j) is ||A[i,] - B[j,]||^2 + if (!is.matrix(A)) A <- as.matrix(A) + if (!is.matrix(B)) B <- as.matrix(B) + if (ncol(A) != ncol(B)) stop("Feature dimension mismatch between A and B.") + + # rowSums(A^2) is length m; rowSums(B^2) is length n + A_sq <- rowSums(A * A) + B_sq <- rowSums(B * B) + # cross term: A %*% t(B) gives m x n + cross <- tcrossprod(A, B) # same as A %*% t(B) but often a bit faster + # use broadcasting: dist^2 = A_sq - 2*cross + B_sq + # We build matrix: outer(A_sq, rep(1,n)) - 2*cross + outer(rep(1,m), B_sq) + outer(A_sq, rep(1, length(B_sq))) - 2 * cross + outer(rep(1, length(A_sq)), B_sq) +} + +# --------------------------- +# Prediction +# --------------------------- +knn_predict <- function(model, X_new, return_probs = TRUE, return_neighbors = FALSE) { + #' model: object from knn_train + #' X_new: matrix/data.frame of test points (m x d) or single vector (1 x d) + #' return_probs: for classification, return class probabilities + #' return_neighbors: return neighbor indices & distances + if (is.data.frame(X_new)) X_new <- as.matrix(X_new) + if (is.vector(X_new)) X_new <- matrix(X_new, nrow = 1) + if (!is.matrix(X_new)) stop("X_new must be matrix/data.frame or vector.") + if (ncol(X_new) != ncol(model$X)) stop("Feature dimensionality mismatch.") + + # normalize if needed + if (model$normalize && !is.null(model$norm_fit)) { + X_proc <- zscore_transform(X_new, model$norm_fit) + } else { + X_proc <- X_new + } + + m <- nrow(X_proc) + n <- nrow(model$X) + k <- model$k + + if (n == 0) stop("Model has no training samples.") + + # distances: m x n + dists <- squared_euclidean_distances(X_proc, model$X) + + # For each test row, find k smallest distances (ties handled by order) + idx_mat <- t(apply(dists, 1, function(r) { + order(r, decreasing = FALSE)[seq_len(k)] + })) # m x k + + dist_mat <- matrix(NA_real_, nrow = m, ncol = k) + for (i in seq_len(m)) dist_mat[i, ] <- dists[i, idx_mat[i, ]] + + # handle zero distances (exact matches) to avoid division by zero in weighting + eps <- 1e-12 + if (model$is_classification) { + preds <- vector("character", m) + probs <- matrix(0, nrow = m, ncol = length(model$classes)) + colnames(probs) <- model$classes + + for (i in seq_len(m)) { + neighbor_idx <- idx_mat[i, ] + neighbor_labels <- as.character(model$y[neighbor_idx]) + neighbor_dists <- dist_mat[i, ] + + if (model$weighted) { + # weights: 1 / (dist + eps) + w <- 1 / (neighbor_dists + eps) + # if any dist==0, set weight large for exact matches + if 
(any(neighbor_dists == 0)) { + w <- as.numeric(neighbor_dists == 0) * 1e12 # very large weight for exact matches + } + tab <- tapply(w, neighbor_labels, sum) + } else { + tab <- table(neighbor_labels) + } + # ensure all classes present + counts <- rep(0, length(model$classes)) + names(counts) <- model$classes + tab_names <- names(tab) + counts[tab_names] <- as.numeric(tab) + + # normalize to probabilities + if (sum(counts) > 0) probs[i, ] <- counts / sum(counts) + else probs[i, ] <- counts + + # choose class with max probability (first tie wins because which.max) + preds[i] <- names(which.max(probs[i, ])) + } + # cast to factor with original levels + preds <- factor(preds, levels = model$classes) + + out <- list(predictions = preds) + if (return_probs) out$probs <- probs + } else { + # regression + preds_reg <- numeric(m) + for (i in seq_len(m)) { + neighbor_idx <- idx_mat[i, ] + neighbor_vals <- as.numeric(model$y[neighbor_idx]) + neighbor_dists <- dist_mat[i, ] + if (model$weighted) { + w <- 1 / (neighbor_dists + eps) + if (any(neighbor_dists == 0)) { + w <- as.numeric(neighbor_dists == 0) * 1e12 + } + preds_reg[i] <- sum(w * neighbor_vals) / sum(w) + } else { + preds_reg[i] <- mean(neighbor_vals) + } + } + out <- list(predictions = preds_reg) + } + + if (return_neighbors) { + out$neighbor_indices <- idx_mat + out$neighbor_distances <- dist_mat + } + + out +} + +# --------------------------- +# Utility: accuracy and confusion (classification) +# --------------------------- +knn_accuracy <- function(y_true, y_pred) { + if (length(y_true) != length(y_pred)) stop("Lengths mismatch.") + mean(y_true == y_pred) +} + +confusion_matrix <- function(y_true, y_pred) { + table(Actual = y_true, Predicted = y_pred) +} + +# --------------------------- +# Example/Test: Iris classification +# --------------------------- +cat("=== k-NN Example: Iris dataset (classification) ===\n") +data(iris) +set.seed(42) +# Use only numeric features +X <- as.matrix(iris[, 1:4]) +y <- factor(iris$Species) + +# train/test split 70/30 +n <- nrow(X) +train_idx <- sample(seq_len(n), size = floor(0.7 * n)) +test_idx <- setdiff(seq_len(n), train_idx) + +X_train <- X[train_idx, , drop = FALSE] +y_train <- y[train_idx] +X_test <- X[test_idx, , drop = FALSE] +y_test <- y[test_idx] + +model <- knn_train(X_train, y_train, k = 5, weighted = TRUE, normalize = TRUE) +pred <- knn_predict(model, X_test, return_probs = TRUE, return_neighbors = FALSE) + +acc <- knn_accuracy(y_test, pred$predictions) +cat(sprintf("Test accuracy (k=%d, weighted=%s, normalize=%s): %.4f\n", + model$k, model$weighted, model$normalize, acc)) +cat("Confusion Matrix:\n") +print(confusion_matrix(y_test, pred$predictions)) +cat("\n") + +# --------------------------- +# Example/Test: Regression (toy) +# --------------------------- +cat("=== k-NN Example: Toy regression ===\n") +set.seed(1) +n_reg <- 200 +X_reg <- matrix(runif(n_reg * 2, -5, 5), ncol = 2) +y_reg <- X_reg[,1] * 2 - X_reg[,2] * 0.5 + rnorm(n_reg, sd = 0.5) +train_idx <- sample(seq_len(n_reg), size = 150) +X_tr <- X_reg[train_idx, , drop=FALSE]; y_tr <- y_reg[train_idx] +X_te <- X_reg[-train_idx, , drop=FALSE]; y_te <- y_reg[-train_idx] + +model_reg <- knn_train(X_tr, y_tr, k = 7, weighted = TRUE, normalize = TRUE) +pred_reg <- knn_predict(model_reg, X_te) +mse <- mean((pred_reg$predictions - y_te)^2) +cat(sprintf("Regression MSE (k=%d, weighted=%s): %.4f\n\n", model_reg$k, model_reg$weighted, mse)) + +# --------------------------- +# End of script +# --------------------------- From 
563abaa6efbd5f08d88cb4ad33936c15c7144df5 Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 12:27:15 +0530 Subject: [PATCH 3/8] Convolutional Neural Network --- machine_learning/cnn.r | 56 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 machine_learning/cnn.r diff --git a/machine_learning/cnn.r b/machine_learning/cnn.r new file mode 100644 index 00000000..5aa9d9e9 --- /dev/null +++ b/machine_learning/cnn.r @@ -0,0 +1,56 @@ +# ============================================== +# Convolutional Neural Network (CNN) +# ============================================== +# Algorithm: Deep learning model using convolutional, pooling, and dense layers. +# Framework: Keras (TensorFlow backend) +# +# Purpose: +# - Automatically extract spatial and hierarchical features from image data. +# - Commonly used for image classification, object detection, and visual recognition. +# +# Architecture Steps: +# 1. Convolution Layer: Extracts local spatial patterns using learnable filters. +# 2. Activation (ReLU): Adds non-linearity by thresholding at zero. +# 3. Pooling Layer: Reduces spatial dimensions (downsampling) while preserving features. +# 4. Flatten Layer: Converts 2D feature maps into 1D vector. +# 5. Dense Layers: Combines extracted features for classification. +# 6. Output Layer: Uses Softmax activation for class probabilities. +# +# Complexity: +# - Time: O(E × N × F × K²) where E=epochs, N=samples, F=filters, K=kernel size +# - Space: O(parameters + feature maps) +# +# Reference: +# LeCun et al., "Gradient-based learning applied to document recognition" (1998) +# https://yann.lecun.com/exdb/lenet/ +# +# ============================================== + +# Load Required Library +suppressPackageStartupMessages(library(keras)) + +# Define CNN Architecture (Algorithm Only) +cnn_model <- keras_model_sequential() %>% + layer_conv_2d( + filters = 32, kernel_size = c(3, 3), activation = "relu", + input_shape = c(28, 28, 1), padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_conv_2d( + filters = 64, kernel_size = c(3, 3), + activation = "relu", padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_flatten() %>% + layer_dense(units = 128, activation = "relu") %>% + layer_dense(units = 10, activation = "softmax") + +# Display Model Summary +summary(cnn_model) + +# ============================================== +# Note: +# - This script defines the CNN algorithm structure only. +# - You can compile and train it using model %>% compile() and model %>% fit() +# with any dataset (e.g., MNIST, CIFAR-10). +# ============================================== From 5676cd68b4249a2d1f5ca948c5f32a9d216f82af Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 13:05:56 +0530 Subject: [PATCH 4/8] Gaussian Process Regression --- machine_learning/guassian_process.r | 62 +++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 machine_learning/guassian_process.r diff --git a/machine_learning/guassian_process.r b/machine_learning/guassian_process.r new file mode 100644 index 00000000..89b5f2eb --- /dev/null +++ b/machine_learning/guassian_process.r @@ -0,0 +1,62 @@ +# ============================================== +# Gaussian Process Regression (GP) +# ============================================== +# Algorithm: Non-parametric Bayesian regression using Gaussian Processes. 
+# Framework: R (kernlab package) +# +# Purpose: +# - Perform regression while providing uncertainty estimates. +# - Useful for small datasets and Bayesian optimization. +# +# Core Idea: +# - Define a prior over functions using a kernel (covariance) function. +# - Update the posterior distribution using observed data. +# - Predictions include mean and variance (uncertainty) for each point. +# +# Complexity: +# - Time: O(n^3) due to inversion of the kernel matrix +# - Space: O(n^2) for storing the kernel matrix +# +# Edge Cases / Notes: +# - Choice of kernel is critical for good performance. +# - Computationally heavy for large datasets; sparse approximations exist. +# - Great for uncertainty quantification in predictions. +# +# Typical Applications: +# - Bayesian optimization +# - Small-data regression tasks +# - Time-series forecasting with uncertainty estimates +# +# Reference: +# Rasmussen, C. E., & Williams, C. K. I. (2006). Gaussian Processes for Machine Learning. +# ============================================== + +# Load required library +suppressPackageStartupMessages(library(kernlab)) + +# Example Dataset (Synthetic) +set.seed(42) +x <- seq(-5, 5, length.out = 50) +y <- sin(x) + rnorm(length(x), sd = 0.2) + +# Define Gaussian Process Regression Model +gp_model <- gausspr( + x = as.matrix(x), y = y, + kernel = "rbfdot" # Radial Basis Function (RBF) kernel +) + +# Make Predictions +x_test <- seq(-6, 6, length.out = 100) +y_pred <- predict(gp_model, as.matrix(x_test), type = "response") + +# Plot Results +plot(x, y, main = "Gaussian Process Regression", xlab = "X", ylab = "Y", pch = 19) +lines(x_test, y_pred, col = "blue", lwd = 2) +legend("topright", legend = c("Observations", "GP Prediction"), col = c("black", "blue"), pch = c(19, NA), lty = c(NA, 1)) + +# ============================================== +# Note: +# - This script defines a Gaussian Process Regression model in R. +# - Can be applied to other regression datasets by replacing x and y. +# - For large datasets, consider sparse GP approximations. 
+# ============================================== From a46a2dfb7cb3091985393f35aeb628c6fb64ec20 Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 17:59:20 +0530 Subject: [PATCH 5/8] Update machine_learning/guassian_process.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/guassian_process.r | 63 ++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/machine_learning/guassian_process.r b/machine_learning/guassian_process.r index 89b5f2eb..2a3d8fb1 100644 --- a/machine_learning/guassian_process.r +++ b/machine_learning/guassian_process.r @@ -34,26 +34,51 @@ # Load required library suppressPackageStartupMessages(library(kernlab)) -# Example Dataset (Synthetic) -set.seed(42) -x <- seq(-5, 5, length.out = 50) -y <- sin(x) + rnorm(length(x), sd = 0.2) - -# Define Gaussian Process Regression Model -gp_model <- gausspr( - x = as.matrix(x), y = y, - kernel = "rbfdot" # Radial Basis Function (RBF) kernel -) - -# Make Predictions -x_test <- seq(-6, 6, length.out = 100) -y_pred <- predict(gp_model, as.matrix(x_test), type = "response") - -# Plot Results -plot(x, y, main = "Gaussian Process Regression", xlab = "X", ylab = "Y", pch = 19) -lines(x_test, y_pred, col = "blue", lwd = 2) -legend("topright", legend = c("Observations", "GP Prediction"), col = c("black", "blue"), pch = c(19, NA), lty = c(NA, 1)) +# ---- Core Functions ---- +#' Train a Gaussian Process Regression model +#' @param x Numeric vector or matrix of input features +#' @param y Numeric vector of target values +#' @param kernel Kernel to use (default: "rbfdot") +#' @param ... Additional arguments passed to gausspr +#' @return Trained GP model (kernlab::gausspr object) +gp_train <- function(x, y, kernel = "rbfdot", ...) { + gausspr( + x = as.matrix(x), y = y, + kernel = kernel, + ... + ) +} + +#' Predict using a trained Gaussian Process Regression model +#' @param model Trained GP model (from gp_train) +#' @param x_test Numeric vector or matrix of test inputs +#' @param type Prediction type (default: "response") +#' @param ... Additional arguments passed to predict +#' @return Predicted values +gp_predict <- function(model, x_test, type = "response", ...) { + predict(model, as.matrix(x_test), type = type, ...) +} + +# ---- Example Usage (runs only in interactive sessions) ---- +if (interactive()) { + # Example Dataset (Synthetic) + set.seed(42) + x <- seq(-5, 5, length.out = 50) + y <- sin(x) + rnorm(length(x), sd = 0.2) + + # Train GP model + gp_model <- gp_train(x, y) + + # Make Predictions + x_test <- seq(-6, 6, length.out = 100) + y_pred <- gp_predict(gp_model, x_test) + + # Plot Results + plot(x, y, main = "Gaussian Process Regression", xlab = "X", ylab = "Y", pch = 19) + lines(x_test, y_pred, col = "blue", lwd = 2) + legend("topright", legend = c("Observations", "GP Prediction"), col = c("black", "blue"), pch = c(19, NA), lty = c(NA, 1)) +} # ============================================== # Note: # - This script defines a Gaussian Process Regression model in R. 
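The guassian_process.r header notes that GP predictions come with both a mean and a variance, but the kernlab example only shows the predictive mean. The sketch below works the textbook posterior equations directly in base R with an RBF kernel, so the O(n^3) kernel-matrix solve and the uncertainty estimate mentioned in the header are visible. It is an illustrative from-scratch companion, not part of the patch or of kernlab's API; the length-scale ell and noise level sigma_n are assumed values chosen for demonstration.

# Minimal GP posterior sketch (illustrative; ell and sigma_n are assumptions)
rbf_kernel <- function(A, B, ell = 1.0) {
  # Squared-exponential kernel matrix between rows of A and rows of B
  d2 <- outer(rowSums(A^2), rowSums(B^2), "+") - 2 * tcrossprod(A, B)
  exp(-d2 / (2 * ell^2))
}

gp_posterior <- function(x_train, y_train, x_test, ell = 1.0, sigma_n = 0.2) {
  X  <- as.matrix(x_train)
  Xs <- as.matrix(x_test)
  K   <- rbf_kernel(X, X, ell) + sigma_n^2 * diag(nrow(X))   # noisy training covariance
  Ks  <- rbf_kernel(Xs, X, ell)                               # test-vs-train covariance
  Kss <- rbf_kernel(Xs, Xs, ell)                              # test covariance
  alpha <- solve(K, y_train)                                  # the O(n^3) solve
  list(
    mean = as.vector(Ks %*% alpha),                           # posterior mean
    var  = pmax(diag(Kss - Ks %*% solve(K, t(Ks))), 0)        # posterior variance per test point
  )
}

With the synthetic x, y, and x_test from the example above, post <- gp_posterior(x, y, x_test) gives sqrt(post$var) as a pointwise uncertainty band alongside post$mean.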
From a08463022ac8353961359f348dc875b563aa9621 Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 18:00:08 +0530 Subject: [PATCH 6/8] Update machine_learning/k-NN.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/k-NN.r | 63 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/machine_learning/k-NN.r b/machine_learning/k-NN.r index d72c7d4d..2314b021 100644 --- a/machine_learning/k-NN.r +++ b/machine_learning/k-NN.r @@ -231,42 +231,43 @@ X <- as.matrix(iris[, 1:4]) y <- factor(iris$Species) # train/test split 70/30 -n <- nrow(X) -train_idx <- sample(seq_len(n), size = floor(0.7 * n)) -test_idx <- setdiff(seq_len(n), train_idx) +if (interactive()) { + n <- nrow(X) + train_idx <- sample(seq_len(n), size = floor(0.7 * n)) + test_idx <- setdiff(seq_len(n), train_idx) -X_train <- X[train_idx, , drop = FALSE] -y_train <- y[train_idx] -X_test <- X[test_idx, , drop = FALSE] -y_test <- y[test_idx] + X_train <- X[train_idx, , drop = FALSE] + y_train <- y[train_idx] + X_test <- X[test_idx, , drop = FALSE] + y_test <- y[test_idx] -model <- knn_train(X_train, y_train, k = 5, weighted = TRUE, normalize = TRUE) -pred <- knn_predict(model, X_test, return_probs = TRUE, return_neighbors = FALSE) + model <- knn_train(X_train, y_train, k = 5, weighted = TRUE, normalize = TRUE) + pred <- knn_predict(model, X_test, return_probs = TRUE, return_neighbors = FALSE) -acc <- knn_accuracy(y_test, pred$predictions) -cat(sprintf("Test accuracy (k=%d, weighted=%s, normalize=%s): %.4f\n", - model$k, model$weighted, model$normalize, acc)) -cat("Confusion Matrix:\n") -print(confusion_matrix(y_test, pred$predictions)) -cat("\n") + acc <- knn_accuracy(y_test, pred$predictions) + cat(sprintf("Test accuracy (k=%d, weighted=%s, normalize=%s): %.4f\n", + model$k, model$weighted, model$normalize, acc)) + cat("Confusion Matrix:\n") + print(confusion_matrix(y_test, pred$predictions)) + cat("\n") -# --------------------------- -# Example/Test: Regression (toy) -# --------------------------- -cat("=== k-NN Example: Toy regression ===\n") -set.seed(1) -n_reg <- 200 -X_reg <- matrix(runif(n_reg * 2, -5, 5), ncol = 2) -y_reg <- X_reg[,1] * 2 - X_reg[,2] * 0.5 + rnorm(n_reg, sd = 0.5) -train_idx <- sample(seq_len(n_reg), size = 150) -X_tr <- X_reg[train_idx, , drop=FALSE]; y_tr <- y_reg[train_idx] -X_te <- X_reg[-train_idx, , drop=FALSE]; y_te <- y_reg[-train_idx] - -model_reg <- knn_train(X_tr, y_tr, k = 7, weighted = TRUE, normalize = TRUE) -pred_reg <- knn_predict(model_reg, X_te) -mse <- mean((pred_reg$predictions - y_te)^2) -cat(sprintf("Regression MSE (k=%d, weighted=%s): %.4f\n\n", model_reg$k, model_reg$weighted, mse)) + # --------------------------- + # Example/Test: Regression (toy) + # --------------------------- + cat("=== k-NN Example: Toy regression ===\n") + set.seed(1) + n_reg <- 200 + X_reg <- matrix(runif(n_reg * 2, -5, 5), ncol = 2) + y_reg <- X_reg[,1] * 2 - X_reg[,2] * 0.5 + rnorm(n_reg, sd = 0.5) + train_idx <- sample(seq_len(n_reg), size = 150) + X_tr <- X_reg[train_idx, , drop=FALSE]; y_tr <- y_reg[train_idx] + X_te <- X_reg[-train_idx, , drop=FALSE]; y_te <- y_reg[-train_idx] + model_reg <- knn_train(X_tr, y_tr, k = 7, weighted = TRUE, normalize = TRUE) + pred_reg <- knn_predict(model_reg, X_te) + mse <- mean((pred_reg$predictions - y_te)^2) + cat(sprintf("Regression MSE (k=%d, weighted=%s): %.4f\n\n", model_reg$k, model_reg$weighted, mse)) +} # 
--------------------------- # End of script # --------------------------- From 606ef9d50f3335b9bd1ba241cf999aead4124073 Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 20:20:04 +0530 Subject: [PATCH 7/8] Update machine_learning/cnn.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/cnn.r | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/machine_learning/cnn.r b/machine_learning/cnn.r index 5aa9d9e9..b0154f0e 100644 --- a/machine_learning/cnn.r +++ b/machine_learning/cnn.r @@ -29,24 +29,29 @@ # Load Required Library suppressPackageStartupMessages(library(keras)) -# Define CNN Architecture (Algorithm Only) -cnn_model <- keras_model_sequential() %>% - layer_conv_2d( - filters = 32, kernel_size = c(3, 3), activation = "relu", - input_shape = c(28, 28, 1), padding = "same" - ) %>% - layer_max_pooling_2d(pool_size = c(2, 2)) %>% - layer_conv_2d( - filters = 64, kernel_size = c(3, 3), - activation = "relu", padding = "same" - ) %>% - layer_max_pooling_2d(pool_size = c(2, 2)) %>% - layer_flatten() %>% - layer_dense(units = 128, activation = "relu") %>% - layer_dense(units = 10, activation = "softmax") +# Define CNN Architecture as a Function (Reusable) +build_cnn_model <- function(input_shape = c(28, 28, 1), num_classes = 10) { + keras_model_sequential() %>% + layer_conv_2d( + filters = 32, kernel_size = c(3, 3), activation = "relu", + input_shape = input_shape, padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_conv_2d( + filters = 64, kernel_size = c(3, 3), + activation = "relu", padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_flatten() %>% + layer_dense(units = 128, activation = "relu") %>% + layer_dense(units = num_classes, activation = "softmax") +} -# Display Model Summary -summary(cnn_model) +# Example: Display Model Summary (only in interactive sessions) +if (interactive()) { + model <- build_cnn_model() + summary(model) +} # ============================================== # Note: From 0c2f3694ace84aa69dfb7f402b6e98b865a80aea Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 20:20:20 +0530 Subject: [PATCH 8/8] Update machine_learning/k-NN.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/k-NN.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/k-NN.r b/machine_learning/k-NN.r index 2314b021..04e984c2 100644 --- a/machine_learning/k-NN.r +++ b/machine_learning/k-NN.r @@ -173,7 +173,7 @@ knn_predict <- function(model, X_new, return_probs = TRUE, return_neighbors = FA else probs[i, ] <- counts # choose class with max probability (first tie wins because which.max) - preds[i] <- names(which.max(probs[i, ])) + preds[i] <- colnames(probs)[which.max(probs[i, ])] } # cast to factor with original levels preds <- factor(preds, levels = model$classes)
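The note in cnn.r leaves compilation and training to the reader ("model %>% compile() and model %>% fit() with any dataset (e.g., MNIST, CIFAR-10)"). A minimal sketch of that step with the keras R interface, using the build_cnn_model() helper introduced in the CNN patch, could look like the following. It assumes keras with a TensorFlow backend is installed and that MNIST can be downloaded on first use; the epoch count and batch size are arbitrary values chosen only to illustrate the calls.

# Illustrative only: compile and fit build_cnn_model() on MNIST
library(keras)

mnist <- dataset_mnist()
# Scale pixels to [0, 1] and add the channel dimension expected by the model
x_train <- array_reshape(mnist$train$x / 255, c(nrow(mnist$train$x), 28, 28, 1))
y_train <- to_categorical(mnist$train$y, 10)
x_test  <- array_reshape(mnist$test$x / 255, c(nrow(mnist$test$x), 28, 28, 1))
y_test  <- to_categorical(mnist$test$y, 10)

model <- build_cnn_model(input_shape = c(28, 28, 1), num_classes = 10)

model %>% compile(
  loss = "categorical_crossentropy",
  optimizer = optimizer_adam(),
  metrics = "accuracy"
)

history <- model %>% fit(
  x_train, y_train,
  epochs = 3, batch_size = 128,   # small values, for demonstration only
  validation_split = 0.1
)

model %>% evaluate(x_test, y_test)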