From 457a1f34ca74dd58dd7b5399a8064bb13bd930b5 Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 11:32:22 +0530 Subject: [PATCH 1/8] feat-kadane's algo --- dynamic_programming/kadane's_algo.r | 162 ++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 dynamic_programming/kadane's_algo.r diff --git a/dynamic_programming/kadane's_algo.r b/dynamic_programming/kadane's_algo.r new file mode 100644 index 00000000..16b13a54 --- /dev/null +++ b/dynamic_programming/kadane's_algo.r @@ -0,0 +1,162 @@ +# Kadane's Algorithm in R +# +# Finds the contiguous subarray with the largest sum. +# Time Complexity: O(n) +# Space Complexity: O(1) (not counting output subarray) +# +# Applications: +# - Financial time series (max profit window) +# - Signal processing (max energy segment) +# - Pattern detection in sequences +# - As a subroutine in more complex DP/optimization tasks + +kadane <- function(arr) { + #' Kadane's algorithm to find maximum subarray sum and its indices + #' @param arr: Numeric vector (can include negatives and positives) + #' @return: A list with fields: + #' max_sum - numeric: maximum subarray sum + #' start - integer: start index of the subarray (1-based), NA if empty input + #' end - integer: end index of the subarray (1-based), NA if empty input + #' subarray- numeric vector: the subarray that gives max_sum (empty if input empty) + + n <- length(arr) + + # Edge cases + if (n == 0) { + return(list( + max_sum = -Inf, + start = NA_integer_, + end = NA_integer_, + subarray = numeric(0) + )) + } + + # Initialize with first element (handles all-negative arrays correctly) + max_ending_here <- arr[1] + max_so_far <- arr[1] + s <- 1 + start <- 1 + end <- 1 + + if (n >= 2) { + for (i in 2:n) { + # If adding arr[i] to current segment is worse than starting new at arr[i] + if (max_ending_here + arr[i] < arr[i]) { + max_ending_here <- arr[i] + s <- i + } else { + max_ending_here <- max_ending_here + arr[i] + } + + # Update best segment if needed + if (max_ending_here > max_so_far) { + max_so_far <- max_ending_here + start <- s + end <- i + } + } + } + + return(list( + max_sum = max_so_far, + start = as.integer(start), + end = as.integer(end), + subarray = arr[start:end] + )) +} + +# Variant: Kadane that returns also when you want first-occurrence vs. 
any occurrence +kadane_first_occurrence <- function(arr) { + # exactly like kadane() but ties favor earlier segment (current code already does) + kadane(arr) +} + +# Helper to pretty-print results +print_kadane_result <- function(res, arr_name="Array") { + cat("Input:", arr_name, "\n") + if (is.na(res$start)) { + cat("Result: empty input\n\n") + return(invisible(NULL)) + } + cat("Max Subarray Sum:", res$max_sum, "\n") + cat("Start Index:", res$start, " End Index:", res$end, "\n") + cat("Subarray:", paste(res$subarray, collapse = ", "), "\n\n") +} + +# =========================== +# Example Usage & Testing +# =========================== +cat("=== Kadane's Algorithm Tests ===\n\n") + +# Test 1: Mixed positive and negative +arr1 <- c(-2, 1, -3, 4, -1, 2, 1, -5, 4) +res1 <- kadane(arr1) +print_kadane_result(res1, "arr1 (mixed)") + +# Test 2: All positive +arr2 <- c(2, 3, 1, 4) +res2 <- kadane(arr2) +print_kadane_result(res2, "arr2 (all positive)") + +# Test 3: All negative +arr3 <- c(-8, -3, -6, -2, -5, -4) +res3 <- kadane(arr3) +print_kadane_result(res3, "arr3 (all negative)") + +# Test 4: Single element +arr4 <- c(5) +res4 <- kadane(arr4) +print_kadane_result(res4, "arr4 (single element)") + +# Test 5: Empty array +arr5 <- numeric(0) +res5 <- kadane(arr5) +print_kadane_result(res5, "arr5 (empty)") + +# Test 6: Random large array - timing example +set.seed(123) +arr6 <- sample(-100:100, 100000, replace = TRUE) +start_time <- Sys.time() +res6 <- kadane(arr6) +end_time <- Sys.time() +print_kadane_result(res6, "arr6 (large random)") +cat("Elapsed time (seconds):", as.numeric(end_time - start_time, units = "secs"), "\n\n") + +# Optional: function to get maximum circular subarray (Kadane + total sum trick) +kadane_circular <- function(arr) { + #' Finds max subarray sum for circular arrays (wrap-around allowed) + #' If all elements are negative, returns max element (non-wrap). 
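+  #' Approach: take the larger of the ordinary Kadane maximum and the best
+  #' wrap-around sum, which equals total_sum minus the minimum subarray sum.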
+ n <- length(arr) + if (n == 0) return(list(max_sum = -Inf, start = NA, end = NA, subarray = numeric(0))) + + # Standard Kadane for non-circular max + normal <- kadane(arr)$max_sum + + # If all negative, normal already is max element; circular logic would fail + if (all(arr <= 0)) { + return(list(max_sum = normal, start = which.max(arr), end = which.max(arr), subarray = arr[which.max(arr)])) + } + + # Max wrap = total_sum - min_subarray_sum + total_sum <- sum(arr) + + # Find minimum subarray using Kadane on inverted array + inverted <- -arr + min_sub_sum <- kadane(inverted)$max_sum # this is -min_subarray_sum + max_wrap <- total_sum + min_sub_sum # because min_sub_sum is negative of min subarray + + if (max_wrap > normal) { + return(list(max_sum = max_wrap, start = NA, end = NA, subarray = NA)) # indices for wrap-around not computed here + } else { + return(list(max_sum = normal, start = kadane(arr)$start, end = kadane(arr)$end, subarray = kadane(arr)$subarray)) + } +} + +# Example for circular +cat("=== Circular Kadane Example ===\n") +arrc <- c(8, -1, 3, 4) +res_circ <- kadane_circular(arrc) +cat("Input:", paste(arrc, collapse = ", "), "\n") +cat("Max circular subarray sum:", res_circ$max_sum, "\n\n") + +# End of script From d51320b3870decf4c3e905b5672a0a0d9bb92f07 Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 11:51:14 +0530 Subject: [PATCH 2/8] k-Nearest Neighbors --- machine_learning/k-NN.r | 272 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 machine_learning/k-NN.r diff --git a/machine_learning/k-NN.r b/machine_learning/k-NN.r new file mode 100644 index 00000000..d72c7d4d --- /dev/null +++ b/machine_learning/k-NN.r @@ -0,0 +1,272 @@ +# k-Nearest Neighbors implementation in R +# +# Purpose: Simple, readable k-NN from-scratch supporting classification and regression. +# Time Complexity: O(m * n * d) for m test samples, n train samples, d features. +# Space Complexity: O(n * d) for training data + O(m * n) temporarily for distance matrix. 
+# +# Features: +# - Euclidean distance (squared) computed efficiently with matrix ops +# - Supports classification (factor labels) and regression (numeric labels) +# - Optional distance weighting (inverse-distance) +# - Optional normalization (z-score) using training-set params +# - Safe handling of edge cases (k > n, empty data, NAs) +# +# Usage: +# model <- knn_train(train_X, train_y, k=5, weighted=TRUE, normalize=TRUE) +# pred <- knn_predict(model, test_X) +# pred$predictions # vector of predictions +# pred$probs # (classification) matrix of class probabilities (if requested) + +# --------------------------- +# Helpers: z-score normalization +# --------------------------- +zscore_fit <- function(X) { + mu <- colMeans(X, na.rm = TRUE) + sigma <- apply(X, 2, sd, na.rm = TRUE) + sigma[sigma == 0] <- 1.0 # avoid division by zero + list(mu = mu, sigma = sigma) +} + +zscore_transform <- function(X, fit) { + sweep(sweep(X, 2, fit$mu, "-"), 2, fit$sigma, "/") +} + +# --------------------------- +# Training: just store data + normalization params +# --------------------------- +knn_train <- function(X, y, k = 3, weighted = FALSE, normalize = TRUE) { + #' X: numeric matrix or data.frame (n x d) + #' y: factor (classification) or numeric vector (regression) of length n + #' k: number of neighbors + #' weighted: use inverse-distance weighting (TRUE/FALSE) + #' normalize: z-score features using train stats (TRUE/FALSE) + + if (is.data.frame(X)) X <- as.matrix(X) + if (!is.matrix(X)) stop("X must be a matrix or data.frame.") + if (nrow(X) == 0) stop("Training set X is empty.") + if (length(y) != nrow(X)) stop("Length of y must match number of rows in X.") + if (k <= 0) stop("k must be positive integer.") + + k <- as.integer(k) + if (k > nrow(X)) { + warning("k > n (train size). 
Reducing k to n.") + k <- nrow(X) + } + + # remove rows with NA in features or labels + good_idx <- which(apply(X, 1, function(r) !any(is.na(r))) & !is.na(y)) + if (length(good_idx) < nrow(X)) { + warning(sprintf("Removed %d rows with NA from training data.", nrow(X) - length(good_idx))) + X <- X[good_idx, , drop = FALSE] + y <- y[good_idx] + } + + is_classification <- is.factor(y) || is.character(y) + if (is.character(y)) y <- factor(y) + + norm_fit <- NULL + if (normalize) { + norm_fit <- zscore_fit(X) + X <- zscore_transform(X, norm_fit) + } + + list( + X = X, + y = y, + k = k, + weighted = as.logical(weighted), + normalize = as.logical(normalize), + norm_fit = norm_fit, + is_classification = is_classification, + classes = if (is_classification) levels(y) else NULL + ) +} + +# --------------------------- +# Distance computation (efficient) +# --------------------------- +squared_euclidean_distances <- function(A, B) { + #' Compute squared Euclidean distances between rows of A (m x d) and B (n x d) + #' Returns matrix (m x n) where entry (i,j) is ||A[i,] - B[j,]||^2 + if (!is.matrix(A)) A <- as.matrix(A) + if (!is.matrix(B)) B <- as.matrix(B) + if (ncol(A) != ncol(B)) stop("Feature dimension mismatch between A and B.") + + # rowSums(A^2) is length m; rowSums(B^2) is length n + A_sq <- rowSums(A * A) + B_sq <- rowSums(B * B) + # cross term: A %*% t(B) gives m x n + cross <- tcrossprod(A, B) # same as A %*% t(B) but often a bit faster + # use broadcasting: dist^2 = A_sq - 2*cross + B_sq + # We build matrix: outer(A_sq, rep(1,n)) - 2*cross + outer(rep(1,m), B_sq) + outer(A_sq, rep(1, length(B_sq))) - 2 * cross + outer(rep(1, length(A_sq)), B_sq) +} + +# --------------------------- +# Prediction +# --------------------------- +knn_predict <- function(model, X_new, return_probs = TRUE, return_neighbors = FALSE) { + #' model: object from knn_train + #' X_new: matrix/data.frame of test points (m x d) or single vector (1 x d) + #' return_probs: for classification, return class probabilities + #' return_neighbors: return neighbor indices & distances + if (is.data.frame(X_new)) X_new <- as.matrix(X_new) + if (is.vector(X_new)) X_new <- matrix(X_new, nrow = 1) + if (!is.matrix(X_new)) stop("X_new must be matrix/data.frame or vector.") + if (ncol(X_new) != ncol(model$X)) stop("Feature dimensionality mismatch.") + + # normalize if needed + if (model$normalize && !is.null(model$norm_fit)) { + X_proc <- zscore_transform(X_new, model$norm_fit) + } else { + X_proc <- X_new + } + + m <- nrow(X_proc) + n <- nrow(model$X) + k <- model$k + + if (n == 0) stop("Model has no training samples.") + + # distances: m x n + dists <- squared_euclidean_distances(X_proc, model$X) + + # For each test row, find k smallest distances (ties handled by order) + idx_mat <- t(apply(dists, 1, function(r) { + order(r, decreasing = FALSE)[seq_len(k)] + })) # m x k + + dist_mat <- matrix(NA_real_, nrow = m, ncol = k) + for (i in seq_len(m)) dist_mat[i, ] <- dists[i, idx_mat[i, ]] + + # handle zero distances (exact matches) to avoid division by zero in weighting + eps <- 1e-12 + if (model$is_classification) { + preds <- vector("character", m) + probs <- matrix(0, nrow = m, ncol = length(model$classes)) + colnames(probs) <- model$classes + + for (i in seq_len(m)) { + neighbor_idx <- idx_mat[i, ] + neighbor_labels <- as.character(model$y[neighbor_idx]) + neighbor_dists <- dist_mat[i, ] + + if (model$weighted) { + # weights: 1 / (dist + eps) + w <- 1 / (neighbor_dists + eps) + # if any dist==0, set weight large for exact matches + if 
(any(neighbor_dists == 0)) { + w <- as.numeric(neighbor_dists == 0) * 1e12 # very large weight for exact matches + } + tab <- tapply(w, neighbor_labels, sum) + } else { + tab <- table(neighbor_labels) + } + # ensure all classes present + counts <- rep(0, length(model$classes)) + names(counts) <- model$classes + tab_names <- names(tab) + counts[tab_names] <- as.numeric(tab) + + # normalize to probabilities + if (sum(counts) > 0) probs[i, ] <- counts / sum(counts) + else probs[i, ] <- counts + + # choose class with max probability (first tie wins because which.max) + preds[i] <- names(which.max(probs[i, ])) + } + # cast to factor with original levels + preds <- factor(preds, levels = model$classes) + + out <- list(predictions = preds) + if (return_probs) out$probs <- probs + } else { + # regression + preds_reg <- numeric(m) + for (i in seq_len(m)) { + neighbor_idx <- idx_mat[i, ] + neighbor_vals <- as.numeric(model$y[neighbor_idx]) + neighbor_dists <- dist_mat[i, ] + if (model$weighted) { + w <- 1 / (neighbor_dists + eps) + if (any(neighbor_dists == 0)) { + w <- as.numeric(neighbor_dists == 0) * 1e12 + } + preds_reg[i] <- sum(w * neighbor_vals) / sum(w) + } else { + preds_reg[i] <- mean(neighbor_vals) + } + } + out <- list(predictions = preds_reg) + } + + if (return_neighbors) { + out$neighbor_indices <- idx_mat + out$neighbor_distances <- dist_mat + } + + out +} + +# --------------------------- +# Utility: accuracy and confusion (classification) +# --------------------------- +knn_accuracy <- function(y_true, y_pred) { + if (length(y_true) != length(y_pred)) stop("Lengths mismatch.") + mean(y_true == y_pred) +} + +confusion_matrix <- function(y_true, y_pred) { + table(Actual = y_true, Predicted = y_pred) +} + +# --------------------------- +# Example/Test: Iris classification +# --------------------------- +cat("=== k-NN Example: Iris dataset (classification) ===\n") +data(iris) +set.seed(42) +# Use only numeric features +X <- as.matrix(iris[, 1:4]) +y <- factor(iris$Species) + +# train/test split 70/30 +n <- nrow(X) +train_idx <- sample(seq_len(n), size = floor(0.7 * n)) +test_idx <- setdiff(seq_len(n), train_idx) + +X_train <- X[train_idx, , drop = FALSE] +y_train <- y[train_idx] +X_test <- X[test_idx, , drop = FALSE] +y_test <- y[test_idx] + +model <- knn_train(X_train, y_train, k = 5, weighted = TRUE, normalize = TRUE) +pred <- knn_predict(model, X_test, return_probs = TRUE, return_neighbors = FALSE) + +acc <- knn_accuracy(y_test, pred$predictions) +cat(sprintf("Test accuracy (k=%d, weighted=%s, normalize=%s): %.4f\n", + model$k, model$weighted, model$normalize, acc)) +cat("Confusion Matrix:\n") +print(confusion_matrix(y_test, pred$predictions)) +cat("\n") + +# --------------------------- +# Example/Test: Regression (toy) +# --------------------------- +cat("=== k-NN Example: Toy regression ===\n") +set.seed(1) +n_reg <- 200 +X_reg <- matrix(runif(n_reg * 2, -5, 5), ncol = 2) +y_reg <- X_reg[,1] * 2 - X_reg[,2] * 0.5 + rnorm(n_reg, sd = 0.5) +train_idx <- sample(seq_len(n_reg), size = 150) +X_tr <- X_reg[train_idx, , drop=FALSE]; y_tr <- y_reg[train_idx] +X_te <- X_reg[-train_idx, , drop=FALSE]; y_te <- y_reg[-train_idx] + +model_reg <- knn_train(X_tr, y_tr, k = 7, weighted = TRUE, normalize = TRUE) +pred_reg <- knn_predict(model_reg, X_te) +mse <- mean((pred_reg$predictions - y_te)^2) +cat(sprintf("Regression MSE (k=%d, weighted=%s): %.4f\n\n", model_reg$k, model_reg$weighted, mse)) + +# --------------------------- +# End of script +# --------------------------- From 
563abaa6efbd5f08d88cb4ad33936c15c7144df5 Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 12:27:15 +0530 Subject: [PATCH 3/8] Convolutional Neural Network --- machine_learning/cnn.r | 56 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 machine_learning/cnn.r diff --git a/machine_learning/cnn.r b/machine_learning/cnn.r new file mode 100644 index 00000000..5aa9d9e9 --- /dev/null +++ b/machine_learning/cnn.r @@ -0,0 +1,56 @@ +# ============================================== +# Convolutional Neural Network (CNN) +# ============================================== +# Algorithm: Deep learning model using convolutional, pooling, and dense layers. +# Framework: Keras (TensorFlow backend) +# +# Purpose: +# - Automatically extract spatial and hierarchical features from image data. +# - Commonly used for image classification, object detection, and visual recognition. +# +# Architecture Steps: +# 1. Convolution Layer: Extracts local spatial patterns using learnable filters. +# 2. Activation (ReLU): Adds non-linearity by thresholding at zero. +# 3. Pooling Layer: Reduces spatial dimensions (downsampling) while preserving features. +# 4. Flatten Layer: Converts 2D feature maps into 1D vector. +# 5. Dense Layers: Combines extracted features for classification. +# 6. Output Layer: Uses Softmax activation for class probabilities. +# +# Complexity: +# - Time: O(E × N × F × K²) where E=epochs, N=samples, F=filters, K=kernel size +# - Space: O(parameters + feature maps) +# +# Reference: +# LeCun et al., "Gradient-based learning applied to document recognition" (1998) +# https://yann.lecun.com/exdb/lenet/ +# +# ============================================== + +# Load Required Library +suppressPackageStartupMessages(library(keras)) + +# Define CNN Architecture (Algorithm Only) +cnn_model <- keras_model_sequential() %>% + layer_conv_2d( + filters = 32, kernel_size = c(3, 3), activation = "relu", + input_shape = c(28, 28, 1), padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_conv_2d( + filters = 64, kernel_size = c(3, 3), + activation = "relu", padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_flatten() %>% + layer_dense(units = 128, activation = "relu") %>% + layer_dense(units = 10, activation = "softmax") + +# Display Model Summary +summary(cnn_model) + +# ============================================== +# Note: +# - This script defines the CNN algorithm structure only. +# - You can compile and train it using model %>% compile() and model %>% fit() +# with any dataset (e.g., MNIST, CIFAR-10). +# ============================================== From 5676cd68b4249a2d1f5ca948c5f32a9d216f82af Mon Sep 17 00:00:00 2001 From: AtharvaPatange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 13:05:56 +0530 Subject: [PATCH 4/8] Gaussian Process Regression --- machine_learning/guassian_process.r | 62 +++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 machine_learning/guassian_process.r diff --git a/machine_learning/guassian_process.r b/machine_learning/guassian_process.r new file mode 100644 index 00000000..89b5f2eb --- /dev/null +++ b/machine_learning/guassian_process.r @@ -0,0 +1,62 @@ +# ============================================== +# Gaussian Process Regression (GP) +# ============================================== +# Algorithm: Non-parametric Bayesian regression using Gaussian Processes. 
+# Framework: R (kernlab package) +# +# Purpose: +# - Perform regression while providing uncertainty estimates. +# - Useful for small datasets and Bayesian optimization. +# +# Core Idea: +# - Define a prior over functions using a kernel (covariance) function. +# - Update the posterior distribution using observed data. +# - Predictions include mean and variance (uncertainty) for each point. +# +# Complexity: +# - Time: O(n^3) due to inversion of the kernel matrix +# - Space: O(n^2) for storing the kernel matrix +# +# Edge Cases / Notes: +# - Choice of kernel is critical for good performance. +# - Computationally heavy for large datasets; sparse approximations exist. +# - Great for uncertainty quantification in predictions. +# +# Typical Applications: +# - Bayesian optimization +# - Small-data regression tasks +# - Time-series forecasting with uncertainty estimates +# +# Reference: +# Rasmussen, C. E., & Williams, C. K. I. (2006). Gaussian Processes for Machine Learning. +# ============================================== + +# Load required library +suppressPackageStartupMessages(library(kernlab)) + +# Example Dataset (Synthetic) +set.seed(42) +x <- seq(-5, 5, length.out = 50) +y <- sin(x) + rnorm(length(x), sd = 0.2) + +# Define Gaussian Process Regression Model +gp_model <- gausspr( + x = as.matrix(x), y = y, + kernel = "rbfdot" # Radial Basis Function (RBF) kernel +) + +# Make Predictions +x_test <- seq(-6, 6, length.out = 100) +y_pred <- predict(gp_model, as.matrix(x_test), type = "response") + +# Plot Results +plot(x, y, main = "Gaussian Process Regression", xlab = "X", ylab = "Y", pch = 19) +lines(x_test, y_pred, col = "blue", lwd = 2) +legend("topright", legend = c("Observations", "GP Prediction"), col = c("black", "blue"), pch = c(19, NA), lty = c(NA, 1)) + +# ============================================== +# Note: +# - This script defines a Gaussian Process Regression model in R. +# - Can be applied to other regression datasets by replacing x and y. +# - For large datasets, consider sparse GP approximations. 
+# ============================================== From a46a2dfb7cb3091985393f35aeb628c6fb64ec20 Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 17:59:20 +0530 Subject: [PATCH 5/8] Update machine_learning/guassian_process.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/guassian_process.r | 63 ++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/machine_learning/guassian_process.r b/machine_learning/guassian_process.r index 89b5f2eb..2a3d8fb1 100644 --- a/machine_learning/guassian_process.r +++ b/machine_learning/guassian_process.r @@ -34,26 +34,51 @@ # Load required library suppressPackageStartupMessages(library(kernlab)) -# Example Dataset (Synthetic) -set.seed(42) -x <- seq(-5, 5, length.out = 50) -y <- sin(x) + rnorm(length(x), sd = 0.2) - -# Define Gaussian Process Regression Model -gp_model <- gausspr( - x = as.matrix(x), y = y, - kernel = "rbfdot" # Radial Basis Function (RBF) kernel -) - -# Make Predictions -x_test <- seq(-6, 6, length.out = 100) -y_pred <- predict(gp_model, as.matrix(x_test), type = "response") - -# Plot Results -plot(x, y, main = "Gaussian Process Regression", xlab = "X", ylab = "Y", pch = 19) -lines(x_test, y_pred, col = "blue", lwd = 2) -legend("topright", legend = c("Observations", "GP Prediction"), col = c("black", "blue"), pch = c(19, NA), lty = c(NA, 1)) +# ---- Core Functions ---- +#' Train a Gaussian Process Regression model +#' @param x Numeric vector or matrix of input features +#' @param y Numeric vector of target values +#' @param kernel Kernel to use (default: "rbfdot") +#' @param ... Additional arguments passed to gausspr +#' @return Trained GP model (kernlab::gausspr object) +gp_train <- function(x, y, kernel = "rbfdot", ...) { + gausspr( + x = as.matrix(x), y = y, + kernel = kernel, + ... + ) +} + +#' Predict using a trained Gaussian Process Regression model +#' @param model Trained GP model (from gp_train) +#' @param x_test Numeric vector or matrix of test inputs +#' @param type Prediction type (default: "response") +#' @param ... Additional arguments passed to predict +#' @return Predicted values +gp_predict <- function(model, x_test, type = "response", ...) { + predict(model, as.matrix(x_test), type = type, ...) +} + +# ---- Example Usage (runs only in interactive sessions) ---- +if (interactive()) { + # Example Dataset (Synthetic) + set.seed(42) + x <- seq(-5, 5, length.out = 50) + y <- sin(x) + rnorm(length(x), sd = 0.2) + + # Train GP model + gp_model <- gp_train(x, y) + + # Make Predictions + x_test <- seq(-6, 6, length.out = 100) + y_pred <- gp_predict(gp_model, x_test) + + # Plot Results + plot(x, y, main = "Gaussian Process Regression", xlab = "X", ylab = "Y", pch = 19) + lines(x_test, y_pred, col = "blue", lwd = 2) + legend("topright", legend = c("Observations", "GP Prediction"), col = c("black", "blue"), pch = c(19, NA), lty = c(NA, 1)) +} # ============================================== # Note: # - This script defines a Gaussian Process Regression model in R. 
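The guassian_process.r header notes that GP predictions come with both a mean and a variance, but the kernlab example only shows the predictive mean. The sketch below works the textbook posterior equations directly in base R with an RBF kernel, so the O(n^3) kernel-matrix solve and the uncertainty estimate mentioned in the header are visible. It is an illustrative from-scratch companion, not part of the patch or of kernlab's API; the length-scale ell and noise level sigma_n are assumed values chosen for demonstration.

# Minimal GP posterior sketch (illustrative; ell and sigma_n are assumptions)
rbf_kernel <- function(A, B, ell = 1.0) {
  # Squared-exponential kernel matrix between rows of A and rows of B
  d2 <- outer(rowSums(A^2), rowSums(B^2), "+") - 2 * tcrossprod(A, B)
  exp(-d2 / (2 * ell^2))
}

gp_posterior <- function(x_train, y_train, x_test, ell = 1.0, sigma_n = 0.2) {
  X  <- as.matrix(x_train)
  Xs <- as.matrix(x_test)
  K   <- rbf_kernel(X, X, ell) + sigma_n^2 * diag(nrow(X))   # noisy training covariance
  Ks  <- rbf_kernel(Xs, X, ell)                               # test-vs-train covariance
  Kss <- rbf_kernel(Xs, Xs, ell)                              # test covariance
  alpha <- solve(K, y_train)                                  # the O(n^3) solve
  list(
    mean = as.vector(Ks %*% alpha),                           # posterior mean
    var  = pmax(diag(Kss - Ks %*% solve(K, t(Ks))), 0)        # posterior variance per test point
  )
}

With the synthetic x, y, and x_test from the example above, post <- gp_posterior(x, y, x_test) gives sqrt(post$var) as a pointwise uncertainty band alongside post$mean.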
From a08463022ac8353961359f348dc875b563aa9621 Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 18:00:08 +0530 Subject: [PATCH 6/8] Update machine_learning/k-NN.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/k-NN.r | 63 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/machine_learning/k-NN.r b/machine_learning/k-NN.r index d72c7d4d..2314b021 100644 --- a/machine_learning/k-NN.r +++ b/machine_learning/k-NN.r @@ -231,42 +231,43 @@ X <- as.matrix(iris[, 1:4]) y <- factor(iris$Species) # train/test split 70/30 -n <- nrow(X) -train_idx <- sample(seq_len(n), size = floor(0.7 * n)) -test_idx <- setdiff(seq_len(n), train_idx) +if (interactive()) { + n <- nrow(X) + train_idx <- sample(seq_len(n), size = floor(0.7 * n)) + test_idx <- setdiff(seq_len(n), train_idx) -X_train <- X[train_idx, , drop = FALSE] -y_train <- y[train_idx] -X_test <- X[test_idx, , drop = FALSE] -y_test <- y[test_idx] + X_train <- X[train_idx, , drop = FALSE] + y_train <- y[train_idx] + X_test <- X[test_idx, , drop = FALSE] + y_test <- y[test_idx] -model <- knn_train(X_train, y_train, k = 5, weighted = TRUE, normalize = TRUE) -pred <- knn_predict(model, X_test, return_probs = TRUE, return_neighbors = FALSE) + model <- knn_train(X_train, y_train, k = 5, weighted = TRUE, normalize = TRUE) + pred <- knn_predict(model, X_test, return_probs = TRUE, return_neighbors = FALSE) -acc <- knn_accuracy(y_test, pred$predictions) -cat(sprintf("Test accuracy (k=%d, weighted=%s, normalize=%s): %.4f\n", - model$k, model$weighted, model$normalize, acc)) -cat("Confusion Matrix:\n") -print(confusion_matrix(y_test, pred$predictions)) -cat("\n") + acc <- knn_accuracy(y_test, pred$predictions) + cat(sprintf("Test accuracy (k=%d, weighted=%s, normalize=%s): %.4f\n", + model$k, model$weighted, model$normalize, acc)) + cat("Confusion Matrix:\n") + print(confusion_matrix(y_test, pred$predictions)) + cat("\n") -# --------------------------- -# Example/Test: Regression (toy) -# --------------------------- -cat("=== k-NN Example: Toy regression ===\n") -set.seed(1) -n_reg <- 200 -X_reg <- matrix(runif(n_reg * 2, -5, 5), ncol = 2) -y_reg <- X_reg[,1] * 2 - X_reg[,2] * 0.5 + rnorm(n_reg, sd = 0.5) -train_idx <- sample(seq_len(n_reg), size = 150) -X_tr <- X_reg[train_idx, , drop=FALSE]; y_tr <- y_reg[train_idx] -X_te <- X_reg[-train_idx, , drop=FALSE]; y_te <- y_reg[-train_idx] - -model_reg <- knn_train(X_tr, y_tr, k = 7, weighted = TRUE, normalize = TRUE) -pred_reg <- knn_predict(model_reg, X_te) -mse <- mean((pred_reg$predictions - y_te)^2) -cat(sprintf("Regression MSE (k=%d, weighted=%s): %.4f\n\n", model_reg$k, model_reg$weighted, mse)) + # --------------------------- + # Example/Test: Regression (toy) + # --------------------------- + cat("=== k-NN Example: Toy regression ===\n") + set.seed(1) + n_reg <- 200 + X_reg <- matrix(runif(n_reg * 2, -5, 5), ncol = 2) + y_reg <- X_reg[,1] * 2 - X_reg[,2] * 0.5 + rnorm(n_reg, sd = 0.5) + train_idx <- sample(seq_len(n_reg), size = 150) + X_tr <- X_reg[train_idx, , drop=FALSE]; y_tr <- y_reg[train_idx] + X_te <- X_reg[-train_idx, , drop=FALSE]; y_te <- y_reg[-train_idx] + model_reg <- knn_train(X_tr, y_tr, k = 7, weighted = TRUE, normalize = TRUE) + pred_reg <- knn_predict(model_reg, X_te) + mse <- mean((pred_reg$predictions - y_te)^2) + cat(sprintf("Regression MSE (k=%d, weighted=%s): %.4f\n\n", model_reg$k, model_reg$weighted, mse)) +} # 
--------------------------- # End of script # --------------------------- From 606ef9d50f3335b9bd1ba241cf999aead4124073 Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 20:20:04 +0530 Subject: [PATCH 7/8] Update machine_learning/cnn.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/cnn.r | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/machine_learning/cnn.r b/machine_learning/cnn.r index 5aa9d9e9..b0154f0e 100644 --- a/machine_learning/cnn.r +++ b/machine_learning/cnn.r @@ -29,24 +29,29 @@ # Load Required Library suppressPackageStartupMessages(library(keras)) -# Define CNN Architecture (Algorithm Only) -cnn_model <- keras_model_sequential() %>% - layer_conv_2d( - filters = 32, kernel_size = c(3, 3), activation = "relu", - input_shape = c(28, 28, 1), padding = "same" - ) %>% - layer_max_pooling_2d(pool_size = c(2, 2)) %>% - layer_conv_2d( - filters = 64, kernel_size = c(3, 3), - activation = "relu", padding = "same" - ) %>% - layer_max_pooling_2d(pool_size = c(2, 2)) %>% - layer_flatten() %>% - layer_dense(units = 128, activation = "relu") %>% - layer_dense(units = 10, activation = "softmax") +# Define CNN Architecture as a Function (Reusable) +build_cnn_model <- function(input_shape = c(28, 28, 1), num_classes = 10) { + keras_model_sequential() %>% + layer_conv_2d( + filters = 32, kernel_size = c(3, 3), activation = "relu", + input_shape = input_shape, padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_conv_2d( + filters = 64, kernel_size = c(3, 3), + activation = "relu", padding = "same" + ) %>% + layer_max_pooling_2d(pool_size = c(2, 2)) %>% + layer_flatten() %>% + layer_dense(units = 128, activation = "relu") %>% + layer_dense(units = num_classes, activation = "softmax") +} -# Display Model Summary -summary(cnn_model) +# Example: Display Model Summary (only in interactive sessions) +if (interactive()) { + model <- build_cnn_model() + summary(model) +} # ============================================== # Note: From 0c2f3694ace84aa69dfb7f402b6e98b865a80aea Mon Sep 17 00:00:00 2001 From: Atharva Patange <164712914+AtharvaPatange@users.noreply.github.com> Date: Sat, 18 Oct 2025 20:20:20 +0530 Subject: [PATCH 8/8] Update machine_learning/k-NN.r Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- machine_learning/k-NN.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/k-NN.r b/machine_learning/k-NN.r index 2314b021..04e984c2 100644 --- a/machine_learning/k-NN.r +++ b/machine_learning/k-NN.r @@ -173,7 +173,7 @@ knn_predict <- function(model, X_new, return_probs = TRUE, return_neighbors = FA else probs[i, ] <- counts # choose class with max probability (first tie wins because which.max) - preds[i] <- names(which.max(probs[i, ])) + preds[i] <- colnames(probs)[which.max(probs[i, ])] } # cast to factor with original levels preds <- factor(preds, levels = model$classes)
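The note in cnn.r leaves compilation and training to the reader ("model %>% compile() and model %>% fit() with any dataset (e.g., MNIST, CIFAR-10)"). A minimal sketch of that step with the keras R interface, using the build_cnn_model() helper introduced in the CNN patch, could look like the following. It assumes keras with a TensorFlow backend is installed and that MNIST can be downloaded on first use; the epoch count and batch size are arbitrary values chosen only to illustrate the calls.

# Illustrative only: compile and fit build_cnn_model() on MNIST
library(keras)

mnist <- dataset_mnist()
# Scale pixels to [0, 1] and add the channel dimension expected by the model
x_train <- array_reshape(mnist$train$x / 255, c(nrow(mnist$train$x), 28, 28, 1))
y_train <- to_categorical(mnist$train$y, 10)
x_test  <- array_reshape(mnist$test$x / 255, c(nrow(mnist$test$x), 28, 28, 1))
y_test  <- to_categorical(mnist$test$y, 10)

model <- build_cnn_model(input_shape = c(28, 28, 1), num_classes = 10)

model %>% compile(
  loss = "categorical_crossentropy",
  optimizer = optimizer_adam(),
  metrics = "accuracy"
)

history <- model %>% fit(
  x_train, y_train,
  epochs = 3, batch_size = 128,   # small values, for demonstration only
  validation_split = 0.1
)

model %>% evaluate(x_test, y_test)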