# ==============================================
# Principal Component Analysis (PCA)
# ==============================================
# Algorithm: Dimensionality reduction via an orthogonal linear transformation
# Framework: Base R
#
# Purpose:
# - Reduce high-dimensional data to a smaller set of uncorrelated variables (principal components)
# - Preserve as much of the original variance as possible
#
# Steps:
# 1. Standardize the dataset (zero mean, unit variance)
# 2. Compute the covariance matrix of the standardized data
# 3. Compute eigenvalues and eigenvectors of the covariance matrix
# 4. Sort eigenvectors by decreasing eigenvalue (most variance first)
# 5. Project the standardized data onto the top k eigenvectors to obtain the reduced data
#
# Complexity:
# - Time: O(n * d^2 + d^3), where n = samples, d = features
# - Space: O(d^2 + n * d)
#
# Applications:
# - Data visualization, noise reduction, feature extraction
# - Preprocessing for machine learning models
# ==============================================

# PCA Algorithm Implementation (Algorithm only)
pca_algorithm <- function(X, k) {
  # Basic input validation (kept minimal to match repo style)
  if (is.vector(X)) {
    X <- matrix(X, ncol = 1)
  }
  if (!is.matrix(X) || !is.numeric(X)) {
    stop("Input 'X' must be a numeric matrix or vector")
  }
  d <- ncol(X)
  if (length(k) != 1 || k < 1 || k > d) {
    stop("'k' must be a single value between 1 and the number of columns of X")
  }

  # Step 1: Standardize the data (zero mean, unit variance per feature)
  # (assumes no zero-variance columns, for which scale() would produce NaNs)
  X_std <- scale(X)

  # Step 2: Compute covariance matrix of the standardized data
  cov_matrix <- cov(X_std)

  # Step 3: Eigen decomposition; symmetric = TRUE because covariance
  # matrices are symmetric, which is faster and numerically more stable
  eig <- eigen(cov_matrix, symmetric = TRUE)
  eig_values <- eig$values
  eig_vectors <- eig$vectors

  # Step 4: Select top k principal components; eigen() already returns
  # eigenvalues in decreasing order, so no explicit sort is needed
  top_vectors <- eig_vectors[, 1:k, drop = FALSE]

  # Step 5: Project standardized data onto the top k components
  X_reduced <- X_std %*% top_vectors

  return(list(
    reduced_data = X_reduced,
    components = top_vectors,
    eigenvalues = eig_values
  ))
}
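# The eigenvalues returned above can guide the choice of k: each eigenvalue
# is the variance captured by its component, so dividing by their sum gives
# the proportion of variance explained. A minimal sketch; the name
# 'explained_variance' is illustrative, not part of the repo:
explained_variance <- function(eigenvalues) {
  # Proportion of total variance captured by each component
  eigenvalues / sum(eigenvalues)
}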
# Example usage (algorithm only)
# set.seed(42)
# X <- matrix(rnorm(50 * 5), nrow = 50, ncol = 5)
# pca_result <- pca_algorithm(X, k = 2)
# head(pca_result$reduced_data)
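# Cumulative variance retained by the leading components (illustrative,
# assumes the example above has been run):
# cumsum(explained_variance(pca_result$eigenvalues))
#
# Approximate rank-k reconstruction in standardized space, since the
# component matrix has orthonormal columns:
# X_hat <- pca_result$reduced_data %*% t(pca_result$components)
#
# Sanity check against base R's prcomp(), which performs the same
# standardized PCA when center = TRUE and scale. = TRUE; columns may differ
# only in sign, because each eigenvector's orientation is arbitrary:
# ref <- prcomp(X, center = TRUE, scale. = TRUE)
# all.equal(abs(unname(pca_result$reduced_data)), abs(unname(ref$x[, 1:2])))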