|
| 1 | +# Subset Sum Problem |
| 2 | +# |
| 3 | +# The Subset Sum problem determines whether there exists a subset of a given set |
| 4 | +# of positive integers that sums to a target value. This is a classic NP-complete |
| 5 | +# problem solved using dynamic programming. |
| 6 | +# |
| 7 | +# Time Complexity: O(n * sum) where n = number of elements, sum = target sum |
| 8 | +# Space Complexity: O(n * sum) for DP table, O(sum) for optimized version |
| 9 | +# |
| 10 | +# Applications: |
| 11 | +# - Partition problems in computer science |
| 12 | +# - Knapsack problem variations |
| 13 | +# - Resource allocation and optimization |
| 14 | +# - Cryptography and number theory |
| 15 | +# - Game theory and decision making |
| 16 | + |
| 17 | +# Basic DP solution for Subset Sum Problem |
| 18 | +subset_sum <- function(arr, target) { |
| 19 | + #' Check if there exists a subset that sums to the target value |
| 20 | + #' @param arr: Numeric vector of positive integers |
| 21 | + #' @param target: Target sum value |
| 22 | + #' @return: Boolean indicating if subset exists, along with DP table |
| 23 | + |
| 24 | + n <- length(arr) |
| 25 | + |
| 26 | + # Handle edge cases |
| 27 | + if (n == 0) { |
| 28 | + return(list( |
| 29 | + exists = (target == 0), |
| 30 | + dp_table = matrix(FALSE, nrow = 1, ncol = 1), |
| 31 | + subset = c() |
| 32 | + )) |
| 33 | + } |
| 34 | + |
| 35 | + if (target == 0) { |
| 36 | + return(list( |
| 37 | + exists = TRUE, |
| 38 | + dp_table = matrix(TRUE, nrow = n + 1, ncol = 1), |
| 39 | + subset = c() |
| 40 | + )) |
| 41 | + } |
| 42 | + |
| 43 | + # Create DP table: dp[i, j] = TRUE if sum j can be achieved using first i elements |
| 44 | + dp <- matrix(FALSE, nrow = n + 1, ncol = target + 1) |
| 45 | + |
| 46 | + # Base case: sum 0 can always be achieved with empty subset |
| 47 | + for (i in 1:(n + 1)) { |
| 48 | + dp[i, 1] <- TRUE |
| 49 | + } |
| 50 | + |
| 51 | + # Fill DP table |
| 52 | + for (i in 2:(n + 1)) { |
| 53 | + for (j in 1:(target + 1)) { |
| 54 | + current_sum <- j - 1 # Convert to 0-based indexing |
| 55 | + |
| 56 | + # Don't include current element |
| 57 | + dp[i, j] <- dp[i - 1, j] |
| 58 | + |
| 59 | + # Include current element (if it doesn't exceed current sum) |
| 60 | + if (arr[i - 1] <= current_sum) { |
| 61 | + dp[i, j] <- dp[i, j] || dp[i - 1, j - arr[i - 1]] |
| 62 | + } |
| 63 | + } |
| 64 | + } |
| 65 | + |
| 66 | + # Backtrack to find one possible subset |
| 67 | + subset <- c() |
| 68 | + if (dp[n + 1, target + 1]) { |
| 69 | + i <- n + 1 |
| 70 | + j <- target + 1 |
| 71 | + |
| 72 | + while (i > 1 && j > 1) { |
| 73 | + # If current sum was achieved without including arr[i-1] |
| 74 | + if (dp[i - 1, j]) { |
| 75 | + i <- i - 1 |
| 76 | + } else { |
| 77 | + # Current element was included |
| 78 | + subset <- c(arr[i - 1], subset) |
| 79 | + j <- j - arr[i - 1] |
| 80 | + i <- i - 1 |
| 81 | + } |
| 82 | + } |
| 83 | + } |
| 84 | + |
| 85 | + return(list( |
| 86 | + exists = dp[n + 1, target + 1], |
| 87 | + dp_table = dp, |
| 88 | + subset = subset |
| 89 | + )) |
| 90 | +} |
| 91 | + |
| 92 | +# Space-optimized version using only 1D array |
| 93 | +subset_sum_optimized <- function(arr, target) { |
| 94 | + #' Space optimized subset sum using 1D array |
| 95 | + #' @param arr: Numeric vector of positive integers |
| 96 | + #' @param target: Target sum value |
| 97 | + #' @return: Boolean indicating if subset exists |
| 98 | + |
| 99 | + n <- length(arr) |
| 100 | + |
| 101 | + if (n == 0) return(target == 0) |
| 102 | + if (target == 0) return(TRUE) |
| 103 | + |
| 104 | + dp <- rep(FALSE, target + 1) |
| 105 | + dp[1] <- TRUE # sum 0 is always possible |
| 106 | + |
| 107 | + for (i in 1:n) { |
| 108 | + # Traverse from right to left to avoid overwriting needed values |
| 109 | + for (j in target:1) { |
| 110 | + if (arr[i] <= j) { |
| 111 | + dp[j + 1] <- dp[j + 1] || dp[j - arr[i] + 1] |
| 112 | + } |
| 113 | + } |
| 114 | + } |
| 115 | + |
| 116 | + return(dp[target + 1]) |
| 117 | +} |
| 118 | + |
| 119 | +# Function to find all subsets that sum to target |
| 120 | +find_all_subsets <- function(arr, target) { |
| 121 | + #' Find all subsets that sum to the target value |
| 122 | + #' @param arr: Numeric vector of positive integers |
| 123 | + #' @param target: Target sum value |
| 124 | + #' @return: List of subsets (each subset is a numeric vector) that sum to target |
| 125 | + |
| 126 | + n <- length(arr) |
| 127 | + results <- list() |
| 128 | + |
| 129 | + # Helper recursive function |
| 130 | + find_subsets_rec <- function(idx, current_subset, current_sum) { |
| 131 | + if (current_sum == target) { |
| 132 | + results <<- c(results, list(current_subset)) |
| 133 | + return() |
| 134 | + } |
| 135 | + if (idx > n || current_sum > target) { |
| 136 | + return() |
| 137 | + } |
| 138 | + # Include arr[idx] |
| 139 | + find_subsets_rec(idx + 1, c(current_subset, arr[idx]), current_sum + arr[idx]) |
| 140 | + # Exclude arr[idx] |
| 141 | + find_subsets_rec(idx + 1, current_subset, current_sum) |
| 142 | + } |
| 143 | + |
| 144 | + find_subsets_rec(1, c(), 0) |
| 145 | + return(results) |
| 146 | +} |
| 147 | + |
| 148 | +# Helper function to print DP table |
| 149 | +print_subset_sum_dp <- function(dp_table, arr, target) { |
| 150 | + cat("DP Table for Subset Sum Problem:\n") |
| 151 | + cat("Array:", paste(arr, collapse = ", "), "\n") |
| 152 | + cat("Target Sum:", target, "\n\n") |
| 153 | + |
| 154 | + # Print column headers (sums) |
| 155 | + cat(" ") |
| 156 | + cat(paste(sprintf("%4d", 0:target), collapse = " ")) |
| 157 | + cat("\n") |
| 158 | + cat(paste(rep("-", 8 + 5 * (target + 1)), collapse = ""), "\n") |
| 159 | + |
| 160 | + for (i in 1:nrow(dp_table)) { |
| 161 | + if (i == 1) { |
| 162 | + cat("Empty | ") |
| 163 | + } else { |
| 164 | + cat(sprintf("Elem%2d| ", i - 1)) |
| 165 | + } |
| 166 | + |
| 167 | + for (j in 1:ncol(dp_table)) { |
| 168 | + cat(sprintf("%4s", ifelse(dp_table[i, j], " T", " F"))) |
| 169 | + } |
| 170 | + cat("\n") |
| 171 | + } |
| 172 | + cat("\n") |
| 173 | +} |
| 174 | + |
| 175 | +# =========================== |
| 176 | +# Example Usage & Testing |
| 177 | +# =========================== |
| 178 | +cat("=== Subset Sum Problem (Dynamic Programming) ===\n\n") |
| 179 | + |
| 180 | +# Test 1: Basic Example |
| 181 | +arr1 <- c(3, 34, 4, 12, 5, 2) |
| 182 | +target1 <- 9 |
| 183 | +cat("Test 1: Basic Example\n") |
| 184 | +cat("Array:", paste(arr1, collapse = ", "), "\n") |
| 185 | +cat("Target Sum:", target1, "\n\n") |
| 186 | + |
| 187 | +result1 <- subset_sum(arr1, target1) |
| 188 | +print_subset_sum_dp(result1$dp_table, arr1, target1) |
| 189 | +cat("Subset exists:", result1$exists, "\n") |
| 190 | +if (result1$exists) { |
| 191 | + cat("One possible subset:", paste(result1$subset, collapse = ", "), "\n") |
| 192 | + cat("Sum verification:", sum(result1$subset), "\n") |
| 193 | +} |
| 194 | +cat("\n") |
| 195 | + |
| 196 | +# Test 2: Optimized Version |
| 197 | +cat("Test 2: Space Optimized Version\n") |
| 198 | +exists_opt <- subset_sum_optimized(arr1, target1) |
| 199 | +cat("Subset exists (Optimized):", exists_opt, "\n") |
| 200 | +cat("Verification: Both methods match:", result1$exists == exists_opt, "\n\n") |
| 201 | + |
| 202 | +# Test 3: No Solution Case |
| 203 | +cat("Test 3: No Solution Case\n") |
| 204 | +arr3 <- c(3, 34, 4, 12, 5, 2) |
| 205 | +target3 <- 30 |
| 206 | +cat("Array:", paste(arr3, collapse = ", "), "\n") |
| 207 | +cat("Target Sum:", target3, "\n") |
| 208 | + |
| 209 | +result3 <- subset_sum(arr3, target3) |
| 210 | +cat("Subset exists:", result3$exists, "\n\n") |
| 211 | + |
| 212 | +# Test 4: Multiple Solutions |
| 213 | +cat("Test 4: Multiple Solutions\n") |
| 214 | +arr4 <- c(1, 2, 3, 4, 5) |
| 215 | +target4 <- 6 |
| 216 | +cat("Array:", paste(arr4, collapse = ", "), "\n") |
| 217 | +cat("Target Sum:", target4, "\n") |
| 218 | + |
| 219 | +result4 <- subset_sum(arr4, target4) |
| 220 | +cat("Subset exists:", result4$exists, "\n") |
| 221 | +if (result4$exists) { |
| 222 | + cat("One possible subset:", paste(result4$subset, collapse = ", "), "\n") |
| 223 | + |
| 224 | + # Find all possible subsets |
| 225 | + all_subsets <- find_all_subsets(arr4, target4) |
| 226 | + cat("Total number of subsets:", length(all_subsets), "\n") |
| 227 | + for (i in seq_along(all_subsets)) { |
| 228 | + cat("Subset", i, ":", paste(all_subsets[[i]], collapse = ", "), |
| 229 | + "(sum =", sum(all_subsets[[i]]), ")\n") |
| 230 | + } |
| 231 | +} |
| 232 | +cat("\n") |
| 233 | + |
| 234 | +# Test 5: Edge Cases |
| 235 | +cat("Test 5: Edge Cases\n") |
| 236 | +cat("Empty array, target 0:", subset_sum(c(), 0)$exists, "\n") |
| 237 | +cat("Empty array, target 5:", subset_sum(c(), 5)$exists, "\n") |
| 238 | +cat("Array [1,2,3], target 0:", subset_sum(c(1, 2, 3), 0)$exists, "\n") |
| 239 | +cat("Array [5], target 5:", subset_sum(c(5), 5)$exists, "\n") |
| 240 | +cat("Array [5], target 3:", subset_sum(c(5), 3)$exists, "\n\n") |
| 241 | + |
| 242 | +# Test 6: Larger Dataset |
| 243 | +cat("Test 6: Larger Dataset (n=15)\n") |
| 244 | +set.seed(42) |
| 245 | +arr_large <- sample(1:20, 15) |
| 246 | +target_large <- 50 |
| 247 | +cat("Array:", paste(arr_large, collapse = ", "), "\n") |
| 248 | +cat("Target Sum:", target_large, "\n") |
| 249 | + |
| 250 | +start_time <- Sys.time() |
| 251 | +result_large <- subset_sum(arr_large, target_large) |
| 252 | +dp_time <- as.numeric(Sys.time() - start_time, units = "secs") |
| 253 | + |
| 254 | +start_time <- Sys.time() |
| 255 | +exists_large_opt <- subset_sum_optimized(arr_large, target_large) |
| 256 | +opt_time <- as.numeric(Sys.time() - start_time, units = "secs") |
| 257 | + |
| 258 | +cat("Subset exists:", result_large$exists, "\n") |
| 259 | +cat("DP method time:", sprintf("%.4f sec", dp_time), "\n") |
| 260 | +cat("Optimized method time:", sprintf("%.4f sec", opt_time), "\n") |
| 261 | +cat("Results match:", result_large$exists == exists_large_opt, "\n") |
| 262 | + |
| 263 | +if (result_large$exists) { |
| 264 | + cat("One possible subset:", paste(result_large$subset, collapse = ", "), "\n") |
| 265 | + cat("Sum verification:", sum(result_large$subset), "\n") |
| 266 | +} |
| 267 | +cat("\n") |
| 268 | + |
| 269 | +# Test 7: Real-world Example - Budget Allocation |
| 270 | +cat("Test 7: Real-world Example - Budget Allocation\n") |
| 271 | +project_costs <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100) |
| 272 | +budget <- 150 |
| 273 | +cat("Project costs:", paste(project_costs, collapse = ", "), "\n") |
| 274 | +cat("Available budget:", budget, "\n") |
| 275 | + |
| 276 | +budget_result <- subset_sum(project_costs, budget) |
| 277 | +cat("Exact budget allocation possible:", budget_result$exists, "\n") |
| 278 | + |
| 279 | +if (budget_result$exists) { |
| 280 | + selected_projects <- budget_result$subset |
| 281 | + cat("Selected projects (costs):", paste(selected_projects, collapse = ", "), "\n") |
| 282 | + cat("Total cost:", sum(selected_projects), "\n") |
| 283 | + cat("Remaining budget:", budget - sum(selected_projects), "\n") |
| 284 | +} else { |
| 285 | + # Find closest possible sum (≤ budget) in a single pass |
| 286 | + closest_sum <- max_subset_sum_leq(project_costs, budget) |
| 287 | + cat("Closest possible sum:", closest_sum, "\n") |
| 288 | +} |
0 commit comments