From 6936688412392d7a8c8fb224ae1ac6de6915b0c6 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Sun, 7 Jan 2024 10:02:05 +0530 Subject: [PATCH 01/58] setCover.py --- submodlib/functions/setCover.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py index 41d0baa..411b069 100644 --- a/submodlib/functions/setCover.py +++ b/submodlib/functions/setCover.py @@ -1,7 +1,8 @@ # setCover.py # Author: Vishal Kaushal from .setFunction import SetFunction -from submodlib_cpp import SetCover +#from submodlib_cpp import SetCover +from submodlib_pytorch import SetCover class SetCoverFunction(SetFunction): """Implementation of the Set-Cover (SC) submodular function. @@ -42,7 +43,8 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): self.cover_set = cover_set self.num_concepts = num_concepts self.concept_weights = concept_weights - self.cpp_obj = None + #self.cpp_obj = None + self.obj = None if self.n <= 0: raise Exception("ERROR: Number of elements in ground set must be positive") @@ -56,8 +58,9 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): else: self.concept_weights = [1] * self.num_concepts - self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) + #self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) + self.obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) self.effective_ground = set(range(n)) - \ No newline at end of file + From a266b9d333a0efd7a10995f29b838056dcadaf89 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Sun, 7 Jan 2024 10:07:02 +0530 Subject: [PATCH 02/58] setCover.py --- submodlib/functions/setCover.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/submodlib/functions/setCover.py 
b/submodlib/functions/setCover.py index 411b069..eaf2c54 100644 --- a/submodlib/functions/setCover.py +++ b/submodlib/functions/setCover.py @@ -1,8 +1,7 @@ # setCover.py # Author: Vishal Kaushal from .setFunction import SetFunction -#from submodlib_cpp import SetCover -from submodlib_pytorch import SetCover +from submodlib_cpp import SetCover class SetCoverFunction(SetFunction): """Implementation of the Set-Cover (SC) submodular function. @@ -43,9 +42,8 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): self.cover_set = cover_set self.num_concepts = num_concepts self.concept_weights = concept_weights - #self.cpp_obj = None - self.obj = None - + self.cpp_obj = None + if self.n <= 0: raise Exception("ERROR: Number of elements in ground set must be positive") @@ -58,9 +56,7 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): else: self.concept_weights = [1] * self.num_concepts - #self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) - self.obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) - + self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) self.effective_ground = set(range(n)) From 78f51b0d0bf66dcf3cd26aab2da7150bfa0a699e Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 15:15:43 +0530 Subject: [PATCH 03/58] Create NaiveGreedyOptimizer.py --- cpp/optimizers/NaiveGreedy.py | 90 +++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 cpp/optimizers/NaiveGreedy.py diff --git a/cpp/optimizers/NaiveGreedy.py b/cpp/optimizers/NaiveGreedy.py new file mode 100644 index 0000000..728f16a --- /dev/null +++ b/cpp/optimizers/NaiveGreedy.py @@ -0,0 +1,90 @@ +import torch +import random +from typing import List, Tuple, Set + +class NaiveGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + 
return abs(val1 - val2) < eps + + def maximize( + self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy + ): + greedy_vector = [] + greedy_set = set() + if not costs: + # greedy_vector = [None] * budget + greedy_set = set() + rem_budget = budget + ground_set = f_obj.get_effective_ground_set() + #print(ground_set) + if verbose: + print("Ground set:") + print(ground_set) + print(f"Num elements in groundset = {len(ground_set)}") + print("Costs:") + print(costs) + print(f"Cost sensitive greedy: {cost_sensitive_greedy}") + print("Starting the naive greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + best_id = None + best_val = None + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter_count = 0 + + while rem_budget > 0: + best_id = None + best_val = float("-inf") + + for i in ground_set: + if i in greedy_set: + continue + gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False) + # print(gain) + if verbose: + print(f"Gain of {i} is {gain}") + + if gain > best_val: + best_id = i + best_val = gain + + if verbose: + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and stopIfNegativeGain) or ( + self.equals(best_val, 0, 1e-5) and stop_if_zero_gain + ): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print(f"Updated greedy set: {greedy_set}") + + if show_progress: + percent = int((iter_count + 1.0) / N * 100) + + if percent >= display_next: + print( + f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", + end="", + ) + print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="") + display_next += step + + iter_count += 1 + + return greedy_vector From 47f571e60bea2d9606cfc4fc6c0ed6561b763e6d 
Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 15:22:08 +0530 Subject: [PATCH 04/58] Delete cpp/optimizers/NaiveGreedy.py --- cpp/optimizers/NaiveGreedy.py | 90 ----------------------------------- 1 file changed, 90 deletions(-) delete mode 100644 cpp/optimizers/NaiveGreedy.py diff --git a/cpp/optimizers/NaiveGreedy.py b/cpp/optimizers/NaiveGreedy.py deleted file mode 100644 index 728f16a..0000000 --- a/cpp/optimizers/NaiveGreedy.py +++ /dev/null @@ -1,90 +0,0 @@ -import torch -import random -from typing import List, Tuple, Set - -class NaiveGreedyOptimizer: - def __init__(self): - pass - - @staticmethod - def equals(val1, val2, eps): - return abs(val1 - val2) < eps - - def maximize( - self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy - ): - greedy_vector = [] - greedy_set = set() - if not costs: - # greedy_vector = [None] * budget - greedy_set = set() - rem_budget = budget - ground_set = f_obj.get_effective_ground_set() - #print(ground_set) - if verbose: - print("Ground set:") - print(ground_set) - print(f"Num elements in groundset = {len(ground_set)}") - print("Costs:") - print(costs) - print(f"Cost sensitive greedy: {cost_sensitive_greedy}") - print("Starting the naive greedy algorithm") - print("Initial greedy set:") - print(greedy_set) - - f_obj.clear_memoization() - best_id = None - best_val = None - step = 1 - display_next = step - percent = 0 - N = rem_budget - iter_count = 0 - - while rem_budget > 0: - best_id = None - best_val = float("-inf") - - for i in ground_set: - if i in greedy_set: - continue - gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False) - # print(gain) - if verbose: - print(f"Gain of {i} is {gain}") - - if gain > best_val: - best_id = i - best_val = gain - - if verbose: - print(f"Next best item to add is {best_id} and its value addition is {best_val}") - - if (best_val < 0 and 
stopIfNegativeGain) or ( - self.equals(best_val, 0, 1e-5) and stop_if_zero_gain - ): - break - else: - f_obj.update_memoization(greedy_set, best_id) - greedy_set.add(best_id) - greedy_vector.append((best_id, best_val)) - rem_budget -= 1 - - if verbose: - print(f"Added element {best_id} and the gain is {best_val}") - print(f"Updated greedy set: {greedy_set}") - - if show_progress: - percent = int((iter_count + 1.0) / N * 100) - - if percent >= display_next: - print( - f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", - end="", - ) - print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="") - display_next += step - - iter_count += 1 - - return greedy_vector From 8a2603d73e7bae34a411242fdf2837fa4dc06f7c Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:41:03 +0530 Subject: [PATCH 05/58] Create SetFunction.py --- cpp/SetFunction.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 cpp/SetFunction.py diff --git a/cpp/SetFunction.py b/cpp/SetFunction.py new file mode 100644 index 0000000..661bf0c --- /dev/null +++ b/cpp/SetFunction.py @@ -0,0 +1,62 @@ +from typing import Set, List, Tuple +import numpy as np +import torch +import torch.nn as nn +import numpy as np +import random +# import optimizer python files + +class SetFunction(nn.Module): + def __init__(self): + pass + + def evaluate(self, X: Set[int]) -> float: + return self.evaluate(X) + + def evaluate_with_memoization(self, X: Set[int]) -> float: + return self.evaluate_with_memoization(X) + + def marginal_gain(self, X: Set[int], item: int) -> float: + return self.marginal_gain(X, item) + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + return self.marginal_gain_with_memoization(X, item) + + def update_memoization(self, X: Set[int], item: int) -> None: + return self.update_memoization(X, item) + + + def 
get_effective_ground_set(self) -> Set[int]: + return self.get_effective_ground_set() + + def maximize(self, optimizer: str, budget: float, stopIfZeroGain: bool, stopIfNegativeGain: bool, verbose: bool, + costs: List[float] = None, cost_sensitive_greedy: bool = False, show_progress: bool = False, epsilon: float = 0.0) -> List[Tuple[int, float]]: + optimizer = self._get_optimizer(optimizer) + if optimizer: + return optimizer.maximize(self, budget, stopIfZeroGain, stopIfZeroGain, verbose, show_progress, costs, cost_sensitive_greedy) + else: + print("Invalid Optimizer") + return [] + + def _get_optimizer(self, optimizer_name: str): + if optimizer_name == "NaiveGreedy": + return NaiveGreedyOptimizer() + # define all optimizer classed into files + elif optimizer_name == "LazyGreedy": + return LazyGreedyOptimizer() + elif optimizer_name == "StochasticGreedy": + return StochasticGreedyOptimizer() + elif optimizer_name == "LazierThanLazyGreedy": + return LazierThanLazyGreedyOptimizer() + else: + return None + + def cluster_init(self, n: int, k_dense: List[List[float]], ground: Set[int], + partial: bool, lambda_: float) -> None: + self.cluster_init(n, k_dense, ground, partial, lambda_) + + def set_memoization(self, X: Set[int]) -> None: + self.set_memoization(X) + + def clear_memoization(self) -> None: + self.clear_memoization() From 95e7cd243dff4f2caf72745e7ad62f795a2d3303 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:42:16 +0530 Subject: [PATCH 06/58] Create LazierThanLazyGreedyOptimizer.py --- .../LazierThanLazyGreedyOptimizer.py | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 cpp/optimizers/LazierThanLazyGreedyOptimizer.py diff --git a/cpp/optimizers/LazierThanLazyGreedyOptimizer.py b/cpp/optimizers/LazierThanLazyGreedyOptimizer.py new file mode 100644 index 0000000..86e355c --- /dev/null +++ b/cpp/optimizers/LazierThanLazyGreedyOptimizer.py @@ -0,0 +1,120 @@ +import 
random +import math + +class LazierThanLazyGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + @staticmethod + def print_sorted_set(sorted_set): + print("[", end="") + for val, elem in sorted_set: + print(f"({val}, {elem}), ", end="") + print("]") + + def maximize(self, f_obj, budget, stop_if_zero_gain=False, stop_if_negative_gain=False, + epsilon=0.1, verbose=False, show_progress=False, costs=None, cost_sensitive_greedy=False): + greedy_vector = [] + greedy_set = set() + + if costs is None: + greedy_vector.reserve(budget) + greedy_set.reserve(budget) + + rem_budget = budget + remaining_set = set(f_obj.get_effective_ground_set()) + n = len(remaining_set) + epsilon = 0.05 + random_set_size = int((n / budget) * math.log(1 / epsilon)) + + if verbose: + print(f"Epsilon = {epsilon}") + print(f"Random set size = {random_set_size}") + print("Ground set:") + print(remaining_set) + print(f"Num elements in ground set = {len(remaining_set)}") + print("Starting the LazierThanLazy greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + best_id = None + best_val = None + + i = 0 + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter_count = 0 + + while rem_budget > 0: + random_set = set() + while len(random_set) < random_set_size: + elem = random.randint(0, n - 1) + if elem in remaining_set and elem not in random_set: + random_set.add(elem) + + if verbose: + print(f"Iteration {i}") + print(f"Random set = {random_set}") + print("Now running lazy greedy on the random set") + + candidate_id = None + candidate_val = None + new_candidate_bound = None + + # Compute gains only for the elements in the remaining set + gains = [(f_obj.marginal_gain_with_memoization(greedy_set, elem, False), elem) + for elem in remaining_set] + + for j, (val, elem) in enumerate(sorted(gains, key=lambda x: (-x[0], x[1]))): + if elem in random_set and elem not in 
greedy_set: # Check if the element is not already selected + if verbose: + print(f"Checking {elem}...") + candidate_id = elem + candidate_val = val + new_candidate_bound = f_obj.marginal_gain_with_memoization(greedy_set, candidate_id, False) + if verbose: + print(f"Updated gain as per updated greedy set = {new_candidate_bound}") + next_elem = gains[j + 1] if j + 1 < len(gains) else None + if new_candidate_bound >= next_elem[0] if next_elem else float('-inf'): + if verbose: + print("..better than next best upper bound, " + "selecting...") + best_id = candidate_id + best_val = new_candidate_bound + break + + if verbose: + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + remaining_set.remove(best_id) + + if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print("Updated greedy set:", greedy_set) + + if show_progress: + percent = int(((iter_count + 1.0) / N) * 100) + if percent >= display_next: + print("\r", "[" + "|" * (percent // 5) + " " * (100 // 5 - percent // 5) + "]", end="") + print(f" {percent}% [Iteration {iter_count + 1} of {N}]", end="") + display_next += step + iter_count += 1 + + i += 1 + + return greedy_vector From 0715bb15065ac07320fd62229d245d5f2e7f27b2 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:43:13 +0530 Subject: [PATCH 07/58] Create LazyGreedyOptimizer.py --- cpp/optimizers/LazyGreedyOptimizer.py | 97 +++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 cpp/optimizers/LazyGreedyOptimizer.py diff --git a/cpp/optimizers/LazyGreedyOptimizer.py b/cpp/optimizers/LazyGreedyOptimizer.py new file mode 100644 index 0000000..45d7590 --- 
/dev/null +++ b/cpp/optimizers/LazyGreedyOptimizer.py @@ -0,0 +1,97 @@ +import torch +import heapq + +class LazyGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + def maximize(self, f_obj, budget, stop_if_zero_gain, stop_if_negative_gain, + verbose, show_progress, costs, cost_sensitive_greedy): + greedy_vector = [] + greedy_set = set() + + # if not costs: + # greedy_vector.reserve(budget) + # greedy_set.reserve(budget) + + rem_budget = budget + ground_set = f_obj.get_effective_ground_set() + + if verbose: + print("Ground set:") + print(ground_set) + print(f"Num elements in groundset = {len(ground_set)}") + print("Costs:") + print(costs) + print(f"Cost sensitive greedy: {cost_sensitive_greedy}") + print("Starting the lazy greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + + container = [] + heapq.heapify(container) + max_heap = container + + if cost_sensitive_greedy: + for elem in ground_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) / costs[elem] + heapq.heappush(max_heap, (-gain, elem)) + else: + for elem in ground_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) + heapq.heappush(max_heap, (-gain, elem)) + + if verbose: + print("Max heap constructed") + + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter = 0 + + while rem_budget > 0 and max_heap: + current_max = heapq.heappop(max_heap) + current_max_gain, current_max_elem = -current_max[0], current_max[1] + + if verbose: + print(f"currentMax element: {current_max_elem} and its upper bound: {current_max_gain}") + + new_max_bound = f_obj.marginal_gain_with_memoization(greedy_set, current_max_elem, False) + + if verbose: + print(f"newMaxBound: {new_max_bound}") + + if new_max_bound >= -max_heap[0][0]: + if (new_max_bound < 0 and stop_if_negative_gain) or \ + (self.equals(new_max_bound, 0, 1e-5) and 
stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, current_max_elem) + greedy_set.add(current_max_elem) + greedy_vector.append((current_max_elem, new_max_bound)) + rem_budget -= 1 + + if verbose: + print(f"Added element {current_max_elem} and the gain is {new_max_bound}") + print("Updated greedySet:", greedy_set) + + if show_progress: + percent = int(((iter + 1.0) / N) * 100) + + if percent >= display_next: + print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", + end=f" {percent}% [Iteration {iter + 1} of {N}]") + display_next += step + + iter += 1 + else: + heapq.heappush(max_heap, (-new_max_bound, current_max_elem)) + + return greedy_vector From 3820a32bbb49881bbe29baaff5289084104f17da Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:43:53 +0530 Subject: [PATCH 08/58] Create NaiveGreedyOptimizer.py --- cpp/optimizers/NaiveGreedyOptimizer.py | 90 ++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 cpp/optimizers/NaiveGreedyOptimizer.py diff --git a/cpp/optimizers/NaiveGreedyOptimizer.py b/cpp/optimizers/NaiveGreedyOptimizer.py new file mode 100644 index 0000000..728f16a --- /dev/null +++ b/cpp/optimizers/NaiveGreedyOptimizer.py @@ -0,0 +1,90 @@ +import torch +import random +from typing import List, Tuple, Set + +class NaiveGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + def maximize( + self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy + ): + greedy_vector = [] + greedy_set = set() + if not costs: + # greedy_vector = [None] * budget + greedy_set = set() + rem_budget = budget + ground_set = f_obj.get_effective_ground_set() + #print(ground_set) + if verbose: + print("Ground set:") + print(ground_set) + print(f"Num elements in groundset = {len(ground_set)}") + print("Costs:") + 
print(costs) + print(f"Cost sensitive greedy: {cost_sensitive_greedy}") + print("Starting the naive greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + best_id = None + best_val = None + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter_count = 0 + + while rem_budget > 0: + best_id = None + best_val = float("-inf") + + for i in ground_set: + if i in greedy_set: + continue + gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False) + # print(gain) + if verbose: + print(f"Gain of {i} is {gain}") + + if gain > best_val: + best_id = i + best_val = gain + + if verbose: + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and stopIfNegativeGain) or ( + self.equals(best_val, 0, 1e-5) and stop_if_zero_gain + ): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print(f"Updated greedy set: {greedy_set}") + + if show_progress: + percent = int((iter_count + 1.0) / N * 100) + + if percent >= display_next: + print( + f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", + end="", + ) + print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="") + display_next += step + + iter_count += 1 + + return greedy_vector From 5dfbdf4e448c38d6ca4824c744bd92e48a81fad6 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:44:38 +0530 Subject: [PATCH 09/58] Create StochasticGreedyOptimizer.py --- cpp/optimizers/StochasticGreedyOptimizer.py | 105 ++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 cpp/optimizers/StochasticGreedyOptimizer.py diff --git a/cpp/optimizers/StochasticGreedyOptimizer.py b/cpp/optimizers/StochasticGreedyOptimizer.py new file mode 100644 index 0000000..bcc9263 
--- /dev/null +++ b/cpp/optimizers/StochasticGreedyOptimizer.py @@ -0,0 +1,105 @@ +import random +from typing import List, Tuple, Set +import math +import sys +# from StochasticGreedyOptimizer import SetFunction + +class StochasticGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1: float, val2: float, eps: float) -> bool: + return abs(val1 - val2) < eps + + def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool, + stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True, + show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]: + # TODO: Implement handling of equal guys and different sizes of each item later + # TODO: Implement cost-sensitive selection + + greedy_vector = [] + greedy_set = set() + + # if not costs: + # # Every element is of the same size, budget corresponds to cardinality + # greedy_vector.reserve(budget) + # greedy_set.reserve(budget) + + rem_budget = budget + remaining_set = set(f_obj.get_effective_ground_set()) + n = len(remaining_set) + epsilon = 0.05 + random_set_size = int((n / budget) * math.log(1 / epsilon)) + if verbose: + print(f"Epsilon = {epsilon}") + print(f"Random set size = {random_set_size}") + print("Ground set:") + print(" ".join(map(str, remaining_set))) + print(f"Num elements in groundset = {len(remaining_set)}") + print("Starting the stochastic greedy algorithm") + print("Initial greedy set:") + print(" ".join(map(str, greedy_set))) + + f_obj.clear_memoization() + random.seed(1) + best_id = -1 + best_val = -1 * float('inf') + i = 0 + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter = 0 + + while rem_budget > 0: + random_set = set() + while len(random_set) < random_set_size: + elem = random.randint(0, n - 1) + if elem in remaining_set and elem not in random_set: + random_set.add(elem) + + if verbose: + print(f"Iteration {i}") + print(f"Random set = {list(random_set)}") + 
print("Now running naive greedy on the random set") + + best_id = -1 + best_val = -1 * float('inf') + + for elem in random_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) + if gain > best_val: + best_id = elem + best_val = gain + + if verbose: + if best_id == -1: + raise ValueError("Nobody had greater gain than minus infinity!!") + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + remaining_set.remove(best_id) + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print("Updated greedy set:", " ".join(map(str, greedy_set))) + + if show_progress: + percent = int(((iter + 1.0) / N) * 100) + if percent >= display_next: + print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="") + print(f"{percent}% [Iteration {iter + 1} of {N}]", end="") + sys.stdout.flush() + display_next += step + iter += 1 + + i += 1 + + return greedy_vector From 75bfd10121ae993e8462ef7a212ffb413427a5d1 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:48:32 +0530 Subject: [PATCH 10/58] Create SetCover.py --- cpp/submod/SetCover.py | 96 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 cpp/submod/SetCover.py diff --git a/cpp/submod/SetCover.py b/cpp/submod/SetCover.py new file mode 100644 index 0000000..d525c69 --- /dev/null +++ b/cpp/submod/SetCover.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn +import numpy as np +import random + +class SetCover(SetFunction): + def __init__(self, n, cover_set, num_concepts, concept_weights = None): + super(SetFunction, self).__init__() + device = torch.device("cuda" if 
torch.cuda.is_available() else "cpu") + self.n = n + self.cover_set = cover_set + self.num_concepts = num_concepts + self.concept_weights = concept_weights + if self.concept_weights is None: + self.concept_weights = [1.0] * num_concepts + else: + self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device) + + + self.concepts_covered_by_x = set() + + + def evaluate(self, X): + result = 0.0 + + if X.numel() == 0: + return 0.0 + + concepts_covered = set() + for elem in X: + concepts_covered.update(self.cover_set[elem.item()]) + + for con in concepts_covered: + result += self.concept_weights[con] + + return result + + + def evaluate_with_memoization(self, X): + result = 0.0 + + if X.numel() == 0: + print("hi") + return 0.0 + + for con in self.concepts_covered_by_x: + result += self.concept_weights[con] + print(result) + + return result + + def marginal_gain(self, X, item): + gain = 0.0 + + if item in X: + return 0.0 + + concepts_covered = set() + for elem in X: + concepts_covered.update(self.cover_set[elem]) + + for con in self.cover_set[item]: + if con not in concepts_covered: + gain += self.concept_weights[con] + + return gain.item() + + def marginal_gain_with_memoization(self, X, item, enable_checks=True): + gain = 0.0 + + if enable_checks and item in X: + return 0.0 + for con in self.cover_set[item]: + if con not in self.concepts_covered_by_x: + gain += self.concept_weights[con] + + return gain + + def update_memoization(self, X, item): + if item in X: + return + + self.concepts_covered_by_x.update(self.cover_set[item]) + + def get_effective_ground_set(self): + return set(range(self.n)) + + def clear_memoization(self): + self.concepts_covered_by_x.clear() + + def set_memoization(self, X): + self.clear_memoization() + temp = set() + for elem in X: + self.update_memoization(temp, elem) + temp.add(elem) From f9b07b92a81646dc9eb159011d9ef6c310128e4a Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> 
Date: Wed, 17 Jan 2024 14:43:20 +0530 Subject: [PATCH 11/58] Delete cpp/SetFunction.py --- cpp/SetFunction.py | 62 ---------------------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 cpp/SetFunction.py diff --git a/cpp/SetFunction.py b/cpp/SetFunction.py deleted file mode 100644 index 661bf0c..0000000 --- a/cpp/SetFunction.py +++ /dev/null @@ -1,62 +0,0 @@ -from typing import Set, List, Tuple -import numpy as np -import torch -import torch.nn as nn -import numpy as np -import random -# import optimizer python files - -class SetFunction(nn.Module): - def __init__(self): - pass - - def evaluate(self, X: Set[int]) -> float: - return self.evaluate(X) - - def evaluate_with_memoization(self, X: Set[int]) -> float: - return self.evaluate_with_memoization(X) - - def marginal_gain(self, X: Set[int], item: int) -> float: - return self.marginal_gain(X, item) - - def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: - return self.marginal_gain_with_memoization(X, item) - - def update_memoization(self, X: Set[int], item: int) -> None: - return self.update_memoization(X, item) - - - def get_effective_ground_set(self) -> Set[int]: - return self.get_effective_ground_set() - - def maximize(self, optimizer: str, budget: float, stopIfZeroGain: bool, stopIfNegativeGain: bool, verbose: bool, - costs: List[float] = None, cost_sensitive_greedy: bool = False, show_progress: bool = False, epsilon: float = 0.0) -> List[Tuple[int, float]]: - optimizer = self._get_optimizer(optimizer) - if optimizer: - return optimizer.maximize(self, budget, stopIfZeroGain, stopIfZeroGain, verbose, show_progress, costs, cost_sensitive_greedy) - else: - print("Invalid Optimizer") - return [] - - def _get_optimizer(self, optimizer_name: str): - if optimizer_name == "NaiveGreedy": - return NaiveGreedyOptimizer() - # define all optimizer classed into files - elif optimizer_name == "LazyGreedy": - return LazyGreedyOptimizer() - elif 
optimizer_name == "StochasticGreedy": - return StochasticGreedyOptimizer() - elif optimizer_name == "LazierThanLazyGreedy": - return LazierThanLazyGreedyOptimizer() - else: - return None - - def cluster_init(self, n: int, k_dense: List[List[float]], ground: Set[int], - partial: bool, lambda_: float) -> None: - self.cluster_init(n, k_dense, ground, partial, lambda_) - - def set_memoization(self, X: Set[int]) -> None: - self.set_memoization(X) - - def clear_memoization(self) -> None: - self.clear_memoization() From 40178a3a9383c47244781d55c321719edf1650ac Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:46:24 +0530 Subject: [PATCH 12/58] Create SetFunction.py --- pytorch/SetFunction.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 pytorch/SetFunction.py diff --git a/pytorch/SetFunction.py b/pytorch/SetFunction.py new file mode 100644 index 0000000..661bf0c --- /dev/null +++ b/pytorch/SetFunction.py @@ -0,0 +1,62 @@ +from typing import Set, List, Tuple +import numpy as np +import torch +import torch.nn as nn +import numpy as np +import random +# import optimizer python files + +class SetFunction(nn.Module): + def __init__(self): + pass + + def evaluate(self, X: Set[int]) -> float: + return self.evaluate(X) + + def evaluate_with_memoization(self, X: Set[int]) -> float: + return self.evaluate_with_memoization(X) + + def marginal_gain(self, X: Set[int], item: int) -> float: + return self.marginal_gain(X, item) + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + return self.marginal_gain_with_memoization(X, item) + + def update_memoization(self, X: Set[int], item: int) -> None: + return self.update_memoization(X, item) + + + def get_effective_ground_set(self) -> Set[int]: + return self.get_effective_ground_set() + + def maximize(self, optimizer: str, budget: float, stopIfZeroGain: bool, 
stopIfNegativeGain: bool, verbose: bool, + costs: List[float] = None, cost_sensitive_greedy: bool = False, show_progress: bool = False, epsilon: float = 0.0) -> List[Tuple[int, float]]: + optimizer = self._get_optimizer(optimizer) + if optimizer: + return optimizer.maximize(self, budget, stopIfZeroGain, stopIfZeroGain, verbose, show_progress, costs, cost_sensitive_greedy) + else: + print("Invalid Optimizer") + return [] + + def _get_optimizer(self, optimizer_name: str): + if optimizer_name == "NaiveGreedy": + return NaiveGreedyOptimizer() + # define all optimizer classed into files + elif optimizer_name == "LazyGreedy": + return LazyGreedyOptimizer() + elif optimizer_name == "StochasticGreedy": + return StochasticGreedyOptimizer() + elif optimizer_name == "LazierThanLazyGreedy": + return LazierThanLazyGreedyOptimizer() + else: + return None + + def cluster_init(self, n: int, k_dense: List[List[float]], ground: Set[int], + partial: bool, lambda_: float) -> None: + self.cluster_init(n, k_dense, ground, partial, lambda_) + + def set_memoization(self, X: Set[int]) -> None: + self.set_memoization(X) + + def clear_memoization(self) -> None: + self.clear_memoization() From 4386d15c3b18b02fa8e3c06e93062a45d61f0d1d Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:52:32 +0530 Subject: [PATCH 13/58] Create SetCover.py --- pytorch/submod/SetCover.py | 96 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 pytorch/submod/SetCover.py diff --git a/pytorch/submod/SetCover.py b/pytorch/submod/SetCover.py new file mode 100644 index 0000000..62ed068 --- /dev/null +++ b/pytorch/submod/SetCover.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn +import numpy as np +import random +from SetFunction import SetFunction +class SetCover(SetFunction): + def __init__(self, n, cover_set, num_concepts, concept_weights = None): + super(SetFunction, self).__init__() + device = 
torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.n = n + self.cover_set = cover_set + self.num_concepts = num_concepts + self.concept_weights = concept_weights + if self.concept_weights is None: + self.concept_weights = [1.0] * num_concepts + else: + self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device) + + + self.concepts_covered_by_x = set() + + + def evaluate(self, X): + result = 0.0 + + if X.numel() == 0: + return 0.0 + + concepts_covered = set() + for elem in X: + concepts_covered.update(self.cover_set[elem.item()]) + + for con in concepts_covered: + result += self.concept_weights[con] + + return result + + + def evaluate_with_memoization(self, X): + result = 0.0 + + if X.numel() == 0: + print("hi") + return 0.0 + + for con in self.concepts_covered_by_x: + result += self.concept_weights[con] + print(result) + + return result + + def marginal_gain(self, X, item): + gain = 0.0 + + if item in X: + return 0.0 + + concepts_covered = set() + for elem in X: + concepts_covered.update(self.cover_set[elem]) + + for con in self.cover_set[item]: + if con not in concepts_covered: + gain += self.concept_weights[con] + + return gain.item() + + def marginal_gain_with_memoization(self, X, item, enable_checks=True): + gain = 0.0 + + if enable_checks and item in X: + return 0.0 + for con in self.cover_set[item]: + if con not in self.concepts_covered_by_x: + gain += self.concept_weights[con] + + return gain + + def update_memoization(self, X, item): + if item in X: + return + + self.concepts_covered_by_x.update(self.cover_set[item]) + + def get_effective_ground_set(self): + return set(range(self.n)) + + def clear_memoization(self): + self.concepts_covered_by_x.clear() + + def set_memoization(self, X): + self.clear_memoization() + temp = set() + for elem in X: + self.update_memoization(temp, elem) + temp.add(elem) From b5659d8afde11df3cdc0b4ed655cf7a97de60d4e Mon Sep 17 00:00:00 2001 From: JahanviRajput 
<142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:53:19 +0530 Subject: [PATCH 14/58] Delete cpp/submod/SetCover.py --- cpp/submod/SetCover.py | 96 ------------------------------------------ 1 file changed, 96 deletions(-) delete mode 100644 cpp/submod/SetCover.py diff --git a/cpp/submod/SetCover.py b/cpp/submod/SetCover.py deleted file mode 100644 index d525c69..0000000 --- a/cpp/submod/SetCover.py +++ /dev/null @@ -1,96 +0,0 @@ -import torch -import torch.nn as nn -import numpy as np -import random - -class SetCover(SetFunction): - def __init__(self, n, cover_set, num_concepts, concept_weights = None): - super(SetFunction, self).__init__() - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.n = n - self.cover_set = cover_set - self.num_concepts = num_concepts - self.concept_weights = concept_weights - if self.concept_weights is None: - self.concept_weights = [1.0] * num_concepts - else: - self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device) - - - self.concepts_covered_by_x = set() - - - def evaluate(self, X): - result = 0.0 - - if X.numel() == 0: - return 0.0 - - concepts_covered = set() - for elem in X: - concepts_covered.update(self.cover_set[elem.item()]) - - for con in concepts_covered: - result += self.concept_weights[con] - - return result - - - def evaluate_with_memoization(self, X): - result = 0.0 - - if X.numel() == 0: - print("hi") - return 0.0 - - for con in self.concepts_covered_by_x: - result += self.concept_weights[con] - print(result) - - return result - - def marginal_gain(self, X, item): - gain = 0.0 - - if item in X: - return 0.0 - - concepts_covered = set() - for elem in X: - concepts_covered.update(self.cover_set[elem]) - - for con in self.cover_set[item]: - if con not in concepts_covered: - gain += self.concept_weights[con] - - return gain.item() - - def marginal_gain_with_memoization(self, X, item, enable_checks=True): - gain = 0.0 - - if enable_checks and 
item in X: - return 0.0 - for con in self.cover_set[item]: - if con not in self.concepts_covered_by_x: - gain += self.concept_weights[con] - - return gain - - def update_memoization(self, X, item): - if item in X: - return - - self.concepts_covered_by_x.update(self.cover_set[item]) - - def get_effective_ground_set(self): - return set(range(self.n)) - - def clear_memoization(self): - self.concepts_covered_by_x.clear() - - def set_memoization(self, X): - self.clear_memoization() - temp = set() - for elem in X: - self.update_memoization(temp, elem) - temp.add(elem) From 820c78ff4c4c890a77afafd44f6f413f4b5a12e1 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:54:14 +0530 Subject: [PATCH 15/58] Create LazierThanLazyGreedyOptimizer.py --- .../LazierThanLazyGreedyOptimizer.py | 120 ++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 pytorch/optimizer/LazierThanLazyGreedyOptimizer.py diff --git a/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py b/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py new file mode 100644 index 0000000..86e355c --- /dev/null +++ b/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py @@ -0,0 +1,120 @@ +import random +import math + +class LazierThanLazyGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + @staticmethod + def print_sorted_set(sorted_set): + print("[", end="") + for val, elem in sorted_set: + print(f"({val}, {elem}), ", end="") + print("]") + + def maximize(self, f_obj, budget, stop_if_zero_gain=False, stop_if_negative_gain=False, + epsilon=0.1, verbose=False, show_progress=False, costs=None, cost_sensitive_greedy=False): + greedy_vector = [] + greedy_set = set() + + if costs is None: + greedy_vector.reserve(budget) + greedy_set.reserve(budget) + + rem_budget = budget + remaining_set = set(f_obj.get_effective_ground_set()) + n = len(remaining_set) + epsilon = 0.05 
+ random_set_size = int((n / budget) * math.log(1 / epsilon)) + + if verbose: + print(f"Epsilon = {epsilon}") + print(f"Random set size = {random_set_size}") + print("Ground set:") + print(remaining_set) + print(f"Num elements in ground set = {len(remaining_set)}") + print("Starting the LazierThanLazy greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + best_id = None + best_val = None + + i = 0 + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter_count = 0 + + while rem_budget > 0: + random_set = set() + while len(random_set) < random_set_size: + elem = random.randint(0, n - 1) + if elem in remaining_set and elem not in random_set: + random_set.add(elem) + + if verbose: + print(f"Iteration {i}") + print(f"Random set = {random_set}") + print("Now running lazy greedy on the random set") + + candidate_id = None + candidate_val = None + new_candidate_bound = None + + # Compute gains only for the elements in the remaining set + gains = [(f_obj.marginal_gain_with_memoization(greedy_set, elem, False), elem) + for elem in remaining_set] + + for j, (val, elem) in enumerate(sorted(gains, key=lambda x: (-x[0], x[1]))): + if elem in random_set and elem not in greedy_set: # Check if the element is not already selected + if verbose: + print(f"Checking {elem}...") + candidate_id = elem + candidate_val = val + new_candidate_bound = f_obj.marginal_gain_with_memoization(greedy_set, candidate_id, False) + if verbose: + print(f"Updated gain as per updated greedy set = {new_candidate_bound}") + next_elem = gains[j + 1] if j + 1 < len(gains) else None + if new_candidate_bound >= next_elem[0] if next_elem else float('-inf'): + if verbose: + print("..better than next best upper bound, " + "selecting...") + best_id = candidate_id + best_val = new_candidate_bound + break + + if verbose: + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + remaining_set.remove(best_id) + + if (best_val < 0 
and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print("Updated greedy set:", greedy_set) + + if show_progress: + percent = int(((iter_count + 1.0) / N) * 100) + if percent >= display_next: + print("\r", "[" + "|" * (percent // 5) + " " * (100 // 5 - percent // 5) + "]", end="") + print(f" {percent}% [Iteration {iter_count + 1} of {N}]", end="") + display_next += step + iter_count += 1 + + i += 1 + + return greedy_vector From 63c9e064ca8784f3fab7f783ec4737d245c969d6 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:54:48 +0530 Subject: [PATCH 16/58] Create LazyGreedyOptimizer.py --- pytorch/optimizer/LazyGreedyOptimizer.py | 97 ++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 pytorch/optimizer/LazyGreedyOptimizer.py diff --git a/pytorch/optimizer/LazyGreedyOptimizer.py b/pytorch/optimizer/LazyGreedyOptimizer.py new file mode 100644 index 0000000..45d7590 --- /dev/null +++ b/pytorch/optimizer/LazyGreedyOptimizer.py @@ -0,0 +1,97 @@ +import torch +import heapq + +class LazyGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + def maximize(self, f_obj, budget, stop_if_zero_gain, stop_if_negative_gain, + verbose, show_progress, costs, cost_sensitive_greedy): + greedy_vector = [] + greedy_set = set() + + # if not costs: + # greedy_vector.reserve(budget) + # greedy_set.reserve(budget) + + rem_budget = budget + ground_set = f_obj.get_effective_ground_set() + + if verbose: + print("Ground set:") + print(ground_set) + print(f"Num elements in groundset = {len(ground_set)}") + print("Costs:") + print(costs) + print(f"Cost sensitive 
greedy: {cost_sensitive_greedy}") + print("Starting the lazy greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + + container = [] + heapq.heapify(container) + max_heap = container + + if cost_sensitive_greedy: + for elem in ground_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) / costs[elem] + heapq.heappush(max_heap, (-gain, elem)) + else: + for elem in ground_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) + heapq.heappush(max_heap, (-gain, elem)) + + if verbose: + print("Max heap constructed") + + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter = 0 + + while rem_budget > 0 and max_heap: + current_max = heapq.heappop(max_heap) + current_max_gain, current_max_elem = -current_max[0], current_max[1] + + if verbose: + print(f"currentMax element: {current_max_elem} and its upper bound: {current_max_gain}") + + new_max_bound = f_obj.marginal_gain_with_memoization(greedy_set, current_max_elem, False) + + if verbose: + print(f"newMaxBound: {new_max_bound}") + + if new_max_bound >= -max_heap[0][0]: + if (new_max_bound < 0 and stop_if_negative_gain) or \ + (self.equals(new_max_bound, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, current_max_elem) + greedy_set.add(current_max_elem) + greedy_vector.append((current_max_elem, new_max_bound)) + rem_budget -= 1 + + if verbose: + print(f"Added element {current_max_elem} and the gain is {new_max_bound}") + print("Updated greedySet:", greedy_set) + + if show_progress: + percent = int(((iter + 1.0) / N) * 100) + + if percent >= display_next: + print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", + end=f" {percent}% [Iteration {iter + 1} of {N}]") + display_next += step + + iter += 1 + else: + heapq.heappush(max_heap, (-new_max_bound, current_max_elem)) + + return greedy_vector From ba15c2fced48fd7e3834c5ed514330777a9b7a1c Mon Sep 17 00:00:00 2001 
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:55:24 +0530 Subject: [PATCH 17/58] Create NaiveGreedyOptimizer.py --- pytorch/optimizer/NaiveGreedyOptimizer.py | 90 +++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 pytorch/optimizer/NaiveGreedyOptimizer.py diff --git a/pytorch/optimizer/NaiveGreedyOptimizer.py b/pytorch/optimizer/NaiveGreedyOptimizer.py new file mode 100644 index 0000000..728f16a --- /dev/null +++ b/pytorch/optimizer/NaiveGreedyOptimizer.py @@ -0,0 +1,90 @@ +import torch +import random +from typing import List, Tuple, Set + +class NaiveGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1, val2, eps): + return abs(val1 - val2) < eps + + def maximize( + self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy + ): + greedy_vector = [] + greedy_set = set() + if not costs: + # greedy_vector = [None] * budget + greedy_set = set() + rem_budget = budget + ground_set = f_obj.get_effective_ground_set() + #print(ground_set) + if verbose: + print("Ground set:") + print(ground_set) + print(f"Num elements in groundset = {len(ground_set)}") + print("Costs:") + print(costs) + print(f"Cost sensitive greedy: {cost_sensitive_greedy}") + print("Starting the naive greedy algorithm") + print("Initial greedy set:") + print(greedy_set) + + f_obj.clear_memoization() + best_id = None + best_val = None + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter_count = 0 + + while rem_budget > 0: + best_id = None + best_val = float("-inf") + + for i in ground_set: + if i in greedy_set: + continue + gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False) + # print(gain) + if verbose: + print(f"Gain of {i} is {gain}") + + if gain > best_val: + best_id = i + best_val = gain + + if verbose: + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and 
stopIfNegativeGain) or ( + self.equals(best_val, 0, 1e-5) and stop_if_zero_gain + ): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print(f"Updated greedy set: {greedy_set}") + + if show_progress: + percent = int((iter_count + 1.0) / N * 100) + + if percent >= display_next: + print( + f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", + end="", + ) + print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="") + display_next += step + + iter_count += 1 + + return greedy_vector From 125966604fcab22109aa02f7b8ec96498fe47709 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:55:59 +0530 Subject: [PATCH 18/58] Create StochasticGreedyOptimizer.py --- .../optimizer/StochasticGreedyOptimizer.py | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 pytorch/optimizer/StochasticGreedyOptimizer.py diff --git a/pytorch/optimizer/StochasticGreedyOptimizer.py b/pytorch/optimizer/StochasticGreedyOptimizer.py new file mode 100644 index 0000000..bcc9263 --- /dev/null +++ b/pytorch/optimizer/StochasticGreedyOptimizer.py @@ -0,0 +1,105 @@ +import random +from typing import List, Tuple, Set +import math +import sys +# from StochasticGreedyOptimizer import SetFunction + +class StochasticGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1: float, val2: float, eps: float) -> bool: + return abs(val1 - val2) < eps + + def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool, + stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True, + show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]: + # TODO: Implement handling of equal guys and different sizes of each item later 
+ # TODO: Implement cost-sensitive selection + + greedy_vector = [] + greedy_set = set() + + # if not costs: + # # Every element is of the same size, budget corresponds to cardinality + # greedy_vector.reserve(budget) + # greedy_set.reserve(budget) + + rem_budget = budget + remaining_set = set(f_obj.get_effective_ground_set()) + n = len(remaining_set) + epsilon = 0.05 + random_set_size = int((n / budget) * math.log(1 / epsilon)) + if verbose: + print(f"Epsilon = {epsilon}") + print(f"Random set size = {random_set_size}") + print("Ground set:") + print(" ".join(map(str, remaining_set))) + print(f"Num elements in groundset = {len(remaining_set)}") + print("Starting the stochastic greedy algorithm") + print("Initial greedy set:") + print(" ".join(map(str, greedy_set))) + + f_obj.clear_memoization() + random.seed(1) + best_id = -1 + best_val = -1 * float('inf') + i = 0 + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter = 0 + + while rem_budget > 0: + random_set = set() + while len(random_set) < random_set_size: + elem = random.randint(0, n - 1) + if elem in remaining_set and elem not in random_set: + random_set.add(elem) + + if verbose: + print(f"Iteration {i}") + print(f"Random set = {list(random_set)}") + print("Now running naive greedy on the random set") + + best_id = -1 + best_val = -1 * float('inf') + + for elem in random_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) + if gain > best_val: + best_id = elem + best_val = gain + + if verbose: + if best_id == -1: + raise ValueError("Nobody had greater gain than minus infinity!!") + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + remaining_set.remove(best_id) + + if verbose: + 
print(f"Added element {best_id} and the gain is {best_val}") + print("Updated greedy set:", " ".join(map(str, greedy_set))) + + if show_progress: + percent = int(((iter + 1.0) / N) * 100) + if percent >= display_next: + print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="") + print(f"{percent}% [Iteration {iter + 1} of {N}]", end="") + sys.stdout.flush() + display_next += step + iter += 1 + + i += 1 + + return greedy_vector From 99f98703897d4c049299a20d9efffa3297c88abd Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:04:49 +0530 Subject: [PATCH 19/58] Update SetFunction.py --- pytorch/SetFunction.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pytorch/SetFunction.py b/pytorch/SetFunction.py index 661bf0c..6c7891c 100644 --- a/pytorch/SetFunction.py +++ b/pytorch/SetFunction.py @@ -4,7 +4,11 @@ import torch.nn as nn import numpy as np import random -# import optimizer python files +from optimizer.LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer +from optimizer.LazyGreedyOptimizer import LazyGreedyOptimizer +from optimizer.NaiveGreedyOptimizer import NaiveGreedyOptimizer +from optimizer.StochasticGreedyOptimizer import StochasticGreedyOptimizer + class SetFunction(nn.Module): def __init__(self): From eba36f77b4c273e8ba6158679d03bdf907298ffe Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:05:13 +0530 Subject: [PATCH 20/58] Delete pytorch/optimizer/StochasticGreedyOptimizer.py --- .../optimizer/StochasticGreedyOptimizer.py | 105 ------------------ 1 file changed, 105 deletions(-) delete mode 100644 pytorch/optimizer/StochasticGreedyOptimizer.py diff --git a/pytorch/optimizer/StochasticGreedyOptimizer.py b/pytorch/optimizer/StochasticGreedyOptimizer.py deleted file mode 100644 index bcc9263..0000000 --- a/pytorch/optimizer/StochasticGreedyOptimizer.py +++ 
/dev/null @@ -1,105 +0,0 @@ -import random -from typing import List, Tuple, Set -import math -import sys -# from StochasticGreedyOptimizer import SetFunction - -class StochasticGreedyOptimizer: - def __init__(self): - pass - - @staticmethod - def equals(val1: float, val2: float, eps: float) -> bool: - return abs(val1 - val2) < eps - - def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool, - stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True, - show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]: - # TODO: Implement handling of equal guys and different sizes of each item later - # TODO: Implement cost-sensitive selection - - greedy_vector = [] - greedy_set = set() - - # if not costs: - # # Every element is of the same size, budget corresponds to cardinality - # greedy_vector.reserve(budget) - # greedy_set.reserve(budget) - - rem_budget = budget - remaining_set = set(f_obj.get_effective_ground_set()) - n = len(remaining_set) - epsilon = 0.05 - random_set_size = int((n / budget) * math.log(1 / epsilon)) - if verbose: - print(f"Epsilon = {epsilon}") - print(f"Random set size = {random_set_size}") - print("Ground set:") - print(" ".join(map(str, remaining_set))) - print(f"Num elements in groundset = {len(remaining_set)}") - print("Starting the stochastic greedy algorithm") - print("Initial greedy set:") - print(" ".join(map(str, greedy_set))) - - f_obj.clear_memoization() - random.seed(1) - best_id = -1 - best_val = -1 * float('inf') - i = 0 - step = 1 - display_next = step - percent = 0 - N = rem_budget - iter = 0 - - while rem_budget > 0: - random_set = set() - while len(random_set) < random_set_size: - elem = random.randint(0, n - 1) - if elem in remaining_set and elem not in random_set: - random_set.add(elem) - - if verbose: - print(f"Iteration {i}") - print(f"Random set = {list(random_set)}") - print("Now running naive greedy on the random set") - - 
best_id = -1 - best_val = -1 * float('inf') - - for elem in random_set: - gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) - if gain > best_val: - best_id = elem - best_val = gain - - if verbose: - if best_id == -1: - raise ValueError("Nobody had greater gain than minus infinity!!") - print(f"Next best item to add is {best_id} and its value addition is {best_val}") - - if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): - break - else: - f_obj.update_memoization(greedy_set, best_id) - greedy_set.add(best_id) - greedy_vector.append((best_id, best_val)) - rem_budget -= 1 - remaining_set.remove(best_id) - - if verbose: - print(f"Added element {best_id} and the gain is {best_val}") - print("Updated greedy set:", " ".join(map(str, greedy_set))) - - if show_progress: - percent = int(((iter + 1.0) / N) * 100) - if percent >= display_next: - print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="") - print(f"{percent}% [Iteration {iter + 1} of {N}]", end="") - sys.stdout.flush() - display_next += step - iter += 1 - - i += 1 - - return greedy_vector From eef8085c9abd5b59dd1487e45e828fe21d514410 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:07:19 +0530 Subject: [PATCH 21/58] Create StochasticGreedyOptimizer --- pytorch/optimizer/StochasticGreedyOptimizer | 104 ++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 pytorch/optimizer/StochasticGreedyOptimizer diff --git a/pytorch/optimizer/StochasticGreedyOptimizer b/pytorch/optimizer/StochasticGreedyOptimizer new file mode 100644 index 0000000..081f379 --- /dev/null +++ b/pytorch/optimizer/StochasticGreedyOptimizer @@ -0,0 +1,104 @@ +import random +from typing import List, Tuple, Set +import math +import sys + +class StochasticGreedyOptimizer: + def __init__(self): + pass + + @staticmethod + def equals(val1: float, val2: float, eps: float) -> 
bool: + return abs(val1 - val2) < eps + + def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool, + stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True, + show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]: + # TODO: Implement handling of equal guys and different sizes of each item later + # TODO: Implement cost-sensitive selection + + greedy_vector = [] + greedy_set = set() + + # if not costs: + # # Every element is of the same size, budget corresponds to cardinality + # greedy_vector.reserve(budget) + # greedy_set.reserve(budget) + + rem_budget = budget + remaining_set = set(f_obj.get_effective_ground_set()) + n = len(remaining_set) + epsilon = 0.05 + random_set_size = int((n / budget) * math.log(1 / epsilon)) + if verbose: + print(f"Epsilon = {epsilon}") + print(f"Random set size = {random_set_size}") + print("Ground set:") + print(" ".join(map(str, remaining_set))) + print(f"Num elements in groundset = {len(remaining_set)}") + print("Starting the stochastic greedy algorithm") + print("Initial greedy set:") + print(" ".join(map(str, greedy_set))) + + f_obj.clear_memoization() + random.seed(1) + best_id = -1 + best_val = -1 * float('inf') + i = 0 + step = 1 + display_next = step + percent = 0 + N = rem_budget + iter = 0 + + while rem_budget > 0: + random_set = set() + while len(random_set) < random_set_size: + elem = random.randint(0, n - 1) + if elem in remaining_set and elem not in random_set: + random_set.add(elem) + + if verbose: + print(f"Iteration {i}") + print(f"Random set = {list(random_set)}") + print("Now running naive greedy on the random set") + + best_id = -1 + best_val = -1 * float('inf') + + for elem in random_set: + gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) + if gain > best_val: + best_id = elem + best_val = gain + + if verbose: + if best_id == -1: + raise ValueError("Nobody had greater gain than minus 
infinity!!") + print(f"Next best item to add is {best_id} and its value addition is {best_val}") + + if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): + break + else: + f_obj.update_memoization(greedy_set, best_id) + greedy_set.add(best_id) + greedy_vector.append((best_id, best_val)) + rem_budget -= 1 + remaining_set.remove(best_id) + + if verbose: + print(f"Added element {best_id} and the gain is {best_val}") + print("Updated greedy set:", " ".join(map(str, greedy_set))) + + if show_progress: + percent = int(((iter + 1.0) / N) * 100) + if percent >= display_next: + print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="") + print(f"{percent}% [Iteration {iter + 1} of {N}]", end="") + sys.stdout.flush() + display_next += step + iter += 1 + + i += 1 + + return greedy_vector From cb235d96638508a5b5a0c649ba8c9ecbd6873480 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:09:15 +0530 Subject: [PATCH 22/58] Create __init__.py --- pytorch/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 pytorch/__init__.py diff --git a/pytorch/__init__.py b/pytorch/__init__.py new file mode 100644 index 0000000..766a9a5 --- /dev/null +++ b/pytorch/__init__.py @@ -0,0 +1 @@ +# /pytorch/__init__.py From 92be25313d303cdd24eabda8fc865525e8520bd6 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:11:12 +0530 Subject: [PATCH 23/58] Delete cpp/optimizers/LazierThanLazyGreedyOptimizer.py --- .../LazierThanLazyGreedyOptimizer.py | 120 ------------------ 1 file changed, 120 deletions(-) delete mode 100644 cpp/optimizers/LazierThanLazyGreedyOptimizer.py diff --git a/cpp/optimizers/LazierThanLazyGreedyOptimizer.py b/cpp/optimizers/LazierThanLazyGreedyOptimizer.py deleted file mode 100644 index 86e355c..0000000 --- a/cpp/optimizers/LazierThanLazyGreedyOptimizer.py +++ 
/dev/null @@ -1,120 +0,0 @@ -import random -import math - -class LazierThanLazyGreedyOptimizer: - def __init__(self): - pass - - @staticmethod - def equals(val1, val2, eps): - return abs(val1 - val2) < eps - - @staticmethod - def print_sorted_set(sorted_set): - print("[", end="") - for val, elem in sorted_set: - print(f"({val}, {elem}), ", end="") - print("]") - - def maximize(self, f_obj, budget, stop_if_zero_gain=False, stop_if_negative_gain=False, - epsilon=0.1, verbose=False, show_progress=False, costs=None, cost_sensitive_greedy=False): - greedy_vector = [] - greedy_set = set() - - if costs is None: - greedy_vector.reserve(budget) - greedy_set.reserve(budget) - - rem_budget = budget - remaining_set = set(f_obj.get_effective_ground_set()) - n = len(remaining_set) - epsilon = 0.05 - random_set_size = int((n / budget) * math.log(1 / epsilon)) - - if verbose: - print(f"Epsilon = {epsilon}") - print(f"Random set size = {random_set_size}") - print("Ground set:") - print(remaining_set) - print(f"Num elements in ground set = {len(remaining_set)}") - print("Starting the LazierThanLazy greedy algorithm") - print("Initial greedy set:") - print(greedy_set) - - f_obj.clear_memoization() - best_id = None - best_val = None - - i = 0 - step = 1 - display_next = step - percent = 0 - N = rem_budget - iter_count = 0 - - while rem_budget > 0: - random_set = set() - while len(random_set) < random_set_size: - elem = random.randint(0, n - 1) - if elem in remaining_set and elem not in random_set: - random_set.add(elem) - - if verbose: - print(f"Iteration {i}") - print(f"Random set = {random_set}") - print("Now running lazy greedy on the random set") - - candidate_id = None - candidate_val = None - new_candidate_bound = None - - # Compute gains only for the elements in the remaining set - gains = [(f_obj.marginal_gain_with_memoization(greedy_set, elem, False), elem) - for elem in remaining_set] - - for j, (val, elem) in enumerate(sorted(gains, key=lambda x: (-x[0], x[1]))): - if elem 
in random_set and elem not in greedy_set: # Check if the element is not already selected - if verbose: - print(f"Checking {elem}...") - candidate_id = elem - candidate_val = val - new_candidate_bound = f_obj.marginal_gain_with_memoization(greedy_set, candidate_id, False) - if verbose: - print(f"Updated gain as per updated greedy set = {new_candidate_bound}") - next_elem = gains[j + 1] if j + 1 < len(gains) else None - if new_candidate_bound >= next_elem[0] if next_elem else float('-inf'): - if verbose: - print("..better than next best upper bound, " - "selecting...") - best_id = candidate_id - best_val = new_candidate_bound - break - - if verbose: - print(f"Next best item to add is {best_id} and its value addition is {best_val}") - - remaining_set.remove(best_id) - - if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): - break - else: - f_obj.update_memoization(greedy_set, best_id) - greedy_set.add(best_id) - greedy_vector.append((best_id, best_val)) - rem_budget -= 1 - - if verbose: - print(f"Added element {best_id} and the gain is {best_val}") - print("Updated greedy set:", greedy_set) - - if show_progress: - percent = int(((iter_count + 1.0) / N) * 100) - if percent >= display_next: - print("\r", "[" + "|" * (percent // 5) + " " * (100 // 5 - percent // 5) + "]", end="") - print(f" {percent}% [Iteration {iter_count + 1} of {N}]", end="") - display_next += step - iter_count += 1 - - i += 1 - - return greedy_vector From d2a9aba92d099817c26c122f5054fbe634aae72a Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:11:25 +0530 Subject: [PATCH 24/58] Delete cpp/optimizers/LazyGreedyOptimizer.py --- cpp/optimizers/LazyGreedyOptimizer.py | 97 --------------------------- 1 file changed, 97 deletions(-) delete mode 100644 cpp/optimizers/LazyGreedyOptimizer.py diff --git a/cpp/optimizers/LazyGreedyOptimizer.py b/cpp/optimizers/LazyGreedyOptimizer.py 
deleted file mode 100644 index 45d7590..0000000 --- a/cpp/optimizers/LazyGreedyOptimizer.py +++ /dev/null @@ -1,97 +0,0 @@ -import torch -import heapq - -class LazyGreedyOptimizer: - def __init__(self): - pass - - @staticmethod - def equals(val1, val2, eps): - return abs(val1 - val2) < eps - - def maximize(self, f_obj, budget, stop_if_zero_gain, stop_if_negative_gain, - verbose, show_progress, costs, cost_sensitive_greedy): - greedy_vector = [] - greedy_set = set() - - # if not costs: - # greedy_vector.reserve(budget) - # greedy_set.reserve(budget) - - rem_budget = budget - ground_set = f_obj.get_effective_ground_set() - - if verbose: - print("Ground set:") - print(ground_set) - print(f"Num elements in groundset = {len(ground_set)}") - print("Costs:") - print(costs) - print(f"Cost sensitive greedy: {cost_sensitive_greedy}") - print("Starting the lazy greedy algorithm") - print("Initial greedy set:") - print(greedy_set) - - f_obj.clear_memoization() - - container = [] - heapq.heapify(container) - max_heap = container - - if cost_sensitive_greedy: - for elem in ground_set: - gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) / costs[elem] - heapq.heappush(max_heap, (-gain, elem)) - else: - for elem in ground_set: - gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) - heapq.heappush(max_heap, (-gain, elem)) - - if verbose: - print("Max heap constructed") - - step = 1 - display_next = step - percent = 0 - N = rem_budget - iter = 0 - - while rem_budget > 0 and max_heap: - current_max = heapq.heappop(max_heap) - current_max_gain, current_max_elem = -current_max[0], current_max[1] - - if verbose: - print(f"currentMax element: {current_max_elem} and its upper bound: {current_max_gain}") - - new_max_bound = f_obj.marginal_gain_with_memoization(greedy_set, current_max_elem, False) - - if verbose: - print(f"newMaxBound: {new_max_bound}") - - if new_max_bound >= -max_heap[0][0]: - if (new_max_bound < 0 and stop_if_negative_gain) or \ - 
(self.equals(new_max_bound, 0, 1e-5) and stop_if_zero_gain): - break - else: - f_obj.update_memoization(greedy_set, current_max_elem) - greedy_set.add(current_max_elem) - greedy_vector.append((current_max_elem, new_max_bound)) - rem_budget -= 1 - - if verbose: - print(f"Added element {current_max_elem} and the gain is {new_max_bound}") - print("Updated greedySet:", greedy_set) - - if show_progress: - percent = int(((iter + 1.0) / N) * 100) - - if percent >= display_next: - print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", - end=f" {percent}% [Iteration {iter + 1} of {N}]") - display_next += step - - iter += 1 - else: - heapq.heappush(max_heap, (-new_max_bound, current_max_elem)) - - return greedy_vector From 50ec126a51822a61aaa1bb8a6efc92636164be0c Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:11:38 +0530 Subject: [PATCH 25/58] Delete cpp/optimizers/NaiveGreedyOptimizer.py --- cpp/optimizers/NaiveGreedyOptimizer.py | 90 -------------------------- 1 file changed, 90 deletions(-) delete mode 100644 cpp/optimizers/NaiveGreedyOptimizer.py diff --git a/cpp/optimizers/NaiveGreedyOptimizer.py b/cpp/optimizers/NaiveGreedyOptimizer.py deleted file mode 100644 index 728f16a..0000000 --- a/cpp/optimizers/NaiveGreedyOptimizer.py +++ /dev/null @@ -1,90 +0,0 @@ -import torch -import random -from typing import List, Tuple, Set - -class NaiveGreedyOptimizer: - def __init__(self): - pass - - @staticmethod - def equals(val1, val2, eps): - return abs(val1 - val2) < eps - - def maximize( - self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy - ): - greedy_vector = [] - greedy_set = set() - if not costs: - # greedy_vector = [None] * budget - greedy_set = set() - rem_budget = budget - ground_set = f_obj.get_effective_ground_set() - #print(ground_set) - if verbose: - print("Ground set:") - print(ground_set) - print(f"Num elements in 
groundset = {len(ground_set)}") - print("Costs:") - print(costs) - print(f"Cost sensitive greedy: {cost_sensitive_greedy}") - print("Starting the naive greedy algorithm") - print("Initial greedy set:") - print(greedy_set) - - f_obj.clear_memoization() - best_id = None - best_val = None - step = 1 - display_next = step - percent = 0 - N = rem_budget - iter_count = 0 - - while rem_budget > 0: - best_id = None - best_val = float("-inf") - - for i in ground_set: - if i in greedy_set: - continue - gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False) - # print(gain) - if verbose: - print(f"Gain of {i} is {gain}") - - if gain > best_val: - best_id = i - best_val = gain - - if verbose: - print(f"Next best item to add is {best_id} and its value addition is {best_val}") - - if (best_val < 0 and stopIfNegativeGain) or ( - self.equals(best_val, 0, 1e-5) and stop_if_zero_gain - ): - break - else: - f_obj.update_memoization(greedy_set, best_id) - greedy_set.add(best_id) - greedy_vector.append((best_id, best_val)) - rem_budget -= 1 - - if verbose: - print(f"Added element {best_id} and the gain is {best_val}") - print(f"Updated greedy set: {greedy_set}") - - if show_progress: - percent = int((iter_count + 1.0) / N * 100) - - if percent >= display_next: - print( - f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", - end="", - ) - print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="") - display_next += step - - iter_count += 1 - - return greedy_vector From 427691665afe6f1012991c553df0944f7aaba54b Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:11:51 +0530 Subject: [PATCH 26/58] Delete cpp/optimizers/StochasticGreedyOptimizer.py --- cpp/optimizers/StochasticGreedyOptimizer.py | 105 -------------------- 1 file changed, 105 deletions(-) delete mode 100644 cpp/optimizers/StochasticGreedyOptimizer.py diff --git a/cpp/optimizers/StochasticGreedyOptimizer.py 
b/cpp/optimizers/StochasticGreedyOptimizer.py deleted file mode 100644 index bcc9263..0000000 --- a/cpp/optimizers/StochasticGreedyOptimizer.py +++ /dev/null @@ -1,105 +0,0 @@ -import random -from typing import List, Tuple, Set -import math -import sys -# from StochasticGreedyOptimizer import SetFunction - -class StochasticGreedyOptimizer: - def __init__(self): - pass - - @staticmethod - def equals(val1: float, val2: float, eps: float) -> bool: - return abs(val1 - val2) < eps - - def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool, - stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True, - show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]: - # TODO: Implement handling of equal guys and different sizes of each item later - # TODO: Implement cost-sensitive selection - - greedy_vector = [] - greedy_set = set() - - # if not costs: - # # Every element is of the same size, budget corresponds to cardinality - # greedy_vector.reserve(budget) - # greedy_set.reserve(budget) - - rem_budget = budget - remaining_set = set(f_obj.get_effective_ground_set()) - n = len(remaining_set) - epsilon = 0.05 - random_set_size = int((n / budget) * math.log(1 / epsilon)) - if verbose: - print(f"Epsilon = {epsilon}") - print(f"Random set size = {random_set_size}") - print("Ground set:") - print(" ".join(map(str, remaining_set))) - print(f"Num elements in groundset = {len(remaining_set)}") - print("Starting the stochastic greedy algorithm") - print("Initial greedy set:") - print(" ".join(map(str, greedy_set))) - - f_obj.clear_memoization() - random.seed(1) - best_id = -1 - best_val = -1 * float('inf') - i = 0 - step = 1 - display_next = step - percent = 0 - N = rem_budget - iter = 0 - - while rem_budget > 0: - random_set = set() - while len(random_set) < random_set_size: - elem = random.randint(0, n - 1) - if elem in remaining_set and elem not in random_set: - random_set.add(elem) - 
- if verbose: - print(f"Iteration {i}") - print(f"Random set = {list(random_set)}") - print("Now running naive greedy on the random set") - - best_id = -1 - best_val = -1 * float('inf') - - for elem in random_set: - gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) - if gain > best_val: - best_id = elem - best_val = gain - - if verbose: - if best_id == -1: - raise ValueError("Nobody had greater gain than minus infinity!!") - print(f"Next best item to add is {best_id} and its value addition is {best_val}") - - if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain): - break - else: - f_obj.update_memoization(greedy_set, best_id) - greedy_set.add(best_id) - greedy_vector.append((best_id, best_val)) - rem_budget -= 1 - remaining_set.remove(best_id) - - if verbose: - print(f"Added element {best_id} and the gain is {best_val}") - print("Updated greedy set:", " ".join(map(str, greedy_set))) - - if show_progress: - percent = int(((iter + 1.0) / N) * 100) - if percent >= display_next: - print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="") - print(f"{percent}% [Iteration {iter + 1} of {N}]", end="") - sys.stdout.flush() - display_next += step - iter += 1 - - i += 1 - - return greedy_vector From 9c997e777cbc8f1d7f3e10a18ef4173888512155 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:13:46 +0530 Subject: [PATCH 27/58] Create ProbabilisticSetCover.py --- pytorch/submod/ProbabilisticSetCover.py | 80 +++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 pytorch/submod/ProbabilisticSetCover.py diff --git a/pytorch/submod/ProbabilisticSetCover.py b/pytorch/submod/ProbabilisticSetCover.py new file mode 100644 index 0000000..aacb7c4 --- /dev/null +++ b/pytorch/submod/ProbabilisticSetCover.py @@ -0,0 +1,80 @@ +import torch +from typing import List, Set, Tuple + +class 
ProbabilisticSetCover(SetFunction): + def __init__(self, n: int, ground_set_concept_probabilities: List[List[float]], num_concepts: int, concept_weights: List[float] = None): + super(SetFunction, self).__init__() + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.n = n + self.ground_set_concept_probabilities = ground_set_concept_probabilities + self.num_concepts = num_concepts + self.concept_weights = concept_weights + + if self.concept_weights is None: + self.concept_weights = [1.0] * num_concepts + else: + self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device) + self.prob_of_concepts_covered_by_X = num_concepts + + def evaluate(self, X: Set[int]) -> float: + result = 0 + if not X: + return result + + for i in range(self.num_concepts): + product = 1 + for elem in X: + product *= (1 - self.ground_set_concept_probabilities[elem][i]) + result += self.concept_weights[i] * (1 - product) + + return result + + def evaluate_with_memoization(self, X: Set[int]) -> float: + result = 0 + if not X: + return result + + for i in range(self.num_concepts): + result += self.concept_weights[i] * (1 - self.prob_of_concepts_covered_by_X[i]) + + return result + + def marginal_gain(self, X: Set[int], item: int) -> float: + gain = 0 + if item in X: + return gain + + for i in range(self.num_concepts): + old_concept_prod = 1 + for elem in X: + old_concept_prod *= (1 - self.ground_set_concept_probabilities[elem][i]) + gain += self.concept_weights[i] * old_concept_prod * self.ground_set_concept_probabilities[item][i] + return gain + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + gain = 0 + if enable_checks and item in X: + return gain + for i in range(self.num_concepts): + gain += self.concept_weights[i] * self.prob_of_concepts_covered_by_X[i] * self.ground_set_concept_probabilities[item][i] + return gain + + def update_memoization(self, X: Set[int], item: int): + if item in 
X: + return + + for i in range(self.num_concepts): + self.prob_of_concepts_covered_by_X[i] *= (1 - self.ground_set_concept_probabilities[item][i]) + + def get_effective_ground_set(self) -> Set[int]: + return set(range(self.n)) + + def clear_memoization(self): + self.prob_of_concepts_covered_by_X = torch.ones(self.num_concepts, dtype=torch.double) + + def set_memoization(self, X: Set[int]): + self.clear_memoization() + temp = set() + for elem in X: + self.update_memoization(temp, elem) + temp.add(elem) From 52a30c531e1c49fbd815bf7f02c703dda4caaa32 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:17:06 +0530 Subject: [PATCH 28/58] Update SetCover.py --- pytorch/submod/SetCover.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/submod/SetCover.py b/pytorch/submod/SetCover.py index 62ed068..a01d2c6 100644 --- a/pytorch/submod/SetCover.py +++ b/pytorch/submod/SetCover.py @@ -2,7 +2,7 @@ import torch.nn as nn import numpy as np import random -from SetFunction import SetFunction +from ..SetFunction import SetFunction class SetCover(SetFunction): def __init__(self, n, cover_set, num_concepts, concept_weights = None): super(SetFunction, self).__init__() From 036d04b6b861c133b9cd46f1bcaecdcf01e9c36d Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:17:36 +0530 Subject: [PATCH 29/58] Create __init__.py --- pytorch/submod/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 pytorch/submod/__init__.py diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py new file mode 100644 index 0000000..a6041d7 --- /dev/null +++ b/pytorch/submod/__init__.py @@ -0,0 +1 @@ +# /pytorch/SetFunction From 1d7d0140350f0addcc7c9c88bff02438ab153af6 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:18:08 +0530 Subject: [PATCH 30/58] 
Update ProbabilisticSetCover.py --- pytorch/submod/ProbabilisticSetCover.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch/submod/ProbabilisticSetCover.py b/pytorch/submod/ProbabilisticSetCover.py index aacb7c4..5eaaaa1 100644 --- a/pytorch/submod/ProbabilisticSetCover.py +++ b/pytorch/submod/ProbabilisticSetCover.py @@ -1,5 +1,6 @@ import torch from typing import List, Set, Tuple +from ..SetFunction import SetFunction class ProbabilisticSetCover(SetFunction): def __init__(self, n: int, ground_set_concept_probabilities: List[List[float]], num_concepts: int, concept_weights: List[float] = None): From 694f358ec8d7295f2a92d91928700af69d9ac357 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:18:38 +0530 Subject: [PATCH 31/58] Update __init__.py --- pytorch/submod/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py index a6041d7..c8851c6 100644 --- a/pytorch/submod/__init__.py +++ b/pytorch/submod/__init__.py @@ -1 +1 @@ -# /pytorch/SetFunction +# /pytorch/SetFunction/__init__.py From 698f1f6a330127217154e1825db6d1c42c3b996d Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Fri, 19 Jan 2024 23:50:34 +0530 Subject: [PATCH 32/58] Rename StochasticGreedyOptimizer to StochasticGreedyOptimize.pyr --- .../{StochasticGreedyOptimizer => StochasticGreedyOptimize.pyr} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pytorch/optimizer/{StochasticGreedyOptimizer => StochasticGreedyOptimize.pyr} (100%) diff --git a/pytorch/optimizer/StochasticGreedyOptimizer b/pytorch/optimizer/StochasticGreedyOptimize.pyr similarity index 100% rename from pytorch/optimizer/StochasticGreedyOptimizer rename to pytorch/optimizer/StochasticGreedyOptimize.pyr From 00f7f9fba34b75b75ab4f4a9c18b1ded108aaa67 Mon Sep 17 00:00:00 2001 From: JahanviRajput 
<142418693+JahanviRajput@users.noreply.github.com> Date: Fri, 19 Jan 2024 23:50:47 +0530 Subject: [PATCH 33/58] Rename StochasticGreedyOptimize.pyr to StochasticGreedyOptimize.py --- .../{StochasticGreedyOptimize.pyr => StochasticGreedyOptimize.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pytorch/optimizer/{StochasticGreedyOptimize.pyr => StochasticGreedyOptimize.py} (100%) diff --git a/pytorch/optimizer/StochasticGreedyOptimize.pyr b/pytorch/optimizer/StochasticGreedyOptimize.py similarity index 100% rename from pytorch/optimizer/StochasticGreedyOptimize.pyr rename to pytorch/optimizer/StochasticGreedyOptimize.py From f268159f77c99d9d3146ff5937586bd7737d897e Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Sat, 20 Jan 2024 00:01:07 +0530 Subject: [PATCH 34/58] Update setCover.py --- submodlib/functions/setCover.py | 47 +++++++-------------------------- 1 file changed, 10 insertions(+), 37 deletions(-) diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py index eaf2c54..21d4038 100644 --- a/submodlib/functions/setCover.py +++ b/submodlib/functions/setCover.py @@ -2,48 +2,18 @@ # Author: Vishal Kaushal from .setFunction import SetFunction from submodlib_cpp import SetCover +from submodlib_pytorch import SetCover +import torch class SetCoverFunction(SetFunction): - """Implementation of the Set-Cover (SC) submodular function. - For a subset :math:`A`, its Set Cover evaluation is defined as: - - .. math:: - f(A) = w(\\cup_{a \\in A} \\gamma(a)) = w(\\gamma(A)) - - where :math:`\\gamma(A)` refers to the set of concepts covered by :math:`A`. Thus the set of all concepts :math:`\\mathcal{U} = \\gamma(\\mathcal{V})`. :math:`w` is a weight vector in :math:`\\Re^{|\\mathcal{U}|}`. Intuitively, each element in :math:`\\mathcal{V}` *covers* a set of elements from the concept set :math:`U` and hence :math:`w(\\gamma(A))` is total weight of concepts covered by elements in :math:`A`. 
Note that :math:`\\gamma(A \\cup B) = \\gamma(A) \\cup \\gamma(B)` and hence :math:`f(A \\cup B) = w(\\gamma(A \\cup B)) = w(\\gamma(A) \\cup \\gamma(B))`. - - Alternatively we can also view the function as follows. With :math:`U` being the set of all concepts (namely :math:`U = \\gamma(\\mathcal{V})`) and :math:`c_u(i)` denoting whether the concept :math:`u \\in U` is covered by the element :math:`i \\in \\mathcal{V}` i.e :math:`c_u(i) = 1` if :math:`u \\in \\gamma(\\{i\\})` and is zero otherwise. We then define :math:`c_u(A) = \\sum_{a\\in A} c_u(a)` as the count of concept :math:`u` in set :math:`A`, and the weighted set cover can then be written as: - - .. math:: - f(A) = \\sum_{u \\in U} w_u \\min(c_u(A), 1) - - .. note:: - Set Cover functions models coverage of concepts and is monotone submodular. - - Parameters - ---------- - n : int - Number of elements in the ground set, must be > 0. - - cover_set : list - List of sets. Each set is the set of concepts covered by the corresponding data point / image. Hence cover_set is of size n. - - num_concepts : int - Number of concepts. - - concept_weights : list - Weight :math:`w_i` of each concept. Size must be same as num_concepts. 
- - """ - def __init__(self, n, cover_set, num_concepts, concept_weights=None): self.n = n self.cover_set = cover_set self.num_concepts = num_concepts self.concept_weights = concept_weights self.cpp_obj = None - + if self.n <= 0: raise Exception("ERROR: Number of elements in ground set must be positive") @@ -55,8 +25,11 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): raise Exception("ERROR: Mismtach between num_conepts and len(concept_weights)") else: self.concept_weights = [1] * self.num_concepts - - self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) + print("starting setCover.py self.cpp_obj = SetCover line 40 (at 60)") + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + if device == "cuda": + self.pytorch_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) + else: + self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) self.effective_ground = set(range(n)) - - + From 4bc95552dc426e677355472a0caf71ca7c1f7223 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:55:33 +0530 Subject: [PATCH 35/58] Update setCover.py --- submodlib/functions/setCover.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py index 21d4038..e8c5466 100644 --- a/submodlib/functions/setCover.py +++ b/submodlib/functions/setCover.py @@ -27,7 +27,7 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): self.concept_weights = [1] * self.num_concepts print("starting setCover.py self.cpp_obj = SetCover line 40 (at 60)") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - if device == "cuda": + if "cuda" in device : self.pytorch_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) else: self.cpp_obj = SetCover(self.n, self.cover_set, 
self.num_concepts, self.concept_weights) From 2a27493d93d5869a8539f12aa6d38495b85cc6e9 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:56:20 +0530 Subject: [PATCH 36/58] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 10ded66..74a62c5 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name='submodlib', #packages=find_packages(include=['submodlib']), - packages=['submodlib', 'submodlib/functions'], + packages=['submodlib', 'submodlib/functions','pytorch'], #packages=find_packages('submodlib'), #package_dir={'':'submodlib'}, #version='0.0.2', From aeaaaeff79871746938d6ad3fa450fa01c8a0278 Mon Sep 17 00:00:00 2001 From: amajee11us Date: Tue, 23 Jan 2024 11:42:45 -0600 Subject: [PATCH 37/58] Fixed dependency tree for submodlib GPU implementation --- pytorch/SetFunction.py | 8 ++++---- pytorch/__init__.py | 4 ++++ ...sticGreedyOptimize.py => StochasticGreedyOptimizer.py} | 2 +- pytorch/optimizer/__init__.py | 5 +++++ pytorch/submod/__init__.py | 2 ++ submodlib/functions/setCover.py | 3 ++- 6 files changed, 18 insertions(+), 6 deletions(-) rename pytorch/optimizer/{StochasticGreedyOptimize.py => StochasticGreedyOptimizer.py} (97%) create mode 100644 pytorch/optimizer/__init__.py diff --git a/pytorch/SetFunction.py b/pytorch/SetFunction.py index 6c7891c..24c139e 100644 --- a/pytorch/SetFunction.py +++ b/pytorch/SetFunction.py @@ -4,10 +4,10 @@ import torch.nn as nn import numpy as np import random -from optimizer.LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer -from optimizer.LazyGreedyOptimizer import LazyGreedyOptimizer -from optimizer.NaiveGreedyOptimizer import NaiveGreedyOptimizer -from optimizer.StochasticGreedyOptimizer import StochasticGreedyOptimizer +from pytorch.optimizer.LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer +from pytorch.optimizer.LazyGreedyOptimizer import 
LazyGreedyOptimizer +from pytorch.optimizer.NaiveGreedyOptimizer import NaiveGreedyOptimizer +from pytorch.optimizer.StochasticGreedyOptimizer import StochasticGreedyOptimizer class SetFunction(nn.Module): diff --git a/pytorch/__init__.py b/pytorch/__init__.py index 766a9a5..b8f5042 100644 --- a/pytorch/__init__.py +++ b/pytorch/__init__.py @@ -1 +1,5 @@ # /pytorch/__init__.py +from .SetFunction import SetFunction + +from .optimizer import * +from .submod import * \ No newline at end of file diff --git a/pytorch/optimizer/StochasticGreedyOptimize.py b/pytorch/optimizer/StochasticGreedyOptimizer.py similarity index 97% rename from pytorch/optimizer/StochasticGreedyOptimize.py rename to pytorch/optimizer/StochasticGreedyOptimizer.py index 081f379..0f70547 100644 --- a/pytorch/optimizer/StochasticGreedyOptimize.py +++ b/pytorch/optimizer/StochasticGreedyOptimizer.py @@ -11,7 +11,7 @@ def __init__(self): def equals(val1: float, val2: float, eps: float) -> bool: return abs(val1 - val2) < eps - def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool, + def maximize(self, f_obj, budget: float, stop_if_zero_gain: bool, stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True, show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]: # TODO: Implement handling of equal guys and different sizes of each item later diff --git a/pytorch/optimizer/__init__.py b/pytorch/optimizer/__init__.py new file mode 100644 index 0000000..4a1cbb3 --- /dev/null +++ b/pytorch/optimizer/__init__.py @@ -0,0 +1,5 @@ +# /pytorch/optimizer/__init__.py +from .LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer +from .LazyGreedyOptimizer import LazyGreedyOptimizer +from .NaiveGreedyOptimizer import NaiveGreedyOptimizer +from .StochasticGreedyOptimizer import StochasticGreedyOptimizer diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py index c8851c6..49d05d7 100644 --- 
a/pytorch/submod/__init__.py +++ b/pytorch/submod/__init__.py @@ -1 +1,3 @@ # /pytorch/SetFunction/__init__.py +from .SetCover import SetCover +from .ProbabilisticSetCover import ProbabilisticSetCover \ No newline at end of file diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py index e8c5466..83c9b3b 100644 --- a/submodlib/functions/setCover.py +++ b/submodlib/functions/setCover.py @@ -2,8 +2,9 @@ # Author: Vishal Kaushal from .setFunction import SetFunction from submodlib_cpp import SetCover -from submodlib_pytorch import SetCover +# from submodlib.pytorch import SetCover import torch +from pytorch.submod import SetCover class SetCoverFunction(SetFunction): From 57397c7dcb8830738b599cec866ce7cdd0abd588 Mon Sep 17 00:00:00 2001 From: amajee11us Date: Wed, 24 Jan 2024 09:51:06 -0600 Subject: [PATCH 38/58] Added fix for null object issue --- submodlib/functions/setCover.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py index 83c9b3b..d67d5b3 100644 --- a/submodlib/functions/setCover.py +++ b/submodlib/functions/setCover.py @@ -1,10 +1,11 @@ # setCover.py # Author: Vishal Kaushal from .setFunction import SetFunction -from submodlib_cpp import SetCover -# from submodlib.pytorch import SetCover import torch -from pytorch.submod import SetCover +if torch.cuda.is_available() : + from pytorch.submod import SetCover +else: + from submodlib_cpp import SetCover class SetCoverFunction(SetFunction): @@ -27,9 +28,9 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None): else: self.concept_weights = [1] * self.num_concepts print("starting setCover.py self.cpp_obj = SetCover line 40 (at 60)") - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - if "cuda" in device : - self.pytorch_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) + + if torch.cuda.is_available() : + self.cpp_obj = 
SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) else: self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights) self.effective_ground = set(range(n)) From ed70d00e5f82224c2ec286d854d6bb3cf6ba55ca Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 31 Jan 2024 00:12:58 +0530 Subject: [PATCH 39/58] Create GraphCut.py --- pytorch/submod/GraphCut.py | 346 +++++++++++++++++++++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 pytorch/submod/GraphCut.py diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py new file mode 100644 index 0000000..f2322ee --- /dev/null +++ b/pytorch/submod/GraphCut.py @@ -0,0 +1,346 @@ +from typing import List, Set + +class GraphCutpy(SetFunction): + def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, + master_ground_kernel: List[List[float]] = None, + ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None, + arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False, + ground: Set[int] = None): + super(SetFunction, self).__init__() + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.n = n + self.mode = mode + self.lambda_ = lambdaVal + self.separate_rep=separate_rep + self.n_rep = n_rep + self.partial = partial + self.original_to_partial_index_map = {} + self.mgsijs = mgsijs + self.ggsijs = ggsijs + self.data = data + self.data_rep=data_rep + self.metric = metric + self.num_neighbors = num_neighbors + self.effective_ground_set = set(range(n)) + self.clusters=None + self.cluster_sijs=None + self.cluster_map=None + self.ggsijs = None + self.mgsijs = None + # self.cpp_ground_sub = {-1} #Provide a dummy set for pybind11 binding to be successful + self.content = None + self.effective_ground = None + + if self.n <= 0: + raise 
Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse']: + raise Exception("ERROR: Incorrect mode. Must be one of 'dense' or 'sparse'") + if self.separate_rep == True: + if self.n_rep is None or self.n_rep <=0: + raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive") + if self.mode != "dense": + raise Exception("Only dense mode supported if separate_rep = True") + if (type(self.mgsijs) != type(None)) and (type(self.mgsijs) != np.ndarray): + raise Exception("mgsijs provided, but is not dense") + if (type(self.ggsijs) != type(None)) and (type(self.ggsijs) != np.ndarray): + raise Exception("ggsijs provided, but is not dense") + + if mode == "dense": + self.master_ground_kernel = master_ground_kernel + self.ground_ground_kernel = ground_ground_kernel + + if ground_ground_kernel is not None: + self.separate_master = True + + if partial: + self.effective_ground_set = ground + else: + self.effective_ground_set = set(range(n)) + + self.num_effective_ground_set = len(self.effective_ground_set) + + self.n_master = self.num_effective_ground_set + self.master_set = self.effective_ground_set + + if partial: + self.original_to_partial_index_map = {elem: ind for ind, elem in enumerate(self.effective_ground_set)} + + self.total_similarity_with_subset = [0] * self.num_effective_ground_set + self.total_similarity_with_master = [0] * self.num_effective_ground_set + for elem in self.effective_ground_set: + index = self.original_to_partial_index_map[elem] if partial else elem + self.total_similarity_with_subset[index] = 0 + self.total_similarity_with_master[index] = 0 + # for j in self.master_set: + # self.total_similarity_with_master[index] += self.master_ground_kernel[j][elem] + + if self.separate_rep == True: + if type(self.mgsijs) == type(None): + #not provided mgsij - make it + if (type(data) == type(None)) or (type(data_rep) == type(None)): + raise 
Exception("Data missing to compute mgsijs") + if np.shape(self.data)[0]!=self.n or np.shape(self.data_rep)[0]!=self.n_rep: + raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix") + self.mgsijs = np.array(subcp.create_kernel_NS(self.data.tolist(),self.data_rep.tolist(), self.metric)) + else: + #provided mgsijs - verify it's dimensionality + if np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n_rep: + raise Exception("ERROR: Inconsistency between n_rep, n and no of rows, columns of given mg kernel") + if type(self.ggsijs) == type(None): + #not provided ggsijs - make it + if type(data) == type(None): + raise Exception("Data missing to compute ggsijs") + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors)) + print(self.content) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + self.ggsijs = np.zeros((n,n)) + self.ggsijs[row,col] = val + else: + #provided ggsijs - verify it's dimensionality + if np.shape(self.ggsijs)[0]!=self.n or np.shape(self.ggsijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity gg kernel") + + else: + if (type(self.ggsijs) == type(None)) and (type(self.mgsijs) == type(None)): + #no kernel is provided make ggsij kernel + if type(data) == type(None): + raise Exception("Data missing to compute ggsijs") + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, 
self.num_neighbors)) + val = self.content[0] + row = list(self.content[1].astype(int)) + col = list(self.content[2].astype(int)) + self.ggsijs = np.zeros((n,n)) + self.ggsijs[row,col] = val + elif (type(self.ggsijs) == type(None)) and (type(self.mgsijs) != type(None)): + #gg is not available, mg is - good + #verify that it is dense and of correct dimension + if (type(self.mgsijs) != np.ndarray) or np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n: + raise Exception("ERROR: Inconsistency between n and no of rows, columns of given kernel") + self.ggsijs = self.mgsijs + elif (type(self.ggsijs) != type(None)) and (type(self.mgsijs) == type(None)): + #gg is available, mg is not - good + #verify that it is dense and of correct dimension + if (type(self.ggsijs) != np.ndarray) or np.shape(self.ggsijs)[1]!=self.n or np.shape(self.ggsijs)[0]!=self.n: + raise Exception("ERROR: Inconsistency between n and no of rows, columns of given kernel") + else: + #both are available - something is wrong + raise Exception("Two kernels have been wrongly provided when separate_rep=False") + elif mode == "sparse": + if self.separate_rep == True: + raise Exception("Separate represented is supported only in dense mode") + if self.num_neighbors is None or self.num_neighbors <=0: + raise Exception("Valid num_neighbors is needed for sparse mode") + if (type(self.ggsijs) == type(None)) and (type(self.mgsijs) == type(None)): + #no kernel is provided make ggsij sparse kernel + if type(data) == type(None): + raise Exception("Data missing to compute ggsijs") + self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors)) + val = self.content[0] + row = list(self.content[1].astype(int)) + col = list(self.content[2].astype(int)) + self.ggsijs = sparse.csr_matrix((val, (row, col)), [n,n]) + elif (type(self.ggsijs) == type(None)) and (type(self.mgsijs) != type(None)): + #gg is not available, mg is - good + #verify that it is sparse + if 
type(self.mgsijs) != scipy.sparse.csr.csr_matrix: + raise Exception("Provided kernel is not sparse") + self.ggsijs = self.mgsijs + elif (type(self.ggsijs) != type(None)) and (type(self.mgsijs) == type(None)): + #gg is available, mg is not - good + #verify that it is dense and of correct dimension + if type(self.ggsijs) != scipy.sparse.csr.csr_matrix: + raise Exception("Provided kernel is not sparse") + else: + #both are available - something is wrong + raise Exception("Two kernels have been wrongly provided when separate_rep=False") + + if self.separate_rep==None: + self.separate_rep = False + + if self.mode=="dense" and self.separate_rep == False : + self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.ggsijs) + self.ggsijs=l + + # self.cpp_obj = GraphCut(self.n, self.cpp_ggsijs, False, self.cpp_ground_sub, self.lambdaVal) + + elif self.mode=="dense" and self.separate_rep == True : + self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.ggsijs) + self.ggsijs=l + + self.mgsijs = self.mgsijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.mgsijs[0])==int or type(self.mgsijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.mgsijs) + self.mgsijs=l + + # self.cpp_obj = GraphCutpy(self.n, self.cpp_mgsijs, self.cpp_ggsijs, self.lambdaVal) + + elif self.mode == "sparse": + self.ggsijs = {} + 
self.ggsijs['arr_val'] = self.ggsijs.data.tolist() #contains non-zero values in matrix (row major traversal) + self.ggsijs['arr_count'] = self.ggsijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row + self.ggsijs['arr_col'] = self.ggsijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val + # self.cpp_obj = GraphCutpy(self.n, self.cpp_ggsijs['arr_val'], self.cpp_ggsijs['arr_count'], self.cpp_ggsijs['arr_col'], lambdaVal) + else: + raise Exception("Invalid") + + self.effective_ground = self.get_effective_ground_set() + print("it is done") + + # mode == "sparse": + # if not arr_val or not arr_count or not arr_col: + # raise ValueError("Error: Empty/Corrupt sparse similarity kernel") + + # self.sparse_kernel = SparseSim(arr_val, arr_count, arr_col) + + # self.effective_ground_set = set(range(n)) + # self.num_effective_ground_set = len(self.effective_ground_set) + + # self.n_master = self.num_effective_ground_set + # self.master_set = self.effective_ground_set + + # self.total_similarity_with_subset = [0] * n + # self.total_similarity_with_master = [0] * n + + # for i in range(n): + # self.total_similarity_with_subset[i] = 0 + # self.total_similarity_with_master[i] = 0 + + # for j in range(n): + # self.total_similarity_with_master[i] += self.sparse_kernel.get_val(j, i) + + # else: + # raise ValueError("Invalid mode") + + def evaluate(self, X: Set[int]) -> float: + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if not effective_x: + return 0 + + result = 0 + + if self.mode == "dense": + for elem in effective_x: + index = self.original_to_partial_index_map[elem] if self.partial else elem + result += self.total_similarity_with_master[index] + + for elem2 in effective_x: + result -= self.lambda_ * self.ground_ground_kernel[elem][elem2] + + elif self.mode == "sparse": + for elem in effective_x: + index = self.original_to_partial_index_map[elem] if self.partial else elem 
+ result += self.total_similarity_with_master[index] + + for elem2 in effective_x: + result -= self.lambda_ * self.sparse_kernel.get_val(elem, elem2) + + return result + + def evaluate_with_memoization(self, X: Set[int]) -> float: + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if not effective_x: + return 0 + + result = 0 + + if self.mode == "dense" or self.mode == "sparse": + for elem in effective_x: + index = self.original_to_partial_index_map[elem] if self.partial else elem + result += self.total_similarity_with_master[index] - self.lambda_ * self.total_similarity_with_subset[index] + + return result + + def marginal_gain(self, X: Set[int], item: int) -> float: + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if item in effective_x or item not in self.effective_ground_set: + return 0 + + gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] + + if self.mode == "dense": + for elem in effective_x: + gain -= 2 * self.lambda_ * self.ground_ground_kernel[item][elem] + gain -= self.lambda_ * self.ground_ground_kernel[item][item] + + elif self.mode == "sparse": + for elem in effective_x: + gain -= 2 * self.lambda_ * self.sparse_kernel.get_val(item, elem) + gain -= self.lambda_ * self.sparse_kernel.get_val(item, item) + + return gain + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if enable_checks and item in effective_x: + return 0 + + if self.partial and item not in self.effective_ground_set: + return 0 + + gain = 0 + + if self.mode == "dense": + index = self.original_to_partial_index_map[item] if self.partial else item + gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] + # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ 
* self.total_similarity_with_subset[index] - self.lambda_ * self.ground_ground_kernel[item][item] + + elif self.mode == "sparse": + index = self.original_to_partial_index_map[item] if self.partial else item + gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.sparse_kernel.get_val(item, item) + + return gain + + def update_memoization(self, X: Set[int], item: int): + effective_x = X.intersection(self.effective_ground_set) if self.partial else X + + if item in effective_x or item not in self.effective_ground_set: + return + + if self.mode == "dense": + for elem in self.effective_ground_set: + index = self.original_to_partial_index_map[elem] if self.partial else elem + # self.total_similarity_with_subset[index] += self.ground_ground_kernel[elem][item] + + elif self.mode == "sparse": + for elem in self.effective_ground_set: + index = self.original_to_partial_index_map[elem] if self.partial else elem + self.total_similarity_with_subset[index] += self.sparse_kernel.get_val(elem, item) + + def get_effective_ground_set(self) -> Set[int]: + return self.effective_ground_set + + def clear_memoization(self): + if self.mode == "dense" or self.mode == "sparse": + self.total_similarity_with_subset = [0] * self.num_effective_ground_set + + def set_memoization(self, X: Set[int]): + temp = set() + for elem in X: + self.update_memoization(temp, elem) + temp.add(elem) From 52828bede6b13a284f0ceb163fe0a098230a6bb8 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 31 Jan 2024 00:13:55 +0530 Subject: [PATCH 40/58] Update __init__.py --- pytorch/submod/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py index 49d05d7..dfa5ac5 100644 --- a/pytorch/submod/__init__.py +++ b/pytorch/submod/__init__.py @@ -1,3 +1,4 @@ # /pytorch/SetFunction/__init__.py from .SetCover import 
SetCover -from .ProbabilisticSetCover import ProbabilisticSetCover \ No newline at end of file +from .ProbabilisticSetCover import ProbabilisticSetCover +from .GraphCut import GraphCut From f8765d9616f246c6d9ad278110dd36bc1bbed1bc Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 31 Jan 2024 00:14:13 +0530 Subject: [PATCH 41/58] Update GraphCut.py --- pytorch/submod/GraphCut.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py index f2322ee..6bfd8e2 100644 --- a/pytorch/submod/GraphCut.py +++ b/pytorch/submod/GraphCut.py @@ -1,6 +1,6 @@ from typing import List, Set -class GraphCutpy(SetFunction): +class GraphCut(SetFunction): def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, master_ground_kernel: List[List[float]] = None, ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None, From 72e82c9f1f981e43d1bbb531eb8b9b26d55a5a16 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:44:57 +0530 Subject: [PATCH 42/58] pytorch version of helper.py All the functions of helper.py have been converted to PyTorch. This is used in graph cut, disparity min, disparity sum for now.
--- pytorch/submod/helper.py | 186 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 pytorch/submod/helper.py diff --git a/pytorch/submod/helper.py b/pytorch/submod/helper.py new file mode 100644 index 0000000..2f10c81 --- /dev/null +++ b/pytorch/submod/helper.py @@ -0,0 +1,186 @@ +import torch +import torch.nn.functional as F +from sklearn.cluster import Birch +from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity, pairwise_distances +from sklearn.neighbors import NearestNeighbors +from scipy import sparse +import pickle +import time +import os + +def cos_sim_square(A): + similarity = torch.matmul(A, A.t()) + + square_mag = torch.diag(similarity) + + inv_square_mag = 1 / square_mag + inv_square_mag[torch.isinf(inv_square_mag)] = 0 + + inv_mag = torch.sqrt(inv_square_mag) + + cosine = similarity * inv_mag + cosine = cosine.t() * inv_mag + return cosine + +def cos_sim_rectangle(A, B): + num = torch.matmul(A, B.t()) + p1 = torch.sqrt(torch.sum(A**2, dim=1)).unsqueeze(1) + p2 = torch.sqrt(torch.sum(B**2, dim=1)).unsqueeze(0) + return num / (p1 * p2) + +def create_sparse_kernel(X, metric, num_neigh, n_jobs=1, method="sklearn"): + if num_neigh > X.shape[0]: + raise Exception("ERROR: num of neighbors can't be more than the number of datapoints") + dense = None + dense = create_kernel_dense_sklearn(X, metric) + dense_ = None + if num_neigh == -1: + num_neigh = X.shape[0] # default is the total number of datapoints + + # Assuming X is a PyTorch tensor + X_np = X.numpy() + + # Use PyTorch functions for the nearest neighbors search + if metric == 'euclidean': + distances = torch.cdist(X, X, p=2) # Euclidean distance + elif metric == 'cosine': + distances = 1 - torch.nn.functional.cosine_similarity(X, X, dim=1) # Cosine similarity as distance + + # Exclude the distance to oneself (diagonal elements) + distances.fill_diagonal_(float('inf')) + + # Find the indices of the k-nearest neighbors using torch.topk + _, 
ind = torch.topk(distances, k=num_neigh, largest=False) + + # ind_l = [(index[0], x.item()) for index, x in torch.ndenumerate(ind)] + # Convert indices to row and col lists + row = [] + col = [] + for i, indices_row in enumerate(ind): + for j in indices_row: + row.append(i) + col.append(j.item()) + + mat = torch.zeros_like(distances) + mat[row, col] = 1 + dense_ = dense * mat # Only retain similarity of nearest neighbors + sparse_coo = torch.sparse_coo_tensor(torch.tensor([row, col]), mat[row, col], dense.size()) + # Convert the COO tensor to CSR format + sparse_csr = sparse_coo.coalesce() + return sparse_csr + # pass + + +def create_kernel_dense(X, metric, method="sklearn"): + dense = None + if method == "sklearn": + dense = create_kernel_dense_sklearn(X, metric) + else: + raise Exception("For creating dense kernel, only 'sklearn' method is supported") + return dense + +def create_kernel_dense_sklearn(X, metric, X_rep=None): + dense = None + D = None + + if metric == "euclidean": + if X_rep is None: + D = torch.cdist(X, X, p=2) + else: + D = torch.cdist(X_rep, X, p=2) + gamma = 1 / X.shape[1] + dense = torch.exp(-D * gamma) # Obtaining Similarity from distance + + elif metric == "cosine": + if X_rep is None: + dense = torch.nn.functional.cosine_similarity(X, X, dim=1) + else: + dense = torch.nn.functional.cosine_similarity(X_rep, X, dim=1) + + elif metric == "dot": + if X_rep is None: + dense = torch.matmul(X, X.t()) + else: + dense = torch.matmul(X_rep, X.t()) + + else: + raise Exception("ERROR: unsupported metric for this method of kernel creation") + + if X_rep is not None: + assert dense.shape == (X_rep.shape[0], X.shape[0]) + else: + assert dense.shape == (X.shape[0], X.shape[0]) + + return dense + pass + + +def create_cluster_kernels(X, metric, cluster_lab=None, num_cluster=None, onlyClusters=False): + lab = [] + if cluster_lab is None: + obj = Birch(n_clusters=num_cluster) + obj.fit(X) + lab = obj.predict(X).tolist() + if num_cluster is None: + num_cluster 
= len(obj.subcluster_labels_) + else: + if num_cluster is None: + raise Exception("ERROR: num_cluster needs to be specified if cluster_lab is provided") + lab = cluster_lab + + l_cluster = [set() for _ in range(num_cluster)] + l_ind = [0] * X.shape[0] + l_count = [0] * num_cluster + + for i, el in enumerate(lab): + l_cluster[el].add(i) + l_ind[i] = l_count[el] + l_count[el] = l_count[el] + 1 + + if onlyClusters: + return l_cluster, None, None + + l_kernel = [] + for el in l_cluster: + k = len(el) + l_kernel.append(torch.zeros((k, k))) # placeholder matrices of suitable size + + M = None + if metric == "euclidean": + D = torch.cdist(X, X) + gamma = 1 / X.shape[1] + M = torch.exp(-D * gamma) # similarity from distance + elif metric == "cosine": + M = F.cosine_similarity(X, X, dim=1) + M = M.unsqueeze(0) # converting to 2D for compatibility + else: + raise Exception("ERROR: unsupported metric") + + # Create kernel for each cluster using the bigger kernel + for i in range(X.shape[0]): + for j in range(X.shape[0]): + if lab[i] == lab[j]: + c_ID = lab[i] + ii = l_ind[i] + jj = l_ind[j] + l_kernel[c_ID][ii, jj] = M[i, j] + + return l_cluster, l_kernel, l_ind + +def create_kernel(X, metric, mode="dense", num_neigh=-1, n_jobs=1, X_rep=None, method="sklearn"): + + if X_rep is not None: + assert X_rep.shape[1] == X.shape[1] + + if mode == "dense": + dense = None + dense = globals()['create_kernel_dense_'+method](X, metric, X_rep) + return torch.tensor(dense) + + elif mode == "sparse": + if X_rep is not None: + raise Exception("Sparse mode is not supported for separate X_rep") + return create_sparse_kernel(X, metric, num_neigh, n_jobs, method) + + else: + raise Exception("ERROR: unsupported mode") From 7cc626e337ea16fe62c95a56a59439e6fc8e2673 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Sun, 4 Feb 2024 15:15:41 +0530 Subject: [PATCH 43/58] dense mode of with helper functions GraphCut.py --- pytorch/submod/GraphCut.py | 
121 ++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 43 deletions(-) diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py index 6bfd8e2..ed5a93d 100644 --- a/pytorch/submod/GraphCut.py +++ b/pytorch/submod/GraphCut.py @@ -1,7 +1,13 @@ from typing import List, Set - -class GraphCut(SetFunction): - def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, +import random +from helper import * + +class GraphCutpy(SetFunction): + # def __init__(self, n: int, mode: str, metric: str, master_ground_kernel: List[List[float]] = None, + # ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None, + # arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False, + # ground: Set[int] = None, lambdaVal: float = 0.0): + def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, master_ground_kernel: List[List[float]] = None, ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None, arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False, @@ -27,7 +33,6 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non self.cluster_map=None self.ggsijs = None self.mgsijs = None - # self.cpp_ground_sub = {-1} #Provide a dummy set for pybind11 binding to be successful self.content = None self.effective_ground = None @@ -38,14 +43,14 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non raise Exception("ERROR: Incorrect mode. 
Must be one of 'dense' or 'sparse'") if self.separate_rep == True: if self.n_rep is None or self.n_rep <=0: - raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive") + raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive") if self.mode != "dense": raise Exception("Only dense mode supported if separate_rep = True") if (type(self.mgsijs) != type(None)) and (type(self.mgsijs) != np.ndarray): raise Exception("mgsijs provided, but is not dense") if (type(self.ggsijs) != type(None)) and (type(self.ggsijs) != np.ndarray): raise Exception("ggsijs provided, but is not dense") - + if mode == "dense": self.master_ground_kernel = master_ground_kernel self.ground_ground_kernel = ground_ground_kernel @@ -66,14 +71,16 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non if partial: self.original_to_partial_index_map = {elem: ind for ind, elem in enumerate(self.effective_ground_set)} - self.total_similarity_with_subset = [0] * self.num_effective_ground_set - self.total_similarity_with_master = [0] * self.num_effective_ground_set + self.total_similarity_with_subset = [random.random() for _ in range(self.num_effective_ground_set)] + self.total_similarity_with_master = [random.random() for _ in range(self.num_effective_ground_set)] + self.master_ground_kernel = [[random.random() for _ in range(self.num_effective_ground_set)] for _ in range(self.num_effective_ground_set)] + self.ground_ground_kernel = [[random.random() for _ in range(self.num_effective_ground_set)] for _ in range(self.num_effective_ground_set)] for elem in self.effective_ground_set: index = self.original_to_partial_index_map[elem] if partial else elem - self.total_similarity_with_subset[index] = 0 - self.total_similarity_with_master[index] = 0 - # for j in self.master_set: - # self.total_similarity_with_master[index] += 
self.master_ground_kernel[j][elem] + self.total_similarity_with_subset[index] = 1 + self.total_similarity_with_master[index] = 1 + for j in self.master_set: + self.total_similarity_with_master[index] += self.master_ground_kernel[j][elem] if self.separate_rep == True: if type(self.mgsijs) == type(None): @@ -82,7 +89,9 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non raise Exception("Data missing to compute mgsijs") if np.shape(self.data)[0]!=self.n or np.shape(self.data_rep)[0]!=self.n_rep: raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix") - self.mgsijs = np.array(subcp.create_kernel_NS(self.data.tolist(),self.data_rep.tolist(), self.metric)) + + #create_kernel_NS is there .................... find it and define it not found in helper.py but used as here + # self.mgsijs = np.array(subcp.create_kernel_NS(self.data.tolist(),self.data_rep.tolist(), self.metric)) else: #provided mgsijs - verify it's dimensionality if np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n_rep: @@ -94,8 +103,7 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non if self.num_neighbors is not None: raise Exception("num_neighbors wrongly provided for dense mode") self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode - self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors)) - print(self.content) + self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense()) val = self.cpp_content[0] row = list(self.cpp_content[1].astype(int)) col = list(self.cpp_content[2].astype(int)) @@ -114,7 +122,7 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non if self.num_neighbors is not None: raise Exception("num_neighbors wrongly provided for dense mode") 
self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode - self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors)) + self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense()) val = self.content[0] row = list(self.content[1].astype(int)) col = list(self.content[2].astype(int)) @@ -143,7 +151,7 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non #no kernel is provided make ggsij sparse kernel if type(data) == type(None): raise Exception("Data missing to compute ggsijs") - self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors)) + self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense()) val = self.content[0] row = list(self.content[1].astype(int)) col = list(self.content[2].astype(int)) @@ -168,26 +176,24 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non if self.mode=="dense" and self.separate_rep == False : self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure - + if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11 #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) l=[] l.append(self.ggsijs) self.ggsijs=l - # self.cpp_obj = GraphCut(self.n, self.cpp_ggsijs, False, self.cpp_ground_sub, self.lambdaVal) - elif self.mode=="dense" and self.separate_rep == True : self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure - + if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11 #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) l=[] l.append(self.ggsijs) self.ggsijs=l - + self.mgsijs 
= self.mgsijs.tolist() #break numpy ndarray to native list of list datastructure - + if type(self.mgsijs[0])==int or type(self.mgsijs[0])==float: #Its critical that we pass a list of list to pybind11 #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) l=[] @@ -198,17 +204,18 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non elif self.mode == "sparse": self.ggsijs = {} - self.ggsijs['arr_val'] = self.ggsijs.data.tolist() #contains non-zero values in matrix (row major traversal) - self.ggsijs['arr_count'] = self.ggsijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row - self.ggsijs['arr_col'] = self.ggsijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val - # self.cpp_obj = GraphCutpy(self.n, self.cpp_ggsijs['arr_val'], self.cpp_ggsijs['arr_count'], self.cpp_ggsijs['arr_col'], lambdaVal) + # self.ggsijs['arr_val'] = self.ggsijs.data.tolist() #contains non-zero values in matrix (row major traversal) + # self.ggsijs['arr_count'] = self.ggsijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row + # self.ggsijs['arr_col'] = self.ggsijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val + # # self.cpp_obj = GraphCutpy(self.n, self.cpp_ggsijs['arr_val'], self.cpp_ggsijs['arr_count'], self.cpp_ggsijs['arr_col'], lambdaVal) else: raise Exception("Invalid") self.effective_ground = self.get_effective_ground_set() - print("it is done") - # mode == "sparse": + # if mode == "dense": + + # elif mode == "sparse": # if not arr_val or not arr_count or not arr_col: # raise ValueError("Error: Empty/Corrupt sparse similarity kernel") @@ -291,31 +298,59 @@ def marginal_gain(self, X: Set[int], item: int) -> float: for elem in effective_x: gain -= 2 * self.lambda_ * self.sparse_kernel.get_val(item, elem) gain -= self.lambda_ * self.sparse_kernel.get_val(item, item) - return gain - def 
marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: - effective_x = X.intersection(self.effective_ground_set) if self.partial else X + # def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + # effective_x = X.intersection(self.effective_ground_set) if self.partial else X - if enable_checks and item in effective_x: - return 0 + # if enable_checks and item in effective_x: + # return 0 - if self.partial and item not in self.effective_ground_set: - return 0 + # if self.partial and item not in self.effective_ground_set: + # return 0 + # gain = 0 + + # if self.mode == "dense": + # index = self.original_to_partial_index_map[item] if self.partial else item + # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] + # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.ground_ground_kernel[item][item] + + # elif self.mode == "sparse": + # index = self.original_to_partial_index_map[item] if self.partial else item + # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.sparse_kernel.get_val(item, item) + + # return gain + + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool) -> float: + effective_X = set() gain = 0 + if self.partial: + effective_X = X.intersection(self.effective_ground_set) + else: + effective_X = X - if self.mode == "dense": - index = self.original_to_partial_index_map[item] if self.partial else item - gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.ground_ground_kernel[item][item] + if enable_checks and item in 
effective_X: + return 0 - elif self.mode == "sparse": - index = self.original_to_partial_index_map[item] if self.partial else item - gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.sparse_kernel.get_val(item, item) + if self.partial and item not in self.effective_ground_set: + return 0 + if self.mode == 'dense': + gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] \ + - 2 * self.lambda_ * self.total_similarity_with_subset[self.original_to_partial_index_map[item] if self.partial else item] \ + - self.lambda_ * self.ground_ground_kernel[item][item] + elif self.mode == 'sparse': + gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] \ + - 2 * self.lambda_ * self.total_similarity_with_subset[self.original_to_partial_index_map[item] if self.partial else item] \ + - self.lambda_ * self.sparse_kernel.get_val(item, item) + else: + raise ValueError("Error: Only dense and sparse mode supported") + # print("gain value",gain) return gain + def update_memoization(self, X: Set[int], item: int): effective_x = X.intersection(self.effective_ground_set) if self.partial else X From 766e43268e2426f6d29a4aa4afe9b792e3ea5ac1 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Sun, 4 Feb 2024 15:18:04 +0530 Subject: [PATCH 44/58] Required functions of helper.py --- pytorch/submod/helper.py | 80 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/pytorch/submod/helper.py b/pytorch/submod/helper.py index 2f10c81..7832a32 100644 --- a/pytorch/submod/helper.py +++ b/pytorch/submod/helper.py @@ -7,6 +7,14 @@ import pickle import time import os +import numpy as np +from typing import List, Dict, Union +from math import sqrt + +# Define type aliases for clarity +Vector = List[float] +Matrix = List[Vector] +Set = 
List[int] # Considering integer elements for simplicity def cos_sim_square(A): similarity = torch.matmul(A, A.t()) @@ -184,3 +192,75 @@ def create_kernel(X, metric, mode="dense", num_neigh=-1, n_jobs=1, X_rep=None, m else: raise Exception("ERROR: unsupported mode") + + + +# Euclidean similarity function +def euclidean_similarity(a: Vector, b: Vector) -> float: + return np.linalg.norm(np.array(a) - np.array(b)) + +# Cosine similarity function +def cosine_similarity(a: Vector, b: Vector) -> float: + dot_product = np.dot(a, b) + norm_a = np.linalg.norm(a) + norm_b = np.linalg.norm(b) + return dot_product / (norm_a * norm_b) if norm_a * norm_b > 0 else 0 + +# Dot product function +def dot_prod(a: Vector, b: Vector) -> float: + return np.dot(a, b) + +# Create kernel function for non-square kernel +def create_kernel_NS(X_ground: Matrix, X_master: Matrix, metric: str = "euclidean") -> Matrix: + n_ground = len(X_ground) + n_master = len(X_master) + k_dense = [[0] * n_ground for _ in range(n_master)] + + for r in range(n_master): + for c in range(n_ground): + if metric == "euclidean": + k_dense[r][c] = euclidean_similarity(X_master[r], X_ground[c]) + elif metric == "cosine": + k_dense[r][c] = cosine_similarity(X_master[r], X_ground[c]) + elif metric == "dot": + k_dense[r][c] = dot_prod(X_master[r], X_ground[c]) + else: + raise ValueError("Unsupported metric for kernel computation in Python") + return k_dense + +# Create square kernel function +def create_square_kernel_dense(X_ground: Matrix, metric: str = "euclidean") -> Matrix: + n_ground = len(X_ground) + k_dense = [[0] * n_ground for _ in range(n_ground)] + + if metric == "euclidean": + for r in range(n_ground): + k_dense[r][r] = 1.0 + for c in range(r + 1, n_ground): + sim = euclidean_similarity(X_ground[r], X_ground[c]) + k_dense[r][c] = sim + k_dense[c][r] = sim + elif metric == "cosine": + for r in range(n_ground): + a_norm = sqrt(dot_prod(X_ground[r], X_ground[r])) + k_dense[r][r] = 1.0 + for c in range(r + 1, 
n_ground): + sim = dot_prod(X_ground[r], X_ground[c]) + b_norm = sqrt(dot_prod(X_ground[c], X_ground[c])) + sim = sim / (a_norm * b_norm) if a_norm * b_norm > 0 else 0 + k_dense[r][c] = sim + k_dense[c][r] = sim + elif metric == "dot": + for r in range(n_ground): + for c in range(r, n_ground): + sim = dot_prod(X_ground[r], X_ground[c]) + k_dense[r][c] = sim + k_dense[c][r] = sim + else: + raise ValueError("Unsupported metric for kernel computation in Python") + return k_dense + +# Set intersection function +def set_intersection(a: Set, b: Set) -> Set: + return list(set(a) & set(b)) # Converting set intersection to list for better compatibility + From 1efe9bf0f61c46e584ef1f9a6087b57096552dd1 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Sun, 4 Feb 2024 15:20:09 +0530 Subject: [PATCH 45/58] Function of dense mode only FacilityLocation.py --- pytorch/submod/FacilityLocation.py | 429 +++++++++++++++++++++++++++++ 1 file changed, 429 insertions(+) create mode 100644 pytorch/submod/FacilityLocation.py diff --git a/pytorch/submod/FacilityLocation.py b/pytorch/submod/FacilityLocation.py new file mode 100644 index 0000000..57c1c7b --- /dev/null +++ b/pytorch/submod/FacilityLocation.py @@ -0,0 +1,429 @@ +import numpy as np +import scipy +from scipy import sparse +from helper import * +class FacilityLocationFunction(SetFunction): + def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, data_rep=None, num_clusters=None, cluster_labels=None, metric="cosine", num_neighbors=None, + dense_kernel = None, data_master = None, create_dense_cpp_kernel_in_python = True, partial = False, seperate_master = False): + self.n = n + self.n_rep = n_rep + self.mode = mode + self.metric = metric + self.sijs = sijs + self.data = data + self.partial = partial + self.data_rep = data_rep + self.num_neighbors = num_neighbors + self.separate_rep = separate_rep + self.clusters = None + self.cluster_sijs = None + 
self.cluster_map = None + self.cluster_labels = cluster_labels + self.num_clusters = num_clusters + self.cpp_obj = None + self.cpp_sijs = None + self.cpp_ground_sub = None + self.cpp_content = None + self.effective_ground = None + self.seperate_master = seperate_master + self.dense_kernel = dense_kernel + self.data_master = data_master + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse', 'clustered']: + raise Exception("ERROR: Incorrect mode. Must be one of 'dense', 'sparse' or 'clustered'") + + if self.separate_rep == True: + if self.n_rep is None or self.n_rep <= 0: + raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive") + if self.mode != "dense": + raise Exception("Only dense mode supported if separate_rep = True") + + if self.mode == "clustered": + if type(self.cluster_labels) != type(None) and (self.num_clusters is None or self.num_clusters <= 0): + raise Exception("ERROR: Positive number of clusters must be provided in clustered mode when cluster_labels is provided") + if type(self.cluster_labels) == type(None) and self.num_clusters is not None and self.num_clusters <= 0: + raise Exception("Invalid number of clusters provided") + if type(self.cluster_labels) != type(None) and len(self.cluster_labels) != self.n: + raise Exception("ERROR: cluster_labels's size is NOT same as ground set size") + if type(self.cluster_labels) != type(None) and not all(ele >= 0 and ele <= self.num_clusters-1 for ele in self.cluster_labels): + raise Exception("Cluster IDs/labels contain invalid values") + + if type(self.sijs) != type(None): + if create_dense_cpp_kernel_in_python == False: + raise Exception("ERROR: create_dense_cpp_kernel_in_python is to be set to False ONLY when a similarity kernel is not provided and a CPP kernel is desired to be created in CPP") + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if 
num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if self.separate_rep is None: + raise Exception("ERROR: separate_rep bool must be specified with custom dense kernel") + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + + if self.separate_rep == True: + if np.shape(self.sijs)[1] != self.n or np.shape(self.sijs)[0] != self.n_rep: + raise Exception("ERROR: Inconsistency between n_rep, n and no of rows, columns of given kernel") + else: + if np.shape(self.sijs)[0] != self.n or np.shape(self.sijs)[1] != self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + + if type(self.data) != type(None) or type(self.data_rep) != type(None): + print("WARNING: similarity kernel found. 
Provided data matrix will be ignored.") + else: + if type(self.data) != type(None): + if self.separate_rep == True: + if type(self.data_rep) == type(None): + raise Exception("Represented data matrix not given") + if np.shape(self.data)[0] != self.n or np.shape(self.data_rep)[0] != self.n_rep: + raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix") + else: + if type(self.data_rep) != type(None): + print("WARNING: Represented data matrix not required but given, will be ignored.") + if np.shape(self.data)[0] != self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + + if self.mode == "clustered": + self.clusters, self.cluster_sijs, self.cluster_map = create_cluster_kernels(self.data.tolist(), self.metric, self.cluster_labels, self.num_clusters) + else: + if self.separate_rep == True: + if create_dense_cpp_kernel_in_python == True: + self.sijs = np.array(create_kernel_NS(self.data.tolist(), self.data_rep.tolist(), self.metric)) + else: + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + if create_dense_cpp_kernel_in_python == True: + pass + # self.sijs = np.array(create_square_kernel_dense(self.data.tolist(), self.metric)) + else: + self.cpp_content = np.array(create_kernel(self.data.tolist(), self.metric, self.num_neighbors)) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + self.sijs = sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + # self.cpp_ground_sub = {-1} + + if separate_rep == None: + self.separate_rep = False + + elif self.mode == "sparse": + self.cpp_sijs = {} + self.cpp_sijs["arr_val"] = self.sijs.data.tolist() + self.cpp_sijs["arr_count"] = self.sijs.indptr.tolist() + 
self.cpp_sijs["arr_col"] = self.sijs.indices.tolist() + # self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs["arr_val"], self.cpp_sijs["arr_count"], self.cpp_sijs["arr_col"]) + elif self.mode == "clustered": + l_temp = [] + for el in self.cluster_sijs: + temp = el.tolist() + if isinstance(temp[0], int) or isinstance(temp[0], float): + l = [] + l.append(temp) + temp = l + l_temp.append(temp) + self.cluster_sijs = l_temp + + + if self.mode == 'dense': + if self.dense_kernel == None: + self.dense_constructor_no_kernel(n = self.n, data = self.data, data_master = self.data_master) ## dense mode with no dense_kernel + elif self.dense_kernel != None: + self.dense_constructor(n = self.n, dense_kernel = self.dense_kernel, ground = self.data, partial = self.partial, separate_master = self.separate_master) ## dense mode with dense_kernel + ### other modes are remaining + elif self.mode == 'sparse': + pass + elif self.mode == 'clustered': + pass + + self.effective_ground = self.get_effective_ground_set() + + + def dense_constructor(self, n, dense_kernel, partial = False, ground = None, separate_master = False): + self.n = n + self.mode = 'dense' + self.dense_kernel = dense_kernel + self.partial = partial + self.separate_master = separate_master + + if partial: + self.effective_ground_set = ground + else: + self.effective_ground_set = set(range(n)) + + self.num_effective_groundset = len(self.effective_ground_set) + + if separate_master: + self.n_master = len(dense_kernel) + self.master_set = set(range(self.n_master)) + else: + self.n_master = self.num_effective_groundset + self.master_set = self.effective_ground_set + + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + + if partial: + self.original_to_partial_index_map = {val: i for i, val in enumerate(self.effective_ground_set)} + + # Constructor for dense mode (kernel not supplied) + def dense_constructor_no_kernel(self, n, data, data_master, separate_master = False, metric = 'cosine'): + if 
separate_master: + self.dense_kernel = create_kernel_NS(data, data_master, metric) + else: + self.dense_kernel = create_square_kernel_dense(data, metric) + + self.mode = 'dense' + self.partial = False + + self.n = n + self.separate_master = separate_master + + self.effective_ground_set = set(range(n)) + self.num_effective_groundset = n + + if separate_master: + self.n_master = len(self.dense_kernel) + self.master_set = set(range(self.n_master)) + else: + self.n_master = n + self.master_set = self.effective_ground_set + + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + + # Constructor for sparse mode + def sparse_constructor(self, n, arr_val, arr_count, arr_col): + self.n = n + self.mode = 'sparse' + self.partial = False + self.separate_master = False + + self.sparse_kernel = self.SparseSim(arr_val, arr_count, arr_col) + + self.effective_ground_set = set(range(n)) + self.num_effective_groundset = n + + self.n_master = self.num_effective_groundset + self.master_set = self.effective_ground_set + + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + + # Constructor for cluster mode + def cluster_constructor(self, n, clusters, cluster_kernels, cluster_index_map): + self.n = n + self.mode = 'clustered' + self.num_clusters = len(clusters) + self.clusters = clusters + self.cluster_kernels = cluster_kernels + self.cluster_index_map = cluster_index_map + self.partial = False + self.separate_master = False + + self.effective_ground_set = set(range(n)) + self.num_effective_groundset = n + + self.n_master = self.num_effective_groundset + self.master_set = self.effective_ground_set + + self.cluster_ids = [0] * n + for i, ci in enumerate(clusters): + for ind in ci: + self.cluster_ids[ind] = i + + self.relevant_x = [[] for _ in range(self.num_clusters)] + self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(n) + + # def clone(self): + # return FacilityLocation(**self.__dict__) + + def evaluate(self, X): + effective_X = 
X.intersection(self.effective_ground_set) if self.partial else X + result = 0 + + if effective_X: + if self.mode == 'dense': + for ind in self.master_set: + result += self.get_max_sim_dense(ind, effective_X) + elif self.mode == 'sparse': + for ind in self.master_set: + result += self.get_max_sim_sparse(ind, effective_X) + else: # clustered + for i in range(self.num_clusters): + relevant_subset = X.intersection(self.clusters[i]) + if relevant_subset: + for ind in self.clusters[i]: + result += self.get_max_sim_cluster(ind, relevant_subset, i) + + return result + + def evaluate_with_memoization(self, X): + effective_X = X.intersection(self.effective_ground_set) if self.partial else X + result = 0 + + if effective_X: + if self.mode == 'dense' or self.mode == 'sparse': + for ind in self.master_set: + result += self.similarity_with_nearest_in_effective_x[ind] + else: # clustered + for i in range(self.num_clusters): + if self.relevant_x[i]: + for ind in self.clusters[i]: + result += self.clustered_similarity_with_nearest_in_relevant_x[ind] + + return result + + def marginal_gain(self, X, item): + effective_X = X.intersection(self.effective_ground_set) if self.partial else X + gain = 0 + + if item not in effective_X: + if self.mode == 'dense': + print(self.master_set) + for ind in self.master_set: + m = self.get_max_sim_dense(ind, effective_X) + if self.dense_kernel[item][ind] > m: + m = self.dense_kernel[item][ind] + gain += m - self.similarity_with_nearest_in_effective_x[ind] + elif self.mode == 'sparse': + for ind in self.master_set: + m = self.get_max_sim_sparse(ind, effective_X) + if self.sparse_kernel[item, ind] > m: + m = self.sparse_kernel[item, ind] + gain += m - self.similarity_with_nearest_in_effective_x[ind] + else: # clustered + cluster_id = self.cluster_ids[item] + relevant_subset = effective_X.intersection(self.clusters[cluster_id]) + for ind in self.clusters[cluster_id]: + m = self.get_max_sim_cluster(ind, relevant_subset, cluster_id) + if 
self.cluster_kernels[cluster_id][item][ind] > m: + m = self.cluster_kernels[cluster_id][item][ind] + gain += m - self.clustered_similarity_with_nearest_in_relevant_x[ind] + + return gain + def marginal_gain_with_memoization(self, X, item, enable_checks): + effective_X = set() + gain = 0 + + if self.partial: + effective_X = X.intersection(self.effective_ground_set) + else: + effective_X = X + + if enable_checks and item in effective_X: + return 0 + + if self.partial and item not in self.effective_ground_set: + return 0 + + if self.mode == 'dense': + for ind in self.master_set: + if self.partial: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]]: + gain += self.dense_kernel[ind][item] - self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]] + else: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[ind]: + gain += self.dense_kernel[ind][item] - self.similarity_with_nearest_in_effective_x[ind] + elif self.mode == 'sparse': + for ind in self.master_set: + temp = self.sparse_kernel[ind, item] + if temp > self.similarity_with_nearest_in_effective_x[ind]: + gain += temp - self.similarity_with_nearest_in_effective_x[ind] + else: # clustered + i = self.cluster_ids[item] + item_ = self.cluster_index_map[item] + relevant_subset = self.relevant_x[i] + ci = self.clusters[i] + + if len(relevant_subset) == 0: + for ind in ci: + ind_ = self.cluster_index_map[ind] + gain += self.cluster_kernels[i][ind_][item_] + else: + for ind in ci: + ind_ = self.cluster_index_map[ind] + if self.cluster_kernels[i][ind_][item_] > self.clustered_similarity_with_nearest_in_relevant_x[ind]: + gain += self.cluster_kernels[i][ind_][item_] - self.clustered_similarity_with_nearest_in_relevant_x[ind] + + return gain + + + def update_memoization(self, X, item): + effective_X = set() + + if self.partial: + effective_X = X.intersection(self.effective_ground_set) + else: + 
effective_X = X + + if item in effective_X: + return + + if self.partial and item not in self.effective_ground_set: + return + + if self.mode == 'dense': + for ind in self.master_set: + if self.partial: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]]: + self.similarity_with_nearest_in_effective_x[self.original_to_partial_index_map[ind]] = self.dense_kernel[ind][item] + else: + if self.dense_kernel[ind][item] > self.similarity_with_nearest_in_effective_x[ind]: + self.similarity_with_nearest_in_effective_x[ind] = self.dense_kernel[ind][item] + elif self.mode == 'sparse': + for ind in self.master_set: + temp_val = self.sparse_kernel[ind, item] + if temp_val > self.similarity_with_nearest_in_effective_x[ind]: + self.similarity_with_nearest_in_effective_x[ind] = temp_val + else: # clustered + i = self.cluster_ids[item] + item_ = self.cluster_index_map[item] + ci = self.clusters[i] + + for ind in ci: + ind_ = self.cluster_index_map[ind] + if self.cluster_kernels[i][ind_][item_] > self.clustered_similarity_with_nearest_in_relevant_x[ind]: + self.clustered_similarity_with_nearest_in_relevant_x[ind] = self.cluster_kernels[i][ind_][item_] + + self.relevant_x[i].add(item) + + + def get_effective_ground_set(self): + return set(range(self.n)) + + + def cluster_init(self, n_, dense_kernel_, ground_, partial, lambda_): + self.n = n_ + self.partial = partial + self.effective_ground_set = ground_ + self.n_master = len(dense_kernel_) + self.master_set = set(range(self.n_master)) + self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + self.mode = 'dense' + self.dense_kernel = dense_kernel_ + self.original_to_partial_index_map = {val: i for i, val in enumerate(self.effective_ground_set)} + self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(n_) + self.relevant_x = [set() for _ in range(n_)] + + + def clear_memoization(self): + if self.mode == 'dense' or self.mode == 'sparse': + 
self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master) + else: + self.relevant_x = [set() for _ in range(self.num_clusters)] + self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(self.n) + + + def set_memoization(self, X): + self.clear_memoization() + temp = set() + for elem in X: + self.update_memoization(temp, elem) + temp.add(elem) From 7fac287c21a973383d4728434afa786e62cadfb0 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:44:28 +0530 Subject: [PATCH 46/58] Function with all modes implemented DisparityMin.py --- pytorch/submod/DisparityMin.py | 191 +++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 pytorch/submod/DisparityMin.py diff --git a/pytorch/submod/DisparityMin.py b/pytorch/submod/DisparityMin.py new file mode 100644 index 0000000..aa71fe1 --- /dev/null +++ b/pytorch/submod/DisparityMin.py @@ -0,0 +1,191 @@ +import numpy as np +import scipy +from helper import * + +class DisparityMinFunction(SetFunction): + + def __init__(self, n, mode, sijs=None, data=None, metric="cosine", num_neighbors=None): + super(DisparityMinFunction, self).__init__() + self.n = n + self.mode = mode + self.metric = metric + self.sijs = sijs + self.data = data + self.num_neighbors = num_neighbors + self.cpp_obj = None + self.cpp_sijs = None + self.cpp_content = None + self.effective_ground_set = None + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse']: + raise Exception("ERROR: Incorrect mode. 
Must be one of 'dense' or 'sparse'") + + if type(self.sijs) != type(None): # User has provided similarity kernel + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + #TODO: is the below dimensionality check valid for both dense and sparse kernels? + if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + if type(self.data) != type(None): + print("WARNING: similarity kernel found. Provided data matrix will be ignored.") + + else: #similarity kernel has not been provided + if type(self.data) != type(None): + if np.shape(self.data)[0]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.cpp_content = np.array(create_kernel(X = torch.tensor(self.data.tolist()), metric = self.metric, num_neigh = self.num_neighbors, mode = self.mode).to_dense()) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + if self.mode=="dense": + self.sijs = np.zeros((n,n)) + self.sijs[row,col] = val + if self.mode=="sparse": + self.sijs = scipy.sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + cpp_ground_sub = {-1} 
#Provide a dummy set for pybind11 binding to be successful + + #Breaking similarity matrix to simpler native data structures for implicit pybind11 binding + if self.mode=="dense": + self.cpp_sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure + if type(self.cpp_sijs[0])==int or type(self.cpp_sijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.cpp_sijs) + self.cpp_sijs=l + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset = len(self.effective_ground_set) + self.currentMin = 0 + + if self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation) + self.cpp_sijs = {} + self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal) + self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row + self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val n, arr_val arr_count arr_col + if(len(self.cpp_sijs['arr_val']) ==0 or len(self.cpp_sijs['arr_count']) ==0 or len(self.cpp_sijs['arr_col']) ==0): + raise Exception("Error: Empty/Corrupt sparse similarity kernel") + self.sparse_kernel = subcp.SparseSim(self.cpp_sijs['arr_val'],self.cpp_sijs['arr_count'],self.cpp_sijs['arr_col']) + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset = len(self.effective_ground_set) + self.currentMin = 0 + + + + def evaluate(self, X: Set[int]) -> float: + effective_X = X + if len(effective_X) == 0 or len(effective_X) == 1: + return 0.0 + if self.mode == 'dense': + return get_min_dense(effective_X, self) + elif self.mode == 'sparse': + return get_min_sparse(effective_X, self) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + def evaluate_with_memoization(self, X: 
Set[int]) -> float: + return self.currentMin + + def get_effective_ground_set(self) -> Set[int]: + + return self.effective_ground_set + + def marginal_gain(self, X: Set[int], item: int) -> float: + effective_X = X + + if item in effective_X: + return 0.0 + + if item not in self.effective_ground_set: + return 0.0 + + min_val = 1.0 if len(effective_X) == 1 else self.currentMin + + if self.mode == 'dense': + for elem in effective_X: + if 1 - self.cpp_sijs[elem][item] < min_val and elem != item: + min_val = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + if 1 - self.sparse_kernel.get_val(elem, item) < min_val and elem != item: + min_val = 1 - self.sparse_kernel.get_val(elem, item) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return min_val - self.currentMin + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + effective_X = X + + if enable_checks and item in effective_X: + return 0.0 + + if False and item not in self.effective_ground_set: + return 0.0 + + min_val = 1.0 if len(effective_X) == 1 else self.currentMin + + if self.mode == 'dense': + for elem in effective_X: + if 1 - self.cpp_sijs[elem][item] < min_val and elem != item: + min_val = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + if 1-self.sparse_kernel.get_val(item, elem) and elem!=item: + min = 1-self.sparse_kernel.get_val(item,elem) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return min_val - self.currentMin + + def update_memoization(self, X: Set[int], item: int) -> None: + effective_X = X + + if item in effective_X: + return + + if item not in self.effective_ground_set: + return + + if len(effective_X) == 1: + if self.mode == 'dense': + for elem in effective_X: + self.currentMin = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + self.currentMin = 1 - 
self.sparse_kernel.get_val(elem, item) + else: + raise ValueError("Error: Only dense and sparse mode supported") + else: + if self.mode == 'dense': + for elem in effective_X: + if 1 - self.cpp_sijs[elem][item] < self.currentMin and elem != item: + self.currentMin = 1 - self.cpp_sijs[elem][item] + elif self.mode == 'sparse': + for elem in effective_X: + if 1 - self.sparse_kernel.get_val(elem, item) < self.currentMin and elem != item: + self.currentMin = 1 - self.sparse_kernel.get_val(elem, item) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + def clear_memoization(self) -> None: + self.currentMin = 0.0 + + def set_memoization(self, X: Set[int]) -> None: + self.currentMin = self.evaluate(X) From 1cac3802da4629d8d50e46451f052fc8c4c7d247 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:48:25 +0530 Subject: [PATCH 47/58] All modes are implemented DisparitySum.py --- pytorch/submod/DisparitySum.py | 174 +++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 pytorch/submod/DisparitySum.py diff --git a/pytorch/submod/DisparitySum.py b/pytorch/submod/DisparitySum.py new file mode 100644 index 0000000..10f773c --- /dev/null +++ b/pytorch/submod/DisparitySum.py @@ -0,0 +1,174 @@ +from helper import * +import numpy as np +import scipy + +class DisparitySumFunction(SetFunction): + + def __init__(self, n, mode, sijs=None, data=None, metric="cosine", num_neighbors=None): + super(DisparitySumFunction, self).__init__() + + self.n = n + self.mode = mode + self.metric = metric + self.sijs = sijs + self.data = data + self.num_neighbors = num_neighbors + self.cpp_obj = None + self.cpp_sijs = None + self.cpp_content = None + self.effective_ground_set = None + + + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse']: + raise Exception("ERROR: Incorrect mode. 
Must be one of 'dense' or 'sparse'") + + + if type(self.sijs) != type(None): # User has provided similarity kernel + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + #TODO: is the below dimensionality check valid for both dense and sparse kernels? + if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + if type(self.data) != type(None): + print("WARNING: similarity kernel found. Provided data matrix will be ignored.") + + else: #similarity kernel has not been provided + if type(self.data) != type(None): + if np.shape(self.data)[0]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.cpp_content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors, mode = self.mode).to_dense()) + val = self.cpp_content[0] + row = list(self.cpp_content[1].astype(int)) + col = list(self.cpp_content[2].astype(int)) + if self.mode=="dense": + self.sijs = np.zeros((n,n)) + self.sijs[row,col] = val + if self.mode=="sparse": + self.num_neighbors = 0 + self.sijs = scipy.sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + 
cpp_ground_sub = {-1} #Provide a dummy set for pybind11 binding to be successful + + #Breaking similarity matrix to simpler native data structures for implicit pybind11 binding + if self.mode=="dense": + + self.cpp_sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.cpp_sijs[0])==int or type(self.cpp_sijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.cpp_sijs) + self.cpp_sijs=l + + + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset = len(self.effective_ground_set) + self.currentSum = 0 + + + + if self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation) + self.cpp_sijs = {} + self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal) + self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row + self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val + if(len(self.cpp_sijs['arr_val']) ==0 or len(self.cpp_sijs['arr_count']) ==0 or len(self.cpp_sijs['arr_col']) ==0): + raise Exception("Error: Empty/Corrupt sparse similarity kernel") + self.sparse_kernel = subcp.SparseSim(self.cpp_sijs['arr_val'],self.cpp_sijs['arr_count'],self.cpp_sijs['arr_col']) + self.effective_ground_set = set(range(n)) + self.numeffectivegroundset = len(self.effective_ground_set) + self.currentSum = 0 + + + def evaluate(self, X: Set[int]) -> float: + effective_X = X + if len(effective_X) == 0 : + return 0.0 + if self.mode == 'dense': + return get_sum_dense(effective_X, self) + elif self.mode == 'sparse': + return get_sum_sparse(effective_X, self) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + def evaluate_with_memoization(self, X: Set[int]) -> float: + 
return self.currentSum + + def get_effective_ground_set(self) -> Set[int]: + return self.effective_ground_set + + def marginal_gain(self, X: Set[int], item: int) -> float: + effective_X = X + gain = 0.0 + + if item in effective_X: + return 0.0 + + if item not in self.effective_ground_set: + return 0.0 + + + if self.mode == 'dense': + for elem in effective_X: + gain += (1 - self.cpp_sijs[elem][item]) + elif self.mode == 'sparse': + for elem in effective_X: + gain += (1 - self.sparse_kernel.get_val(item, elem)) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return gain + + def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float: + effective_X = X + gain =0.0 + + if enable_checks and item in effective_X: + return 0.0 + + if False and item not in self.effective_ground_set: + return 0.0 + + + + if self.mode == 'dense': + for elem in effective_X: + gain += (1 - self.cpp_sijs[elem][item]) + elif self.mode == 'sparse': + for elem in effective_X: + gain += (1 - self.sparse_kernel.get_val(item, elem)) + else: + raise ValueError("Error: Only dense and sparse mode supported") + + return gain + + def update_memoization(self, X: Set[int], item: int) -> None: + + + self.currentSum += self.marginal_gain(X, item) + + + def clear_memoization(self) -> None: + self.currentSum = 0.0 + + def set_memoization(self, X: Set[int]) -> None: + self.currentSum = self.evaluate(X) From 9464fb8a355f8f76fad6af6e32bee7a0f163f19d Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:49:22 +0530 Subject: [PATCH 48/58] Dense mode is implemented GraphCut.py --- pytorch/submod/GraphCut.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py index ed5a93d..df60fd0 100644 --- a/pytorch/submod/GraphCut.py +++ b/pytorch/submod/GraphCut.py @@ -2,11 +2,7 @@ import random from helper import * 
-class GraphCutpy(SetFunction): - # def __init__(self, n: int, mode: str, metric: str, master_ground_kernel: List[List[float]] = None, - # ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None, - # arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False, - # ground: Set[int] = None, lambdaVal: float = 0.0): +class GraphCutFunction(SetFunction): def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, master_ground_kernel: List[List[float]] = None, ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None, From fbbd7baf1a44c7bd6c0c41f62d1d865fc5da0fb2 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:50:16 +0530 Subject: [PATCH 49/58] Dense mode is implemented GraphCut.py --- pytorch/submod/GraphCut.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py index df60fd0..5cfb774 100644 --- a/pytorch/submod/GraphCut.py +++ b/pytorch/submod/GraphCut.py @@ -1,6 +1,7 @@ from typing import List, Set import random from helper import * +from ..SetFunction import SetFunction class GraphCutFunction(SetFunction): def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, From 0fb08d6ba8060f45263289335355fe7193fe1672 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:51:02 +0530 Subject: [PATCH 50/58] Function with all modes implemented DisparityMin.py --- pytorch/submod/DisparityMin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch/submod/DisparityMin.py b/pytorch/submod/DisparityMin.py index aa71fe1..27a4d2b 100644 --- a/pytorch/submod/DisparityMin.py +++ b/pytorch/submod/DisparityMin.py @@ -1,6 +1,7 @@ import numpy 
as np import scipy from helper import * +from ..SetFunction import SetFunction class DisparityMinFunction(SetFunction): From a026e5b85923dc507db7d389ad018b152d0f64aa Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:51:51 +0530 Subject: [PATCH 51/58] Function with all modes implemented DisparitySum.py --- pytorch/submod/DisparitySum.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch/submod/DisparitySum.py b/pytorch/submod/DisparitySum.py index 10f773c..efb9f1d 100644 --- a/pytorch/submod/DisparitySum.py +++ b/pytorch/submod/DisparitySum.py @@ -1,6 +1,7 @@ from helper import * import numpy as np import scipy +from ..SetFunction import SetFunction class DisparitySumFunction(SetFunction): From 078194ae9c4ed109323e98fb326a80571da913d0 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:53:20 +0530 Subject: [PATCH 52/58] Function of dense mode only FacilityLocation.py --- pytorch/submod/FacilityLocation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch/submod/FacilityLocation.py b/pytorch/submod/FacilityLocation.py index 57c1c7b..1a5a155 100644 --- a/pytorch/submod/FacilityLocation.py +++ b/pytorch/submod/FacilityLocation.py @@ -2,6 +2,8 @@ import scipy from scipy import sparse from helper import * +from ..SetFunction import SetFunction + class FacilityLocationFunction(SetFunction): def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, data_rep=None, num_clusters=None, cluster_labels=None, metric="cosine", num_neighbors=None, dense_kernel = None, data_master = None, create_dense_cpp_kernel_in_python = True, partial = False, seperate_master = False): From 5265d84075532e306b3db92df7f3972dbbc0cc49 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:54:53 +0530 Subject: [PATCH 53/58] Update SetCover.py --- 
pytorch/submod/SetCover.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch/submod/SetCover.py b/pytorch/submod/SetCover.py index a01d2c6..3163400 100644 --- a/pytorch/submod/SetCover.py +++ b/pytorch/submod/SetCover.py @@ -3,7 +3,8 @@ import numpy as np import random from ..SetFunction import SetFunction -class SetCover(SetFunction): + +class SetCoverFunction(SetFunction): def __init__(self, n, cover_set, num_concepts, concept_weights = None): super(SetFunction, self).__init__() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") From 6cf311be845c93de9a39e47ace811f3e27f5c857 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:56:59 +0530 Subject: [PATCH 54/58] Update __init__.py --- pytorch/submod/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py index dfa5ac5..99c05d2 100644 --- a/pytorch/submod/__init__.py +++ b/pytorch/submod/__init__.py @@ -1,4 +1,8 @@ # /pytorch/SetFunction/__init__.py -from .SetCover import SetCover -from .ProbabilisticSetCover import ProbabilisticSetCover -from .GraphCut import GraphCut +from .SetCover import SetCoverFunction +from .ProbabilisticSetCover import ProbabilisticSetCoverFunction +from .GraphCut import GraphCutFunction +from .DisparityMin import DisparityMinFunction +from .DisparitySum import DisparitySumFunction +from .FacilityLocation import FacilityLocationFunction +from .LogDeteminant import LogDeteminantFunction From de6ae41fe8665d5595eb31c4f691faa1b50b7cfa Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 6 Feb 2024 18:59:04 +0530 Subject: [PATCH 55/58] Dense mode is done LogDeterminant.py --- pytorch/submod/LogDeterminant.py | 248 +++++++++++++++++++++++++++++++ 1 file changed, 248 insertions(+) create mode 100644 pytorch/submod/LogDeterminant.py diff --git 
a/pytorch/submod/LogDeterminant.py b/pytorch/submod/LogDeterminant.py new file mode 100644 index 0000000..96b0e50 --- /dev/null +++ b/pytorch/submod/LogDeterminant.py @@ -0,0 +1,248 @@ +import math +from collections import defaultdict +import scipy +from helper import * +from ..SetFunction import SetFunction + +class LogDeterminantFunction(SetFunction): + + def dot_product(self, x, y): + return sum(xi * yi for xi, yi in zip(x, y)) + + + def __init__(self, n, mode, lambdaVal, arr_val=None, arr_count=None, arr_col=None, dense_kernel=None, partial=None, + sijs=None, data=None, metric="cosine", num_neighbors=None, memoizedC = None, memoizedD = None, data_master = None): + self.n = n + self.mode = mode + self.metric = metric + self.sijs = sijs + self.data = data + self.num_neighbors = num_neighbors + self.lambdaVal = lambdaVal + self.sijs = None + self.content = None + self.effective_ground = None + self.partial = partial + self.effective_ground_set = set(range(n)) + self.memoizedC = memoizedC + self.memoizedD = memoizedD + self.data_master = data_master + self.dense_kernel = dense_kernel + + if self.n <= 0: + raise Exception("ERROR: Number of elements in ground set must be positive") + + if self.mode not in ['dense', 'sparse', 'clustered']: + raise Exception("ERROR: Incorrect mode. Must be one of 'dense', 'sparse' or 'clustered'") + + if self.metric not in ['euclidean', 'cosine']: + raise Exception("ERROR: Unsupported metric. 
Must be 'euclidean' or 'cosine'") + if type(self.sijs) != type(None): # User has provided similarity kernel + if type(self.sijs) == scipy.sparse.csr.csr_matrix: + if num_neighbors is None or num_neighbors <= 0: + raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel") + if mode != "sparse": + raise Exception("ERROR: Sparse kernel provided, but mode is not sparse") + elif type(self.sijs) == np.ndarray: + if mode != "dense": + raise Exception("ERROR: Dense kernel provided, but mode is not dense") + else: + raise Exception("Invalid kernel provided") + #TODO: is the below dimensionality check valid for both dense and sparse kernels? + if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel") + if type(self.data) != type(None): + print("WARNING: similarity kernel found. Provided data matrix will be ignored.") + else: #similarity kernel has not been provided + if type(self.data) != type(None): + if np.shape(self.data)[0]!=self.n: + raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix") + + if self.mode == "dense": + if self.num_neighbors is not None: + raise Exception("num_neighbors wrongly provided for dense mode") + self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode + self.content = np.array(create_kernel( X = self.data.tolist(), metric = self.metric, mode = self.mode, num_neigh = self.num_neighbors)) + val = self.content[0] + row = list(self.content[1].astype(int)) + col = list(self.content[2].astype(int)) + if self.mode=="dense": + self.sijs = np.zeros((n,n)) + self.sijs[row,col] = val + if self.mode=="sparse": + self.sijs = sparse.csr_matrix((val, (row, col)), [n,n]) + else: + raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided") + + + #Breaking similarity matrix to simpler native data structures for 
implicit pybind11 binding + if self.mode=="dense": + self.sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure + + if type(self.sijs[0])==int or type(self.sijs[0])==float: #Its critical that we pass a list of list to pybind11 + #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix) + l=[] + l.append(self.sijs) + self.sijs=l + + self.effective_ground = self.get_effective_ground_set() + if self.mode == 'dense': + if self.dense_kernel == None: + self.dense_kernel = create_kernel_NS(X_ground = self.data, X_master = self.data, metric = self.metric) + if self.partial: + self.effectiveGroundSet = self.data + else: + self.effectiveGroundSet = set(range(n)) + self.numEffectiveGroundset = len(self.effectiveGroundSet) + self.memoizedC = [[] for _ in range(self.numEffectiveGroundset)] + self.prevDetVal = 0 + self.memoizedD = [] + self.prevItem = -1 + + if self.partial: + ind = 0 + for it in self.effectiveGroundSet: + self.originalToPartialIndexMap[it] = ind + ind += 1 + self.memoizedD.append(np.sqrt(self.dense_kernel[it][it] + self.lambdaVal)) + else: + for i in range(self.n): + self.memoizedD.append(np.sqrt(self.dense_kernel[i][i] + self.lambdaVal)) + + elif arr_val is not None and arr_count is not None and arr_col is not None: + self.n = n + self.mode = 'sparse' + self.lambdaVal = lambdaVal + self.sparseKernel = SparseSim(arr_val, arr_count, arr_col) + self.effectiveGroundSet = set(range(n_)) + self.numEffectiveGroundset = len(self.effectiveGroundSet) + self.memoizedC = [[] for _ in range(n_)] + self.memoizedD = [] + self.prevDetVal = 0 + self.prevItem = -1 + + for i in range(self.n): + self.memoizedD.append(np.sqrt(self.sparseKernel.get_val(i, i) + self.lambdaVal)) + + else: + raise ValueError("Invalid constructor arguments. 
Please provide either denseKernel or sparse kernel data.") + + def evaluate(self, X): + currMemoizedC = self.memoizedC.copy() + currMemoizedD = self.memoizedD.copy() + currprevItem = self.prevItem + currprevDetVal = self.prevDetVal + self.setMemoization(X) + result = self.evaluate_with_memoization(X) + self.memoizedC = currMemoizedC + self.memoizedD = currMemoizedD + self.prevItem = currprevItem + self.prevDetVal = currprevDetVal + return result + + def evaluate_with_memoization(self, X): + return self.prevDetVal + + def marginal_gain(self, X, item): + currMemoizedC = self.memoizedC.copy() + currMemoizedD = self.memoizedD.copy() + currprevItem = self.prevItem + currprevDetVal = self.prevDetVal + self.set_memoization(X) + result = self.marginal_gain_with_memoization(X, item) + self.memoizedC = currMemoizedC + self.memoizedD = currMemoizedD + self.prevItem = currprevItem + self.prevDetVal = currprevDetVal + return result + + def marginal_gain_with_memoization(self, X, item, enableChecks=True): + effectiveX = X.intersection(self.effective_ground_set) if self.partial else X + gain = 0 + + if enableChecks and item in effectiveX: + return 0 + + if self.partial and item not in self.effective_ground_set: + return 0 + + itemIndex = self.originalToPartialIndexMap[item] if self.partial else item + + if self.mode == "dense": + if len(effectiveX) == 0: + gain = math.log(self.memoizedD[itemIndex] * self.memoizedD[itemIndex]) + elif len(effectiveX) == 1: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = self.dense_kernel[self.prevItem][item] / self.memoizedD[prevItemIndex] + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + else: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = (self.dense_kernel[self.prevItem][item] - + self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[itemIndex])) / self.memoizedD[prevItemIndex] + 
gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + elif self.mode == "sparse": + if len(effectiveX) == 0: + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex])) + elif len(effectiveX) == 1: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = self.sparseKernel.get_val(self.prevItem, item) / self.memoizedD[prevItemIndex] + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + else: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + e = (self.sparseKernel.get_val(self.prevItem, item) - + self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[itemIndex])) / self.memoizedD[prevItemIndex] + gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e)) + else: + raise ValueError("Only dense and sparse mode supported") + + return gain + + def update_memoization(self, X, item): + effectiveX = X.intersection(self.effective_ground_set) if self.partial else X + + if item in effectiveX: + return + + if item not in self.effective_ground_set: + return + + self.prevDetVal += self.marginal_gain_with_memoization(X, item) + + if len(effectiveX) == 0: + pass + else: + prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem + prevDValue = self.memoizedD[prevItemIndex] + + for i in self.effectiveGroundSet: + iIndex = self.originalToPartialIndexMap[i] if self.partial else i + + if i in effectiveX: + continue + + e = 0 + if len(effectiveX) == 1: + e = self.dense_kernel[self.prevItem][i] / prevDValue + self.memoizedC[iIndex].append(e) + else: + e = (self.dense_kernel[self.prevItem][i] - + self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[iIndex])) / prevDValue + self.memoizedC[iIndex].append(e) + + self.memoizedD[iIndex] = math.sqrt(math.fabs(self.memoizedD[iIndex] * self.memoizedD[iIndex] - e 
* e)) + + self.prevItem = item + + def get_effective_ground_set(self): + return self.effective_ground_set + + def clear_memoization(self): + self.memoizedC.clear() + self.memoizedC = defaultdict(list) + self.prevDetVal = 0 + self.prevItem = -1 + + if self.mode == "dense": + if self.partial: + for it in self.effective_ground_set: + index = self.originalTo From 6e9eee5db2858206cf5734e19d50920d6cc89417 Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:29:40 +0530 Subject: [PATCH 56/58] Create_kernel_sklearn updated for batchwise calculation on cuda --- pytorch/submod/helper.py | 75 +++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/pytorch/submod/helper.py b/pytorch/submod/helper.py index 7832a32..1797e4f 100644 --- a/pytorch/submod/helper.py +++ b/pytorch/submod/helper.py @@ -87,29 +87,81 @@ def create_kernel_dense(X, metric, method="sklearn"): raise Exception("For creating dense kernel, only 'sklearn' method is supported") return dense -def create_kernel_dense_sklearn(X, metric, X_rep=None): +def create_kernel_dense_sklearn(X, metric, X_rep=None, batch=0): dense = None D = None - + batch_size = batch if metric == "euclidean": if X_rep is None: - D = torch.cdist(X, X, p=2) + # print(X.shape) + # Process data in batches for torch.cdist + for i in range(0, len(X), batch_size): + X_batch = X[i:i+batch_size].to(device="cuda") + # print(X_batch.shape) + D_batch = torch.cdist(X_batch, X, p=2).to(device="cuda") + gamma = 1 / X.shape[1] + dense_batch = torch.exp(-D_batch * gamma).to(device="cuda") + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) else: - D = torch.cdist(X_rep, X, p=2) - gamma = 1 / X.shape[1] - dense = torch.exp(-D * gamma) # Obtaining Similarity from distance + # Process data in batches for torch.cdist + for i in range(0, len(X_rep), batch_size): + 
X_rep_batch = X_rep[i:i+batch_size].to(device="cuda") + D_batch = torch.cdist(X_rep_batch, X).to(device="cuda") + gamma = 1 / X.shape[1] + dense_batch = torch.exp(-D_batch * gamma).to(device="cuda") + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) elif metric == "cosine": if X_rep is None: - dense = torch.nn.functional.cosine_similarity(X, X, dim=1) + # Process data in batches for torch.nn.functional.cosine_similarity + for i in range(0, len(X), batch_size): + X_batch = X[i:i+batch_size].to(device="cuda") + dense_batch = torch.nn.functional.cosine_similarity(X_batch.unsqueeze(1), X.unsqueeze(0), dim=2) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) else: - dense = torch.nn.functional.cosine_similarity(X_rep, X, dim=1) + # Process data in batches for torch.nn.functional.cosine_similarity + for i in range(0, len(X_rep), batch_size): + X_rep_batch = X_rep[i:i+batch_size].to(device="cuda") + dense_batch = torch.nn.functional.cosine_similarity(X_rep_batch, X, dim=1) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) elif metric == "dot": if X_rep is None: - dense = torch.matmul(X, X.t()) + # Process data in batches for torch.matmul + for i in range(0, len(X), batch_size): + X_batch = X[i:i+batch_size].to(device="cuda") + dense_batch = torch.matmul(X_batch, X.t()) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, dense_batch]) else: - dense = torch.matmul(X_rep, X.t()) + # Process data in batches for torch.matmul + for i in range(0, len(X_rep), batch_size): + X_rep_batch = X_rep[i:i+batch_size].to(device="cuda") + dense_batch = torch.matmul(X_rep_batch, X.t()) + # Accumulate results from batches + if dense is None: + dense = dense_batch + else: + dense = torch.cat([dense, 
dense_batch]) else: raise Exception("ERROR: unsupported metric for this method of kernel creation") @@ -119,9 +171,8 @@ def create_kernel_dense_sklearn(X, metric, X_rep=None): else: assert dense.shape == (X.shape[0], X.shape[0]) + torch.cuda.empty_cache() return dense - pass - def create_cluster_kernels(X, metric, cluster_lab=None, num_cluster=None, onlyClusters=False): lab = [] From 4e2eab6ffbf1462994ac46ff6f9c603b9c5b318e Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 13 Feb 2024 21:28:46 +0530 Subject: [PATCH 57/58] Cuda facilityLocation.py --- submodlib/functions/facilityLocation.py | 42 ++++++++++++++++++++----- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/submodlib/functions/facilityLocation.py b/submodlib/functions/facilityLocation.py index 276d8b0..4689471 100644 --- a/submodlib/functions/facilityLocation.py +++ b/submodlib/functions/facilityLocation.py @@ -10,6 +10,12 @@ from submodlib.helper import create_kernel, create_cluster_kernels #from memory_profiler import profile +if torch.cuda.is_available() : + from pytorch.submod import FacilityLocation +else: + from submodlib_cpp import FacilityLocation + + class FacilityLocationFunction(SetFunction): """Implementation of the Facility Location submodular function (FL). 
@@ -224,11 +230,20 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, l.append(self.cpp_sijs) self.cpp_sijs=l - self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + else: + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + + # elif pybind_mode == "memoryview": # self.cpp_obj = FacilityLocation(self.n, memoryview(self.sijs), False, self.cpp_ground_sub, self.separate_rep) elif pybind_mode == "numpyarray": - self.cpp_obj = FacilityLocation(self.n, self.sijs, False, self.cpp_ground_sub, self.separate_rep) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + else: + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep) + elif pybind_mode == "array32": # print("Kernel's type = ", self.sijs.dtype) self.sijs.astype('float32', copy=False) @@ -250,16 +265,26 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, elif self.mode=="dense" and create_dense_cpp_kernel_in_python == False: if self.separate_rep == True: - self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric) + else: + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric) else: - self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric) + else: + self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), 
[[0.]], False, self.metric) + elif self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation) self.cpp_sijs = {} self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal) self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val - self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col']) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col']) + else: + self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col']) elif self.mode=="clustered": l_temp = [] @@ -273,8 +298,11 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, l_temp.append(temp) self.cluster_sijs = l_temp - self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map) + if torch.cuda.is_available() : + self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map) + else: + self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map) #self.cpp_ground_sub=self.cpp_obj.getEffectiveGroundSet() #self.ground_sub=self.cpp_ground_sub - self.effective_ground = self.cpp_obj.getEffectiveGroundSet() \ No newline at end of file + self.effective_ground = self.cpp_obj.getEffectiveGroundSet() From b5a0f2a19da853877b5fb895a3f458563e5ab6ce Mon Sep 17 00:00:00 2001 From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com> Date: Tue, 13 Feb 2024 21:38:56 +0530 Subject: [PATCH 58/58] coda facilityLocation.py --- submodlib/functions/facilityLocation.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/submodlib/functions/facilityLocation.py b/submodlib/functions/facilityLocation.py index 4689471..198db06 100644 --- a/submodlib/functions/facilityLocation.py +++ b/submodlib/functions/facilityLocation.py @@ -8,6 +8,7 @@ from submodlib_cpp import FacilityLocation from submodlib_cpp import FacilityLocation2 from submodlib.helper import create_kernel, create_cluster_kernels +import torch #from memory_profiler import profile if torch.cuda.is_available() :