From 6936688412392d7a8c8fb224ae1ac6de6915b0c6 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Sun, 7 Jan 2024 10:02:05 +0530
Subject: [PATCH 01/58] setCover.py

---
 submodlib/functions/setCover.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py
index 41d0baa..411b069 100644
--- a/submodlib/functions/setCover.py
+++ b/submodlib/functions/setCover.py
@@ -1,7 +1,8 @@
 # setCover.py
 # Author: Vishal Kaushal <vishal.kaushal@gmail.com>
 from .setFunction import SetFunction
-from submodlib_cpp import SetCover
+#from submodlib_cpp import SetCover
+from submodlib_pytorch import SetCover
 
 class SetCoverFunction(SetFunction):
 	"""Implementation of the Set-Cover (SC) submodular function.
@@ -42,7 +43,8 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 		self.cover_set = cover_set
 		self.num_concepts = num_concepts
 		self.concept_weights = concept_weights
-		self.cpp_obj = None
+		#self.cpp_obj = None
+		self.obj = None
 
 		if self.n <= 0:
 			raise Exception("ERROR: Number of elements in ground set must be positive")
@@ -56,8 +58,9 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 		else:
 			self.concept_weights = [1] * self.num_concepts
 
-		self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
+		#self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
+		self.obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
 
 		self.effective_ground = set(range(n))
 
-	
\ No newline at end of file
+	

From a266b9d333a0efd7a10995f29b838056dcadaf89 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Sun, 7 Jan 2024 10:07:02 +0530
Subject: [PATCH 02/58] setCover.py

---
 submodlib/functions/setCover.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py
index 411b069..eaf2c54 100644
--- a/submodlib/functions/setCover.py
+++ b/submodlib/functions/setCover.py
@@ -1,8 +1,7 @@
 # setCover.py
 # Author: Vishal Kaushal <vishal.kaushal@gmail.com>
 from .setFunction import SetFunction
-#from submodlib_cpp import SetCover
-from submodlib_pytorch import SetCover
+from submodlib_cpp import SetCover
 
 class SetCoverFunction(SetFunction):
 	"""Implementation of the Set-Cover (SC) submodular function.
@@ -43,9 +42,8 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 		self.cover_set = cover_set
 		self.num_concepts = num_concepts
 		self.concept_weights = concept_weights
-		#self.cpp_obj = None
-		self.obj = None
-
+		self.cpp_obj = None
+		
 		if self.n <= 0:
 			raise Exception("ERROR: Number of elements in ground set must be positive")
 
@@ -58,9 +56,7 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 		else:
 			self.concept_weights = [1] * self.num_concepts
 
-		#self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
-		self.obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
-
+		self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
 		self.effective_ground = set(range(n))
 
 	

From 78f51b0d0bf66dcf3cd26aab2da7150bfa0a699e Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 15:15:43 +0530
Subject: [PATCH 03/58] Create NaiveGreedyOptimizer.py

---
 cpp/optimizers/NaiveGreedy.py | 90 +++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 cpp/optimizers/NaiveGreedy.py

diff --git a/cpp/optimizers/NaiveGreedy.py b/cpp/optimizers/NaiveGreedy.py
new file mode 100644
index 0000000..728f16a
--- /dev/null
+++ b/cpp/optimizers/NaiveGreedy.py
@@ -0,0 +1,90 @@
+import torch
+import random
+from typing import List, Tuple, Set
+
+class NaiveGreedyOptimizer:
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        return abs(val1 - val2) < eps
+
+    def maximize(
+        self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy
+    ):
+        greedy_vector = []
+        greedy_set = set()
+        if not costs:
+            # greedy_vector = [None] * budget
+            greedy_set = set()
+        rem_budget = budget
+        ground_set = f_obj.get_effective_ground_set()
+        #print(ground_set)
+        if verbose:
+            print("Ground set:")
+            print(ground_set)
+            print(f"Num elements in groundset = {len(ground_set)}")
+            print("Costs:")
+            print(costs)
+            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
+            print("Starting the naive greedy algorithm")
+            print("Initial greedy set:")
+            print(greedy_set)
+
+        f_obj.clear_memoization()
+        best_id = None
+        best_val = None
+        step = 1
+        display_next = step
+        percent = 0
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0:
+            best_id = None
+            best_val = float("-inf")
+
+            for i in ground_set:
+                if i in greedy_set:
+                    continue
+                gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False)
+                # print(gain)
+                if verbose:
+                    print(f"Gain of {i} is {gain}")
+
+                if gain > best_val:
+                    best_id = i
+                    best_val = gain
+
+            if verbose:
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+
+            if (best_val < 0 and stopIfNegativeGain) or (
+                self.equals(best_val, 0, 1e-5) and stop_if_zero_gain
+            ):
+                break
+            else:
+                f_obj.update_memoization(greedy_set, best_id)
+                greedy_set.add(best_id)
+                greedy_vector.append((best_id, best_val))
+                rem_budget -= 1
+
+                if verbose:
+                    print(f"Added element {best_id} and the gain is {best_val}")
+                    print(f"Updated greedy set: {greedy_set}")
+
+                if show_progress:
+                    percent = int((iter_count + 1.0) / N * 100)
+
+                    if percent >= display_next:
+                        print(
+                            f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]",
+                            end="",
+                        )
+                        print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="")
+                        display_next += step
+
+                    iter_count += 1
+
+        return greedy_vector

From 47f571e60bea2d9606cfc4fc6c0ed6561b763e6d Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 15:22:08 +0530
Subject: [PATCH 04/58] Delete cpp/optimizers/NaiveGreedy.py

---
 cpp/optimizers/NaiveGreedy.py | 90 -----------------------------------
 1 file changed, 90 deletions(-)
 delete mode 100644 cpp/optimizers/NaiveGreedy.py

diff --git a/cpp/optimizers/NaiveGreedy.py b/cpp/optimizers/NaiveGreedy.py
deleted file mode 100644
index 728f16a..0000000
--- a/cpp/optimizers/NaiveGreedy.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import torch
-import random
-from typing import List, Tuple, Set
-
-class NaiveGreedyOptimizer:
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def equals(val1, val2, eps):
-        return abs(val1 - val2) < eps
-
-    def maximize(
-        self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy
-    ):
-        greedy_vector = []
-        greedy_set = set()
-        if not costs:
-            # greedy_vector = [None] * budget
-            greedy_set = set()
-        rem_budget = budget
-        ground_set = f_obj.get_effective_ground_set()
-        #print(ground_set)
-        if verbose:
-            print("Ground set:")
-            print(ground_set)
-            print(f"Num elements in groundset = {len(ground_set)}")
-            print("Costs:")
-            print(costs)
-            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
-            print("Starting the naive greedy algorithm")
-            print("Initial greedy set:")
-            print(greedy_set)
-
-        f_obj.clear_memoization()
-        best_id = None
-        best_val = None
-        step = 1
-        display_next = step
-        percent = 0
-        N = rem_budget
-        iter_count = 0
-
-        while rem_budget > 0:
-            best_id = None
-            best_val = float("-inf")
-
-            for i in ground_set:
-                if i in greedy_set:
-                    continue
-                gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False)
-                # print(gain)
-                if verbose:
-                    print(f"Gain of {i} is {gain}")
-
-                if gain > best_val:
-                    best_id = i
-                    best_val = gain
-
-            if verbose:
-                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
-
-            if (best_val < 0 and stopIfNegativeGain) or (
-                self.equals(best_val, 0, 1e-5) and stop_if_zero_gain
-            ):
-                break
-            else:
-                f_obj.update_memoization(greedy_set, best_id)
-                greedy_set.add(best_id)
-                greedy_vector.append((best_id, best_val))
-                rem_budget -= 1
-
-                if verbose:
-                    print(f"Added element {best_id} and the gain is {best_val}")
-                    print(f"Updated greedy set: {greedy_set}")
-
-                if show_progress:
-                    percent = int((iter_count + 1.0) / N * 100)
-
-                    if percent >= display_next:
-                        print(
-                            f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]",
-                            end="",
-                        )
-                        print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="")
-                        display_next += step
-
-                    iter_count += 1
-
-        return greedy_vector

From 8a2603d73e7bae34a411242fdf2837fa4dc06f7c Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 19:41:03 +0530
Subject: [PATCH 05/58] Create SetFunction.py

---
 cpp/SetFunction.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 cpp/SetFunction.py

diff --git a/cpp/SetFunction.py b/cpp/SetFunction.py
new file mode 100644
index 0000000..661bf0c
--- /dev/null
+++ b/cpp/SetFunction.py
@@ -0,0 +1,103 @@
+from typing import Set, List, Tuple
+import numpy as np
+import torch
+import torch.nn as nn
+import numpy as np
+import random
+# import optimizer python files
+
+class SetFunction(nn.Module):
+    """Base class for set functions that can be maximized by an optimizer.
+
+    The evaluation and memoization methods are hooks that concrete
+    subclasses must override; on this base class they raise
+    ``NotImplementedError``. ``maximize`` is the shared entry point.
+    """
+
+    def __init__(self):
+        # nn.Module needs its own __init__ to run before it is used.
+        super().__init__()
+
+    def evaluate(self, X: Set[int]) -> float:
+        """Return the value of the function on the subset ``X``."""
+        raise NotImplementedError
+
+    def evaluate_with_memoization(self, X: Set[int]) -> float:
+        """Return the value of ``X`` computed from memoized statistics."""
+        raise NotImplementedError
+
+    def marginal_gain(self, X: Set[int], item: int) -> float:
+        """Return the gain of adding ``item`` to ``X``."""
+        raise NotImplementedError
+
+    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+        """Return the gain of adding ``item`` to ``X`` from memoized statistics."""
+        raise NotImplementedError
+
+    def update_memoization(self, X: Set[int], item: int) -> None:
+        """Update the memoized statistics for adding ``item`` to ``X``."""
+        raise NotImplementedError
+
+    def get_effective_ground_set(self) -> Set[int]:
+        """Return the elements the optimizers may choose from."""
+        raise NotImplementedError
+
+    def maximize(self, optimizer: str, budget: float, stopIfZeroGain: bool, stopIfNegativeGain: bool, verbose: bool,
+                  costs: List[float] = None, cost_sensitive_greedy: bool = False, show_progress: bool = False, epsilon: float = 0.0) -> List[Tuple[int, float]]:
+        """Run the named optimizer on this function.
+
+        Args:
+            optimizer: one of "NaiveGreedy", "LazyGreedy", "StochasticGreedy"
+                or "LazierThanLazyGreedy".
+            budget: selection budget handed to the optimizer.
+            stopIfZeroGain: forwarded as the optimizer's zero-gain stop flag.
+            stopIfNegativeGain: forwarded as the negative-gain stop flag.
+            verbose: forwarded; enables the optimizer's trace output.
+            costs: forwarded unchanged.
+            cost_sensitive_greedy: forwarded unchanged.
+            show_progress: forwarded; enables the progress bar.
+            epsilon: accepted but not forwarded to the optimizer.
+
+        Returns:
+            The optimizer's list of (element, gain) pairs, or ``[]`` after
+            printing "Invalid Optimizer" for an unknown name.
+        """
+        selected = self._get_optimizer(optimizer)
+        if selected is None:
+            print("Invalid Optimizer")
+            return []
+        # The first four arguments share their position in every optimizer;
+        # the rest go by keyword because some optimizers take an extra
+        # ``epsilon`` before ``verbose``.
+        return selected.maximize(
+            self, budget, stopIfZeroGain, stopIfNegativeGain,
+            verbose=verbose, show_progress=show_progress,
+            costs=costs, cost_sensitive_greedy=cost_sensitive_greedy,
+        )
+
+    def _get_optimizer(self, optimizer_name: str):
+        """Return a new optimizer instance for ``optimizer_name``, or ``None``."""
+        # NOTE(review): the optimizer classes are not imported in this file.
+        if optimizer_name == "NaiveGreedy":
+            return NaiveGreedyOptimizer()
+        elif optimizer_name == "LazyGreedy":
+            return LazyGreedyOptimizer()
+        elif optimizer_name == "StochasticGreedy":
+            return StochasticGreedyOptimizer()
+        elif optimizer_name == "LazierThanLazyGreedy":
+            return LazierThanLazyGreedyOptimizer()
+        else:
+            return None
+
+    def cluster_init(self, n: int, k_dense: List[List[float]], ground: Set[int],
+                     partial: bool, lambda_: float) -> None:
+        """Hook for subclasses; not implemented on the base class."""
+        raise NotImplementedError
+
+    def set_memoization(self, X: Set[int]) -> None:
+        """Hook for subclasses; not implemented on the base class."""
+        raise NotImplementedError
+
+    def clear_memoization(self) -> None:
+        """Hook for subclasses; not implemented on the base class."""
+        raise NotImplementedError

From 95e7cd243dff4f2caf72745e7ad62f795a2d3303 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 19:42:16 +0530
Subject: [PATCH 06/58] Create LazierThanLazyGreedyOptimizer.py

---
 .../LazierThanLazyGreedyOptimizer.py          | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 cpp/optimizers/LazierThanLazyGreedyOptimizer.py

diff --git a/cpp/optimizers/LazierThanLazyGreedyOptimizer.py b/cpp/optimizers/LazierThanLazyGreedyOptimizer.py
new file mode 100644
index 0000000..86e355c
--- /dev/null
+++ b/cpp/optimizers/LazierThanLazyGreedyOptimizer.py
@@ -0,0 +1,130 @@
+import random
+import math
+
+class LazierThanLazyGreedyOptimizer:
+    """Greedy maximizer that scores only a random sample each iteration."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        """Return True when ``val1`` and ``val2`` differ by less than ``eps``."""
+        return abs(val1 - val2) < eps
+
+    @staticmethod
+    def print_sorted_set(sorted_set):
+        """Print an iterable of (value, element) pairs on one line."""
+        print("[", end="")
+        for val, elem in sorted_set:
+            print(f"({val}, {elem}), ", end="")
+        print("]")
+
+    def maximize(self, f_obj, budget, stop_if_zero_gain=False, stop_if_negative_gain=False,
+                 epsilon=0.1, verbose=False, show_progress=False, costs=None, cost_sensitive_greedy=False):
+        """Select up to ``budget`` elements, one per iteration.
+
+        Each iteration draws a random subset of the not-yet-selected
+        elements and adds the one with the largest marginal gain; ties go
+        to the smallest element.
+
+        Args:
+            f_obj: object providing ``get_effective_ground_set``,
+                ``clear_memoization``, ``marginal_gain_with_memoization``
+                and ``update_memoization``.
+            budget: maximum number of elements to select.
+            stop_if_zero_gain: stop when the best gain is within 1e-5 of 0.
+            stop_if_negative_gain: stop when the best gain is negative.
+            epsilon: currently ignored; 0.05 is always used (see below).
+            verbose: print a trace of the run.
+            show_progress: print a progress bar.
+            costs: unused.
+            cost_sensitive_greedy: unused.
+
+        Returns:
+            List of (element, gain) pairs in selection order.
+        """
+        greedy_vector = []
+        greedy_set = set()
+        rem_budget = budget
+        remaining_set = set(f_obj.get_effective_ground_set())
+        n = len(remaining_set)
+        # NOTE(review): the ``epsilon`` argument has always been overridden
+        # here; confirm the intended value before honouring the argument.
+        epsilon = 0.05
+        # Sample at least one candidate per round; a non-positive budget
+        # never enters the loop, so it must not reach the division.
+        random_set_size = max(1, int((n / budget) * math.log(1 / epsilon))) if budget > 0 else 0
+
+        if verbose:
+            print(f"Epsilon = {epsilon}")
+            print(f"Random set size = {random_set_size}")
+            print("Ground set:")
+            print(remaining_set)
+            print(f"Num elements in ground set = {len(remaining_set)}")
+            print("Starting the LazierThanLazy greedy algorithm")
+            print("Initial greedy set:")
+            print(greedy_set)
+
+        f_obj.clear_memoization()
+        i = 0
+        step = 1
+        display_next = step
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0 and remaining_set:
+            # Sample without replacement from what is actually left, so the
+            # draw terminates even when fewer elements remain than requested.
+            sample_size = min(random_set_size, len(remaining_set))
+            random_set = sorted(random.sample(sorted(remaining_set), sample_size))
+
+            if verbose:
+                print(f"Iteration {i}")
+                print(f"Random set = {random_set}")
+                print("Now running lazy greedy on the random set")
+
+            best_id = None
+            best_val = float("-inf")
+
+            # Ascending order plus a strict ">" keeps the smallest element on ties.
+            for elem in random_set:
+                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
+                if verbose:
+                    print(f"Gain of {elem} is {gain}")
+                if gain > best_val:
+                    best_id = elem
+                    best_val = gain
+
+            if best_id is None:
+                # No sampled element had a gain above -inf; nothing to add.
+                break
+
+            if verbose:
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+
+            remaining_set.remove(best_id)
+
+            if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
+                break
+
+            f_obj.update_memoization(greedy_set, best_id)
+            greedy_set.add(best_id)
+            greedy_vector.append((best_id, best_val))
+            rem_budget -= 1
+
+            if verbose:
+                print(f"Added element {best_id} and the gain is {best_val}")
+                print("Updated greedy set:", greedy_set)
+
+            if show_progress:
+                percent = int(((iter_count + 1.0) / N) * 100)
+                if percent >= display_next:
+                    print("\r", "[" + "|" * (percent // 5) + " " * (100 // 5 - percent // 5) + "]", end="")
+                    print(f" {percent}% [Iteration {iter_count + 1} of {N}]", end="")
+                    display_next += step
+                iter_count += 1
+
+            i += 1
+
+        return greedy_vector

From 0715bb15065ac07320fd62229d245d5f2e7f27b2 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 19:43:13 +0530
Subject: [PATCH 07/58] Create LazyGreedyOptimizer.py

---
 cpp/optimizers/LazyGreedyOptimizer.py | 117 ++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 cpp/optimizers/LazyGreedyOptimizer.py

diff --git a/cpp/optimizers/LazyGreedyOptimizer.py b/cpp/optimizers/LazyGreedyOptimizer.py
new file mode 100644
index 0000000..45d7590
--- /dev/null
+++ b/cpp/optimizers/LazyGreedyOptimizer.py
@@ -0,0 +1,117 @@
+import torch
+import heapq
+
+class LazyGreedyOptimizer:
+    """Lazy greedy maximizer driven by a heap of marginal-gain upper bounds."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        """Return True when ``val1`` and ``val2`` differ by less than ``eps``."""
+        return abs(val1 - val2) < eps
+
+    def maximize(self, f_obj, budget, stop_if_zero_gain, stop_if_negative_gain,
+                 verbose, show_progress, costs, cost_sensitive_greedy):
+        """Select up to ``budget`` elements by lazy greedy maximization.
+
+        Every element starts with its gain w.r.t. the empty set as a bound.
+        The element with the largest bound is re-scored against the current
+        selection and accepted if it still beats the next bound; otherwise
+        it is pushed back with the refreshed bound.
+
+        Args:
+            f_obj: object providing ``get_effective_ground_set``,
+                ``clear_memoization``, ``marginal_gain_with_memoization``
+                and ``update_memoization``.
+            budget: maximum number of elements to select.
+            stop_if_zero_gain: stop when the accepted gain is within 1e-5 of 0.
+            stop_if_negative_gain: stop when the accepted gain is negative.
+            verbose: print a trace of the run.
+            show_progress: print a progress bar.
+            costs: per-element costs, indexed by element; only read when
+                ``cost_sensitive_greedy`` is true.
+            cost_sensitive_greedy: rank elements by gain / cost.
+
+        Returns:
+            List of (element, gain) pairs in selection order; ``gain`` is
+            always the plain marginal gain.
+        """
+        greedy_vector = []
+        greedy_set = set()
+        rem_budget = budget
+        ground_set = f_obj.get_effective_ground_set()
+
+        if verbose:
+            print("Ground set:")
+            print(ground_set)
+            print(f"Num elements in groundset = {len(ground_set)}")
+            print("Costs:")
+            print(costs)
+            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
+            print("Starting the lazy greedy algorithm")
+            print("Initial greedy set:")
+            print(greedy_set)
+
+        f_obj.clear_memoization()
+
+        def score(elem):
+            # Returns (plain gain, ranking key) for the current selection.
+            gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
+            key = gain / costs[elem] if cost_sensitive_greedy else gain
+            return gain, key
+
+        # heapq is a min-heap, so ranking keys are stored negated.
+        max_heap = [(-score(elem)[1], elem) for elem in ground_set]
+        heapq.heapify(max_heap)
+
+        if verbose:
+            print("Max heap constructed")
+
+        step = 1
+        display_next = step
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0 and max_heap:
+            neg_bound, candidate = heapq.heappop(max_heap)
+
+            if verbose:
+                print(f"currentMax element: {candidate} and its upper bound: {-neg_bound}")
+
+            new_gain, new_key = score(candidate)
+
+            if verbose:
+                print(f"newMaxBound: {new_gain}")
+
+            # With an empty heap the candidate has no competitor left, so it
+            # is accepted without peeking at max_heap[0].
+            if max_heap and new_key < -max_heap[0][0]:
+                heapq.heappush(max_heap, (-new_key, candidate))
+                continue
+
+            if (new_gain < 0 and stop_if_negative_gain) or \
+                    (self.equals(new_gain, 0, 1e-5) and stop_if_zero_gain):
+                break
+
+            f_obj.update_memoization(greedy_set, candidate)
+            greedy_set.add(candidate)
+            greedy_vector.append((candidate, new_gain))
+            rem_budget -= 1
+
+            if verbose:
+                print(f"Added element {candidate} and the gain is {new_gain}")
+                print("Updated greedySet:", greedy_set)
+
+            if show_progress:
+                percent = int(((iter_count + 1.0) / N) * 100)
+
+                if percent >= display_next:
+                    print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]",
+                          end=f" {percent}% [Iteration {iter_count + 1} of {N}]")
+                    display_next += step
+
+                iter_count += 1
+
+        return greedy_vector

From 3820a32bbb49881bbe29baaff5289084104f17da Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 19:43:53 +0530
Subject: [PATCH 08/58] Create NaiveGreedyOptimizer.py

---
 cpp/optimizers/NaiveGreedyOptimizer.py | 110 ++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 cpp/optimizers/NaiveGreedyOptimizer.py

diff --git a/cpp/optimizers/NaiveGreedyOptimizer.py b/cpp/optimizers/NaiveGreedyOptimizer.py
new file mode 100644
index 0000000..728f16a
--- /dev/null
+++ b/cpp/optimizers/NaiveGreedyOptimizer.py
@@ -0,0 +1,110 @@
+import torch
+import random
+from typing import List, Tuple, Set
+
+class NaiveGreedyOptimizer:
+    """Naive greedy maximizer: re-scores every unselected element each round."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        """Return True when ``val1`` and ``val2`` differ by less than ``eps``."""
+        return abs(val1 - val2) < eps
+
+    def maximize(
+        self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy
+    ):
+        """Select up to ``budget`` elements, adding the best one each round.
+
+        Args:
+            f_obj: object providing ``get_effective_ground_set``,
+                ``clear_memoization``, ``marginal_gain_with_memoization``
+                and ``update_memoization``.
+            budget: maximum number of elements to select.
+            stop_if_zero_gain: stop when the best gain is within 1e-5 of 0.
+            stopIfNegativeGain: stop when the best gain is negative.
+            verbose: print a trace of the run.
+            show_progress: print a progress bar.
+            costs: only printed in verbose mode.
+            cost_sensitive_greedy: only printed in verbose mode.
+
+        Returns:
+            List of (element, gain) pairs in selection order. It is shorter
+            than ``budget`` when a stop condition fires or the ground set
+            runs out.
+        """
+        greedy_vector = []
+        greedy_set = set()
+        rem_budget = budget
+        ground_set = f_obj.get_effective_ground_set()
+
+        if verbose:
+            print("Ground set:")
+            print(ground_set)
+            print(f"Num elements in groundset = {len(ground_set)}")
+            print("Costs:")
+            print(costs)
+            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
+            print("Starting the naive greedy algorithm")
+            print("Initial greedy set:")
+            print(greedy_set)
+
+        f_obj.clear_memoization()
+        step = 1
+        display_next = step
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0:
+            best_id = None
+            best_val = float("-inf")
+
+            for i in ground_set:
+                if i in greedy_set:
+                    continue
+                gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False)
+                if verbose:
+                    print(f"Gain of {i} is {gain}")
+
+                if gain > best_val:
+                    best_id = i
+                    best_val = gain
+
+            if best_id is None:
+                # Every element is already selected (or none had a gain
+                # above -inf): stop instead of adding ``None``.
+                break
+
+            if verbose:
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+
+            if (best_val < 0 and stopIfNegativeGain) or (
+                self.equals(best_val, 0, 1e-5) and stop_if_zero_gain
+            ):
+                break
+
+            f_obj.update_memoization(greedy_set, best_id)
+            greedy_set.add(best_id)
+            greedy_vector.append((best_id, best_val))
+            rem_budget -= 1
+
+            if verbose:
+                print(f"Added element {best_id} and the gain is {best_val}")
+                print(f"Updated greedy set: {greedy_set}")
+
+            if show_progress:
+                percent = int((iter_count + 1.0) / N * 100)
+
+                if percent >= display_next:
+                    print(
+                        f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]",
+                        end="",
+                    )
+                    print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="")
+                    display_next += step
+
+                iter_count += 1
+
+        return greedy_vector

From 5dfbdf4e448c38d6ca4824c744bd92e48a81fad6 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 19:44:38 +0530
Subject: [PATCH 09/58] Create StochasticGreedyOptimizer.py

---
 cpp/optimizers/StochasticGreedyOptimizer.py | 133 ++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 cpp/optimizers/StochasticGreedyOptimizer.py

diff --git a/cpp/optimizers/StochasticGreedyOptimizer.py b/cpp/optimizers/StochasticGreedyOptimizer.py
new file mode 100644
index 0000000..bcc9263
--- /dev/null
+++ b/cpp/optimizers/StochasticGreedyOptimizer.py
@@ -0,0 +1,133 @@
+import random
+from typing import List, Tuple, Set
+import math
+import sys
+# from StochasticGreedyOptimizer import SetFunction
+
+class StochasticGreedyOptimizer:
+    """Greedy maximizer that scores only a random sample each iteration."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1: float, val2: float, eps: float) -> bool:
+        """Return True when ``val1`` and ``val2`` differ by less than ``eps``."""
+        return abs(val1 - val2) < eps
+
+    # ``SetFunction`` is quoted because it is not imported in this file; an
+    # unquoted annotation raises NameError while the class is being defined.
+    def maximize(self, f_obj: "SetFunction", budget: float, stop_if_zero_gain: bool,
+                 stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True,
+                 show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]:
+        """Select up to ``budget`` elements, one per iteration.
+
+        Each iteration draws a random subset of the not-yet-selected
+        elements and adds the one with the largest marginal gain; ties go
+        to the smallest element.
+
+        Args:
+            f_obj: object providing ``get_effective_ground_set``,
+                ``clear_memoization``, ``marginal_gain_with_memoization``
+                and ``update_memoization``.
+            budget: maximum number of elements to select.
+            stop_if_zero_gain: stop when the best gain is within 1e-5 of 0.
+            stop_if_negative_gain: stop when the best gain is negative.
+            epsilon: currently ignored; 0.05 is always used (see below).
+            verbose: print a trace of the run.
+            show_progress: print a progress bar.
+            costs: unused (cost-sensitive selection is still a TODO).
+            cost_sensitive_greedy: unused.
+
+        Returns:
+            List of (element, gain) pairs in selection order.
+
+        Raises:
+            ValueError: if no sampled element has a gain above -inf.
+        """
+        # TODO: Implement handling of equal guys and different sizes of each item later
+        # TODO: Implement cost-sensitive selection
+
+        greedy_vector = []
+        greedy_set = set()
+        rem_budget = budget
+        remaining_set = set(f_obj.get_effective_ground_set())
+        n = len(remaining_set)
+        # NOTE(review): the ``epsilon`` argument has always been overridden
+        # here; confirm the intended value before honouring the argument.
+        epsilon = 0.05
+        # Sample at least one candidate per round; a non-positive budget
+        # never enters the loop, so it must not reach the division.
+        random_set_size = max(1, int((n / budget) * math.log(1 / epsilon))) if budget > 0 else 0
+
+        if verbose:
+            print(f"Epsilon = {epsilon}")
+            print(f"Random set size = {random_set_size}")
+            print("Ground set:")
+            print(" ".join(map(str, remaining_set)))
+            print(f"Num elements in groundset = {len(remaining_set)}")
+            print("Starting the stochastic greedy algorithm")
+            print("Initial greedy set:")
+            print(" ".join(map(str, greedy_set)))
+
+        f_obj.clear_memoization()
+        # NOTE(review): this reseeds the process-wide generator on every call.
+        random.seed(1)
+        i = 0
+        step = 1
+        display_next = step
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0 and remaining_set:
+            # Sample without replacement from what is actually left, so the
+            # draw terminates even when fewer elements remain than requested.
+            sample_size = min(random_set_size, len(remaining_set))
+            random_set = sorted(random.sample(sorted(remaining_set), sample_size))
+
+            if verbose:
+                print(f"Iteration {i}")
+                print(f"Random set = {random_set}")
+                print("Now running naive greedy on the random set")
+
+            best_id = None
+            best_val = float("-inf")
+
+            # Ascending order plus a strict ">" keeps the smallest element on ties.
+            for elem in random_set:
+                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
+                if gain > best_val:
+                    best_id = elem
+                    best_val = gain
+
+            if best_id is None:
+                raise ValueError("Nobody had greater gain than minus infinity!!")
+
+            if verbose:
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+
+            if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
+                break
+
+            f_obj.update_memoization(greedy_set, best_id)
+            greedy_set.add(best_id)
+            greedy_vector.append((best_id, best_val))
+            rem_budget -= 1
+            remaining_set.remove(best_id)
+
+            if verbose:
+                print(f"Added element {best_id} and the gain is {best_val}")
+                print("Updated greedy set:", " ".join(map(str, greedy_set)))
+
+            if show_progress:
+                percent = int(((iter_count + 1.0) / N) * 100)
+                if percent >= display_next:
+                    print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="")
+                    print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="")
+                    sys.stdout.flush()
+                    display_next += step
+                iter_count += 1
+
+            i += 1
+
+        return greedy_vector

From 75bfd10121ae993e8462ef7a212ffb413427a5d1 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 16 Jan 2024 19:48:32 +0530
Subject: [PATCH 10/58] Create SetCover.py

---
 cpp/submod/SetCover.py | 96 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 cpp/submod/SetCover.py

diff --git a/cpp/submod/SetCover.py b/cpp/submod/SetCover.py
new file mode 100644
index 0000000..d525c69
--- /dev/null
+++ b/cpp/submod/SetCover.py
@@ -0,0 +1,96 @@
+import torch
+import torch.nn as nn
+import numpy as np
+import random
+
+class SetCover(SetFunction):
+    def __init__(self, n, cover_set, num_concepts, concept_weights = None):
+        super(SetFunction, self).__init__()
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.n = n
+        self.cover_set = cover_set
+        self.num_concepts = num_concepts
+        self.concept_weights = concept_weights
+        if self.concept_weights is None:
+            self.concept_weights = [1.0] * num_concepts
+        else:
+            self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device)
+
+
+        self.concepts_covered_by_x = set()
+
+
+    def evaluate(self, X):
+      result = 0.0
+
+      if X.numel() == 0:
+          return 0.0
+
+      concepts_covered = set()
+      for elem in X:
+          concepts_covered.update(self.cover_set[elem.item()])
+
+      for con in concepts_covered:
+          result += self.concept_weights[con]
+
+      return result
+
+
+    def evaluate_with_memoization(self, X):
+        result = 0.0
+
+        if X.numel() == 0:
+            print("hi")
+            return 0.0
+
+        for con in self.concepts_covered_by_x:
+            result += self.concept_weights[con]
+            print(result)
+
+        return result
+
+    def marginal_gain(self, X, item):
+        gain = 0.0
+
+        if item in X:
+            return 0.0
+
+        concepts_covered = set()
+        for elem in X:
+            concepts_covered.update(self.cover_set[elem])
+
+        for con in self.cover_set[item]:
+            if con not in concepts_covered:
+                gain += self.concept_weights[con]
+
+        return gain.item()
+
+    def marginal_gain_with_memoization(self, X, item, enable_checks=True):
+        gain = 0.0
+
+        if enable_checks and item in X:
+            return 0.0
+        for con in self.cover_set[item]:
+          if con not in self.concepts_covered_by_x:
+                gain += self.concept_weights[con]
+
+        return gain
+
+    def update_memoization(self, X, item):
+        if item in X:
+            return
+
+        self.concepts_covered_by_x.update(self.cover_set[item])
+
+    def get_effective_ground_set(self):
+        return set(range(self.n))
+
+    def clear_memoization(self):
+        self.concepts_covered_by_x.clear()
+
+    def set_memoization(self, X):
+        self.clear_memoization()
+        temp = set()
+        for elem in X:
+            self.update_memoization(temp, elem)
+            temp.add(elem)

From f9b07b92a81646dc9eb159011d9ef6c310128e4a Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:43:20 +0530
Subject: [PATCH 11/58] Delete cpp/SetFunction.py

---
 cpp/SetFunction.py | 62 ----------------------------------------------
 1 file changed, 62 deletions(-)
 delete mode 100644 cpp/SetFunction.py

diff --git a/cpp/SetFunction.py b/cpp/SetFunction.py
deleted file mode 100644
index 661bf0c..0000000
--- a/cpp/SetFunction.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from typing import Set, List, Tuple
-import numpy as np
-import torch
-import torch.nn as nn
-import numpy as np
-import random
-# import optimizer python files
-
-class SetFunction(nn.Module):
-    def __init__(self):
-        pass
-
-    def evaluate(self, X: Set[int]) -> float:
-        return self.evaluate(X)
-
-    def evaluate_with_memoization(self, X: Set[int]) -> float:
-        return self.evaluate_with_memoization(X)
-
-    def marginal_gain(self, X: Set[int], item: int) -> float:
-        return self.marginal_gain(X, item)
-
-    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
-       return self.marginal_gain_with_memoization(X, item)
-
-    def update_memoization(self, X: Set[int], item: int) -> None:
-        return self.update_memoization(X, item)
-
-
-    def get_effective_ground_set(self) -> Set[int]:
-        return self.get_effective_ground_set()
-
-    def maximize(self, optimizer: str, budget: float, stopIfZeroGain: bool, stopIfNegativeGain: bool, verbose: bool,
-                  costs: List[float] = None, cost_sensitive_greedy: bool = False, show_progress: bool = False, epsilon: float = 0.0) -> List[Tuple[int, float]]:
-        optimizer = self._get_optimizer(optimizer)
-        if optimizer:
-            return optimizer.maximize(self, budget, stopIfZeroGain, stopIfZeroGain, verbose, show_progress, costs, cost_sensitive_greedy)
-        else:
-            print("Invalid Optimizer")
-            return []
-
-    def _get_optimizer(self, optimizer_name: str):
-        if optimizer_name == "NaiveGreedy":
-            return NaiveGreedyOptimizer()
-        # define all optimizer classed into files
-        elif optimizer_name == "LazyGreedy":
-            return LazyGreedyOptimizer()
-        elif optimizer_name == "StochasticGreedy":
-            return StochasticGreedyOptimizer()
-        elif optimizer_name == "LazierThanLazyGreedy":
-            return LazierThanLazyGreedyOptimizer()
-        else:
-            return None
-
-    def cluster_init(self, n: int, k_dense: List[List[float]], ground: Set[int],
-                     partial: bool, lambda_: float) -> None:
-        self.cluster_init(n, k_dense, ground, partial, lambda_)
-
-    def set_memoization(self, X: Set[int]) -> None:
-        self.set_memoization(X)
-
-    def clear_memoization(self) -> None:
-        self.clear_memoization()

From 40178a3a9383c47244781d55c321719edf1650ac Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:46:24 +0530
Subject: [PATCH 12/58] Create SetFunction.py

---
 pytorch/SetFunction.py | 62 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 pytorch/SetFunction.py

diff --git a/pytorch/SetFunction.py b/pytorch/SetFunction.py
new file mode 100644
index 0000000..661bf0c
--- /dev/null
+++ b/pytorch/SetFunction.py
@@ -0,0 +1,62 @@
+from typing import Set, List, Tuple
+import numpy as np
+import torch
+import torch.nn as nn
+import numpy as np
+import random
+# import optimizer python files
+
+class SetFunction(nn.Module):
+    def __init__(self):
+        super().__init__()  # set up nn.Module state before subclasses assign attributes
+
+    # Abstract interface: subclasses override these; raising beats recursing into self forever.
+    def evaluate(self, X: Set[int]) -> float:
+        raise NotImplementedError("evaluate(X) must return f(X)")
+
+    def evaluate_with_memoization(self, X: Set[int]) -> float:
+        raise NotImplementedError("evaluate_with_memoization(X) must return f(X) from memoized state")
+
+    def marginal_gain(self, X: Set[int], item: int) -> float:
+        raise NotImplementedError("marginal_gain(X, item) must return f(X | {item}) - f(X)")
+
+    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+        raise NotImplementedError("marginal_gain_with_memoization must return the memoized gain of item")
+
+    def update_memoization(self, X: Set[int], item: int) -> None:
+        raise NotImplementedError("update_memoization(X, item) must fold item into the memoized state")
+
+    def get_effective_ground_set(self) -> Set[int]:
+        raise NotImplementedError("get_effective_ground_set() must return the selectable elements")
+
+    def maximize(self, optimizer: str, budget: float, stopIfZeroGain: bool, stopIfNegativeGain: bool, verbose: bool,
+                  costs: List[float] = None, cost_sensitive_greedy: bool = False, show_progress: bool = False, epsilon: float = 0.0) -> List[Tuple[int, float]]:
+        """Run the named optimizer on this function; prints "Invalid Optimizer" and returns [] if unknown."""
+        chosen = self._get_optimizer(optimizer)
+        if chosen is None:
+            print("Invalid Optimizer")
+            return []
+        # Flags go by keyword because optimizers differ in where `verbose` sits; `epsilon` is not forwarded.
+        return chosen.maximize(self, budget, stopIfZeroGain, stopIfNegativeGain, verbose=verbose, show_progress=show_progress, costs=costs, cost_sensitive_greedy=cost_sensitive_greedy)
+
+    def _get_optimizer(self, optimizer_name: str):
+        """Return a new optimizer instance for `optimizer_name`, or None if the name is unknown."""
+        if optimizer_name == "NaiveGreedy":
+            return NaiveGreedyOptimizer()
+        if optimizer_name == "LazyGreedy":
+            return LazyGreedyOptimizer()
+        if optimizer_name == "StochasticGreedy":
+            return StochasticGreedyOptimizer()
+        if optimizer_name == "LazierThanLazyGreedy":
+            return LazierThanLazyGreedyOptimizer()
+        return None
+
+    def cluster_init(self, n: int, k_dense: List[List[float]], ground: Set[int],
+                     partial: bool, lambda_: float) -> None:
+        raise NotImplementedError("cluster_init must be implemented by the subclass")
+
+    def set_memoization(self, X: Set[int]) -> None:
+        raise NotImplementedError("set_memoization(X) must rebuild the memoized state for X")
+
+    def clear_memoization(self) -> None:
+        raise NotImplementedError("clear_memoization() must reset the memoized state")

From 4386d15c3b18b02fa8e3c06e93062a45d61f0d1d Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:52:32 +0530
Subject: [PATCH 13/58] Create SetCover.py

---
 pytorch/submod/SetCover.py | 96 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 pytorch/submod/SetCover.py

diff --git a/pytorch/submod/SetCover.py b/pytorch/submod/SetCover.py
new file mode 100644
index 0000000..62ed068
--- /dev/null
+++ b/pytorch/submod/SetCover.py
@@ -0,0 +1,96 @@
+import torch
+import torch.nn as nn
+import numpy as np
+import random
+from SetFunction import SetFunction
+class SetCover(SetFunction):
+    """Weighted set cover: f(X) is the total weight of the concepts covered by X.
+
+    Args:
+        n: size of the ground set; elements are 0 .. n - 1.
+        cover_set: cover_set[i] is the collection of concepts covered by element i.
+        num_concepts: number of concepts; used to build the default weights.
+        concept_weights: weight per concept, indexed by concept; 1.0 each when None.
+
+    evaluate and the gain methods return plain Python floats, with or without weights.
+    """
+
+    def __init__(self, n, cover_set, num_concepts, concept_weights = None):
+        # Starts the MRO lookup after SetFunction, i.e. runs nn.Module.__init__ directly.
+        super(SetFunction, self).__init__()
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.n = n
+        self.cover_set = cover_set
+        self.num_concepts = num_concepts
+        if concept_weights is None:
+            self.concept_weights = [1.0] * num_concepts
+        else:
+            self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device)
+
+        # Concepts covered by the set most recently built through update_memoization().
+        self.concepts_covered_by_x = set()
+
+    def _covered(self, X):
+        """Return the concepts covered by the elements of X (a set, list or 1-D tensor)."""
+        covered = set()
+        for elem in X:
+            covered.update(self.cover_set[int(elem)])  # int() accepts ints and 0-dim tensors
+        return covered
+
+    def _weight(self, concepts):
+        """Return the summed weight of `concepts` as a plain Python float."""
+        # float() unwraps tensor weights so the result type does not depend on how weights were given.
+        return sum((float(self.concept_weights[con]) for con in concepts), 0.0)
+
+    def evaluate(self, X):
+        """Return f(X) computed from scratch; an empty X gives 0.0."""
+        return self._weight(self._covered(X))
+
+    def evaluate_with_memoization(self, X):
+        """Return f(X) from the memoized coverage, or 0.0 when X is empty.
+
+        X is only inspected for emptiness; the memoized state must already describe X.
+        """
+        if len(X) == 0:
+            return 0.0
+        return self._weight(self.concepts_covered_by_x)
+
+    def marginal_gain(self, X, item):
+        """Return the weight item adds on top of X, computed from scratch; 0.0 if item is in X."""
+        if item in X:
+            return 0.0
+        covered = self._covered(X)
+        return self._weight(con for con in self.cover_set[item] if con not in covered)
+
+    def marginal_gain_with_memoization(self, X, item, enable_checks=True):
+        """Return the weight item adds on top of the memoized coverage.
+
+        When enable_checks is true, an item that is already in X gains 0.0.
+        """
+        if enable_checks and item in X:
+            return 0.0
+        already = self.concepts_covered_by_x
+        return self._weight(con for con in self.cover_set[item] if con not in already)
+
+    def update_memoization(self, X, item):
+        """Add the concepts of item to the memoized coverage; no-op when item is already in X."""
+        if item in X:
+            return
+        self.concepts_covered_by_x.update(self.cover_set[item])
+
+    def get_effective_ground_set(self):
+        """Return the ground set {0, ..., n - 1}."""
+        return set(range(self.n))
+
+    def clear_memoization(self):
+        """Reset the memoized coverage to that of the empty set."""
+        self.concepts_covered_by_x.clear()
+
+    def set_memoization(self, X):
+        """Rebuild the memoized coverage so that it describes the set X."""
+        self.clear_memoization()
+        seen = set()
+        for elem in X:
+            # `seen` lets update_memoization skip elements that occur more than once in X.
+            self.update_memoization(seen, elem)
+            seen.add(elem)

From b5659d8afde11df3cdc0b4ed655cf7a97de60d4e Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:53:19 +0530
Subject: [PATCH 14/58] Delete cpp/submod/SetCover.py

---
 cpp/submod/SetCover.py | 96 ------------------------------------------
 1 file changed, 96 deletions(-)
 delete mode 100644 cpp/submod/SetCover.py

diff --git a/cpp/submod/SetCover.py b/cpp/submod/SetCover.py
deleted file mode 100644
index d525c69..0000000
--- a/cpp/submod/SetCover.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import torch
-import torch.nn as nn
-import numpy as np
-import random
-
-class SetCover(SetFunction):
-    def __init__(self, n, cover_set, num_concepts, concept_weights = None):
-        super(SetFunction, self).__init__()
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.n = n
-        self.cover_set = cover_set
-        self.num_concepts = num_concepts
-        self.concept_weights = concept_weights
-        if self.concept_weights is None:
-            self.concept_weights = [1.0] * num_concepts
-        else:
-            self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device)
-
-
-        self.concepts_covered_by_x = set()
-
-
-    def evaluate(self, X):
-      result = 0.0
-
-      if X.numel() == 0:
-          return 0.0
-
-      concepts_covered = set()
-      for elem in X:
-          concepts_covered.update(self.cover_set[elem.item()])
-
-      for con in concepts_covered:
-          result += self.concept_weights[con]
-
-      return result
-
-
-    def evaluate_with_memoization(self, X):
-        result = 0.0
-
-        if X.numel() == 0:
-            print("hi")
-            return 0.0
-
-        for con in self.concepts_covered_by_x:
-            result += self.concept_weights[con]
-            print(result)
-
-        return result
-
-    def marginal_gain(self, X, item):
-        gain = 0.0
-
-        if item in X:
-            return 0.0
-
-        concepts_covered = set()
-        for elem in X:
-            concepts_covered.update(self.cover_set[elem])
-
-        for con in self.cover_set[item]:
-            if con not in concepts_covered:
-                gain += self.concept_weights[con]
-
-        return gain.item()
-
-    def marginal_gain_with_memoization(self, X, item, enable_checks=True):
-        gain = 0.0
-
-        if enable_checks and item in X:
-            return 0.0
-        for con in self.cover_set[item]:
-          if con not in self.concepts_covered_by_x:
-                gain += self.concept_weights[con]
-
-        return gain
-
-    def update_memoization(self, X, item):
-        if item in X:
-            return
-
-        self.concepts_covered_by_x.update(self.cover_set[item])
-
-    def get_effective_ground_set(self):
-        return set(range(self.n))
-
-    def clear_memoization(self):
-        self.concepts_covered_by_x.clear()
-
-    def set_memoization(self, X):
-        self.clear_memoization()
-        temp = set()
-        for elem in X:
-            self.update_memoization(temp, elem)
-            temp.add(elem)

From 820c78ff4c4c890a77afafd44f6f413f4b5a12e1 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:54:14 +0530
Subject: [PATCH 15/58] Create LazierThanLazyGreedyOptimizer.py

---
 .../LazierThanLazyGreedyOptimizer.py          | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 pytorch/optimizer/LazierThanLazyGreedyOptimizer.py

diff --git a/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py b/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py
new file mode 100644
index 0000000..86e355c
--- /dev/null
+++ b/pytorch/optimizer/LazierThanLazyGreedyOptimizer.py
@@ -0,0 +1,120 @@
+import random
+import math
+
+class LazierThanLazyGreedyOptimizer:
+    """Lazier-than-lazy greedy: lazy greedy restricted to a fresh random sample at every step."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        """Return True when val1 and val2 differ by less than eps."""
+        return abs(val1 - val2) < eps
+
+    @staticmethod
+    def print_sorted_set(sorted_set):
+        """Print an iterable of (value, element) pairs on a single line."""
+        print("[", end="")
+        for val, elem in sorted_set:
+            print(f"({val}, {elem}), ", end="")
+        print("]")
+
+    def maximize(self, f_obj, budget, stop_if_zero_gain=False, stop_if_negative_gain=False,
+                    epsilon=0.1, verbose=False, show_progress=False, costs=None, cost_sensitive_greedy=False):
+        """Select up to `budget` elements of f_obj's ground set, one unit of budget per element.
+
+        Each step samples the unselected elements and lazily picks the best of the sample:
+        candidates are tried by decreasing cached gain until the best fresh gain is at least the
+        next cached one. Cached gains are upper bounds only if f_obj is submodular.
+
+        Stops early on a negative gain (stop_if_negative_gain) or a gain within 1e-5 of zero
+        (stop_if_zero_gain). A budget <= 0 selects nothing. `epsilon`, `costs` and
+        `cost_sensitive_greedy` are accepted but do not influence the selection.
+
+        Returns the (element, marginal gain) pairs in selection order.
+        """
+        greedy_vector = []
+        greedy_set = set()
+        if budget <= 0:
+            return greedy_vector
+
+        rem_budget = budget
+        remaining_set = set(f_obj.get_effective_ground_set())
+        n = len(remaining_set)
+        # NOTE(review): the epsilon argument is overwritten, as before; confirm before honouring it.
+        epsilon = 0.05
+        random_set_size = max(1, int((n / budget) * math.log(1 / epsilon)))  # never sample nothing
+
+        if verbose:
+            print(f"Epsilon = {epsilon}")
+            print(f"Random set size = {random_set_size}")
+            print("Ground set:")
+            print(remaining_set)
+            print(f"Num elements in ground set = {len(remaining_set)}")
+            print("Starting the LazierThanLazy greedy algorithm")
+            print("Initial greedy set:")
+            print(greedy_set)
+
+        f_obj.clear_memoization()
+        # Cached gain per element, refreshed whenever that element is re-evaluated.
+        cached_gain = {elem: f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
+                       for elem in remaining_set}
+
+        i = 0
+        step = 1
+        display_next = step
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0 and remaining_set:
+            # Sample without replacement, capped by what is left, so the draw always terminates.
+            sample_size = min(random_set_size, len(remaining_set))
+            random_set = random.sample(sorted(remaining_set), sample_size)
+            if verbose:
+                print(f"Iteration {i}")
+                print(f"Random set = {random_set}")
+                print("Now running lazy greedy on the random set")
+
+            ordered = sorted(random_set, key=lambda e: (-cached_gain[e], e))
+            best_id = None
+            best_val = float('-inf')
+            for pos, candidate_id in enumerate(ordered):
+                if verbose:
+                    print(f"Checking {candidate_id}...")
+                fresh = f_obj.marginal_gain_with_memoization(greedy_set, candidate_id, False)
+                cached_gain[candidate_id] = fresh
+                if verbose:
+                    print(f"Updated gain as per updated greedy set = {fresh}")
+                if best_id is None or fresh > best_val:
+                    best_id, best_val = candidate_id, fresh
+                next_bound = cached_gain[ordered[pos + 1]] if pos + 1 < len(ordered) else float('-inf')
+                if best_val >= next_bound:
+                    if verbose:
+                        print("..better than next best upper bound, selecting...")
+                    break
+
+            if verbose:
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+            remaining_set.remove(best_id)
+            if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
+                break
+
+            f_obj.update_memoization(greedy_set, best_id)
+            greedy_set.add(best_id)
+            greedy_vector.append((best_id, best_val))
+            rem_budget -= 1
+            if verbose:
+                print(f"Added element {best_id} and the gain is {best_val}")
+                print("Updated greedy set:", greedy_set)
+
+            if show_progress:
+                percent = int(((iter_count + 1.0) / N) * 100)
+                if percent >= display_next:
+                    print("\r", "[" + "|" * (percent // 5) + " " * (100 // 5 - percent // 5) + "]", end="")
+                    print(f" {percent}% [Iteration {iter_count + 1} of {N}]", end="")
+                    display_next += step
+                iter_count += 1
+            i += 1
+
+        return greedy_vector

From 63c9e064ca8784f3fab7f783ec4737d245c969d6 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:54:48 +0530
Subject: [PATCH 16/58] Create LazyGreedyOptimizer.py

---
 pytorch/optimizer/LazyGreedyOptimizer.py | 97 ++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 pytorch/optimizer/LazyGreedyOptimizer.py

diff --git a/pytorch/optimizer/LazyGreedyOptimizer.py b/pytorch/optimizer/LazyGreedyOptimizer.py
new file mode 100644
index 0000000..45d7590
--- /dev/null
+++ b/pytorch/optimizer/LazyGreedyOptimizer.py
@@ -0,0 +1,97 @@
+import torch
+import heapq
+
+class LazyGreedyOptimizer:
+    """Lazy greedy maximizer: keeps possibly stale gains in a heap and re-evaluates only the top."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        """Return True when val1 and val2 differ by less than eps."""
+        return abs(val1 - val2) < eps
+
+    def maximize(self, f_obj, budget, stop_if_zero_gain, stop_if_negative_gain,
+                 verbose, show_progress, costs, cost_sensitive_greedy):
+        """Select up to `budget` elements of f_obj's ground set, one unit of budget per element.
+
+        Stops early when the best gain is negative (stop_if_negative_gain) or within 1e-5 of
+        zero (stop_if_zero_gain). With cost_sensitive_greedy, candidates are ranked by
+        gain / costs[elem]; `costs` is not read otherwise. `verbose` prints a trace and
+        `show_progress` a progress bar.
+
+        Returns the (element, marginal gain) pairs in selection order.
+        """
+        greedy_vector = []
+        greedy_set = set()
+        rem_budget = budget
+        ground_set = f_obj.get_effective_ground_set()
+
+        if verbose:
+            print("Ground set:")
+            print(ground_set)
+            print(f"Num elements in groundset = {len(ground_set)}")
+            print("Costs:")
+            print(costs)
+            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
+            print("Starting the lazy greedy algorithm")
+            print("Initial greedy set:")
+            print(greedy_set)
+
+        f_obj.clear_memoization()
+
+        def heap_key(gain, elem):
+            # heapq is a min-heap, so keys are negated; fresh and stale bounds share one scaling.
+            return -(gain / costs[elem]) if cost_sensitive_greedy else -gain
+
+        max_heap = []
+        for elem in ground_set:
+            gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
+            max_heap.append((heap_key(gain, elem), elem))
+        heapq.heapify(max_heap)
+
+        if verbose:
+            print("Max heap constructed")
+
+        step = 1
+        display_next = step
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0 and max_heap:
+            stale_key, current_max_elem = heapq.heappop(max_heap)
+            if verbose:
+                print(f"currentMax element: {current_max_elem} and its upper bound: {-stale_key}")
+
+            new_max_bound = f_obj.marginal_gain_with_memoization(greedy_set, current_max_elem, False)
+            new_key = heap_key(new_max_bound, current_max_elem)
+            if verbose:
+                print(f"newMaxBound: {new_max_bound}")
+
+            # The refreshed gain lost to the next bound: requeue it. An empty heap means this
+            # was the last candidate, so there is no bound left to compare against.
+            if max_heap and new_key > max_heap[0][0]:
+                heapq.heappush(max_heap, (new_key, current_max_elem))
+                continue
+            if (new_max_bound < 0 and stop_if_negative_gain) or \
+                    (self.equals(new_max_bound, 0, 1e-5) and stop_if_zero_gain):
+                break
+
+            f_obj.update_memoization(greedy_set, current_max_elem)
+            greedy_set.add(current_max_elem)
+            greedy_vector.append((current_max_elem, new_max_bound))
+            rem_budget -= 1
+            if verbose:
+                print(f"Added element {current_max_elem} and the gain is {new_max_bound}")
+                print("Updated greedySet:", greedy_set)
+
+            if show_progress:
+                percent = int(((iter_count + 1.0) / N) * 100)
+                if percent >= display_next:
+                    print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]",
+                          end=f" {percent}% [Iteration {iter_count + 1} of {N}]")
+                    display_next += step
+                iter_count += 1
+
+        return greedy_vector

From ba15c2fced48fd7e3834c5ed514330777a9b7a1c Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:55:24 +0530
Subject: [PATCH 17/58] Create NaiveGreedyOptimizer.py

---
 pytorch/optimizer/NaiveGreedyOptimizer.py | 90 +++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 pytorch/optimizer/NaiveGreedyOptimizer.py

diff --git a/pytorch/optimizer/NaiveGreedyOptimizer.py b/pytorch/optimizer/NaiveGreedyOptimizer.py
new file mode 100644
index 0000000..728f16a
--- /dev/null
+++ b/pytorch/optimizer/NaiveGreedyOptimizer.py
@@ -0,0 +1,90 @@
+import torch
+import random
+from typing import List, Tuple, Set
+
+class NaiveGreedyOptimizer:
+    """Naive greedy maximizer: re-evaluates every unselected element at each step."""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1, val2, eps):
+        """Return True when val1 and val2 differ by less than eps."""
+        return abs(val1 - val2) < eps
+
+    def maximize(
+        self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy
+    ):
+        """Select up to `budget` elements of f_obj's ground set, one unit of budget per element.
+
+        Stops early when the best gain is negative (stopIfNegativeGain), within 1e-5 of zero
+        (stop_if_zero_gain), or when no unselected element is left. `costs` and
+        `cost_sensitive_greedy` are only echoed in the verbose trace; selection ignores them.
+
+        Returns the (element, marginal gain) pairs in selection order.
+        """
+        greedy_vector = []
+        greedy_set = set()
+        rem_budget = budget
+        ground_set = f_obj.get_effective_ground_set()
+        if verbose:
+            print("Ground set:")
+            print(ground_set)
+            print(f"Num elements in groundset = {len(ground_set)}")
+            print("Costs:")
+            print(costs)
+            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
+            print("Starting the naive greedy algorithm")
+            print("Initial greedy set:")
+            print(greedy_set)
+
+        f_obj.clear_memoization()
+        step = 1
+        display_next = step
+        N = rem_budget
+        iter_count = 0
+
+        while rem_budget > 0:
+            best_id = None
+            best_val = float("-inf")
+
+            for i in ground_set:
+                if i in greedy_set:
+                    continue
+                gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False)
+                if verbose:
+                    print(f"Gain of {i} is {gain}")
+                if gain > best_val:
+                    best_id = i
+                    best_val = gain
+
+            if best_id is None:
+                # Every element is already selected (budget exceeds the ground set): nothing to add.
+                break
+
+            if verbose:
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+
+            if (best_val < 0 and stopIfNegativeGain) or (
+                self.equals(best_val, 0, 1e-5) and stop_if_zero_gain
+            ):
+                break
+
+            f_obj.update_memoization(greedy_set, best_id)
+            greedy_set.add(best_id)
+            greedy_vector.append((best_id, best_val))
+            rem_budget -= 1
+            if verbose:
+                print(f"Added element {best_id} and the gain is {best_val}")
+                print(f"Updated greedy set: {greedy_set}")
+
+            if show_progress:
+                percent = int((iter_count + 1.0) / N * 100)
+                if percent >= display_next:
+                    print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="")
+                    print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="")
+                    display_next += step
+                iter_count += 1
+
+        return greedy_vector

From 125966604fcab22109aa02f7b8ec96498fe47709 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 14:55:59 +0530
Subject: [PATCH 18/58] Create StochasticGreedyOptimizer.py

---
 .../optimizer/StochasticGreedyOptimizer.py    | 105 ++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 pytorch/optimizer/StochasticGreedyOptimizer.py

diff --git a/pytorch/optimizer/StochasticGreedyOptimizer.py b/pytorch/optimizer/StochasticGreedyOptimizer.py
new file mode 100644
index 0000000..bcc9263
--- /dev/null
+++ b/pytorch/optimizer/StochasticGreedyOptimizer.py
@@ -0,0 +1,105 @@
+import random
+from typing import List, Tuple, Set
+import math
+import sys
+# from StochasticGreedyOptimizer import SetFunction
+
+class StochasticGreedyOptimizer:
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1: float, val2: float, eps: float) -> bool:
+        return abs(val1 - val2) < eps
+
+    def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool,
+                 stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True,
+                 show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]:
+        # TODO: Implement handling of equal guys and different sizes of each item later
+        # TODO: Implement cost-sensitive selection
+
+        greedy_vector = []
+        greedy_set = set()
+
+        # if not costs:
+        #     # Every element is of the same size, budget corresponds to cardinality
+        #     greedy_vector.reserve(budget)
+        #     greedy_set.reserve(budget)
+
+        rem_budget = budget
+        remaining_set = set(f_obj.get_effective_ground_set())
+        n = len(remaining_set)
+        epsilon = 0.05
+        random_set_size = int((n / budget) * math.log(1 / epsilon))
+        if verbose:
+            print(f"Epsilon = {epsilon}")
+            print(f"Random set size = {random_set_size}")
+            print("Ground set:")
+            print(" ".join(map(str, remaining_set)))
+            print(f"Num elements in groundset = {len(remaining_set)}")
+            print("Starting the stochastic greedy algorithm")
+            print("Initial greedy set:")
+            print(" ".join(map(str, greedy_set)))
+
+        f_obj.clear_memoization()
+        random.seed(1)
+        best_id = -1
+        best_val = -1 * float('inf')
+        i = 0
+        step = 1
+        display_next = step
+        percent = 0
+        N = rem_budget
+        iter = 0
+
+        while rem_budget > 0:
+            random_set = set()
+            while len(random_set) < random_set_size:
+                elem = random.randint(0, n - 1)
+                if elem in remaining_set and elem not in random_set:
+                    random_set.add(elem)
+
+            if verbose:
+                print(f"Iteration {i}")
+                print(f"Random set = {list(random_set)}")
+                print("Now running naive greedy on the random set")
+
+            best_id = -1
+            best_val = -1 * float('inf')
+
+            for elem in random_set:
+                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
+                if gain > best_val:
+                    best_id = elem
+                    best_val = gain
+
+            if verbose:
+                if best_id == -1:
+                    raise ValueError("Nobody had greater gain than minus infinity!!")
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+
+            if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
+                break
+            else:
+                f_obj.update_memoization(greedy_set, best_id)
+                greedy_set.add(best_id)
+                greedy_vector.append((best_id, best_val))
+                rem_budget -= 1
+                remaining_set.remove(best_id)
+
+                if verbose:
+                    print(f"Added element {best_id} and the gain is {best_val}")
+                    print("Updated greedy set:", " ".join(map(str, greedy_set)))
+
+                if show_progress:
+                    percent = int(((iter + 1.0) / N) * 100)
+                    if percent >= display_next:
+                        print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="")
+                        print(f"{percent}% [Iteration {iter + 1} of {N}]", end="")
+                        sys.stdout.flush()
+                        display_next += step
+                    iter += 1
+
+            i += 1
+
+        return greedy_vector

From 99f98703897d4c049299a20d9efffa3297c88abd Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:04:49 +0530
Subject: [PATCH 19/58] Update SetFunction.py

---
 pytorch/SetFunction.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pytorch/SetFunction.py b/pytorch/SetFunction.py
index 661bf0c..6c7891c 100644
--- a/pytorch/SetFunction.py
+++ b/pytorch/SetFunction.py
@@ -4,7 +4,11 @@
 import torch.nn as nn
 import numpy as np
 import random
-# import optimizer python files
+from optimizer.LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer
+from optimizer.LazyGreedyOptimizer import LazyGreedyOptimizer
+from optimizer.NaiveGreedyOptimizer import NaiveGreedyOptimizer
+from optimizer.StochasticGreedyOptimizer import StochasticGreedyOptimizer
+
 
 class SetFunction(nn.Module):
     def __init__(self):

From eba36f77b4c273e8ba6158679d03bdf907298ffe Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:05:13 +0530
Subject: [PATCH 20/58] Delete pytorch/optimizer/StochasticGreedyOptimizer.py

---
 .../optimizer/StochasticGreedyOptimizer.py    | 105 ------------------
 1 file changed, 105 deletions(-)
 delete mode 100644 pytorch/optimizer/StochasticGreedyOptimizer.py

diff --git a/pytorch/optimizer/StochasticGreedyOptimizer.py b/pytorch/optimizer/StochasticGreedyOptimizer.py
deleted file mode 100644
index bcc9263..0000000
--- a/pytorch/optimizer/StochasticGreedyOptimizer.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import random
-from typing import List, Tuple, Set
-import math
-import sys
-# from StochasticGreedyOptimizer import SetFunction
-
-class StochasticGreedyOptimizer:
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def equals(val1: float, val2: float, eps: float) -> bool:
-        return abs(val1 - val2) < eps
-
-    def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool,
-                 stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True,
-                 show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]:
-        # TODO: Implement handling of equal guys and different sizes of each item later
-        # TODO: Implement cost-sensitive selection
-
-        greedy_vector = []
-        greedy_set = set()
-
-        # if not costs:
-        #     # Every element is of the same size, budget corresponds to cardinality
-        #     greedy_vector.reserve(budget)
-        #     greedy_set.reserve(budget)
-
-        rem_budget = budget
-        remaining_set = set(f_obj.get_effective_ground_set())
-        n = len(remaining_set)
-        epsilon = 0.05
-        random_set_size = int((n / budget) * math.log(1 / epsilon))
-        if verbose:
-            print(f"Epsilon = {epsilon}")
-            print(f"Random set size = {random_set_size}")
-            print("Ground set:")
-            print(" ".join(map(str, remaining_set)))
-            print(f"Num elements in groundset = {len(remaining_set)}")
-            print("Starting the stochastic greedy algorithm")
-            print("Initial greedy set:")
-            print(" ".join(map(str, greedy_set)))
-
-        f_obj.clear_memoization()
-        random.seed(1)
-        best_id = -1
-        best_val = -1 * float('inf')
-        i = 0
-        step = 1
-        display_next = step
-        percent = 0
-        N = rem_budget
-        iter = 0
-
-        while rem_budget > 0:
-            random_set = set()
-            while len(random_set) < random_set_size:
-                elem = random.randint(0, n - 1)
-                if elem in remaining_set and elem not in random_set:
-                    random_set.add(elem)
-
-            if verbose:
-                print(f"Iteration {i}")
-                print(f"Random set = {list(random_set)}")
-                print("Now running naive greedy on the random set")
-
-            best_id = -1
-            best_val = -1 * float('inf')
-
-            for elem in random_set:
-                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
-                if gain > best_val:
-                    best_id = elem
-                    best_val = gain
-
-            if verbose:
-                if best_id == -1:
-                    raise ValueError("Nobody had greater gain than minus infinity!!")
-                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
-
-            if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
-                break
-            else:
-                f_obj.update_memoization(greedy_set, best_id)
-                greedy_set.add(best_id)
-                greedy_vector.append((best_id, best_val))
-                rem_budget -= 1
-                remaining_set.remove(best_id)
-
-                if verbose:
-                    print(f"Added element {best_id} and the gain is {best_val}")
-                    print("Updated greedy set:", " ".join(map(str, greedy_set)))
-
-                if show_progress:
-                    percent = int(((iter + 1.0) / N) * 100)
-                    if percent >= display_next:
-                        print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="")
-                        print(f"{percent}% [Iteration {iter + 1} of {N}]", end="")
-                        sys.stdout.flush()
-                        display_next += step
-                    iter += 1
-
-            i += 1
-
-        return greedy_vector

From eef8085c9abd5b59dd1487e45e828fe21d514410 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:07:19 +0530
Subject: [PATCH 21/58] Create StochasticGreedyOptimizer

---
 pytorch/optimizer/StochasticGreedyOptimizer | 104 ++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 pytorch/optimizer/StochasticGreedyOptimizer

diff --git a/pytorch/optimizer/StochasticGreedyOptimizer b/pytorch/optimizer/StochasticGreedyOptimizer
new file mode 100644
index 0000000..081f379
--- /dev/null
+++ b/pytorch/optimizer/StochasticGreedyOptimizer
@@ -0,0 +1,104 @@
+import random
+from typing import List, Tuple, Set
+import math
+import sys
+
+class StochasticGreedyOptimizer:
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def equals(val1: float, val2: float, eps: float) -> bool:
+        return abs(val1 - val2) < eps
+
+    def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool,
+                 stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True,
+                 show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]:
+        # TODO: Implement handling of equal guys and different sizes of each item later
+        # TODO: Implement cost-sensitive selection
+
+        greedy_vector = []
+        greedy_set = set()
+
+        # if not costs:
+        #     # Every element is of the same size, budget corresponds to cardinality
+        #     greedy_vector.reserve(budget)
+        #     greedy_set.reserve(budget)
+
+        rem_budget = budget
+        remaining_set = set(f_obj.get_effective_ground_set())
+        n = len(remaining_set)
+        epsilon = 0.05
+        random_set_size = int((n / budget) * math.log(1 / epsilon))
+        if verbose:
+            print(f"Epsilon = {epsilon}")
+            print(f"Random set size = {random_set_size}")
+            print("Ground set:")
+            print(" ".join(map(str, remaining_set)))
+            print(f"Num elements in groundset = {len(remaining_set)}")
+            print("Starting the stochastic greedy algorithm")
+            print("Initial greedy set:")
+            print(" ".join(map(str, greedy_set)))
+
+        f_obj.clear_memoization()
+        random.seed(1)
+        best_id = -1
+        best_val = -1 * float('inf')
+        i = 0
+        step = 1
+        display_next = step
+        percent = 0
+        N = rem_budget
+        iter = 0
+
+        while rem_budget > 0:
+            random_set = set()
+            while len(random_set) < random_set_size:
+                elem = random.randint(0, n - 1)
+                if elem in remaining_set and elem not in random_set:
+                    random_set.add(elem)
+
+            if verbose:
+                print(f"Iteration {i}")
+                print(f"Random set = {list(random_set)}")
+                print("Now running naive greedy on the random set")
+
+            best_id = -1
+            best_val = -1 * float('inf')
+
+            for elem in random_set:
+                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
+                if gain > best_val:
+                    best_id = elem
+                    best_val = gain
+
+            if verbose:
+                if best_id == -1:
+                    raise ValueError("Nobody had greater gain than minus infinity!!")
+                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
+
+            if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
+                break
+            else:
+                f_obj.update_memoization(greedy_set, best_id)
+                greedy_set.add(best_id)
+                greedy_vector.append((best_id, best_val))
+                rem_budget -= 1
+                remaining_set.remove(best_id)
+
+                if verbose:
+                    print(f"Added element {best_id} and the gain is {best_val}")
+                    print("Updated greedy set:", " ".join(map(str, greedy_set)))
+
+                if show_progress:
+                    percent = int(((iter + 1.0) / N) * 100)
+                    if percent >= display_next:
+                        print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="")
+                        print(f"{percent}% [Iteration {iter + 1} of {N}]", end="")
+                        sys.stdout.flush()
+                        display_next += step
+                    iter += 1
+
+            i += 1
+
+        return greedy_vector

From cb235d96638508a5b5a0c649ba8c9ecbd6873480 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:09:15 +0530
Subject: [PATCH 22/58] Create __init__.py

---
 pytorch/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 pytorch/__init__.py

diff --git a/pytorch/__init__.py b/pytorch/__init__.py
new file mode 100644
index 0000000..766a9a5
--- /dev/null
+++ b/pytorch/__init__.py
@@ -0,0 +1 @@
+# /pytorch/__init__.py

From 92be25313d303cdd24eabda8fc865525e8520bd6 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:11:12 +0530
Subject: [PATCH 23/58] Delete cpp/optimizers/LazierThanLazyGreedyOptimizer.py

---
 .../LazierThanLazyGreedyOptimizer.py          | 120 ------------------
 1 file changed, 120 deletions(-)
 delete mode 100644 cpp/optimizers/LazierThanLazyGreedyOptimizer.py

diff --git a/cpp/optimizers/LazierThanLazyGreedyOptimizer.py b/cpp/optimizers/LazierThanLazyGreedyOptimizer.py
deleted file mode 100644
index 86e355c..0000000
--- a/cpp/optimizers/LazierThanLazyGreedyOptimizer.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import random
-import math
-
-class LazierThanLazyGreedyOptimizer:
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def equals(val1, val2, eps):
-        return abs(val1 - val2) < eps
-
-    @staticmethod
-    def print_sorted_set(sorted_set):
-        print("[", end="")
-        for val, elem in sorted_set:
-            print(f"({val}, {elem}), ", end="")
-        print("]")
-
-    def maximize(self, f_obj, budget, stop_if_zero_gain=False, stop_if_negative_gain=False,
-                    epsilon=0.1, verbose=False, show_progress=False, costs=None, cost_sensitive_greedy=False):
-            greedy_vector = []
-            greedy_set = set()
-
-            if costs is None:
-                greedy_vector.reserve(budget)
-                greedy_set.reserve(budget)
-
-            rem_budget = budget
-            remaining_set = set(f_obj.get_effective_ground_set())
-            n = len(remaining_set)
-            epsilon = 0.05
-            random_set_size = int((n / budget) * math.log(1 / epsilon))
-
-            if verbose:
-                print(f"Epsilon = {epsilon}")
-                print(f"Random set size = {random_set_size}")
-                print("Ground set:")
-                print(remaining_set)
-                print(f"Num elements in ground set = {len(remaining_set)}")
-                print("Starting the LazierThanLazy greedy algorithm")
-                print("Initial greedy set:")
-                print(greedy_set)
-
-            f_obj.clear_memoization()
-            best_id = None
-            best_val = None
-
-            i = 0
-            step = 1
-            display_next = step
-            percent = 0
-            N = rem_budget
-            iter_count = 0
-
-            while rem_budget > 0:
-                random_set = set()
-                while len(random_set) < random_set_size:
-                    elem = random.randint(0, n - 1)
-                    if elem in remaining_set and elem not in random_set:
-                        random_set.add(elem)
-
-                if verbose:
-                    print(f"Iteration {i}")
-                    print(f"Random set = {random_set}")
-                    print("Now running lazy greedy on the random set")
-
-                candidate_id = None
-                candidate_val = None
-                new_candidate_bound = None
-
-                # Compute gains only for the elements in the remaining set
-                gains = [(f_obj.marginal_gain_with_memoization(greedy_set, elem, False), elem)
-                        for elem in remaining_set]
-
-                for j, (val, elem) in enumerate(sorted(gains, key=lambda x: (-x[0], x[1]))):
-                    if elem in random_set and elem not in greedy_set:  # Check if the element is not already selected
-                        if verbose:
-                            print(f"Checking {elem}...")
-                        candidate_id = elem
-                        candidate_val = val
-                        new_candidate_bound = f_obj.marginal_gain_with_memoization(greedy_set, candidate_id, False)
-                        if verbose:
-                            print(f"Updated gain as per updated greedy set = {new_candidate_bound}")
-                        next_elem = gains[j + 1] if j + 1 < len(gains) else None
-                        if new_candidate_bound >= next_elem[0] if next_elem else float('-inf'):
-                            if verbose:
-                                print("..better than next best upper bound, "
-                                      "selecting...")
-                            best_id = candidate_id
-                            best_val = new_candidate_bound
-                            break
-
-                if verbose:
-                    print(f"Next best item to add is {best_id} and its value addition is {best_val}")
-
-                remaining_set.remove(best_id)
-
-                if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
-                    break
-                else:
-                    f_obj.update_memoization(greedy_set, best_id)
-                    greedy_set.add(best_id)
-                    greedy_vector.append((best_id, best_val))
-                    rem_budget -= 1
-
-                    if verbose:
-                        print(f"Added element {best_id} and the gain is {best_val}")
-                        print("Updated greedy set:", greedy_set)
-
-                    if show_progress:
-                        percent = int(((iter_count + 1.0) / N) * 100)
-                        if percent >= display_next:
-                            print("\r", "[" + "|" * (percent // 5) + " " * (100 // 5 - percent // 5) + "]", end="")
-                            print(f" {percent}% [Iteration {iter_count + 1} of {N}]", end="")
-                            display_next += step
-                        iter_count += 1
-
-                i += 1
-
-            return greedy_vector

From d2a9aba92d099817c26c122f5054fbe634aae72a Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:11:25 +0530
Subject: [PATCH 24/58] Delete cpp/optimizers/LazyGreedyOptimizer.py

---
 cpp/optimizers/LazyGreedyOptimizer.py | 97 ---------------------------
 1 file changed, 97 deletions(-)
 delete mode 100644 cpp/optimizers/LazyGreedyOptimizer.py

diff --git a/cpp/optimizers/LazyGreedyOptimizer.py b/cpp/optimizers/LazyGreedyOptimizer.py
deleted file mode 100644
index 45d7590..0000000
--- a/cpp/optimizers/LazyGreedyOptimizer.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import torch
-import heapq
-
-class LazyGreedyOptimizer:
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def equals(val1, val2, eps):
-        return abs(val1 - val2) < eps
-
-    def maximize(self, f_obj, budget, stop_if_zero_gain, stop_if_negative_gain,
-                 verbose, show_progress, costs, cost_sensitive_greedy):
-        greedy_vector = []
-        greedy_set = set()
-
-        # if not costs:
-        #     greedy_vector.reserve(budget)
-        #     greedy_set.reserve(budget)
-
-        rem_budget = budget
-        ground_set = f_obj.get_effective_ground_set()
-
-        if verbose:
-            print("Ground set:")
-            print(ground_set)
-            print(f"Num elements in groundset = {len(ground_set)}")
-            print("Costs:")
-            print(costs)
-            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
-            print("Starting the lazy greedy algorithm")
-            print("Initial greedy set:")
-            print(greedy_set)
-
-        f_obj.clear_memoization()
-
-        container = []
-        heapq.heapify(container)
-        max_heap = container
-
-        if cost_sensitive_greedy:
-            for elem in ground_set:
-                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False) / costs[elem]
-                heapq.heappush(max_heap, (-gain, elem))
-        else:
-            for elem in ground_set:
-                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
-                heapq.heappush(max_heap, (-gain, elem))
-
-        if verbose:
-            print("Max heap constructed")
-
-        step = 1
-        display_next = step
-        percent = 0
-        N = rem_budget
-        iter = 0
-
-        while rem_budget > 0 and max_heap:
-            current_max = heapq.heappop(max_heap)
-            current_max_gain, current_max_elem = -current_max[0], current_max[1]
-
-            if verbose:
-                print(f"currentMax element: {current_max_elem} and its upper bound: {current_max_gain}")
-
-            new_max_bound = f_obj.marginal_gain_with_memoization(greedy_set, current_max_elem, False)
-
-            if verbose:
-                print(f"newMaxBound: {new_max_bound}")
-
-            if new_max_bound >= -max_heap[0][0]:
-                if (new_max_bound < 0 and stop_if_negative_gain) or \
-                        (self.equals(new_max_bound, 0, 1e-5) and stop_if_zero_gain):
-                    break
-                else:
-                    f_obj.update_memoization(greedy_set, current_max_elem)
-                    greedy_set.add(current_max_elem)
-                    greedy_vector.append((current_max_elem, new_max_bound))
-                    rem_budget -= 1
-
-                    if verbose:
-                        print(f"Added element {current_max_elem} and the gain is {new_max_bound}")
-                        print("Updated greedySet:", greedy_set)
-
-                    if show_progress:
-                        percent = int(((iter + 1.0) / N) * 100)
-
-                        if percent >= display_next:
-                            print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]",
-                                  end=f" {percent}% [Iteration {iter + 1} of {N}]")
-                            display_next += step
-
-                        iter += 1
-            else:
-                heapq.heappush(max_heap, (-new_max_bound, current_max_elem))
-
-        return greedy_vector

From 50ec126a51822a61aaa1bb8a6efc92636164be0c Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:11:38 +0530
Subject: [PATCH 25/58] Delete cpp/optimizers/NaiveGreedyOptimizer.py

---
 cpp/optimizers/NaiveGreedyOptimizer.py | 90 --------------------------
 1 file changed, 90 deletions(-)
 delete mode 100644 cpp/optimizers/NaiveGreedyOptimizer.py

diff --git a/cpp/optimizers/NaiveGreedyOptimizer.py b/cpp/optimizers/NaiveGreedyOptimizer.py
deleted file mode 100644
index 728f16a..0000000
--- a/cpp/optimizers/NaiveGreedyOptimizer.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import torch
-import random
-from typing import List, Tuple, Set
-
-class NaiveGreedyOptimizer:
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def equals(val1, val2, eps):
-        return abs(val1 - val2) < eps
-
-    def maximize(
-        self, f_obj, budget, stop_if_zero_gain, stopIfNegativeGain, verbose, show_progress, costs, cost_sensitive_greedy
-    ):
-        greedy_vector = []
-        greedy_set = set()
-        if not costs:
-            # greedy_vector = [None] * budget
-            greedy_set = set()
-        rem_budget = budget
-        ground_set = f_obj.get_effective_ground_set()
-        #print(ground_set)
-        if verbose:
-            print("Ground set:")
-            print(ground_set)
-            print(f"Num elements in groundset = {len(ground_set)}")
-            print("Costs:")
-            print(costs)
-            print(f"Cost sensitive greedy: {cost_sensitive_greedy}")
-            print("Starting the naive greedy algorithm")
-            print("Initial greedy set:")
-            print(greedy_set)
-
-        f_obj.clear_memoization()
-        best_id = None
-        best_val = None
-        step = 1
-        display_next = step
-        percent = 0
-        N = rem_budget
-        iter_count = 0
-
-        while rem_budget > 0:
-            best_id = None
-            best_val = float("-inf")
-
-            for i in ground_set:
-                if i in greedy_set:
-                    continue
-                gain = f_obj.marginal_gain_with_memoization(greedy_set, i, False)
-                # print(gain)
-                if verbose:
-                    print(f"Gain of {i} is {gain}")
-
-                if gain > best_val:
-                    best_id = i
-                    best_val = gain
-
-            if verbose:
-                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
-
-            if (best_val < 0 and stopIfNegativeGain) or (
-                self.equals(best_val, 0, 1e-5) and stop_if_zero_gain
-            ):
-                break
-            else:
-                f_obj.update_memoization(greedy_set, best_id)
-                greedy_set.add(best_id)
-                greedy_vector.append((best_id, best_val))
-                rem_budget -= 1
-
-                if verbose:
-                    print(f"Added element {best_id} and the gain is {best_val}")
-                    print(f"Updated greedy set: {greedy_set}")
-
-                if show_progress:
-                    percent = int((iter_count + 1.0) / N * 100)
-
-                    if percent >= display_next:
-                        print(
-                            f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]",
-                            end="",
-                        )
-                        print(f"{percent}% [Iteration {iter_count + 1} of {N}]", end="")
-                        display_next += step
-
-                    iter_count += 1
-
-        return greedy_vector

From 427691665afe6f1012991c553df0944f7aaba54b Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:11:51 +0530
Subject: [PATCH 26/58] Delete cpp/optimizers/StochasticGreedyOptimizer.py

---
 cpp/optimizers/StochasticGreedyOptimizer.py | 105 --------------------
 1 file changed, 105 deletions(-)
 delete mode 100644 cpp/optimizers/StochasticGreedyOptimizer.py

diff --git a/cpp/optimizers/StochasticGreedyOptimizer.py b/cpp/optimizers/StochasticGreedyOptimizer.py
deleted file mode 100644
index bcc9263..0000000
--- a/cpp/optimizers/StochasticGreedyOptimizer.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import random
-from typing import List, Tuple, Set
-import math
-import sys
-# from StochasticGreedyOptimizer import SetFunction
-
-class StochasticGreedyOptimizer:
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def equals(val1: float, val2: float, eps: float) -> bool:
-        return abs(val1 - val2) < eps
-
-    def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool,
-                 stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True,
-                 show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]:
-        # TODO: Implement handling of equal guys and different sizes of each item later
-        # TODO: Implement cost-sensitive selection
-
-        greedy_vector = []
-        greedy_set = set()
-
-        # if not costs:
-        #     # Every element is of the same size, budget corresponds to cardinality
-        #     greedy_vector.reserve(budget)
-        #     greedy_set.reserve(budget)
-
-        rem_budget = budget
-        remaining_set = set(f_obj.get_effective_ground_set())
-        n = len(remaining_set)
-        epsilon = 0.05
-        random_set_size = int((n / budget) * math.log(1 / epsilon))
-        if verbose:
-            print(f"Epsilon = {epsilon}")
-            print(f"Random set size = {random_set_size}")
-            print("Ground set:")
-            print(" ".join(map(str, remaining_set)))
-            print(f"Num elements in groundset = {len(remaining_set)}")
-            print("Starting the stochastic greedy algorithm")
-            print("Initial greedy set:")
-            print(" ".join(map(str, greedy_set)))
-
-        f_obj.clear_memoization()
-        random.seed(1)
-        best_id = -1
-        best_val = -1 * float('inf')
-        i = 0
-        step = 1
-        display_next = step
-        percent = 0
-        N = rem_budget
-        iter = 0
-
-        while rem_budget > 0:
-            random_set = set()
-            while len(random_set) < random_set_size:
-                elem = random.randint(0, n - 1)
-                if elem in remaining_set and elem not in random_set:
-                    random_set.add(elem)
-
-            if verbose:
-                print(f"Iteration {i}")
-                print(f"Random set = {list(random_set)}")
-                print("Now running naive greedy on the random set")
-
-            best_id = -1
-            best_val = -1 * float('inf')
-
-            for elem in random_set:
-                gain = f_obj.marginal_gain_with_memoization(greedy_set, elem, False)
-                if gain > best_val:
-                    best_id = elem
-                    best_val = gain
-
-            if verbose:
-                if best_id == -1:
-                    raise ValueError("Nobody had greater gain than minus infinity!!")
-                print(f"Next best item to add is {best_id} and its value addition is {best_val}")
-
-            if (best_val < 0 and stop_if_negative_gain) or (self.equals(best_val, 0, 1e-5) and stop_if_zero_gain):
-                break
-            else:
-                f_obj.update_memoization(greedy_set, best_id)
-                greedy_set.add(best_id)
-                greedy_vector.append((best_id, best_val))
-                rem_budget -= 1
-                remaining_set.remove(best_id)
-
-                if verbose:
-                    print(f"Added element {best_id} and the gain is {best_val}")
-                    print("Updated greedy set:", " ".join(map(str, greedy_set)))
-
-                if show_progress:
-                    percent = int(((iter + 1.0) / N) * 100)
-                    if percent >= display_next:
-                        print(f"\r[{'|' * (percent // 5)}{' ' * (100 // 5 - percent // 5)}]", end="")
-                        print(f"{percent}% [Iteration {iter + 1} of {N}]", end="")
-                        sys.stdout.flush()
-                        display_next += step
-                    iter += 1
-
-            i += 1
-
-        return greedy_vector

From 9c997e777cbc8f1d7f3e10a18ef4173888512155 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:13:46 +0530
Subject: [PATCH 27/58] Create ProbabilisticSetCover.py

---
 pytorch/submod/ProbabilisticSetCover.py | 80 +++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 pytorch/submod/ProbabilisticSetCover.py

diff --git a/pytorch/submod/ProbabilisticSetCover.py b/pytorch/submod/ProbabilisticSetCover.py
new file mode 100644
index 0000000..aacb7c4
--- /dev/null
+++ b/pytorch/submod/ProbabilisticSetCover.py
@@ -0,0 +1,80 @@
+import torch
+from typing import List, Set, Tuple
+
+class ProbabilisticSetCover(SetFunction):
+    def __init__(self, n: int, ground_set_concept_probabilities: List[List[float]], num_concepts: int, concept_weights: List[float] = None):
+        super(SetFunction, self).__init__()
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.n = n
+        self.ground_set_concept_probabilities = ground_set_concept_probabilities
+        self.num_concepts = num_concepts
+        self.concept_weights = concept_weights
+
+        if self.concept_weights is None:
+            self.concept_weights = [1.0] * num_concepts
+        else:
+            self.concept_weights = torch.tensor(concept_weights, dtype=torch.float32).to(device)
+        self.prob_of_concepts_covered_by_X = num_concepts
+
+    def evaluate(self, X: Set[int]) -> float:
+        result = 0
+        if not X:
+            return result
+
+        for i in range(self.num_concepts):
+            product = 1
+            for elem in X:
+                product *= (1 - self.ground_set_concept_probabilities[elem][i])
+            result += self.concept_weights[i] * (1 - product)
+
+        return result
+
+    def evaluate_with_memoization(self, X: Set[int]) -> float:
+        result = 0
+        if not X:
+            return result
+
+        for i in range(self.num_concepts):
+            result += self.concept_weights[i] * (1 - self.prob_of_concepts_covered_by_X[i])
+
+        return result
+
+    def marginal_gain(self, X: Set[int], item: int) -> float:
+        gain = 0
+        if item in X:
+            return gain
+
+        for i in range(self.num_concepts):
+            old_concept_prod = 1
+            for elem in X:
+                old_concept_prod *= (1 - self.ground_set_concept_probabilities[elem][i])
+            gain += self.concept_weights[i] * old_concept_prod * self.ground_set_concept_probabilities[item][i]
+        return gain
+
+    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+        gain = 0
+        if enable_checks and item in X:
+            return gain
+        for i in range(self.num_concepts):
+            gain += self.concept_weights[i] * self.prob_of_concepts_covered_by_X[i] * self.ground_set_concept_probabilities[item][i]
+        return gain
+
+    def update_memoization(self, X: Set[int], item: int):
+        if item in X:
+            return
+
+        for i in range(self.num_concepts):
+            self.prob_of_concepts_covered_by_X[i] *= (1 - self.ground_set_concept_probabilities[item][i])
+
+    def get_effective_ground_set(self) -> Set[int]:
+        return set(range(self.n))
+
+    def clear_memoization(self):
+        self.prob_of_concepts_covered_by_X = torch.ones(self.num_concepts, dtype=torch.double)
+
+    def set_memoization(self, X: Set[int]):
+        self.clear_memoization()
+        temp = set()
+        for elem in X:
+            self.update_memoization(temp, elem)
+            temp.add(elem)

From 52a30c531e1c49fbd815bf7f02c703dda4caaa32 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:17:06 +0530
Subject: [PATCH 28/58] Update SetCover.py

---
 pytorch/submod/SetCover.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch/submod/SetCover.py b/pytorch/submod/SetCover.py
index 62ed068..a01d2c6 100644
--- a/pytorch/submod/SetCover.py
+++ b/pytorch/submod/SetCover.py
@@ -2,7 +2,7 @@
 import torch.nn as nn
 import numpy as np
 import random
-from SetFunction import SetFunction
+from ..SetFunction import SetFunction
 class SetCover(SetFunction):
     def __init__(self, n, cover_set, num_concepts, concept_weights = None):
         super(SetFunction, self).__init__()

From 036d04b6b861c133b9cd46f1bcaecdcf01e9c36d Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:17:36 +0530
Subject: [PATCH 29/58] Create __init__.py

---
 pytorch/submod/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 pytorch/submod/__init__.py

diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py
new file mode 100644
index 0000000..a6041d7
--- /dev/null
+++ b/pytorch/submod/__init__.py
@@ -0,0 +1 @@
+# /pytorch/SetFunction

From 1d7d0140350f0addcc7c9c88bff02438ab153af6 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:18:08 +0530
Subject: [PATCH 30/58] Update ProbabilisticSetCover.py

---
 pytorch/submod/ProbabilisticSetCover.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytorch/submod/ProbabilisticSetCover.py b/pytorch/submod/ProbabilisticSetCover.py
index aacb7c4..5eaaaa1 100644
--- a/pytorch/submod/ProbabilisticSetCover.py
+++ b/pytorch/submod/ProbabilisticSetCover.py
@@ -1,5 +1,6 @@
 import torch
 from typing import List, Set, Tuple
+from ..SetFunction import SetFunction
 
 class ProbabilisticSetCover(SetFunction):
     def __init__(self, n: int, ground_set_concept_probabilities: List[List[float]], num_concepts: int, concept_weights: List[float] = None):

From 694f358ec8d7295f2a92d91928700af69d9ac357 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 17 Jan 2024 15:18:38 +0530
Subject: [PATCH 31/58] Update __init__.py

---
 pytorch/submod/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py
index a6041d7..c8851c6 100644
--- a/pytorch/submod/__init__.py
+++ b/pytorch/submod/__init__.py
@@ -1 +1 @@
-# /pytorch/SetFunction
+# /pytorch/SetFunction/__init__.py

From 698f1f6a330127217154e1825db6d1c42c3b996d Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Fri, 19 Jan 2024 23:50:34 +0530
Subject: [PATCH 32/58] Rename StochasticGreedyOptimizer to
 StochasticGreedyOptimize.pyr

---
 .../{StochasticGreedyOptimizer => StochasticGreedyOptimize.pyr}   | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pytorch/optimizer/{StochasticGreedyOptimizer => StochasticGreedyOptimize.pyr} (100%)

diff --git a/pytorch/optimizer/StochasticGreedyOptimizer b/pytorch/optimizer/StochasticGreedyOptimize.pyr
similarity index 100%
rename from pytorch/optimizer/StochasticGreedyOptimizer
rename to pytorch/optimizer/StochasticGreedyOptimize.pyr

From 00f7f9fba34b75b75ab4f4a9c18b1ded108aaa67 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Fri, 19 Jan 2024 23:50:47 +0530
Subject: [PATCH 33/58] Rename StochasticGreedyOptimize.pyr to
 StochasticGreedyOptimize.py

---
 .../{StochasticGreedyOptimize.pyr => StochasticGreedyOptimize.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pytorch/optimizer/{StochasticGreedyOptimize.pyr => StochasticGreedyOptimize.py} (100%)

diff --git a/pytorch/optimizer/StochasticGreedyOptimize.pyr b/pytorch/optimizer/StochasticGreedyOptimize.py
similarity index 100%
rename from pytorch/optimizer/StochasticGreedyOptimize.pyr
rename to pytorch/optimizer/StochasticGreedyOptimize.py

From f268159f77c99d9d3146ff5937586bd7737d897e Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Sat, 20 Jan 2024 00:01:07 +0530
Subject: [PATCH 34/58] Update setCover.py

---
 submodlib/functions/setCover.py | 47 +++++++--------------------------
 1 file changed, 10 insertions(+), 37 deletions(-)

diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py
index eaf2c54..21d4038 100644
--- a/submodlib/functions/setCover.py
+++ b/submodlib/functions/setCover.py
@@ -2,48 +2,18 @@
 # Author: Vishal Kaushal <vishal.kaushal@gmail.com>
 from .setFunction import SetFunction
 from submodlib_cpp import SetCover
+from submodlib_pytorch import SetCover
+import torch
 
 class SetCoverFunction(SetFunction):
-	"""Implementation of the Set-Cover (SC) submodular function.
 	
-	For a subset :math:`A`, its Set Cover evaluation is defined as: 
-
-	.. math::
-			f(A) = w(\\cup_{a \\in A} \\gamma(a)) = w(\\gamma(A))
-
-	where :math:`\\gamma(A)` refers to the set of concepts covered by :math:`A`. Thus the set of all concepts :math:`\\mathcal{U} = \\gamma(\\mathcal{V})`. :math:`w` is a weight vector in :math:`\\Re^{|\\mathcal{U}|}`. Intuitively, each element in :math:`\\mathcal{V}` *covers* a set of elements from the concept set :math:`U` and hence :math:`w(\\gamma(A))` is total weight of concepts covered by elements in :math:`A`. Note that :math:`\\gamma(A \\cup B) = \\gamma(A) \\cup \\gamma(B)` and hence :math:`f(A \\cup B) = w(\\gamma(A \\cup B)) = w(\\gamma(A) \\cup \\gamma(B))`.
-
-	Alternatively we can also view the function as follows. With :math:`U` being the set of all concepts (namely :math:`U = \\gamma(\\mathcal{V})`) and :math:`c_u(i)` denoting whether the concept :math:`u \\in U` is covered by the element :math:`i \\in \\mathcal{V}` i.e :math:`c_u(i) = 1` if :math:`u \\in \\gamma(\\{i\\})` and is zero otherwise. We then define :math:`c_u(A) = \\sum_{a\\in A} c_u(a)` as the count of concept :math:`u` in set :math:`A`, and the weighted set cover can then be written as:
-		
-	.. math::
-			f(A) = \\sum_{u \\in U} w_u \\min(c_u(A), 1)
-			
-	.. note::
-			Set Cover functions models coverage of concepts and is monotone submodular.
-
-	Parameters
-	----------
-	n : int
-		Number of elements in the ground set, must be > 0.
-
-	cover_set : list
-		List of sets. Each set is the set of concepts covered by the corresponding data point / image. Hence cover_set is of size n.
-
-	num_concepts : int
-		Number of concepts.
-
-	concept_weights : list
-		Weight :math:`w_i` of each concept. Size must be same as num_concepts.
-
-	"""
-
 	def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 		self.n = n
 		self.cover_set = cover_set
 		self.num_concepts = num_concepts
 		self.concept_weights = concept_weights
 		self.cpp_obj = None
-		
+
 		if self.n <= 0:
 			raise Exception("ERROR: Number of elements in ground set must be positive")
 
@@ -55,8 +25,11 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 			    raise Exception("ERROR: Mismtach between num_conepts and len(concept_weights)")
 		else:
 			self.concept_weights = [1] * self.num_concepts
-
-		self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
+		print("starting setCover.py self.cpp_obj = SetCover line 40 (at 60)")
+		device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+		if device == "cuda":
+			self.pytorch_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
+		else:
+			self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
 		self.effective_ground = set(range(n))
-
-	
+		

From 4bc95552dc426e677355472a0caf71ca7c1f7223 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 23 Jan 2024 17:55:33 +0530
Subject: [PATCH 35/58] Update setCover.py

---
 submodlib/functions/setCover.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py
index 21d4038..e8c5466 100644
--- a/submodlib/functions/setCover.py
+++ b/submodlib/functions/setCover.py
@@ -27,7 +27,7 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 			self.concept_weights = [1] * self.num_concepts
 		print("starting setCover.py self.cpp_obj = SetCover line 40 (at 60)")
 		device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-		if device == "cuda":
+		if  "cuda" in device :
 			self.pytorch_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
 		else:
 			self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)

From 2a27493d93d5869a8539f12aa6d38495b85cc6e9 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 23 Jan 2024 17:56:20 +0530
Subject: [PATCH 36/58] Update setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 10ded66..74a62c5 100644
--- a/setup.py
+++ b/setup.py
@@ -70,7 +70,7 @@
 setup(
     name='submodlib',
     #packages=find_packages(include=['submodlib']),
-    packages=['submodlib', 'submodlib/functions'],
+    packages=['submodlib', 'submodlib/functions','pytorch'],
     #packages=find_packages('submodlib'),
     #package_dir={'':'submodlib'},
     #version='0.0.2',

From aeaaaeff79871746938d6ad3fa450fa01c8a0278 Mon Sep 17 00:00:00 2001
From: amajee11us <anay.majee@utdallas.edu>
Date: Tue, 23 Jan 2024 11:42:45 -0600
Subject: [PATCH 37/58] Fixed dependency tree for submodlib GPU implementation

---
 pytorch/SetFunction.py                                    | 8 ++++----
 pytorch/__init__.py                                       | 4 ++++
 ...sticGreedyOptimize.py => StochasticGreedyOptimizer.py} | 2 +-
 pytorch/optimizer/__init__.py                             | 5 +++++
 pytorch/submod/__init__.py                                | 2 ++
 submodlib/functions/setCover.py                           | 3 ++-
 6 files changed, 18 insertions(+), 6 deletions(-)
 rename pytorch/optimizer/{StochasticGreedyOptimize.py => StochasticGreedyOptimizer.py} (97%)
 create mode 100644 pytorch/optimizer/__init__.py

diff --git a/pytorch/SetFunction.py b/pytorch/SetFunction.py
index 6c7891c..24c139e 100644
--- a/pytorch/SetFunction.py
+++ b/pytorch/SetFunction.py
@@ -4,10 +4,10 @@
 import torch.nn as nn
 import numpy as np
 import random
-from optimizer.LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer
-from optimizer.LazyGreedyOptimizer import LazyGreedyOptimizer
-from optimizer.NaiveGreedyOptimizer import NaiveGreedyOptimizer
-from optimizer.StochasticGreedyOptimizer import StochasticGreedyOptimizer
+from pytorch.optimizer.LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer
+from pytorch.optimizer.LazyGreedyOptimizer import LazyGreedyOptimizer
+from pytorch.optimizer.NaiveGreedyOptimizer import NaiveGreedyOptimizer
+from pytorch.optimizer.StochasticGreedyOptimizer import StochasticGreedyOptimizer
 
 
 class SetFunction(nn.Module):
diff --git a/pytorch/__init__.py b/pytorch/__init__.py
index 766a9a5..b8f5042 100644
--- a/pytorch/__init__.py
+++ b/pytorch/__init__.py
@@ -1 +1,5 @@
 # /pytorch/__init__.py
+from .SetFunction import SetFunction
+
+from .optimizer import *
+from .submod import *
\ No newline at end of file
diff --git a/pytorch/optimizer/StochasticGreedyOptimize.py b/pytorch/optimizer/StochasticGreedyOptimizer.py
similarity index 97%
rename from pytorch/optimizer/StochasticGreedyOptimize.py
rename to pytorch/optimizer/StochasticGreedyOptimizer.py
index 081f379..0f70547 100644
--- a/pytorch/optimizer/StochasticGreedyOptimize.py
+++ b/pytorch/optimizer/StochasticGreedyOptimizer.py
@@ -11,7 +11,7 @@ def __init__(self):
     def equals(val1: float, val2: float, eps: float) -> bool:
         return abs(val1 - val2) < eps
 
-    def maximize(self, f_obj: SetFunction, budget: float, stop_if_zero_gain: bool,
+    def maximize(self, f_obj, budget: float, stop_if_zero_gain: bool,
                  stop_if_negative_gain: bool, epsilon: float = 1, verbose: bool = True,
                  show_progress: bool = False, costs: List[float] = None, cost_sensitive_greedy: bool = False) -> List[Tuple[int, float]]:
         # TODO: Implement handling of equal guys and different sizes of each item later
diff --git a/pytorch/optimizer/__init__.py b/pytorch/optimizer/__init__.py
new file mode 100644
index 0000000..4a1cbb3
--- /dev/null
+++ b/pytorch/optimizer/__init__.py
@@ -0,0 +1,5 @@
+# /pytorch/optimizer/__init__.py
+from .LazierThanLazyGreedyOptimizer import LazierThanLazyGreedyOptimizer
+from .LazyGreedyOptimizer import LazyGreedyOptimizer
+from .NaiveGreedyOptimizer import NaiveGreedyOptimizer
+from .StochasticGreedyOptimizer import StochasticGreedyOptimizer
diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py
index c8851c6..49d05d7 100644
--- a/pytorch/submod/__init__.py
+++ b/pytorch/submod/__init__.py
@@ -1 +1,3 @@
 # /pytorch/SetFunction/__init__.py
+from .SetCover import SetCover
+from .ProbabilisticSetCover import ProbabilisticSetCover
\ No newline at end of file
diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py
index e8c5466..83c9b3b 100644
--- a/submodlib/functions/setCover.py
+++ b/submodlib/functions/setCover.py
@@ -2,8 +2,9 @@
 # Author: Vishal Kaushal <vishal.kaushal@gmail.com>
 from .setFunction import SetFunction
 from submodlib_cpp import SetCover
-from submodlib_pytorch import SetCover
+# from submodlib.pytorch import SetCover
 import torch
+from pytorch.submod import SetCover
 
 class SetCoverFunction(SetFunction):
 	

From 57397c7dcb8830738b599cec866ce7cdd0abd588 Mon Sep 17 00:00:00 2001
From: amajee11us <anay.majee@utdallas.edu>
Date: Wed, 24 Jan 2024 09:51:06 -0600
Subject: [PATCH 38/58] Added fix for null object issue

---
 submodlib/functions/setCover.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/submodlib/functions/setCover.py b/submodlib/functions/setCover.py
index 83c9b3b..d67d5b3 100644
--- a/submodlib/functions/setCover.py
+++ b/submodlib/functions/setCover.py
@@ -1,10 +1,11 @@
 # setCover.py
 # Author: Vishal Kaushal <vishal.kaushal@gmail.com>
 from .setFunction import SetFunction
-from submodlib_cpp import SetCover
-# from submodlib.pytorch import SetCover
 import torch
-from pytorch.submod import SetCover
+if torch.cuda.is_available() :
+	from pytorch.submod import SetCover
+else:
+	from submodlib_cpp import SetCover
 
 class SetCoverFunction(SetFunction):
 	
@@ -27,9 +28,9 @@ def __init__(self, n, cover_set, num_concepts, concept_weights=None):
 		else:
 			self.concept_weights = [1] * self.num_concepts
 		print("starting setCover.py self.cpp_obj = SetCover line 40 (at 60)")
-		device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-		if  "cuda" in device :
-			self.pytorch_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
+		
+		if torch.cuda.is_available() :
+			self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
 		else:
 			self.cpp_obj = SetCover(self.n, self.cover_set, self.num_concepts, self.concept_weights)
 		self.effective_ground = set(range(n))

From ed70d00e5f82224c2ec286d854d6bb3cf6ba55ca Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 31 Jan 2024 00:12:58 +0530
Subject: [PATCH 39/58] Create GraphCut.py

---
 pytorch/submod/GraphCut.py | 346 +++++++++++++++++++++++++++++++++++++
 1 file changed, 346 insertions(+)
 create mode 100644 pytorch/submod/GraphCut.py

diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py
new file mode 100644
index 0000000..f2322ee
--- /dev/null
+++ b/pytorch/submod/GraphCut.py
@@ -0,0 +1,346 @@
+from typing import List, Set
+
+class GraphCutpy(SetFunction):
+    def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, 
+                 master_ground_kernel: List[List[float]] = None,
+                 ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None,
+                 arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False,
+                 ground: Set[int] = None):
+        super(SetFunction, self).__init__()
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.n = n
+        self.mode = mode
+        self.lambda_ = lambdaVal
+        self.separate_rep=separate_rep
+        self.n_rep = n_rep
+        self.partial = partial
+        self.original_to_partial_index_map = {}
+        self.mgsijs = mgsijs
+        self.ggsijs = ggsijs
+        self.data = data
+        self.data_rep=data_rep
+        self.metric = metric
+        self.num_neighbors = num_neighbors
+        self.effective_ground_set = set(range(n))
+        self.clusters=None
+        self.cluster_sijs=None
+        self.cluster_map=None
+        self.ggsijs = None
+        self.mgsijs = None
+        # self.cpp_ground_sub = {-1} #Provide a dummy set for pybind11 binding to be successful
+        self.content = None
+        self.effective_ground = None
+
+        if self.n <= 0:
+          raise Exception("ERROR: Number of elements in ground set must be positive")
+
+        if self.mode not in ['dense', 'sparse']:
+          raise Exception("ERROR: Incorrect mode. Must be one of 'dense' or 'sparse'")
+        if self.separate_rep == True:
+          if self.n_rep is None or self.n_rep <=0:
+            raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive")	
+          if self.mode != "dense":
+            raise Exception("Only dense mode supported if separate_rep = True")
+          if (type(self.mgsijs) != type(None)) and (type(self.mgsijs) != np.ndarray):
+            raise Exception("mgsijs provided, but is not dense")
+          if (type(self.ggsijs) != type(None)) and (type(self.ggsijs) != np.ndarray):
+            raise Exception("ggsijs provided, but is not dense")
+          
+        if mode == "dense":
+            self.master_ground_kernel = master_ground_kernel
+            self.ground_ground_kernel = ground_ground_kernel
+
+            if ground_ground_kernel is not None:
+                self.separate_master = True
+
+            if partial:
+                self.effective_ground_set = ground
+            else:
+                self.effective_ground_set = set(range(n))
+
+            self.num_effective_ground_set = len(self.effective_ground_set)
+
+            self.n_master = self.num_effective_ground_set
+            self.master_set = self.effective_ground_set
+
+            if partial:
+                self.original_to_partial_index_map = {elem: ind for ind, elem in enumerate(self.effective_ground_set)}
+
+            self.total_similarity_with_subset = [0] * self.num_effective_ground_set
+            self.total_similarity_with_master = [0] * self.num_effective_ground_set
+            for elem in self.effective_ground_set:
+                index = self.original_to_partial_index_map[elem] if partial else elem
+                self.total_similarity_with_subset[index] = 0
+                self.total_similarity_with_master[index] = 0
+                # for j in self.master_set:
+                #     self.total_similarity_with_master[index] += self.master_ground_kernel[j][elem]
+
+            if self.separate_rep == True:
+              if type(self.mgsijs) == type(None):
+                #not provided mgsij - make it
+                if (type(data) == type(None)) or (type(data_rep) == type(None)):
+                  raise Exception("Data missing to compute mgsijs")
+                if np.shape(self.data)[0]!=self.n or np.shape(self.data_rep)[0]!=self.n_rep:
+                  raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix")
+                self.mgsijs = np.array(subcp.create_kernel_NS(self.data.tolist(),self.data_rep.tolist(), self.metric))
+              else:
+                #provided mgsijs - verify it's dimensionality
+                if np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n_rep:
+                  raise Exception("ERROR: Inconsistency between n_rep, n and no of rows, columns of given mg kernel")
+              if type(self.ggsijs) == type(None):
+                #not provided ggsijs - make it
+                if type(data) == type(None):
+                  raise Exception("Data missing to compute ggsijs")
+                if self.num_neighbors is not None:
+                  raise Exception("num_neighbors wrongly provided for dense mode")
+                self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode
+                self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors))
+                print(self.content)
+                val = self.cpp_content[0]
+                row = list(self.cpp_content[1].astype(int))
+                col = list(self.cpp_content[2].astype(int))
+                self.ggsijs = np.zeros((n,n))
+                self.ggsijs[row,col] = val
+              else:
+                #provided ggsijs - verify it's dimensionality
+                if np.shape(self.ggsijs)[0]!=self.n or np.shape(self.ggsijs)[1]!=self.n:
+                  raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity gg kernel")
+
+            else:
+              if (type(self.ggsijs) == type(None)) and (type(self.mgsijs) == type(None)):
+                #no kernel is provided make ggsij kernel
+                if type(data) == type(None):
+                  raise Exception("Data missing to compute ggsijs")
+                if self.num_neighbors is not None:
+                  raise Exception("num_neighbors wrongly provided for dense mode")
+                self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode
+                self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors))
+                val = self.content[0]
+                row = list(self.content[1].astype(int))
+                col = list(self.content[2].astype(int))
+                self.ggsijs = np.zeros((n,n))
+                self.ggsijs[row,col] = val
+              elif (type(self.ggsijs) == type(None)) and (type(self.mgsijs) != type(None)):
+                #gg is not available, mg is - good
+                #verify that it is dense and of correct dimension
+                if (type(self.mgsijs) != np.ndarray) or np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n:
+                  raise Exception("ERROR: Inconsistency between n and no of rows, columns of given kernel")
+                self.ggsijs = self.mgsijs
+              elif (type(self.ggsijs) != type(None)) and (type(self.mgsijs) == type(None)):
+                #gg is available, mg is not - good
+                #verify that it is dense and of correct dimension
+                if (type(self.ggsijs) != np.ndarray) or np.shape(self.ggsijs)[1]!=self.n or np.shape(self.ggsijs)[0]!=self.n:
+                  raise Exception("ERROR: Inconsistency between n and no of rows, columns of given kernel")
+              else:
+                #both are available - something is wrong
+                raise Exception("Two kernels have been wrongly provided when separate_rep=False")
+        elif mode == "sparse":
+            if self.separate_rep == True:
+                raise Exception("Separate represented is supported only in dense mode")
+            if self.num_neighbors is None or self.num_neighbors <=0:
+              raise Exception("Valid num_neighbors is needed for sparse mode")
+            if (type(self.ggsijs) == type(None)) and (type(self.mgsijs) == type(None)):
+              #no kernel is provided make ggsij sparse kernel
+              if type(data) == type(None):
+                raise Exception("Data missing to compute ggsijs")
+              self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors))
+              val = self.content[0]
+              row = list(self.content[1].astype(int))
+              col = list(self.content[2].astype(int))
+              self.ggsijs = sparse.csr_matrix((val, (row, col)), [n,n])
+            elif (type(self.ggsijs) == type(None)) and (type(self.mgsijs) != type(None)):
+              #gg is not available, mg is - good
+              #verify that it is sparse
+              if type(self.mgsijs) != scipy.sparse.csr.csr_matrix:
+                raise Exception("Provided kernel is not sparse")
+              self.ggsijs = self.mgsijs
+            elif (type(self.ggsijs) != type(None)) and (type(self.mgsijs) == type(None)):
+              #gg is available, mg is not - good
+              #verify that it is dense and of correct dimension
+              if type(self.ggsijs) != scipy.sparse.csr.csr_matrix:
+                raise Exception("Provided kernel is not sparse")
+            else:
+              #both are available - something is wrong
+              raise Exception("Two kernels have been wrongly provided when separate_rep=False")
+
+        if self.separate_rep==None:
+            self.separate_rep = False
+
+        if self.mode=="dense" and self.separate_rep == False :
+            self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure
+            
+            if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11
+                                            #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
+              l=[]
+              l.append(self.ggsijs)
+              self.ggsijs=l
+
+            # self.cpp_obj = GraphCut(self.n, self.cpp_ggsijs, False, self.cpp_ground_sub, self.lambdaVal)
+          
+        elif self.mode=="dense" and self.separate_rep == True :
+            self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure
+            
+            if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11
+                                            #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
+              l=[]
+              l.append(self.ggsijs)
+              self.ggsijs=l
+            
+            self.mgsijs = self.mgsijs.tolist() #break numpy ndarray to native list of list datastructure
+            
+            if type(self.mgsijs[0])==int or type(self.mgsijs[0])==float: #Its critical that we pass a list of list to pybind11
+                                            #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
+              l=[]
+              l.append(self.mgsijs)
+              self.mgsijs=l
+
+            # self.cpp_obj = GraphCutpy(self.n, self.cpp_mgsijs, self.cpp_ggsijs, self.lambdaVal)
+
+        elif self.mode == "sparse":
+            self.ggsijs = {}
+            self.ggsijs['arr_val'] = self.ggsijs.data.tolist() #contains non-zero values in matrix (row major traversal)
+            self.ggsijs['arr_count'] = self.ggsijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row
+            self.ggsijs['arr_col'] = self.ggsijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val
+            # self.cpp_obj = GraphCutpy(self.n, self.cpp_ggsijs['arr_val'], self.cpp_ggsijs['arr_count'], self.cpp_ggsijs['arr_col'], lambdaVal)
+        else:
+            raise Exception("Invalid")
+
+        self.effective_ground = self.get_effective_ground_set()
+        print("it is done")
+
+      # mode == "sparse":
+        #     if not arr_val or not arr_count or not arr_col:
+        #         raise ValueError("Error: Empty/Corrupt sparse similarity kernel")
+
+        #     self.sparse_kernel = SparseSim(arr_val, arr_count, arr_col)
+
+        #     self.effective_ground_set = set(range(n))
+        #     self.num_effective_ground_set = len(self.effective_ground_set)
+
+        #     self.n_master = self.num_effective_ground_set
+        #     self.master_set = self.effective_ground_set
+
+        #     self.total_similarity_with_subset = [0] * n
+        #     self.total_similarity_with_master = [0] * n
+
+        #     for i in range(n):
+        #         self.total_similarity_with_subset[i] = 0
+        #         self.total_similarity_with_master[i] = 0
+
+        #         for j in range(n):
+        #             self.total_similarity_with_master[i] += self.sparse_kernel.get_val(j, i)
+
+        # else:
+        #     raise ValueError("Invalid mode")
+
+    def evaluate(self, X: Set[int]) -> float:
+        effective_x = X.intersection(self.effective_ground_set) if self.partial else X
+
+        if not effective_x:
+            return 0
+
+        result = 0
+
+        if self.mode == "dense":
+            for elem in effective_x:
+                index = self.original_to_partial_index_map[elem] if self.partial else elem
+                result += self.total_similarity_with_master[index]
+
+                for elem2 in effective_x:
+                    result -= self.lambda_ * self.ground_ground_kernel[elem][elem2]
+
+        elif self.mode == "sparse":
+            for elem in effective_x:
+                index = self.original_to_partial_index_map[elem] if self.partial else elem
+                result += self.total_similarity_with_master[index]
+
+                for elem2 in effective_x:
+                    result -= self.lambda_ * self.sparse_kernel.get_val(elem, elem2)
+
+        return result
+
+    def evaluate_with_memoization(self, X: Set[int]) -> float:
+        effective_x = X.intersection(self.effective_ground_set) if self.partial else X
+
+        if not effective_x:
+            return 0
+
+        result = 0
+
+        if self.mode == "dense" or self.mode == "sparse":
+            for elem in effective_x:
+                index = self.original_to_partial_index_map[elem] if self.partial else elem
+                result += self.total_similarity_with_master[index] - self.lambda_ * self.total_similarity_with_subset[index]
+
+        return result
+
+    def marginal_gain(self, X: Set[int], item: int) -> float:
+        effective_x = X.intersection(self.effective_ground_set) if self.partial else X
+
+        if item in effective_x or item not in self.effective_ground_set:
+            return 0
+
+        gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item]
+
+        if self.mode == "dense":
+            for elem in effective_x:
+                gain -= 2 * self.lambda_ * self.ground_ground_kernel[item][elem]
+            gain -= self.lambda_ * self.ground_ground_kernel[item][item]
+
+        elif self.mode == "sparse":
+            for elem in effective_x:
+                gain -= 2 * self.lambda_ * self.sparse_kernel.get_val(item, elem)
+            gain -= self.lambda_ * self.sparse_kernel.get_val(item, item)
+
+        return gain
+
+    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+        effective_x = X.intersection(self.effective_ground_set) if self.partial else X
+
+        if enable_checks and item in effective_x:
+            return 0
+
+        if self.partial and item not in self.effective_ground_set:
+            return 0
+
+        gain = 0
+
+        if self.mode == "dense":
+            index = self.original_to_partial_index_map[item] if self.partial else item
+            gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index]
+            # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.ground_ground_kernel[item][item]
+
+        elif self.mode == "sparse":
+            index = self.original_to_partial_index_map[item] if self.partial else item
+            gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.sparse_kernel.get_val(item, item)
+
+        return gain
+
+    def update_memoization(self, X: Set[int], item: int):
+        effective_x = X.intersection(self.effective_ground_set) if self.partial else X
+
+        if item in effective_x or item not in self.effective_ground_set:
+            return
+
+        if self.mode == "dense":
+            for elem in self.effective_ground_set:
+                index = self.original_to_partial_index_map[elem] if self.partial else elem
+                # self.total_similarity_with_subset[index] += self.ground_ground_kernel[elem][item]
+
+        elif self.mode == "sparse":
+            for elem in self.effective_ground_set:
+                index = self.original_to_partial_index_map[elem] if self.partial else elem
+                self.total_similarity_with_subset[index] += self.sparse_kernel.get_val(elem, item)
+
+    def get_effective_ground_set(self) -> Set[int]:
+        return self.effective_ground_set
+
+    def clear_memoization(self):
+        if self.mode == "dense" or self.mode == "sparse":
+            self.total_similarity_with_subset = [0] * self.num_effective_ground_set
+
+    def set_memoization(self, X: Set[int]):
+        temp = set()
+        for elem in X:
+            self.update_memoization(temp, elem)
+            temp.add(elem)

From 52828bede6b13a284f0ceb163fe0a098230a6bb8 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 31 Jan 2024 00:13:55 +0530
Subject: [PATCH 40/58] Update __init__.py

---
 pytorch/submod/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py
index 49d05d7..dfa5ac5 100644
--- a/pytorch/submod/__init__.py
+++ b/pytorch/submod/__init__.py
@@ -1,3 +1,4 @@
 # /pytorch/SetFunction/__init__.py
 from .SetCover import SetCover
-from .ProbabilisticSetCover import ProbabilisticSetCover
\ No newline at end of file
+from .ProbabilisticSetCover import ProbabilisticSetCover
+from .GraphCut import GraphCut

From f8765d9616f246c6d9ad278110dd36bc1bbed1bc Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 31 Jan 2024 00:14:13 +0530
Subject: [PATCH 41/58] Update GraphCut.py

---
 pytorch/submod/GraphCut.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py
index f2322ee..6bfd8e2 100644
--- a/pytorch/submod/GraphCut.py
+++ b/pytorch/submod/GraphCut.py
@@ -1,6 +1,6 @@
 from typing import List, Set
 
-class GraphCutpy(SetFunction):
+class GraphCut(SetFunction):
     def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, 
                  master_ground_kernel: List[List[float]] = None,
                  ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None,

From 72e82c9f1f981e43d1bbb531eb8b9b26d55a5a16 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Wed, 31 Jan 2024 21:44:57 +0530
Subject: [PATCH 42/58] pytorch version of helper.py

All the functions of helper.py have been converted to PyTorch.
They are currently used by graph cut, disparity min, and disparity sum.
---
 pytorch/submod/helper.py | 186 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 pytorch/submod/helper.py

diff --git a/pytorch/submod/helper.py b/pytorch/submod/helper.py
new file mode 100644
index 0000000..2f10c81
--- /dev/null
+++ b/pytorch/submod/helper.py
@@ -0,0 +1,186 @@
+import torch
+import torch.nn.functional as F
+from sklearn.cluster import Birch
+from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity, pairwise_distances
+from sklearn.neighbors import NearestNeighbors
+from scipy import sparse
+import pickle
+import time
+import os
+
+def cos_sim_square(A):
+    similarity = torch.matmul(A, A.t())
+    # Pairwise cosine similarity between the rows of A: entry (i, j) of the Gram
+    # matrix above ends up divided by |a_i| * |a_j|.
+    sq_norms = torch.diag(similarity)  # squared row norms sit on the diagonal
+
+    inv_sq_norms = 1 / sq_norms
+    # A zero row gives 1/0 = inf; reset it to 0 so its similarities come out as 0.
+    inv_sq_norms[torch.isinf(inv_sq_norms)] = 0
+    inv_norms = torch.sqrt(inv_sq_norms)
+
+    column_scaled = similarity * inv_norms
+    return column_scaled.t() * inv_norms
+
+def cos_sim_rectangle(A, B):
+    # Cosine similarity between every row of A and every row of B, shape (len(A), len(B)).
+    norms = torch.sqrt(torch.sum(A**2, dim=1)).unsqueeze(1) * torch.sqrt(torch.sum(B**2, dim=1)).unsqueeze(0)
+    cosine = torch.matmul(A, B.t()) / norms
+    return torch.where(norms > 0, cosine, torch.zeros_like(cosine))  # zero-norm rows give 0, not NaN
+
+def create_sparse_kernel(X, metric, num_neigh, n_jobs=1, method="sklearn"):
+    """Return a sparse k-nearest-neighbour similarity kernel for the rows of X.
+
+    Args:
+        X: 2-D float torch tensor, one row per data point.
+        metric: 'euclidean' or 'cosine'.
+        num_neigh: neighbours kept per row; -1 keeps every data point.
+        n_jobs, method: kept for signature compatibility; not used here.
+
+    Returns:
+        Coalesced torch sparse COO tensor of shape (n, n) holding the similarity
+        of each row to its retained neighbours.
+
+    Raises:
+        Exception: num_neigh exceeds the number of rows, or metric is unsupported.
+    """
+    n = X.shape[0]
+    if num_neigh > n:
+        raise Exception("ERROR: num of neighbors can't be more than the number of datapoints")
+    if num_neigh == -1:
+        num_neigh = n  # default is the total number of datapoints
+
+    if metric == 'euclidean':
+        distances = torch.cdist(X, X, p=2)
+        dense = torch.exp(-distances * (1 / X.shape[1]))  # same mapping as create_kernel_dense_sklearn
+    elif metric == 'cosine':
+        # F.cosine_similarity(X, X, dim=1) only compares each row with itself and
+        # returns a vector, so build the pairwise matrix from unit-norm rows.
+        unit_rows = F.normalize(X, p=2, dim=1)
+        dense = torch.matmul(unit_rows, unit_rows.t())
+        distances = 1 - dense
+    else:
+        raise Exception("ERROR: unsupported metric for this method of kernel creation")
+
+    # A point is not its own neighbour (inf distance), so it is kept only when k == n.
+    distances.fill_diagonal_(float('inf'))
+    _, ind = torch.topk(distances, k=num_neigh, largest=False)
+
+    row = torch.arange(n, device=ind.device).repeat_interleave(num_neigh)
+    col = ind.reshape(-1)
+    return torch.sparse_coo_tensor(torch.stack([row, col]), dense[row, col], dense.size()).coalesce()
+
+
+def create_kernel_dense(X, metric, method="sklearn"):
+    """Return the dense similarity kernel of X under `metric`.
+
+    Only method="sklearn" is supported; any other value raises Exception."""
+    if method != "sklearn":
+        raise Exception("For creating dense kernel, only 'sklearn' method is supported")
+    return create_kernel_dense_sklearn(X, metric)
+
+def create_kernel_dense_sklearn(X, metric, X_rep=None):
+    """Return a dense similarity kernel between the rows of X_rep (or X) and X.
+
+    Args:
+        X: 2-D float torch tensor, one row per data point.
+        metric: "euclidean" (exp(-distance / n_columns)), "cosine" or "dot".
+        X_rep: optional 2-D tensor with the same number of columns as X.
+
+    Returns:
+        Tensor of shape (X_rep.shape[0], X.shape[0]), or (X.shape[0], X.shape[0])
+        when X_rep is None.
+
+    Raises:
+        Exception: metric is not one of the supported values.
+    """
+    rows = X if X_rep is None else X_rep  # kernel rows come from X_rep when it is given
+
+    if metric == "euclidean":
+        D = torch.cdist(rows, X, p=2)
+        gamma = 1 / X.shape[1]
+        dense = torch.exp(-D * gamma)  # Obtaining Similarity from distance
+
+    elif metric == "cosine":
+        # F.cosine_similarity(a, b, dim=1) pairs row i of a with row i of b and
+        # returns a vector, so the pairwise matrix is built from unit-norm rows.
+        dense = torch.matmul(F.normalize(rows, p=2, dim=1), F.normalize(X, p=2, dim=1).t())
+
+    elif metric == "dot":
+        dense = torch.matmul(rows, X.t())
+
+    else:
+        raise Exception("ERROR: unsupported metric for this method of kernel creation")
+    assert dense.shape == (rows.shape[0], X.shape[0])
+    return dense
+
+
+def create_cluster_kernels(X, metric, cluster_lab=None, num_cluster=None, onlyClusters=False):
+    """Group the rows of X into clusters and build one similarity kernel per cluster.
+
+    Args:
+        X: 2-D float torch tensor, one row per data point.
+        metric: "euclidean" or "cosine".
+        cluster_lab: optional cluster id per row; when None, sklearn's Birch assigns them.
+        num_cluster: number of clusters; required when cluster_lab is given.
+        onlyClusters: when True, return (l_cluster, None, None) without building kernels.
+
+    Returns:
+        (l_cluster, l_kernel, l_ind): the row-index set of each cluster, the square
+        kernel of each cluster, and each row's position inside its cluster's kernel.
+
+    Raises:
+        Exception: cluster_lab given without num_cluster, or unsupported metric.
+    """
+    if cluster_lab is None:
+        obj = Birch(n_clusters=num_cluster)
+        obj.fit(X)
+        lab = obj.predict(X).tolist()
+        if num_cluster is None:
+            num_cluster = len(obj.subcluster_labels_)
+    else:
+        if num_cluster is None:
+            raise Exception("ERROR: num_cluster needs to be specified if cluster_lab is provided")
+        lab = cluster_lab
+
+    l_cluster = [set() for _ in range(num_cluster)]
+    members = [[] for _ in range(num_cluster)]  # row indices of each cluster, in order of appearance
+    l_ind = [0] * X.shape[0]
+    for i, el in enumerate(lab):
+        l_cluster[el].add(i)
+        l_ind[i] = len(members[el])
+        members[el].append(i)
+
+    if onlyClusters:
+        return l_cluster, None, None
+
+    if metric == "euclidean":
+        gamma = 1 / X.shape[1]
+        M = torch.exp(-torch.cdist(X, X) * gamma)  # similarity from distance
+    elif metric == "cosine":
+        # F.cosine_similarity(X, X, dim=1) yields one value per row, not a matrix; use unit rows.
+        unit_rows = F.normalize(X, p=2, dim=1)
+        M = torch.matmul(unit_rows, unit_rows.t())
+    else:
+        raise Exception("ERROR: unsupported metric")
+
+    index_sets = [torch.tensor(idx, dtype=torch.long, device=M.device) for idx in members]
+    return l_cluster, [M[idx][:, idx] for idx in index_sets], l_ind  # each cluster's block of M
+
+def create_kernel(X, metric, mode="dense", num_neigh=-1, n_jobs=1, X_rep=None, method="sklearn"):
+    """Build a similarity kernel for X: mode "dense" calls the module-level builder
+    named 'create_kernel_dense_' + method, mode "sparse" calls create_sparse_kernel."""
+    if X_rep is not None:
+        assert X_rep.shape[1] == X.shape[1]  # both inputs must have the same number of columns
+
+    if mode == "dense":
+        dense_builder = globals()['create_kernel_dense_'+method]
+        return torch.tensor(dense_builder(X, metric, X_rep))
+
+    elif mode == "sparse":
+        if X_rep is not None:
+            raise Exception("Sparse mode is not supported for separate X_rep")
+        return create_sparse_kernel(X, metric, num_neigh, n_jobs, method)
+
+    else:
+        raise Exception("ERROR: unsupported mode")

From 7cc626e337ea16fe62c95a56a59439e6fc8e2673 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Sun, 4 Feb 2024 15:15:41 +0530
Subject: [PATCH 43/58] dense mode of with helper functions GraphCut.py

---
 pytorch/submod/GraphCut.py | 121 ++++++++++++++++++++++++-------------
 1 file changed, 78 insertions(+), 43 deletions(-)

diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py
index 6bfd8e2..ed5a93d 100644
--- a/pytorch/submod/GraphCut.py
+++ b/pytorch/submod/GraphCut.py
@@ -1,7 +1,13 @@
 from typing import List, Set
-
-class GraphCut(SetFunction):
-    def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None, 
+import random
+from helper import *
+
+class GraphCutpy(SetFunction):
+    # def __init__(self, n: int, mode: str, metric: str, master_ground_kernel: List[List[float]] = None,
+    #              ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None,
+    #              arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False,
+    #              ground: Set[int] = None, lambdaVal: float = 0.0):
+    def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None,
                  master_ground_kernel: List[List[float]] = None,
                  ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None,
                  arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False,
@@ -27,7 +33,6 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
         self.cluster_map=None
         self.ggsijs = None
         self.mgsijs = None
-        # self.cpp_ground_sub = {-1} #Provide a dummy set for pybind11 binding to be successful
         self.content = None
         self.effective_ground = None
 
@@ -38,14 +43,14 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
           raise Exception("ERROR: Incorrect mode. Must be one of 'dense' or 'sparse'")
         if self.separate_rep == True:
           if self.n_rep is None or self.n_rep <=0:
-            raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive")	
+            raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive")
           if self.mode != "dense":
             raise Exception("Only dense mode supported if separate_rep = True")
           if (type(self.mgsijs) != type(None)) and (type(self.mgsijs) != np.ndarray):
             raise Exception("mgsijs provided, but is not dense")
           if (type(self.ggsijs) != type(None)) and (type(self.ggsijs) != np.ndarray):
             raise Exception("ggsijs provided, but is not dense")
-          
+
         if mode == "dense":
             self.master_ground_kernel = master_ground_kernel
             self.ground_ground_kernel = ground_ground_kernel
@@ -66,14 +71,16 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
             if partial:
                 self.original_to_partial_index_map = {elem: ind for ind, elem in enumerate(self.effective_ground_set)}
 
-            self.total_similarity_with_subset = [0] * self.num_effective_ground_set
-            self.total_similarity_with_master = [0] * self.num_effective_ground_set
+            self.total_similarity_with_subset = [random.random() for _ in range(self.num_effective_ground_set)]
+            self.total_similarity_with_master = [random.random() for _ in range(self.num_effective_ground_set)]
+            self.master_ground_kernel = [[random.random() for _ in range(self.num_effective_ground_set)] for _ in range(self.num_effective_ground_set)]
+            self.ground_ground_kernel = [[random.random() for _ in range(self.num_effective_ground_set)] for _ in range(self.num_effective_ground_set)]
             for elem in self.effective_ground_set:
                 index = self.original_to_partial_index_map[elem] if partial else elem
-                self.total_similarity_with_subset[index] = 0
-                self.total_similarity_with_master[index] = 0
-                # for j in self.master_set:
-                #     self.total_similarity_with_master[index] += self.master_ground_kernel[j][elem]
+                self.total_similarity_with_subset[index] = 1
+                self.total_similarity_with_master[index] = 1
+                for j in self.master_set:
+                    self.total_similarity_with_master[index] += self.master_ground_kernel[j][elem]
 
             if self.separate_rep == True:
               if type(self.mgsijs) == type(None):
@@ -82,7 +89,9 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
                   raise Exception("Data missing to compute mgsijs")
                 if np.shape(self.data)[0]!=self.n or np.shape(self.data_rep)[0]!=self.n_rep:
                   raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix")
-                self.mgsijs = np.array(subcp.create_kernel_NS(self.data.tolist(),self.data_rep.tolist(), self.metric))
+
+                #create_kernel_NS is there .................... find it and define it not found in helper.py but used as here
+                # self.mgsijs = np.array(subcp.create_kernel_NS(self.data.tolist(),self.data_rep.tolist(), self.metric))
               else:
                 #provided mgsijs - verify it's dimensionality
                 if np.shape(self.mgsijs)[1]!=self.n or np.shape(self.mgsijs)[0]!=self.n_rep:
@@ -94,8 +103,7 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
                 if self.num_neighbors is not None:
                   raise Exception("num_neighbors wrongly provided for dense mode")
                 self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode
-                self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors))
-                print(self.content)
+                self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense())
                 val = self.cpp_content[0]
                 row = list(self.cpp_content[1].astype(int))
                 col = list(self.cpp_content[2].astype(int))
@@ -114,7 +122,7 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
                 if self.num_neighbors is not None:
                   raise Exception("num_neighbors wrongly provided for dense mode")
                 self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode
-                self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors))
+                self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense())
                 val = self.content[0]
                 row = list(self.content[1].astype(int))
                 col = list(self.content[2].astype(int))
@@ -143,7 +151,7 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
               #no kernel is provided make ggsij sparse kernel
               if type(data) == type(None):
                 raise Exception("Data missing to compute ggsijs")
-              self.content = np.array(subcp.create_kernel(self.data.tolist(), self.metric, self.num_neighbors))
+              self.content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors).to_dense())
               val = self.content[0]
               row = list(self.content[1].astype(int))
               col = list(self.content[2].astype(int))
@@ -168,26 +176,24 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
 
         if self.mode=="dense" and self.separate_rep == False :
             self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure
-            
+
             if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11
                                             #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
               l=[]
               l.append(self.ggsijs)
               self.ggsijs=l
 
-            # self.cpp_obj = GraphCut(self.n, self.cpp_ggsijs, False, self.cpp_ground_sub, self.lambdaVal)
-          
         elif self.mode=="dense" and self.separate_rep == True :
             self.ggsijs = self.ggsijs.tolist() #break numpy ndarray to native list of list datastructure
-            
+
             if type(self.ggsijs[0])==int or type(self.ggsijs[0])==float: #Its critical that we pass a list of list to pybind11
                                             #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
               l=[]
               l.append(self.ggsijs)
               self.ggsijs=l
-            
+
             self.mgsijs = self.mgsijs.tolist() #break numpy ndarray to native list of list datastructure
-            
+
             if type(self.mgsijs[0])==int or type(self.mgsijs[0])==float: #Its critical that we pass a list of list to pybind11
                                             #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
               l=[]
@@ -198,17 +204,18 @@ def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=Non
 
         elif self.mode == "sparse":
             self.ggsijs = {}
-            self.ggsijs['arr_val'] = self.ggsijs.data.tolist() #contains non-zero values in matrix (row major traversal)
-            self.ggsijs['arr_count'] = self.ggsijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row
-            self.ggsijs['arr_col'] = self.ggsijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val
-            # self.cpp_obj = GraphCutpy(self.n, self.cpp_ggsijs['arr_val'], self.cpp_ggsijs['arr_count'], self.cpp_ggsijs['arr_col'], lambdaVal)
+            # self.ggsijs['arr_val'] = self.ggsijs.data.tolist() #contains non-zero values in matrix (row major traversal)
+            # self.ggsijs['arr_count'] = self.ggsijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row
+            # self.ggsijs['arr_col'] = self.ggsijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val
+            # # self.cpp_obj = GraphCutpy(self.n, self.cpp_ggsijs['arr_val'], self.cpp_ggsijs['arr_count'], self.cpp_ggsijs['arr_col'], lambdaVal)
         else:
             raise Exception("Invalid")
 
         self.effective_ground = self.get_effective_ground_set()
-        print("it is done")
 
-      # mode == "sparse":
+        # if mode == "dense":
+
+        # elif mode == "sparse":
         #     if not arr_val or not arr_count or not arr_col:
         #         raise ValueError("Error: Empty/Corrupt sparse similarity kernel")
 
@@ -291,31 +298,59 @@ def marginal_gain(self, X: Set[int], item: int) -> float:
             for elem in effective_x:
                 gain -= 2 * self.lambda_ * self.sparse_kernel.get_val(item, elem)
             gain -= self.lambda_ * self.sparse_kernel.get_val(item, item)
-
         return gain
 
-    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
-        effective_x = X.intersection(self.effective_ground_set) if self.partial else X
+    # def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+    #     effective_x = X.intersection(self.effective_ground_set) if self.partial else X
 
-        if enable_checks and item in effective_x:
-            return 0
+    #     if enable_checks and item in effective_x:
+    #         return 0
 
-        if self.partial and item not in self.effective_ground_set:
-            return 0
+    #     if self.partial and item not in self.effective_ground_set:
+    #         return 0
 
+    #     gain = 0
+
+    #     if self.mode == "dense":
+    #         index = self.original_to_partial_index_map[item] if self.partial else item
+    #         gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index]
+    #         gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.ground_ground_kernel[item][item]
+
+    #     elif self.mode == "sparse":
+    #         index = self.original_to_partial_index_map[item] if self.partial else item
+    #         gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.sparse_kernel.get_val(item, item)
+
+    #     return gain
+
+
+    def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool) -> float:
+        effective_X = set()
         gain = 0
+        if self.partial:
+            effective_X = X.intersection(self.effective_ground_set)
+        else:
+            effective_X = X
 
-        if self.mode == "dense":
-            index = self.original_to_partial_index_map[item] if self.partial else item
-            gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index]
-            # gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.ground_ground_kernel[item][item]
+        if enable_checks and item in effective_X:
+            return 0
 
-        elif self.mode == "sparse":
-            index = self.original_to_partial_index_map[item] if self.partial else item
-            gain = self.total_similarity_with_master[index] - 2 * self.lambda_ * self.total_similarity_with_subset[index] - self.lambda_ * self.sparse_kernel.get_val(item, item)
+        if self.partial and item not in self.effective_ground_set:
+            return 0
 
+        if self.mode == 'dense':
+            gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] \
+                  - 2 * self.lambda_ * self.total_similarity_with_subset[self.original_to_partial_index_map[item] if self.partial else item] \
+                  - self.lambda_ * self.ground_ground_kernel[item][item]
+        elif self.mode == 'sparse':
+            gain = self.total_similarity_with_master[self.original_to_partial_index_map[item] if self.partial else item] \
+                  - 2 * self.lambda_ * self.total_similarity_with_subset[self.original_to_partial_index_map[item] if self.partial else item] \
+                  - self.lambda_ * self.sparse_kernel.get_val(item, item)
+        else:
+            raise ValueError("Error: Only dense and sparse mode supported")
+        # print("gain value",gain)
         return gain
 
+
     def update_memoization(self, X: Set[int], item: int):
         effective_x = X.intersection(self.effective_ground_set) if self.partial else X
 

From 766e43268e2426f6d29a4aa4afe9b792e3ea5ac1 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Sun, 4 Feb 2024 15:18:04 +0530
Subject: [PATCH 44/58] Required functions of  helper.py

---
 pytorch/submod/helper.py | 80 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/pytorch/submod/helper.py b/pytorch/submod/helper.py
index 2f10c81..7832a32 100644
--- a/pytorch/submod/helper.py
+++ b/pytorch/submod/helper.py
@@ -7,6 +7,14 @@
 import pickle
 import time
 import os
+import numpy as np
+from typing import List, Dict, Union
+from math import sqrt
+
+# Define type aliases for clarity
+Vector = List[float]
+Matrix = List[Vector]
+Set = List[int]  # Considering integer elements for simplicity
 
 def cos_sim_square(A):
     similarity = torch.matmul(A, A.t())
@@ -184,3 +192,75 @@ def create_kernel(X, metric, mode="dense", num_neigh=-1, n_jobs=1, X_rep=None, m
 
     else:
         raise Exception("ERROR: unsupported mode")
+
+
+
+# Euclidean similarity in (0, 1]: exp(-distance / dim), the same mapping create_kernel_dense_sklearn uses
+def euclidean_similarity(a: Vector, b: Vector) -> float:
+    return float(np.exp(-np.linalg.norm(np.array(a) - np.array(b)) / max(len(a), 1)))
+
+# Cosine of the angle between a and b; defined as 0 when either vector has zero norm
+def cosine_similarity(a: Vector, b: Vector) -> float:
+    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
+    if norm_product > 0:
+        return np.dot(a, b) / norm_product
+    return 0
+
+# Dot product of a and b, delegated to np.dot
+def dot_prod(a: Vector, b: Vector) -> float:
+    return np.dot(a, b)
+
+# Create kernel function for non-square kernel
+def create_kernel_NS(X_ground: Matrix, X_master: Matrix, metric: str = "euclidean") -> Matrix:
+    """Return the len(X_master) x len(X_ground) kernel; entry [r][c] compares X_master[r] with X_ground[c].
+
+    metric is "euclidean", "cosine" or "dot"; anything else raises ValueError
+    as soon as the first entry is computed.
+    """
+    def similarity(master_row, ground_row):
+        if metric == "euclidean":
+            return euclidean_similarity(master_row, ground_row)
+        if metric == "cosine":
+            return cosine_similarity(master_row, ground_row)
+        if metric == "dot":
+            return dot_prod(master_row, ground_row)
+        raise ValueError("Unsupported metric for kernel computation in Python")
+
+    return [[similarity(master_row, ground_row) for ground_row in X_ground] for master_row in X_master]
+
+# Create square kernel function
+def create_square_kernel_dense(X_ground: Matrix, metric: str = "euclidean") -> Matrix:
+    """Return the symmetric pairwise kernel of X_ground as a list of lists.
+
+    metric: "euclidean" or "cosine" (both with a 1.0 diagonal) or "dot"; anything else raises ValueError.
+    """
+    n_ground = len(X_ground)
+    k_dense = [[0] * n_ground for _ in range(n_ground)]
+
+    def store(r, c, sim):  # write one value into both mirrored cells
+        k_dense[r][c] = sim
+        k_dense[c][r] = sim
+
+    if metric == "euclidean":
+        for r in range(n_ground):
+            k_dense[r][r] = 1.0
+            for c in range(r + 1, n_ground):
+                store(r, c, euclidean_similarity(X_ground[r], X_ground[c]))
+    elif metric == "cosine":
+        norms = [sqrt(dot_prod(row, row)) for row in X_ground]  # each row norm computed once
+        for r in range(n_ground):
+            k_dense[r][r] = 1.0
+            for c in range(r + 1, n_ground):
+                sim, scale = dot_prod(X_ground[r], X_ground[c]), norms[r] * norms[c]
+                store(r, c, sim / scale if scale > 0 else 0)
+    elif metric == "dot":
+        for r, c in ((r, c) for r in range(n_ground) for c in range(r, n_ground)):
+            store(r, c, dot_prod(X_ground[r], X_ground[c]))
+    else:
+        raise ValueError("Unsupported metric for kernel computation in Python")
+    return k_dense
+
+# Elements common to a and b, returned as a list (duplicates collapse; order is whatever the set yields)
+def set_intersection(a: Set, b: Set) -> Set:
+    return list(set(a) & set(b))  # Set is this module's List[int] alias, hence the list() conversion
+

From 1efe9bf0f61c46e584ef1f9a6087b57096552dd1 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Sun, 4 Feb 2024 15:20:09 +0530
Subject: [PATCH 45/58] Function of dense mode only FacilityLocation.py

---
 pytorch/submod/FacilityLocation.py | 429 +++++++++++++++++++++++++++++
 1 file changed, 429 insertions(+)
 create mode 100644 pytorch/submod/FacilityLocation.py

diff --git a/pytorch/submod/FacilityLocation.py b/pytorch/submod/FacilityLocation.py
new file mode 100644
index 0000000..57c1c7b
--- /dev/null
+++ b/pytorch/submod/FacilityLocation.py
@@ -0,0 +1,429 @@
+import numpy as np
+import scipy
+from scipy import sparse
+from helper import *
+class FacilityLocationFunction(SetFunction):
+    def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, data_rep=None, num_clusters=None, cluster_labels=None, metric="cosine", num_neighbors=None,
+                 dense_kernel = None, data_master = None, create_dense_cpp_kernel_in_python = True, partial = False, seperate_master = False):
+        """Validate the arguments and set up the function; only dense mode is fully wired up so far.
+
+        Raises Exception for a non-positive n, an unknown mode, or inconsistent
+        cluster/kernel/data arguments (see the messages below).
+        In dense mode the kernel is dense_kernel when given; otherwise it is built from
+        data (and data_master when seperate_master is True) using metric.
+        """
+        self.n = n
+        self.n_rep = n_rep
+        self.mode = mode
+        self.metric = metric
+        self.sijs = sijs
+        self.data = data
+        self.partial = partial
+        self.data_rep = data_rep
+        self.num_neighbors = num_neighbors
+        self.separate_rep = separate_rep
+        self.clusters = None
+        self.cluster_sijs = None
+        self.cluster_map = None
+        self.cluster_labels = cluster_labels
+        self.num_clusters = num_clusters
+        self.cpp_obj = None
+        self.cpp_sijs = None
+        self.cpp_ground_sub = None
+        self.cpp_content = None
+        self.effective_ground = None
+        self.seperate_master = self.separate_master = seperate_master  # misspelled attribute kept; the correct spelling is what the dense setup reads
+        self.dense_kernel = dense_kernel
+        self.data_master = data_master
+
+        if self.n <= 0:
+            raise Exception("ERROR: Number of elements in ground set must be positive")
+
+        if self.mode not in ['dense', 'sparse', 'clustered']:
+            raise Exception("ERROR: Incorrect mode. Must be one of 'dense', 'sparse' or 'clustered'")
+
+        if self.separate_rep == True:
+            if self.n_rep is None or self.n_rep <= 0:
+                raise Exception("ERROR: separate represented intended but number of elements in represented not specified or not positive")
+            if self.mode != "dense":
+                raise Exception("Only dense mode supported if separate_rep = True")
+
+        if self.mode == "clustered":
+            if type(self.cluster_labels) != type(None) and (self.num_clusters  is None or self.num_clusters <= 0):
+                raise Exception("ERROR: Positive number of clusters must be provided in clustered mode when cluster_labels is provided")
+            if type(self.cluster_labels) == type(None) and self.num_clusters is not None and self.num_clusters <= 0:
+                raise Exception("Invalid number of clusters provided")
+            if type(self.cluster_labels) != type(None) and len(self.cluster_labels) != self.n:
+                raise Exception("ERROR: cluster_labels's size is NOT same as ground set size")
+            if type(self.cluster_labels) != type(None) and not all(ele >= 0 and ele <= self.num_clusters-1 for ele in self.cluster_labels):
+                raise Exception("Cluster IDs/labels contain invalid values")
+
+        if type(self.sijs) != type(None):
+            if create_dense_cpp_kernel_in_python == False:
+                raise Exception("ERROR: create_dense_cpp_kernel_in_python is to be set to False ONLY when a similarity kernel is not provided and a CPP kernel is desired to be created in CPP")
+            if isinstance(self.sijs, sparse.csr_matrix):  # public name; scipy.sparse.csr is a deprecated private namespace
+                if num_neighbors is None or num_neighbors <= 0:
+                    raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel")
+                if mode != "sparse":
+                    raise Exception("ERROR: Sparse kernel provided, but mode is not sparse")
+            elif type(self.sijs) == np.ndarray:
+                if self.separate_rep is None:
+                    raise Exception("ERROR: separate_rep bool must be specified with custom dense kernel")
+                if mode != "dense":
+                    raise Exception("ERROR: Dense kernel provided, but mode is not dense")
+            else:
+                raise Exception("Invalid kernel provided")
+
+            if self.separate_rep == True:
+                if np.shape(self.sijs)[1] != self.n or np.shape(self.sijs)[0] != self.n_rep:
+                    raise Exception("ERROR: Inconsistency between n_rep, n and no of rows, columns of given kernel")
+            else:
+                if np.shape(self.sijs)[0] != self.n or np.shape(self.sijs)[1] != self.n:
+                    raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel")
+
+            if type(self.data) != type(None) or type(self.data_rep) != type(None):
+                print("WARNING: similarity kernel found. Provided data matrix will be ignored.")
+        else:
+            if type(self.data) != type(None):
+                if self.separate_rep == True:
+                    if type(self.data_rep) == type(None):
+                        raise Exception("Represented data matrix not given")
+                    if np.shape(self.data)[0] != self.n or np.shape(self.data_rep)[0] != self.n_rep:
+                        raise Exception("ERROR: Inconsistentcy between n, n_rep and no of examples in the given ground data matrix and represented data matrix")
+                else:
+                    if type(self.data_rep) != type(None):
+                        print("WARNING: Represented data matrix not required but given, will be ignored.")
+                    if np.shape(self.data)[0] != self.n:
+                        raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix")
+
+                if self.mode == "clustered":
+                    self.clusters, self.cluster_sijs, self.cluster_map = create_cluster_kernels(self.data.tolist(), self.metric, self.cluster_labels, self.num_clusters)
+                else:
+                    if self.separate_rep == True:
+                        if create_dense_cpp_kernel_in_python == True:
+                            self.sijs = np.array(create_kernel_NS(self.data.tolist(), self.data_rep.tolist(), self.metric))
+                    else:
+                        if self.mode == "dense":
+                            if self.num_neighbors is not None:
+                                raise Exception("num_neighbors wrongly provided for dense mode")
+                            if create_dense_cpp_kernel_in_python == True:
+                                pass
+                                # self.sijs = np.array(create_square_kernel_dense(self.data.tolist(), self.metric))
+                        else:
+                            self.cpp_content = np.array(create_kernel(self.data.tolist(), self.metric, self.num_neighbors))
+                            val = self.cpp_content[0]
+                            row = list(self.cpp_content[1].astype(int))
+                            col = list(self.cpp_content[2].astype(int))
+                            self.sijs = sparse.csr_matrix((val, (row, col)), [n,n])
+            else:
+                raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided")
+
+        if separate_rep is None:
+            self.separate_rep = False
+        # NOTE(review): the chain below hangs off the check above, so it is skipped when separate_rep is None - confirm intent
+        elif self.mode == "sparse":
+            self.cpp_sijs = {}
+            self.cpp_sijs["arr_val"] = self.sijs.data.tolist()
+            self.cpp_sijs["arr_count"] = self.sijs.indptr.tolist()
+            self.cpp_sijs["arr_col"] = self.sijs.indices.tolist()
+            # self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs["arr_val"], self.cpp_sijs["arr_count"], self.cpp_sijs["arr_col"])
+        elif self.mode == "clustered":
+            l_temp = []
+            for el in self.cluster_sijs:
+                temp = el.tolist()
+                if isinstance(temp[0], int) or isinstance(temp[0], float):
+                    l = []
+                    l.append(temp)
+                    temp = l
+                l_temp.append(temp)
+            self.cluster_sijs = l_temp
+
+        if self.mode == 'dense':
+          if self.dense_kernel is None:  # identity test: `== None` is ambiguous for numpy arrays
+            self.dense_constructor_no_kernel(n = self.n, data = self.data, data_master = self.data_master, separate_master = self.separate_master, metric = self.metric) ## dense mode with no dense_kernel
+          else:
+            self.dense_constructor(n = self.n, dense_kernel = self.dense_kernel, ground = self.data, partial = self.partial, separate_master = self.separate_master) ## dense mode with dense_kernel
+        # sparse and clustered modes: their constructors are not wired up here yet
+
+        self.effective_ground = self.get_effective_ground_set()
+
+
+    def dense_constructor(self, n, dense_kernel, partial = False, ground = None, separate_master = False):
+        """Initialise dense-mode state from a caller-supplied kernel.
+
+        With partial=True the effective ground set is `ground`; otherwise it is {0..n-1}.
+        With separate_master=True the master set is the row indices of dense_kernel;
+        otherwise it is the effective ground set itself.
+        """
+        self.n, self.mode = n, 'dense'
+        self.dense_kernel = dense_kernel
+        self.partial, self.separate_master = partial, separate_master
+
+        ground_set = ground if partial else set(range(n))
+        self.effective_ground_set = ground_set
+        self.num_effective_groundset = len(ground_set)
+
+        if not separate_master:
+            self.n_master = self.num_effective_groundset
+            self.master_set = ground_set
+        else:
+            self.n_master = len(dense_kernel)
+            self.master_set = set(range(self.n_master))
+
+        self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master)
+        if partial:
+            self.original_to_partial_index_map = {elem: pos for pos, elem in enumerate(ground_set)}
+
+    # Constructor for dense mode (kernel not supplied)
+    def dense_constructor_no_kernel(self, n, data, data_master, separate_master = False, metric = 'cosine'):
+        """Build the dense kernel from data, then initialise dense-mode state.
+
+        separate_master=True builds the kernel with create_kernel_NS(data, data_master, metric)
+        and uses its row indices as the master set; otherwise the kernel comes from
+        create_square_kernel_dense(data, metric) and the master set is the ground set {0..n-1}.
+        """
+        if separate_master:
+            kernel = create_kernel_NS(data, data_master, metric)
+        else:
+            kernel = create_square_kernel_dense(data, metric)
+        self.dense_kernel = kernel
+
+        self.n, self.mode, self.partial = n, 'dense', False
+        self.separate_master = separate_master
+        self.effective_ground_set = set(range(n))
+        self.num_effective_groundset = n
+
+        self.n_master = len(kernel) if separate_master else n
+        self.master_set = set(range(self.n_master)) if separate_master else self.effective_ground_set
+
+        # the memoized value for every master element starts at zero
+        self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master)
+
+    # Constructor for sparse mode
+    def sparse_constructor(self, n, arr_val, arr_count, arr_col):
+        """Initialise sparse-mode state.
+
+        arr_val, arr_count and arr_col are handed to self.SparseSim unchanged.
+        """
+        self.n, self.mode = n, 'sparse'
+        self.partial = self.separate_master = False
+        self.sparse_kernel = self.SparseSim(arr_val, arr_count, arr_col)
+
+        ground_set = set(range(n))
+        self.effective_ground_set = self.master_set = ground_set
+        self.num_effective_groundset = self.n_master = n
+
+        # the memoized value for every master element starts at zero
+        self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master)
+
+    # Constructor for cluster mode
+    def cluster_constructor(self, n, clusters, cluster_kernels, cluster_index_map):
+        """Initialise clustered-mode state from per-cluster index collections, kernels and index map."""
+        self.n = n
+        self.mode = 'clustered'
+        self.num_clusters = len(clusters)
+        self.clusters = clusters
+        self.cluster_kernels = cluster_kernels
+        self.cluster_index_map = cluster_index_map
+        self.partial = False
+        self.separate_master = False
+
+        self.effective_ground_set = set(range(n))
+        self.num_effective_groundset = n
+        self.n_master = self.num_effective_groundset
+        self.master_set = self.effective_ground_set
+
+        self.cluster_ids = [0] * n
+        for i, ci in enumerate(clusters):
+            for ind in ci:
+                self.cluster_ids[ind] = i
+        # sets, not lists: update_memoization calls .add() and clear_memoization rebuilds sets
+        self.relevant_x = [set() for _ in range(self.num_clusters)]
+        self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(n)
+
+    # def clone(self):
+    #     return FacilityLocation(**self.__dict__)
+
+    def evaluate(self, X):
+        """Return the value of X by summing what the get_max_sim_* helpers report per element; 0 for an empty X."""
+        effective_X = X.intersection(self.effective_ground_set) if self.partial else X
+        if not effective_X:
+            return 0
+        total = 0
+        if self.mode == 'dense':
+            for master_idx in self.master_set:
+                total += self.get_max_sim_dense(master_idx, effective_X)
+        elif self.mode == 'sparse':
+            for master_idx in self.master_set:
+                total += self.get_max_sim_sparse(master_idx, effective_X)
+        else:  # clustered: members are scored only against chosen items of their own cluster
+            for cluster_no in range(self.num_clusters):
+                chosen_here = X.intersection(self.clusters[cluster_no])
+                if chosen_here:
+                    for member in self.clusters[cluster_no]:
+                        total += self.get_max_sim_cluster(member, chosen_here, cluster_no)
+        return total
+
+    def evaluate_with_memoization(self, X):
+        """Return the value implied by the memoized arrays; X is only used to detect the empty set."""
+        effective_X = X.intersection(self.effective_ground_set) if self.partial else X
+        result = 0
+        if effective_X:
+            if self.mode == 'dense' or self.mode == 'sparse':
+                for ind in self.master_set:
+                    slot = self.original_to_partial_index_map[ind] if self.partial else ind  # same addressing as update_memoization
+                    result += self.similarity_with_nearest_in_effective_x[slot]
+            else:  # clustered
+                for i in range(self.num_clusters):
+                    if self.relevant_x[i]:
+                        for ind in self.clusters[i]:
+                            result += self.clustered_similarity_with_nearest_in_relevant_x[ind]
+        return result
+
+    def marginal_gain(self, X, item):
+        """Return the gain of adding item to X without reading the memoized arrays; 0 if item is already in X."""
+        effective_X = X.intersection(self.effective_ground_set) if self.partial else X
+        gain = 0
+        if item in effective_X:
+            return gain
+        # The baseline m is recomputed from X itself (assuming the get_max_sim_* helpers return the
+        # best similarity to the given subset), so the result is right even when nothing is memoized.
+        if self.mode == 'dense':
+            for ind in self.master_set:
+                m = self.get_max_sim_dense(ind, effective_X)
+                if self.dense_kernel[ind][item] > m:
+                    gain += self.dense_kernel[ind][item] - m
+        elif self.mode == 'sparse':
+            for ind in self.master_set:
+                m = self.get_max_sim_sparse(ind, effective_X)
+                if self.sparse_kernel[ind, item] > m:
+                    gain += self.sparse_kernel[ind, item] - m
+        else:  # clustered: kernels are addressed through cluster_index_map, as in the memoized methods
+            cluster_id = self.cluster_ids[item]
+            item_ = self.cluster_index_map[item]
+            relevant_subset = effective_X.intersection(self.clusters[cluster_id])
+            for ind in self.clusters[cluster_id]:
+                m = self.get_max_sim_cluster(ind, relevant_subset, cluster_id)
+                sim = self.cluster_kernels[cluster_id][self.cluster_index_map[ind]][item_]
+                if sim > m:
+                    gain += sim - m
+        return gain
+    def marginal_gain_with_memoization(self, X, item, enable_checks):
+        """Return the gain of adding item, measured against the memoized statistics.
+
+        X is only used for the membership checks; the comparison baseline comes from
+        the memo arrays. Returns 0 when enable_checks is truthy and item is already in X,
+        or when the function is partial and item lies outside the effective ground set.
+        """
+        effective_X = X.intersection(self.effective_ground_set) if self.partial else X
+
+        # guard clauses: nothing to gain in either case
+        if enable_checks and item in effective_X:
+            return 0
+        if self.partial and item not in self.effective_ground_set:
+            return 0
+
+        # accumulate only strict improvements over the memoized best similarity
+        gain = 0
+        if self.mode == 'dense':
+            memo = self.similarity_with_nearest_in_effective_x
+            for ind in self.master_set:
+                # partial mode addresses the memo through the index map
+                slot = self.original_to_partial_index_map[ind] if self.partial else ind
+                if self.dense_kernel[ind][item] > memo[slot]:
+                    gain += self.dense_kernel[ind][item] - memo[slot]
+        elif self.mode == 'sparse':
+            memo = self.similarity_with_nearest_in_effective_x
+            for ind in self.master_set:
+                sim = self.sparse_kernel[ind, item]
+                if sim > memo[ind]:
+                    gain += sim - memo[ind]
+        else:  # clustered
+            cluster_no = self.cluster_ids[item]
+            item_pos = self.cluster_index_map[item]
+            kernel = self.cluster_kernels[cluster_no]
+            memo = self.clustered_similarity_with_nearest_in_relevant_x
+            nothing_chosen = len(self.relevant_x[cluster_no]) == 0
+            for ind in self.clusters[cluster_no]:
+                sim = kernel[self.cluster_index_map[ind]][item_pos]
+                # with nothing chosen in this cluster the full similarity counts as gain
+                if nothing_chosen:
+                    gain += sim
+                elif sim > memo[ind]:
+                    gain += sim - memo[ind]
+
+        return gain
+
+
+    def update_memoization(self, X, item):
+        """Fold item into the memoized statistics.
+
+        Nothing happens when item is already in X, or when the function is partial
+        and item lies outside the effective ground set.
+        """
+        effective_X = X.intersection(self.effective_ground_set) if self.partial else X
+
+        if item in effective_X:
+            return
+        if self.partial and item not in self.effective_ground_set:
+            return
+
+        if self.mode == 'dense':
+            memo = self.similarity_with_nearest_in_effective_x
+            for ind in self.master_set:
+                # partial mode addresses the memo through the index map
+                slot = self.original_to_partial_index_map[ind] if self.partial else ind
+                if self.dense_kernel[ind][item] > memo[slot]:
+                    memo[slot] = self.dense_kernel[ind][item]
+        elif self.mode == 'sparse':
+            memo = self.similarity_with_nearest_in_effective_x
+            for ind in self.master_set:
+                sim = self.sparse_kernel[ind, item]
+                if sim > memo[ind]:
+                    memo[ind] = sim
+        else:  # clustered
+            cluster_no = self.cluster_ids[item]
+            item_pos = self.cluster_index_map[item]
+            kernel = self.cluster_kernels[cluster_no]
+            memo = self.clustered_similarity_with_nearest_in_relevant_x
+            for ind in self.clusters[cluster_no]:
+                sim = kernel[self.cluster_index_map[ind]][item_pos]
+                if sim > memo[ind]:
+                    memo[ind] = sim
+
+            # record that this cluster now has a chosen item
+            self.relevant_x[cluster_no].add(item)
+
+
+    def get_effective_ground_set(self):  # derived from self.n only; self.effective_ground_set is not consulted
+        return set(range(self.n))  # a new set {0, ..., n-1} on every call
+
+
+    def cluster_init(self, n_, dense_kernel_, ground_, partial, lambda_):
+        """Reset to dense mode over ground_ with one master element per row of dense_kernel_
+        (lambda_ is accepted but unused)."""
+        self.n, self.partial, self.mode = n_, partial, 'dense'
+        self.dense_kernel = dense_kernel_
+        self.effective_ground_set = ground_
+        self.n_master = len(dense_kernel_)
+        self.master_set = set(range(self.n_master))
+        self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master)
+        self.original_to_partial_index_map = {elem: pos for pos, elem in enumerate(ground_)}
+        self.clustered_similarity_with_nearest_in_relevant_x = np.zeros(n_)
+        self.relevant_x = [set() for _ in range(n_)]
+
+
+    def clear_memoization(self):
+        """Reset the memoized statistics of the current mode: zeros, plus empty per-cluster sets when clustered."""
+        if self.mode in ('dense', 'sparse'):
+            self.similarity_with_nearest_in_effective_x = np.zeros(self.n_master)
+        else:
+            self.relevant_x, self.clustered_similarity_with_nearest_in_relevant_x = [set() for _ in range(self.num_clusters)], np.zeros(self.n)
+
+
+    def set_memoization(self, X):  # rebuild the memoized statistics for X from scratch
+        self.clear_memoization()
+        temp = set()  # the elements of X replayed so far
+        for elem in X:
+            self.update_memoization(temp, elem)  # must run before the add below: it returns early for items already in the set
+            temp.add(elem)

From 7fac287c21a973383d4728434afa786e62cadfb0 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:44:28 +0530
Subject: [PATCH 46/58] Function with all modes implemented DisparityMin.py

---
 pytorch/submod/DisparityMin.py | 191 +++++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 pytorch/submod/DisparityMin.py

diff --git a/pytorch/submod/DisparityMin.py b/pytorch/submod/DisparityMin.py
new file mode 100644
index 0000000..aa71fe1
--- /dev/null
+++ b/pytorch/submod/DisparityMin.py
@@ -0,0 +1,191 @@
+import numpy as np
+import scipy
+from helper import *
+
+class DisparityMinFunction(SetFunction):
+
+	def __init__(self, n, mode, sijs=None, data=None, metric="cosine", num_neighbors=None):
+		"""Validate the arguments and prepare the similarity kernel in 'dense' or 'sparse' mode.
+
+		Either a kernel (sijs: numpy array for dense, scipy CSR matrix for sparse) or a data
+		matrix must be given; Exception is raised for any inconsistent combination.
+		"""
+		super(DisparityMinFunction, self).__init__()
+		self.n = n
+		self.mode = mode
+		self.metric = metric
+		self.sijs = sijs
+		self.data = data
+		self.num_neighbors = num_neighbors
+		self.cpp_obj = None
+		self.cpp_sijs = None
+		self.cpp_content = None
+		self.effective_ground_set = None
+
+		if self.n <= 0:
+			raise Exception("ERROR: Number of elements in ground set must be positive")
+
+		if self.mode not in ['dense', 'sparse']:
+			raise Exception("ERROR: Incorrect mode. Must be one of 'dense' or 'sparse'")
+
+		if type(self.sijs) != type(None): # User has provided similarity kernel
+			if isinstance(self.sijs, scipy.sparse.csr_matrix): # public name; scipy.sparse.csr is a deprecated private namespace
+				if num_neighbors is None or num_neighbors <= 0:
+					raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel")
+				if mode != "sparse":
+					raise Exception("ERROR: Sparse kernel provided, but mode is not sparse")
+			elif type(self.sijs) == np.ndarray:
+				if mode != "dense":
+					raise Exception("ERROR: Dense kernel provided, but mode is not dense")
+			else:
+				raise Exception("Invalid kernel provided")
+			#TODO: is the below dimensionality check valid for both dense and sparse kernels?
+			if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n:
+				raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel")
+			if type(self.data) != type(None):
+				print("WARNING: similarity kernel found. Provided data matrix will be ignored.")
+
+		else: #similarity kernel has not been provided
+			if type(self.data) != type(None):
+				if np.shape(self.data)[0]!=self.n:
+					raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix")
+				if self.mode == "dense":
+					if self.num_neighbors  is not None:
+						raise Exception("num_neighbors wrongly provided for dense mode")
+					self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode
+				self.cpp_content = np.array(create_kernel(X = torch.tensor(self.data.tolist()), metric = self.metric, num_neigh = self.num_neighbors, mode = self.mode).to_dense()) # NOTE(review): torch is not imported in this file - presumably re-exported by helper; confirm
+				val = self.cpp_content[0]
+				row = list(self.cpp_content[1].astype(int))
+				col = list(self.cpp_content[2].astype(int))
+				if self.mode=="dense":
+					self.sijs = np.zeros((n,n))
+					self.sijs[row,col] = val
+				if self.mode=="sparse":
+					self.sijs = scipy.sparse.csr_matrix((val, (row, col)), [n,n])
+			else:
+				raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided")
+
+		#Breaking similarity matrix to simpler native data structures for implicit pybind11 binding
+		if self.mode=="dense":
+			self.cpp_sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure
+			if type(self.cpp_sijs[0])==int or type(self.cpp_sijs[0])==float: #Its critical that we pass a list of list to pybind11
+																			 #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
+				l=[]
+				l.append(self.cpp_sijs)
+				self.cpp_sijs=l
+
+		if self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation)
+			self.cpp_sijs = {}
+			self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal)
+			self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row
+			self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corresponding to non-zero values in arr_val
+			if(len(self.cpp_sijs['arr_val']) ==0 or len(self.cpp_sijs['arr_count']) ==0 or len(self.cpp_sijs['arr_col']) ==0):
+				raise Exception("Error: Empty/Corrupt sparse similarity kernel")
+			self.sparse_kernel = subcp.SparseSim(self.cpp_sijs['arr_val'],self.cpp_sijs['arr_count'],self.cpp_sijs['arr_col']) # NOTE(review): subcp is not imported in this file; confirm where it comes from
+		self.effective_ground_set = set(range(n)) # shared by both modes
+		self.numeffectivegroundset = len(self.effective_ground_set)
+		self.currentMin = 0
+
+
+
+	def evaluate(self, X: Set[int]) -> float:
+		"""Return the value of X via get_min_dense / get_min_sparse; 0.0 for fewer than two elements."""
+		if len(X) in (0, 1):
+			return 0.0
+		if self.mode == 'dense':
+			return get_min_dense(X, self)
+		if self.mode == 'sparse':
+			return get_min_sparse(X, self)
+		# __init__ only admits the two modes above, so this is a safety net
+		raise ValueError("Error: Only dense and sparse mode supported")
+
+	def evaluate_with_memoization(self, X: Set[int]) -> float:  # X is not inspected
+		return self.currentMin  # value maintained by update_memoization / set_memoization / clear_memoization
+
+	def get_effective_ground_set(self) -> Set[int]:
+		"""Return the effective ground set built in __init__ (the stored set itself, not a copy)."""
+		return self.effective_ground_set
+
+	def marginal_gain(self, X: Set[int], item: int) -> float:
+		"""Return the change in the memoized minimum if item were added to X.
+
+		0.0 is returned when item is already in X or is outside the effective ground set.
+		"""
+		if item in X or item not in self.effective_ground_set:
+			return 0.0
+
+		# with a single chosen element the minimum restarts from 1.0
+		lowest = 1.0 if len(X) == 1 else self.currentMin
+		if self.mode == 'dense':
+			for other in X:
+				dist = 1 - self.cpp_sijs[other][item]
+				if dist < lowest and other != item:
+					lowest = dist
+		elif self.mode == 'sparse':
+			for other in X:
+				dist = 1 - self.sparse_kernel.get_val(other, item)
+				if dist < lowest and other != item:
+					lowest = dist
+		else:
+			raise ValueError("Error: Only dense and sparse mode supported")
+		return lowest - self.currentMin
+
+	def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+		"""Return the change in the memoized minimum if item were added to X.
+
+		With enable_checks, 0.0 is returned when item is already in X.
+		"""
+		if enable_checks and item in X:
+			return 0.0
+		# with a single chosen element the minimum restarts from 1.0
+		min_val = 1.0 if len(X) == 1 else self.currentMin
+		if self.mode == 'dense':
+			for elem in X:
+				dist = 1 - self.cpp_sijs[elem][item]
+				if dist < min_val and elem != item:
+					min_val = dist
+		elif self.mode == 'sparse':
+			for elem in X:
+				# the running minimum must be compared and updated here, exactly as in the dense branch
+				dist = 1 - self.sparse_kernel.get_val(item, elem)
+				if dist < min_val and elem != item:
+					min_val = dist
+		else:
+			raise ValueError("Error: Only dense and sparse mode supported")
+		return min_val - self.currentMin
+
+	def update_memoization(self, X: Set[int], item: int) -> None:
+		"""Update self.currentMin for item being added to X.
+
+		Nothing changes when item is already in X or outside the effective ground set.
+		With exactly one element in X the minimum is set outright to 1 - similarity;
+		otherwise it is lowered whenever 1 - similarity to an element of X is smaller.
+		"""
+		if item in X:
+			return
+		if item not in self.effective_ground_set:
+			return
+
+		if self.mode == 'dense':
+			def distance_to(elem):
+				return 1 - self.cpp_sijs[elem][item]
+		elif self.mode == 'sparse':
+			def distance_to(elem):
+				return 1 - self.sparse_kernel.get_val(elem, item)
+		else:
+			raise ValueError("Error: Only dense and sparse mode supported")
+
+		if len(X) == 1:
+			for elem in X:
+				self.currentMin = distance_to(elem)
+			return
+
+		for elem in X:
+			if distance_to(elem) < self.currentMin and elem != item:
+				self.currentMin = distance_to(elem)
+
+	def clear_memoization(self) -> None:  # reset the memoized minimum
+			self.currentMin = 0.0  # same value evaluate() returns for fewer than two elements
+
+	def set_memoization(self, X: Set[int]) -> None:  # recompute the memoized minimum for X
+			self.currentMin = self.evaluate(X)  # full evaluation rather than replaying update_memoization

From 1cac3802da4629d8d50e46451f052fc8c4c7d247 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:48:25 +0530
Subject: [PATCH 47/58] All modes are implemented DisparitySum.py

---
 pytorch/submod/DisparitySum.py | 174 +++++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 pytorch/submod/DisparitySum.py

diff --git a/pytorch/submod/DisparitySum.py b/pytorch/submod/DisparitySum.py
new file mode 100644
index 0000000..10f773c
--- /dev/null
+++ b/pytorch/submod/DisparitySum.py
@@ -0,0 +1,174 @@
+from helper import *
+import numpy as np
+import scipy
+
+class DisparitySumFunction(SetFunction):
+
+	def __init__(self, n, mode, sijs=None, data=None, metric="cosine", num_neighbors=None):
+		"""Validate the arguments and build the similarity kernel used by this function.
+
+		The kernel comes from `sijs` when given (numpy array for 'dense' mode,
+		scipy CSR matrix for 'sparse' mode) and is otherwise computed from `data`
+		with `metric`. Missing or inconsistent arguments raise Exception.
+		"""
+		super(DisparitySumFunction, self).__init__()
+		self.n = n
+		self.mode = mode
+		self.metric = metric
+		self.sijs = sijs
+		self.data = data
+		self.num_neighbors = num_neighbors
+		self.cpp_obj = None
+		self.cpp_sijs = None
+		self.cpp_content = None
+		self.effective_ground_set = None
+
+		if self.n <= 0:
+			raise Exception("ERROR: Number of elements in ground set must be positive")
+
+		if self.mode not in ['dense', 'sparse']:
+			raise Exception("ERROR: Incorrect mode. Must be one of 'dense' or 'sparse'")
+
+		if self.sijs is not None: # User has provided similarity kernel
+			# Check against the public scipy.sparse.csr_matrix name; the
+			# scipy.sparse.csr submodule path is a deprecated private namespace.
+			if isinstance(self.sijs, scipy.sparse.csr_matrix):
+				if num_neighbors is None or num_neighbors <= 0:
+					raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel")
+				if mode != "sparse":
+					raise Exception("ERROR: Sparse kernel provided, but mode is not sparse")
+			elif isinstance(self.sijs, np.ndarray):
+				if mode != "dense":
+					raise Exception("ERROR: Dense kernel provided, but mode is not dense")
+			else:
+				raise Exception("Invalid kernel provided")
+			#TODO: is the below dimensionality check valid for both dense and sparse kernels?
+			if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n:
+				raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel")
+			if self.data is not None:
+				print("WARNING: similarity kernel found. Provided data matrix will be ignored.")
+
+		else: #similarity kernel has not been provided
+			if self.data is not None:
+				if np.shape(self.data)[0]!=self.n:
+					raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix")
+
+				if self.mode == "dense":
+					if self.num_neighbors  is not None:
+						raise Exception("num_neighbors wrongly provided for dense mode")
+					self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode
+				# NOTE(review): the unpacking below assumes create_kernel yields (values, rows, cols) - confirm against helper.
+				self.cpp_content = np.array(create_kernel(X = torch.tensor(self.data), metric = self.metric, num_neigh = self.num_neighbors, mode = self.mode).to_dense())
+				val = self.cpp_content[0]
+				row = list(self.cpp_content[1].astype(int))
+				col = list(self.cpp_content[2].astype(int))
+				if self.mode=="dense":
+					self.sijs = np.zeros((n,n))
+					self.sijs[row,col] = val
+				if self.mode=="sparse":
+					self.num_neighbors = 0
+					self.sijs = scipy.sparse.csr_matrix((val, (row, col)), [n,n])
+			else:
+				raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided")
+
+		#Breaking similarity matrix to simpler native data structures for implicit pybind11 binding
+		if self.mode=="dense":
+			self.cpp_sijs = self.sijs.tolist() #break numpy ndarray to native list of list data structure
+
+			# A list of lists is required downstream; wrap the row when tolist()
+			# produced a flat list of scalars (1D numpy array for a 1x1 sim matrix).
+			if isinstance(self.cpp_sijs[0], (int, float)):
+				l=[]
+				l.append(self.cpp_sijs)
+				self.cpp_sijs=l
+
+			self.effective_ground_set = set(range(n))
+			self.numeffectivegroundset  = len(self.effective_ground_set)
+			self.currentSum = 0
+
+		if self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation)
+			self.cpp_sijs = {}
+			self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal)
+			self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulative count of non-zero elements up to but not including current row
+			self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corresponding to non-zero values in arr_val
+			if(len(self.cpp_sijs['arr_val']) ==0 or len(self.cpp_sijs['arr_count']) ==0 or len(self.cpp_sijs['arr_col']) ==0):
+				raise Exception("Error: Empty/Corrupt sparse similarity kernel")
+			self.sparse_kernel = subcp.SparseSim(self.cpp_sijs['arr_val'],self.cpp_sijs['arr_count'],self.cpp_sijs['arr_col'])
+			self.effective_ground_set = set(range(n))
+			self.numeffectivegroundset = len(self.effective_ground_set)
+			self.currentSum = 0
+
+
+	def evaluate(self, X: Set[int]) -> float:  # value of the set X; 0.0 for the empty set
+		effective_X = X
+		if len(effective_X) == 0 :
+			return 0.0
+		if self.mode == 'dense':
+			return get_sum_dense(effective_X, self)  # not defined in this file; presumably provided by `from helper import *`
+		elif self.mode == 'sparse':
+			return get_sum_sparse(effective_X, self)  # not defined in this file; presumably provided by `from helper import *`
+		else:
+			raise ValueError("Error: Only dense and sparse mode supported")
+
+	def evaluate_with_memoization(self, X: Set[int]) -> float:  # returns the cached running sum; X itself is not read
+		return self.currentSum
+
+	def get_effective_ground_set(self) -> Set[int]:  # the set built as set(range(n)) in __init__
+		return self.effective_ground_set
+
+	def marginal_gain(self, X: Set[int], item: int) -> float:  # gain of adding `item` to X, computed from scratch
+			effective_X = X
+			gain = 0.0
+			# An item that is already in X adds nothing.
+			if item in effective_X:
+					return 0.0
+			# Items outside the effective ground set add nothing either.
+			if item not in self.effective_ground_set:
+					return 0.0
+
+			# Gain is the sum over elem in X of (1 - kernel value for the pair elem, item).
+			if self.mode == 'dense':
+					for elem in effective_X:
+							gain += (1 - self.cpp_sijs[elem][item])
+			elif self.mode == 'sparse':
+					for elem in effective_X:
+							gain += (1 - self.sparse_kernel.get_val(item, elem))
+			else:
+					raise ValueError("Error: Only dense and sparse mode supported")
+
+			return gain
+
+	def marginal_gain_with_memoization(self, X: Set[int], item: int, enable_checks: bool = True) -> float:
+			"""Return the gain of adding `item` to `X`.
+
+			With `enable_checks` set, an item that is already in `X` or lies
+			outside the effective ground set yields 0.0, matching marginal_gain.
+			Raises ValueError for a mode other than 'dense' or 'sparse'.
+			"""
+			# The ground-set test used to be guarded by a literal `False` and never ran.
+			if enable_checks and (item in X or item not in self.effective_ground_set):
+					return 0.0
+
+			gain = 0.0
+			if self.mode == 'dense':
+					for elem in X:
+							gain += (1 - self.cpp_sijs[elem][item])
+			elif self.mode == 'sparse':
+					for elem in X:
+							gain += (1 - self.sparse_kernel.get_val(item, elem))
+			else:
+					raise ValueError("Error: Only dense and sparse mode supported")
+
+			return gain
+
+	def update_memoization(self, X: Set[int], item: int) -> None:
+			# marginal_gain returns 0.0 when item is already in X or outside the
+			# effective ground set, so such updates leave currentSum unchanged.
+			self.currentSum += self.marginal_gain(X, item)
+
+
+	def clear_memoization(self) -> None:  # reset the cached running sum to 0.0
+			self.currentSum = 0.0
+
+	def set_memoization(self, X: Set[int]) -> None:  # cache evaluate(X) as the running sum
+			self.currentSum = self.evaluate(X)

From 9464fb8a355f8f76fad6af6e32bee7a0f163f19d Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:49:22 +0530
Subject: [PATCH 48/58] Dense mode is implemented GraphCut.py

---
 pytorch/submod/GraphCut.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py
index ed5a93d..df60fd0 100644
--- a/pytorch/submod/GraphCut.py
+++ b/pytorch/submod/GraphCut.py
@@ -2,11 +2,7 @@
 import random
 from helper import *
 
-class GraphCutpy(SetFunction):
-    # def __init__(self, n: int, mode: str, metric: str, master_ground_kernel: List[List[float]] = None,
-    #              ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None,
-    #              arr_count: List[int] = None, arr_col: List[int] = None, partial: bool = False,
-    #              ground: Set[int] = None, lambdaVal: float = 0.0):
+class GraphCutFunction(SetFunction):
     def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None,
                  master_ground_kernel: List[List[float]] = None,
                  ground_ground_kernel: List[List[float]] = None, arr_val: List[float] = None,

From fbbd7baf1a44c7bd6c0c41f62d1d865fc5da0fb2 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:50:16 +0530
Subject: [PATCH 49/58] Dense mode is implemented GraphCut.py

---
 pytorch/submod/GraphCut.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytorch/submod/GraphCut.py b/pytorch/submod/GraphCut.py
index df60fd0..5cfb774 100644
--- a/pytorch/submod/GraphCut.py
+++ b/pytorch/submod/GraphCut.py
@@ -1,6 +1,7 @@
 from typing import List, Set
 import random
 from helper import *
+from ..SetFunction import SetFunction
 
 class GraphCutFunction(SetFunction):
     def __init__(self, n, mode, lambdaVal, separate_rep=None, n_rep=None, mgsijs=None, ggsijs=None, data=None, data_rep=None, metric="cosine", num_neighbors=None,

From 0fb08d6ba8060f45263289335355fe7193fe1672 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:51:02 +0530
Subject: [PATCH 50/58] Function with all modes implemented DisparityMin.py

---
 pytorch/submod/DisparityMin.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytorch/submod/DisparityMin.py b/pytorch/submod/DisparityMin.py
index aa71fe1..27a4d2b 100644
--- a/pytorch/submod/DisparityMin.py
+++ b/pytorch/submod/DisparityMin.py
@@ -1,6 +1,7 @@
 import numpy as np
 import scipy
 from helper import *
+from ..SetFunction import SetFunction
 
 class DisparityMinFunction(SetFunction):
 

From a026e5b85923dc507db7d389ad018b152d0f64aa Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:51:51 +0530
Subject: [PATCH 51/58] Function with all modes implemented DisparitySum.py

---
 pytorch/submod/DisparitySum.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pytorch/submod/DisparitySum.py b/pytorch/submod/DisparitySum.py
index 10f773c..efb9f1d 100644
--- a/pytorch/submod/DisparitySum.py
+++ b/pytorch/submod/DisparitySum.py
@@ -1,6 +1,7 @@
 from helper import *
 import numpy as np
 import scipy
+from ..SetFunction import SetFunction
 
 class DisparitySumFunction(SetFunction):
 

From 078194ae9c4ed109323e98fb326a80571da913d0 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:53:20 +0530
Subject: [PATCH 52/58] Function of dense mode only FacilityLocation.py

---
 pytorch/submod/FacilityLocation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pytorch/submod/FacilityLocation.py b/pytorch/submod/FacilityLocation.py
index 57c1c7b..1a5a155 100644
--- a/pytorch/submod/FacilityLocation.py
+++ b/pytorch/submod/FacilityLocation.py
@@ -2,6 +2,8 @@
 import scipy
 from scipy import sparse
 from helper import *
+from ..SetFunction import SetFunction
+
 class FacilityLocationFunction(SetFunction):
     def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None, data_rep=None, num_clusters=None, cluster_labels=None, metric="cosine", num_neighbors=None,
                  dense_kernel = None, data_master = None, create_dense_cpp_kernel_in_python = True, partial = False, seperate_master = False):

From 5265d84075532e306b3db92df7f3972dbbc0cc49 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:54:53 +0530
Subject: [PATCH 53/58] Update SetCover.py

---
 pytorch/submod/SetCover.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pytorch/submod/SetCover.py b/pytorch/submod/SetCover.py
index a01d2c6..3163400 100644
--- a/pytorch/submod/SetCover.py
+++ b/pytorch/submod/SetCover.py
@@ -3,7 +3,8 @@
 import numpy as np
 import random
 from ..SetFunction import SetFunction
-class SetCover(SetFunction):
+
+class SetCoverFunction(SetFunction):
     def __init__(self, n, cover_set, num_concepts, concept_weights = None):
         super(SetFunction, self).__init__()
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

From 6cf311be845c93de9a39e47ace811f3e27f5c857 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:56:59 +0530
Subject: [PATCH 54/58] Update __init__.py

---
 pytorch/submod/__init__.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pytorch/submod/__init__.py b/pytorch/submod/__init__.py
index dfa5ac5..99c05d2 100644
--- a/pytorch/submod/__init__.py
+++ b/pytorch/submod/__init__.py
@@ -1,4 +1,8 @@
 # /pytorch/SetFunction/__init__.py
-from .SetCover import SetCover
-from .ProbabilisticSetCover import ProbabilisticSetCover
-from .GraphCut import GraphCut
+from .SetCover import SetCoverFunction
+from .ProbabilisticSetCover import ProbabilisticSetCoverFunction
+from .GraphCut import GraphCutFunction
+from .DisparityMin import DisparityMinFunction
+from .DisparitySum import DisparitySumFunction
+from .FacilityLocation import FacilityLocationFunction
+from .LogDeterminant import LogDeterminantFunction  # module and class are spelled "LogDeterminant" in LogDeterminant.py

From de6ae41fe8665d5595eb31c4f691faa1b50b7cfa Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 6 Feb 2024 18:59:04 +0530
Subject: [PATCH 55/58] Dense mode is done LogDeterminant.py

---
 pytorch/submod/LogDeterminant.py | 248 +++++++++++++++++++++++++++++++
 1 file changed, 248 insertions(+)
 create mode 100644 pytorch/submod/LogDeterminant.py

diff --git a/pytorch/submod/LogDeterminant.py b/pytorch/submod/LogDeterminant.py
new file mode 100644
index 0000000..96b0e50
--- /dev/null
+++ b/pytorch/submod/LogDeterminant.py
@@ -0,0 +1,248 @@
+import math
+from collections import defaultdict
+import scipy
+from helper import *
+from ..SetFunction import SetFunction
+
+class LogDeterminantFunction(SetFunction):
+
+    def dot_product(self, x, y):  # sum of pairwise products; zip stops at the shorter of x and y
+        return sum(xi * yi for xi, yi in zip(x, y))
+
+
+    def __init__(self, n, mode, lambdaVal, arr_val=None, arr_count=None, arr_col=None, dense_kernel=None, partial=None,
+                  sijs=None, data=None, metric="cosine", num_neighbors=None, memoizedC = None, memoizedD = None, data_master = None):
+        """Validate the arguments and set up the kernel and memoization state.
+
+        `data` is required (the `sijs` argument is currently discarded). Dense
+        mode builds `dense_kernel` from `data` unless one is passed in; any other
+        mode needs `arr_val`, `arr_count` and `arr_col` and is treated as sparse.
+        Raises Exception / ValueError on invalid or inconsistent arguments.
+        """
+        self.n = n
+        self.mode = mode
+        self.metric = metric
+        self.data = data
+        self.num_neighbors = num_neighbors
+        self.lambdaVal = lambdaVal
+        self.sijs = None  # NOTE(review): drops the caller's sijs, so the kernel-validation branch below never runs - confirm intent
+        self.content = None
+        self.effective_ground = None
+        self.partial = partial
+        self.effective_ground_set = set(range(n))
+        self.memoizedC = memoizedC
+        self.memoizedD = memoizedD
+        self.data_master = data_master
+        self.dense_kernel = dense_kernel
+
+        if self.n <= 0:
+          raise Exception("ERROR: Number of elements in ground set must be positive")
+
+        if self.mode not in ['dense', 'sparse', 'clustered']:
+          raise Exception("ERROR: Incorrect mode. Must be one of 'dense', 'sparse' or 'clustered'")
+
+        if self.metric not in ['euclidean', 'cosine']:
+          raise Exception("ERROR: Unsupported metric. Must be 'euclidean' or 'cosine'")
+        if type(self.sijs) != type(None): # User has provided similarity kernel
+          if type(self.sijs) == scipy.sparse.csr.csr_matrix:
+            if num_neighbors is None or num_neighbors <= 0:
+              raise Exception("ERROR: Positive num_neighbors must be provided for given sparse kernel")
+            if mode != "sparse":
+              raise Exception("ERROR: Sparse kernel provided, but mode is not sparse")
+          elif type(self.sijs) == np.ndarray:
+            if mode != "dense":
+              raise Exception("ERROR: Dense kernel provided, but mode is not dense")
+          else:
+            raise Exception("Invalid kernel provided")
+          #TODO: is the below dimensionality check valid for both dense and sparse kernels?
+          if np.shape(self.sijs)[0]!=self.n or np.shape(self.sijs)[1]!=self.n:
+            raise Exception("ERROR: Inconsistentcy between n and dimensionality of given similarity kernel")
+          if type(self.data) != type(None):
+            print("WARNING: similarity kernel found. Provided data matrix will be ignored.")
+        else: #similarity kernel has not been provided
+          if type(self.data) != type(None):
+            if np.shape(self.data)[0]!=self.n:
+              raise Exception("ERROR: Inconsistentcy between n and no of examples in the given data matrix")
+
+            if self.mode == "dense":
+              if self.num_neighbors  is not None:
+                raise Exception("num_neighbors wrongly provided for dense mode")
+              self.num_neighbors = np.shape(self.data)[0] #Using all data as num_neighbors in case of dense mode
+            self.content = np.array(create_kernel( X = self.data.tolist(), metric = self.metric, mode = self.mode, num_neigh = self.num_neighbors))
+            val = self.content[0]
+            row = list(self.content[1].astype(int))
+            col = list(self.content[2].astype(int))
+            if self.mode=="dense":
+              self.sijs = np.zeros((n,n))
+              self.sijs[row,col] = val
+            if self.mode=="sparse":
+              self.sijs = sparse.csr_matrix((val, (row, col)), [n,n])
+          else:
+            raise Exception("ERROR: Neither ground set data matrix nor similarity kernel provided")
+
+        #Breaking similarity matrix to simpler native data structures for implicit pybind11 binding
+        if self.mode=="dense":
+          self.sijs = self.sijs.tolist() #break numpy ndarray to native list of list datastructure
+          if type(self.sijs[0])==int or type(self.sijs[0])==float: #Its critical that we pass a list of list to pybind11
+                                          #This condition ensures the same in case of a 1D numpy array (for 1x1 sim matrix)
+            l=[]
+            l.append(self.sijs)
+            self.sijs=l
+        self.effective_ground = self.get_effective_ground_set()
+        if self.mode == 'dense':
+          if self.dense_kernel is None:  # identity test: `== None` on a numpy kernel compares elementwise
+            self.dense_kernel = create_kernel_NS(X_ground = self.data, X_master = self.data, metric = self.metric)
+          if self.partial:
+            self.effectiveGroundSet = self.data
+          else:
+            self.effectiveGroundSet = set(range(n))
+            self.numEffectiveGroundset = len(self.effectiveGroundSet)
+            self.memoizedC = [[] for _ in range(self.numEffectiveGroundset)]
+            self.prevDetVal = 0
+            self.memoizedD = []
+            self.prevItem = -1
+            if self.partial:  # NOTE(review): never true here, this sits inside the non-partial branch - confirm intended nesting
+                ind = 0
+                for it in self.effectiveGroundSet:
+                    self.originalToPartialIndexMap[it] = ind
+                    ind += 1
+                    self.memoizedD.append(np.sqrt(self.dense_kernel[it][it] + self.lambdaVal))
+            else:
+                for i in range(self.n):
+                    self.memoizedD.append(np.sqrt(self.dense_kernel[i][i] + self.lambdaVal))
+
+        elif arr_val is not None and arr_count is not None and arr_col is not None:
+            self.mode = 'sparse'
+            self.sparseKernel = SparseSim(arr_val, arr_count, arr_col)
+            self.effectiveGroundSet = set(range(n))  # was range(n_), an undefined name
+            self.numEffectiveGroundset = len(self.effectiveGroundSet)
+            self.memoizedC = [[] for _ in range(n)]
+            self.memoizedD = []
+            self.prevDetVal = 0
+            self.prevItem = -1
+
+            for i in range(self.n):
+                self.memoizedD.append(np.sqrt(self.sparseKernel.get_val(i, i) + self.lambdaVal))
+
+        else:
+            raise ValueError("Invalid constructor arguments. Please provide either denseKernel or sparse kernel data.")
+
+    def evaluate(self, X):
+        """Return the function value of X without disturbing the memoized state.
+
+        The memoization is snapshotted, rebuilt for X to read off the value,
+        and put back afterwards, also when the computation raises.
+        """
+        saved = (self.memoizedC.copy(), self.memoizedD.copy(), self.prevItem, self.prevDetVal)
+        try:
+            self.set_memoization(X)  # snake_case as in marginal_gain; no setMemoization is visible in this class
+            return self.evaluate_with_memoization(X)
+        finally:
+            self.memoizedC, self.memoizedD, self.prevItem, self.prevDetVal = saved
+
+    def evaluate_with_memoization(self, X):  # returns the cached self.prevDetVal; X itself is not read
+        return self.prevDetVal
+
+    def marginal_gain(self, X, item):
+        """Return the gain of adding `item` to X without disturbing the memoized state.
+
+        The memoization is snapshotted, rebuilt for X to compute the gain,
+        and put back afterwards, also when the computation raises.
+        """
+        saved = (self.memoizedC.copy(), self.memoizedD.copy(), self.prevItem, self.prevDetVal)
+        try:
+            self.set_memoization(X)
+            return self.marginal_gain_with_memoization(X, item)
+        finally:
+            self.memoizedC, self.memoizedD, self.prevItem, self.prevDetVal = saved
+
+    def marginal_gain_with_memoization(self, X, item, enableChecks=True):  # gain of adding `item`, read from the memoized state
+        effectiveX = X.intersection(self.effective_ground_set) if self.partial else X
+        gain = 0
+        # With checks enabled, an item that is already in X contributes nothing.
+        if enableChecks and item in effectiveX:
+            return 0
+        # In partial mode, items outside the effective ground set contribute nothing.
+        if self.partial and item not in self.effective_ground_set:
+            return 0
+        # Index into memoizedC / memoizedD: remapped in partial mode, the item itself otherwise.
+        itemIndex = self.originalToPartialIndexMap[item] if self.partial else item
+        # self.prevItem is the item stored by update_memoization.
+        if self.mode == "dense":
+            if len(effectiveX) == 0:
+                gain = math.log(self.memoizedD[itemIndex] * self.memoizedD[itemIndex])
+            elif len(effectiveX) == 1:
+                prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem
+                e = self.dense_kernel[self.prevItem][item] / self.memoizedD[prevItemIndex]
+                gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e))
+            else:
+                prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem
+                e = (self.dense_kernel[self.prevItem][item] -
+                     self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[itemIndex])) / self.memoizedD[prevItemIndex]
+                gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e))
+        elif self.mode == "sparse":
+            if len(effectiveX) == 0:
+                gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex]))
+            elif len(effectiveX) == 1:
+                prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem
+                e = self.sparseKernel.get_val(self.prevItem, item) / self.memoizedD[prevItemIndex]
+                gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e))
+            else:
+                prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem
+                e = (self.sparseKernel.get_val(self.prevItem, item) -
+                     self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[itemIndex])) / self.memoizedD[prevItemIndex]
+                gain = math.log(math.fabs(self.memoizedD[itemIndex] * self.memoizedD[itemIndex] - e * e))
+        else:
+            raise ValueError("Only dense and sparse mode supported")
+
+        return gain
+
+    def update_memoization(self, X, item):
+        """Fold `item`, just added to `X`, into the memoized state.
+
+        Adds the item's gain to prevDetVal, extends memoizedC / memoizedD for every
+        element not in X using the previously added item, and records `item` as
+        prevItem. Does nothing if `item` is already in X or outside the ground set.
+        """
+        effectiveX = X.intersection(self.effective_ground_set) if self.partial else X
+        if item in effectiveX or item not in self.effective_ground_set:
+            return
+
+        self.prevDetVal += self.marginal_gain_with_memoization(X, item)
+
+        if len(effectiveX) != 0:
+            prevItemIndex = self.originalToPartialIndexMap[self.prevItem] if self.partial else self.prevItem
+            prevDValue = self.memoizedD[prevItemIndex]
+            for i in self.effectiveGroundSet:
+                iIndex = self.originalToPartialIndexMap[i] if self.partial else i
+                if i in effectiveX:
+                    continue
+                # Read the similarity from the kernel that matches the mode, as
+                # marginal_gain_with_memoization does; dense_kernel may be None otherwise.
+                if self.mode == "dense":
+                    sim = self.dense_kernel[self.prevItem][i]
+                else:
+                    sim = self.sparseKernel.get_val(self.prevItem, i)
+                if len(effectiveX) == 1:
+                    e = sim / prevDValue
+                else:
+                    e = (sim -
+                         self.dot_product(self.memoizedC[prevItemIndex], self.memoizedC[iIndex])) / prevDValue
+                self.memoizedC[iIndex].append(e)
+                self.memoizedD[iIndex] = math.sqrt(math.fabs(self.memoizedD[iIndex] * self.memoizedD[iIndex] - e * e))
+
+        self.prevItem = item
+
+    def get_effective_ground_set(self):  # the set built as set(range(n)) in __init__
+        return self.effective_ground_set
+
+    def clear_memoization(self):
+        self.memoizedC.clear()
+        self.memoizedC = defaultdict(list)
+        self.prevDetVal = 0
+        self.prevItem = -1
+
+        if self.mode == "dense":
+            if self.partial:
+                for it in self.effective_ground_set:
+                    index = self.originalTo

From 6e9eee5db2858206cf5734e19d50920d6cc89417 Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Mon, 12 Feb 2024 16:29:40 +0530
Subject: [PATCH 56/58] Create_kernel_sklearn updated for batchwise calculation
 on cuda

---
 pytorch/submod/helper.py | 75 +++++++++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 12 deletions(-)

diff --git a/pytorch/submod/helper.py b/pytorch/submod/helper.py
index 7832a32..1797e4f 100644
--- a/pytorch/submod/helper.py
+++ b/pytorch/submod/helper.py
@@ -87,29 +87,81 @@ def create_kernel_dense(X, metric, method="sklearn"):
         raise Exception("For creating dense kernel, only 'sklearn' method is supported")
     return dense
 
-def create_kernel_dense_sklearn(X, metric, X_rep=None):
+def create_kernel_dense_sklearn(X, metric, X_rep=None, batch=0):
+    """Build the dense similarity kernel in row batches (each batch is moved to CUDA).
+    A non-positive `batch` means one batch covering all rows, because range() rejects a zero step."""
     dense = None
     D = None
-
+    batch_size = batch if batch and batch > 0 else max(len(X), len(X_rep) if X_rep is not None else 0, 1)
     if metric == "euclidean":
         if X_rep is None:
-            D = torch.cdist(X, X, p=2)
+            # Process data in batches for torch.cdist
+            for i in range(0, len(X), batch_size):
+                X_batch = X[i:i+batch_size].to(device="cuda")
+                D_batch = torch.cdist(X_batch, X, p=2).to(device="cuda")
+                gamma = 1 / X.shape[1]
+                dense_batch = torch.exp(-D_batch * gamma).to(device="cuda")
+                # Accumulate results from batches
+                if dense is None:
+                    dense = dense_batch
+                else:
+                    dense = torch.cat([dense, dense_batch])
         else:
-            D = torch.cdist(X_rep, X, p=2)
-        gamma = 1 / X.shape[1]
-        dense = torch.exp(-D * gamma)  # Obtaining Similarity from distance
+            # Process data in batches for torch.cdist
+            for i in range(0, len(X_rep), batch_size):
+                X_rep_batch = X_rep[i:i+batch_size].to(device="cuda")
+                D_batch = torch.cdist(X_rep_batch, X).to(device="cuda")
+                gamma = 1 / X.shape[1]
+                dense_batch = torch.exp(-D_batch * gamma).to(device="cuda")
+                # Accumulate results from batches
+                if dense is None:
+                    dense = dense_batch
+                else:
+                    dense = torch.cat([dense, dense_batch])
 
     elif metric == "cosine":
         if X_rep is None:
-            dense = torch.nn.functional.cosine_similarity(X, X, dim=1)
+            # Process data in batches for torch.nn.functional.cosine_similarity
+            for i in range(0, len(X), batch_size):
+                X_batch = X[i:i+batch_size].to(device="cuda")
+                dense_batch = torch.nn.functional.cosine_similarity(X_batch.unsqueeze(1), X.unsqueeze(0), dim=2)
+                # Accumulate results from batches
+                if dense is None:
+                    dense = dense_batch
+                else:
+                    dense = torch.cat([dense, dense_batch])
         else:
-            dense = torch.nn.functional.cosine_similarity(X_rep, X, dim=1)
+            # Pairwise cosine similarity per batch, broadcast the same way as the X_rep-is-None branch
+            for i in range(0, len(X_rep), batch_size):
+                X_rep_batch = X_rep[i:i+batch_size].to(device="cuda")
+                dense_batch = torch.nn.functional.cosine_similarity(X_rep_batch.unsqueeze(1), X.unsqueeze(0), dim=2)
+                # Accumulate results from batches
+                if dense is None:
+                    dense = dense_batch
+                else:
+                    dense = torch.cat([dense, dense_batch])
 
     elif metric == "dot":
         if X_rep is None:
-            dense = torch.matmul(X, X.t())
+            # Process data in batches for torch.matmul
+            for i in range(0, len(X), batch_size):
+                X_batch = X[i:i+batch_size].to(device="cuda")
+                dense_batch = torch.matmul(X_batch, X.t())
+                # Accumulate results from batches
+                if dense is None:
+                    dense = dense_batch
+                else:
+                    dense = torch.cat([dense, dense_batch])
         else:
-            dense = torch.matmul(X_rep, X.t())
+            # Process data in batches for torch.matmul
+            for i in range(0, len(X_rep), batch_size):
+                X_rep_batch = X_rep[i:i+batch_size].to(device="cuda")
+                dense_batch = torch.matmul(X_rep_batch, X.t())
+                # Accumulate results from batches
+                if dense is None:
+                    dense = dense_batch
+                else:
+                    dense = torch.cat([dense, dense_batch])
 
     else:
         raise Exception("ERROR: unsupported metric for this method of kernel creation")
@@ -119,9 +171,8 @@ def create_kernel_dense_sklearn(X, metric, X_rep=None):
     else:
         assert dense.shape == (X.shape[0], X.shape[0])
 
+    torch.cuda.empty_cache()
     return dense
-    pass
-
 
 def create_cluster_kernels(X, metric, cluster_lab=None, num_cluster=None, onlyClusters=False):
     lab = []

From 4e2eab6ffbf1462994ac46ff6f9c603b9c5b318e Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 13 Feb 2024 21:28:46 +0530
Subject: [PATCH 57/58] Cuda facilityLocation.py

---
 submodlib/functions/facilityLocation.py | 42 ++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/submodlib/functions/facilityLocation.py b/submodlib/functions/facilityLocation.py
index 276d8b0..4689471 100644
--- a/submodlib/functions/facilityLocation.py
+++ b/submodlib/functions/facilityLocation.py
@@ -10,6 +10,12 @@
 from submodlib.helper import create_kernel, create_cluster_kernels
 #from memory_profiler import profile
 
+if torch.cuda.is_available() :
+	from pytorch.submod import FacilityLocation
+else:
+	from submodlib_cpp import FacilityLocation
+
+
 class FacilityLocationFunction(SetFunction):
 	"""Implementation of the Facility Location submodular function (FL).
 
@@ -224,11 +230,20 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None,
 					l.append(self.cpp_sijs)
 					self.cpp_sijs=l
 
-				self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep)
+				if torch.cuda.is_available() :
+					self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep)
+				else:
+					self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep)
+		
+				
 			# elif pybind_mode == "memoryview":
 			# 	self.cpp_obj = FacilityLocation(self.n, memoryview(self.sijs), False, self.cpp_ground_sub, self.separate_rep)
 			elif pybind_mode == "numpyarray":
-				self.cpp_obj = FacilityLocation(self.n, self.sijs, False, self.cpp_ground_sub, self.separate_rep)
+				if torch.cuda.is_available() :
+					self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep)
+				else:
+					self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs, False, self.cpp_ground_sub, self.separate_rep)
+
 			elif pybind_mode == "array32":
 				# print("Kernel's type = ", self.sijs.dtype)
 				self.sijs.astype('float32', copy=False)
@@ -250,16 +265,26 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None,
 		
 		elif self.mode=="dense" and create_dense_cpp_kernel_in_python == False:
 			if self.separate_rep == True:
-				self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric)
+				if torch.cuda.is_available() :
+					self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric)
+				else:
+					self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), self.data_rep.tolist(), True, self.metric)
 			else:
-				self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric)
+				if torch.cuda.is_available() :
+					self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric)
+				else:
+					self.cpp_obj = FacilityLocation(self.n, self.data.tolist(), [[0.]], False, self.metric)
+				
 		
 		elif self.mode=="sparse": #break scipy sparse matrix to native component lists (for csr implementation)
 			self.cpp_sijs = {}
 			self.cpp_sijs['arr_val'] = self.sijs.data.tolist() #contains non-zero values in matrix (row major traversal)
 			self.cpp_sijs['arr_count'] = self.sijs.indptr.tolist() #cumulitive count of non-zero elements upto but not including current row
 			self.cpp_sijs['arr_col'] = self.sijs.indices.tolist() #contains col index corrosponding to non-zero values in arr_val
-			self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col'])
+			if torch.cuda.is_available() :
+					self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col'])
+			else:
+					self.cpp_obj = FacilityLocation(self.n, self.cpp_sijs['arr_val'], self.cpp_sijs['arr_count'], self.cpp_sijs['arr_col'])
 		
 		elif self.mode=="clustered":
 			l_temp = []
@@ -273,8 +298,11 @@ def __init__(self, n, mode, separate_rep=None, n_rep=None, sijs=None, data=None,
 				l_temp.append(temp)
 			self.cluster_sijs = l_temp
 
-			self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map)
+			if torch.cuda.is_available() :
+					self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map)
+			else:
+					self.cpp_obj = FacilityLocation(self.n, self.clusters, self.cluster_sijs, self.cluster_map)
 
 		#self.cpp_ground_sub=self.cpp_obj.getEffectiveGroundSet()
 		#self.ground_sub=self.cpp_ground_sub
-		self.effective_ground = self.cpp_obj.getEffectiveGroundSet()
\ No newline at end of file
+		self.effective_ground = self.cpp_obj.getEffectiveGroundSet()

From b5a0f2a19da853877b5fb895a3f458563e5ab6ce Mon Sep 17 00:00:00 2001
From: JahanviRajput <142418693+JahanviRajput@users.noreply.github.com>
Date: Tue, 13 Feb 2024 21:38:56 +0530
Subject: [PATCH 58/58] Cuda facilityLocation.py

---
 submodlib/functions/facilityLocation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/submodlib/functions/facilityLocation.py b/submodlib/functions/facilityLocation.py
index 4689471..198db06 100644
--- a/submodlib/functions/facilityLocation.py
+++ b/submodlib/functions/facilityLocation.py
@@ -8,6 +8,7 @@
 from submodlib_cpp import FacilityLocation
 from submodlib_cpp import FacilityLocation2 
 from submodlib.helper import create_kernel, create_cluster_kernels
+import torch
 #from memory_profiler import profile
 
 if torch.cuda.is_available() :