BrianLusina · BrianLusina · Nov 16, 2025 · Nov 15, 2025 · Nov 15, 2025 · Nov 16, 2025
@@ -729,6 +729,8 @@
     * [Test Reverse Vowels](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/reverse_vowels/test_reverse_vowels.py)
   * Reverse Words
     * [Test Reverse Words](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/reverse_words/test_reverse_words.py)
+  * Similar String Groups
+    * [Test Similar String Groups](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/similar_string_groups/test_similar_string_groups.py)
   * Spreadsheet Encoding
     * [Test Spreadsheet Encode](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/spreadsheet_encoding/test_spreadsheet_encode.py)
   * String Compression

@@ -0,0 +1,6 @@
+from datastructures.sets import DisjointSetUnion, UnionFind
+
+# Names re-exported as the public API of the datastructures package
+__all__ = ["DisjointSetUnion", "UnionFind"]
@@ -0,0 +1,3 @@
+from datastructures.sets.union_find import DisjointSetUnion, UnionFind
+
+# Names re-exported from the union_find module as this package's public API
+__all__ = ["DisjointSetUnion", "UnionFind"]
@@ -0,0 +1,74 @@
+class DisjointSetUnion:
+    """Union-Find (Disjoint Set Union) with path compression and union by rank.
+
+    Elements are the integers ``0 .. size - 1``. ``root[i]`` stores the parent of
+    element ``i`` (a representative is its own parent), ``rank`` decides which
+    representative absorbs the other during a merge, and ``count`` is the number
+    of disjoint sets that currently exist.
+    """
+
+    def __init__(self, size: int):
+        """Create 'size' elements, each starting in its own singleton set.
+
+        Raises:
+            ValueError: If 'size' is zero or negative.
+        """
+        if size <= 0:
+            raise ValueError("Size must be a positive integer.")
+        self.root = [element for element in range(size)]
+        self.rank = [1 for _ in range(size)]  # Consulted by union by rank
+        self.count = size  # Every element is its own set at the start
+
+    def find(self, i: int) -> int:
+        """Return the representative (root) of the set containing element 'i'."""
+        # First pass: walk up the parent links until the representative is reached
+        representative = i
+        while self.root[representative] != representative:
+            representative = self.root[representative]
+
+        # Second pass (path compression): re-point every visited node at the representative
+        node = i
+        while self.root[node] != representative:
+            parent = self.root[node]
+            self.root[node] = representative
+            node = parent
+
+        return representative
+
+    def union(self, i: int, j: int) -> bool:
+        """
+        Merge the sets containing elements 'i' and 'j'.
+        Returns True if a merge occurred, False if they were already in the same set.
+        """
+        keep = self.find(i)
+        absorb = self.find(j)
+
+        if keep == absorb:
+            return False
+
+        # Union by rank: the representative with the lower rank is attached
+        # under the other one; on a tie the representative of 'i' is kept.
+        if self.rank[keep] < self.rank[absorb]:
+            keep, absorb = absorb, keep
+
+        self.root[absorb] = keep
+        if self.rank[keep] == self.rank[absorb]:
+            self.rank[keep] += 1
+
+        self.count -= 1
+        return True
+
+    def get_count(self) -> int:
+        """Return the current number of disjoint sets."""
+        return self.count
+
+
+class UnionFind:
+    """A minimal Union-Find data structure with path compression.
+
+    Elements are the integers ``0 .. size - 1`` and ``parent[x]`` stores the
+    parent of element ``x`` (a representative is its own parent). Merges do not
+    use rank or size, so the parent chains can become long; ``find`` is
+    therefore iterative so that a long chain cannot exhaust the call stack.
+    """
+
+    def __init__(self, size: int):
+        """Initializes the data structure with 'size' elements, each in its own set.
+
+        Raises:
+            ValueError: If 'size' is zero or negative.
+        """
+        if size <= 0:
+            raise ValueError("Size must be a positive integer.")
+        self.parent = list(range(size))
+
+    def find(self, x: int) -> int:
+        """Finds the representative (root) of the set containing element 'x'."""
+        # First pass: follow parent links up to the representative
+        root = x
+        while self.parent[root] != root:
+            root = self.parent[root]
+
+        # Second pass (path compression): point every visited node at the representative
+        while self.parent[x] != root:
+            next_node = self.parent[x]
+            self.parent[x] = root
+            x = next_node
+
+        return root
+
+    def union(self, x: int, y: int) -> bool:
+        """
+        Merges the sets containing elements 'x' and 'y'.
+        The representative of 'y' is attached under the representative of 'x'.
+        Returns True if a merge occurred, False if already in same set.
+        """
+        root_x = self.find(x)
+        root_y = self.find(y)
+        if root_x != root_y:
+            self.parent[root_y] = root_x
+            return True
+        return False
@@ -0,0 +1,113 @@
+# Similar String Groups
+
+Two strings x and y are considered similar if they are either exactly the same or can be made identical by swapping 
+the characters at two different positions in string x.
+
+We define a similarity group as a set of strings where each string is similar to at least one other string in the group. 
+A string doesn't need to be directly similar to every other string in the group — it just needs to be connected to them 
+through a chain of similarities. A string that is similar to no other string forms a group on its own.
+
+Given a list of strings strs, where each string is an anagram of the others, your task is to determine how many such 
+similarity groups exist in the list.
+
+Constraints:
+
+- 1 ≤ strs.length ≤ 300
+- 1 ≤ strs[i].length ≤ 300
+- strs[i] consists of lowercase letters only.
+- All words in strs have the same length and are anagrams of each other.
+
+---
+
+## Examples
+
+![Example 1](./images/similar_string_groups_example_1.png)
+![Example 2](./images/similar_string_groups_example_2.png)
+![Example 3](./images/similar_string_groups_example_3.png)
+
+---
+
+## Solution
+
+This problem can be seen as a graph connectivity challenge. Each string is a node, and an edge exists between two nodes 
+if their corresponding strings are similar. Our goal is to count how many connected groups (components) exist in this 
+graph.
+
+We solve this problem using the Union-Find (Disjoint Set Union) data structure to efficiently group similar strings. 
+Initially, each string is placed in its own group. We then iterate over all possible pairs of strings. For each pair at 
+indexes i and j, we check whether the two strings are similar — that is, either exactly the same or differ at exactly 
+two positions (meaning one swap can make them equal). If they are similar and currently belong to different groups 
+(i.e., their roots in the Union-Find structure are different), we perform a union operation to merge their groups. 
+Repeating this across all string pairs gradually reduces the number of distinct groups. Finally, we count the number of 
+unique roots in the Union-Find structure, which represents the number of similar string groups.
+
+Here’s the step-by-step explanation of the solution:
+
+1. Initialize n = len(strs).
+2. Create a Union-Find (DSU) structure with n elements, where each element is its own parent.
+3. Define a function areSimilar(s1, s2) that returns TRUE if the strings s1 and s2 are similar according to the given 
+   condition:
+   - Initialize an empty list diff = [] to track differences.
+   - Loop through both strings in parallel using zip.
+     - If the characters differ at a position, record the mismatched pair of characters in diff.
+     - If more than 2 differences have been recorded, exit early and return FALSE.
+   - After the loop is completed, evaluate the result:
+     - len(diff) == 0 means the strings are identical.
+     - len(diff) == 2 and diff[0] == diff[1][::-1] means there are exactly two differences and the character pairs are 
+       mirror images of each other, so a single swap makes the strings equal.
+
+4. Loop over all pairs (i, j) such that 0 ≤ i < j < n. 
+5. For each pair, use the areSimilar function to check if strs[i] and strs[j] are similar. 
+6. If similar, use find(i) and find(j) to get their root parents. 
+7. If the roots differ, merge them using union(i, j). 
+8. After processing all pairs, iterate over all indexes i from 0 to n - 1 and find their root parents using find(i). 
+9. Add each root to a set to track unique groups. 
+10. Return the size of the set as the number of similarity groups.
+
+Let’s look at the following illustration to get a better understanding of the solution:
+
+![Solution 1](./images/similar_string_groups_solution_1.png)
+![Solution 2](./images/similar_string_groups_solution_2.png)
+![Solution 3](./images/similar_string_groups_solution_3.png)
+![Solution 4](./images/similar_string_groups_solution_4.png)
+![Solution 5](./images/similar_string_groups_solution_5.png)
+![Solution 6](./images/similar_string_groups_solution_6.png)
+![Solution 7](./images/similar_string_groups_solution_7.png)
+
+### Time Complexity
+Let's break the time complexity down into two major components:
+
+#### **Comparing all pairs of strings**
+
+To check if two strings are similar, we compare them character by character, which takes _O(m)_ where m is the length 
+of each string. Given there are n strings and we compare all possible pairs of strings, there are O(n²) comparisons. 
+Therefore, the total time spent on comparisons is O(n²∗m).
+
+#### **Union-Find operations (find and union)**
+
+For each similar pair, we perform a find and possibly a union operation. With path compression and union by rank, each 
+operation takes amortized O(α(n)) time, where α is the inverse Ackermann function, which is nearly constant in practice. 
+Since there are up to O(n²) similar pairs, the total time for Union-Find operations is O(n²∗α(n)).
+
+The comparison step dominates the time complexity, as m (the string length) is typically much larger than α(n), which 
+grows very slowly. Therefore, the overall time complexity is O(n²∗m).
+
+### Space Complexity
+
+The space complexity of the algorithm comes from the following components:
+
+#### **Union-Find parent array**: 
+
+Requires O(n) space to store the parent of each node (one per string).
+
+#### **Temporary storage in areSimilar() function**: 
+
+Uses O(1) space — a constant-sized list to track the positions where the two strings differ. Since at most 2 differences 
+are allowed, space usage remains constant.
+
+#### **Set to store unique groups (roots)**: 
+
+Requires O(n) space in the worst case, when all strings are in separate groups and each has a unique root.
+
+The total space complexity is O(n), as all other components (e.g., temporary storage and sets) do not exceed linear 
+space relative to the input size.
@@ -0,0 +1,77 @@
+from typing import List
+from datastructures import DisjointSetUnion, UnionFind
+
+
+def num_similar_groups(strs: List[str]) -> int:
+    """Count the similarity groups in 'strs' using the set counter of a DisjointSetUnion.
+
+    Two strings are similar when they are identical or when they differ at
+    exactly two positions whose characters are swapped. Similar strings are
+    merged into one set; the number of remaining sets is returned. An empty
+    list yields 0.
+    """
+    total = len(strs)
+    if total == 0:
+        return 0
+
+    # Every string is compared over the length of the first one
+    # (all strings share the same length, per constraints).
+    word_len = len(strs[0])
+
+    # One set per string to start with, so the set count begins at 'total'.
+    uf = DisjointSetUnion(total)
+
+    def is_similar(s1: str, s2: str) -> bool:
+        """
+        Tell whether s1 and s2 are similar:
+        no mismatching position, or exactly two that form a swap.
+        """
+        mismatches = []
+        for k in range(word_len):
+            if s1[k] == s2[k]:
+                continue
+            mismatches.append(k)
+            # A third mismatch can never be repaired by a single swap.
+            if len(mismatches) > 2:
+                return False
+
+        if not mismatches:
+            return True
+        if len(mismatches) != 2:
+            return False
+
+        first, second = mismatches
+        return s1[first] == s2[second] and s1[second] == s2[first]
+
+    # Visit each unordered pair once; union() only lowers the set count
+    # when the two strings were still in different sets.
+    for i, left in enumerate(strs):
+        for j in range(i + 1, total):
+            if is_similar(left, strs[j]):
+                uf.union(i, j)
+
+    # Whatever sets remain are the similarity groups
+    return uf.get_count()
+
+# Helper: report whether two strings are identical or exactly one swap apart
+def are_similar(s1, s2):
+    """Return True when s1 and s2 match everywhere, or mismatch at exactly
+    two positions whose character pairs mirror each other.
+
+    The strings are walked in parallel with zip, so the comparison covers
+    the length of the shorter one.
+    """
+    mismatched = []
+    for pair in zip(s1, s2):
+        if pair[0] == pair[1]:
+            continue
+        mismatched.append(pair)
+        # Three mismatches already rule out a single swap
+        if len(mismatched) == 3:
+            return False
+
+    if not mismatched:
+        return True
+    if len(mismatched) != 2:
+        return False
+
+    (first_a, first_b), (second_a, second_b) = mismatched
+    return first_a == second_b and first_b == second_a
+
+def num_similar_groups_2(strs: List[str]) -> int:
+    """Count the similarity groups in 'strs' by collecting distinct set representatives.
+
+    Every pair of strings is checked with are_similar(); similar strings are
+    merged in a DisjointSetUnion, and the number of distinct representatives
+    found afterwards is returned. An empty list yields 0.
+    """
+    n = len(strs)
+    # DisjointSetUnion rejects a size of 0 with ValueError, so the empty list
+    # is answered up front (num_similar_groups returns 0 for it as well).
+    if n == 0:
+        return 0
+
+    uf = DisjointSetUnion(n)
+
+    # Visit each unordered pair of indexes once
+    for i in range(n):
+        for j in range(i + 1, n):
+            if are_similar(strs[i], strs[j]):
+                uf.union(i, j)
+
+    # Each distinct representative stands for one similarity group
+    roots = {uf.find(i) for i in range(n)}
+    return len(roots)
@@ -0,0 +1,60 @@
+import unittest
+from . import num_similar_groups, num_similar_groups_2
+
+
+class SimilarStringGroupsTestCase(unittest.TestCase):
+    """Checks num_similar_groups against known group counts."""
+
+    def test_1(self):
+        words = ["jhki", "kijh", "jkhi", "kihj", "ijhk"]
+        self.assertEqual(3, num_similar_groups(words))
+
+    def test_2(self):
+        words = ["abc", "acb", "bac", "bca", "cab", "cba"]
+        self.assertEqual(1, num_similar_groups(words))
+
+    def test_3(self):
+        words = ["abcd", "abdc", "acbd", "bdca"]
+        self.assertEqual(2, num_similar_groups(words))
+
+    def test_4(self):
+        words = [
+            "fgtdvepeqcfajhlzkwlpuhrwfcueqfbs",
+            "fgcdvppeqcfajhlzkwluehrwftuefqbs",
+            "fgtdvepeqcfajhlzkwlpuhrwfcuefqbs",
+            "fgcdvepeqcfajhlzkwluphrwftuefqbs",
+            "fgldvepeqcfajhlzkwcuphrwftuefqbs",
+            "fgtdvefeqcpajhlzkwlpuhrwfcuefqbs",
+        ]
+        self.assertEqual(2, num_similar_groups(words))
+
+
+class SimilarStringGroups2TestCase(unittest.TestCase):
+    """Checks num_similar_groups_2 against known group counts."""
+
+    def test_1(self):
+        words = ["jhki", "kijh", "jkhi", "kihj", "ijhk"]
+        self.assertEqual(3, num_similar_groups_2(words))
+
+    def test_2(self):
+        words = ["abc", "acb", "bac", "bca", "cab", "cba"]
+        self.assertEqual(1, num_similar_groups_2(words))
+
+    def test_3(self):
+        words = ["abcd", "abdc", "acbd", "bdca"]
+        self.assertEqual(2, num_similar_groups_2(words))
+
+    def test_4(self):
+        words = [
+            "fgtdvepeqcfajhlzkwlpuhrwfcueqfbs",
+            "fgcdvppeqcfajhlzkwluehrwftuefqbs",
+            "fgtdvepeqcfajhlzkwlpuhrwfcuefqbs",
+            "fgcdvepeqcfajhlzkwluphrwftuefqbs",
+            "fgldvepeqcfajhlzkwcuphrwftuefqbs",
+            "fgtdvefeqcpajhlzkwlpuhrwfcuefqbs",
+        ]
+        self.assertEqual(2, num_similar_groups_2(words))
+
+
+if __name__ == '__main__':
+    unittest.main()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from datastructures.sets.union_find import DisjointSetUnion, UnionFind

		__all__ = ["DisjointSetUnion", "UnionFind"]