Skip to content

Commit 13e7694

Browse files
authored
fix(strings): use frequency-based signature for anagrams
Replaced the sorting-based signature implementation with a frequency-based approach using `collections.Counter`. The signature now explicitly encodes each character together with its count, which is intended to prevent collisions and to group true anagrams more reliably.

Examples:
- "test" → "e1s1t2"
- "finaltest" → "a1e1f1i1l1n1s1t2"
- "this is a test" → " 3a1e1h1i2s3t3"

Also updated the anagram lookup to use the new frequency-based signatures, making results more accurate and avoiding false positives.
1 parent e224532 commit 13e7694

File tree

1 file changed

+18
-10
lines changed

1 file changed

+18
-10
lines changed

strings/anagrams.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,42 +3,50 @@
33
import collections
44
import pprint
55
from pathlib import Path
6+
from typing import List
67

78

89
def signature(word: str) -> str:
9-
"""Return a word sorted
10+
"""
11+
Return a frequency-based signature for a word.
12+
1013
>>> signature("test")
11-
'estt'
14+
'e1s1t2'
1215
>>> signature("this is a test")
13-
' aehiisssttt'
16+
' 3a1e1h1i2s3t3'
1417
>>> signature("finaltest")
15-
'aefilnstt'
18+
'a1e1f1i1l1n1s1t2'
1619
"""
17-
return "".join(sorted(word))
20+
freq = collections.Counter(word)
21+
return "".join(f"{ch}{freq[ch]}" for ch in sorted(freq))
1822

1923

20-
def anagram(my_word: str) -> list[str]:
21-
"""Return every anagram of the given word
24+
def anagram(my_word: str) -> List[str]:
25+
"""
26+
Return every anagram of the given word from the dictionary.
27+
2228
>>> anagram('test')
2329
['sett', 'stet', 'test']
2430
>>> anagram('this is a test')
2531
[]
2632
>>> anagram('final')
2733
['final']
2834
"""
29-
return word_by_signature[signature(my_word)]
35+
return word_by_signature.get(signature(my_word), [])
3036

3137

38+
# Load word list
3239
data: str = Path(__file__).parent.joinpath("words.txt").read_text(encoding="utf-8")
3340
word_list = sorted({word.strip().lower() for word in data.splitlines()})
3441

42+
# Map signatures to word list
3543
word_by_signature = collections.defaultdict(list)
3644
for word in word_list:
3745
word_by_signature[signature(word)].append(word)
3846

3947
if __name__ == "__main__":
4048
all_anagrams = {word: anagram(word) for word in word_list if len(anagram(word)) > 1}
4149

42-
with open("anagrams.txt", "w") as file:
43-
file.write("all_anagrams = \n ")
50+
with open("anagrams.txt", "w", encoding="utf-8") as file:
51+
file.write("all_anagrams = \n")
4452
file.write(pprint.pformat(all_anagrams))

0 commit comments

Comments
 (0)