miso-belica · bysiber · Feb 21, 2026 · miso-belica · Feb 22, 2026
diff --git a/commit_msg.txt b/commit_msg.txt
@@ -0,0 +1,17 @@
+Fix KeyError in SumBasic summarizer
+
+The _get_all_content_words_in_doc method was processing words in a
+different order (stem -> filter -> normalize) compared to
+_get_content_words_in_sentence (normalize -> filter -> stem). This
+mismatch meant some words would appear in the per-sentence word
+lists but not in the document frequency table, causing a KeyError
+during summarization.
+
+Aligned both methods to use the same processing order:
+normalize -> filter stop words -> stem.
+
+Also changed _get_all_words_in_doc to return raw words instead of
+pre-stemmed words, since stemming is now applied as the last step
+in _get_all_content_words_in_doc.
+
+Fixes #176
diff --git a/sumy/summarizers/sum_basic.py b/sumy/summarizers/sum_basic.py
@@ -28,7 +28,7 @@ def __call__(self, document, sentences_count):
         return self._get_best_sentences(document.sentences, sentences_count, ratings)
 
     def _get_all_words_in_doc(self, sentences):
-        return self._stem_words([w for s in sentences for w in s.words])
+        return [w for s in sentences for w in s.words]
 
     def _get_content_words_in_sentence(self, sentence):
         normalized_words = self._normalize_words(sentence.words)
@@ -54,9 +54,10 @@ def _compute_word_freq(list_of_words):
 
     def _get_all_content_words_in_doc(self, sentences):
         all_words = self._get_all_words_in_doc(sentences)
-        content_words = self._filter_out_stop_words(all_words)
-        normalized_content_words = self._normalize_words(content_words)
-        return normalized_content_words
+        normalized_words = self._normalize_words(all_words)
+        content_words = self._filter_out_stop_words(normalized_words)
+        stemmed_content_words = self._stem_words(content_words)
+        return stemmed_content_words
 
     def _compute_tf(self, sentences):
         """