@@ -30,9 +30,18 @@ def f_124(text):
3030 words 1
3131 dtype: int64
3232 """
33- words = re .findall (r"\b\w+\b" , text .lower ())
34- words = [word for word in words if word not in STOPWORDS ]
35- word_counts = pd .Series (words ).value_counts ().rename (None )
33+ # Normalize the text to lowercase
34+ text = text .lower ()
35+
36+ # Find words as runs of Unicode letters; stdlib `re` has no \p{L} (it raises re.error), so use [^\W\d_]
37+ words = re .findall (r'\b[^\W\d_]+\b' , text )
38+
39+ # Filter out stopwords
40+ filtered_words = [word for word in words if word not in STOPWORDS ]
41+
42+ # Count the frequency of each word using pandas Series
43+ word_counts = pd .Series (filtered_words ).value_counts ()
44+
3645 return word_counts
3746
3847
@@ -44,31 +53,26 @@ class TestCases(unittest.TestCase):
4453
4554 def test_case_1 (self ):
4655 text = "This is a sample text This text contains sample words"
47- word_counts = f_124 (text )
48- expected_counts = pd .Series (
49- {"this" : 2 , "sample" : 2 , "text" : 2 , "contains" : 1 , "words" : 1 }
50- )
51- pd .testing .assert_series_equal (word_counts , expected_counts )
56+ word_counts = f_124 (text ).to_dict ()
57+ expected_counts = {"this" : 2 , "sample" : 2 , "text" : 2 , "contains" : 1 , "words" : 1 }
58+ self .assertDictEqual (word_counts , expected_counts )
5259
5360 def test_case_2 (self ):
5461 text = "Hello world Hello everyone"
55- word_counts = f_124 (text )
56- expected_counts = pd . Series ( {"hello" : 2 , "world" : 1 , "everyone" : 1 })
57- pd . testing . assert_series_equal (word_counts , expected_counts )
62+ word_counts = f_124 (text ). to_dict ()
63+ expected_counts = {"hello" : 2 , "world" : 1 , "everyone" : 1 }
64+ self . assertDictEqual (word_counts , expected_counts )
5865
5966 def test_case_3 (self ):
6067 text = "a an the in is are"
61- word_counts = f_124 (text )
62- expected_counts = pd .Series (dtype = "int64" )
63- pd .testing .assert_series_equal (
64- word_counts .reset_index (drop = True ), expected_counts .reset_index (drop = True )
65- )
68+ word_counts = f_124 (text ).to_dict ()
69+ expected_counts = {}
70+ self .assertDictEqual (word_counts , expected_counts )
6671
6772 def test_case_4 (self ):
6873 text = "This is a test sentence which has a bunch of words and no period"
69- word_counts = f_124 (text )
70- expected_counts = pd .Series (
71- {
74+ word_counts = f_124 (text ).to_dict ()
75+ expected_counts = {
7276 "this" : 1 ,
7377 "test" : 1 ,
7478 "sentence" : 1 ,
@@ -81,18 +85,16 @@ def test_case_4(self):
8185 "no" : 1 ,
8286 "period" : 1 ,
8387 }
84- )
85- pd . testing . assert_series_equal (word_counts , expected_counts )
88+
89+ self . assertDictEqual (word_counts , expected_counts )
8690
8791 def test_case_5 (self ):
8892 text = (
8993 "I I I want want to to to to to go to to to the olympics olympics this year"
9094 )
91- word_counts = f_124 (text )
92- expected_counts = pd .Series (
93- {"i" : 3 , "want" : 2 , "to" : 8 , "go" : 1 , "olympics" : 2 , "this" : 1 , "year" : 1 }
94- ).sort_values (ascending = False )
95- pd .testing .assert_series_equal (word_counts , expected_counts )
95+ word_counts = f_124 (text ).to_dict ()
96+ expected_counts = {"i" : 3 , "want" : 2 , "to" : 8 , "go" : 1 , "olympics" : 2 , "this" : 1 , "year" : 1 }
97+ self .assertDictEqual (word_counts , expected_counts )
9698
9799
98100def run_tests ():
0 commit comments