File tree Expand file tree Collapse file tree 2 files changed +31
-4
lines changed Expand file tree Collapse file tree 2 files changed +31
-4
lines changed Original file line number Diff line number Diff line change @@ -22,6 +22,11 @@ WordSegment API Reference
22
22
23
23
Score a `word` in the context of the previous word, `prev`.
24
24
25
+ .. py:function:: isegment(text)
26
+ :module: wordsegment
27
+
28
+ Return an iterator of words that is the best segmentation of `text`.
29
+
25
30
.. py:function:: segment(text)
26
31
:module: wordsegment
27
32
Original file line number Diff line number Diff line change @@ -92,8 +92,8 @@ def score(word, prev=None):
92
92
93
93
return score (word )
94
94
95
- def segment (text ):
96
- "Return a list of words that is the best segmenation of `text`."
95
+ def isegment (text ):
96
+ "Return an iterator of words that is the best segmentation of `text`."
97
97
98
98
memo = dict ()
99
99
@@ -116,9 +116,31 @@ def candidates():
116
116
117
117
return max (candidates ())
118
118
119
- _ , result_words = search (clean (text ))
119
+ # Avoid recursion limit issues by dividing text into chunks, segmenting
120
+ # those chunks and combining the results together. Chunks may divide words
121
+ # in the middle so prefix chunks with the last five words of the previous
122
+ # result.
123
+
124
+ clean_text = clean (text )
125
+ size = 250
126
+ prefix = ''
127
+
128
+ for offset in range (0 , len (clean_text ), size ):
129
+ chunk = clean_text [offset :(offset + size )]
130
+ _ , chunk_words = search (prefix + chunk )
131
+ prefix = '' .join (chunk_words [- 5 :])
132
+ del chunk_words [- 5 :]
133
+ for word in chunk_words :
134
+ yield word
120
135
121
- return result_words
136
+ _ , prefix_words = search (prefix )
137
+
138
+ for word in prefix_words :
139
+ yield word
140
+
141
+ def segment (text ):
142
+ "Return a list of words that is the best segmentation of `text`."
143
+ return list (isegment (text ))
122
144
123
145
def main (args = ()):
124
146
"""Command-line entry-point. Parses `args` into in-file and out-file then
You can’t perform that action at this time.
0 commit comments