23
23
TRACE = False
24
24
25
25
if TRACE :
26
+ use_print = True
26
27
import logging
27
28
import sys
28
29
31
32
def logger_debug(*args):
    """
    Log ``args`` at DEBUG level as one space-separated message.

    String arguments are used verbatim; every other argument is rendered
    with ``repr()``.
    """
    # A conditional expression is used instead of `cond and a or b`: with the
    # and/or form an empty string is falsy and would fall through to repr(),
    # being logged as '' (with quotes) rather than as an empty string.
    return logger.debug(
        ' '.join(a if isinstance(a, str) else repr(a) for a in args)
    )
33
34
35
+ if use_print :
36
+ logger_debug = print
37
+
34
38
logging .basicConfig (stream = sys .stdout )
35
39
logger .setLevel (logging .DEBUG )
36
40
@@ -142,22 +146,25 @@ def match_unknowns(
142
146
unknown_ngram_length = unknown_ngram_length ,
143
147
)
144
148
149
+ # build match from merged matched ngrams
150
+ qspans = (Span (qstart , qend ) for qstart , qend in matched_ngrams )
151
+ qspan = Span ().union (* qspans )
152
+
145
153
if TRACE :
146
154
tokens_by_tid = idx .tokens_by_tid
147
155
148
156
def get_tokens (_toks ):
149
157
return (' ' .join (tokens_by_tid [t ] for t in _toks ))
150
158
151
159
print ('match_unknowns: matched_ngrams' )
152
- for qstart , qend , matched_toks in matched_ngrams :
160
+
161
+ for qstart , qend in matched_ngrams :
162
+ _span = Span (qstart , qend )
163
+ _tokens = [query_tokens [qpos ] for qpos in _span ]
153
164
print (
154
165
' ' , 'qstart' , qstart ,
155
166
'qend' , qend ,
156
- 'matched_toks' , get_tokens (matched_toks ))
157
-
158
- # build match from merged matched ngrams
159
- qspans = (Span (qstart , qend ) for qstart , qend in matched_ngrams )
160
- qspan = Span ().union (* qspans )
167
+ 'matched_toks' , get_tokens (_tokens ))
161
168
162
169
if not qspan :
163
170
return
@@ -169,7 +176,8 @@ def get_tokens(_toks):
169
176
match_len = len (qspan )
170
177
171
178
if TRACE :
172
- print ('match_unknowns: matched_span:' , get_tokens (matched_tokens ))
179
+ #print('match_unknowns: matched_span:', get_tokens(matched_tokens))
180
+ print ('match_unknowns: qspan, match_len, matched_span:' , qspan , match_len , matched_tokens )
173
181
174
182
# we use the query side to build the ispans
175
183
ispan = Span (0 , match_len )
@@ -180,9 +188,8 @@ def get_tokens(_toks):
180
188
try :
181
189
match_start_line = line_by_pos [qspan .start ]
182
190
match_end_line = line_by_pos [qspan .end ]
183
- except :
184
- print ('empty span:' , qspan )
185
- raise
191
+ except Exception as e :
192
+ raise Exception ('empty span:' , qspan ) from e
186
193
187
194
text = '' .join (get_full_qspan_matched_text (
188
195
match_qspan = qspan ,
0 commit comments