-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Description
对中文查询字符串分词用的。
import re
def intersperse(delimiter, iterable):
    """Yield the items of *iterable* with *delimiter* between neighbours.

    Args:
        delimiter: Value emitted between every pair of consecutive items.
        iterable: Any iterable; it is consumed lazily.

    Yields:
        The first item, then ``delimiter, item`` for each remaining item.
        An empty iterable yields nothing.
    """
    it = iter(iterable)
    # Fetch the first item explicitly so no delimiter precedes it. A bare
    # ``next(it)`` would let StopIteration escape the generator on empty
    # input, which PEP 479 (Python 3.7+) converts into RuntimeError.
    try:
        first = next(it)
    except StopIteration:
        return
    yield first
    for item in it:
        yield delimiter
        yield item
def prepare_query(self, qs):
    """Segment the word parts of a query string and group multi-token words.

    The query is split on runs of non-word characters; because the pattern
    is a capturing group, the separators are kept and land on the odd
    indices of the split result. Each even-index chunk is UTF-8 encoded and
    handed to ``_scws``, and the returned tokens are decoded back from
    UTF-8. When a chunk produces more than one token it is replaced by
    ``(tok1 tok2 ...)``; otherwise the chunk is kept verbatim. Separators
    are always kept verbatim.

    Args:
        qs: The query. A ``str`` instance is decoded from UTF-8 first
            (this relies on ``str.decode``, i.e. a Python 2 byte string).

    Returns:
        The rewritten query, joined back into a single string.
    """
    if isinstance(qs, str):
        qs = qs.decode('utf-8')

    pieces = []
    chunks = re.split(r'(\W+)', qs, flags=re.UNICODE)
    for index, chunk in enumerate(chunks):
        logger.debug('processing %d: %s', index, chunk)

        # Only even positions hold words; odd positions are separators.
        tokens = []
        if index % 2 == 0:
            raw_tokens = _scws(chunk.encode('utf-8'))
            tokens = [raw.decode('utf-8') for raw in raw_tokens]

        # Nothing to group: keep the original text untouched.
        if len(tokens) <= 1:
            pieces.append(chunk)
            continue

        logger.debug('word segmenting result: %r', tokens)
        pieces.append(u'(')
        pieces.extend(intersperse(' ', tokens))
        pieces.append(u')')

    return ''.join(pieces)
Metadata
Assignees
Labels
No labels