Skip to content

Commit b34f6ad

Browse files
Predd0o, matheusvir, ManoelNetto26, Lucaslg7, RailtonDantas
committed
fix(whoosh): correct skip list implementation errors
Co-authored-by: Matheus Virgolino <matheus.virgolino.abilio.da.silva@ccc.ufcg.edu.br>
Co-authored-by: Manoel Netto <manoel.da.nobrega.eustaqueo.netto@ccc.ufcg.edu.br>
Co-authored-by: Pedro <pedroalmeida1896@gmail.com>
Co-authored-by: Lucaslg7 <lucasmoizinholg7@gmail.com>
Co-authored-by: RailtonDantas <railtondantas.code@gmail.com>
Co-authored-by: João Pereira <joao.pereira.de.oliveira@ccc.ufcg.edu.br>
1 parent c65c833 commit b34f6ad

File tree

7 files changed

+44
-16
lines changed

7 files changed

+44
-16
lines changed

src/whoosh/codec/memory.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
from threading import Lock
3131

3232
from whoosh.codec import base
33-
from whoosh.matching import ListMatcher
33+
from whoosh.matching import ListMatcher, SkipListMatcher
3434
from whoosh.reading import SegmentReader, TermInfo, TermNotFound
3535
from whoosh.writing import SegmentWriter
3636

@@ -182,7 +182,7 @@ def has_vector(self, docnum, fieldname):
182182
def vector(self, docnum, fieldname, format_):
183183
items = self._segment._vectors[docnum][fieldname]
184184
ids, weights, values = zip(*items)
185-
return ListMatcher(ids, weights, values, format_)
185+
return SkipListMatcher(ids, weights, values, format_)
186186

187187
def stored_fields(self, docnum):
188188
return self._segment._stored[docnum]
@@ -284,7 +284,7 @@ def term_info(self, fieldname, text):
284284
def matcher(self, fieldname, btext, format_, scorer=None):
285285
items = self._invindex[fieldname][btext]
286286
ids, weights, values = zip(*items)
287-
return ListMatcher(ids, weights, values, format_, scorer=scorer)
287+
return SkipListMatcher(ids, weights, values, format_, scorer=scorer)
288288

289289
def indexed_field_names(self):
290290
return self._invindex.keys()

src/whoosh/codec/plaintext.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
from pickle import dumps, loads
3030

3131
from whoosh.codec import base
32-
from whoosh.matching import ListMatcher
32+
from whoosh.matching import ListMatcher, SkipListMatcher
3333
from whoosh.reading import TermInfo, TermNotFound
3434

3535
_reprable = (bytes, str, int, float)
@@ -273,7 +273,7 @@ def vector(self, docnum, fieldname, format_):
273273
values.append(c["v"])
274274
c = self._find_line(3, "VPOST")
275275

276-
return ListMatcher(
276+
return SkipListMatcher(
277277
ids,
278278
weights,
279279
values,
@@ -433,7 +433,7 @@ def matcher(self, fieldname, btext, format_, scorer=None):
433433
values.append(c["v"])
434434
c = self._find_line(3, "POST")
435435

436-
return ListMatcher(ids, weights, values, format_, scorer=scorer)
436+
return SkipListMatcher(ids, weights, values, format_, scorer=scorer)
437437

438438
def close(self):
439439
self._dbfile.close()

src/whoosh/codec/whoosh3.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737
from whoosh import columns, formats
3838
from whoosh.codec import base
3939
from whoosh.filedb import compound, filetables
40-
from whoosh.matching import LeafMatcher, ListMatcher, ReadTooFar
40+
from whoosh.matching import LeafMatcher, ListMatcher, ReadTooFar, SkipListMatcher
4141
from whoosh.reading import TermInfo, TermNotFound
4242
from whoosh.system import (
4343
_FLOAT_SIZE,
@@ -111,9 +111,9 @@ def postings_writer(self, dbfile, byteids=False):
111111
def postings_reader(self, dbfile, terminfo, format_, term=None, scorer=None):
112112
if terminfo.is_inlined():
113113
# If the postings were inlined into the terminfo object, pull them
114-
# out and use a ListMatcher to wrap them in a Matcher interface
114+
# out and use a SkipListMatcher to wrap them in a Matcher interface
115115
ids, weights, values = terminfo.inlined_postings()
116-
m = ListMatcher(
116+
m = SkipListMatcher(
117117
ids,
118118
weights,
119119
values,

src/whoosh/matching/mcore.py

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -573,11 +573,36 @@ def score(self):
573573
else:
574574
return self.weight()
575575

576+
576577
class SkipListMatcher(ListMatcher):
577-
def __init__(self, ids, **kwargs):
578-
super().__init__(ids, **kwargs)
578+
def __init__(
579+
self,
580+
ids,
581+
weights=None,
582+
values=None,
583+
format=None,
584+
scorer=None,
585+
position=0,
586+
all_weights=None,
587+
term=None,
588+
terminfo=None,
589+
):
590+
super().__init__(
591+
ids, weights, values, format, scorer, position, all_weights, term, terminfo
592+
)
579593
self._skiplist = SkipList(ids)
580594

595+
def copy(self):
596+
return self.__class__(
597+
self._ids,
598+
self._weights,
599+
self._values,
600+
self._format,
601+
self._scorer,
602+
self._i,
603+
self._all_weights,
604+
)
605+
581606
def skip_to(self, id):
582607
if not self.is_active():
583608
raise ReadTooFar
@@ -586,10 +611,11 @@ def skip_to(self, id):
586611

587612
node = self._skiplist.skip_to(id)
588613
if node is not None:
589-
self._i = bisect_left(self._ids, node.doc_id, self._i)
614+
self._i = bisect_left(self._ids, node.doc_id, self._i)
590615
else:
591616
self._i = len(self._ids)
592617

618+
593619
# Term/vector leaf posting matcher middleware
594620

595621

src/whoosh/matching/skiplist.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
11
import random
22

3+
_rng = random.Random(42)
4+
5+
36
class SkipNode:
47
__slots__ = ("doc_id", "forward")
58

@@ -18,7 +21,7 @@ def __init__(self, ids, max_level=16, p = 0.5):
1821

1922
def _random_level(self):
2023
level = 0
21-
while random.random() < self.p and level < self.max_level:
24+
while _rng.random() < self.p and level < self.max_level:
2225
level += 1
2326
return level
2427

@@ -60,4 +63,3 @@ def __iter__(self):
6063
def __contains__(self, doc_id):
6164
node = self.skip_to(doc_id)
6265
return node is not None and node.doc_id == doc_id
63-

src/whoosh/query/qcore.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -735,4 +735,4 @@ def matcher(self, searcher, context=None):
735735
doclist.update(pr.all_ids())
736736
doclist = sorted(doclist)
737737

738-
return matching.ListMatcher(doclist, all_weights=self.boost)
738+
return matching.SkipListMatcher(doclist, all_weights=self.boost)

src/whoosh/query/wrappers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def matcher(self, searcher, context=None):
178178
return m
179179
else:
180180
ids = array("I", m.all_ids())
181-
return matching.ListMatcher(ids, all_weights=self.score, term=m.term())
181+
return matching.SkipListMatcher(ids, all_weights=self.score, term=m.term())
182182

183183

184184
class WeightingQuery(WrappingQuery):

0 commit comments

Comments
 (0)