Skip to content

Commit 5927a2e

Browse files
Harshil Goel authored and darkcoderrises committed
perf(core): Use Intersect to Narrow Iterate Range and Reduce Memory Allocation in pl.Uids() (#9299)
Based on: #9271. Co-authored by: gooohgb
1 parent 9df606b commit 5927a2e

File tree

1 file changed

+79
-33
lines changed

1 file changed

+79
-33
lines changed

posting/list.go

Lines changed: 79 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1709,45 +1709,87 @@ func (l *List) Uids(opt ListOptions) (*pb.List, error) {
17091709
if opt.First == 0 {
17101710
opt.First = math.MaxInt32
17111711
}
1712-
// Pre-assign length to make it faster.
1713-
l.RLock()
1714-
// Use approximate length for initial capacity.
1715-
res := make([]uint64, 0, l.mutationMap.len()+codec.ApproxLen(l.plist.Pack))
1716-
out := &pb.List{}
1717-
if l.mutationMap.len() == 0 && opt.Intersect != nil && len(l.plist.Splits) == 0 {
1718-
if opt.ReadTs < l.minTs {
1719-
l.RUnlock()
1720-
return out, errors.Wrapf(ErrTsTooOld, "While reading UIDs")
1712+
1713+
getUidList := func() (*pb.List, error, bool) {
1714+
// Pre-assign length to make it faster.
1715+
l.RLock()
1716+
defer l.RUnlock()
1717+
// Use approximate length for initial capacity.
1718+
res := make([]uint64, 0, l.ApproxLen())
1719+
out := &pb.List{}
1720+
1721+
if l.mutationMap.len() == 0 && opt.Intersect != nil && len(l.plist.Splits) == 0 {
1722+
if opt.ReadTs < l.minTs {
1723+
return out, errors.Wrapf(ErrTsTooOld, "While reading UIDs"), false
1724+
}
1725+
algo.IntersectCompressedWith(l.plist.Pack, opt.AfterUid, opt.Intersect, out)
1726+
return out, nil, false
17211727
}
1722-
algo.IntersectCompressedWith(l.plist.Pack, opt.AfterUid, opt.Intersect, out)
1723-
l.RUnlock()
1724-
return out, nil
1725-
}
17261728

1727-
err := l.iterate(opt.ReadTs, opt.AfterUid, func(p *pb.Posting) error {
1728-
if p.PostingType == pb.Posting_REF {
1729-
res = append(res, p.Uid)
1730-
if opt.First < 0 {
1731-
// We need the last N.
1732-
// TODO: This could be optimized by only considering some of the last UidBlocks.
1733-
if len(res) > -opt.First {
1734-
res = res[1:]
1729+
// If we need to intersect and the number of elements are small, in that case it's better to
1730+
// just check each item is present or not.
1731+
if opt.Intersect != nil && len(opt.Intersect.Uids) < l.ApproxLen() {
1732+
// Cache the iterator as it makes the search space smaller each time.
1733+
var pitr pIterator
1734+
for _, uid := range opt.Intersect.Uids {
1735+
ok, _, err := l.findPostingWithItr(opt.ReadTs, uid, pitr)
1736+
if err != nil {
1737+
return nil, err, false
1738+
}
1739+
if ok {
1740+
res = append(res, uid)
17351741
}
1736-
} else if len(res) > opt.First {
1737-
return ErrStopIteration
17381742
}
1743+
1744+
out.Uids = res
1745+
return out, nil, false
17391746
}
1740-
return nil
1741-
})
1742-
l.RUnlock()
1743-
if err != nil {
1744-
return out, errors.Wrapf(err, "cannot retrieve UIDs from list with key %s",
1745-
hex.EncodeToString(l.key))
1747+
1748+
// If we are going to iterate over the list, in that case we only need to read between min and max
1749+
// of opt.Intersect.
1750+
var uidMin, uidMax uint64 = 0, 0
1751+
if opt.Intersect != nil && len(opt.Intersect.Uids) > 0 {
1752+
uidMin = opt.Intersect.Uids[0]
1753+
uidMax = opt.Intersect.Uids[len(opt.Intersect.Uids)-1]
1754+
}
1755+
1756+
err := l.iterate(opt.ReadTs, opt.AfterUid, func(p *pb.Posting) error {
1757+
if p.PostingType == pb.Posting_REF {
1758+
if p.Uid < uidMin {
1759+
return nil
1760+
}
1761+
if p.Uid > uidMax && uidMax > 0 {
1762+
return ErrStopIteration
1763+
}
1764+
res = append(res, p.Uid)
1765+
1766+
if opt.First < 0 {
1767+
// We need the last N.
1768+
// TODO: This could be optimized by only considering some of the last UidBlocks.
1769+
if len(res) > -opt.First {
1770+
res = res[1:]
1771+
}
1772+
} else if len(res) > opt.First {
1773+
return ErrStopIteration
1774+
}
1775+
}
1776+
return nil
1777+
})
1778+
if err != nil {
1779+
return out, errors.Wrapf(err, "cannot retrieve UIDs from list with key %s",
1780+
hex.EncodeToString(l.key)), false
1781+
}
1782+
out.Uids = res
1783+
return out, nil, true
17461784
}
17471785

17481786
// Do The intersection here as it's optimized.
1749-
out.Uids = res
1750-
lenBefore := len(res)
1787+
out, err, applyIntersectWith := getUidList()
1788+
if err != nil || !applyIntersectWith {
1789+
return out, err
1790+
}
1791+
1792+
lenBefore := len(out.Uids)
17511793
if opt.Intersect != nil {
17521794
algo.IntersectWith(out, opt.Intersect, out)
17531795
}
@@ -2045,7 +2087,7 @@ func (l *List) FindPosting(readTs uint64, uid uint64) (found bool, pos *pb.Posti
20452087
return l.findPosting(readTs, uid)
20462088
}
20472089

2048-
func (l *List) findPosting(readTs uint64, uid uint64) (found bool, pos *pb.Posting, err error) {
2090+
func (l *List) findPostingWithItr(readTs uint64, uid uint64, pitr pIterator) (found bool, pos *pb.Posting, err error) {
20492091
// Iterate starts iterating after the given argument, so we pass UID - 1
20502092
// TODO Find what happens when uid = math.MaxUint64
20512093
searchFurther, pos := l.mutationMap.findPosting(readTs, uid)
@@ -2056,7 +2098,6 @@ func (l *List) findPosting(readTs uint64, uid uint64) (found bool, pos *pb.Posti
20562098
return false, nil, nil
20572099
}
20582100

2059-
var pitr pIterator
20602101
err = pitr.seek(l, uid-1, 0)
20612102
if err != nil {
20622103
return false, nil, errors.Wrapf(err,
@@ -2080,6 +2121,11 @@ func (l *List) findPosting(readTs uint64, uid uint64) (found bool, pos *pb.Posti
20802121
return false, nil, nil
20812122
}
20822123

2124+
func (l *List) findPosting(readTs uint64, uid uint64) (found bool, pos *pb.Posting, err error) {
2125+
var pitr pIterator
2126+
return l.findPostingWithItr(readTs, uid, pitr)
2127+
}
2128+
20832129
// Facets gives facets for the posting representing value.
20842130
func (l *List) Facets(readTs uint64, param *pb.FacetParams, langs []string,
20852131
listType bool) ([]*pb.Facets, error) {

0 commit comments

Comments
 (0)