Skip to content

Commit 9268bf9

Browse files
committed
fix max len
1 parent fe95aaf commit 9268bf9

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

src/Functions/sparseGrams.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class SparseGramsImpl
5757
{
5858
size_t position;
5959
size_t left_ngram_position;
60+
size_t symbol_index;
6061
size_t hash;
6162
};
6263

@@ -88,6 +89,11 @@ class SparseGramsImpl
8889
return {left_iterator, right_iterator};
8990
}
9091

92+
/// Returns the current value of num_increments.
/// The caller stores this value as right_symbol_index and uses differences of such
/// indices (rather than position differences) to compute the n-gram length that is
/// compared against max_ngram_length.
size_t getRightSymbol() const
93+
{
94+
return num_increments;
95+
}
96+
9197
size_t getNextPosition(size_t iterator) const
9298
{
9399
if constexpr (is_utf8)
@@ -123,13 +129,15 @@ class SparseGramsImpl
123129
return false;
124130

125131
auto [ngram_left_position, right_position] = symbol_iterator.getNGramPositions();
132+
size_t right_symbol_index = symbol_iterator.getRightSymbol();
126133
size_t next_right_position = symbol_iterator.getNextPosition(right_position);
127134
size_t right_border_ngram_hash = hasher(pos + ngram_left_position, next_right_position - ngram_left_position);
128135

129136
while (!convex_hull.empty() && convex_hull.back().hash < right_border_ngram_hash)
130137
{
131138
size_t possible_left_position = convex_hull.back().left_ngram_position;
132-
size_t length = next_right_position - possible_left_position;
139+
size_t possible_left_symbol_index = convex_hull.back().symbol_index;
140+
size_t length = right_symbol_index - possible_left_symbol_index + 2;
133141
if (length > max_ngram_length)
134142
{
135143
/// If the length already exceeds max_ngram_length at the current right position, it will exceed it at all future right positions too, so we can just delete them all.
@@ -143,7 +151,8 @@ class SparseGramsImpl
143151
if (!convex_hull.empty())
144152
{
145153
size_t possible_left_position = convex_hull.back().left_ngram_position;
146-
size_t length = next_right_position - possible_left_position;
154+
size_t possible_left_symbol_index = convex_hull.back().symbol_index;
155+
size_t length = right_symbol_index - possible_left_symbol_index + 2;
147156
if (length <= max_ngram_length)
148157
result.push_back({possible_left_position, next_right_position});
149158
}
@@ -152,7 +161,12 @@ class SparseGramsImpl
152161
while (!convex_hull.empty() && convex_hull.back().hash == right_border_ngram_hash)
153162
convex_hull.pop_back();
154163

155-
convex_hull.push_back(PositionAndHash{.position = right_position, .left_ngram_position = ngram_left_position, .hash = right_border_ngram_hash});
164+
convex_hull.push_back(PositionAndHash{
165+
.position = right_position,
166+
.left_ngram_position = ngram_left_position,
167+
.symbol_index = right_symbol_index,
168+
.hash = right_border_ngram_hash
169+
});
156170
symbol_iterator.increment();
157171
return true;
158172
}

0 commit comments

Comments
 (0)