Skip to content

Commit c1e36a4

Browse files
committed
Improve s2region_term_indexer:
* Add the ability to reuse the term buffer * Add an option to optimize index size if queries contain only points
1 parent e8d8375 commit c1e36a4

File tree

2 files changed

+92
-30
lines changed

2 files changed

+92
-30
lines changed

src/s2/s2region_term_indexer.cc

Lines changed: 60 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ string S2RegionTermIndexer::GetTerm(TermType term_type, const S2CellId id,
126126

127127
vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
128128
string_view prefix) {
129+
vector<string> terms;
130+
GetIndexTerms(point, prefix, &terms);
131+
return terms;
132+
}
133+
134+
void S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
135+
string_view prefix,
136+
vector<string>* terms) {
129137
// See the top of this file for an overview of the indexing strategy.
130138
//
131139
// The last cell generated by this loop is effectively the covering for
@@ -136,12 +144,13 @@ vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Point& point,
136144
// max_level() != true_max_level() (see S2RegionCoverer::Options).
137145

138146
const S2CellId id(point);
139-
vector<string> terms;
140-
for (int level = options_.min_level(); level <= options_.max_level();
141-
level += options_.level_mod()) {
142-
terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
147+
int level = options_.min_level();
148+
if (options_.query_contains_points_only()) {
149+
level = options_.true_max_level();
150+
}
151+
for (; level <= options_.max_level(); level += options_.level_mod()) {
152+
terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
143153
}
144-
return terms;
145154
}
146155

147156
vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Region& region,
@@ -154,6 +163,13 @@ vector<string> S2RegionTermIndexer::GetIndexTerms(const S2Region& region,
154163

155164
vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
156165
const S2CellUnion& covering, string_view prefix) {
166+
vector<string> terms;
167+
GetIndexTermsForCanonicalCovering(covering, prefix, &terms);
168+
return terms;
169+
}
170+
171+
void S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
172+
const S2CellUnion& covering, string_view prefix, vector<string>* terms) {
157173
// See the top of this file for an overview of the indexing strategy.
158174
//
159175
// Cells in the covering are normally indexed as covering terms. If we are
@@ -168,24 +184,29 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
168184
*coverer_.mutable_options() = options_;
169185
S2_CHECK(coverer_.IsCanonical(covering));
170186
}
171-
vector<string> terms;
172187
S2CellId prev_id = S2CellId::None();
173188
int true_max_level = options_.true_max_level();
174-
for (S2CellId id : covering) {
189+
for (const S2CellId id : covering) {
175190
// IsCanonical() already checks the following conditions, but we repeat
176191
// them here for documentation purposes.
177192
int level = id.level();
178193
S2_DCHECK_GE(level, options_.min_level());
179194
S2_DCHECK_LE(level, options_.max_level());
180195
S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
196+
// assume level <= options_.true_max_level()
181197

182-
if (level < true_max_level) {
183-
// Add a covering term for this cell.
184-
terms.push_back(GetTerm(TermType::COVERING, id, prefix));
185-
}
186-
if (level == true_max_level || !options_.optimize_for_space()) {
187-
// Add an ancestor term for this cell at the constrained level.
188-
terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
198+
const bool is_max_level_cell = level == true_max_level;
199+
// Add a term for this cell. As an optimization, a true_max_level cell is
// indexed as an ANCESTOR term rather than as a COVERING term.
200+
terms->push_back(GetTerm(is_max_level_cell ? TermType::ANCESTOR
201+
: TermType::COVERING,
202+
id, prefix));
203+
204+
// If the query contains only points, no other terms are needed.
205+
if (options_.query_contains_points_only()) continue;
206+
207+
if (!options_.optimize_for_space() && !is_max_level_cell) {
208+
// Add an ancestor term for this cell.
209+
terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix));
189210
}
190211
// Finally, add ancestor terms for all the ancestors of this cell.
191212
while ((level -= options_.level_mod()) >= options_.min_level()) {
@@ -194,29 +215,34 @@ vector<string> S2RegionTermIndexer::GetIndexTermsForCanonicalCovering(
194215
prev_id.parent(level) == ancestor_id) {
195216
break; // We have already processed this cell and its ancestors.
196217
}
197-
terms.push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix));
218+
terms->push_back(GetTerm(TermType::ANCESTOR, ancestor_id, prefix));
198219
}
199220
prev_id = id;
200221
}
201-
return terms;
202222
}
203223

204224
vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Point& point,
205225
string_view prefix) {
226+
vector<string> terms;
227+
GetQueryTerms(point, prefix, &terms);
228+
return terms;
229+
}
230+
231+
void S2RegionTermIndexer::GetQueryTerms(const S2Point& point,
232+
string_view prefix,
233+
vector<string>* terms) {
206234
// See the top of this file for an overview of the indexing strategy.
207235

208236
const S2CellId id(point);
209-
vector<string> terms;
210237
// Recall that all true_max_level() cells are indexed only as ancestor terms.
211238
int level = options_.true_max_level();
212-
terms.push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
213-
if (options_.index_contains_points_only()) return terms;
239+
terms->push_back(GetTerm(TermType::ANCESTOR, id.parent(level), prefix));
240+
if (options_.index_contains_points_only()) return;
214241

215242
// Add covering terms for all the ancestor cells.
216243
for (; level >= options_.min_level(); level -= options_.level_mod()) {
217-
terms.push_back(GetTerm(TermType::COVERING, id.parent(level), prefix));
244+
terms->push_back(GetTerm(TermType::COVERING, id.parent(level), prefix));
218245
}
219-
return terms;
220246
}
221247

222248
vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Region& region,
@@ -229,13 +255,20 @@ vector<string> S2RegionTermIndexer::GetQueryTerms(const S2Region& region,
229255

230256
vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
231257
const S2CellUnion& covering, string_view prefix) {
258+
vector<string> terms;
259+
GetQueryTermsForCanonicalCovering(covering, prefix, &terms);
260+
return terms;
261+
}
262+
263+
void S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
264+
const S2CellUnion& covering, string_view prefix, vector<string>* terms) {
232265
// See the top of this file for an overview of the indexing strategy.
233266

267+
S2_CHECK(!options_.query_contains_points_only());
234268
if (google::DEBUG_MODE) {
235269
*coverer_.mutable_options() = options_;
236270
S2_CHECK(coverer_.IsCanonical(covering));
237271
}
238-
vector<string> terms;
239272
S2CellId prev_id = S2CellId::None();
240273
int true_max_level = options_.true_max_level();
241274
for (S2CellId id : covering) {
@@ -245,18 +278,19 @@ vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
245278
S2_DCHECK_GE(level, options_.min_level());
246279
S2_DCHECK_LE(level, options_.max_level());
247280
S2_DCHECK_EQ(0, (level - options_.min_level()) % options_.level_mod());
281+
// assume level <= options_.true_max_level()
248282

249283
// Cells in the covering are always queried as ancestor terms.
250-
terms.push_back(GetTerm(TermType::ANCESTOR, id, prefix));
284+
terms->push_back(GetTerm(TermType::ANCESTOR, id, prefix));
251285

252286
// If the index only contains points, there are no covering terms.
253287
if (options_.index_contains_points_only()) continue;
254288

255289
// If we are optimizing for index space rather than query time, cells are
256290
// also queried as covering terms (except for true_max_level() cells,
257291
// which are indexed and queried as ancestor cells only).
258-
if (options_.optimize_for_space() && level < true_max_level) {
259-
terms.push_back(GetTerm(TermType::COVERING, id, prefix));
292+
if (options_.optimize_for_space() && level != true_max_level) {
293+
terms->push_back(GetTerm(TermType::COVERING, id, prefix));
260294
}
261295
// Finally, add covering terms for all the ancestors of this cell.
262296
while ((level -= options_.level_mod()) >= options_.min_level()) {
@@ -265,9 +299,8 @@ vector<string> S2RegionTermIndexer::GetQueryTermsForCanonicalCovering(
265299
prev_id.parent(level) == ancestor_id) {
266300
break; // We have already processed this cell and its ancestors.
267301
}
268-
terms.push_back(GetTerm(TermType::COVERING, ancestor_id, prefix));
302+
terms->push_back(GetTerm(TermType::COVERING, ancestor_id, prefix));
269303
}
270304
prev_id = id;
271305
}
272-
return terms;
273306
}

src/s2/s2region_term_indexer.h

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,21 @@ class S2RegionTermIndexer {
196196
// this flag if your index consists entirely of points.)
197197
//
198198
// DEFAULT: false
199-
bool index_contains_points_only() const { return points_only_; }
200-
void set_index_contains_points_only(bool value) { points_only_ = value; }
199+
bool index_contains_points_only() const { return index_points_only_; }
200+
void set_index_contains_points_only(bool value) { index_points_only_ = value; }
201+
202+
// If your query will only contain points (rather than regions), be sure
203+
// to set this flag. This will generate a smaller and faster index that
204+
// is specialized for the points-only case.
205+
//
206+
// With the default quality settings, this flag reduces the number of
207+
// index terms by about a factor of two. (The improvement gets smaller
208+
// as max_cells() is increased, but there is really no reason not to use
209+
// this flag if your query consists entirely of points.)
210+
//
211+
// DEFAULT: false
212+
bool query_contains_points_only() const { return query_points_only_; }
213+
void set_query_contains_points_only(bool value) { query_points_only_ = value; }
201214

202215
// If true, the index will be optimized for space rather than for query
203216
// time. With the default quality settings, this flag reduces the number
@@ -221,7 +234,8 @@ class S2RegionTermIndexer {
221234
void set_marker_character(char ch);
222235

223236
private:
224-
bool points_only_ = false;
237+
bool index_points_only_ = false;
238+
bool query_points_only_ = false;
225239
bool optimize_for_space_ = false;
226240
std::string marker_ = std::string(1, '$');
227241
};
@@ -293,6 +307,21 @@ class S2RegionTermIndexer {
293307
std::vector<std::string> GetQueryTermsForCanonicalCovering(
294308
const S2CellUnion& covering, absl::string_view prefix);
295309

310+
// Same as above, but appends the terms to a caller-provided buffer, so the
311+
// buffer can be reused or shared across points (e.g. a GeoJSON MultiPoint).
312+
void GetIndexTerms(const S2Point& point, absl::string_view prefix,
313+
std::vector<std::string>* terms);
314+
void GetQueryTerms(const S2Point& point, absl::string_view prefix,
315+
std::vector<std::string>* terms);
316+
317+
// Same as above, but allows reusing the same buffer for different coverings.
318+
void GetIndexTermsForCanonicalCovering(const S2CellUnion &covering,
319+
absl::string_view prefix,
320+
std::vector<std::string> *terms);
321+
void GetQueryTermsForCanonicalCovering(const S2CellUnion &covering,
322+
absl::string_view prefix,
323+
std::vector<std::string> *terms);
324+
296325
private:
297326
enum TermType { ANCESTOR, COVERING };
298327

0 commit comments

Comments
 (0)