Commit 5c7a993

firestore: fix slow queries when a collection has many NO_DOCUMENT tombstones (#7301)
1 parent db3bd7a commit 5c7a993

8 files changed: +406 -15 lines changed

firebase-firestore/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -1,5 +1,8 @@
 # Unreleased
 
+- [changed] Improve the performance of queries in collections that contain many deleted documents.
+  [#7295](//github.com/firebase/firebase-android-sdk/issues/7295)
+
 # 26.0.0
 
 - [changed] **Breaking Change**: Updated minSdkVersion to API level 23 or higher.
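At a high level, the change works in three parts, all visible in the diffs below: a `document_type` column is added to `remote_documents` and written on every insert; collection scans pass a document-type filter so `NO_DOCUMENT` tombstones are skipped inside SQLite instead of being decoded and rejected in Java; and rows written by older schema versions, whose `document_type` is still NULL, are lazily backfilled. The sketch below is purely illustrative (the real statements are assembled in `SQLiteRemoteDocumentCache`); the numeric codes mirror the `DocumentType` enum introduced by this commit.

```java
// Illustrative write-path sketch: every row now records which kind of document its
// `contents` blob decodes to, so later scans can skip tombstones without deserializing them.
class WritePathSketch {
  static final String INSERT_SQL =
      "INSERT OR REPLACE INTO remote_documents "
          + "(path, path_length, read_time_seconds, read_time_nanos, document_type, contents) "
          + "VALUES (?, ?, ?, ?, ?, ?)";

  // document_type codes, mirroring the DocumentType enum added in this commit:
  static final int NO_DOCUMENT = 1; // deletion tombstone
  static final int FOUND_DOCUMENT = 2; // an existing document
  static final int UNKNOWN_DOCUMENT = 3; // other MutableDocument states
  static final int INVALID_DOCUMENT = 4;
}
```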

firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLitePersistence.java

Lines changed: 1 addition & 1 deletion
@@ -616,7 +616,7 @@ static class LongQuery {
     // attempt to check for placeholders in the query {@link head}; if it only relied on the number
     // of placeholders it itself generates, in that situation it would still exceed the SQLite
     // limit.
-    private static final int LIMIT = 900;
+    static final int LIMIT = 900;
 
     /**
      * Creates a new {@code LongQuery} with parameters that describe a template for creating each
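The only change here widens `LIMIT` from `private` to package-private so the new `DocumentTypeBackfiller` (in `SQLiteRemoteDocumentCache` below) can cap the number of bind variables per generated `UPDATE` at the same value, keeping each statement under SQLite's host-parameter limit. A minimal sketch of that batching idea, with an assumed helper name (`splitIntoBatches` is not part of the commit):

```java
import java.util.ArrayList;
import java.util.List;

final class BatchingSketch {
  // Hypothetical helper: split `items` so that each SQL statement binds at most `limit`
  // parameters, given `bindsPerItem` bind variables per item.
  static <T> List<List<T>> splitIntoBatches(List<T> items, int bindsPerItem) {
    int limit = 900; // stands in for SQLitePersistence.LongQuery.LIMIT
    int itemsPerBatch = Math.max(1, limit / bindsPerItem);
    List<List<T>> batches = new ArrayList<>();
    for (int i = 0; i < items.size(); i += itemsPerBatch) {
      batches.add(new ArrayList<>(items.subList(i, Math.min(items.size(), i + itemsPerBatch))));
    }
    return batches;
  }
}
```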

firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteRemoteDocumentCache.java

Lines changed: 237 additions & 8 deletions
@@ -21,6 +21,7 @@
 import static com.google.firebase.firestore.util.Util.repeatSequence;
 
 import android.database.Cursor;
+import androidx.annotation.NonNull;
 import androidx.annotation.VisibleForTesting;
 import com.google.firebase.Timestamp;
 import com.google.firebase.database.collection.ImmutableSortedMap;
@@ -40,9 +41,12 @@
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Executor;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -55,6 +59,8 @@ final class SQLiteRemoteDocumentCache implements RemoteDocumentCache {
   private final LocalSerializer serializer;
   private IndexManager indexManager;
 
+  private final DocumentTypeBackfiller documentTypeBackfiller = new DocumentTypeBackfiller();
+
   SQLiteRemoteDocumentCache(SQLitePersistence persistence, LocalSerializer serializer) {
     this.db = persistence;
     this.serializer = serializer;
@@ -65,6 +71,32 @@ public void setIndexManager(IndexManager indexManager) {
     this.indexManager = indexManager;
   }
 
+  private enum DocumentType {
+    NO_DOCUMENT(1),
+    FOUND_DOCUMENT(2),
+    UNKNOWN_DOCUMENT(3),
+    INVALID_DOCUMENT(4);
+
+    final int dbValue;
+
+    DocumentType(int dbValue) {
+      this.dbValue = dbValue;
+    }
+
+    static DocumentType forMutableDocument(MutableDocument document) {
+      if (document.isNoDocument()) {
+        return NO_DOCUMENT;
+      } else if (document.isFoundDocument()) {
+        return FOUND_DOCUMENT;
+      } else if (document.isUnknownDocument()) {
+        return UNKNOWN_DOCUMENT;
+      } else {
+        hardAssert(!document.isValidDocument(), "MutableDocument has an unknown type");
+        return INVALID_DOCUMENT;
+      }
+    }
+  }
+
   @Override
   public void add(MutableDocument document, SnapshotVersion readTime) {
     hardAssert(
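A quick worked example of the mapping above, as it would read from inside this class (`MutableDocument.newNoDocument` and `DocumentKey.fromPathString` are existing model helpers in the SDK; the path is made up):

```java
// A deletion tombstone maps to NO_DOCUMENT, stored as 1 in the new column; the SQL-level
// filter for FOUND_DOCUMENT (2) can therefore skip it without decoding `contents` at all.
MutableDocument tombstone =
    MutableDocument.newNoDocument(DocumentKey.fromPathString("rooms/eros"), SnapshotVersion.NONE);
int dbValue = DocumentType.forMutableDocument(tombstone).dbValue; // 1 == NO_DOCUMENT
```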
@@ -77,12 +109,13 @@ public void add(MutableDocument document, SnapshotVersion readTime) {
 
     db.execute(
         "INSERT OR REPLACE INTO remote_documents "
-            + "(path, path_length, read_time_seconds, read_time_nanos, contents) "
-            + "VALUES (?, ?, ?, ?, ?)",
+            + "(path, path_length, read_time_seconds, read_time_nanos, document_type, contents) "
+            + "VALUES (?, ?, ?, ?, ?, ?)",
         EncodedPath.encode(documentKey.getPath()),
         documentKey.getPath().length(),
         timestamp.getSeconds(),
         timestamp.getNanoseconds(),
+        DocumentType.forMutableDocument(document).dbValue,
         message.toByteArray());
 
     indexManager.addToCollectionParentIndex(document.getKey().getCollectionPath());
@@ -131,7 +164,8 @@ public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKe
     SQLitePersistence.LongQuery longQuery =
         new SQLitePersistence.LongQuery(
             db,
-            "SELECT contents, read_time_seconds, read_time_nanos FROM remote_documents "
+            "SELECT contents, read_time_seconds, read_time_nanos, document_type, path "
+                + "FROM remote_documents "
                 + "WHERE path IN (",
             bindVars,
             ") ORDER BY path");
@@ -143,7 +177,14 @@ public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKe
           .forEach(row -> processRowInBackground(backgroundQueue, results, row, /*filter*/ null));
     }
     backgroundQueue.drain();
-    return results;
+
+    // Backfill any rows with null "document_type" discovered by processRowInBackground().
+    documentTypeBackfiller.backfill(db);
+
+    // Synchronize on `results` to avoid a data race with the background queue.
+    synchronized (results) {
+      return results;
+    }
   }
 
   @Override
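The new `synchronized (results)` block pairs with the lock that `processRowInBackground()` takes when it writes into the map from `BackgroundQueue` worker threads: reading under the same monitor after `drain()` guarantees the caller sees every entry the workers added. A minimal, self-contained sketch of the same pattern, using a plain `ExecutorService` in place of the SDK's `BackgroundQueue` (all names here are illustrative):

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

final class SynchronizedResultsSketch {
  static Map<String, Integer> decodeAll(List<String> rows) throws InterruptedException {
    Map<String, Integer> results = new HashMap<>();
    ExecutorService pool = Executors.newFixedThreadPool(4);
    for (String row : rows) {
      pool.execute(
          () -> {
            int decoded = row.length(); // stand-in for the expensive decode step
            synchronized (results) { // writers guard the shared, non-thread-safe map
              results.put(row, decoded);
            }
          });
    }
    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.MINUTES); // plays the role of backgroundQueue.drain()
    synchronized (results) { // reading under the same lock establishes happens-before
      return new HashMap<>(results);
    }
  }
}
```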
@@ -182,30 +223,40 @@ private Map<DocumentKey, MutableDocument> getAll(
       List<ResourcePath> collections,
       IndexOffset offset,
       int count,
+      @Nullable DocumentType tryFilterDocumentType,
       @Nullable Function<MutableDocument, Boolean> filter,
       @Nullable QueryContext context) {
     Timestamp readTime = offset.getReadTime().getTimestamp();
     DocumentKey documentKey = offset.getDocumentKey();
 
     StringBuilder sql =
         repeatSequence(
-            "SELECT contents, read_time_seconds, read_time_nanos, path "
+            "SELECT contents, read_time_seconds, read_time_nanos, document_type, path "
                 + "FROM remote_documents "
                 + "WHERE path >= ? AND path < ? AND path_length = ? "
+                + (tryFilterDocumentType == null
+                    ? ""
+                    : " AND (document_type IS NULL OR document_type = ?) ")
                 + "AND (read_time_seconds > ? OR ( "
                 + "read_time_seconds = ? AND read_time_nanos > ?) OR ( "
                 + "read_time_seconds = ? AND read_time_nanos = ? and path > ?)) ",
             collections.size(),
             " UNION ");
     sql.append("ORDER BY read_time_seconds, read_time_nanos, path LIMIT ?");
 
-    Object[] bindVars = new Object[BINDS_PER_STATEMENT * collections.size() + 1];
+    Object[] bindVars =
+        new Object
+            [(BINDS_PER_STATEMENT + (tryFilterDocumentType != null ? 1 : 0)) * collections.size()
+                + 1];
     int i = 0;
     for (ResourcePath collection : collections) {
       String prefixPath = EncodedPath.encode(collection);
       bindVars[i++] = prefixPath;
       bindVars[i++] = EncodedPath.prefixSuccessor(prefixPath);
       bindVars[i++] = collection.length() + 1;
+      if (tryFilterDocumentType != null) {
+        bindVars[i++] = tryFilterDocumentType.dbValue;
+      }
       bindVars[i++] = readTime.getSeconds();
       bindVars[i++] = readTime.getSeconds();
       bindVars[i++] = readTime.getNanoseconds();
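Spelled out, the per-collection statement assembled above when `tryFilterDocumentType == DocumentType.FOUND_DOCUMENT` looks roughly like the string below; `repeatSequence()` UNIONs one copy per collection and `ORDER BY read_time_seconds, read_time_nanos, path LIMIT ?` is appended at the end. The `document_type IS NULL` arm is what keeps rows written by older schema versions, which have not been backfilled yet, from being silently dropped:

```java
// Illustrative only; the real SQL is built by the StringBuilder above.
String perCollectionScan =
    "SELECT contents, read_time_seconds, read_time_nanos, document_type, path "
        + "FROM remote_documents "
        + "WHERE path >= ? AND path < ? AND path_length = ? "
        // Tombstones (NO_DOCUMENT = 1) fail this predicate and are skipped inside SQLite;
        // NULL rows predate the column and still come back for Java-side filtering + backfill.
        + "AND (document_type IS NULL OR document_type = ?) " // bound to FOUND_DOCUMENT (2)
        + "AND (read_time_seconds > ? OR ( "
        + "read_time_seconds = ? AND read_time_nanos > ?) OR ( "
        + "read_time_seconds = ? AND read_time_nanos = ? and path > ?)) ";
```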
@@ -227,15 +278,23 @@ private Map<DocumentKey, MutableDocument> getAll(
               }
             });
     backgroundQueue.drain();
-    return results;
+
+    // Backfill any null "document_type" columns discovered by processRowInBackground().
+    documentTypeBackfiller.backfill(db);
+
+    // Synchronize on `results` to avoid a data race with the background queue.
+    synchronized (results) {
+      return results;
+    }
   }
 
   private Map<DocumentKey, MutableDocument> getAll(
       List<ResourcePath> collections,
       IndexOffset offset,
       int count,
       @Nullable Function<MutableDocument, Boolean> filter) {
-    return getAll(collections, offset, count, filter, /*context*/ null);
+    return getAll(
+        collections, offset, count, /*tryFilterDocumentType*/ null, filter, /*context*/ null);
   }
 
   private void processRowInBackground(
@@ -246,6 +305,8 @@ private void processRowInBackground(
     byte[] rawDocument = row.getBlob(0);
     int readTimeSeconds = row.getInt(1);
     int readTimeNanos = row.getInt(2);
+    boolean documentTypeIsNull = row.isNull(3);
+    String path = row.getString(4);
 
     // Since scheduling background tasks incurs overhead, we only dispatch to a
     // background thread if there are still some documents remaining.
@@ -254,6 +315,9 @@ private void processRowInBackground(
         () -> {
           MutableDocument document =
               decodeMaybeDocument(rawDocument, readTimeSeconds, readTimeNanos);
+          if (documentTypeIsNull) {
+            documentTypeBackfiller.enqueue(path, readTimeSeconds, readTimeNanos, document);
+          }
          if (filter == null || filter.apply(document)) {
            synchronized (results) {
              results.put(document.getKey(), document);
@@ -278,6 +342,10 @@ public Map<DocumentKey, MutableDocument> getDocumentsMatchingQuery(
         Collections.singletonList(query.getPath()),
         offset,
         Integer.MAX_VALUE,
+        // Specify tryFilterDocumentType=FOUND_DOCUMENT to getAll() as an optimization, because
+        // query.matches(doc) will return false for all non-"found" document types anyways.
+        // See https://github.com/firebase/firebase-android-sdk/issues/7295
+        DocumentType.FOUND_DOCUMENT,
         (MutableDocument doc) -> query.matches(doc) || mutatedKeys.contains(doc.getKey()),
         context);
   }
@@ -292,4 +360,165 @@ private MutableDocument decodeMaybeDocument(
       throw fail("MaybeDocument failed to parse: %s", e);
     }
   }
+
+  /**
+   * Helper class to backfill the `document_type` column in the `remote_documents` table.
+   * <p>
+   * The `document_type` column was added as an optimization to skip deleted document tombstones
+   * when running queries. Any time a new row is added to the `remote_documents` table it _should_
+   * have its `document_type` column set to the value that matches the `contents` field. However,
+   * when upgrading from an older schema version the column value for existing rows will be null,
+   * and this backfiller replaces those null values to improve the performance of future queries.
+   * <p>
+   * When traversing the `remote_documents` table, call `enqueue()` upon finding a row whose
+   * `document_type` is null. Then, call `backfill()` later on to efficiently update the enqueued
+   * rows in batches.
+   * <p>
+   * This class is thread safe and all public methods may be safely called concurrently from
+   * multiple threads. This makes it safe to use instances of this class from BackgroundQueue.
+   *
+   * @see <a href="https://github.com/firebase/firebase-android-sdk/issues/7295">#7295</a>
+   */
+  private static class DocumentTypeBackfiller {
+
+    private final ConcurrentHashMap<BackfillKey, DocumentType> documentTypeByBackfillKey =
+        new ConcurrentHashMap<>();
+
+    void enqueue(String path, int readTimeSeconds, int readTimeNanos, MutableDocument document) {
+      BackfillKey backfillKey = new BackfillKey(path, readTimeSeconds, readTimeNanos);
+      DocumentType documentType = DocumentType.forMutableDocument(document);
+      documentTypeByBackfillKey.putIfAbsent(backfillKey, documentType);
+    }
+
+    void backfill(SQLitePersistence db) {
+      while (true) {
+        BackfillSqlInfo backfillSqlInfo = calculateBackfillSql();
+        if (backfillSqlInfo == null) {
+          break;
+        }
+        db.execute(backfillSqlInfo.sql, backfillSqlInfo.bindings);
+      }
+    }
+
+    private static class BackfillSqlInfo {
+      final String sql;
+      final Object[] bindings;
+      final int numDocumentsAffected;
+
+      BackfillSqlInfo(String sql, Object[] bindings, int numDocumentsAffected) {
+        this.sql = sql;
+        this.bindings = bindings;
+        this.numDocumentsAffected = numDocumentsAffected;
+      }
+    }
+
+    @Nullable
+    BackfillSqlInfo calculateBackfillSql() {
+      if (documentTypeByBackfillKey.isEmpty()) {
+        return null; // short circuit
+      }
+
+      ArrayList<Object> bindings = new ArrayList<>();
+      StringBuilder caseClauses = new StringBuilder();
+      StringBuilder whereClauses = new StringBuilder();
+
+      Iterator<BackfillKey> backfillKeys = documentTypeByBackfillKey.keySet().iterator();
+      int numDocumentsAffected = 0;
+      while (backfillKeys.hasNext() && bindings.size() < SQLitePersistence.LongQuery.LIMIT) {
+        BackfillKey backfillKey = backfillKeys.next();
+        DocumentType documentType = documentTypeByBackfillKey.remove(backfillKey);
+        if (documentType == null) {
+          continue;
+        }
+
+        numDocumentsAffected++;
+        bindings.add(backfillKey.path);
+        int pathBindingNumber = bindings.size();
+        bindings.add(backfillKey.readTimeSeconds);
+        int readTimeSecondsBindingNumber = bindings.size();
+        bindings.add(backfillKey.readTimeNanos);
+        int readTimeNanosBindingNumber = bindings.size();
+        bindings.add(documentType.dbValue);
+        int dbValueBindingNumber = bindings.size();
+
+        caseClauses
+            .append(" WHEN path=?")
+            .append(pathBindingNumber)
+            .append(" AND read_time_seconds=?")
+            .append(readTimeSecondsBindingNumber)
+            .append(" AND read_time_nanos=?")
+            .append(readTimeNanosBindingNumber)
+            .append(" THEN ?")
+            .append(dbValueBindingNumber);
+
+        if (whereClauses.length() > 0) {
+          whereClauses.append(" OR");
+        }
+        whereClauses
+            .append(" (path=?")
+            .append(pathBindingNumber)
+            .append(" AND read_time_seconds=?")
+            .append(readTimeSecondsBindingNumber)
+            .append(" AND read_time_nanos=?")
+            .append(readTimeNanosBindingNumber)
+            .append(')');
+      }
+
+      if (numDocumentsAffected == 0) {
+        return null;
+      }
+
+      String sql =
+          "UPDATE remote_documents SET document_type = CASE"
+              + caseClauses
+              + " ELSE NULL END WHERE"
+              + whereClauses;
+
+      return new BackfillSqlInfo(sql, bindings.toArray(), numDocumentsAffected);
+    }
+
+    private static class BackfillKey {
+      final String path;
+      final int readTimeSeconds;
+      final int readTimeNanos;
+
+      BackfillKey(String path, int readTimeSeconds, int readTimeNanos) {
+        this.path = path;
+        this.readTimeSeconds = readTimeSeconds;
+        this.readTimeNanos = readTimeNanos;
+      }
+
+      @Override
+      public boolean equals(Object object) {
+        if (object == this) {
+          return true;
+        }
+        if (!(object instanceof BackfillKey)) {
+          return false;
+        }
+        BackfillKey other = (BackfillKey) object;
+        return readTimeSeconds == other.readTimeSeconds
+            && readTimeNanos == other.readTimeNanos
+            && Objects.equals(path, other.path);
+      }
+
+      @Override
+      public int hashCode() {
+        return Objects.hash(path, readTimeSeconds, readTimeNanos);
+      }
+
+      @NonNull
+      @Override
+      public String toString() {
+        return "DocumentTypeBackfiller.BackfillKey(path="
+            + path
+            + ", readTimeSeconds="
+            + readTimeSeconds
+            + ", readTimeNanos="
+            + readTimeNanos
+            + ")";
+      }
+    }
+  }
 }
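To make the batching concrete, this is roughly what `calculateBackfillSql()` produces once two rows have been enqueued. The `?N` tokens are SQLite numbered parameters, and the bindings array holds path, read_time_seconds, read_time_nanos, and document_type for each row. `backfill()` keeps looping until the map is drained, and because `ConcurrentHashMap.remove()` hands each key to exactly one caller, concurrent queries never backfill the same row twice. The values below are made up:

```java
// Illustrative output of calculateBackfillSql() for two enqueued rows.
String sql =
    "UPDATE remote_documents SET document_type = CASE"
        + " WHEN path=?1 AND read_time_seconds=?2 AND read_time_nanos=?3 THEN ?4"
        + " WHEN path=?5 AND read_time_seconds=?6 AND read_time_nanos=?7 THEN ?8"
        + " ELSE NULL END WHERE"
        + " (path=?1 AND read_time_seconds=?2 AND read_time_nanos=?3)"
        + " OR (path=?5 AND read_time_seconds=?6 AND read_time_nanos=?7)";
Object[] bindings = {
  "coll/docA", 1700000000, 0, 2, // document_type 2 = FOUND_DOCUMENT
  "coll/docB", 1700000000, 0, 1 // document_type 1 = NO_DOCUMENT (tombstone)
};
```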
