diff --git a/firebase-firestore/CHANGELOG.md b/firebase-firestore/CHANGELOG.md
index 43d230b2866..6fdff5182c4 100644
--- a/firebase-firestore/CHANGELOG.md
+++ b/firebase-firestore/CHANGELOG.md
@@ -1,5 +1,8 @@
 # Unreleased
+- [changed] Improve the performance of queries in collections that contain many deleted documents.
+  [#7295](//github.com/firebase/firebase-android-sdk/issues/7295)
+
 # 26.0.0
 
 - [changed] **Breaking Change**: Updated minSdkVersion to API level 23 or higher.
diff --git a/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLitePersistence.java b/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLitePersistence.java
index b9c89d3cb8b..a7e353cd6e8 100644
--- a/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLitePersistence.java
+++ b/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLitePersistence.java
@@ -616,7 +616,7 @@ static class LongQuery {
     // attempt to check for placeholders in the query {@link head}; if it only relied on the number
     // of placeholders it itself generates, in that situation it would still exceed the SQLite
     // limit.
-    private static final int LIMIT = 900;
+    static final int LIMIT = 900;
 
     /**
      * Creates a new {@code LongQuery} with parameters that describe a template for creating each
diff --git a/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteRemoteDocumentCache.java b/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteRemoteDocumentCache.java
index b26f9601a81..5591da14b7b 100644
--- a/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteRemoteDocumentCache.java
+++ b/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteRemoteDocumentCache.java
@@ -21,6 +21,7 @@ import static com.google.firebase.firestore.util.Util.repeatSequence;
 
 import android.database.Cursor;
+import androidx.annotation.NonNull;
 import androidx.annotation.VisibleForTesting;
 import com.google.firebase.Timestamp;
 import com.google.firebase.database.collection.ImmutableSortedMap;
@@ -40,9 +41,11 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Executor;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -55,6 +58,8 @@ final class SQLiteRemoteDocumentCache implements RemoteDocumentCache {
   private final LocalSerializer serializer;
   private IndexManager indexManager;
 
+  private final DocumentTypeBackfiller documentTypeBackfiller = new DocumentTypeBackfiller();
+
   SQLiteRemoteDocumentCache(SQLitePersistence persistence, LocalSerializer serializer) {
     this.db = persistence;
     this.serializer = serializer;
@@ -65,6 +70,32 @@ public void setIndexManager(IndexManager indexManager) {
     this.indexManager = indexManager;
   }
 
+  private enum DocumentType {
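+    // These integer values are persisted in the "document_type" column, so they must not be
+    // renumbered or reused once released.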
+    NO_DOCUMENT(1),
+    FOUND_DOCUMENT(2),
+    UNKNOWN_DOCUMENT(3),
+    INVALID_DOCUMENT(4);
+
+    final int dbValue;
+
+    DocumentType(int dbValue) {
+      this.dbValue = dbValue;
+    }
+
+    static DocumentType forMutableDocument(MutableDocument document) {
+      if (document.isNoDocument()) {
+        return NO_DOCUMENT;
+      } else if (document.isFoundDocument()) {
+        return FOUND_DOCUMENT;
+      } else if (document.isUnknownDocument()) {
+        return UNKNOWN_DOCUMENT;
+      } else {
+        hardAssert(!document.isValidDocument(), "MutableDocument has an unknown type");
+        return INVALID_DOCUMENT;
+      }
+    }
+  }
+
   @Override
   public void add(MutableDocument document, SnapshotVersion readTime) {
     hardAssert(
@@ -77,12 +108,13 @@ public void add(MutableDocument document, SnapshotVersion readTime) {
 
     db.execute(
         "INSERT OR REPLACE INTO remote_documents "
-            + "(path, path_length, read_time_seconds, read_time_nanos, contents) "
-            + "VALUES (?, ?, ?, ?, ?)",
+            + "(path, path_length, read_time_seconds, read_time_nanos, document_type, contents) "
+            + "VALUES (?, ?, ?, ?, ?, ?)",
         EncodedPath.encode(documentKey.getPath()),
         documentKey.getPath().length(),
         timestamp.getSeconds(),
         timestamp.getNanoseconds(),
+        DocumentType.forMutableDocument(document).dbValue,
         message.toByteArray());
 
     indexManager.addToCollectionParentIndex(document.getKey().getCollectionPath());
@@ -131,7 +163,8 @@ public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKe
     SQLitePersistence.LongQuery longQuery =
         new SQLitePersistence.LongQuery(
             db,
-            "SELECT contents, read_time_seconds, read_time_nanos FROM remote_documents "
+            "SELECT contents, read_time_seconds, read_time_nanos, document_type, path "
+                + "FROM remote_documents "
                 + "WHERE path IN (",
             bindVars,
             ") ORDER BY path");
@@ -143,7 +176,14 @@ public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKe
           .forEach(row -> processRowInBackground(backgroundQueue, results, row, /*filter*/ null));
     }
     backgroundQueue.drain();
-    return results;
+
+    // Backfill any rows with null "document_type" discovered by processRowInBackground().
+    documentTypeBackfiller.backfill(db);
+
+    // Synchronize on `results` to avoid a data race with the background queue.
+    synchronized (results) {
+      return results;
+    }
   }
 
   @Override
@@ -182,6 +222,7 @@ private Map<DocumentKey, MutableDocument> getAll(
       List<ResourcePath> collections,
       IndexOffset offset,
       int count,
+      @Nullable DocumentType tryFilterDocumentType,
       @Nullable Function<MutableDocument, Boolean> filter,
       @Nullable QueryContext context) {
     Timestamp readTime = offset.getReadTime().getTimestamp();
@@ -189,9 +230,12 @@ private Map<DocumentKey, MutableDocument> getAll(
     StringBuilder sql =
         repeatSequence(
-            "SELECT contents, read_time_seconds, read_time_nanos, path "
+            "SELECT contents, read_time_seconds, read_time_nanos, document_type, path "
                + "FROM remote_documents "
                + "WHERE path >= ? AND path < ? AND path_length = ? "
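+                // Also match rows whose "document_type" is null: they were written by an older
+                // schema version, and their type is only known after parsing "contents".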
+                + (tryFilterDocumentType == null
+                    ? ""
+                    : " AND (document_type IS NULL OR document_type = ?) ")
                + "AND (read_time_seconds > ? OR ( "
                + "read_time_seconds = ? AND read_time_nanos > ?) OR ( "
                + "read_time_seconds = ? AND read_time_nanos = ? and path > ?)) ",
@@ -199,13 +243,19 @@ private Map<DocumentKey, MutableDocument> getAll(
             " UNION ");
     sql.append("ORDER BY read_time_seconds, read_time_nanos, path LIMIT ?");
 
-    Object[] bindVars = new Object[BINDS_PER_STATEMENT * collections.size() + 1];
+    Object[] bindVars =
+        new Object
+            [(BINDS_PER_STATEMENT + (tryFilterDocumentType != null ? 1 : 0)) * collections.size()
+                + 1];
     int i = 0;
     for (ResourcePath collection : collections) {
       String prefixPath = EncodedPath.encode(collection);
       bindVars[i++] = prefixPath;
       bindVars[i++] = EncodedPath.prefixSuccessor(prefixPath);
       bindVars[i++] = collection.length() + 1;
+      if (tryFilterDocumentType != null) {
+        bindVars[i++] = tryFilterDocumentType.dbValue;
+      }
       bindVars[i++] = readTime.getSeconds();
       bindVars[i++] = readTime.getSeconds();
       bindVars[i++] = readTime.getNanoseconds();
@@ -227,7 +277,14 @@ private Map<DocumentKey, MutableDocument> getAll(
           }
         });
     backgroundQueue.drain();
-    return results;
+
+    // Backfill any null "document_type" columns discovered by processRowInBackground().
+    documentTypeBackfiller.backfill(db);
+
+    // Synchronize on `results` to avoid a data race with the background queue.
+    synchronized (results) {
+      return results;
+    }
   }
 
   private Map<DocumentKey, MutableDocument> getAll(
       List<ResourcePath> collections,
       IndexOffset offset,
       int count,
       @Nullable Function<MutableDocument, Boolean> filter) {
-    return getAll(collections, offset, count, filter, /*context*/ null);
+    return getAll(
+        collections, offset, count, /*tryFilterDocumentType*/ null, filter, /*context*/ null);
   }
 
@@ -246,6 +304,8 @@ private void processRowInBackground(
     byte[] rawDocument = row.getBlob(0);
     int readTimeSeconds = row.getInt(1);
     int readTimeNanos = row.getInt(2);
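+    // Column indices match the SELECT lists used by the callers: contents (0),
+    // read_time_seconds (1), read_time_nanos (2), document_type (3), path (4).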
+    boolean documentTypeIsNull = row.isNull(3);
+    String path = row.getString(4);
 
     // Since scheduling background tasks incurs overhead, we only dispatch to a
     // background thread if there are still some documents remaining.
@@ -254,6 +314,9 @@
         () -> {
           MutableDocument document =
               decodeMaybeDocument(rawDocument, readTimeSeconds, readTimeNanos);
+          if (documentTypeIsNull) {
+            documentTypeBackfiller.enqueue(path, readTimeSeconds, readTimeNanos, document);
+          }
           if (filter == null || filter.apply(document)) {
             synchronized (results) {
               results.put(document.getKey(), document);
@@ -278,6 +341,10 @@ public Map<DocumentKey, MutableDocument> getDocumentsMatchingQuery(
         Collections.singletonList(query.getPath()),
         offset,
         Integer.MAX_VALUE,
+        // Specify tryFilterDocumentType=FOUND_DOCUMENT to getAll() as an optimization, because
+        // query.matches(doc) will return false for all non-"found" document types anyway.
+        // See https://github.com/firebase/firebase-android-sdk/issues/7295
+        DocumentType.FOUND_DOCUMENT,
         (MutableDocument doc) -> query.matches(doc) || mutatedKeys.contains(doc.getKey()),
         context);
   }
@@ -292,4 +359,146 @@ private MutableDocument decodeMaybeDocument(
       throw fail("MaybeDocument failed to parse: %s", e);
     }
   }
+
+  /**
+   * Helper class to backfill the `document_type` column in the `remote_documents` table.
+   *
+   * <p>The `document_type` column was added as an optimization to skip deleted document tombstones
+   * when running queries. Any time a new row is added to the `remote_documents` table it _should_
+   * have its `document_type` column set to the value that matches the `contents` field. However,
+   * when upgrading from an older schema version the column value for existing rows will be null,
+   * and this backfiller replaces those null values to improve the performance of future queries.
+   *
+   * <p>When traversing the `remote_documents` table, call `enqueue()` upon finding a row whose
+   * `document_type` is null; then call `backfill()` later on to efficiently update the enqueued
+   * rows in batches.
+   *
+   * <p>This class is thread-safe: all public methods may be safely called concurrently from
+   * multiple threads. This makes it safe to use instances of this class from a BackgroundQueue.
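+   *
+   * <p>Intended usage, in sketch form:
+   *
+   * <pre>{@code
+   * DocumentTypeBackfiller backfiller = new DocumentTypeBackfiller();
+   * // While scanning rows, enqueue each row whose "document_type" is null:
+   * backfiller.enqueue(path, readTimeSeconds, readTimeNanos, document);
+   * // After the scan, update the enqueued rows in batches:
+   * backfiller.backfill(db);
+   * }</pre>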
+   *
+   * @see <a href="https://github.com/firebase/firebase-android-sdk/issues/7295">#7295</a>
+   */
+  private static class DocumentTypeBackfiller {
+
+    private final ConcurrentHashMap<BackfillKey, DocumentType> documentTypeByBackfillKey =
+        new ConcurrentHashMap<>();
+
+    void enqueue(String path, int readTimeSeconds, int readTimeNanos, MutableDocument document) {
+      BackfillKey backfillKey = new BackfillKey(path, readTimeSeconds, readTimeNanos);
+      DocumentType documentType = DocumentType.forMutableDocument(document);
+      documentTypeByBackfillKey.putIfAbsent(backfillKey, documentType);
+    }
+
+    void backfill(SQLitePersistence db) {
+      while (true) {
+        BackfillSqlInfo backfillSqlInfo = calculateBackfillSql();
+        if (backfillSqlInfo == null) {
+          break;
+        }
+        db.execute(backfillSqlInfo.sql, backfillSqlInfo.bindings);
+      }
+    }
+
+    private static class BackfillSqlInfo {
+      final String sql;
+      final Object[] bindings;
+      final int numDocumentsAffected;
+
+      BackfillSqlInfo(String sql, Object[] bindings, int numDocumentsAffected) {
+        this.sql = sql;
+        this.bindings = bindings;
+        this.numDocumentsAffected = numDocumentsAffected;
+      }
+    }
+
+    @Nullable
+    BackfillSqlInfo calculateBackfillSql() {
+      if (documentTypeByBackfillKey.isEmpty()) {
+        return null; // short circuit
+      }
+
+      ArrayList<Object> bindings = new ArrayList<>();
+      StringBuilder caseClauses = new StringBuilder();
+      StringBuilder whereClauses = new StringBuilder();
+
+      Iterator<BackfillKey> backfillKeys = documentTypeByBackfillKey.keySet().iterator();
+      int numDocumentsAffected = 0;
+      while (backfillKeys.hasNext() && bindings.size() < SQLitePersistence.LongQuery.LIMIT) {
+        BackfillKey backfillKey = backfillKeys.next();
+        DocumentType documentType = documentTypeByBackfillKey.remove(backfillKey);
+        if (documentType == null) {
+          continue;
+        }
+
+        numDocumentsAffected++;
+        bindings.add(backfillKey.path);
+        int pathBindingNumber = bindings.size();
+        bindings.add(backfillKey.readTimeSeconds);
+        int readTimeSecondsBindingNumber = bindings.size();
+        bindings.add(backfillKey.readTimeNanos);
+        int readTimeNanosBindingNumber = bindings.size();
+        bindings.add(documentType.dbValue);
+        int dbValueBindingNumber = bindings.size();
+
+        caseClauses
+            .append(" WHEN path=?")
+            .append(pathBindingNumber)
+            .append(" AND read_time_seconds=?")
+            .append(readTimeSecondsBindingNumber)
+            .append(" AND read_time_nanos=?")
+            .append(readTimeNanosBindingNumber)
+            .append(" THEN ?")
+            .append(dbValueBindingNumber);
+
+        if (whereClauses.length() > 0) {
+          whereClauses.append(" OR");
+        }
+        whereClauses
+            .append(" (path=?")
+            .append(pathBindingNumber)
+            .append(" AND read_time_seconds=?")
+            .append(readTimeSecondsBindingNumber)
+            .append(" AND read_time_nanos=?")
+            .append(readTimeNanosBindingNumber)
+            .append(')');
+      }
+
+      if (numDocumentsAffected == 0) {
+        return null;
+      }
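+      // Combine everything into a single UPDATE of the form:
+      //   UPDATE remote_documents SET document_type =
+      //     CASE WHEN path=?1 AND ... THEN ?4 ... ELSE NULL END
+      //   WHERE (path=?1 AND ...) OR ...
+      // The numbered ?N placeholders let the same bound value be referenced from both the
+      // CASE and the WHERE clause.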
+      String sql =
+          "UPDATE remote_documents SET document_type = CASE"
+              + caseClauses
+              + " ELSE NULL END WHERE"
+              + whereClauses;
+
+      return new BackfillSqlInfo(sql, bindings.toArray(), numDocumentsAffected);
+    }
+
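+    // BackfillKey is used as a key in a ConcurrentHashMap, so it defines value-based equals()
+    // and hashCode() below; otherwise putIfAbsent() could not de-duplicate enqueued rows.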
+    private static class BackfillKey {
+      final String path;
+      final int readTimeSeconds;
+      final int readTimeNanos;
+
+      BackfillKey(String path, int readTimeSeconds, int readTimeNanos) {
+        this.path = path;
+        this.readTimeSeconds = readTimeSeconds;
+        this.readTimeNanos = readTimeNanos;
+      }
+
+      @Override
+      public boolean equals(Object object) {
+        if (this == object) {
+          return true;
+        }
+        if (!(object instanceof BackfillKey)) {
+          return false;
+        }
+        BackfillKey other = (BackfillKey) object;
+        return path.equals(other.path)
+            && readTimeSeconds == other.readTimeSeconds
+            && readTimeNanos == other.readTimeNanos;
+      }
+
+      @Override
+      public int hashCode() {
+        int result = path.hashCode();
+        result = 31 * result + readTimeSeconds;
+        result = 31 * result + readTimeNanos;
+        return result;
+      }
+
+      @NonNull
+      @Override
+      public String toString() {
+        return "DocumentTypeBackfiller.BackfillKey(path="
+            + path
+            + ", readTimeSeconds="
+            + readTimeSeconds
+            + ", readTimeNanos="
+            + readTimeNanos
+            + ")";
+      }
+    }
+  }
 }
diff --git a/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteSchema.java b/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteSchema.java
index ce758195446..88af8a53292 100644
--- a/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteSchema.java
+++ b/firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteSchema.java
@@ -48,7 +48,7 @@ class SQLiteSchema {
    * The version of the schema. Increase this by one for each migration added to runMigrations
    * below.
    */
-  static final int VERSION = 17;
+  static final int VERSION = 18;
 
   /**
    * The batch size for data migrations.
@@ -183,6 +183,10 @@ void runSchemaUpgrades(int fromVersion, int toVersion) {
       createGlobalsTable();
     }
 
+    if (fromVersion < 18 && toVersion >= 18) {
+      addDocumentType();
+    }
+
     /*
      * Adding a new schema upgrade? READ THIS FIRST!
      *
@@ -448,6 +452,18 @@ private void addPathLength() {
     }
   }
 
+  private void addDocumentType() {
+    // The new "document_type" column is a copy of the document type encoded in the "contents"
+    // blob; its range of values is defined in the `SQLiteRemoteDocumentCache.DocumentType` enum.
+    // The "document_type" value for a given row must equal the document type encoded in the
+    // "contents" column of that row. The purpose of the "document_type" column is to enable
+    // efficient filtering on document type; however, rows with a null value must also be
+    // considered as matches, since their type can only be determined by parsing "contents".
+    if (!tableContainsColumn("remote_documents", "document_type")) {
+      db.execSQL("ALTER TABLE remote_documents ADD COLUMN document_type INTEGER");
+    }
+  }
+
   private boolean hasReadTime() {
     boolean hasReadTimeSeconds = tableContainsColumn("remote_documents", "read_time_seconds");
     boolean hasReadTimeNanos = tableContainsColumn("remote_documents", "read_time_nanos");
diff --git a/firebase-firestore/src/test/java/com/google/firebase/firestore/local/SQLiteSchemaTest.java b/firebase-firestore/src/test/java/com/google/firebase/firestore/local/SQLiteSchemaTest.java
index 6e250332143..eaa807d9b67 100644
--- a/firebase-firestore/src/test/java/com/google/firebase/firestore/local/SQLiteSchemaTest.java
+++ b/firebase-firestore/src/test/java/com/google/firebase/firestore/local/SQLiteSchemaTest.java
@@ -702,6 +702,27 @@ public void createsOverlaysAndMigrationTable() {
     assertEquals(Persistence.DATA_MIGRATION_BUILD_OVERLAYS, migrationName);
   }
 
+  @Test
+  public void existingDocumentsMatchAfterRemoteDocumentsDocumentTypeColumnAdded() {
+    schema.runSchemaUpgrades(0, 1);
+    for (int i = 0; i < 3; i++) {
+      db.execSQL(
+          "INSERT INTO remote_documents (path, contents) VALUES (?, ?)",
+          new Object[] {encode(path("coll/doc" + i)), createDummyDocument("coll/doc" + i)});
+    }
+
+    // The migration of interest is 18, but run all the way to the latest version to ensure
+    // compatibility with the SQLiteRemoteDocumentCache implementation.
+    schema.runSchemaUpgrades(2, VERSION);
+
+    SQLiteRemoteDocumentCache remoteDocumentCache = createRemoteDocumentCache();
+
+    Map<DocumentKey, MutableDocument> results =
+        remoteDocumentCache.getDocumentsMatchingQuery(
+            query("coll"), IndexOffset.NONE, new HashSet<>());
+    assertResultsContain(results, "coll/doc0", "coll/doc1", "coll/doc2");
+  }
+
   private SQLiteRemoteDocumentCache createRemoteDocumentCache() {
     SQLitePersistence persistence =
         new SQLitePersistence(serializer, LruGarbageCollector.Params.Default(), opener);