@@ -233,12 +233,17 @@ static const QString SELECT_COUNT_CHUNKS_SQL = uR"(
233233)" _s;
234234
235235static const QString SELECT_CHUNKS_FTS_SQL = uR"(
236- select id, bm25(chunks_fts) as score
237- from chunks_fts
236+ select fts.id, bm25(chunks_fts) as score
237+ from chunks_fts fts
238+ join documents d on fts.document_id = d.id
239+ join collection_items ci on d.folder_id = ci.folder_id
240+ join collections co on ci.collection_id = co.id
238241 where chunks_fts match ?
239- order by score limit %1;
242+ and co.name in ('%1')
243+ order by score limit %2;
240244)" _s;
241245
246+
242247#define NAMED_PAIR (name, typea, a, typeb, b ) \
243248 struct name { typea a; typeb b; }; \
244249 static bool operator ==(const name &x, const name &y) { return x.a == y.a && x.b == y.b ; } \
@@ -349,6 +354,14 @@ static const QString UPDATE_LAST_UPDATE_TIME_SQL = uR"(
349354 update collections set last_update_time = ? where id = ?;
350355)" _s;
351356
357+ static const QString FTS_INTEGRITY_SQL = uR"(
358+ insert into chunks_fts(chunks_fts, rank) values('integrity-check', 1);
359+ )" _s;
360+
361+ static const QString FTS_REBUILD_SQL = uR"(
362+ insert into chunks_fts(chunks_fts) values('rebuild');
363+ )" _s;
364+
352365static bool addCollection (QSqlQuery &q, const QString &collection_name, const QDateTime &start_update,
353366 const QDateTime &last_update, const QString &embedding_model, CollectionItem &item)
354367{
@@ -1815,6 +1828,7 @@ void Database::start()
18151828 m_databaseValid = false ;
18161829 } else {
18171830 cleanDB ();
1831+ ftsIntegrityCheck ();
18181832 QSqlQuery q (m_db);
18191833 if (!refreshDocumentIdCache (q)) {
18201834 m_databaseValid = false ;
@@ -2328,7 +2342,7 @@ QList<int> Database::searchBM25(const QString &query, const QList<QString> &coll
23282342 QList<BM25Query> bm25Queries = queriesForFTS5 (query);
23292343
23302344 QSqlQuery sqlQuery (m_db);
2331- sqlQuery.prepare (SELECT_CHUNKS_FTS_SQL.arg (k ));
2345+ sqlQuery.prepare (SELECT_CHUNKS_FTS_SQL.arg (collections. join ( " ', ' " ), QString::number (k) ));
23322346
23332347 QList<SearchResult> results;
23342348 for (auto &bm25Query : std::as_const (bm25Queries)) {
@@ -2346,11 +2360,13 @@ QList<int> Database::searchBM25(const QString &query, const QList<QString> &coll
23462360 }
23472361 }
23482362
2349- do {
2350- const int chunkId = sqlQuery.value (0 ).toInt ();
2351- const float score = sqlQuery.value (1 ).toFloat ();
2352- results.append ({chunkId, score});
2353- } while (sqlQuery.next ());
2363+ if (sqlQuery.at () != QSql::AfterLastRow) {
2364+ do {
2365+ const int chunkId = sqlQuery.value (0 ).toInt ();
2366+ const float score = sqlQuery.value (1 ).toFloat ();
2367+ results.append ({chunkId, score});
2368+ } while (sqlQuery.next ());
2369+ }
23542370
23552371 k = qMin (k, results.size ());
23562372 std::partial_sort (
@@ -2524,6 +2540,26 @@ void Database::retrieveFromDB(const QList<QString> &collections, const QString &
25242540 results->append (tempResults.value (id));
25252541}
25262542
2543+ bool Database::ftsIntegrityCheck ()
2544+ {
2545+ QSqlQuery q (m_db);
2546+
2547+ // Returns an error executing sql if it the integrity check fails
2548+ // See: https://www.sqlite.org/fts5.html#the_integrity_check_command
2549+ const bool success = q.exec (FTS_INTEGRITY_SQL);
2550+ if (!success && q.lastError ().nativeErrorCode () != " 267" /* SQLITE_CORRUPT_VTAB from sqlite header*/ ) {
2551+ qWarning () << " ERROR: Cannot prepare sql for fts integrity check" << q.lastError ();
2552+ return false ;
2553+ }
2554+
2555+ if (!success && !q.exec (FTS_REBUILD_SQL)) {
2556+ qWarning () << " ERROR: Cannot exec sql for fts rebuild" << q.lastError ();
2557+ return false ;
2558+ }
2559+
2560+ return true ;
2561+ }
2562+
25272563// FIXME This is very slow and non-interruptible and when we close the application and we're
25282564// cleaning a large table this can cause the app to take forever to shut down. This would ideally be
25292565// interruptible and we'd continue 'cleaning' when we restart
@@ -2574,7 +2610,7 @@ bool Database::cleanDB()
25742610 int document_id = q.value (0 ).toInt ();
25752611 QString document_path = q.value (1 ).toString ();
25762612 QFileInfo info (document_path);
2577- if (info.exists () && info.isReadable () && m_scannedFileExtensions.contains (info.suffix ()))
2613+ if (info.exists () && info.isReadable () && m_scannedFileExtensions.contains (info.suffix (), Qt::CaseInsensitive ))
25782614 continue ;
25792615
25802616#if defined(DEBUG)
0 commit comments