File tree Expand file tree Collapse file tree 2 files changed +33
-4
lines changed Expand file tree Collapse file tree 2 files changed +33
-4
lines changed Original file line number Diff line number Diff line change @@ -755,6 +755,26 @@ async fn test_filtering_match_updates(rt: TestRuntime) -> anyhow::Result<()> {
755755 anyhow:: Ok ( ( ) )
756756}
757757
758+ #[ convex_macro:: test_runtime]
759+ async fn test_bm25_stats_no_underflow ( rt : TestRuntime ) -> anyhow:: Result < ( ) > {
760+ let mut scenario = Scenario :: new ( rt) . await ?;
761+ scenario
762+ . patch ( TestKey :: C , vec ! [ TestValue :: D ] , TestValue :: A )
763+ . await ?;
764+ scenario. execute ( TestAction :: Backfill ) . await ?;
765+ scenario. execute ( TestAction :: Delete ( TestKey :: C ) ) . await ?;
766+ // This query doens't use the filter field, so the BM25 stats will not include
767+ // the filter field while the commit statistics will in the memory index from
768+ // the delete.
769+ scenario
770+ . execute ( TestAction :: QueryAndCheckScores ( TestQuery {
771+ search : vec ! [ TestValue :: D ] ,
772+ filter : None ,
773+ } ) )
774+ . await ?;
775+ anyhow:: Ok ( ( ) )
776+ }
777+
758778// Regression test: We had a bug where we were computing the index of a matching
759779// union term incorrectly.
760780//
Original file line number Diff line number Diff line change @@ -464,10 +464,19 @@ impl MemorySearchIndex {
464464 . checked_add_signed ( commit_stats. total_docs_diff as i64 )
465465 . context ( "num_documents underflow" ) ?;
466466 for ( field, total_term_diff) in & commit_stats. total_term_diff_by_field {
467- let term_diff = stats. num_terms_by_field . entry ( * field) . or_insert ( 0 ) ;
468- * term_diff = term_diff
469- . checked_add_signed ( * total_term_diff as i64 )
470- . context ( "num_terms underflow" ) ?;
467+ // It's possible some fields are present in the commit statistics but not the
468+ // Bm25Stats because the Bm25Stats are query-specific, and the commit statistics
469+ // are not. e.g. a filter field that isn't in the query might not appear in the
470+ // Bm25 stats. We only need to update the fields that are in the Bm25Stats.
471+ if let Some ( term_diff) = stats. num_terms_by_field . get_mut ( field) {
472+ * term_diff = term_diff
473+ . checked_add_signed ( * total_term_diff as i64 )
474+ . context ( "num_terms underflow" ) ?;
475+ } else if field == & Field :: from_field_id ( SEARCH_FIELD_ID ) {
476+ stats
477+ . num_terms_by_field
478+ . insert ( * field, ( * total_term_diff as i64 ) . try_into ( ) ?) ;
479+ }
471480 }
472481 for ( term, term_id) in & term_ids {
473482 let Some ( & increment) = commit_stats. term_freq_diffs . get ( term_id) else {
You can’t perform that action at this time.
0 commit comments