@@ -28,7 +28,7 @@ use crate::{
2828 } ,
2929 schema:: { clear_database, SuggestConnectionInitializer } ,
3030 suggestion:: { cook_raw_suggestion_url, FtsMatchInfo , Suggestion } ,
31- util:: { full_keyword, split_keyword} ,
31+ util:: { full_keyword, i18n_transform , split_keyword} ,
3232 weather:: WeatherCache ,
3333 Result , SuggestionQuery ,
3434} ;
@@ -1105,7 +1105,11 @@ impl<'a> SuggestDao<'a> {
11051105 // `suggestion.keywords()` can yield duplicates for dynamic
11061106 // suggestions, so ignore failures on insert in the uniqueness
11071107 // constraint on `(suggestion_id, keyword)`.
1108- let mut keyword_insert = KeywordInsertStatement :: new_with_or_ignore ( self . conn ) ?;
1108+ let mut keyword_insert = KeywordInsertStatement :: with_details (
1109+ self . conn ,
1110+ "keywords" ,
1111+ Some ( InsertConflictResolution :: Ignore ) ,
1112+ ) ?;
11091113 let mut suggestion_insert = SuggestionInsertStatement :: new ( self . conn ) ?;
11101114 let mut dynamic_insert = DynamicInsertStatement :: new ( self . conn ) ?;
11111115 for suggestion in suggestions {
@@ -1675,29 +1679,27 @@ pub(crate) struct KeywordInsertStatement<'conn>(rusqlite::Statement<'conn>);
16751679
16761680impl < ' conn > KeywordInsertStatement < ' conn > {
16771681 pub ( crate ) fn new ( conn : & ' conn Connection ) -> Result < Self > {
1678- Ok ( Self ( conn. prepare (
1679- "INSERT INTO keywords(
1680- suggestion_id,
1681- keyword,
1682- full_keyword_id,
1683- rank
1684- )
1685- VALUES(?, ?, ?, ?)
1686- " ,
1687- ) ?) )
1682+ Self :: with_details ( conn, "keywords" , None )
16881683 }
16891684
1690- pub ( crate ) fn new_with_or_ignore ( conn : & ' conn Connection ) -> Result < Self > {
1691- Ok ( Self ( conn. prepare (
1692- "INSERT OR IGNORE INTO keywords(
1693- suggestion_id,
1694- keyword,
1695- full_keyword_id,
1696- rank
1697- )
1698- VALUES(?, ?, ?, ?)
1699- " ,
1700- ) ?) )
1685+ pub ( crate ) fn with_details (
1686+ conn : & ' conn Connection ,
1687+ table : & str ,
1688+ conflict_resolution : Option < InsertConflictResolution > ,
1689+ ) -> Result < Self > {
1690+ Ok ( Self ( conn. prepare ( & format ! (
1691+ r#"
1692+ INSERT {} INTO {}(
1693+ suggestion_id,
1694+ keyword,
1695+ full_keyword_id,
1696+ rank
1697+ )
1698+ VALUES(?, ?, ?, ?)
1699+ "# ,
1700+ conflict_resolution. as_ref( ) . map( |r| r. as_str( ) ) . unwrap_or_default( ) ,
1701+ table,
1702+ ) ) ?) )
17011703 }
17021704
17031705 pub ( crate ) fn execute (
@@ -1714,6 +1716,18 @@ impl<'conn> KeywordInsertStatement<'conn> {
17141716 }
17151717}
17161718
/// How an `INSERT` statement should resolve a uniqueness-constraint conflict.
/// Maps to SQLite's `INSERT OR ...` conflict clauses.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum InsertConflictResolution {
    /// `INSERT OR IGNORE`: silently skip rows that would violate a
    /// uniqueness constraint.
    Ignore,
}

impl InsertConflictResolution {
    /// Returns the SQL conflict clause for this resolution, suitable for
    /// splicing between `INSERT` and `INTO`.
    // `&'static str` (rather than `&str` borrowed from `self`): the clause is
    // a literal, and the wider lifetime is backward compatible for callers.
    fn as_str(&self) -> &'static str {
        match self {
            InsertConflictResolution::Ignore => "OR IGNORE",
        }
    }
}
1730+
// Prepared statement for inserting prefix keywords.
// NOTE(review): inferred from the name only — the `impl` is outside this view;
// confirm against the statement it prepares.
struct PrefixKeywordInsertStatement<'conn>(rusqlite::Statement<'conn>);
17181732
17191733impl < ' conn > PrefixKeywordInsertStatement < ' conn > {
@@ -1752,32 +1766,55 @@ impl<'conn> PrefixKeywordInsertStatement<'conn> {
17521766 }
17531767}
17541768
/// Metrics for a set of keywords. Use `KeywordsMetricsUpdater` to write metrics
/// to the store and `SuggestDao::get_keywords_metrics` to read them.
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) struct KeywordsMetrics {
    /// The max byte count (not chars) of all keywords in the set.
    pub(crate) max_len: usize,
    /// The max word count (whitespace-separated words) of all keywords in the
    /// set.
    pub(crate) max_word_count: usize,
}
17601778
/// This can be used to keep a running tally of metrics as you process keywords
/// and then to write the metrics to the store. Make a `KeywordsMetricsUpdater`,
/// call `update` on it as you process each keyword, and then call `finish` to
/// write the metrics to the store.
pub(crate) struct KeywordsMetricsUpdater {
    // Running maximums accumulated so far; written out by `finish`.
    metrics: KeywordsMetrics,
}
17681786
17691787impl KeywordsMetricsUpdater {
17701788 pub ( crate ) fn new ( ) -> Self {
17711789 Self {
1772- max_len : 0 ,
1773- max_word_count : 0 ,
1790+ metrics : KeywordsMetrics :: default ( ) ,
17741791 }
17751792 }
17761793
1794+ /// Updates the metrics with those of the given keyword.
1795+ ///
1796+ /// The keyword's metrics are calculated as the max of its metrics as-is and
1797+ /// its metrics when it's transformed using `i18n_transform`. For example,
1798+ /// `"Qu\u{00e9}bec"` ("Québec" with single two-byte 'é' char) has 7 bytes,
1799+ /// so its `len` is 7, but the `len` of `i18n_transform("Qu\u{00e9}bec")` is
1800+ /// 6, since the 'é' is transformed to an ASCII 'e'. The keyword's `max_len`
1801+ /// will therefore be the max of 7 and 6, which is 7.
17771802 pub ( crate ) fn update ( & mut self , keyword : & str ) {
1778- self . max_len = std:: cmp:: max ( self . max_len , keyword. len ( ) ) ;
1779- self . max_word_count =
1780- std:: cmp:: max ( self . max_word_count , keyword. split_whitespace ( ) . count ( ) ) ;
1803+ let transformed_kw = i18n_transform ( keyword) ;
1804+ self . metrics . max_len = std:: cmp:: max (
1805+ self . metrics . max_len ,
1806+ std:: cmp:: max ( transformed_kw. len ( ) , keyword. len ( ) ) ,
1807+ ) ;
1808+
1809+ // Getting the word count is costly, so we get only the transformed word
1810+ // count under the assumption that it will always be at least as large
1811+ // as the the original word count. That's currently true because the
1812+ // only relevant transform that `i18n_transform` performs is to replace
1813+ // hyphens with spaces.
1814+ self . metrics . max_word_count = std:: cmp:: max (
1815+ self . metrics . max_word_count ,
1816+ transformed_kw. split_whitespace ( ) . count ( ) ,
1817+ ) ;
17811818 }
17821819
17831820 /// Inserts keywords metrics into the database. This assumes you have a
@@ -1805,8 +1842,8 @@ impl KeywordsMetricsUpdater {
18051842 . execute ( (
18061843 record_id. as_str ( ) ,
18071844 record_type,
1808- self . max_len ,
1809- self . max_word_count ,
1845+ self . metrics . max_len ,
1846+ self . metrics . max_word_count ,
18101847 ) )
18111848 . with_context ( "keywords metrics insert" ) ?;
18121849
@@ -1845,3 +1882,127 @@ impl<'conn> AmpFtsInsertStatement<'conn> {
18451882fn provider_config_meta_key ( provider : SuggestionProvider ) -> String {
18461883 format ! ( "{}{}" , PROVIDER_CONFIG_META_KEY_PREFIX , provider as u8 )
18471884}
1885+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{store::tests::TestStore, testing::*};

    // Exercises `KeywordsMetricsUpdater::update` (running maxima, i18n
    // transforms) and round-trips the metrics through the store via `finish`
    // and `SuggestDao::get_keywords_metrics`.
    #[test]
    fn keywords_metrics_updater() -> anyhow::Result<()> {
        // (test keyword, expected `KeywordsMetrics`)
        //
        // Tests are cumulative! Each expected value is the max over all
        // keywords processed so far, not just the current one.
        let tests = [
            (
                "abc",
                KeywordsMetrics {
                    max_len: 3,
                    max_word_count: 1,
                },
            ),
            (
                "a b",
                KeywordsMetrics {
                    max_len: 3,
                    max_word_count: 2,
                },
            ),
            (
                "a b c",
                KeywordsMetrics {
                    max_len: 5,
                    max_word_count: 3,
                },
            ),
            // "Québec" with single 'é' char:
            // With `i18n_transform`: len = 6
            // Without `i18n_transform`: len = 7
            // => Length for this string should be max(6, 7) = 7, which is a
            //    new overall max
            (
                "Qu\u{00e9}bec",
                KeywordsMetrics {
                    max_len: 7,
                    max_word_count: 3,
                },
            ),
            // "Québec" with ASCII 'e' followed by combining acute accent:
            // With `i18n_transform`, len = 6
            // Without `i18n_transform`, len = 8
            // => Length for this string should be max(6, 8) = 8, which is a
            //    new overall max
            (
                "Que\u{0301}bec",
                KeywordsMetrics {
                    max_len: 8,
                    max_word_count: 3,
                },
            ),
            // Each '-' should be replaced with a space, so the word count for
            // this string should be 4, which is a new overall max
            (
                "Carmel-by-the-Sea",
                KeywordsMetrics {
                    max_len: 17,
                    max_word_count: 4,
                },
            ),
        ];

        // Make an updater and call it with each test in turn.
        let mut updater = KeywordsMetricsUpdater::new();
        for (test_kw, expected_metrics) in &tests {
            updater.update(test_kw);
            assert_eq!(&updater.metrics, expected_metrics);
        }

        // Make a store and finish the updater so it updates the metrics table.
        let store = TestStore::new(MockRemoteSettingsClient::default());
        store.write(|dao| {
            // Make a dummy cache cell. It doesn't matter what it contains, we
            // just want to make sure it's empty after finishing the updater.
            let mut dummy_cache = OnceCell::new();
            dummy_cache.set("test").expect("dummy cache set");
            assert_ne!(dummy_cache.get(), None);

            let record_type = SuggestRecordType::Wikipedia;
            updater.finish(
                dao.conn,
                &SuggestRecordId::new("test-record-1".to_string()),
                record_type,
                &mut dummy_cache,
            )?;

            // `finish` is expected to have invalidated the cache.
            assert_eq!(dummy_cache.get(), None);

            // Read back the metrics and make sure they match the ones in the
            // final test.
            let read_metrics_1 = dao.get_keywords_metrics(record_type)?;
            assert_eq!(read_metrics_1, tests.last().unwrap().1);

            // Update the metrics one more time and finish them again.
            updater.update("a very long keyword with many words");
            let new_expected = KeywordsMetrics {
                max_len: 35,
                max_word_count: 7,
            };
            assert_eq!(updater.metrics, new_expected);

            updater.finish(
                dao.conn,
                &SuggestRecordId::new("test-record-2".to_string()),
                record_type,
                &mut dummy_cache,
            )?;

            // Read back the new metrics and make sure they match.
            let read_metrics_2 = dao.get_keywords_metrics(record_type)?;
            assert_eq!(read_metrics_2, new_expected);

            Ok(())
        })?;

        Ok(())
    }
}
0 commit comments