@@ -9,6 +9,16 @@ const { sinon } = ChromeUtils.importESModule(
   "resource://testing-common/Sinon.sys.mjs"
 );
 
+const {
+  normalizeTextForWhitelist,
+  tokenize,
+  buildPhraseSets,
+  makeIsolatedPhraseChecker,
+  _isForcedChatIsolated,
+} = ChromeUtils.importESModule(
+  "moz-src:///browser/components/genai/SmartAssistEngine.sys.mjs"
+);
+
 // Prefs
 const PREF_API_KEY = "browser.ml.smartAssist.apiKey";
 const PREF_ENDPOINT = "browser.ml.smartAssist.endpoint";
@@ -259,3 +269,150 @@ add_task(async function test_preprocessQuery_removes_question_marks() {
 
   sb.restore();
 });
+
+add_task(function test_normalizeTextForWhitelist_basic() {
+  // lowercasing + trimming + collapsing internal spaces
+  Assert.equal(
+    normalizeTextForWhitelist("  HeLLo   There  "),
+    "hello there",
+    "Should lowercase, trim, and collapse spaces"
+  );
+
+  // NFKC normalization: compatibility forms → canonical
+  // Fullwidth characters normalize: e.g., 'ＴＥＳＴ' → 'test'
+  Assert.equal(
+    normalizeTextForWhitelist("ＴＥＳＴ １２３"),
+    "test 123",
+    "Should NFKC-normalize fullwidth letters/digits"
+  );
+
+  // Multiple whitespace kinds (NBSP, tabs, newlines) collapse
+  Assert.equal(
+    normalizeTextForWhitelist("a\u00A0b\tc\nd"),
+    "a b c d",
+    "Should collapse all whitespace kinds to single spaces"
+  );
+});
+
+add_task(function test_tokenize_unicode_and_boundaries() {
+  // Splits on non-word chars, keeps letters/digits/underscore
+  Assert.deepEqual(
+    tokenize("hello, world! 42_times"),
+    ["hello", "world", "42_times"],
+    "Should split on punctuation and keep underscores"
+  );
+
+  // Unicode letters should be treated as word chars (\p{L})
+  Assert.deepEqual(
+    tokenize("mañana—café!"),
+    ["mañana", "café"],
+    "Should keep Unicode letters and split on punctuation (em dash, bang)"
+  );
+
+  // Apostrophes split (non-word), as intended
+  Assert.deepEqual(
+    tokenize("what's up"),
+    ["what", "s", "up"],
+    "Apostrophes are separators, so tokens split around them"
+  );
+});
+
+add_task(function test_buildPhraseSets_grouping_and_normalization() {
+  const phrases = [
+    "sup",
+    "hi there", // 2 tokens
+    "what's up", // becomes "what s up" (3 tokens)
+    "  foo   bar  ", // leading/trailing + multiple spaces
+    "", // empty should be skipped
+    "___", // token of underscores counts as 1 token
+  ];
+  const sets = buildPhraseSets(phrases);
+
+  // Expect keys for lengths: 1, 2, 3
+  Assert.ok(sets.has(1), "Should have set for single-token phrases");
+  Assert.ok(sets.has(2), "Should have set for two-token phrases");
+  Assert.ok(sets.has(3), "Should have set for three-token phrases");
+
+  // 1-token set contains: "sup", "___"
+  Assert.ok(sets.get(1).has("sup"), "Single-token set should contain 'sup'");
+  Assert.ok(sets.get(1).has("___"), "Single-token set should contain '___'");
+
+  // 2-token set contains normalized "hi there" and "foo bar"
+  Assert.ok(sets.get(2).has("hi there"), "Two-token set should contain 'hi there'");
+  Assert.ok(sets.get(2).has("foo bar"), "Two-token set should contain normalized 'foo bar'");
+
+  // 3-token set contains "what s up" (note apostrophe split)
+  Assert.ok(sets.get(3).has("what s up"), "Three-token set should contain 'what s up'");
+
+  // Empty phrase skipped: nothing added for length 0
+  for (const [k, set] of sets) {
+    Assert.ok(k > 0 && set.size >= 1, "No empty keys, each set has at least one entry");
+  }
+});
+
+add_task(function test_isolated_phrase_checker_single_word_boundaries() {
+  const phrases = ["sup", "hello", "___"];
+  const isForced = makeIsolatedPhraseChecker(phrases);
+
+  // Positive: exact token present
+  Assert.ok(isForced("sup bro"), "Should match 'sup' as an isolated token at start");
+  Assert.ok(isForced("hey, hello there"), "Should match 'hello' surrounded by punctuation");
+  Assert.ok(isForced("foo ___ bar"), "Should match token with underscores");
+
+  // Negative: partial-word should NOT match
+  Assert.ok(!isForced("supposingly, this should not match"), "No partial-word match for 'sup'");
+  Assert.ok(!isForced("supper time"), "No partial-word match inside 'supper'");
+  Assert.ok(!isForced("shelloworld"), "No partial-word match for 'hello'");
+});
+
+add_task(function test_isolated_phrase_checker_multiword_and_punctuation() {
+  // Multiword phrases; apostrophes become token splits -> "what's up" => "what s up"
+  const phrases = ["hi there", "what's up"];
+  const isForced = makeIsolatedPhraseChecker(phrases);
+
+  // Positive: punctuation between words should still match (token split)
+  Assert.ok(isForced("hi—there!"), "Em dash between words should match 'hi there'");
+  Assert.ok(isForced("well, hi there!!"), "Punctuation around phrase should match");
+  Assert.ok(isForced("so, what’s up today?"), "Curly apostrophe splits to tokens; should match 'what s up'");
+
+  // Negative: glued words should not match
+  Assert.ok(!isForced("hithere"), "Concatenated words should not match 'hi there'");
+  Assert.ok(!isForced("whatssup"), "Should not match 'what s up' without separators");
+});
+
+add_task(function test_isolated_phrase_checker_spacing_and_unicode_norm() {
+  const phrases = ["good morning", "hello"];
+  const isForced = makeIsolatedPhraseChecker(phrases);
+
+  // Multiple spaces collapse
+  Assert.ok(isForced("good   morning   everyone"), "Multiple spaces between tokens should still match");
+
+  // Fullwidth / NFKC normalization and basic usage
+  Assert.ok(isForced(" ＨＥＬＬＯ "), "Fullwidth letters, case, and surrounding spaces should normalize and match 'hello'");
+
+  // Non-breaking spaces and tabs
+  Assert.ok(isForced("good\u00A0morning\tteam"), "NBSP and tabs normalize and match");
+});
+
+add_task(function test_isolated_phrase_checker_no_match_cases() {
+  const phrases = ["hi there", "sup"];
+  const isForced = makeIsolatedPhraseChecker(phrases);
+
+  Assert.ok(!isForced(""), "Empty string should not match");
+  Assert.ok(!isForced("nothing to see here"), "Unrelated text should not match");
+  Assert.ok(!isForced("support"), "Partial token with 'sup' prefix should not match");
+});
+
+add_task(function test_isolated_phrase_checker_caching_stability() {
+  const phrases = ["hello", "hi there"];
+  const isForced = makeIsolatedPhraseChecker(phrases);
+
+  // Repeated calls with the same input should return identical results (cache sanity)
+  const q1 = "Hello there!";
+  const first = isForced(q1);
+  const second = isForced(q1);
+  Assert.equal(first, second, "Same query should yield identical result across calls (cache-stable)");
+
+  // Different whitespace should normalize to the same outcome
+  Assert.equal(isForced(" hello there "), isForced("hello there"), "Whitespace variations should not affect result");
+});
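
For context, below is a minimal, hypothetical sketch of helpers that would satisfy the behavior these tests assert: NFKC normalization with whitespace collapsing, Unicode-aware tokenization on non-word characters, phrase sets keyed by token count, and a sliding-window check for isolated phrases. The names mirror the imports above, but this is an illustration only, not the actual SmartAssistEngine.sys.mjs implementation (which may, for example, memoize results, as the last test hints).

// Hypothetical sketch: one way to implement the helpers exercised above.
// The real SmartAssistEngine.sys.mjs may differ in details (e.g. caching).

// Lowercase, NFKC-normalize, trim, and collapse all whitespace runs.
function normalizeTextForWhitelist(text) {
  return text.normalize("NFKC").toLowerCase().trim().replace(/\s+/g, " ");
}

// Split on anything that is not a Unicode letter, digit, or underscore.
function tokenize(text) {
  return normalizeTextForWhitelist(text)
    .split(/[^\p{L}\p{N}_]+/u)
    .filter(Boolean);
}

// Group whitelist phrases into Sets keyed by their token count;
// empty phrases are skipped.
function buildPhraseSets(phrases) {
  const sets = new Map();
  for (const phrase of phrases) {
    const tokens = tokenize(phrase);
    if (!tokens.length) {
      continue;
    }
    if (!sets.has(tokens.length)) {
      sets.set(tokens.length, new Set());
    }
    sets.get(tokens.length).add(tokens.join(" "));
  }
  return sets;
}

// Return a checker that matches a whitelisted phrase only as a run of
// whole tokens (sliding window), so partial words like "supper" never match.
function makeIsolatedPhraseChecker(phrases) {
  const sets = buildPhraseSets(phrases);
  return query => {
    const tokens = tokenize(query);
    for (const [length, set] of sets) {
      for (let i = 0; i + length <= tokens.length; i++) {
        if (set.has(tokens.slice(i, i + length).join(" "))) {
          return true;
        }
      }
    }
    return false;
  };
}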