55 sanitize ,
66 removeUnicode
77} from '../../commons/text' ;
8+ const stem = require ( 'wink-porter2-stemmer' ) ;
89
910/**
1011 * Check if a given text exists in another
@@ -13,13 +14,81 @@ import {
1314 * @param {String } compareWith text against which to be compared
1415 * @returns {Boolean }
1516 */
17+
/**
 * Normalise a string for comparison: apply Unicode NFKC normalisation,
 * lowercase it, remove zero-width characters (U+200B-U+200D, U+2060,
 * U+FEFF) and trim surrounding whitespace.
 *
 * @param {String} str text to normalise
 * @returns {String} the normalised text; an empty string when `str` is
 * null or undefined
 */
function cleanText(str) {
  if (str == null) {
    return '';
  }
  return (
    str
      // Normalise before lowercasing: NFKC can itself yield uppercase
      // letters (e.g. U+210C becomes "H"), which lowercasing first would miss.
      .normalize('NFKC')
      .toLowerCase()
      .replace(/[\u200B-\u200D\u2060\uFEFF]/g, '')
      .trim()
  );
}
25+
/**
 * Break text into words (runs of Unicode letters/digits), replace known
 * symbols with their word equivalent (currently `&` becomes `and`) and
 * join the result with single spaces. All other punctuation and
 * whitespace is dropped.
 *
 * @param {String} text text to tokenise
 * @returns {String} space separated words with synonyms substituted
 */
function replaceSynonyms(text) {
  // A Map (rather than an object literal) so that words such as
  // "constructor" never resolve to inherited Object.prototype members.
  const synonymMap = new Map([['&', 'and']]);
  // A token is either a run of letters/digits or a lone `&`. Splitting on
  // every non-alphanumeric character would discard `&` before the lookup.
  const tokens = text.match(/[\p{L}\p{N}]+|&/gu) ?? [];
  return tokens.map(token => synonymMap.get(token) ?? token).join(' ');
}
35+
/**
 * Reduce every word of a text to its stem.
 *
 * The text is passed through `replaceSynonyms`, split into words on any
 * run of characters that are not Unicode letters or digits, and each
 * non-empty word is cleaned with `cleanText` and handed to `stem` (from
 * 'wink-porter2-stemmer'). A word whose stemming throws is kept in its
 * cleaned form and a warning is logged.
 *
 * @param {String} str text to stem
 * @returns {String} the stemmed words joined by single spaces
 */
function stringStemmer(str) {
  const wordSeparator = /[^\p{L}\p{N}]+/u;
  const stemmedWords = [];

  for (const word of replaceSynonyms(str).split(wordSeparator)) {
    // Splitting can leave empty strings at the edges; ignore them.
    if (!word) {
      continue;
    }

    const w = cleanText(word).replace(/[^\p{L}\p{N}]/gu, '');
    let stemmed;
    try {
      stemmed = stem(w);
    } catch (err) {
      // Best effort: fall back to the cleaned, unstemmed word.
      console.warn('Stemming failed for', w, err);
      stemmed = w;
    }
    stemmedWords.push(stemmed);
  }

  return stemmedWords.join(' ');
}
51+
/**
 * Check if a given text exists in another.
 *
 * Both texts are stemmed with `stringStemmer` and curated with
 * `curateString`. The result is `true` when the curated `compare` text is
 * a substring of the curated `compareWith` text. Otherwise the stemmed
 * texts are compared as bags of words and the result is `true` when their
 * cosine similarity is at least 0.75. That fallback ignores word order
 * and is symmetric in its two arguments.
 *
 * @param {String} compare given text to check
 * @param {String} compareWith text against which to be compared
 * @returns {Boolean} `false` when either curated text is empty
 */
function isStringContained(compare, compareWith) {
  // Minimum cosine similarity for two texts to be treated as a match.
  const similarityThreshold = 0.75;

  const stemmedCompare = stringStemmer(compare);
  const stemmedCompareWith = stringStemmer(compareWith);

  const curatedCompareWith = curateString(stemmedCompareWith);
  const curatedCompare = curateString(stemmedCompare);
  if (!curatedCompareWith || !curatedCompare) {
    return false;
  }
  if (curatedCompareWith.includes(curatedCompare)) {
    return true;
  }

  // Term frequencies are kept in Maps: with a plain object a token such as
  // "constructor" would read an inherited member and corrupt the counts.
  const countTerms = text => {
    const counts = new Map();
    for (const term of text.split(/[^\p{L}\p{N}]+/u)) {
      // Empty strings produced by the split are not terms.
      if (term) {
        counts.set(term, (counts.get(term) ?? 0) + 1);
      }
    }
    return counts;
  };
  const freqA = countTerms(stemmedCompare);
  const freqB = countTerms(stemmedCompareWith);

  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (const [term, a] of freqA) {
    // Terms missing from the other text contribute nothing to the dot product.
    dot += a * (freqB.get(term) ?? 0);
    magA += a * a;
  }
  for (const b of freqB.values()) {
    magB += b * b;
  }

  const similarity =
    magA && magB ? dot / (Math.sqrt(magA) * Math.sqrt(magB)) : 0;
  return similarity >= similarityThreshold; // comparison against the 75% threshold
}
2493
2594/**
@@ -32,7 +101,8 @@ function curateString(str) {
32101 const noUnicodeStr = removeUnicode ( str , {
33102 emoji : true ,
34103 nonBmp : true ,
35- punctuations : true
104+ punctuations : true ,
105+ whitespace : true
36106 } ) ;
37107 return sanitize ( noUnicodeStr ) ;
38108}
@@ -52,9 +122,11 @@ function labelContentNameMismatchEvaluate(node, options, virtualNode) {
52122 subtreeDescendant : true ,
53123 ignoreIconLigature : true ,
54124 pixelThreshold,
55- occurrenceThreshold
125+ occurrenceThreshold,
126+ ignoreNativeTextAlternative : true // To Skip for nativeTextAlternative
56127 } )
57128 ) . toLowerCase ( ) ;
129+
58130 if ( ! visibleText ) {
59131 return true ;
60132 }
0 commit comments