5
5
sanitize ,
6
6
removeUnicode
7
7
} from '../../commons/text' ;
8
+ const stem = require ( 'wink-porter2-stemmer' ) ;
8
9
9
10
/**
10
11
* Check if a given text exists in another
@@ -13,13 +14,81 @@ import {
13
14
* @param {String } compareWith text against which to be compared
14
15
* @returns {Boolean }
15
16
*/
17
+
18
/**
 * Normalise a piece of text so that visually equivalent strings compare equal:
 * applies Unicode NFKC normalisation, lower-cases the result, removes
 * zero-width characters and trims surrounding whitespace.
 *
 * @param {String} str text to clean
 * @returns {String} the cleaned text; an empty string when `str` is null or
 * undefined
 */
function cleanText(str) {
  // Callers chain string methods on the result, so never hand back undefined.
  if (str == null) {
    return '';
  }
  return (
    str
      // Normalise BEFORE lower-casing: compatibility characters such as
      // U+2122 (trade mark sign) expand to upper-case letters ("TM") that
      // would otherwise escape case folding.
      .normalize('NFKC')
      .toLowerCase()
      // Zero-width space/joiners, word joiner and BOM carry no visible text.
      .replace(/[\u200B-\u200D\u2060\uFEFF]/g, '')
      .trim()
  );
}
25
+
26
/**
 * Replace known synonyms in a text with their canonical word and collapse
 * every run of non letter/number characters into a single space.
 *
 * @param {String} text text in which synonyms are replaced
 * @returns {String} the words of `text` joined by single spaces, with
 * synonyms substituted (e.g. "Tom & Jerry" becomes "Tom and Jerry")
 */
function replaceSynonyms(text) {
  // A Map (rather than an object literal) so that words like "constructor"
  // do not resolve to inherited Object.prototype members.
  const synonymMap = new Map([['&', 'and']]);
  const separator = /[^\p{L}\p{N}]+/u;

  // Synonym keys made of punctuation (such as '&') would be swallowed by the
  // separator split below and never looked up, so expand them up front.
  let expanded = text;
  for (const [term, replacement] of synonymMap) {
    if (separator.test(term)) {
      expanded = expanded.split(term).join(` ${replacement} `);
    }
  }

  return expanded
    .split(separator)
    .map(word => synonymMap.get(word) ?? word)
    .join(' ');
}
35
+
36
/**
 * Reduce a text to a space separated list of cleaned word stems: synonyms
 * are replaced, the text is split on non letter/number characters, each word
 * is cleaned with `cleanText` and then passed to the `stem` function.
 *
 * @param {String} str text to stem
 * @returns {String} the stemmed words joined by single spaces; an empty
 * string when `str` is null, undefined or empty
 */
function stringStemmer(str) {
  // Nothing to tokenise; this also avoids a TypeError further down when the
  // caller has no text at all.
  if (str == null || str === '') {
    return '';
  }
  return replaceSynonyms(str)
    .split(/[^\p{L}\p{N}]+/u)
    .filter(Boolean)
    .map(word => {
      // Cleaning may introduce characters that are neither letters nor
      // numbers, so strip those again before stemming.
      const token = cleanText(word).replace(/[^\p{L}\p{N}]/gu, '');
      try {
        return stem(token);
      } catch (err) {
        // Best effort: fall back to the unstemmed token rather than failing
        // the whole comparison.
        console.warn('Stemming failed for', token, err);
        return token;
      }
    })
    .join(' ');
}
51
+
16
52
/**
 * Check if a given text exists in another.
 *
 * Both texts are stemmed and curated first. The result is true when the
 * curated `compare` is a substring of the curated `compareWith`, or, failing
 * that, when the cosine similarity of their word-frequency vectors is at
 * least 0.75.
 *
 * @param {String} compare given text to check
 * @param {String} compareWith text against which to be compared
 * @returns {Boolean} false when either text is missing or curates to nothing
 */
function isStringContained(compare, compareWith) {
  // Missing text can never match; bail out before the stemmer dereferences it.
  if (compare == null || compareWith == null) {
    return false;
  }

  const SIMILARITY_THRESHOLD = 0.75;
  const stemmedCompare = stringStemmer(compare);
  const stemmedCompareWith = stringStemmer(compareWith);

  const curatedCompareWith = curateString(stemmedCompareWith);
  const curatedCompare = curateString(stemmedCompare);
  if (!curatedCompareWith || !curatedCompare) {
    return false;
  }
  if (curatedCompareWith.includes(curatedCompare)) {
    return true;
  }

  // Word-frequency table. A Map keeps tokens such as "constructor" from
  // colliding with inherited object properties, and empty tokens are skipped
  // because they are not words.
  const countTokens = text => {
    const counts = new Map();
    for (const token of text.split(/[^\p{L}\p{N}]+/u)) {
      if (token) {
        counts.set(token, (counts.get(token) ?? 0) + 1);
      }
    }
    return counts;
  };
  const freqA = countTokens(stemmedCompare);
  const freqB = countTokens(stemmedCompareWith);

  // Cosine similarity: dot product over the shared terms divided by the
  // product of both vector magnitudes.
  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (const [term, countA] of freqA) {
    dot += countA * (freqB.get(term) ?? 0);
    magA += countA * countA;
  }
  for (const countB of freqB.values()) {
    magB += countB * countB;
  }

  const similarity =
    magA && magB ? dot / (Math.sqrt(magA) * Math.sqrt(magB)) : 0;
  return similarity >= SIMILARITY_THRESHOLD;
}
24
93
25
94
/**
@@ -32,7 +101,8 @@ function curateString(str) {
32
101
const noUnicodeStr = removeUnicode ( str , {
33
102
emoji : true ,
34
103
nonBmp : true ,
35
- punctuations : true
104
+ punctuations : true ,
105
+ whitespace : true
36
106
} ) ;
37
107
return sanitize ( noUnicodeStr ) ;
38
108
}
@@ -52,9 +122,11 @@ function labelContentNameMismatchEvaluate(node, options, virtualNode) {
52
122
subtreeDescendant : true ,
53
123
ignoreIconLigature : true ,
54
124
pixelThreshold,
55
- occurrenceThreshold
125
+ occurrenceThreshold,
126
+ ignoreNativeTextAlternative : true // To Skip for nativeTextAlternative
56
127
} )
57
128
) . toLowerCase ( ) ;
129
+
58
130
if ( ! visibleText ) {
59
131
return true ;
60
132
}
0 commit comments