@@ -88,41 +88,48 @@ func GetName(c *gin.Context) {
8888
8989//SearchSimilarNames search for all similar names by metaphone and Levenshtein method
9090func SearchSimilarNames (c * gin.Context ) {
91- //Name to be searched
91+ //name to be searched
9292 name := c .Params .ByName ("name" )
93+ nameMetaphone := metaphone .Pack (name )
9394
94- var names []models. NameType
95- database . Db . Raw ( "select * from name_types" ). Find ( & names )
95+ //find all metaphoneNames matching metaphone
96+ var metaphoneNames []models. NameType
9697
97- var canonicalEntity models. NameType
98- database . Db . Raw ( "select * from name_types where name = ?" , strings . ToUpper ( name )). Find ( & canonicalEntity )
98+ database . Db . Raw ( "select * from name_types where metaphone = ?" , nameMetaphone ). Find ( & metaphoneNames )
99+ similarNames := findNames ( metaphoneNames , name , levenshtein )
99100
100- similarNames , mtf := findSimilarNames (names , name , levenshtein )
101+ //for recall purposes we can't only search for metaphone exact match's if no similar word is found.
102+ if len (metaphoneNames ) == 0 || len (similarNames ) == 0 {
103+ metaphoneNames = searchForAllSimilarMetaphone (nameMetaphone )
104+ similarNames = findNames (metaphoneNames , name , levenshtein )
101105
102- //in case of failure in find a metaphone conde we return status not found
103- if len (names ) == 0 || len (similarNames ) == 0 {
104- c .JSON (http .StatusNotFound , gin.H {"Not found" : "metaphone not found" , "metaphone" : mtf })
105- return
106+ if len (metaphoneNames ) == 0 {
107+ c .JSON (http .StatusNotFound , gin.H {"Not found" : "metaphone not found" , "metaphone" : nameMetaphone })
108+ return
109+ }
110+
111+ if len (similarNames ) == 0 {
112+ c .JSON (http .StatusNotFound , gin.H {"Not found" : "similar names not found" , "metaphone" : nameMetaphone })
113+ return
114+ }
106115 }
107116
108- //when the similar names result's in less than 5 we search for every similar name of all similar names founded previously
117+ //when the similar metaphoneNames result's in less than 5 we search for every similar name of all similar metaphoneNames founded previously
109118 if len (similarNames ) < 5 {
110119 for _ , n := range similarNames {
111- similarNames , _ = findSimilarNames (names , n .Name , levenshtein )
120+ similar := findNames (metaphoneNames , n .Name , levenshtein )
121+ similarNames = append (similarNames , similar ... )
112122 }
113123 }
114124
115- //order all similar names from high to low Levenshtein
125+ //order all similar metaphoneNames from high to low Levenshtein
116126 nameV := orderByLevenshtein (similarNames )
117127
118128 //build canonical
119- if canonicalEntity .ID == 0 {
120- ce , err := findCanonical (nameV )
121- if err != nil {
122- c .JSON (http .StatusNotFound , gin.H {"Not found" : err .Error (), "metaphone" : mtf })
123- return
124- }
125- canonicalEntity = ce
129+ canonicalEntity , err := findCanonical (name , metaphoneNames , nameV )
130+ if err != nil {
131+ c .JSON (http .StatusNotFound , gin.H {"Not found" : err .Error (), "metaphone" : nameMetaphone })
132+ return
126133 }
127134
128135 //return
@@ -141,11 +148,53 @@ func SearchSimilarNames(c *gin.Context) {
141148
142149/*-------ALL BELOW USED ONLY ON SearchSimilarNames-------*/
143150
151+ //searchForAllSimilarMetaphone used in case of not finding exact metaphone match
152+ func searchForAllSimilarMetaphone (mtf string ) []models.NameType {
153+ var names []models.NameType
154+ database .Db .Raw ("select * from name_types" ).Find (& names )
155+
156+ var rNames []models.NameType
157+ for _ , n := range names {
158+ if metaphone .IsMetaphoneSimilar (mtf , n .Metaphone ) {
159+ rNames = append (rNames , n )
160+ }
161+ }
162+
163+ return rNames
164+ }
165+
144166//findCanonical search for every similar name on the database returning the first matched name
145- func findCanonical (similarNames []string ) (models.NameType , error ) {
167+ func findCanonical (name string , matchingMetaphoneNames []models. NameType , nameVariations []string ) (models.NameType , error ) {
146168 var canonicalEntity models.NameType
169+ n := strings .ToUpper (name )
170+
171+ //search exact match on matchingMetaphoneNames
172+ for _ , similarName := range matchingMetaphoneNames {
173+ if similarName .Name == n {
174+ return similarName , nil
175+ }
176+ }
177+
178+ //search for similar names on matchingMetaphoneNames
179+ for _ , similarName := range matchingMetaphoneNames {
180+ if metaphone .SimilarityBetweenWords (name , similarName .Name ) >= levenshtein {
181+ return similarName , nil
182+ }
183+ }
184+
185+ //search exact match on nameVariations
186+ for _ , similarName := range nameVariations {
187+ sn := strings .ToUpper (similarName )
188+ if sn == n {
189+ database .Db .Raw ("select * from name_types where name = ?" , sn ).Find (& canonicalEntity )
190+ if canonicalEntity .ID != 0 {
191+ return canonicalEntity , nil
192+ }
193+ }
194+ }
147195
148- for _ , similarName := range similarNames {
196+ //in case of failure on other attempts, we search every nameVariations directly on database
197+ for _ , similarName := range nameVariations {
149198 database .Db .Raw ("select * from name_types where name = ?" , strings .ToUpper (similarName )).Find (& canonicalEntity )
150199 if canonicalEntity .ID != 0 {
151200 return canonicalEntity , nil
@@ -155,47 +204,35 @@ func findCanonical(similarNames []string) (models.NameType, error) {
155204 return models.NameType {}, errors .New ("couldn't find canonical name" )
156205}
157206
158- //findSimilarNames returns []models.NameVar and if necessary reduces' threshold to a minimum of 0.5
159- func findSimilarNames (names []models.NameType , name string , threshold float32 ) ([]models.NameVar , string ) {
160- similarNames , mtf := findNames (names , name , threshold )
161-
162- //in case of empty return the levenshtein constant is downgraded to the minimum of 0.5
207+ //findNames return []models.NameVar with all similar names of searched string. For recall purpose we reduce the threshold given in 0.1 in case of empty return
208+ func findNames (names []models.NameType , name string , threshold float32 ) []models.NameVar {
209+ similarNames := findSimilarNames (name , names , threshold )
210+ //reduce the threshold given in 0.1 and search again
163211 if len (similarNames ) == 0 {
164- similarNames , _ = findNames (names , name , threshold - 0.1 )
165- if len (similarNames ) == 0 {
166- similarNames , _ = findNames (names , name , threshold - 0.2 )
167- }
168- if len (similarNames ) == 0 {
169- similarNames , _ = findNames (names , name , threshold - 0.3 )
170- }
212+ similarNames = findSimilarNames (name , names , threshold - 0.1 )
171213 }
172214
173- return similarNames , mtf
215+ return similarNames
174216}
175217
176- //findNames return []models.NameVar with all similar names and the metaphone code of searched string , called on findSimilarNames
177- func findNames ( names []models.NameType , name string , threshold float32 ) ( []models.NameVar , string ) {
218+ //findSimilarNames loop for all names given checking the similarity between words by a given threshold , called on findNames
219+ func findSimilarNames ( name string , names []models.NameType , threshold float32 ) []models.NameVar {
178220 var similarNames []models.NameVar
179221
180- mtf := metaphone .Pack (name )
181222 for _ , n := range names {
182- if metaphone .IsMetaphoneSimilar (mtf , n .Metaphone ) {
183- similarity := metaphone .SimilarityBetweenWords (strings .ToLower (name ), strings .ToLower (n .Name ))
184- if similarity >= threshold {
185- similarNames = append (similarNames , models.NameVar {Name : n .Name , Levenshtein : similarity })
186- varWords := strings .Split (n .NameVariations , "|" )
187- for _ , vw := range varWords {
188- if vw != "" {
189- similarNames = append (similarNames , models.NameVar {Name : vw , Levenshtein : similarity })
190- }
223+ similarity := metaphone .SimilarityBetweenWords (strings .ToLower (name ), strings .ToLower (n .Name ))
224+ if similarity >= threshold {
225+ similarNames = append (similarNames , models.NameVar {Name : n .Name , Levenshtein : similarity })
226+ varWords := strings .Split (n .NameVariations , "|" )
227+ for _ , vw := range varWords {
228+ if vw != "" {
229+ similarNames = append (similarNames , models.NameVar {Name : vw , Levenshtein : similarity })
191230 }
192231 }
193-
194232 }
195233 }
196234
197- return similarNames , mtf
198-
235+ return similarNames
199236}
200237
201238//orderByLevenshtein used to sort an array by Levenshtein and len of the name
0 commit comments