Skip to content

Commit cb4ad2f

Browse files
committed
Increase performance and precision, at the cost of some recall
1 parent 193b09d commit cb4ad2f

File tree

1 file changed

+87
-50
lines changed

1 file changed

+87
-50
lines changed

controllers/controller.go

Lines changed: 87 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -88,41 +88,48 @@ func GetName(c *gin.Context) {
8888

8989
//SearchSimilarNames search for all similar names by metaphone and Levenshtein method
9090
func SearchSimilarNames(c *gin.Context) {
91-
//Name to be searched
91+
//name to be searched
9292
name := c.Params.ByName("name")
93+
nameMetaphone := metaphone.Pack(name)
9394

94-
var names []models.NameType
95-
database.Db.Raw("select * from name_types").Find(&names)
95+
//find all metaphoneNames matching metaphone
96+
var metaphoneNames []models.NameType
9697

97-
var canonicalEntity models.NameType
98-
database.Db.Raw("select * from name_types where name = ?", strings.ToUpper(name)).Find(&canonicalEntity)
98+
database.Db.Raw("select * from name_types where metaphone = ?", nameMetaphone).Find(&metaphoneNames)
99+
similarNames := findNames(metaphoneNames, name, levenshtein)
99100

100-
similarNames, mtf := findSimilarNames(names, name, levenshtein)
101+
//for recall purposes we can't rely only on exact metaphone matches: if no similar word is found, fall back to searching all similar metaphones.
102+
if len(metaphoneNames) == 0 || len(similarNames) == 0 {
103+
metaphoneNames = searchForAllSimilarMetaphone(nameMetaphone)
104+
similarNames = findNames(metaphoneNames, name, levenshtein)
101105

102-
//in case of failure in find a metaphone conde we return status not found
103-
if len(names) == 0 || len(similarNames) == 0 {
104-
c.JSON(http.StatusNotFound, gin.H{"Not found": "metaphone not found", "metaphone": mtf})
105-
return
106+
if len(metaphoneNames) == 0 {
107+
c.JSON(http.StatusNotFound, gin.H{"Not found": "metaphone not found", "metaphone": nameMetaphone})
108+
return
109+
}
110+
111+
if len(similarNames) == 0 {
112+
c.JSON(http.StatusNotFound, gin.H{"Not found": "similar names not found", "metaphone": nameMetaphone})
113+
return
114+
}
106115
}
107116

108-
//when the similar names result's in less than 5 we search for every similar name of all similar names founded previously
117+
//when fewer than 5 similar names are found, we also search for the names similar to each similar name found previously
109118
if len(similarNames) < 5 {
110119
for _, n := range similarNames {
111-
similarNames, _ = findSimilarNames(names, n.Name, levenshtein)
120+
similar := findNames(metaphoneNames, n.Name, levenshtein)
121+
similarNames = append(similarNames, similar...)
112122
}
113123
}
114124

115-
//order all similar names from high to low Levenshtein
125+
//order all similar names from high to low Levenshtein
116126
nameV := orderByLevenshtein(similarNames)
117127

118128
//build canonical
119-
if canonicalEntity.ID == 0 {
120-
ce, err := findCanonical(nameV)
121-
if err != nil {
122-
c.JSON(http.StatusNotFound, gin.H{"Not found": err.Error(), "metaphone": mtf})
123-
return
124-
}
125-
canonicalEntity = ce
129+
canonicalEntity, err := findCanonical(name, metaphoneNames, nameV)
130+
if err != nil {
131+
c.JSON(http.StatusNotFound, gin.H{"Not found": err.Error(), "metaphone": nameMetaphone})
132+
return
126133
}
127134

128135
//return
@@ -141,11 +148,53 @@ func SearchSimilarNames(c *gin.Context) {
141148

142149
/*-------ALL BELOW USED ONLY ON searchSimilarNames-------*/
143150

151+
//searchForAllSimilarMetaphone is used when no exact metaphone match is found; it loads every name and keeps those whose metaphone is similar to mtf
152+
func searchForAllSimilarMetaphone(mtf string) []models.NameType {
153+
var names []models.NameType
154+
database.Db.Raw("select * from name_types").Find(&names)
155+
156+
var rNames []models.NameType
157+
for _, n := range names {
158+
if metaphone.IsMetaphoneSimilar(mtf, n.Metaphone) {
159+
rNames = append(rNames, n)
160+
}
161+
}
162+
163+
return rNames
164+
}
165+
144166
//findCanonical searches for every similar name in the database, returning the first matched name
145-
func findCanonical(similarNames []string) (models.NameType, error) {
167+
func findCanonical(name string, matchingMetaphoneNames []models.NameType, nameVariations []string) (models.NameType, error) {
146168
var canonicalEntity models.NameType
169+
n := strings.ToUpper(name)
170+
171+
//search exact match on matchingMetaphoneNames
172+
for _, similarName := range matchingMetaphoneNames {
173+
if similarName.Name == n {
174+
return similarName, nil
175+
}
176+
}
177+
178+
//search for similar names on matchingMetaphoneNames
179+
for _, similarName := range matchingMetaphoneNames {
180+
if metaphone.SimilarityBetweenWords(name, similarName.Name) >= levenshtein {
181+
return similarName, nil
182+
}
183+
}
184+
185+
//search exact match on nameVariations
186+
for _, similarName := range nameVariations {
187+
sn := strings.ToUpper(similarName)
188+
if sn == n {
189+
database.Db.Raw("select * from name_types where name = ?", sn).Find(&canonicalEntity)
190+
if canonicalEntity.ID != 0 {
191+
return canonicalEntity, nil
192+
}
193+
}
194+
}
147195

148-
for _, similarName := range similarNames {
196+
//if the previous attempts fail, we look up each name variation directly in the database
197+
for _, similarName := range nameVariations {
149198
database.Db.Raw("select * from name_types where name = ?", strings.ToUpper(similarName)).Find(&canonicalEntity)
150199
if canonicalEntity.ID != 0 {
151200
return canonicalEntity, nil
@@ -155,47 +204,35 @@ func findCanonical(similarNames []string) (models.NameType, error) {
155204
return models.NameType{}, errors.New("couldn't find canonical name")
156205
}
157206

158-
//findSimilarNames returns []models.NameVar and if necessary reduces' threshold to a minimum of 0.5
159-
func findSimilarNames(names []models.NameType, name string, threshold float32) ([]models.NameVar, string) {
160-
similarNames, mtf := findNames(names, name, threshold)
161-
162-
//in case of empty return the levenshtein constant is downgraded to the minimum of 0.5
207+
//findNames returns a []models.NameVar with all names similar to the searched string. For recall purposes, if nothing is found the given threshold is reduced by 0.1 and the search is retried once
208+
func findNames(names []models.NameType, name string, threshold float32) []models.NameVar {
209+
similarNames := findSimilarNames(name, names, threshold)
210+
//reduce the given threshold by 0.1 and search again
163211
if len(similarNames) == 0 {
164-
similarNames, _ = findNames(names, name, threshold-0.1)
165-
if len(similarNames) == 0 {
166-
similarNames, _ = findNames(names, name, threshold-0.2)
167-
}
168-
if len(similarNames) == 0 {
169-
similarNames, _ = findNames(names, name, threshold-0.3)
170-
}
212+
similarNames = findSimilarNames(name, names, threshold-0.1)
171213
}
172214

173-
return similarNames, mtf
215+
return similarNames
174216
}
175217

176-
//findNames return []models.NameVar with all similar names and the metaphone code of searched string, called on findSimilarNames
177-
func findNames(names []models.NameType, name string, threshold float32) ([]models.NameVar, string) {
218+
//findSimilarNames loops over all the given names, checking the similarity between words against the given threshold; called by findNames
219+
func findSimilarNames(name string, names []models.NameType, threshold float32) []models.NameVar {
178220
var similarNames []models.NameVar
179221

180-
mtf := metaphone.Pack(name)
181222
for _, n := range names {
182-
if metaphone.IsMetaphoneSimilar(mtf, n.Metaphone) {
183-
similarity := metaphone.SimilarityBetweenWords(strings.ToLower(name), strings.ToLower(n.Name))
184-
if similarity >= threshold {
185-
similarNames = append(similarNames, models.NameVar{Name: n.Name, Levenshtein: similarity})
186-
varWords := strings.Split(n.NameVariations, "|")
187-
for _, vw := range varWords {
188-
if vw != "" {
189-
similarNames = append(similarNames, models.NameVar{Name: vw, Levenshtein: similarity})
190-
}
223+
similarity := metaphone.SimilarityBetweenWords(strings.ToLower(name), strings.ToLower(n.Name))
224+
if similarity >= threshold {
225+
similarNames = append(similarNames, models.NameVar{Name: n.Name, Levenshtein: similarity})
226+
varWords := strings.Split(n.NameVariations, "|")
227+
for _, vw := range varWords {
228+
if vw != "" {
229+
similarNames = append(similarNames, models.NameVar{Name: vw, Levenshtein: similarity})
191230
}
192231
}
193-
194232
}
195233
}
196234

197-
return similarNames, mtf
198-
235+
return similarNames
199236
}
200237

201238
//orderByLevenshtein used to sort an array by Levenshtein and len of the name

0 commit comments

Comments
 (0)