-
-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Add support for automatic ICORE conference ranking lookup [#13476] #13699
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
d578f69
022bcd4
5bacb29
e035e0e
3a36afe
a7b1b94
33ed0db
806309e
5330632
b092aa9
52d3acd
75d2b47
45a1d10
290cf6c
93da09f
cbd4dda
2a1a73e
6747ad1
ff5c144
89c6600
c63e2da
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,8 +3,12 @@ | |
import java.util.Locale; | ||
|
||
import info.debatty.java.stringsimilarity.Levenshtein; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public class StringSimilarity { | ||
private static final Logger LOGGER = LoggerFactory.getLogger(StringSimilarity.class); | ||
|
||
private final Levenshtein METRIC_DISTANCE = new Levenshtein(); | ||
// edit distance threshold for entry title comparison | ||
private final int METRIC_THRESHOLD = 4; | ||
|
@@ -24,4 +28,31 @@ public double editDistanceIgnoreCase(String a, String b) { | |
// TODO: Locale is dependent on the language of the strings. English is a good denominator. | ||
return METRIC_DISTANCE.distance(a.toLowerCase(Locale.ENGLISH), b.toLowerCase(Locale.ENGLISH)); | ||
} | ||
|
||
/** | ||
* Calculates the similarity (a number within 0 and 1) between two strings. | ||
* http://stackoverflow.com/questions/955110/similarity-string-comparison-in-java | ||
*/ | ||
public double similarity(final String first, final String second) { | ||
final String longer; | ||
final String shorter; | ||
|
||
if (first.length() < second.length()) { | ||
longer = second; | ||
shorter = first; | ||
} else { | ||
longer = first; | ||
shorter = second; | ||
} | ||
|
||
final int longerLength = longer.length(); | ||
// both strings are zero length | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment is trivial and can be derived directly from the code condition (longerLength == 0). It should be removed as it doesn't add new information. |
||
if (longerLength == 0) { | ||
Comment on lines
+49
to
+50
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment is redundant as it simply restates what the code clearly shows. The comment doesn't provide additional information or reasoning. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment was added there by the original author of the code, I've merely ported it with the necessary modifications. That said, I will contest this review as the lines final int longerLength = longer.length();
// both strings are zero length
if (longerLength == 0) {
return 1.0;
} do not explicitly state, on their own, that the two input strings are equal. Further, the comment is present in the original Stack Overflow post here. That being said, if you're adamant about it, I don't mind changing this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Take the bot meanings with care, they are not always that good |
||
return 1.0; | ||
} | ||
final double distanceIgnoredCase = editDistanceIgnoreCase(longer, shorter); | ||
final double similarity = (longerLength - distanceIgnoredCase) / longerLength; | ||
LOGGER.trace("Longer string: {} Shorter string: {} Similarity: {}", longer, shorter, similarity); | ||
return similarity; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,14 @@ | ||
package org.jabref.logic.util.strings; | ||
|
||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.CsvSource; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
class StringSimilarityTest { | ||
private final double EPSILON_SIMILARITY = 0.00001; | ||
|
||
private StringSimilarity similarityChecker = new StringSimilarity(); | ||
|
||
|
@@ -27,4 +30,64 @@ class StringSimilarityTest { | |
void stringSimilarity(String a, String b, String expectedResult) { | ||
assertEquals(Boolean.valueOf(expectedResult), similarityChecker.isSimilar(a, b)); | ||
} | ||
|
||
@Test | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the test Please, convert to |
||
void similarityReturnsOneForExactStrings() { | ||
String a = "abcdef"; | ||
String b = "abcdef"; | ||
double expectedResult = 1.0; | ||
double similarity = similarityChecker.similarity(a, b); | ||
|
||
assertEquals(expectedResult, similarity, EPSILON_SIMILARITY); | ||
} | ||
|
||
@Test | ||
void similarityReturnsZeroForNonMatchingStrings() { | ||
String a = "abcdef"; | ||
String b = "uvwxyz"; | ||
double expectedResult = 0.0; | ||
double similarity = similarityChecker.similarity(a, b); | ||
|
||
assertEquals(expectedResult, similarity, EPSILON_SIMILARITY); | ||
} | ||
|
||
@Test | ||
void similarityReturnsValueBetweenZeroAndOneForSimilarStrings() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if this test is doing really the expected thing -- check for the expected value directly. |
||
String a = "abc"; | ||
String b = "abcdefgh"; | ||
double exactMatch = 1.0; | ||
double similarity = similarityChecker.similarity(a, b); | ||
|
||
assertTrue(similarity >= EPSILON_SIMILARITY && similarity < exactMatch); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using assertTrue with a boolean condition instead of asserting the actual contents. Should compare the actual similarity value with expected bounds using assertEquals. |
||
} | ||
|
||
@Test | ||
void similarityReturnsOneForEmptyStrings() { | ||
String a = ""; | ||
String b = ""; | ||
double expectedResult = 1.0; | ||
double similarity = similarityChecker.similarity(a, b); | ||
|
||
assertEquals(expectedResult, similarity, EPSILON_SIMILARITY); | ||
} | ||
|
||
@Test | ||
void similarityReturnsZeroWhenOneStringIsEmpty() { | ||
String a = "abcdef"; | ||
String b = ""; | ||
double expectedResult = 0.0; | ||
double similarity = similarityChecker.similarity(a, b); | ||
|
||
assertEquals(expectedResult, similarity, EPSILON_SIMILARITY); | ||
} | ||
|
||
@Test | ||
void similarityIsCaseInsensitive() { | ||
String a = "abcdef"; | ||
String b = "ABCDEF"; | ||
double expectedResult = 1.0; | ||
double similarity = similarityChecker.similarity(a, b); | ||
|
||
assertEquals(expectedResult, similarity, EPSILON_SIMILARITY); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice find :)