Skip to content

Commit aac75cf

Browse files
authored
Merge pull request #11 from paulirwin/java-catchup
Catch up to upstream Java 0.22 release
2 parents fde7399 + 74595f6 commit aac75cf

30 files changed

+638
-92
lines changed

src/F23.StringSimilarity/Cosine.cs

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,30 @@ public Cosine(int k) : base(k) { }
5050
/// </summary>
5151
public Cosine() { }
5252

53+
/// <summary>
54+
/// Compute the cosine similarity between strings.
55+
/// </summary>
56+
/// <param name="s1">The first string to compare.</param>
57+
/// <param name="s2">The second string to compare.</param>
58+
/// <returns>The cosine similarity in the range [0, 1]</returns>
59+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
5360
public double Similarity(string s1, string s2)
5461
{
62+
if (s1 == null)
63+
{
64+
throw new ArgumentNullException(nameof(s1));
65+
}
66+
67+
if (s2 == null)
68+
{
69+
throw new ArgumentNullException(nameof(s2));
70+
}
71+
72+
if (s1.Equals(s2))
73+
{
74+
return 1;
75+
}
76+
5577
if (s1.Length < k || s2.Length < k)
5678
{
5779
return 0;
@@ -63,12 +85,11 @@ public double Similarity(string s1, string s2)
6385
return DotProduct(profile1, profile2) / (Norm(profile1) * Norm(profile2));
6486
}
6587

66-
/**
67-
* Compute the norm L2 : sqrt(Sum_i( v_i²)).
68-
*
69-
* @param profile
70-
* @return L2 norm
71-
*/
88+
/// <summary>
89+
/// Compute the L2 norm: sqrt(Sum_i(v_i²)).
90+
/// </summary>
91+
/// <param name="profile"></param>
92+
/// <returns></returns>
7293
private static double Norm(IDictionary<string, int> profile)
7394
{
7495
double agg = 0;
@@ -107,6 +128,13 @@ private static double DotProduct(IDictionary<string, int> profile1,
107128
return agg;
108129
}
109130

131+
/// <summary>
132+
/// Returns 1.0 - similarity.
133+
/// </summary>
134+
/// <param name="s1">The first string to compare.</param>
135+
/// <param name="s2">The second string to compare.</param>
136+
/// <returns>1.0 - the cosine similarity in the range [0, 1]</returns>
137+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
110138
public double Distance(string s1, string s2)
111139
=> 1.0 - Similarity(s1, s2);
112140

src/F23.StringSimilarity/Damerau.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
using System;
2626
using System.Collections.Generic;
2727
using F23.StringSimilarity.Interfaces;
28+
// ReSharper disable ForCanBeConvertedToForeach
29+
// ReSharper disable SuggestVarOrType_Elsewhere
2830

2931
namespace F23.StringSimilarity
3032
{
@@ -42,8 +44,33 @@ namespace F23.StringSimilarity
4244
/// </summary>
4345
public class Damerau : IMetricStringDistance
4446
{
47+
/// <summary>
48+
/// Compute the distance between strings: the minimum number of operations
49+
/// needed to transform one string into the other (insertion, deletion,
50+
/// substitution of a single character, or a transposition of two adjacent
51+
/// characters).
52+
/// </summary>
53+
/// <param name="s1">The first string to compare.</param>
54+
/// <param name="s2">The second string to compare.</param>
55+
/// <returns>The computed distance.</returns>
56+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
4557
public double Distance(string s1, string s2)
4658
{
59+
if (s1 == null)
60+
{
61+
throw new ArgumentNullException(nameof(s1));
62+
}
63+
64+
if (s2 == null)
65+
{
66+
throw new ArgumentNullException(nameof(s2));
67+
}
68+
69+
if (s1.Equals(s2))
70+
{
71+
return 0;
72+
}
73+
4774
// Infinite distance is the max possible distance
4875
int inf = s1.Length + s2.Length;
4976

src/F23.StringSimilarity/ICharacterSubstitution.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ public interface ICharacterSubstitution
3535
/// <summary>
3636
/// Indicate the cost of a substitution between c1 and c2.
3737
/// </summary>
38-
/// <param name="c1"></param>
39-
/// <param name="c2"></param>
40-
/// <returns></returns>
38+
/// <param name="c1">The first character of the substitution.</param>
39+
/// <param name="c2">The second character of the substitution.</param>
40+
/// <returns>The cost in the range [0, 1].</returns>
4141
double Cost(char c1, char c2);
4242
}
4343
}

src/F23.StringSimilarity/Jaccard.cs

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
using System;
2626
using System.Collections.Generic;
2727
using F23.StringSimilarity.Interfaces;
28-
using F23.StringSimilarity.Support;
28+
2929
// ReSharper disable LoopCanBeConvertedToQuery
3030

3131
namespace F23.StringSimilarity
@@ -39,11 +39,27 @@ public Jaccard() { }
3939
/// <summary>
4040
/// Compute jaccard index: |A inter B| / |A union B|.
4141
/// </summary>
42-
/// <param name="s1">First string</param>
43-
/// <param name="s2">Second string</param>
44-
/// <returns>Similarity</returns>
42+
/// <param name="s1">The first string to compare.</param>
43+
/// <param name="s2">The second string to compare.</param>
44+
/// <returns>The Jaccard index in the range [0, 1]</returns>
45+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
4546
public double Similarity(string s1, string s2)
4647
{
48+
if (s1 == null)
49+
{
50+
throw new ArgumentNullException(nameof(s1));
51+
}
52+
53+
if (s2 == null)
54+
{
55+
throw new ArgumentNullException(nameof(s2));
56+
}
57+
58+
if (s1.Equals(s2))
59+
{
60+
return 1;
61+
}
62+
4763
var profile1 = GetProfile(s1);
4864
var profile2 = GetProfile(s2);
4965

@@ -66,9 +82,10 @@ public double Similarity(string s1, string s2)
6682
/// <summary>
6783
/// Distance is computed as 1 - similarity.
6884
/// </summary>
69-
/// <param name="s1">First string</param>
70-
/// <param name="s2">Second string</param>
71-
/// <returns>Distance</returns>
85+
/// <param name="s1">The first string to compare.</param>
86+
/// <param name="s2">The second string to compare.</param>
87+
/// <returns>1 - the Jaccard similarity.</returns>
88+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
7289
public double Distance(string s1, string s2)
7390
=> 1.0 - Similarity(s1, s2);
7491
}

src/F23.StringSimilarity/JaroWinkler.cs

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
using System;
2626
using System.Linq;
2727
using F23.StringSimilarity.Interfaces;
28+
// ReSharper disable SuggestVarOrType_Elsewhere
29+
// ReSharper disable LoopCanBeConvertedToQuery
2830

2931
namespace F23.StringSimilarity
3032
{
@@ -38,9 +40,9 @@ namespace F23.StringSimilarity
3840
/// The distance is computed as 1 - Jaro-Winkler similarity.
3941
public class JaroWinkler : INormalizedStringSimilarity, INormalizedStringDistance
4042
{
41-
private static readonly double DEFAULT_THRESHOLD = 0.7;
42-
private static readonly int THREE = 3;
43-
private static readonly double JW_COEF = 0.1;
43+
private const double DEFAULT_THRESHOLD = 0.7;
44+
private const int THREE = 3;
45+
private const double JW_COEF = 0.1;
4446

4547
/// <summary>
4648
/// The current value of the threshold used for adding the Winkler bonus. The default value is 0.7.
@@ -64,19 +66,30 @@ public JaroWinkler(double threshold)
6466
{
6567
Threshold = threshold;
6668
}
67-
68-
/**
69-
* Compute JW similarity.
70-
* @param s1
71-
* @param s2
72-
* @return
73-
*/
69+
70+
/// <summary>
71+
/// Compute Jaro-Winkler similarity.
72+
/// </summary>
73+
/// <param name="s1">The first string to compare.</param>
74+
/// <param name="s2">The second string to compare.</param>
75+
/// <returns>The Jaro-Winkler similarity in the range [0, 1]</returns>
76+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
7477
public double Similarity(string s1, string s2)
7578
{
76-
if (s1 == null) s1 = string.Empty;
77-
if (s2 == null) s2 = string.Empty;
79+
if (s1 == null)
80+
{
81+
throw new ArgumentNullException(nameof(s1));
82+
}
7883

79-
if (string.Equals(s1, s2)) return 1f;
84+
if (s2 == null)
85+
{
86+
throw new ArgumentNullException(nameof(s2));
87+
}
88+
89+
if (s1.Equals(s2))
90+
{
91+
return 1f;
92+
}
8093

8194
int[] mtp = Matches(s1, s2);
8295
float m = mtp[0];
@@ -94,19 +107,20 @@ public double Similarity(string s1, string s2)
94107
}
95108
return jw;
96109
}
97-
110+
98111
/// <summary>
99112
/// Return 1 - similarity.
100113
/// </summary>
101-
/// <param name="s1"></param>
102-
/// <param name="s2"></param>
114+
/// <param name="s1">The first string to compare.</param>
115+
/// <param name="s2">The second string to compare.</param>
103116
/// <returns>1 - similarity</returns>
117+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
104118
public double Distance(string s1, string s2)
105119
=> 1.0 - Similarity(s1, s2);
106120

107121
private int[] Matches(string s1, string s2)
108122
{
109-
String max, min;
123+
string max, min;
110124
if (s1.Length > s2.Length)
111125
{
112126
max = s1;

src/F23.StringSimilarity/Levenshtein.cs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424

2525
using System;
2626
using F23.StringSimilarity.Interfaces;
27+
// ReSharper disable SuggestVarOrType_Elsewhere
28+
// ReSharper disable TooWideLocalVariableScope
2729

2830
namespace F23.StringSimilarity
2931
{
@@ -52,11 +54,22 @@ public class Levenshtein : IMetricStringDistance
5254
/// only 2 rows of data. The space requirement is thus O(m) and the algorithm
5355
/// runs in O(mn).
5456
/// </summary>
55-
/// <param name="s1">The first string</param>
56-
/// <param name="s2">The second string</param>
57+
/// <param name="s1">The first string to compare.</param>
58+
/// <param name="s2">The second string to compare.</param>
5759
/// <returns>The Levenshtein distance between strings</returns>
60+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
5861
public double Distance(string s1, string s2)
5962
{
63+
if (s1 == null)
64+
{
65+
throw new ArgumentNullException(nameof(s1));
66+
}
67+
68+
if (s2 == null)
69+
{
70+
throw new ArgumentNullException(nameof(s2));
71+
}
72+
6073
if (s1.Equals(s2))
6174
{
6275
return 0;

src/F23.StringSimilarity/LongestCommonSubsequence.cs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
using System;
2626
using F23.StringSimilarity.Interfaces;
27+
// ReSharper disable SuggestVarOrType_Elsewhere
2728

2829
namespace F23.StringSimilarity
2930
{
@@ -49,24 +50,41 @@ public class LongestCommonSubsequence : IStringDistance
4950
/// Return the LCS distance between strings s1 and s2, computed as |s1| +
5051
/// |s2| - 2 * |LCS(s1, s2)|.
5152
/// </summary>
52-
/// <param name="s1">The first string</param>
53-
/// <param name="s2">The second string</param>
53+
/// <param name="s1">The first string to compare.</param>
54+
/// <param name="s2">The second string to compare.</param>
5455
/// <returns>
5556
/// The LCS distance between strings s1 and s2, computed as |s1| +
5657
/// |s2| - 2 * |LCS(s1, s2)|
5758
/// </returns>
59+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
5860
public double Distance(string s1, string s2)
5961
{
62+
if (s1 == null)
63+
{
64+
throw new ArgumentNullException(nameof(s1));
65+
}
66+
67+
if (s2 == null)
68+
{
69+
throw new ArgumentNullException(nameof(s2));
70+
}
71+
72+
if (s1.Equals(s2))
73+
{
74+
return 0;
75+
}
76+
6077
return s1.Length + s2.Length - 2 * Length(s1, s2);
6178
}
6279

6380
/// <summary>
6481
/// Return the length of Longest Common Subsequence (LCS) between strings s1
6582
/// and s2.
6683
/// </summary>
67-
/// <param name="s1">The first string</param>
68-
/// <param name="s2">The second string</param>
84+
/// <param name="s1">The first string to compare.</param>
85+
/// <param name="s2">The second string to compare.</param>
6986
/// <returns>The length of LCS(s1, s2)</returns>
87+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
7088
public int Length(string s1, string s2)
7189
{
7290
/* function LCSLength(X[1..m], Y[1..n])

src/F23.StringSimilarity/MetricLCS.cs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,27 @@ public class MetricLCS : IMetricStringDistance, INormalizedStringDistance
3939
/// Distance metric based on Longest Common Subsequence, computed as
4040
/// 1 - |LCS(s1, s2)| / max(|s1|, |s2|).
4141
/// </summary>
42-
/// <param name="s1">The first string</param>
43-
/// <param name="s2">The second string</param>
42+
/// <param name="s1">The first string to compare.</param>
43+
/// <param name="s2">The second string to compare.</param>
4444
/// <returns>LCS distance metric</returns>
45+
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
4546
public double Distance(string s1, string s2)
4647
{
48+
if (s1 == null)
49+
{
50+
throw new ArgumentNullException(nameof(s1));
51+
}
52+
53+
if (s2 == null)
54+
{
55+
throw new ArgumentNullException(nameof(s2));
56+
}
57+
58+
if (s1.Equals(s2))
59+
{
60+
return 0;
61+
}
62+
4763
int mLen = Math.Max(s1.Length, s2.Length);
4864

4965
if (mLen == 0) return 0.0;

0 commit comments

Comments
 (0)