Skip to content

Commit 551e431

Browse files
committed
Catch up to upstream 1.1.0 release by adding insertion/deletion strategy to WeightedLevenshtein. Update xunit and address analysis warnings.
1 parent 3d9ddb8 commit 551e431

File tree

7 files changed

+136
-23
lines changed

7 files changed

+136
-23
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
2+
<s:Boolean x:Key="/Default/UserDictionary/Words/=Levenshtein/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
namespace F23.StringSimilarity
2+
{
3+
/// <summary>
4+
/// As an adjunct to <see cref="ICharacterSubstitution"/>, this interface
5+
/// allows you to specify the cost of deletion or insertion of a
6+
/// character.
7+
/// </summary>
/// <remarks>
/// An implementation is passed as the second constructor argument of
/// <c>WeightedLevenshtein</c>, alongside the substitution strategy.
/// </remarks>
8+
public interface ICharacterInsDel
9+
{
10+
/// <summary>
11+
/// Computes the deletion cost.
12+
/// </summary>
13+
/// <param name="c">The character being deleted.</param>
14+
/// <returns>The cost to be allocated to deleting the given character,
15+
/// in the range [0, 1].</returns>
16+
double DeletionCost(char c);
17+
18+
/// <summary>
19+
/// Computes the insertion cost.
20+
/// </summary>
21+
/// <param name="c">The character being inserted.</param>
22+
/// <returns>The cost to be allocated to inserting the given character,
23+
/// in the range [0, 1].</returns>
24+
double InsertionCost(char c);
25+
}
26+
}

src/F23.StringSimilarity/WeightedLevenshtein.cs

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
using F23.StringSimilarity.Interfaces;
2727
// ReSharper disable SuggestVarOrType_Elsewhere
2828
// ReSharper disable TooWideLocalVariableScope
29+
// ReSharper disable IntroduceOptionalParameters.Global
2930

3031
namespace F23.StringSimilarity
3132
{
@@ -34,14 +35,28 @@ namespace F23.StringSimilarity
3435
public class WeightedLevenshtein : IStringDistance
3536
{
3637
private readonly ICharacterSubstitution _characterSubstitution;
38+
private readonly ICharacterInsDel _characterInsDel;
3739

3840
/// <summary>
39-
/// Create a new instance with provided character substitution.
41+
/// Instantiate with provided character substitution.
4042
/// </summary>
4143
/// <param name="characterSubstitution">The strategy to determine character substitution weights.</param>
4244
public WeightedLevenshtein(ICharacterSubstitution characterSubstitution)
45+
// Delegate to the two-argument constructor with no insertion/deletion
// strategy (null); insertions and deletions then fall back to a cost of 1.0.
: this(characterSubstitution, null)
46+
{
47+
}
48+
49+
/// <summary>
50+
/// Instantiate with provided character substitution, insertion, and
51+
/// deletion weights.
52+
/// </summary>
53+
/// <param name="characterSubstitution">The strategy to determine character substitution weights.</param>
54+
/// <param name="characterInsDel">The strategy to determine character insertion/deletion weights.
/// May be <c>null</c>, in which case every insertion and deletion costs 1.0.</param>
55+
public WeightedLevenshtein(ICharacterSubstitution characterSubstitution,
56+
ICharacterInsDel characterInsDel)
4357
{
4458
_characterSubstitution = characterSubstitution;
59+
// Stored as-is; a null strategy is handled where the costs are looked up.
_characterInsDel = characterInsDel;
4560
}
4661

4762
/// <summary>
@@ -84,45 +99,64 @@ public double Distance(string s1, string s2)
8499
double[] vtemp;
85100

86101
// initialize v0 (the previous row of distances)
87-
// this row is A[0][i]: edit distance for an empty s
88-
// the distance is just the number of characters to delete from t
89-
for (int i = 0; i < v0.Length; i++)
102+
// this row is A[0][i]: edit distance for an empty s1
103+
// the distance is the cost of inserting each character of s2
104+
v0[0] = 0;
105+
for (int i = 1; i < v0.Length; i++)
90106
{
91-
v0[i] = i;
107+
v0[i] = v0[i - 1] + InsertionCost(s2[i - 1]);
92108
}
93109

94110
for (int i = 0; i < s1.Length; i++)
95111
{
112+
char s1i = s1[i];
113+
double deletionCost = DeletionCost(s1i);
114+
96115
// calculate v1 (current row distances) from the previous row v0
97116
// first element of v1 is A[i+1][0]
98-
// edit distance is delete (i+1) chars from s to match empty t
99-
v1[0] = i + 1;
117+
// Edit distance is the cost of deleting characters from s1
118+
// to match an empty s2.
119+
v1[0] = v0[0] + deletionCost;
100120

101121
// use formula to fill in the rest of the row
102122
for (int j = 0; j < s2.Length; j++)
103123
{
124+
char s2j = s2[j];
104125
double cost = 0;
105-
if (s1[i] != s2[j])
126+
127+
if (s1i != s2j)
106128
{
107-
cost = _characterSubstitution.Cost(s1[i], s2[j]);
129+
cost = _characterSubstitution.Cost(s1i, s2j);
108130
}
131+
132+
double insertionCost = InsertionCost(s2j);
133+
109134
v1[j + 1] = Math.Min(
110-
v1[j] + 1, // Cost of insertion
135+
v1[j] + insertionCost, // Cost of insertion
111136
Math.Min(
112-
v0[j + 1] + 1, // Cost of remove
137+
v0[j + 1] + deletionCost, // Cost of deletion
113138
v0[j] + cost)); // Cost of substitution
114139
}
115140

116141
// copy v1 (current row) to v0 (previous row) for next iteration
117-
//System.arraycopy(v1, 0, v0, 0, v0.length);
142+
// System.arraycopy(v1, 0, v0, 0, v0.length);
118143
// Flip references to current and previous row
119144
vtemp = v0;
120145
v0 = v1;
121146
v1 = vtemp;
122-
123147
}
124148

125149
return v0[s2.Length];
126150
}
151+
152+
/// <summary>
/// Returns the cost of inserting <paramref name="c"/>, taken from the
/// insertion/deletion strategy when one was supplied.
/// </summary>
/// <param name="c">The character being inserted.</param>
/// <returns>The strategy's insertion cost, or 1.0 when the strategy field is null.</returns>
private double InsertionCost(char c)
153+
{
154+
// The null-conditional call yields null only when no strategy is set,
// so the 1.0 default applies in exactly that case.
return _characterInsDel?.InsertionCost(c) ?? 1.0;
155+
}
156+
157+
/// <summary>
/// Returns the cost of deleting <paramref name="c"/>, taken from the
/// insertion/deletion strategy when one was supplied.
/// </summary>
/// <param name="c">The character being deleted.</param>
/// <returns>The strategy's deletion cost, or 1.0 when the strategy field is null.</returns>
private double DeletionCost(char c)
158+
{
159+
// The null-conditional call yields null only when no strategy is set,
// so the 1.0 default applies in exactly that case.
return _characterInsDel?.DeletionCost(c) ?? 1.0;
160+
}
127161
}
128162
}

test/F23.StringSimilarity.Tests/F23.StringSimilarity.Tests.csproj

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515
</ItemGroup>
1616

1717
<ItemGroup>
18-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.3.0" />
19-
<PackageReference Include="xunit" Version="2.2.0" />
20-
<PackageReference Include="xunit.runner.visualstudio" Version="2.2.0" />
18+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.8.0" />
19+
<PackageReference Include="xunit" Version="2.4.0" />
20+
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.0">
21+
<PrivateAssets>all</PrivateAssets>
22+
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
23+
</PackageReference>
2124
</ItemGroup>
2225

2326
<ItemGroup>

test/F23.StringSimilarity.Tests/Support/ArrayExtensionsTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public void TestWithPadding()
4040

4141
var padded = source.WithPadding(1200);
4242

43-
Assert.Equal(actual: 1200, expected: padded.Length);
43+
Assert.Equal(expected: 1200, actual: padded.Length);
4444

4545
Assert.True(padded.Take(1000).All(x => x == 42));
4646
Assert.True(padded.Skip(1000).Take(200).All(x => x == 0));

test/F23.StringSimilarity.Tests/TestUtil/NullEmptyTests.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,16 @@ public static void TestSimilarity(INormalizedStringSimilarity instance)
5454
Assert.Equal(0.0, instance.Similarity("", "foo"), 1);
5555
Assert.Equal(0.0, instance.Similarity("foo", ""), 1);
5656

57-
Assert.Throws(typeof(ArgumentNullException), () => instance.Similarity(null, null));
58-
Assert.Throws(typeof(ArgumentNullException), () => instance.Similarity(null, ""));
59-
Assert.Throws(typeof(ArgumentNullException), () => instance.Similarity("", null));
57+
Assert.Throws<ArgumentNullException>(() => instance.Similarity(null, null));
58+
Assert.Throws<ArgumentNullException>(() => instance.Similarity(null, ""));
59+
Assert.Throws<ArgumentNullException>(() => instance.Similarity("", null));
6060
}
6161

6262
public static void AssertArgumentNullExceptions(IStringDistance instance)
6363
{
64-
Assert.Throws(typeof(ArgumentNullException), () => instance.Distance(null, null));
65-
Assert.Throws(typeof(ArgumentNullException), () => instance.Distance(null, ""));
66-
Assert.Throws(typeof(ArgumentNullException), () => instance.Distance("", null));
64+
Assert.Throws<ArgumentNullException>(() => instance.Distance(null, null));
65+
Assert.Throws<ArgumentNullException>(() => instance.Distance(null, ""));
66+
Assert.Throws<ArgumentNullException>(() => instance.Distance("", null));
6767
}
6868
}
6969
}

test/F23.StringSimilarity.Tests/WeightedLevenshteinTest.cs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,31 @@ public void TestDistance()
3838
Assert.Equal(0.5, instance.Distance("String1", "Srring1"), 1);
3939
Assert.Equal(1.5, instance.Distance("String1", "Srring2"), 1);
4040

41+
// One insert or delete.
42+
Assert.Equal(1.0, instance.Distance("Strng", "String"), 1);
43+
Assert.Equal(1.0, instance.Distance("String", "Strng"), 1);
44+
45+
NullEmptyTests.TestDistance(instance);
46+
}
47+
48+
// Exercises WeightedLevenshtein when both a substitution strategy and an
// insertion/deletion strategy are supplied.
[Fact]
49+
public void TestDistanceCharacterInsDelInterface()
50+
{
51+
var instance = new WeightedLevenshtein(new ExampleCharSub(), new ExampleInsDel());
52+
53+
// Same as TestDistance above.
54+
Assert.Equal(0.0, instance.Distance("String1", "String1"), 1);
55+
Assert.Equal(0.5, instance.Distance("String1", "Srring1"), 1);
56+
Assert.Equal(1.5, instance.Distance("String1", "Srring2"), 1);
57+
// Cost of insert of 'i' is less than normal, so these scores are
58+
// different than TestDistance above. Note that the cost of delete
59+
// has been set differently than the cost of insert, so the distance
60+
// call is not symmetric in its arguments if an 'i' has changed.
61+
// Inserting 'i' costs 0.5 in ExampleInsDel.
Assert.Equal(0.5, instance.Distance("Strng", "String"), 1);
62+
// Deleting 'i' costs 0.8 in ExampleInsDel.
Assert.Equal(0.8, instance.Distance("String", "Strng"), 1);
63+
// The changed character here is 'n', which ExampleInsDel prices at 1.0
// for both insertion and deletion.
Assert.Equal(1.0, instance.Distance("Strig", "String"), 1);
64+
Assert.Equal(1.0, instance.Distance("String", "Strig"), 1);
65+
4166
NullEmptyTests.TestDistance(instance);
4267
}
4368

@@ -58,5 +83,28 @@ public double Cost(char c1, char c2)
5883
return 1.0;
5984
}
6085
}
86+
87+
// Test strategy that makes the character 'i' cheaper to insert or delete
// than any other character, with different costs for the two operations.
private class ExampleInsDel : ICharacterInsDel
88+
{
89+
// Deleting 'i' costs 0.8; deleting any other character costs 1.0.
public double DeletionCost(char c)
90+
{
91+
if (c == 'i')
92+
{
93+
return 0.8;
94+
}
95+
96+
return 1.0;
97+
}
98+
99+
// Inserting 'i' costs 0.5; inserting any other character costs 1.0.
public double InsertionCost(char c)
100+
{
101+
if (c == 'i')
102+
{
103+
return 0.5;
104+
}
105+
106+
return 1.0;
107+
}
108+
}
61109
}
62110
}

0 commit comments

Comments
 (0)