Skip to content

Commit d77b543

Browse files
committed
v1.0.0
1 parent d3ea15d commit d77b543

File tree

11 files changed

+892
-0
lines changed

11 files changed

+892
-0
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
# SuffixTreeSharp
2+
[![NuGet Status](http://nugetstatus.com/SuffixTreeSharp.png)](http://nugetstatus.com/packages/SuffixTreeSharp)
3+
24
Generalized Suffix Tree in pure C#
5+
6+
Targeting .NET Standard 1.6
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<TargetFramework>netcoreapp3.1</TargetFramework>
5+
6+
<IsPackable>false</IsPackable>
7+
</PropertyGroup>
8+
9+
<ItemGroup>
10+
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
11+
<PackageReference Include="MSTest.TestAdapter" Version="2.2.3" />
12+
<PackageReference Include="MSTest.TestFramework" Version="2.2.3" />
13+
<PackageReference Include="coverlet.collector" Version="3.0.2" />
14+
</ItemGroup>
15+
16+
<ItemGroup>
17+
<ProjectReference Include="..\SuffixTreeSharp\SuffixTreeSharp.csproj" />
18+
</ItemGroup>
19+
20+
</Project>
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
using System.Collections.Generic;
2+
using System.Linq;
3+
using Microsoft.VisualStudio.TestTools.UnitTesting;
4+
5+
namespace SuffixTreeSharp.Test
6+
{
7+
[TestClass]
public class SuffixTreeTest
{
    /// <summary>
    /// Fails the current test unless <paramref name="collection"/> is non-null and has no elements.
    /// </summary>
    /// <typeparam name="T">Element type of the collection.</typeparam>
    /// <param name="collection">The collection expected to be empty.</param>
    public static void AssertEmpty<T>(ICollection<T> collection)
    {
        Assert.IsNotNull(collection, "Expected empty collection, but it was null.");
        // AreEqual (rather than IsTrue on a comparison) reports the actual count on failure.
        Assert.AreEqual(0, collection.Count, "Expected empty collection.");
    }

    /// <summary>
    /// Asserts that searching <paramref name="tree"/> for every non-empty substring of
    /// <paramref name="word"/> yields a result that contains <paramref name="index"/>.
    /// </summary>
    /// <param name="tree">The tree under test.</param>
    /// <param name="word">The word whose substrings are looked up.</param>
    /// <param name="index">The index the word was stored under via <c>Put</c>.</param>
    private static void AssertAllSubstringsFound(GeneralizedSuffixTree tree, string word, int index)
    {
        foreach (var substring in word.GetSubstrings())
        {
            var result = tree.Search(substring);
            Assert.IsNotNull(result, $"result null for string {substring} after adding {word}");
            Assert.IsTrue(
                result.Contains(index),
                $"substring {substring} not found under index {index} after adding {word}");
        }
    }

    /// <summary>
    /// Runs the three-phase scenario shared by the "addition" tests:
    /// add every word and verify it immediately, verify all words again once
    /// everything has been added, then add every word a second time under a
    /// new index (<c>i + words.Length</c>) and verify the new index is found.
    /// </summary>
    /// <param name="tree">An empty tree to populate.</param>
    /// <param name="words">The words to insert; word <c>i</c> is first stored under index <c>i</c>.</param>
    private static void AssertAdditionIsStable(GeneralizedSuffixTree tree, string[] words)
    {
        for (var i = 0; i < words.Length; ++i)
        {
            tree.Put(words[i], i);
            AssertAllSubstringsFound(tree, words[i], i);
        }

        // verify post-addition
        for (var i = 0; i < words.Length; ++i)
        {
            AssertAllSubstringsFound(tree, words[i], i);
        }

        // add again, to see if it's stable
        for (var i = 0; i < words.Length; ++i)
        {
            var secondIndex = i + words.Length;
            tree.Put(words[i], secondIndex);
            AssertAllSubstringsFound(tree, words[i], secondIndex);
        }
    }

    /// <summary>
    /// A single word is fully searchable, and strings that are not substrings of it find nothing.
    /// </summary>
    [TestMethod]
    public void TestBasicTreeGeneration()
    {
        var input = new GeneralizedSuffixTree();
        var word = "cacao";
        input.Put(word, 0);

        /* Test that every substring is contained within the tree */
        AssertAllSubstringsFound(input, word, 0);

        AssertEmpty(input.Search("caco"));
        AssertEmpty(input.Search("cacaoo"));
        AssertEmpty(input.Search("ccacao"));

        input = new GeneralizedSuffixTree();
        word = "bookkeeper";
        input.Put(word, 0);
        AssertAllSubstringsFound(input, word, 0);

        AssertEmpty(input.Search("books"));
        AssertEmpty(input.Search("boke"));
        AssertEmpty(input.Search("ookepr"));
    }

    /// <summary>
    /// A word with an overlapping repeated prefix ("cacaca...") is fully searchable.
    /// </summary>
    [TestMethod]
    public void TestWeirdword()
    {
        var input = new GeneralizedSuffixTree();
        var word = "cacacato";
        input.Put(word, 0);

        /* Test that every substring is contained within the tree */
        AssertAllSubstringsFound(input, word, 0);
    }

    /// <summary>
    /// Test whether the tree can handle repetitions: the same word stored under two indices.
    /// </summary>
    [TestMethod]
    public void TestDouble()
    {
        var input = new GeneralizedSuffixTree();
        var word = "cacao";
        input.Put(word, 0);
        input.Put(word, 1);

        AssertAllSubstringsFound(input, word, 0);
        AssertAllSubstringsFound(input, word, 1);
    }

    /// <summary>
    /// Words that are prefixes of one another can be added, re-verified and re-added.
    /// </summary>
    [TestMethod]
    public void TestBananaAddition()
    {
        var input = new GeneralizedSuffixTree();
        var words = new[] { "banana", "bano", "ba" };

        AssertAdditionIsStable(input, words);
    }

    /// <summary>
    /// Several words with heavily shared substrings can be added, re-verified and re-added.
    /// </summary>
    [TestMethod]
    public void TestAddition()
    {
        var input = new GeneralizedSuffixTree();
        var words = new[] { "cacaor", "caricato", "cacato", "cacata", "caricata", "cacao", "banana" };

        AssertAdditionIsStable(input, words);

        // input.computeCount();
        // TestResultsCount(input.getRoot());

        AssertEmpty(input.Search("aoca"));
    }

    /// <summary>
    /// A larger sample of longer strings can be added, re-verified and re-added.
    /// </summary>
    [TestMethod]
    public void TestSampleAddition()
    {
        var input = new GeneralizedSuffixTree();
        var words = new[]
        {
            "libertypike",
            "franklintn",
            "carothersjohnhenryhouse",
            "carothersezealhouse",
            "acrossthetauntonriverfromdightonindightonrockstatepark",
            "dightonma",
            "dightonrock",
            "6mineoflowgaponlowgapfork",
            "lowgapky",
            "lemasterjohnjandellenhouse",
            "lemasterhouse",
            "70wilburblvd",
            "poughkeepsieny",
            "freerhouse",
            "701laurelst",
            "conwaysc",
            "hollidayjwjrhouse",
            "mainandappletonsts",
            "menomoneefallswi",
            "mainstreethistoricdistrict",
            "addressrestricted",
            "brownsmillsnj",
            "hanoverfurnace",
            "hanoverbogironfurnace",
            "sofsavannahatfergusonaveandbethesdard",
            "savannahga",
            "bethesdahomeforboys",
            "bethesda"
        };

        AssertAdditionIsStable(input, words);

        // input.computeCount();
        // TestResultsCount(input.getRoot());

        AssertEmpty(input.Search("aoca"));
    }

    // NOTE(review): disabled result-count check, kept verbatim for reference; it is
    // written in Java-style syntax and relies on members (computeCount, getRoot) that
    // are not exercised anywhere in this test class.
    // private void TestResultsCount(Node n) {
    //     for (Edge e : n.getEdges().values()) {
    //         assertEquals(n.getData(-1).size(), n.getResultCount());
    //         TestResultsCount(e.getDest());
    //     }
    // }

    /* Testing a Test method :) */
    /// <summary>
    /// Sanity check for the <c>GetSubstrings</c> helper that the other tests depend on.
    /// </summary>
    [TestMethod]
    public void TestGetSubstrings()
    {
        var expected = new HashSet<string> { "w", "r", "d", "wr", "rd", "wrd" };

        var actual = "wrd".GetSubstrings();

        Assert.IsTrue(actual.SetEquals(expected));
    }
}
249+
}

SuffixTreeSharp.Test/Utils.cs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
6+
namespace SuffixTreeSharp.Test
7+
{
8+
public static class Utils
{
    /// <summary>
    /// Normalizes an input string: lower-cases it and removes every character
    /// that is not an ASCII letter (<c>a</c>-<c>z</c>) or digit (<c>0</c>-<c>9</c>).
    /// </summary>
    /// <remarks>
    /// Although declared as an extension method, <c>value.Normalize()</c> binds to the
    /// built-in instance method <see cref="string.Normalize()"/> (instance methods take
    /// precedence over extensions). Call this helper as <c>Utils.Normalize(value)</c>.
    /// </remarks>
    /// <param name="input">The input string to normalize.</param>
    /// <returns>
    /// <paramref name="input"/> all lower-case, without any non-alphanumeric character.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public static string Normalize(this string input)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        var output = new StringBuilder(input.Length);

        // Lower-case with the invariant culture: the culture-sensitive ToLower() maps
        // 'I' to a dotless 'ı' under e.g. tr-TR, which the ASCII filter below would drop.
        foreach (var c in input.ToLowerInvariant())
        {
            if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
            {
                output.Append(c);
            }
        }

        return output.ToString();
    }

    /// <summary>
    /// Computes the set of all non-empty substrings contained within <paramref name="str"/>.
    /// It is fairly inefficient, but it is used just in tests ;)
    /// </summary>
    /// <param name="str">The string to compute substrings of.</param>
    /// <returns>
    /// The set of all distinct non-empty substrings of <paramref name="str"/>;
    /// an empty set when <paramref name="str"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
    public static HashSet<string> GetSubstrings(this string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException(nameof(str));
        }

        var substrings = new HashSet<string>();

        // Enumerate every (start, length) window; the set removes duplicates.
        for (var start = 0; start < str.Length; ++start)
        {
            for (var length = 1; start + length <= str.Length; ++length)
            {
                substrings.Add(str.Substring(start, length));
            }
        }

        return substrings;
    }
}
50+
}

SuffixTreeSharp.sln

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
2+
Microsoft Visual Studio Solution File, Format Version 12.00
3+
# Visual Studio Version 16
4+
VisualStudioVersion = 16.0.31911.196
5+
MinimumVisualStudioVersion = 10.0.40219.1
6+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SuffixTreeSharp", "SuffixTreeSharp\SuffixTreeSharp.csproj", "{C0986C3D-E80F-4753-B0AD-F185EB838A1D}"
7+
EndProject
8+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SuffixTreeSharp.Test", "SuffixTreeSharp.Test\SuffixTreeSharp.Test.csproj", "{902F9192-EED9-44B6-8B39-222B725545E3}"
9+
EndProject
10+
Global
11+
GlobalSection(SolutionConfigurationPlatforms) = preSolution
12+
Debug|Any CPU = Debug|Any CPU
13+
Release|Any CPU = Release|Any CPU
14+
EndGlobalSection
15+
GlobalSection(ProjectConfigurationPlatforms) = postSolution
16+
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17+
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Debug|Any CPU.Build.0 = Debug|Any CPU
18+
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Release|Any CPU.ActiveCfg = Release|Any CPU
19+
{C0986C3D-E80F-4753-B0AD-F185EB838A1D}.Release|Any CPU.Build.0 = Release|Any CPU
20+
{902F9192-EED9-44B6-8B39-222B725545E3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21+
{902F9192-EED9-44B6-8B39-222B725545E3}.Debug|Any CPU.Build.0 = Debug|Any CPU
22+
{902F9192-EED9-44B6-8B39-222B725545E3}.Release|Any CPU.ActiveCfg = Release|Any CPU
23+
{902F9192-EED9-44B6-8B39-222B725545E3}.Release|Any CPU.Build.0 = Release|Any CPU
24+
EndGlobalSection
25+
GlobalSection(SolutionProperties) = preSolution
26+
HideSolutionNode = FALSE
27+
EndGlobalSection
28+
GlobalSection(ExtensibilityGlobals) = postSolution
29+
SolutionGuid = {3879536C-DBC5-462A-82F3-AB8CED5E0F58}
30+
EndGlobalSection
31+
EndGlobal
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
6+
namespace SuffixTreeSharp
7+
{
8+
/// <summary>
/// An <see cref="ISearchTree"/> that forwards each query to every tree in
/// <see cref="SearchTrees"/> and returns the union of their results.
/// </summary>
public class CombinedSearchTrees : ISearchTree
{
    /// <summary>
    /// The trees queried by <see cref="Search"/>. Add or remove trees directly on this list.
    /// </summary>
    public readonly List<ISearchTree> SearchTrees = new List<ISearchTree>();

    /// <summary>
    /// Searches every tree in <see cref="SearchTrees"/> for <paramref name="word"/>
    /// and merges the results.
    /// </summary>
    /// <param name="word">The string to search for; passed unchanged to each tree.</param>
    /// <returns>
    /// A newly allocated set holding every value returned by any of the trees;
    /// empty when there are no trees or none of them reports a match.
    /// </returns>
    public ISet<int> Search(string word)
    {
        // Always merge into a set owned by this call. The previous implementation
        // unioned into (and returned) whichever operand was larger, which could be
        // a set handed out by one of the underlying trees - altering that set and
        // leaving the caller holding an alias to it.
        var combined = new HashSet<int>();

        foreach (var searchTree in SearchTrees)
        {
            var partial = searchTree.Search(word);

            // A tree that reports "nothing" as null contributes no elements.
            if (partial != null)
            {
                combined.UnionWith(partial);
            }
        }

        return combined;
    }
}
34+
}

0 commit comments

Comments
 (0)