Skip to content

Commit 87178ca

Browse files
committed
Better default fuzzy search support
1 parent d6269a0 commit 87178ca

File tree

8 files changed

+221
-10
lines changed

8 files changed

+221
-10
lines changed

docs/guide/query-syntax.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,43 @@ Add `~N` to find words within N positions of each other:
4040
"hello world"~2 // "hello" and "world" within 2 words of each other
4141
```
4242

43+
## Fuzzy Search
44+
45+
Add `~` to a term to perform fuzzy matching based on edit distance (Levenshtein distance):
46+
47+
```
48+
roam~ // Fuzzy search with default edit distance of 2
49+
roam~1 // Fuzzy search with edit distance of 1
50+
roam~0 // Exact match (edit distance of 0)
51+
```
52+
53+
The edit distance specifies the maximum number of character changes (insertions, deletions, substitutions) allowed to match a term. For example, `roam~1` would match:
54+
55+
- `roam` (exact)
56+
- `foam` (1 substitution)
57+
- `roams` (1 insertion)
58+
59+
::: tip
60+
Edit distance must be an integer. The default value is 2 when omitted. Lower values (0-1) are more restrictive and generally faster.
61+
:::
62+
63+
::: info AST Representation
64+
The AST preserves whether the fuzzy distance was explicitly specified:
65+
- `term~` → `FuzzyDistance = TermNode.DefaultFuzzyDistance` (sentinel value -1)
66+
- `term~2` → `FuzzyDistance = 2` (explicitly specified)
67+
68+
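Both resolve to an effective distance of 2 when you call `GetEffectiveFuzzyDistance()`. To distinguish a default from an explicit distance, inspect the raw `FuzzyDistance` property and compare it with `TermNode.DefaultFuzzyDistance`.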
69+
:::
70+
71+
### Fuzzy with Field Queries
72+
73+
Fuzzy search works with field queries:
74+
75+
```
76+
title:hello~2 // Fuzzy search in the "title" field
77+
user.name:john~1 // Fuzzy search in nested field
78+
```
79+
4380
## Field Queries
4481

4582
Specify which field to search:
@@ -267,6 +304,12 @@ level:error AND timestamp:[now-1h TO now] AND (service:api OR service:web) NOT t
267304
name:john* AND role:(admin OR moderator) AND status:active AND lastLogin:[now-30d TO *]
268305
```
269306

307+
### Fuzzy Name Search
308+
309+
```
310+
firstName:john~1 AND lastName:smith~2
311+
```
312+
270313
### Document Search
271314

272315
```

src/Foundatio.Lucene.Elasticsearch/ElasticsearchQueryBuilderVisitor.cs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,25 +241,31 @@ public override Task<QueryNode> VisitAsync(TermNode node, IQueryVisitorContext c
241241
}
242242
else if (node.FuzzyDistance.HasValue)
243243
{
244-
// Fuzzy query - use Fuzziness property which accepts a string
244+
var fuzziness = node.FuzzyDistance.Value == TermNode.DefaultFuzzyDistance
245+
? "AUTO"
246+
: node.FuzzyDistance.Value.ToString();
247+
245248
if (field is not null)
246249
{
247250
query = new FuzzyQuery((Field)field, term)
248251
{
249-
Fuzziness = node.FuzzyDistance.Value.ToString()
252+
Fuzziness = fuzziness
250253
};
251254
}
252255
else if (_context.DefaultFields is { Length: > 0 })
253256
{
254257
query = new MultiMatchQuery(term)
255258
{
256259
Fields = Fields.FromStrings(_context.DefaultFields),
257-
Fuzziness = new Fuzziness(node.FuzzyDistance.Value.ToString())
260+
Fuzziness = new Fuzziness(fuzziness)
258261
};
259262
}
260263
else
261264
{
262-
query = new QueryStringQuery($"{term}~{node.FuzzyDistance.Value}");
265+
var fuzzyString = node.FuzzyDistance.Value == TermNode.DefaultFuzzyDistance
266+
? $"{term}~"
267+
: $"{term}~{node.FuzzyDistance.Value}";
268+
query = new QueryStringQuery(fuzzyString);
263269
}
264270
}
265271
else if (field is null && _context.DefaultFields is { Length: > 1 })

src/Foundatio.Lucene/Ast/MultiTermNode.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,19 @@ public string CombinedText
5656

5757
/// <summary>
5858
/// Optional fuzzy distance (for fuzzy queries with ~).
59+
/// Use <see cref="TermNode.DefaultFuzzyDistance"/> (-1) to indicate default fuzzy distance was requested.
60+
/// Use <see cref="GetEffectiveFuzzyDistance"/> to get the actual fuzzy distance to use.
5961
/// </summary>
6062
public int? FuzzyDistance { get; set; }
63+
64+
/// <summary>
65+
/// Gets the effective fuzzy distance, resolving the default sentinel value to the actual default.
66+
/// </summary>
67+
/// <returns>The fuzzy distance to use, or null if not a fuzzy query.</returns>
68+
public int? GetEffectiveFuzzyDistance()
69+
{
70+
if (FuzzyDistance == TermNode.DefaultFuzzyDistance)
71+
return TermNode.DefaultFuzzyDistanceValue;
72+
return FuzzyDistance;
73+
}
6174
}

src/Foundatio.Lucene/Ast/TermNode.cs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@ namespace Foundatio.Lucene.Ast;
55
/// </summary>
66
public class TermNode : QueryNode
77
{
8+
/// <summary>
9+
/// Sentinel value indicating the default fuzzy distance should be used.
10+
/// When FuzzyDistance equals this value, it means ~ was specified without a number.
11+
/// </summary>
12+
public const int DefaultFuzzyDistance = -1;
13+
14+
/// <summary>
15+
/// The actual default fuzzy distance value used when DefaultFuzzyDistance is specified.
16+
/// This is the Lucene standard default of 2.
17+
/// </summary>
18+
public const int DefaultFuzzyDistanceValue = 2;
819
private ReadOnlyMemory<char> _term;
920
private ReadOnlyMemory<char> _unescapedTerm;
1021

@@ -51,9 +62,22 @@ public string UnescapedTerm
5162

5263
/// <summary>
5364
/// Optional fuzzy distance (for fuzzy queries with ~).
65+
/// Use <see cref="DefaultFuzzyDistance"/> (-1) to indicate default fuzzy distance was requested.
66+
/// Use <see cref="GetEffectiveFuzzyDistance"/> to get the actual fuzzy distance to use.
5467
/// </summary>
5568
public int? FuzzyDistance { get; set; }
5669

70+
/// <summary>
71+
/// Gets the effective fuzzy distance, resolving the default sentinel value to the actual default.
72+
/// </summary>
73+
/// <returns>The fuzzy distance to use, or null if not a fuzzy query.</returns>
74+
public int? GetEffectiveFuzzyDistance()
75+
{
76+
if (FuzzyDistance == DefaultFuzzyDistance)
77+
return DefaultFuzzyDistanceValue;
78+
return FuzzyDistance;
79+
}
80+
5781
/// <summary>
5882
/// Whether this is a prefix query (ends with *).
5983
/// </summary>

src/Foundatio.Lucene/LuceneParser.cs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,6 +1054,10 @@ private float ParseBoost()
10541054
/// <summary>
10551055
/// Parses a fuzzy distance value after ~.
10561056
/// </summary>
1057+
/// <returns>
1058+
/// The explicit fuzzy distance if specified, or <see cref="TermNode.DefaultFuzzyDistance"/>
1059+
/// to indicate the default should be used.
1060+
/// </returns>
10571061
private int ParseFuzzyDistance()
10581062
{
10591063
SkipWhitespace();
@@ -1067,8 +1071,8 @@ private int ParseFuzzyDistance()
10671071
}
10681072
}
10691073

1070-
// Default fuzzy distance
1071-
return 2;
1074+
// Return sentinel value to indicate default fuzzy distance
1075+
return TermNode.DefaultFuzzyDistance;
10721076
}
10731077

10741078
/// <summary>

src/Foundatio.Lucene/QueryStringBuilder.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ private void AppendTerm(TermNode node)
203203
if (node.FuzzyDistance.HasValue)
204204
{
205205
_builder.Append('~');
206-
if (node.FuzzyDistance.Value != 2)
206+
if (node.FuzzyDistance.Value != TermNode.DefaultFuzzyDistanceValue)
207207
{
208208
_builder.Append(node.FuzzyDistance.Value);
209209
}
@@ -312,7 +312,7 @@ private void AppendMultiTerm(MultiTermNode node)
312312
if (node.FuzzyDistance.HasValue)
313313
{
314314
_builder.Append('~');
315-
if (node.FuzzyDistance.Value != 2)
315+
if (node.FuzzyDistance.Value != TermNode.DefaultFuzzyDistanceValue)
316316
{
317317
_builder.Append(node.FuzzyDistance.Value);
318318
}

tests/Foundatio.Lucene.Tests/ParserTests.cs

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ public void Parse_FuzzyTerm_ReturnsTermNodeWithFuzzy()
190190
Assert.IsType<TermNode>(doc.Query);
191191
var term = (TermNode)doc.Query;
192192
Assert.Equal("roam", term.Term);
193-
Assert.Equal(2, term.FuzzyDistance);
193+
Assert.Equal(2, term.FuzzyDistance); // Explicitly specified as 2
194+
Assert.Equal(2, term.GetEffectiveFuzzyDistance());
194195
}
195196

196197
[Fact]
@@ -203,7 +204,27 @@ public void Parse_DefaultFuzzy_ReturnsTermNodeWithDefaultFuzziness()
203204
Assert.IsType<TermNode>(doc.Query);
204205
var term = (TermNode)doc.Query;
205206
Assert.Equal("roam", term.Term);
206-
Assert.Equal(2, term.FuzzyDistance); // Default fuzziness is 2
207+
Assert.Equal(TermNode.DefaultFuzzyDistance, term.FuzzyDistance); // Sentinel value (-1)
208+
Assert.Equal(2, term.GetEffectiveFuzzyDistance()); // Effective value is 2
209+
}
210+
211+
[Fact]
212+
public void Parse_DefaultFuzzy_DifferentFromExplicitTwo()
213+
{
214+
var defaultResult = LuceneQuery.Parse("roam~");
215+
var explicitResult = LuceneQuery.Parse("roam~2");
216+
217+
var defaultTerm = (TermNode)defaultResult.Document.Query!;
218+
var explicitTerm = (TermNode)explicitResult.Document.Query!;
219+
220+
// The raw FuzzyDistance values are different
221+
Assert.Equal(TermNode.DefaultFuzzyDistance, defaultTerm.FuzzyDistance);
222+
Assert.Equal(2, explicitTerm.FuzzyDistance);
223+
Assert.NotEqual(defaultTerm.FuzzyDistance, explicitTerm.FuzzyDistance);
224+
225+
// But the effective values are the same
226+
Assert.Equal(2, defaultTerm.GetEffectiveFuzzyDistance());
227+
Assert.Equal(2, explicitTerm.GetEffectiveFuzzyDistance());
207228
}
208229

209230
[Fact]
@@ -972,6 +993,82 @@ public void Parse_RequiredRangeOperator_ReturnsCorrectStructure()
972993

973994
#region Fuzzy and Proximity Tests
974995

996+
[Fact]
997+
public void Parse_FuzzyWithEditDistance1_ReturnsTermWithFuzzy()
998+
{
999+
var result = LuceneQuery.Parse("roam~1");
1000+
1001+
Assert.True(result.IsSuccess);
1002+
var doc = result.Document;
1003+
Assert.IsType<TermNode>(doc.Query);
1004+
var term = (TermNode)doc.Query;
1005+
Assert.Equal("roam", term.Term);
1006+
Assert.Equal(1, term.FuzzyDistance);
1007+
}
1008+
1009+
[Fact]
1010+
public void Parse_FuzzyWithEditDistance0_ReturnsTermWithFuzzy()
1011+
{
1012+
var result = LuceneQuery.Parse("exact~0");
1013+
1014+
Assert.True(result.IsSuccess);
1015+
var doc = result.Document;
1016+
Assert.IsType<TermNode>(doc.Query);
1017+
var term = (TermNode)doc.Query;
1018+
Assert.Equal("exact", term.Term);
1019+
Assert.Equal(0, term.FuzzyDistance);
1020+
}
1021+
1022+
[Fact]
1023+
public void Parse_FuzzyFieldQuery_ReturnsFieldNodeWithFuzzyTerm()
1024+
{
1025+
var result = LuceneQuery.Parse("title:hello~2");
1026+
1027+
Assert.True(result.IsSuccess);
1028+
var doc = result.Document;
1029+
Assert.IsType<FieldQueryNode>(doc.Query);
1030+
var field = (FieldQueryNode)doc.Query;
1031+
Assert.Equal("title", field.Field);
1032+
Assert.IsType<TermNode>(field.Query);
1033+
var term = (TermNode)field.Query;
1034+
Assert.Equal("hello", term.Term);
1035+
Assert.Equal(2, term.FuzzyDistance);
1036+
}
1037+
1038+
[Fact]
1039+
public void Parse_FuzzyWithBoost_ReturnsTermWithBothModifiers()
1040+
{
1041+
var result = LuceneQuery.Parse("term~2^3");
1042+
1043+
Assert.True(result.IsSuccess);
1044+
var doc = result.Document;
1045+
Assert.IsType<TermNode>(doc.Query);
1046+
var term = (TermNode)doc.Query;
1047+
Assert.Equal("term", term.Term);
1048+
Assert.Equal(2, term.FuzzyDistance);
1049+
Assert.Equal(3.0f, term.Boost);
1050+
}
1051+
1052+
[Fact]
1053+
public void Parse_FuzzyInBooleanQuery_ReturnsBooleanWithFuzzyTerm()
1054+
{
1055+
var result = LuceneQuery.Parse("hello~1 AND world~2");
1056+
1057+
Assert.True(result.IsSuccess);
1058+
var doc = result.Document;
1059+
Assert.IsType<BooleanQueryNode>(doc.Query);
1060+
var boolQuery = (BooleanQueryNode)doc.Query;
1061+
Assert.Equal(2, boolQuery.Clauses.Count);
1062+
1063+
var firstTerm = (TermNode)boolQuery.Clauses[0].Query!;
1064+
Assert.Equal("hello", firstTerm.Term);
1065+
Assert.Equal(1, firstTerm.FuzzyDistance);
1066+
1067+
var secondTerm = (TermNode)boolQuery.Clauses[1].Query!;
1068+
Assert.Equal("world", secondTerm.Term);
1069+
Assert.Equal(2, secondTerm.FuzzyDistance);
1070+
}
1071+
9751072
[Fact]
9761073
public void Parse_FuzzyDecimal_ReturnsTermWithFuzzy()
9771074
{
@@ -998,6 +1095,19 @@ public void Parse_ProximitySearchWithQuotes_ReturnsPhraseWithSlop()
9981095
Assert.Equal(1, phrase.Slop);
9991096
}
10001097

1098+
[Fact]
1099+
public void Parse_ProximityWithLargeDistance_ReturnsPhraseWithSlop()
1100+
{
1101+
var result = LuceneQuery.Parse("\"hello world\"~10");
1102+
1103+
Assert.True(result.IsSuccess);
1104+
var doc = result.Document;
1105+
Assert.IsType<PhraseNode>(doc.Query);
1106+
var phrase = (PhraseNode)doc.Query;
1107+
Assert.Equal("hello world", phrase.Phrase);
1108+
Assert.Equal(10, phrase.Slop);
1109+
}
1110+
10011111
#endregion
10021112

10031113
#region Wildcard Tests

tests/Foundatio.Lucene.Tests/QueryStringBuilderTests.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,17 @@ public void ToQueryString_FuzzyTerm_ReturnsTilde()
103103
Assert.Equal("roam~", output);
104104
}
105105

106+
[Fact]
107+
public void ToQueryString_FuzzyTermNonDefault_ReturnsEditDistance()
108+
{
109+
var result = LuceneQuery.Parse("roam~1");
110+
var builder = new QueryStringBuilder();
111+
112+
var output = builder.Visit(result.Document);
113+
114+
Assert.Equal("roam~1", output);
115+
}
116+
106117
[Fact]
107118
public void ToQueryString_BoostedTerm_ReturnsCaret()
108119
{

0 commit comments

Comments
 (0)