Skip to content

Commit 962369d

Browse files
authored
fix: enable searching for literal reserved words (AND, OR, NOT) (#1103)
## Summary

- Fix issue 4 from KNOWN-ISSUES.md: users could not search for literal words "AND", "OR", "NOT" even with quotes
- FTS5 reserved words are now properly escaped when generating FTS queries
- Added examples to `km examples` showing how to search for literal reserved words

## Changes

- **`src/Core/Search/NodeSearchService.cs`**: Added FTS5 reserved word detection and escaping in `FtsQueryExtractor`
- **`src/Main/CLI/Commands/ExamplesCommand.cs`**: Added examples for searching literal reserved words
- **`KNOWN-ISSUES.md`**: Removed resolved issue 4
- **`tests/Core.Tests/Search/FtsQueryExtractionTest.cs`**: Added 5 E2E tests
- **`tests/Core.Tests/Search/Query/InfixQueryParserTests.cs`**: Added 12 parser tests

## Test plan

- [x] All 520 tests pass (306 Core + 214 Main)
- [x] Zero skipped tests
- [x] Code coverage at 83.82% (above 80% threshold)
- [x] `build.sh` passes with 0 warnings
- [x] `format.sh` passes
- [x] `coverage.sh` passes

## Usage examples (now working)

```bash
km search '"NOT"'            # Search for literal "NOT"
km search '"AND"'            # Search for literal "AND"
km search '"Alice AND Bob"'  # Search for phrase containing AND
```
1 parent b22235d commit 962369d

File tree

5 files changed

+478
-49
lines changed

5 files changed

+478
-49
lines changed

KNOWN-ISSUES.md

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -84,33 +84,6 @@ km search 'content:"user:password"'
8484

8585
---
8686

87-
### 4. Reserved Words Cannot Be Searched
88-
89-
**Status:** Known limitation
90-
91-
**Issue:** Cannot search for the literal words "AND", "OR", "NOT" even with quotes.
92-
93-
**Example:**
94-
```bash
95-
km put "this is NOT important"
96-
km search "NOT"
97-
# Expected: Find the document
98-
# Actual: Parser error "Unexpected end of query"
99-
```
100-
101-
**Root Cause:**
102-
- Tokenizer treats AND/OR/NOT as reserved keywords (case-insensitive)
103-
- Even quoted, they're tokenized as operators
104-
- Parser expects operands after NOT
105-
106-
**Workaround:** None. These words cannot be searched.
107-
108-
**Fix Required:**
109-
- Tokenizer must recognize quotes and treat content literally
110-
- Major parser refactoring needed
111-
112-
---
113-
11487
## Testing Gaps
11588

11689
These bugs were discovered through comprehensive E2E testing. Previous tests only verified:
@@ -127,16 +100,3 @@ But did NOT test:
127100

128101
---
129102

130-
## Resolved Issues
131-
132-
### BM25 Score Normalization (FIXED)
133-
- **Issue:** All searches returned 0 results despite FTS finding matches
134-
- **Cause:** BM25 scores (~0.000001) filtered by MinRelevance=0.3
135-
- **Fix:** Exponential normalization maps [-10, 0] → [0.37, 1.0]
136-
- **Commit:** 4cb283e
137-
138-
### Field-Specific Equal Operator (FIXED)
139-
- **Issue:** `content:summaries` failed with SQLite error
140-
- **Cause:** Equal operator didn't extract FTS queries
141-
- **Fix:** ExtractComparison now handles both Contains and Equal
142-
- **Commit:** 59bf3f2

src/Core/Search/NodeSearchService.cs

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,15 @@ private string ExtractFtsQuery(QueryNode queryNode)
127127
/// </summary>
128128
private sealed class FtsQueryExtractor
129129
{
130+
/// <summary>
131+
/// SQLite FTS5 reserved words that must be quoted when used as search terms.
132+
/// These keywords have special meaning in FTS5 query syntax.
133+
/// </summary>
134+
private static readonly HashSet<string> s_fts5ReservedWords = new(StringComparer.OrdinalIgnoreCase)
135+
{
136+
"AND", "OR", "NOT", "NEAR"
137+
};
138+
130139
public string Extract(QueryNode node)
131140
{
132141
var terms = this.ExtractTerms(node);
@@ -153,22 +162,31 @@ private string ExtractTextSearch(TextSearchNode node)
153162
{
154163
// Phrase searches: use quotes and no field prefix
155164
// FTS5 doesn't support field:phrase syntax well, so just search all fields
156-
var escapedPhrase = node.SearchText.Replace("\"", "\"\"", StringComparison.Ordinal);
165+
var escapedPhrase = this.EscapeFtsPhrase(node.SearchText);
157166
return $"\"{escapedPhrase}\"";
158167
}
159168

169+
// Check if the term is a reserved word that needs quoting
170+
if (this.IsFts5ReservedWord(node.SearchText))
171+
{
172+
// Reserved words must be quoted to be treated as literal search terms
173+
// We cannot use field prefix with quoted terms in FTS5, so search all fields
174+
var escapedTerm = this.EscapeFtsPhrase(node.SearchText);
175+
return $"\"{escapedTerm}\"";
176+
}
177+
160178
// Single word searches: use field prefix WITHOUT quotes
161-
var escapedTerm = this.EscapeFtsSingleTerm(node.SearchText);
179+
var escaped = this.EscapeFtsSingleTerm(node.SearchText);
162180

163181
// If specific field, prefix with field name (SQLite FTS5 syntax)
164182
if (node.Field != null && this.IsFtsField(node.Field.FieldPath))
165183
{
166-
return $"{node.Field.FieldPath}:{escapedTerm}";
184+
return $"{node.Field.FieldPath}:{escaped}";
167185
}
168186

169187
// Default field: search all FTS fields (title, description, content)
170188
// FTS5 syntax: {title description content}:term
171-
return $"{{title description content}}:{escapedTerm}";
189+
return $"{{title description content}}:{escaped}";
172190
}
173191

174192
private string ExtractLogical(LogicalNode node)
@@ -208,13 +226,22 @@ private string ExtractComparison(ComparisonNode node)
208226
if (isPhrase)
209227
{
210228
// Phrase search: use quotes without field prefix
211-
var escapedPhrase = searchText.Replace("\"", "\"\"", StringComparison.Ordinal);
229+
var escapedPhrase = this.EscapeFtsPhrase(searchText);
212230
return $"\"{escapedPhrase}\"";
213231
}
214232

233+
// Check if the term is a reserved word that needs quoting
234+
if (this.IsFts5ReservedWord(searchText))
235+
{
236+
// Reserved words must be quoted to be treated as literal search terms
237+
// We cannot use field prefix with quoted terms in FTS5
238+
var escapedTerm = this.EscapeFtsPhrase(searchText);
239+
return $"\"{escapedTerm}\"";
240+
}
241+
215242
// Single word: use field prefix without quotes
216-
var escapedTerm = this.EscapeFtsSingleTerm(searchText);
217-
return $"{node.Field.FieldPath}:{escapedTerm}";
243+
var escaped = this.EscapeFtsSingleTerm(searchText);
244+
return $"{node.Field.FieldPath}:{escaped}";
218245
}
219246

220247
// Other comparison operators (!=, >=, <, etc.) are handled by LINQ filtering
@@ -233,13 +260,31 @@ private bool IsFtsField(string? fieldPath)
233260
return normalized == "title" || normalized == "description" || normalized == "content";
234261
}
235262

263+
/// <summary>
264+
/// Check if a term is an FTS5 reserved word.
265+
/// Reserved words need special escaping to be searched as literals.
266+
/// </summary>
267+
private bool IsFts5ReservedWord(string term)
268+
{
269+
return s_fts5ReservedWords.Contains(term);
270+
}
271+
272+
/// <summary>
273+
/// Escape a phrase for FTS5 quoted string search.
274+
/// Doubles any internal quotes (FTS5 escape syntax).
275+
/// </summary>
276+
private string EscapeFtsPhrase(string phrase)
277+
{
278+
return phrase.Replace("\"", "\"\"", StringComparison.Ordinal);
279+
}
280+
236281
private string EscapeFtsSingleTerm(string term)
237282
{
238283
// For single-word searches with field prefix (e.g., content:call)
239284
// FTS5 does NOT support quotes after the colon: content:"call" is INVALID
240285
// We must use: content:call
241-
//
242-
// Escape FTS5 special characters: " *
286+
//
287+
// Escape FTS5 special characters: " *
243288
// For now, keep it simple: just remove quotes and wildcards that could break syntax
244289
return term.Replace("\"", string.Empty, StringComparison.Ordinal)
245290
.Replace("*", string.Empty, StringComparison.Ordinal);

src/Main/CLI/Commands/ExamplesCommand.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,13 @@ private void ShowSearchExamples()
139139
AnsiConsole.MarkupLine("[dim]Find documents with either python or javascript[/]");
140140
AnsiConsole.WriteLine();
141141

142+
AnsiConsole.MarkupLine("[bold]Search for literal reserved words[/]");
143+
AnsiConsole.MarkupLine("[cyan]km search '\"NOT\"'[/]");
144+
AnsiConsole.MarkupLine("[dim]Use quotes to search for literal AND, OR, NOT as words[/]");
145+
AnsiConsole.MarkupLine("[cyan]km search '\"Alice AND Bob\"'[/]");
146+
AnsiConsole.MarkupLine("[dim]Search for a phrase containing reserved words literally[/]");
147+
AnsiConsole.WriteLine();
148+
142149
AnsiConsole.MarkupLine("[bold]Complex queries with parentheses[/]");
143150
AnsiConsole.MarkupLine("[cyan]km search \"vacation AND (beach OR mountain)\"[/]");
144151
AnsiConsole.MarkupLine("[dim]Find vacation plans for beach or mountain trips[/]");

0 commit comments

Comments
 (0)