Skip to content

Commit 96aeda1

Browse files
committed
Use retrievers in search query
1 parent fdc4425 commit 96aeda1

File tree

1 file changed

+119
-1
lines changed

1 file changed

+119
-1
lines changed

src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,125 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger<E
6767
}
6868

6969
// Public search entry point; forwards query, pageNumber, pageSize and ctx unchanged to a concrete strategy.
// This diff hunk swaps the strategy: the line following the `70-` marker (ExactSearchAsync) is the removed
// delegation, and the line following the `70+` marker (HybridSearchWithRrfAsync) is its replacement.
public async Task<(int TotalHits, List<SearchResultItem> Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) =>
70-
await ExactSearchAsync(query, pageNumber, pageSize, ctx);
70+
await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx);
71+
72+
/// <summary>
/// Runs a hybrid search that fuses a lexical retriever and a semantic retriever with
/// Reciprocal Rank Fusion (RRF), then hands the raw response to <c>ProcessSearchResponse</c>.
/// </summary>
/// <param name="query">User-supplied search text. Any occurrence of "dotnet" is rewritten to "net" before querying.</param>
/// <param name="pageNumber">1-based page index. Values below 1 are treated as the first page.</param>
/// <param name="pageSize">Number of hits requested per page.</param>
/// <param name="ctx">Cancellation token forwarded to the Elasticsearch client.</param>
/// <returns>The total hit count and result items as produced by <c>ProcessSearchResponse</c>.</returns>
/// <remarks>
/// An invalid Elasticsearch response is logged as a warning and still passed to
/// <c>ProcessSearchResponse</c>. Exceptions are logged and rethrown unchanged.
/// </remarks>
public async Task<(int TotalHits, List<SearchResultItem> Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default)
{
    // Floor for the RRF rank window so the fused ranking stays stable across the first pages.
    const int minRankWindowSize = 100;

    _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize);

    // NOTE(review): presumably normalizes "dotnet" so it matches how the index refers to .NET — confirm intent.
    var searchQuery = query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase);

    // Elasticsearch rejects a negative `from`; a page number below 1 falls back to the first page.
    var from = pageNumber > 1 ? (pageNumber - 1) * pageSize : 0;

    // RRF only ranks the top `rank_window_size` hits of each retriever and pagination happens inside
    // that window, so the window must cover the requested page or the page comes back empty.
    var rankWindowSize = Math.Max(minRankWindowSize, from + pageSize);

    try
    {
        var response = await _client.SearchAsync<DocumentDto>(s => s
            .Indices(_elasticsearchOptions.IndexName)
            .Retriever(r => r
                .Rrf(rrf => rrf
                    .Retrievers(
                        // Lexical/Traditional search retriever
                        ret => ret.Standard(std => std
                            .Query(q => q
                                .Bool(b => b
                                    .Should(
                                        // Tier 1: Exact/Prefix matches (highest priority)
                                        sh => sh.Prefix(p => p
                                            .Field("title.keyword")
                                            .Value(searchQuery)
                                            .CaseInsensitive(true)
                                            .Boost(10.0f) // Highest importance - exact prefix matches
                                        ),
                                        // Tier 2: Title matching with AND operator
                                        sh => sh.Match(m => m
                                            .Field(f => f.Title)
                                            .Query(searchQuery)
                                            .Operator(Operator.And)
                                            .Boost(8.0f) // High importance - all terms must match
                                        ),
                                        // Tier 3: Match bool prefix for partial matches
                                        sh => sh.MatchBoolPrefix(m => m
                                            .Field(f => f.Title)
                                            .Query(searchQuery)
                                            .Boost(6.0f) // Medium-high importance - partial matches
                                        ),
                                        // Tier 4: Abstract matching
                                        sh => sh.Match(m => m
                                            .Field(f => f.Abstract)
                                            .Query(searchQuery)
                                            .Boost(4.0f) // Medium importance - content matching
                                        ),
                                        // Tier 5: Parent matching
                                        sh => sh.Match(m => m
                                            .Field("parents.title")
                                            .Query(searchQuery)
                                            .Boost(2.0f) // Lower importance - parent context
                                        ),
                                        // Tier 6: Fuzzy fallback
                                        sh => sh.Match(m => m
                                            .Field(f => f.Title)
                                            .Query(searchQuery)
                                            .Fuzziness(1)
                                            .Boost(1.0f) // Lowest importance - fuzzy fallback
                                        )
                                    )
                                    // Exclude the docs landing page and 404 page from results.
                                    .MustNot(mn => mn.Terms(t => t
                                        .Field("url.keyword")
                                        .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/"))
                                    ))
                                    .MinimumShouldMatch(1)
                                )
                            )
                        ),
                        // Semantic search retriever
                        ret => ret.Standard(std => std
                            .Query(q => q
                                .Bool(b => b
                                    .Should(
                                        // Title semantic search
                                        sh => sh.Semantic(sem => sem
                                            .Field("title.semantic_text")
                                            .Query(searchQuery)
                                            .Boost(5.0f) // Higher importance - title semantic matching
                                        ),
                                        // Abstract semantic search
                                        // NOTE(review): title targets the "title.semantic_text" sub-field while this
                                        // targets "abstract" directly — confirm "abstract" is mapped as semantic_text.
                                        sh => sh.Semantic(sem => sem
                                            .Field("abstract")
                                            .Query(searchQuery)
                                            .Boost(3.0f) // Medium importance - content semantic matching
                                        )
                                    )
                                    // Same exclusions as the lexical retriever so both rankings cover the same documents.
                                    .MustNot(mn => mn.Terms(t => t
                                        .Field("url.keyword")
                                        .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/"))
                                    ))
                                    .MinimumShouldMatch(1)
                                )
                            )
                        )
                    )
                    // RRF smoothing constant: higher values give lower-ranked documents more influence on the fused order.
                    .RankConstant(60)
                    .RankWindowSize(rankWindowSize)
                )
            )
            .From(from)
            .Size(pageSize), ctx);

        if (!response.IsValidResponse)
        {
            _logger.LogWarning("Elasticsearch RRF search response was not valid. Reason: {Reason}",
                response.ElasticsearchServerError?.Error?.Reason ?? "Unknown");
        }
        else
        {
            _logger.LogInformation("RRF search completed for '{Query}'. Total hits: {TotalHits}", query, response.Total);
        }

        return ProcessSearchResponse(response);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error occurred during Elasticsearch RRF search for '{Query}'", query);
        throw;
    }
}
71189

72190
public async Task<(int TotalHits, List<SearchResultItem> Results)> ExactSearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default)
73191
{

0 commit comments

Comments
 (0)