Skip to content

Commit 2990567

Browse files
reakaleek and Mpdreamz authored
Use retrievers in search query (#1835)
* Use retrievers in search query
* Use boolean query DSL (#1851)
* Update src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs
---------
Co-authored-by: Martijn Laarman <[email protected]>
1 parent 11912ee commit 2990567

File tree

1 file changed

+60
-1
lines changed

1 file changed

+60
-1
lines changed

src/api/Elastic.Documentation.Api.Infrastructure/Adapters/Search/ElasticsearchGateway.cs

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,66 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger<E
6767
}
6868

6969
public async Task<(int TotalHits, List<SearchResultItem> Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default) =>
70-
await ExactSearchAsync(query, pageNumber, pageSize, ctx);
70+
await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx);
71+
72+
/// <summary>
/// Runs a hybrid search against the configured index: a lexical query and a semantic query are each
/// wrapped in a standard retriever and combined by an RRF retriever.
/// </summary>
/// <param name="query">Raw user query. Occurrences of "dotnet" (any casing) are rewritten to "net" before searching.</param>
/// <param name="pageNumber">1-based page number; values below 1 are treated as the first page.</param>
/// <param name="pageSize">Number of hits requested per page.</param>
/// <param name="ctx">Cancellation token forwarded to the Elasticsearch client.</param>
/// <returns>Whatever <c>ProcessSearchResponse</c> produces for the Elasticsearch response (total hits and result items).</returns>
/// <remarks>
/// An invalid response is logged as a warning and still handed to <c>ProcessSearchResponse</c>.
/// Any exception thrown while searching is logged and rethrown unchanged.
/// </remarks>
public async Task<(int TotalHits, List<SearchResultItem> Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default)
{
    _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize);

    // "dotnet" is a fixed token, not linguistic text, so compare ordinally rather than by culture rules.
    var searchQuery = query.Replace("dotnet", "net", StringComparison.OrdinalIgnoreCase);

    // URLs that must never appear in results. A fresh query instance is built on every call so the
    // lexical and semantic queries do not share (and cannot mutate) the same object when combined.
    static Query ExcludedUrls() =>
        (Query)new TermsQuery(
            Infer.Field<DocumentDto>(f => f.Url.Suffix("keyword")),
            new TermsQueryField(["/docs", "/docs/", "/docs/404", "/docs/404/"]));

    // Lexical side: any of the clauses may match; boosts rank exact/prefix title matches above
    // abstract, parent-title and fuzzy title matches.
    var lexicalQuery =
        ((Query)new PrefixQuery(Infer.Field<DocumentDto>(f => f.Title.Suffix("keyword")), searchQuery) { Boost = 10.0f, CaseInsensitive = true }
            || new MatchQuery(Infer.Field<DocumentDto>(f => f.Title), searchQuery) { Operator = Operator.And, Boost = 8.0f }
            || new MatchBoolPrefixQuery(Infer.Field<DocumentDto>(f => f.Title), searchQuery) { Boost = 6.0f }
            || new MatchQuery(Infer.Field<DocumentDto>(f => f.Abstract), searchQuery) { Boost = 4.0f }
            || new MatchQuery(Infer.Field<DocumentDto>(f => f.Parents.First().Title), searchQuery) { Boost = 2.0f }
            || new MatchQuery(Infer.Field<DocumentDto>(f => f.Title), searchQuery) { Fuzziness = 1, Boost = 1.0f }
        )
        && !ExcludedUrls();

    // Semantic side: semantic queries on the title's semantic_text sub-field and on the abstract.
    var semanticQuery =
        ((Query)new SemanticQuery("title.semantic_text", searchQuery) { Boost = 5.0f }
            || new SemanticQuery("abstract", searchQuery) { Boost = 3.0f }
        )
        && !ExcludedUrls();

    // Guard against pageNumber < 1, which would otherwise yield a negative offset.
    var from = Math.Max(0, (pageNumber - 1) * pageSize);

    try
    {
        var response = await _client.SearchAsync<DocumentDto>(s => s
            .Indices(_elasticsearchOptions.IndexName)
            .Retriever(r => r
                .Rrf(rrf => rrf
                    .Retrievers(
                        // Lexical/traditional search retriever
                        ret => ret.Standard(std => std.Query(lexicalQuery)),
                        // Semantic search retriever
                        ret => ret.Standard(std => std.Query(semanticQuery))
                    )
                    .RankConstant(60) // Controls how much weight is given to document ranking
                )
            )
            .From(from)
            .Size(pageSize), ctx);

        if (!response.IsValidResponse)
        {
            _logger.LogWarning("Elasticsearch RRF search response was not valid. Reason: {Reason}",
                response.ElasticsearchServerError?.Error?.Reason ?? "Unknown");
        }
        else
        {
            _logger.LogInformation("RRF search completed for '{Query}'. Total hits: {TotalHits}", query, response.Total);
        }

        return ProcessSearchResponse(response);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error occurred during Elasticsearch RRF search for '{Query}'", query);
        throw;
    }
}
71130

72131
public async Task<(int TotalHits, List<SearchResultItem> Results)> ExactSearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default)
73132
{

0 commit comments

Comments
 (0)