@@ -67,7 +67,125 @@ public ElasticsearchGateway(ElasticsearchOptions elasticsearchOptions, ILogger<E
6767 }
6868
/// <summary>
/// Default search entry point: forwards the request unchanged to
/// <see cref="HybridSearchWithRrfAsync"/> and returns its result.
/// </summary>
/// <param name="query">Search text as supplied by the caller.</param>
/// <param name="pageNumber">Page to fetch (passed through as-is).</param>
/// <param name="pageSize">Number of results per page (passed through as-is).</param>
/// <param name="ctx">Cancellation token forwarded to the underlying search.</param>
/// <returns>The total hit count together with the result items for the requested page.</returns>
public async Task<(int TotalHits, List<SearchResultItem> Results)> SearchAsync(string query, int pageNumber, int pageSize, Cancel ctx = default)
{
    return await HybridSearchWithRrfAsync(query, pageNumber, pageSize, ctx);
}
71+
/// <summary>
/// Runs a hybrid search that fuses a lexical (bool/should) retriever and a semantic
/// retriever with Reciprocal Rank Fusion (RRF), then returns the requested page.
/// </summary>
/// <param name="query">Search text; occurrences of "dotnet" are rewritten to "net" before querying.</param>
/// <param name="pageNumber">1-based page number. Values below 1 are treated as the first page.</param>
/// <param name="pageSize">Number of results per page.</param>
/// <param name="ctx">Cancellation token passed to the Elasticsearch client.</param>
/// <returns>
/// Whatever <c>ProcessSearchResponse</c> produces for the Elasticsearch response
/// (total hits and the result items).
/// </returns>
/// <remarks>
/// Any exception thrown by the client is rethrown to the caller. An invalid (non-throwing)
/// response is only logged as a warning and is still handed to <c>ProcessSearchResponse</c>.
/// </remarks>
public async Task<(int TotalHits, List<SearchResultItem> Results)> HybridSearchWithRrfAsync(string query, int pageNumber, int pageSize, Cancel ctx = default)
{
    // Lower bound for the RRF rank window. Keeping the window fixed for the first
    // pages keeps the fused ordering stable while the caller paginates.
    const int MinimumRankWindowSize = 100;

    _logger.LogInformation("Starting RRF hybrid search for '{Query}' with pageNumber={PageNumber}, pageSize={PageSize}", query, pageNumber, pageSize);

    // NOTE(review): presumably normalises "dotnet" so it matches content indexed as ".NET" - confirm.
    var searchQuery = query.Replace("dotnet", "net", StringComparison.InvariantCultureIgnoreCase);

    // Clamp to the first page: a non-positive pageNumber would otherwise yield a
    // negative offset, which Elasticsearch rejects.
    var from = pageNumber > 1 ? (pageNumber - 1) * pageSize : 0;

    // RRF can only page inside its rank window: requests where from + size exceeds
    // rank_window_size fall outside the fused result set. Grow the window whenever
    // the requested page reaches past the fixed minimum.
    var requestedEnd = from + pageSize;
    var rankWindowSize = requestedEnd > MinimumRankWindowSize ? requestedEnd : MinimumRankWindowSize;

    try
    {
        var response = await _client.SearchAsync<DocumentDto>(s => s
            .Indices(_elasticsearchOptions.IndexName)
            .Retriever(r => r
                .Rrf(rrf => rrf
                    .Retrievers(
                        // Lexical/traditional search retriever
                        ret => ret.Standard(std => std
                            .Query(q => q
                                .Bool(b => b
                                    .Should(
                                        // Tier 1: case-insensitive prefix match on the keyword title (highest boost)
                                        sh => sh.Prefix(p => p
                                            .Field("title.keyword")
                                            .Value(searchQuery)
                                            .CaseInsensitive(true)
                                            .Boost(10.0f)
                                        ),
                                        // Tier 2: title match where every term must be present
                                        sh => sh.Match(m => m
                                            .Field(f => f.Title)
                                            .Query(searchQuery)
                                            .Operator(Operator.And)
                                            .Boost(8.0f)
                                        ),
                                        // Tier 3: match_bool_prefix on the title for partially typed queries
                                        sh => sh.MatchBoolPrefix(m => m
                                            .Field(f => f.Title)
                                            .Query(searchQuery)
                                            .Boost(6.0f)
                                        ),
                                        // Tier 4: abstract match
                                        sh => sh.Match(m => m
                                            .Field(f => f.Abstract)
                                            .Query(searchQuery)
                                            .Boost(4.0f)
                                        ),
                                        // Tier 5: parent title match
                                        sh => sh.Match(m => m
                                            .Field("parents.title")
                                            .Query(searchQuery)
                                            .Boost(2.0f)
                                        ),
                                        // Tier 6: fuzzy title match as the lowest-boost fallback
                                        sh => sh.Match(m => m
                                            .Field(f => f.Title)
                                            .Query(searchQuery)
                                            .Fuzziness(1)
                                            .Boost(1.0f)
                                        )
                                    )
                                    // Exclude the docs root and 404 pages from the results
                                    .MustNot(mn => mn.Terms(t => t
                                        .Field("url.keyword")
                                        .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/"))
                                    ))
                                    .MinimumShouldMatch(1)
                                )
                            )
                        ),
                        // Semantic search retriever
                        ret => ret.Standard(std => std
                            .Query(q => q
                                .Bool(b => b
                                    .Should(
                                        // Title semantic search (higher boost)
                                        sh => sh.Semantic(sem => sem
                                            .Field("title.semantic_text")
                                            .Query(searchQuery)
                                            .Boost(5.0f)
                                        ),
                                        // Abstract semantic search
                                        sh => sh.Semantic(sem => sem
                                            .Field("abstract")
                                            .Query(searchQuery)
                                            .Boost(3.0f)
                                        )
                                    )
                                    // Same exclusions as the lexical retriever
                                    .MustNot(mn => mn.Terms(t => t
                                        .Field("url.keyword")
                                        .Terms(factory => factory.Value("/docs", "/docs/", "/docs/404", "/docs/404/"))
                                    ))
                                    .MinimumShouldMatch(1)
                                )
                            )
                        )
                    )
                    .RankConstant(60) // RRF rank constant used when fusing the two rankings
                    .RankWindowSize(rankWindowSize) // must cover from + size or the page comes back empty
                )
            )
            .From(from)
            .Size(pageSize), ctx);

        if (!response.IsValidResponse)
        {
            _logger.LogWarning("Elasticsearch RRF search response was not valid. Reason: {Reason}",
                response.ElasticsearchServerError?.Error?.Reason ?? "Unknown");
        }
        else
        {
            _logger.LogInformation("RRF search completed for '{Query}'. Total hits: {TotalHits}", query, response.Total);
        }

        // The response is processed even when invalid; handling of that case is left to ProcessSearchResponse.
        return ProcessSearchResponse(response);
    }
    catch (Exception ex) when (!(ex is OperationCanceledException))
    {
        // Cancellation is not a failure: it propagates without an error log entry.
        _logger.LogError(ex, "Error occurred during Elasticsearch RRF search for '{Query}'", query);
        throw;
    }
}
71189
72190 public async Task < ( int TotalHits , List < SearchResultItem > Results ) > ExactSearchAsync ( string query , int pageNumber , int pageSize , Cancel ctx = default )
73191 {
0 commit comments