@@ -177,9 +177,13 @@ def _extract_quotes_from_ai_response(self, ai_response: str, user_question: str
177
177
for match in matches1 :
178
178
citation_num = int (match [0 ])
179
179
quote_text = match [1 ].strip ()
180
- # Try to extract more focused quotes for long citations
181
- focused_quote = self ._extract_focused_quote (quote_text , ai_response , user_question )
182
- citation_quotes [citation_num ] = focused_quote
180
+ # For citation highlighting, preserve the full quote text
181
+ # Only use focused extraction for very long quotes (>20 words)
182
+ if len (quote_text .split ()) > 20 :
183
+ focused_quote = self ._extract_focused_quote (quote_text , ai_response , user_question )
184
+ citation_quotes [citation_num ] = focused_quote
185
+ else :
186
+ citation_quotes [citation_num ] = quote_text
183
187
184
188
# Pattern 2: [1]: "exact quote" - legacy format with colon (anywhere in line)
185
189
if not citation_quotes :
@@ -189,17 +193,28 @@ def _extract_quotes_from_ai_response(self, ai_response: str, user_question: str
189
193
for match in matches2 :
190
194
citation_num = int (match [0 ])
191
195
quote_text = match [1 ].strip ()
192
- focused_quote = self ._extract_focused_quote (quote_text , ai_response , user_question )
193
- citation_quotes [citation_num ] = focused_quote
196
+ # For citation highlighting, preserve the full quote text
197
+ # Only use focused extraction for very long quotes (>20 words)
198
+ if len (quote_text .split ()) > 20 :
199
+ focused_quote = self ._extract_focused_quote (quote_text , ai_response , user_question )
200
+ citation_quotes [citation_num ] = focused_quote
201
+ else :
202
+ citation_quotes [citation_num ] = quote_text
194
203
195
204
# Pattern 3: [Exact quote: "text"] - current problematic format
196
205
if not citation_quotes :
197
206
pattern3 = r'\[Exact quote:\s*"([^"]+)"\]'
198
207
matches3 = re .findall (pattern3 , ai_response , re .IGNORECASE )
199
208
200
209
for i , quote_text in enumerate (matches3 , 1 ):
201
- focused_quote = self ._extract_focused_quote (quote_text .strip (), ai_response , user_question )
202
- citation_quotes [i ] = focused_quote
210
+ quote_text = quote_text .strip ()
211
+ # For citation highlighting, preserve the full quote text
212
+ # Only use focused extraction for very long quotes (>20 words)
213
+ if len (quote_text .split ()) > 20 :
214
+ focused_quote = self ._extract_focused_quote (quote_text , ai_response , user_question )
215
+ citation_quotes [i ] = focused_quote
216
+ else :
217
+ citation_quotes [i ] = quote_text
203
218
204
219
# Pattern 3b: "text" in brackets without "Exact quote:" prefix
205
220
if not citation_quotes :
@@ -208,8 +223,14 @@ def _extract_quotes_from_ai_response(self, ai_response: str, user_question: str
208
223
209
224
for i , quote_text in enumerate (matches3b , 1 ):
210
225
if len (quote_text .strip ()) > 15 : # Only substantial quotes
211
- focused_quote = self ._extract_focused_quote (quote_text .strip (), ai_response , user_question )
212
- citation_quotes [i ] = focused_quote
226
+ quote_text = quote_text .strip ()
227
+ # For citation highlighting, preserve the full quote text
228
+ # Only use focused extraction for very long quotes (>20 words)
229
+ if len (quote_text .split ()) > 20 :
230
+ focused_quote = self ._extract_focused_quote (quote_text , ai_response , user_question )
231
+ citation_quotes [i ] = focused_quote
232
+ else :
233
+ citation_quotes [i ] = quote_text
213
234
214
235
# Pattern 4: Any text in double quotes as fallback
215
236
if not citation_quotes :
@@ -220,8 +241,13 @@ def _extract_quotes_from_ai_response(self, ai_response: str, user_question: str
220
241
# Only use if it looks like a substantial quote
221
242
cleaned = quote_text .strip ()
222
243
if len (cleaned ) > 15 and not cleaned .startswith ('http' ):
223
- focused_quote = self ._extract_focused_quote (cleaned , ai_response , user_question )
224
- citation_quotes [i ] = focused_quote
244
+ # For citation highlighting, preserve the full quote text
245
+ # Only use focused extraction for very long quotes (>20 words)
246
+ if len (cleaned .split ()) > 20 :
247
+ focused_quote = self ._extract_focused_quote (cleaned , ai_response , user_question )
248
+ citation_quotes [i ] = focused_quote
249
+ else :
250
+ citation_quotes [i ] = cleaned
225
251
226
252
return citation_quotes
227
253
0 commit comments