Commit 1b654b7

added optimized code
1 parent 4e38a0a commit 1b654b7

3 files changed, +51 -23 lines changed

Binary file not shown.

FASTAPI-DEPLOYMENT/rhl_fastapi_deploy.py

Lines changed: 51 additions & 23 deletions
@@ -227,9 +227,9 @@ def get_chat_context(history_pairs: List[Tuple[str, str, str]], summary: str) ->
 )
 
 judge_prompt = PromptTemplate(
-    input_variables=["query","context_snippet"],
+    input_variables=["query","context_snippets"],
     template="""
-    Return JSON only: {{"topic_match":"strong|medium|absolutely_not_possible","sufficient":true/false,"why":"short","alternative":"<anchored question or empty>"}}
+    Return JSON only: {{"judgments":[{{"index": <index of snippet>, "topic_match":"strong|medium|absolutely_not_possible","sufficient":true/false,"why":"short","alternative":"<anchored question or empty>"}}]}}
 
     Guidance:
     - strong: Large facts about the query can be found and more supporting facts about the topic so A strong answer can be formed about the query from the context.
@@ -239,11 +239,13 @@ def get_chat_context(history_pairs: List[Tuple[str, str, str]], summary: str) ->
     sufficient is True when topic_match is strong or weak with some similarity to query or topic
     Sufficient is False otherwise
 
+    For each provided context snippet, make a judgment. The `index` in the output JSON should correspond to the order of the snippets in the input.
+
     Query:
     {query}
 
-    Context (short excerpts):
-    {context_snippet}
+    Context Snippets:
+    {context_snippets}
     """
 )
 
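The two hunks above change the judge prompt from scoring a single snippet per LLM call to scoring every retrieved snippet in one call, returning a "judgments" array keyed by snippet index. Purely as an illustration (the values below are hypothetical, not taken from the repository), a well-formed reply under the new schema would parse like this:

import json

# Hypothetical judge reply for two snippets under the new batched schema.
# Values are illustrative only; real replies come from judge_llm.
example_reply = '''{"judgments": [
  {"index": 0, "topic_match": "strong", "sufficient": true,
   "why": "excerpt covers the query directly", "alternative": ""},
  {"index": 1, "topic_match": "absolutely_not_possible", "sufficient": false,
   "why": "unrelated topic", "alternative": "What deployment steps does the document describe?"}
]}'''

parsed = json.loads(example_reply)
for judgment in parsed["judgments"]:
    print(judgment["index"], judgment["topic_match"], judgment["sufficient"])

The index field is what lets the updated judge_sufficiency (next hunk) map each judgment back to candidates[idx].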
@@ -345,27 +347,53 @@ def judge_sufficiency(query: str, candidates: List[Dict[str, Any]], judge_llm: C
     topic_match_order = {"strong": 3, "medium": 2, "absolutely_not_possible": 1}
 
     logging.info(f"len of candidates {len(candidates)}")
-    for c in candidates: # inspect up to 12. Iterate through all candidates initially
-        snippet = f"Source: {c['meta'].get('doc_name','unknown')}\\nExcerpt: {c['text']}"
-        prompt = judge_prompt.format(query=query, context_snippet=snippet)
 
-        resp = judge_llm.invoke([HumanMessage(content=prompt)]).content
+    # Prepare snippets for batch judging
+    snippets_for_llm = []
+    for idx, c in enumerate(candidates):
+        snippet_text = f"Source: {c['meta'].get('doc_name', 'unknown')}\nExcerpt: {c['text']}"
+        snippets_for_llm.append(f"Snippet {idx}:\n{snippet_text}")
+
+    combined_snippets = "\n\n".join(snippets_for_llm)
+
+    prompt = judge_prompt.format(query=query, context_snippets=combined_snippets)
 
-        try:
-            obj = json.loads(resp[resp.rfind("{"):resp.rfind("}")+1])
-            logging.info(obj)
-            topic_match_label = obj.get("topic_match", "absolutely_not_possible")
-            # Store topic_match_score in the chunk's meta for easier sorting
-            c['meta']['topic_match_score'] = topic_match_order.get(topic_match_label, 0) # Default to 0 for unknown/error
-
-            if obj.get("sufficient", False):
-                qualified_with_scores.append(c) # Add to qualified list
-            else:
-                followup_chunks_raw.append(c)
-        except Exception:
-            # Fallback based on cross-encoder score if LLM judge fails
-            # Assign a default topic_match_score (e.g., 'medium' equivalent if LLM fails to parse)
-            c['meta']['topic_match_score'] = topic_match_order.get("medium", 0)
+    try:
+        resp = judge_llm.invoke([HumanMessage(content=prompt)]).content
+        parsed_judgments = safe_json_parse(resp)
+        if parsed_judgments and "judgments" in parsed_judgments:
+            for judgment in parsed_judgments["judgments"]:
+                idx = judgment.get("index")
+                if idx is not None and 0 <= idx < len(candidates):
+                    c = candidates[idx]
+                    topic_match_label = judgment.get("topic_match", "absolutely_not_possible")
+                    c['meta']['topic_match_score'] = topic_match_order.get(topic_match_label, 0)
+                    if judgment.get("sufficient", False):
+                        qualified_with_scores.append(c)
+                    else:
+                        followup_chunks_raw.append(c)
+                else:
+                    logging.warning(f"[judge_sufficiency] Invalid index in LLM judgment: {judgment}")
+                    # Fallback for invalid index
+                    if c["scores"]["cross"] > threshold_weak:
+                        qualified_with_scores.append(c)
+                    else:
+                        followup_chunks_raw.append(c)
+        else:
+            logging.warning("[judge_sufficiency] LLM did not return valid batched judgments. Falling back to cross-encoder scores.")
+            # Fallback based on cross-encoder score if LLM fails to parse or returns no judgments
+            for c in candidates:
+                c['meta']['topic_match_score'] = topic_match_order.get("medium", 0) # Assign a default topic_match_score
+                if c["scores"]["cross"] > threshold_weak:
+                    qualified_with_scores.append(c)
+                else:
+                    followup_chunks_raw.append(c)
+    except Exception as e:
+        logging.error(f"[judge_sufficiency] Error during batched LLM judging: {e}")
+        logging.exception("[judge_sufficiency] Full traceback for batched judging error:")
+        # Fallback based on cross-encoder score if LLM invocation fails
+        for c in candidates:
+            c['meta']['topic_match_score'] = topic_match_order.get("medium", 0) # Assign a default topic_match_score
             if c["scores"]["cross"] > threshold_weak:
                 qualified_with_scores.append(c)
             else:
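
The batched loop above relies on a safe_json_parse helper that is not part of this diff, so its exact implementation is unknown. A minimal sketch that would satisfy the call site, returning a dict on success and None on failure so the caller falls back to cross-encoder scores, might look like:

import json
from typing import Optional

def safe_json_parse(text: str) -> Optional[dict]:
    # Hypothetical sketch; the real helper in rhl_fastapi_deploy.py may differ.
    # Try the whole reply first, then the outermost {...} span; return None
    # if neither parses as a JSON object.
    for candidate in (text, text[text.find("{"): text.rfind("}") + 1]):
        try:
            obj = json.loads(candidate)
            if isinstance(obj, dict):
                return obj
        except (ValueError, TypeError):
            continue
    return None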

chat_history.db

24 KB
Binary file not shown.
