@@ -203,7 +203,7 @@ def build_item_map(items: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
203203 if has_recency :
204204 boost += 0.03
205205
206- if generic_terms and is_topical and not topic_terms :
206+ if generic_terms and is_topical :
207207 item_terms = set (re .findall (r"\b[a-zA-Z][a-zA-Z0-9-]{2,}\b" , item_text ))
208208 if not (topic_terms & item_terms ):
209209 boost -= 0.05
@@ -555,6 +555,7 @@ def _retrieve_seed_nodes(
555555 seed_k : int ,
556556 enable_rerank : bool = True ,
557557 rerank_model : str = "gemini-2.0-flash" ,
558+ rerank_top_n : int = 40 ,
558559 query_embedding : list [float ] | None = None ,
559560) -> list [dict [str , Any ]]:
560561 vector_candidates : list [dict [str , Any ]] = []
@@ -669,7 +670,7 @@ def _retrieve_seed_nodes(
669670 candidates = fused_candidates ,
670671 query = query ,
671672 model = rerank_model ,
672- top_n = min (50 , seed_k * 3 ),
673+ top_n = max ( 5 , min (int ( rerank_top_n ), max ( 50 , seed_k * 3 )) ),
673674 )
674675 except Exception :
675676 pass
@@ -686,18 +687,33 @@ def _retrieve_edges_hops_1(
686687 if not seed_ids :
687688 return []
688689 placeholders = "," .join (["%s" ] * len (seed_ids ))
689- rows = postgres .execute_query (
690- f"""
691- SELECT id, source_id, predicate, predicate_raw, target_id,
692- youtube_video_id, earliest_timestamp_str, earliest_seconds,
693- utterance_ids, evidence, speaker_ids, confidence
694- FROM kg_edges
695- WHERE source_id IN ({ placeholders } ) OR target_id IN ({ placeholders } )
696- ORDER BY edge_rank_score DESC NULLS LAST, confidence DESC NULLS LAST, earliest_seconds ASC
697- LIMIT %s
698- """ ,
699- tuple (seed_ids + seed_ids + [max_edges ]),
700- )
690+ params = tuple (seed_ids + seed_ids + [max_edges ])
691+ try :
692+ rows = postgres .execute_query (
693+ f"""
694+ SELECT id, source_id, predicate, predicate_raw, target_id,
695+ youtube_video_id, earliest_timestamp_str, earliest_seconds,
696+ utterance_ids, evidence, speaker_ids, confidence, edge_rank_score
697+ FROM kg_edges
698+ WHERE source_id IN ({ placeholders } ) OR target_id IN ({ placeholders } )
699+ ORDER BY edge_rank_score DESC NULLS LAST, confidence DESC NULLS LAST, earliest_seconds ASC
700+ LIMIT %s
701+ """ ,
702+ params ,
703+ )
704+ except Exception :
705+ rows = postgres .execute_query (
706+ f"""
707+ SELECT id, source_id, predicate, predicate_raw, target_id,
708+ youtube_video_id, earliest_timestamp_str, earliest_seconds,
709+ utterance_ids, evidence, speaker_ids, confidence
710+ FROM kg_edges
711+ WHERE source_id IN ({ placeholders } ) OR target_id IN ({ placeholders } )
712+ ORDER BY confidence DESC NULLS LAST, earliest_seconds ASC
713+ LIMIT %s
714+ """ ,
715+ params ,
716+ )
701717 out : list [dict [str , Any ]] = []
702718 for row in rows :
703719 out .append (
@@ -714,6 +730,9 @@ def _retrieve_edges_hops_1(
714730 "evidence" : row [9 ],
715731 "speaker_ids" : row [10 ] or [],
716732 "confidence" : float (row [11 ]) if row [11 ] is not None else None ,
733+ "edge_rank_score" : float (row [12 ])
734+ if len (row ) > 12 and row [12 ] is not None
735+ else None ,
717736 }
718737 )
719738 return out
@@ -921,9 +940,11 @@ def kg_hybrid_graph_rag(
921940
922941 enable_rerank = getattr (config , "enable_seed_rerank" , False )
923942 rerank_model = getattr (config , "seed_rerank_model" , "gemini-2.0-flash" )
943+ rerank_top_n = getattr (config , "seed_rerank_top_n" , 40 )
924944 except Exception :
925945 enable_rerank = False
926946 rerank_model = "gemini-2.0-flash"
947+ rerank_top_n = 40
927948
928949 seeds = _retrieve_seed_nodes (
929950 postgres = postgres ,
@@ -932,6 +953,7 @@ def kg_hybrid_graph_rag(
932953 seed_k = seed_k ,
933954 enable_rerank = enable_rerank ,
934955 rerank_model = rerank_model ,
956+ rerank_top_n = int (rerank_top_n ),
935957 query_embedding = query_embedding ,
936958 )
937959 seed_ids = [s ["id" ] for s in seeds ]
@@ -989,12 +1011,22 @@ def kg_hybrid_graph_rag(
9891011 utterance_ids .append (uid )
9901012
9911013 edges_filtered : int = 0
1014+ edge_rank_filter_skipped_no_scores = False
9921015 if edge_rank_threshold is not None :
993- edges_before_filter = len (edges )
994- edges = [e for e in edges if e .get ("edge_rank_score" , 0.0 ) >= edge_rank_threshold ]
995- edges_filtered = edges_before_filter - len (edges )
996- if edges_filtered > 0 :
997- edges = edges [:max_edges ]
1016+ has_rank_scores = any (e .get ("edge_rank_score" ) is not None for e in edges )
1017+ if has_rank_scores :
1018+ edges_before_filter = len (edges )
1019+ edges = [
1020+ e
1021+ for e in edges
1022+ if e .get ("edge_rank_score" ) is not None
1023+ and float (e .get ("edge_rank_score" ) or 0.0 ) >= edge_rank_threshold
1024+ ]
1025+ edges_filtered = edges_before_filter - len (edges )
1026+ if edges_filtered > 0 :
1027+ edges = edges [:max_edges ]
1028+ else :
1029+ edge_rank_filter_skipped_no_scores = True
9981030
9991031 citations = _hydrate_citations (
10001032 postgres = postgres ,
@@ -1013,6 +1045,7 @@ def kg_hybrid_graph_rag(
10131045 ** debug_info ,
10141046 "edge_rank_threshold" : float (edge_rank_threshold ),
10151047 "edges_filtered_by_threshold" : edges_filtered ,
1048+ "edge_rank_filter_skipped_no_scores" : edge_rank_filter_skipped_no_scores ,
10161049 }
10171050
10181051 return {
0 commit comments