22import google .generativeai as genai
33from typing import Dict , Any , List
44from ..dependencies import get_settings
5+ from ..utils .stream_utils import create_metadata_chunk
56
67log = logging .getLogger (__name__ )
78
@@ -12,19 +13,42 @@ def __init__(self):
1213 raise ValueError ("GEMINI_API_KEY not found" )
1314 genai .configure (api_key = settings .gemini_api_key )
1415 self .model_name = 'gemini-2.5-pro' # Or settings.GEMINI_COMPOSER_MODEL
16+
17+ def _build_citation_from_node_id (self , node_id : str ) -> str :
18+ """Parse node_id to build citation title"""
19+ import re
20+ parts = []
21+
22+ article_match = re .search (r'_Dieu_(\d+)' , node_id )
23+ clause_match = re .search (r'_Khoan_(\d+)' , node_id )
24+ point_match = re .search (r'_Diem_([a-z])' , node_id )
25+ doc_match = re .match (r'^(luat|nghi-dinh|thong-tu)-(\d+)-(\d+)-([A-Z\-]+)' , node_id )
26+
27+ if clause_match :
28+ parts .append (f"Khoản { clause_match .group (1 )} " )
29+ if point_match :
30+ parts .append (f"Điểm { point_match .group (1 )} " )
31+ if article_match :
32+ parts .append (f"Điều { article_match .group (1 )} " )
33+ if doc_match :
34+ doc_type , number , year , issuer = doc_match .groups ()
35+ type_map = {"luat" : "Luật" , "nghi-dinh" : "Nghị định" , "thong-tu" : "Thông tư" }
36+ parts .append (f"của { type_map .get (doc_type , 'Văn bản' )} { number } /{ year } /{ issuer } " )
37+
38+ return ", " .join (parts ) if parts else f"Node { node_id } "
1539
1640 def compose (
1741 self ,
1842 question : str ,
19- graph_result : Dict [str , Any ],
43+ search_result : Dict [str , Any ], # Chứa kết quả từ vector + graph search
2044 web_context : str = ""
2145 ) -> str :
2246 """
2347 Tổng hợp câu trả lời cuối cùng.
2448 """
2549 log .info (f"Composing answer for question: { question } " )
26- final_choice = graph_result .get ("final_choice" )
27- all_candidates = graph_result .get ("all_candidates" , [])
50+ final_choice = search_result .get ("final_choice" )
51+ all_candidates = search_result .get ("all_candidates" , [])
2852
2953 # Xây dựng context từ Graph
3054 graph_context_str = ""
@@ -79,6 +103,31 @@ def compose(
79103 else :
80104 graph_context_str = "Không tìm thấy thông tin trong cơ sở dữ liệu luật nội bộ."
81105
106+ # Build citations từ all_candidates cho FE
107+ citations = []
108+ if all_candidates :
109+ for idx , candidate in enumerate (all_candidates , 1 ):
110+ node_id = candidate .get ("source_node_id" , "" )
111+
112+ # Format title từ metadata
113+ parts = []
114+ if candidate .get ("clause_number" ):
115+ parts .append (f"Khoản { candidate ['clause_number' ]} " )
116+ if candidate .get ("point_letter" ):
117+ parts .append (f"Điểm { candidate ['point_letter' ]} " )
118+ if candidate .get ("article_number" ):
119+ parts .append (f"Điều { candidate ['article_number' ]} " )
120+ if candidate .get ("document_code" ):
121+ parts .append (f"của { candidate ['document_code' ]} " )
122+
123+ title = ", " .join (parts ) if parts else self ._build_citation_from_node_id (node_id )
124+
125+ # Add to citations for FE
126+ citations .append ({
127+ "node_id" : node_id ,
128+ "title" : title
129+ })
130+
82131 # Xây dựng context từ Web (nếu có)
83132 web_context_str = ""
84133 if web_context :
@@ -143,22 +192,22 @@ def compose(
143192
144193 try :
145194 response = model .generate_content (prompt )
146- return response .text .strip ()
195+ return response .text .strip (), citations
147196 except Exception as e :
148197 log .error (f"Lỗi compose answer: { e } " )
149- return "Xin lỗi, hệ thống gặp sự cố khi tổng hợp câu trả lời."
198+ return "Xin lỗi, hệ thống gặp sự cố khi tổng hợp câu trả lời." , []
150199
151200 def compose_stream (
152201 self ,
153202 question : str ,
154- graph_result : Dict [str , Any ],
203+ search_result : Dict [str , Any ], # Chứa kết quả từ vector + graph search
155204 web_context : str = ""
156205 ):
157206 """
158207 Tổng hợp câu trả lời cuối cùng (Streaming).
159208 """
160- final_choice = graph_result .get ("final_choice" )
161- all_candidates = graph_result .get ("all_candidates" , [])
209+ final_choice = search_result .get ("final_choice" )
210+ all_candidates = search_result .get ("all_candidates" , [])
162211
163212 # Xây dựng context từ Graph (same logic as compose)
164213 graph_context_str = ""
@@ -207,6 +256,33 @@ def compose_stream(
207256 else :
208257 graph_context_str = "Không tìm thấy thông tin trong cơ sở dữ liệu luật nội bộ."
209258
259+ # Build citations từ all_candidates (duplicated from compose for consistency)
260+ citations = []
261+ log .info (f"DEBUG: all_candidates count = { len (all_candidates )} " )
262+ if all_candidates :
263+ log .info (f"DEBUG: Building citations from { len (all_candidates )} candidates" )
264+ for idx , candidate in enumerate (all_candidates , 1 ):
265+ node_id = candidate .get ("source_node_id" , "" )
266+
267+ # Format title từ metadata
268+ parts = []
269+ if candidate .get ("clause_number" ):
270+ parts .append (f"Khoản { candidate ['clause_number' ]} " )
271+ if candidate .get ("point_letter" ):
272+ parts .append (f"Điểm { candidate ['point_letter' ]} " )
273+ if candidate .get ("article_number" ):
274+ parts .append (f"Điều { candidate ['article_number' ]} " )
275+ if candidate .get ("document_code" ):
276+ parts .append (f"của { candidate ['document_code' ]} " )
277+
278+ title = ", " .join (parts ) if parts else self ._build_citation_from_node_id (node_id )
279+
280+ # Add to citations for FE
281+ citations .append ({
282+ "node_id" : node_id ,
283+ "title" : title
284+ })
285+
210286 # Xây dựng context từ Web (nếu có)
211287 web_context_str = ""
212288 if web_context :
@@ -270,6 +346,10 @@ def compose_stream(
270346 )
271347
272348 try :
349+ # Yield metadata first với citations
350+ if citations :
351+ yield create_metadata_chunk (citations )
352+
273353 response = model .generate_content (prompt , stream = True )
274354 for chunk in response :
275355 if chunk .text :
0 commit comments