-
Notifications
You must be signed in to change notification settings - Fork 150
Open
Description
涉及到提取chunk的函数是
def _extract_chunk_ids_from_triples(self, scored_triples: List[Tuple[str, str, str, float]]) -> set:
    """Extract chunk IDs from nodes in scored triples.

    For every ``(h, r, t, score)`` triple, the head ``h`` and the tail ``t``
    are each tested for membership in ``self.graph.nodes``. When present, the
    corresponding node is passed to ``self._get_node_chunk_id`` and any truthy
    result is stored as a string.

    Args:
        scored_triples: Tuples of ``(head, relation, tail, score)``.

    Returns:
        Set of chunk IDs (as strings) collected from head and tail nodes.
    """
    chunk_ids = set()
    for h, r, t, score in scored_triples:
        # NOTE(review): this membership test only succeeds when h is itself a
        # key of self.graph.nodes; if h is an entity name rather than a node
        # ID, nothing is found here - confirm how nodes are keyed.
        if h in self.graph.nodes:
            chunk_id = self._get_node_chunk_id(self.graph.nodes[h])
            if chunk_id:
                chunk_ids.add(str(chunk_id))
        # Same direct-key lookup for the tail of the triple.
        if t in self.graph.nodes:
            chunk_id = self._get_node_chunk_id(self.graph.nodes[t])
            if chunk_id:
                chunk_ids.add(str(chunk_id))
    return chunk_ids
其中的 `if h in self.graph.nodes:` 这部分，h 的形式类似于“小A”，
但是 self.graph.nodes 的形式是：
# Example of the node mapping's shape: keys are node IDs such as "entity_0",
# while the entity name is stored under properties["name"].
self.graph.nodes = {
    "entity_0": {
        "label": "entity",
        "properties": {
            "name": "小B",
            "chunk id": "XXX1"
        }
    }
}
所以无法通过 h 找到 node，进而也找不到 chunk id，可能需要改成：
def _extract_chunk_ids_from_triples(self, scored_triples: List[Tuple[str, str, str, float]]) -> set:
    """Extract chunk IDs for the head and tail entities of scored triples.

    Each head/tail value is treated as an entity *name* and resolved to a
    node ID through ``self._find_node_by_name``; the resolved node is then
    passed to ``self._get_node_chunk_id`` and any truthy result is collected
    as a string.

    Args:
        scored_triples: Tuples of ``(head, relation, tail, score)``.

    Returns:
        Set of chunk IDs (as strings) found for the head and tail entities.
    """
    chunk_ids = set()
    # Per-call memo of name -> node ID (or None) so an entity that appears in
    # many triples is resolved only once instead of rescanning every node.
    resolved = {}
    for h, r, t, _score in scored_triples:
        logger.info("Extracting chunk IDs from triple: (%s, %s, %s)", h, r, t)
        for entity in (h, t):
            if entity not in resolved:
                resolved[entity] = self._find_node_by_name(entity)
            node_id = resolved[entity]
            # Compare against None explicitly: a falsy node ID such as 0 is
            # still a valid node and must not be reported as missing.
            if node_id is None:
                logger.warning("Cannot find node ID for entity '%s'", entity)
                continue
            logger.info("Found node ID %s for entity '%s'", node_id, entity)
            chunk_id = self._get_node_chunk_id(self.graph.nodes[node_id])
            if chunk_id:
                chunk_ids.add(str(chunk_id))
    return chunk_ids
def _find_node_by_name(self, entity_name: str) -> Optional[str]:
    """Return the ID of the first node whose ``properties['name']`` matches.

    Nodes are visited in the order yielded by ``self.graph.nodes(data=True)``.

    Args:
        entity_name: Entity name to look for.

    Returns:
        The matching node ID, or ``None`` when no node carries that name.
    """
    for node_id, node_data in self.graph.nodes(data=True):
        # `.get('properties', {})` still yields None when the key exists with
        # a None value; `or {}` keeps such nodes from raising AttributeError.
        properties = node_data.get('properties') or {}
        if properties.get('name') == entity_name:
            return node_id
    return None
我这边实测：如果保持原有的函数，在组装 context_initial 时，chunk 部分为空；改了之后 chunk 部分有值了。
Metadata
Metadata
Assignees
Labels
No labels