Skip to content

源码中根据提取的三元组,找对应的node,再根据node,找对应的chunk;但是在具体实现中,好像不能提取到对应的chunk。 #160

@Kylinin0817

Description

@Kylinin0817

涉及到提取chunk的函数是

def _extract_chunk_ids_from_triples(self, scored_triples: List[Tuple[str, str, str, float]]) -> set:
        """Extract chunk IDs from nodes in scored triples.

        For every ``(h, r, t, score)`` triple, the head ``h`` and tail ``t``
        are tested for membership in ``self.graph.nodes``. When present, the
        node's attributes are passed to ``self._get_node_chunk_id`` and any
        truthy result is added to the returned set as a string.

        NOTE(review): according to this report, ``self.graph.nodes`` is keyed
        by node IDs (e.g. ``"entity_0"``) while ``h`` / ``t`` hold entity
        names, so both membership tests reportedly never match and the
        returned set stays empty -- confirm against the graph builder.
        """
        chunk_ids = set()
        
        for h, r, t, score in scored_triples:
            # Head entity: looked up directly as a key of the node store.
            if h in self.graph.nodes:
                chunk_id = self._get_node_chunk_id(self.graph.nodes[h])
                if chunk_id:
                    chunk_ids.add(str(chunk_id))
            
            # Tail entity: same lookup as for the head.
            if t in self.graph.nodes:
                chunk_id = self._get_node_chunk_id(self.graph.nodes[t])
                if chunk_id:
                    chunk_ids.add(str(chunk_id))
                    
        return chunk_ids

其中的if h in self.graph.nodes:这部分,h的形式类似于“小A”,
但是self.graph.nodes的形式是

# Illustrative shape of the node store as described in this report:
# the key is a node ID, and the entity name is nested under properties["name"].
self.graph.nodes = {
    "entity_0": {  # node ID (the lookup key), not the entity name
        "label": "entity",
        "properties": {
            "name": "小B",  # entity name; triples reportedly carry values of this form
            "chunk id": "XXX1"
        }
    }
}

所以导致无法通过h找到node,再找到chunk id,可能需要改成

def _extract_chunk_ids_from_triples(self, scored_triples: List[Tuple[str, str, str, float]]) -> set:
        """Extract chunk IDs from the nodes referenced by scored triples.

        The head and tail of each triple are entity *names*. Each name is
        resolved to a node ID through ``self._find_node_by_name``; the node's
        attributes are then passed to ``self._get_node_chunk_id``.

        Args:
            scored_triples: ``(head, relation, tail, score)`` tuples. Only the
                head and tail are used for the lookup; the relation appears
                in the log output and the score is ignored.

        Returns:
            A set with the string form of every truthy chunk ID found for a
            resolved head or tail node. Empty when nothing resolves.
        """
        chunk_ids = set()
        # Name -> node ID results for this call, so an entity that appears in
        # several triples is resolved only once.
        resolved_ids = {}

        for h, r, t, _score in scored_triples:
            logger.info(f"Extracting chunk IDs from triple: ({h}, {r}, {t})")

            for entity in (h, t):
                if entity not in resolved_ids:
                    resolved_ids[entity] = self._find_node_by_name(entity)
                node_id = resolved_ids[entity]

                # Compare against None explicitly: a falsy but valid node ID
                # (0 or "") must not be reported as missing.
                if node_id is None:
                    logger.warning(f"Cannot find node ID for entity '{entity}'")
                    continue

                logger.info(f"Found node ID {node_id} for entity '{entity}'")
                chunk_id = self._get_node_chunk_id(self.graph.nodes[node_id])
                if chunk_id:
                    chunk_ids.add(str(chunk_id))

        return chunk_ids

    def _find_node_by_name(self, entity_name: str) -> Optional[str]:
        """Return the ID of the first node whose ``properties['name']`` equals *entity_name*.

        Nodes are visited in the order produced by
        ``self.graph.nodes(data=True)``. Returns ``None`` when no node matches.
        """
        matching_ids = (
            candidate_id
            for candidate_id, attrs in self.graph.nodes(data=True)
            if attrs.get('properties', {}).get('name') == entity_name
        )
        # next() with a default stops at the first match, or yields None.
        return next(matching_ids, None)

我这边跑出来,如果保持原有的函数,在组装context_initial时,chunk部分为空。改了之后chunk部分有值了。

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions