Small renaming: base_uri -> uri, parent_document_id -> parent_id

nullchimp · nullchimp · commit 46e9b2ee5a5e · 2025-05-19T03:07:59.000+02:00
diff --git a/docs/ADRs/RAG-Integration.md b/docs/ADRs/RAG-Integration.md
@@ -40,9 +40,9 @@ Implementing a RAG system with a graph database addresses these limitations by p
    The implementation uses these core node types:
 
    ```
-   (:Source {id, name, type, base_uri})  # Origin of documents
+   (:Source {id, name, type, uri})  # Origin of documents
    (:Document {id, path, content, title, source_id})  # Full documents
-   (:DocumentChunk {id, path, content, parent_document_id, chunk_index})  # Document portions
+   (:DocumentChunk {id, path, content, parent_id, chunk_index})  # Document portions
    (:VectorStore {id, model, status})  # Vector embedding configuration
    (:Vector {id, chunk_id, vector_store_id, embedding})  # Actual embeddings
    (:Interaction {id, session_id, content, role})  # Chat messages
@@ -368,7 +368,7 @@ class TextSplitter:
 class ChunkMetadata(DocumentMetadata):
     chunk_index: int
     chunk_count: int
-    parent_document_id: str
+    parent_id: str
     parent_document_path: str
     is_chunk: bool
 
diff --git a/docs/rag_architecture.md b/docs/rag_architecture.md
@@ -74,15 +74,15 @@ graph TD
 The RAG system uses the following node types:
 
 1. **Source**: Represents the origin of documents
-   - Properties: name, type, base_uri
+   - Properties: name, type, uri
    - Types: "file", "website", etc.
 
 2. **Document**: Represents a full document with content and metadata
    - Properties: path, content, title, source_id, reference_ids
    - Linked to Source with SOURCED_FROM relationship
 
 3. **DocumentChunk**: Represents a portion of a document for embedding
-   - Properties: path, content, content_hash, parent_document_id, chunk_index
+   - Properties: path, content, content_hash, parent_id, chunk_index
    - Linked to Document with CHUNK_OF relationship
 
 4. **VectorStore**: Represents embedding storage configuration
diff --git a/docs/rag_edge_relationships.md b/docs/rag_edge_relationships.md
@@ -48,7 +48,7 @@ When documents are processed through the RAG system:
 1. **Source Creation**: Register content origin
    ```python
    # Create source node based on content type
-   source = Source(name=domain, type="website", base_uri=url)
+   source = Source(name=domain, type="website", uri=url)
    db.create_source(source)
    ```
 
@@ -67,7 +67,7 @@ When documents are processed through the RAG system:
        chunk = DocumentChunk(
            path=path,
            content=node.text,
-           parent_document_id=document.id,
+           parent_id=document.id,
            chunk_index=idx
        )
        db.create_chunk(chunk)
@@ -91,14 +91,14 @@ Relationships are established during entity creation:
 def create_chunk(self, chunk: DocumentChunk) -> str:
     self._execute(*chunk.create())
     
-    if not chunk.parent_document_id:
-        raise ValueError("DocumentChunk must have a parent_document_id")
+    if not chunk.parent_id:
+        raise ValueError("DocumentChunk must have a parent_id")
     
     # Create CHUNK_OF relationship from chunk to document
     self._execute(*chunk.link(
         EdgeType.CHUNK_OF,
         Document.label(),
-        chunk.parent_document_id,
+        chunk.parent_id,
     ))
 ```
 
diff --git a/examples/9-test.py b/examples/9-test.py
@@ -88,7 +88,7 @@ async def test_vector_search(query_text: str):
     doc_ids = set()
     for result in search_results:
         chunk = result["chunk"]
-        doc_id = chunk["parent_document_id"]
+        doc_id = chunk["parent_id"]
         if not doc_id in doc_ids:
             doc_ids.add(doc_id)
     
@@ -101,13 +101,13 @@ async def test_vector_search(query_text: str):
         print(f"Document ID: {doc_id}")
         refs = db.get_references(doc_id)
         for ref in refs:
-            ref_uri = ref["base_uri"]
+            ref_uri = ref["uri"]
             if not ref_uri in references:
                 references.add(ref_uri)
         
         sources = db.get_sources(doc_id)
         print(f"Sources: {sources}")
-        source_uris = [source["base_uri"] for source in sources]
+        source_uris = [source["uri"] for source in sources]
         data.append({
             "sources": source_uris,
             "content": document["content"],
diff --git a/src/core/rag/dbhandler/__init__.py b/src/core/rag/dbhandler/__init__.py
@@ -100,13 +100,13 @@ def create_document(self, doc: Document) -> str:
     def create_chunk(self, chunk: DocumentChunk) -> str:
         self._execute(*chunk.create())
         
-        if not chunk.parent_document_id:
-            raise ValueError("DocumentChunk must have a parent_document_id")
+        if not chunk.parent_id:
+            raise ValueError("DocumentChunk must have a parent_id")
         
         self._execute(*chunk.link(
             EdgeType.CHUNK_OF,
             Document.label(),
-            chunk.parent_document_id,
+            chunk.parent_id,
         ))
 
     def create_source(self, source: Source) -> str:
diff --git a/src/core/rag/schema.py b/src/core/rag/schema.py
@@ -145,15 +145,15 @@ def __init__(
         self,
         path: str,
         content: str,                   # Keep content in the graph for direct access
-        parent_document_id: str,
+        parent_id: str,
         chunk_index: int = 0,
         token_count: int = 0,
     ):
         super().__init__()
         self.path = path
         self.content = content
         self.content_hash =  hashlib.sha256(content.encode()).hexdigest()
-        self.parent_id = parent_document_id
+        self.parent_id = parent_id
         self.chunk_index = chunk_index
         self.token_count = token_count
 
diff --git a/src/libs/dataloader/document.py b/src/libs/dataloader/document.py
@@ -17,7 +17,7 @@ def create_source(self, source_path) -> Document:
         source = Source(
             name=source_path.split("/")[-1],
             type="file",
-            base_uri=source_path
+            uri=source_path
         )
 
         source.id = hashlib.sha256(self.path.encode()).hexdigest()[:16]
@@ -66,7 +66,7 @@ def load_data(self) -> Generator[Tuple[Document, List[DocumentChunk]], Source]:
                     doc_chunk = DocumentChunk(
                         path=path,
                         content=chunk_content,
-                        parent_document_id=document.id,
+                        parent_id=document.id,
                         chunk_index=idx,
                         token_count=len(chunk_content.split())
                     )
diff --git a/src/libs/dataloader/web.py b/src/libs/dataloader/web.py
@@ -44,7 +44,7 @@ def create_source(self, source_url) -> Source:
         source = Source(
             name=domain,
             type="website",
-            base_uri=source_url
+            uri=source_url
         )
         
         source.id = hashlib.sha256(source_url.encode()).hexdigest()[:16]
@@ -168,7 +168,7 @@ def load_data(self) -> Generator[Tuple[Source, Document, List[DocumentChunk]], N
                     doc_chunk = DocumentChunk(
                         path=display_url,
                         content=chunk_content,
-                        parent_document_id=document.id,
+                        parent_id=document.id,
                         chunk_index=idx,
                         token_count=len(chunk_content.split())
                     )