diff --git a/pyproject.toml b/pyproject.toml
index 4df6392..f287be6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ streamlit = [
 "Bug Tracker" = "https://github.com/Intugle/data-tools/issues"
 
 [project.scripts]
+intugle = "intugle.cli:main"
 intugle-mcp = "intugle.mcp.server:main"
 intugle-streamlit = "intugle.cli:run_streamlit_app"
diff --git a/src/intugle/__init__.py b/src/intugle/__init__.py
index 272058c..ed7fb5e 100644
--- a/src/intugle/__init__.py
+++ b/src/intugle/__init__.py
@@ -1,3 +1,14 @@
 from intugle.analysis.models import DataSet as DataSet
 from intugle.data_product import DataProduct as DataProduct
 from intugle.semantic_model import SemanticModel as SemanticModel
+
+__all__ = ["DataSet", "DataProduct", "SemanticModel"]
+
+# Expose text processor for unstructured text-to-semantic conversion
+try:
+    from intugle.text_processor import TextToSemanticProcessor  # noqa: F401
+
+    __all__.append("TextToSemanticProcessor")
+except ImportError:
+    # Text processor dependencies might not be available
+    pass
diff --git a/src/intugle/cli.py b/src/intugle/cli.py
index ab126a9..a4c9c39 100644
--- a/src/intugle/cli.py
+++ b/src/intugle/cli.py
@@ -1,6 +1,16 @@
+import argparse
 import importlib.util
+import json
+import logging
 import os
 import subprocess
+import sys
+
+# Setup basic logging for CLI commands
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
 
 
 def run_streamlit_app():
@@ -30,7 +40,7 @@ def run_streamlit_app():
     # Get the absolute path to the main.py of the Streamlit app
     app_dir = os.path.join(os.path.dirname(__file__), 'streamlit_app')
     app_path = os.path.join(app_dir, 'main.py')
-    
+
     # Ensure the app_path exists
     if not os.path.exists(app_path):
         print(f"Error: Streamlit app not found at {app_path}")
@@ -41,5 +51,109 @@ def run_streamlit_app():
     subprocess.run(["streamlit", "run", app_path], cwd=app_dir)
 
 
+def run_text_to_semantic(args):
+    """Execute the text-to-semantic conversion command."""
+    try:
+        from intugle.text_processor import TextToSemanticProcessor
+    except ImportError as e:
+        logger.error("Text processor not available.")
+        logger.error(f"Error: {e}")
+        sys.exit(1)
+
+    try:
+        # Read input text
+        if args.input == "-":
+            text = sys.stdin.read()
+        else:
+            with open(args.input, "r", encoding="utf-8") as f:
+                text = f.read()
+
+        logger.info(f"Processing text of length {len(text)} characters")
+
+        # Initialize processor
+        processor = TextToSemanticProcessor(
+            model=args.model,
+            output_format=args.format,
+        )
+
+        # Parse text to RDF
+        logger.info("Extracting entities and relationships...")
+        rdf_graph = processor.parse(text)
+
+        logger.info(
+            f"Extracted {len(rdf_graph.entities)} entities, "
+            f"{len(rdf_graph.relationships)} relationships, "
+            f"{len(rdf_graph.triples)} triples"
+        )
+
+        # Output results
+        if args.output_format == "turtle":
+            output = rdf_graph.to_turtle()
+        elif args.output_format == "json-ld":
+            output = json.dumps(rdf_graph.to_json_ld(), indent=2)
+        else:  # json
+            output = json.dumps(rdf_graph.to_dict(), indent=2)
+
+        if args.output:
+            os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)
+            with open(args.output, "w", encoding="utf-8") as f:
+                f.write(output)
+            logger.info(f"Output written to: {args.output}")
+        else:
+            print(output)
+
+        logger.info("Text-to-semantic conversion complete.")
+
+    except Exception as e:
+        logger.error(f"Job failed: {str(e)}")
+        sys.exit(1)
+
+
+def main():
+    """Main entry point for the intugle CLI."""
+    parser = argparse.ArgumentParser(
+        description="Intugle - GenAI-powered semantic layer toolkit"
+    )
+    subparsers = parser.add_subparsers(dest="command", help="Available commands")
+
+    # Streamlit command
+    subparsers.add_parser("streamlit", help="Launch the Streamlit web application")
+
+    # Text-to-semantic command
+    text_parser = subparsers.add_parser(
+        "text-to-semantic", help="Convert unstructured text to RDF/semantic triples"
+    )
+    text_parser.add_argument(
+        "--input", "-i", required=True,
+        help="Input text file path (use '-' for stdin)"
+    )
+    text_parser.add_argument(
+        "--output", "-o",
+        help="Output file path (prints to stdout if not specified)"
+    )
+    text_parser.add_argument(
+        "--model", "-m", default="gpt-4o-mini",
+        help="LLM model for extraction (default: gpt-4o-mini)"
+    )
+    text_parser.add_argument(
+        "--format", "-f", choices=["rdf", "rdf_star"], default="rdf_star",
+        help="RDF format: 'rdf' or 'rdf_star' (default: rdf_star)"
+    )
+    text_parser.add_argument(
+        "--output-format", choices=["json", "turtle", "json-ld"], default="json",
+        help="Output format: json, turtle, or json-ld (default: json)"
+    )
+
+    args = parser.parse_args()
+
+    if args.command == "streamlit":
+        run_streamlit_app()
+    elif args.command == "text-to-semantic":
+        run_text_to_semantic(args)
+    else:
+        parser.print_help()
+
+
 if __name__ == "__main__":
-    run_streamlit_app()
+    main()
+
diff --git a/src/intugle/semantic_model.py b/src/intugle/semantic_model.py
index 31d1af0..20cbf93 100644
--- a/src/intugle/semantic_model.py
+++ b/src/intugle/semantic_model.py
@@ -195,6 +195,55 @@ def search(self, query: str):
             log.error(f"Could not perform semantic search: {e}")
             raise e
 
+    def overlay(self, rdf_graph: Any, match_threshold: float = 0.85) -> "SemanticModel":
+        """
+        Overlay an RDF graph from unstructured text onto this semantic model.
+
+        Maps extracted entities and relationships from the RDF graph to existing
+        semantic nodes, enabling integration of text-derived knowledge.
+
+        Args:
+            rdf_graph: An RDFGraph instance from TextToSemanticProcessor.
+            match_threshold: Minimum similarity score for entity matching (0.0-1.0).
+
+        Returns:
+            Self for method chaining.
+        """
+        from intugle.text_processor.mapper import SemanticMapper
+
+        console.print(
+            f"Overlaying RDF graph with {len(rdf_graph.entities)} entities...",
+            style="yellow",
+        )
+
+        mapper = SemanticMapper(match_threshold=match_threshold)
+        mapping_results = mapper.map_to_semantic_model(rdf_graph, self)
+
+        # Store mapping results for later use
+        if not hasattr(self, "_text_mappings"):
+            self._text_mappings = []
+        self._text_mappings.extend(mapping_results)
+
+        # Generate suggestions for new nodes
+        new_suggestions = mapper.suggest_new_nodes(mapping_results)
+        if new_suggestions:
+            console.print(
+                f"Found {len(new_suggestions)} unmapped entities that could be new concepts.",
+                style="yellow",
+            )
+
+        matched = sum(1 for r in mapping_results if not r.is_new)
+        console.print(
+            f"Overlay complete: {matched}/{len(mapping_results)} entities matched to existing nodes.",
+            style="bold green",
+        )
+
+        return self
+
+    def get_text_mappings(self) -> list:
+        """Get all text-to-semantic mappings from overlay operations."""
+        return getattr(self, "_text_mappings", [])
+
     def deploy(self, target: str, **kwargs):
         """
         Deploys the semantic model to a specified target platform based on the persisted YAML files.
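
[Reviewer note] A minimal usage sketch of the new overlay API, illustrative only and
not part of the diff. It assumes an already-built SemanticModel `sm` (constructor
arguments elided) and LLM credentials in the environment:

    from intugle import SemanticModel, TextToSemanticProcessor

    processor = TextToSemanticProcessor(model="gpt-4o-mini", output_format="rdf_star")
    rdf_graph = processor.parse("Invoice 123 was issued by Vendor A for $5,400.")

    sm = SemanticModel(...)  # an existing, already-profiled semantic model
    sm.overlay(rdf_graph, match_threshold=0.85)

    # Inspect how text-derived entities were matched to existing columns
    for mapping in sm.get_text_mappings():
        print(mapping.to_dict())
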
diff --git a/src/intugle/text_processor/__init__.py b/src/intugle/text_processor/__init__.py
new file mode 100644
index 0000000..6e228bf
--- /dev/null
+++ b/src/intugle/text_processor/__init__.py
@@ -0,0 +1,17 @@
+"""
+Unstructured Text Processor Module.
+
+Provides functionality to convert unstructured text into RDF triples
+and map them to the existing Semantic Model.
+"""
+
+from intugle.text_processor.models import Entity, RDFGraph, RDFTriple, Relationship
+from intugle.text_processor.processor import TextToSemanticProcessor
+
+__all__ = [
+    "TextToSemanticProcessor",
+    "RDFTriple",
+    "RDFGraph",
+    "Entity",
+    "Relationship",
+]
diff --git a/src/intugle/text_processor/extractors/__init__.py b/src/intugle/text_processor/extractors/__init__.py
new file mode 100644
index 0000000..07a3962
--- /dev/null
+++ b/src/intugle/text_processor/extractors/__init__.py
@@ -0,0 +1,6 @@
+"""Extractors subpackage for NLP backends."""
+
+from intugle.text_processor.extractors.base import BaseExtractor
+from intugle.text_processor.extractors.llm_extractor import LLMExtractor
+
+__all__ = ["BaseExtractor", "LLMExtractor"]
diff --git a/src/intugle/text_processor/extractors/base.py b/src/intugle/text_processor/extractors/base.py
new file mode 100644
index 0000000..491e9fb
--- /dev/null
+++ b/src/intugle/text_processor/extractors/base.py
@@ -0,0 +1,57 @@
+"""Base extractor interface for pluggable NLP backends."""
+
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+
+from intugle.text_processor.models import Entity, Relationship
+
+
+class BaseExtractor(ABC):
+    """
+    Abstract base class for text extractors.
+
+    Implementations should extract entities and relationships from text.
+    """
+
+    @abstractmethod
+    def extract_entities(self, text: str) -> List[Entity]:
+        """
+        Extract named entities from text.
+
+        Args:
+            text: Input text to process.
+
+        Returns:
+            List of extracted Entity objects.
+        """
+        pass
+
+    @abstractmethod
+    def extract_relationships(
+        self, text: str, entities: List[Entity]
+    ) -> List[Relationship]:
+        """
+        Extract relationships between entities.
+
+        Args:
+            text: Input text to process.
+            entities: Previously extracted entities.
+
+        Returns:
+            List of extracted Relationship objects.
+        """
+        pass
+
+    def extract(self, text: str) -> Tuple[List[Entity], List[Relationship]]:
+        """
+        Full extraction pipeline: entities then relationships.
+
+        Args:
+            text: Input text to process.
+
+        Returns:
+            Tuple of (entities, relationships).
+        """
+        entities = self.extract_entities(text)
+        relationships = self.extract_relationships(text, entities)
+        return entities, relationships
diff --git a/src/intugle/text_processor/extractors/llm_extractor.py b/src/intugle/text_processor/extractors/llm_extractor.py
new file mode 100644
index 0000000..0c4d97a
--- /dev/null
+++ b/src/intugle/text_processor/extractors/llm_extractor.py
@@ -0,0 +1,201 @@
+"""
+LLM-based extractor using LangChain infrastructure.
+
+Uses the existing LangChain setup in intugle for entity and relationship extraction.
+""" + +import hashlib +import logging + +from typing import List + +from pydantic import BaseModel, Field + +from intugle.text_processor.extractors.base import BaseExtractor +from intugle.text_processor.models import Entity, Relationship + +log = logging.getLogger(__name__) + + +class ExtractedEntity(BaseModel): + """Schema for LLM-extracted entity.""" + + text: str = Field(..., description="The entity text as it appears in the document") + label: str = Field( + ..., + description="Entity type: PERSON, ORGANIZATION, LOCATION, DATE, MONEY, PRODUCT, DOCUMENT, or OTHER", + ) + normalized_id: str = Field( + ..., description="A normalized identifier (e.g., 'Invoice_123' from 'Invoice 123')" + ) + + +class ExtractedRelationship(BaseModel): + """Schema for LLM-extracted relationship.""" + + subject: str = Field(..., description="The normalized_id of the subject entity") + predicate: str = Field( + ..., + description="The relationship type in camelCase (e.g., 'hasAmount', 'issuedBy', 'locatedIn')", + ) + object: str = Field( + ..., description="The normalized_id of the object entity OR a literal value" + ) + + +class ExtractionResult(BaseModel): + """Combined extraction result from LLM.""" + + entities: List[ExtractedEntity] = Field(default_factory=list) + relationships: List[ExtractedRelationship] = Field(default_factory=list) + + +class LLMExtractor(BaseExtractor): + """ + LLM-based entity and relationship extractor. + + Uses LangChain structured output for reliable extraction. + Supports multiple LLM providers via LLM_PROVIDER environment variable. + """ + + def __init__(self, model_name: str = "gpt-4o-mini"): + """ + Initialize the LLM extractor. + + Args: + model_name: Name of the LLM model to use. + """ + self.model_name = model_name + self._llm = None + + def _get_llm(self): + """Lazy initialization of LLM based on provider.""" + if self._llm is None: + import os + provider = os.environ.get("LLM_PROVIDER", "openai").lower() + + if provider == "google-genai" or self.model_name.startswith("gemini"): + from langchain_google_genai import ChatGoogleGenerativeAI + model = self.model_name if self.model_name.startswith("gemini") else "gemini-2.5-flash" + self._llm = ChatGoogleGenerativeAI(model=model, temperature=0) + else: + from langchain_openai import ChatOpenAI + self._llm = ChatOpenAI(model=self.model_name, temperature=0) + + return self._llm + + def _generate_entity_id(self, text: str, label: str) -> str: + """Generate a unique ID for an entity.""" + content = f"{label}:{text}".lower() + return hashlib.md5(content.encode()).hexdigest()[:8] + + def extract_entities(self, text: str) -> List[Entity]: + """Extract entities using LLM.""" + llm = self._get_llm() + + prompt = f"""Extract all named entities from the following text. 
+For each entity, identify:
+- The exact text as it appears
+- The entity type (PERSON, ORGANIZATION, LOCATION, DATE, MONEY, PRODUCT, DOCUMENT, or OTHER)
+- A normalized identifier (convert spaces to underscores, remove special chars)
+
+Text:
+{text}
+
+Extract entities:"""
+
+        structured_llm = llm.with_structured_output(ExtractionResult)
+        result: ExtractionResult = structured_llm.invoke(prompt)
+
+        entities = []
+        for ext in result.entities:
+            entity = Entity(
+                id=ext.normalized_id or self._generate_entity_id(ext.text, ext.label),
+                text=ext.text,
+                label=ext.label,
+                confidence=0.9,
+            )
+            entities.append(entity)
+
+        log.info(f"Extracted {len(entities)} entities from text")
+        return entities
+
+    def extract_relationships(
+        self, text: str, entities: List[Entity]
+    ) -> List[Relationship]:
+        """Extract relationships between entities using the LLM."""
+        if not entities:
+            return []
+
+        llm = self._get_llm()
+
+        entity_list = "\n".join([f"- {e.id} ({e.label}): {e.text}" for e in entities])
+
+        prompt = f"""Given the following text and extracted entities, identify relationships between them.
+
+Text:
+{text}
+
+Entities:
+{entity_list}
+
+For each relationship, specify:
+- subject: The normalized_id of the subject entity
+- predicate: The relationship type in camelCase (e.g., 'hasAmount', 'issuedBy', 'worksFor')
+- object: The normalized_id of the object entity OR a literal value
+
+Extract relationships:"""
+
+        structured_llm = llm.with_structured_output(ExtractionResult)
+        result: ExtractionResult = structured_llm.invoke(prompt)
+
+        relationships = []
+        # Known entity ids; objects may legitimately be literals, so only
+        # subjects are checked against the extracted entities
+        known_ids = {e.id for e in entities}
+
+        for rel in result.relationships:
+            if rel.subject not in known_ids:
+                log.warning(f"Relationship subject '{rel.subject}' is not a known entity id")
+            relationship = Relationship(
+                subject_id=rel.subject,
+                predicate=rel.predicate,
+                object_id=rel.object,
+                confidence=0.85,
+            )
+            relationships.append(relationship)
+
+        log.info(f"Extracted {len(relationships)} relationships from text")
+        return relationships
+
+    def extract_all(self, text: str) -> ExtractionResult:
+        """
+        Single-pass extraction of both entities and relationships.
+
+        More efficient than separate calls as it uses one LLM invocation.
+        """
+        llm = self._get_llm()
+
+        prompt = f"""Analyze the following text and extract:
+1. All named entities (PERSON, ORGANIZATION, LOCATION, DATE, MONEY, PRODUCT, DOCUMENT, or OTHER)
+2. All relationships between entities
+
+For entities:
+- text: The exact text as it appears
+- label: The entity type
+- normalized_id: A normalized identifier (e.g., 'Invoice_123' from 'Invoice 123')
+
+For relationships:
+- subject: The normalized_id of the subject entity
+- predicate: The relationship type in camelCase (e.g., 'hasAmount', 'issuedBy')
+- object: The normalized_id of the object entity OR a literal value
+
+Text:
+{text}
+
+Extract:"""
+
+        structured_llm = llm.with_structured_output(ExtractionResult)
+        result: ExtractionResult = structured_llm.invoke(prompt)
+
+        log.info(
+            f"Single-pass extraction: {len(result.entities)} entities, "
+            f"{len(result.relationships)} relationships"
+        )
+        return result
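
[Reviewer note] Because extractors are pluggable via BaseExtractor, the pipeline can
be exercised without an LLM. A deterministic toy backend, purely illustrative:

    import re
    from typing import List

    from intugle.text_processor.extractors.base import BaseExtractor
    from intugle.text_processor.models import Entity, Relationship


    class RegexInvoiceExtractor(BaseExtractor):
        """Toy extractor that only recognizes 'Invoice <number>' mentions."""

        def extract_entities(self, text: str) -> List[Entity]:
            return [
                Entity(id=f"Invoice_{m.group(1)}", text=m.group(0), label="DOCUMENT")
                for m in re.finditer(r"Invoice (\d+)", text)
            ]

        def extract_relationships(
            self, text: str, entities: List[Entity]
        ) -> List[Relationship]:
            return []  # the toy backend extracts no relationships

Passing extractor=RegexInvoiceExtractor() to TextToSemanticProcessor swaps the LLM
out entirely, which is also how the unit tests below isolate the pipeline.
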
+""" + +import logging + +from difflib import SequenceMatcher +from typing import Any, Dict, List, Optional, Tuple + +from intugle.text_processor.models import Entity, RDFGraph + +log = logging.getLogger(__name__) + + +class MappingResult: + """Result of mapping an entity to a semantic node.""" + + def __init__( + self, + entity: Entity, + matched_table: Optional[str] = None, + matched_column: Optional[str] = None, + confidence: float = 0.0, + is_new: bool = False, + ): + self.entity = entity + self.matched_table = matched_table + self.matched_column = matched_column + self.confidence = confidence + self.is_new = is_new + + def to_dict(self) -> Dict[str, Any]: + return { + "entity_id": self.entity.id, + "entity_text": self.entity.text, + "entity_label": self.entity.label, + "matched_table": self.matched_table, + "matched_column": self.matched_column, + "confidence": self.confidence, + "is_new": self.is_new, + } + + +class SemanticMapper: + """ + Maps RDF graph entities to existing SemanticModel nodes. + + Uses string similarity and optional embeddings for matching. + """ + + def __init__( + self, + match_threshold: float = 0.85, + use_embeddings: bool = False, + ): + """ + Initialize the semantic mapper. + + Args: + match_threshold: Minimum similarity score for a match (0.0-1.0). + use_embeddings: Whether to use embedding-based matching. + """ + self.match_threshold = match_threshold + self.use_embeddings = use_embeddings + + def _string_similarity(self, s1: str, s2: str) -> float: + """Calculate string similarity using SequenceMatcher.""" + s1_lower = s1.lower().replace("_", " ") + s2_lower = s2.lower().replace("_", " ") + return SequenceMatcher(None, s1_lower, s2_lower).ratio() + + def _find_best_match( + self, + entity: Entity, + candidates: List[Tuple[str, str]], # (table_name, column_name) + ) -> Optional[Tuple[str, str, float]]: + """ + Find the best matching candidate for an entity. + + Args: + entity: The entity to match. + candidates: List of (table_name, column_name) tuples to match against. + + Returns: + Tuple of (table_name, column_name, confidence) or None if no match. + """ + best_match = None + best_score = 0.0 + + for table_name, column_name in candidates: + # Compare against column name + score = self._string_similarity(entity.text, column_name) + + # Also try the entity ID + id_score = self._string_similarity(entity.id, column_name) + score = max(score, id_score) + + if score > best_score: + best_score = score + best_match = (table_name, column_name, score) + + if best_match and best_match[2] >= self.match_threshold: + return best_match + return None + + def map_to_semantic_model( + self, + rdf_graph: RDFGraph, + semantic_model: Any, # SemanticModel type + ) -> List[MappingResult]: + """ + Map RDF graph entities to a SemanticModel. + + Args: + rdf_graph: The RDF graph to map. + semantic_model: The target SemanticModel instance. + + Returns: + List of MappingResult objects describing the mappings. 
+ """ + results = [] + + # Extract candidate columns from semantic model datasets + candidates = [] + for dataset_name, dataset in semantic_model.datasets.items(): + if hasattr(dataset, "source") and hasattr(dataset.source, "table"): + for col in dataset.source.table.columns: + candidates.append((dataset_name, col.name)) + + log.info(f"Mapping {len(rdf_graph.entities)} entities against {len(candidates)} candidates") + + for entity in rdf_graph.entities: + match = self._find_best_match(entity, candidates) + + if match: + table_name, column_name, confidence = match + result = MappingResult( + entity=entity, + matched_table=table_name, + matched_column=column_name, + confidence=confidence, + is_new=False, + ) + log.debug( + f"Matched entity '{entity.id}' to {table_name}.{column_name} " + f"(confidence: {confidence:.2f})" + ) + else: + result = MappingResult( + entity=entity, + is_new=True, + ) + log.debug(f"No match found for entity '{entity.id}' - marked as new") + + results.append(result) + + matched = sum(1 for r in results if not r.is_new) + log.info(f"Mapping complete: {matched}/{len(results)} entities matched") + + return results + + def suggest_new_nodes( + self, + mapping_results: List[MappingResult], + ) -> List[Dict[str, Any]]: + """ + Generate suggestions for new semantic nodes from unmapped entities. + + Args: + mapping_results: Results from map_to_semantic_model. + + Returns: + List of suggested new node definitions. + """ + suggestions = [] + + for result in mapping_results: + if result.is_new: + suggestion = { + "suggested_name": result.entity.id, + "entity_type": result.entity.label, + "original_text": result.entity.text, + "suggested_table_name": f"text_extracted_{result.entity.label.lower()}", + } + suggestions.append(suggestion) + + return suggestions diff --git a/src/intugle/text_processor/models.py b/src/intugle/text_processor/models.py new file mode 100644 index 0000000..7329af5 --- /dev/null +++ b/src/intugle/text_processor/models.py @@ -0,0 +1,124 @@ +""" +Pydantic models for RDF triples and graph representation. + +Supports RDF-star annotations for provenance and confidence metadata. +""" + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + + +class Entity(BaseModel): + """Represents an extracted entity from text.""" + + id: str = Field(..., description="Unique identifier for the entity") + text: str = Field(..., description="Original text of the entity") + label: str = Field(..., description="Entity type/label (e.g., PERSON, ORG, MONEY)") + start_char: Optional[int] = Field(None, description="Start character position in source text") + end_char: Optional[int] = Field(None, description="End character position in source text") + confidence: float = Field(1.0, description="Confidence score for the entity extraction") + attributes: Dict[str, Any] = Field(default_factory=dict, description="Additional entity attributes") + + +class Relationship(BaseModel): + """Represents a relationship between two entities.""" + + subject_id: str = Field(..., description="ID of the subject entity") + predicate: str = Field(..., description="Relationship type/predicate") + object_id: str = Field(..., description="ID of the object entity") + confidence: float = Field(1.0, description="Confidence score for the relationship") + + +class RDFTriple(BaseModel): + """ + Represents an RDF triple (subject, predicate, object). + + Supports RDF-star annotations via the metadata field for provenance, + confidence, and other contextual information. 
+ """ + + subject: str = Field(..., description="Subject of the triple") + predicate: str = Field(..., description="Predicate/relationship of the triple") + object: str = Field(..., description="Object of the triple") + object_type: str = Field("literal", description="Type of object: 'uri' or 'literal'") + metadata: Dict[str, Any] = Field( + default_factory=dict, + description="RDF-star annotations (provenance, confidence, source, etc.)", + ) + + def to_turtle(self) -> str: + """Convert triple to Turtle format.""" + obj = f'"{self.object}"' if self.object_type == "literal" else f"<{self.object}>" + return f"<{self.subject}> <{self.predicate}> {obj} ." + + def to_ntriples(self) -> str: + """Convert triple to N-Triples format.""" + return self.to_turtle() + + +class RDFGraph(BaseModel): + """ + Represents a collection of RDF triples forming a graph. + + Includes extracted entities, relationships, and the resulting triples. + """ + + entities: List[Entity] = Field(default_factory=list, description="Extracted entities") + relationships: List[Relationship] = Field(default_factory=list, description="Extracted relationships") + triples: List[RDFTriple] = Field(default_factory=list, description="RDF triples") + source_text: Optional[str] = Field(None, description="Original source text") + metadata: Dict[str, Any] = Field( + default_factory=dict, + description="Graph-level metadata (processing info, model used, etc.)", + ) + + def add_triple( + self, + subject: str, + predicate: str, + obj: str, + object_type: str = "literal", + metadata: Optional[Dict[str, Any]] = None, + ) -> "RDFGraph": + """Add a triple to the graph.""" + triple = RDFTriple( + subject=subject, + predicate=predicate, + object=obj, + object_type=object_type, + metadata=metadata or {}, + ) + self.triples.append(triple) + return self + + def get_entity_by_id(self, entity_id: str) -> Optional[Entity]: + """Retrieve an entity by its ID.""" + for entity in self.entities: + if entity.id == entity_id: + return entity + return None + + def to_turtle(self) -> str: + """Export graph to Turtle format.""" + lines = ["@prefix ex: .", ""] + for triple in self.triples: + lines.append(triple.to_turtle()) + return "\n".join(lines) + + def to_json_ld(self) -> Dict[str, Any]: + """Export graph to JSON-LD format.""" + return { + "@context": {"ex": "http://example.org/"}, + "@graph": [ + { + "@id": t.subject, + t.predicate: {"@value": t.object} if t.object_type == "literal" else {"@id": t.object}, + } + for t in self.triples + ], + } + + def to_dict(self) -> Dict[str, Any]: + """Export graph as a dictionary.""" + return self.model_dump() diff --git a/src/intugle/text_processor/processor.py b/src/intugle/text_processor/processor.py new file mode 100644 index 0000000..e457401 --- /dev/null +++ b/src/intugle/text_processor/processor.py @@ -0,0 +1,136 @@ +""" +Main TextToSemanticProcessor class. + +High-level orchestrator for converting unstructured text to RDF graphs. +""" + +import logging + +from typing import Any, Dict, Literal, Optional + +from intugle.text_processor.extractors.base import BaseExtractor +from intugle.text_processor.extractors.llm_extractor import LLMExtractor +from intugle.text_processor.models import Entity, RDFGraph, Relationship +from intugle.text_processor.rdf.builder import RDFBuilder + +log = logging.getLogger(__name__) + + +class TextToSemanticProcessor: + """ + High-level orchestrator for converting unstructured text to RDF graphs. 
diff --git a/src/intugle/text_processor/processor.py b/src/intugle/text_processor/processor.py
new file mode 100644
index 0000000..e457401
--- /dev/null
+++ b/src/intugle/text_processor/processor.py
@@ -0,0 +1,136 @@
+"""
+Main TextToSemanticProcessor class.
+
+High-level orchestrator for converting unstructured text to RDF graphs.
+"""
+
+import logging
+
+from typing import Any, Dict, Literal, Optional
+
+from intugle.text_processor.extractors.base import BaseExtractor
+from intugle.text_processor.extractors.llm_extractor import LLMExtractor
+from intugle.text_processor.models import Entity, RDFGraph, Relationship
+from intugle.text_processor.rdf.builder import RDFBuilder
+
+log = logging.getLogger(__name__)
+
+
+class TextToSemanticProcessor:
+    """
+    High-level orchestrator for converting unstructured text to RDF graphs.
+
+    Provides a simple API for text-to-semantic conversion as specified in the
+    feature requirements.
+
+    Example:
+        >>> processor = TextToSemanticProcessor(model="gpt-4o-mini", output_format="rdf_star")
+        >>> rdf_graph = processor.parse(text_input)
+    """
+
+    def __init__(
+        self,
+        model: str = "gpt-4o-mini",
+        output_format: Literal["rdf", "rdf_star"] = "rdf_star",
+        extractor: Optional[BaseExtractor] = None,
+        namespace: Optional[str] = None,
+    ):
+        """
+        Initialize the text processor.
+
+        Args:
+            model: Name of the LLM model for extraction (e.g., 'gpt-4o-mini', 'en_core_web_lg').
+            output_format: Output format - 'rdf' for standard RDF, 'rdf_star' for RDF with annotations.
+            extractor: Optional custom extractor instance. If not provided, uses LLMExtractor.
+            namespace: Base namespace URI for generated entities.
+        """
+        self.model = model
+        self.output_format = output_format
+        self.namespace = namespace
+
+        # Use the provided extractor or default to the LLM-based one
+        if extractor is not None:
+            self.extractor = extractor
+        else:
+            self.extractor = LLMExtractor(model_name=model)
+
+        # Configure the RDF builder based on output format
+        include_provenance = output_format == "rdf_star"
+        self.rdf_builder = RDFBuilder(
+            namespace=namespace,
+            include_provenance=include_provenance,
+        )
+
+        log.info(f"TextToSemanticProcessor initialized with model={model}, format={output_format}")
+
+    def parse(self, text: str, metadata: Optional[Dict[str, Any]] = None) -> RDFGraph:
+        """
+        Parse unstructured text and convert it to an RDF graph.
+
+        This is the main entry point for text-to-RDF conversion.
+
+        Args:
+            text: Input text to process (e.g., from OCR, documents, etc.).
+            metadata: Optional metadata to include in the graph (e.g., source document ID).
+
+        Returns:
+            RDFGraph containing extracted entities, relationships, and triples.
+        """
+        log.info(f"Processing text of length {len(text)}")
+
+        # Extract entities and relationships
+        entities, relationships = self.extractor.extract(text)
+
+        # Build the RDF graph; copy the metadata so the caller's dict is not mutated
+        graph_metadata = dict(metadata or {})
+        graph_metadata["model"] = self.model
+        graph_metadata["output_format"] = self.output_format
+
+        rdf_graph = self.rdf_builder.build(
+            entities=entities,
+            relationships=relationships,
+            source_text=text,
+            metadata=graph_metadata,
+        )
+
+        log.info(
+            f"Parsed text into RDF graph: {len(entities)} entities, "
+            f"{len(relationships)} relationships, {len(rdf_graph.triples)} triples"
+        )
+
+        return rdf_graph
+
+    def extract_entities(self, text: str) -> list[Entity]:
+        """
+        Extract only entities from text (no relationships).
+
+        Args:
+            text: Input text to process.
+
+        Returns:
+            List of extracted Entity objects.
+        """
+        return self.extractor.extract_entities(text)
+
+    def extract_relationships(
+        self, text: str, entities: list[Entity]
+    ) -> list[Relationship]:
+        """
+        Extract relationships between known entities.
+
+        Args:
+            text: Input text to process.
+            entities: Pre-extracted entities.
+
+        Returns:
+            List of extracted Relationship objects.
+ """ + return self.extractor.extract_relationships(text, entities) + + def export_turtle(self, rdf_graph: RDFGraph) -> str: + """Export RDF graph to Turtle format.""" + return rdf_graph.to_turtle() + + def export_json_ld(self, rdf_graph: RDFGraph) -> Dict[str, Any]: + """Export RDF graph to JSON-LD format.""" + return rdf_graph.to_json_ld() diff --git a/src/intugle/text_processor/rdf/__init__.py b/src/intugle/text_processor/rdf/__init__.py new file mode 100644 index 0000000..6046453 --- /dev/null +++ b/src/intugle/text_processor/rdf/__init__.py @@ -0,0 +1,5 @@ +"""RDF subpackage for graph building and serialization.""" + +from intugle.text_processor.rdf.builder import RDFBuilder + +__all__ = ["RDFBuilder"] diff --git a/src/intugle/text_processor/rdf/builder.py b/src/intugle/text_processor/rdf/builder.py new file mode 100644 index 0000000..38a136d --- /dev/null +++ b/src/intugle/text_processor/rdf/builder.py @@ -0,0 +1,165 @@ +""" +RDF Graph Builder. + +Constructs RDF triples from extracted entities and relationships. +""" + +import logging + +from typing import Any, Dict, List, Optional + +from intugle.text_processor.models import Entity, RDFGraph, RDFTriple, Relationship + +log = logging.getLogger(__name__) + + +class RDFBuilder: + """ + Builds RDF graphs from extracted entities and relationships. + + Supports configurable ontology prefixes and RDF-star annotations. + """ + + DEFAULT_NAMESPACE = "http://intugle.ai/ontology/" + + # Standard predicates for entity properties + PREDICATE_TYPE = "rdf:type" + PREDICATE_LABEL = "rdfs:label" + PREDICATE_VALUE = "hasValue" + + def __init__( + self, + namespace: Optional[str] = None, + include_entity_triples: bool = True, + include_provenance: bool = True, + ): + """ + Initialize the RDF builder. + + Args: + namespace: Base namespace URI for generated entities. + include_entity_triples: Whether to generate type/label triples for entities. + include_provenance: Whether to include provenance metadata in triples. 
+ """ + self.namespace = namespace or self.DEFAULT_NAMESPACE + self.include_entity_triples = include_entity_triples + self.include_provenance = include_provenance + + def _make_uri(self, local_name: str) -> str: + """Create a full URI from a local name.""" + # Clean the local name for URI usage + clean_name = local_name.replace(" ", "_").replace("$", "").replace(",", "") + return f"{self.namespace}{clean_name}" + + def _entity_to_triples(self, entity: Entity) -> List[RDFTriple]: + """Generate RDF triples for an entity.""" + triples = [] + entity_uri = self._make_uri(entity.id) + + # Type triple + triples.append( + RDFTriple( + subject=entity_uri, + predicate=self._make_uri(self.PREDICATE_TYPE), + object=self._make_uri(entity.label), + object_type="uri", + metadata={"confidence": entity.confidence} if self.include_provenance else {}, + ) + ) + + # Label triple + triples.append( + RDFTriple( + subject=entity_uri, + predicate=self._make_uri(self.PREDICATE_LABEL), + object=entity.text, + object_type="literal", + metadata={"confidence": entity.confidence} if self.include_provenance else {}, + ) + ) + + # Additional attribute triples + for attr_key, attr_value in entity.attributes.items(): + triples.append( + RDFTriple( + subject=entity_uri, + predicate=self._make_uri(attr_key), + object=str(attr_value), + object_type="literal", + ) + ) + + return triples + + def _relationship_to_triple( + self, + relationship: Relationship, + entities: Dict[str, Entity], + ) -> RDFTriple: + """Convert a relationship to an RDF triple.""" + subject_uri = self._make_uri(relationship.subject_id) + predicate_uri = self._make_uri(relationship.predicate) + + # Check if object is an entity or a literal + if relationship.object_id in entities: + object_value = self._make_uri(relationship.object_id) + object_type = "uri" + else: + # Treat as literal value + object_value = relationship.object_id + object_type = "literal" + + metadata = {} + if self.include_provenance: + metadata["confidence"] = relationship.confidence + + return RDFTriple( + subject=subject_uri, + predicate=predicate_uri, + object=object_value, + object_type=object_type, + metadata=metadata, + ) + + def build( + self, + entities: List[Entity], + relationships: List[Relationship], + source_text: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> RDFGraph: + """ + Build an RDF graph from entities and relationships. + + Args: + entities: List of extracted entities. + relationships: List of extracted relationships. + source_text: Original source text (for provenance). + metadata: Additional graph-level metadata. + + Returns: + RDFGraph containing all generated triples. 
+ """ + triples = [] + entity_map = {e.id: e for e in entities} + + # Generate entity triples + if self.include_entity_triples: + for entity in entities: + triples.extend(self._entity_to_triples(entity)) + + # Generate relationship triples + for relationship in relationships: + triple = self._relationship_to_triple(relationship, entity_map) + triples.append(triple) + + graph = RDFGraph( + entities=entities, + relationships=relationships, + triples=triples, + source_text=source_text, + metadata=metadata or {}, + ) + + log.info(f"Built RDF graph with {len(triples)} triples") + return graph diff --git a/tests/streamlit_app/test_helper.py b/tests/streamlit_app/test_helper.py index 93d5d61..70eddab 100644 --- a/tests/streamlit_app/test_helper.py +++ b/tests/streamlit_app/test_helper.py @@ -1,14 +1,16 @@ """Tests for streamlit_app/helper.py file reading functions.""" import io -import pytest -import pandas as pd + from unittest.mock import Mock +import pandas as pd +import pytest + from src.intugle.streamlit_app.helper import ( + _read_bytes_to_df_core, read_bytes_to_df, read_file_to_df, - _read_bytes_to_df_core, ) diff --git a/tests/text_processor/__init__.py b/tests/text_processor/__init__.py new file mode 100644 index 0000000..995b374 --- /dev/null +++ b/tests/text_processor/__init__.py @@ -0,0 +1 @@ +# Text processor tests diff --git a/tests/text_processor/test_builder.py b/tests/text_processor/test_builder.py new file mode 100644 index 0000000..bd8e2a3 --- /dev/null +++ b/tests/text_processor/test_builder.py @@ -0,0 +1,114 @@ +"""Tests for the RDF Builder.""" + +from intugle.text_processor.models import Entity, Relationship +from intugle.text_processor.rdf.builder import RDFBuilder + + +class TestRDFBuilder: + def test_builder_initialization(self): + builder = RDFBuilder() + assert builder.namespace == RDFBuilder.DEFAULT_NAMESPACE + assert builder.include_entity_triples is True + assert builder.include_provenance is True + + def test_builder_custom_namespace(self): + builder = RDFBuilder(namespace="http://custom.org/") + assert builder.namespace == "http://custom.org/" + + def test_build_empty(self): + builder = RDFBuilder() + graph = builder.build(entities=[], relationships=[]) + assert len(graph.triples) == 0 + assert len(graph.entities) == 0 + + def test_build_with_entities(self): + builder = RDFBuilder() + entities = [ + Entity(id="Invoice_123", text="Invoice 123", label="DOCUMENT"), + Entity(id="Vendor_A", text="Vendor A", label="ORGANIZATION"), + ] + graph = builder.build(entities=entities, relationships=[]) + + # Each entity should produce type and label triples + assert len(graph.triples) == 4 # 2 entities x 2 triples each + assert len(graph.entities) == 2 + + def test_build_with_relationships(self): + builder = RDFBuilder() + entities = [ + Entity(id="Invoice_123", text="Invoice 123", label="DOCUMENT"), + Entity(id="Vendor_A", text="Vendor A", label="ORGANIZATION"), + ] + relationships = [ + Relationship( + subject_id="Invoice_123", + predicate="issuedBy", + object_id="Vendor_A", + ) + ] + graph = builder.build(entities=entities, relationships=relationships) + + # 4 entity triples + 1 relationship triple + assert len(graph.triples) == 5 + assert len(graph.relationships) == 1 + + def test_build_with_literal_object(self): + builder = RDFBuilder() + entities = [ + Entity(id="Invoice_123", text="Invoice 123", label="DOCUMENT"), + ] + relationships = [ + Relationship( + subject_id="Invoice_123", + predicate="hasAmount", + object_id="5400", # Not an entity ID, should be treated as 
literal + ) + ] + graph = builder.build(entities=entities, relationships=relationships) + + # Find the hasAmount triple + amount_triple = None + for t in graph.triples: + if "hasAmount" in t.predicate: + amount_triple = t + break + + assert amount_triple is not None + assert amount_triple.object_type == "literal" + + def test_build_without_entity_triples(self): + builder = RDFBuilder(include_entity_triples=False) + entities = [ + Entity(id="Invoice_123", text="Invoice 123", label="DOCUMENT"), + ] + relationships = [ + Relationship( + subject_id="Invoice_123", + predicate="hasAmount", + object_id="5400", + ) + ] + graph = builder.build(entities=entities, relationships=relationships) + + # Only the relationship triple, no entity type/label triples + assert len(graph.triples) == 1 + + def test_provenance_metadata(self): + builder = RDFBuilder(include_provenance=True) + entities = [ + Entity(id="Test", text="Test", label="TEST", confidence=0.95), + ] + graph = builder.build(entities=entities, relationships=[]) + + # Check that confidence is in metadata + assert graph.triples[0].metadata.get("confidence") == 0.95 + + def test_no_provenance_metadata(self): + builder = RDFBuilder(include_provenance=False) + entities = [ + Entity(id="Test", text="Test", label="TEST", confidence=0.95), + ] + graph = builder.build(entities=entities, relationships=[]) + + # Metadata should be empty + assert graph.triples[0].metadata == {} diff --git a/tests/text_processor/test_mapper.py b/tests/text_processor/test_mapper.py new file mode 100644 index 0000000..14dbf05 --- /dev/null +++ b/tests/text_processor/test_mapper.py @@ -0,0 +1,123 @@ +"""Tests for the SemanticMapper.""" + +from unittest.mock import MagicMock + +from intugle.text_processor.mapper import MappingResult, SemanticMapper +from intugle.text_processor.models import Entity, RDFGraph + + +class TestSemanticMapper: + def test_mapper_initialization(self): + mapper = SemanticMapper() + assert mapper.match_threshold == 0.85 + assert mapper.use_embeddings is False + + def test_mapper_custom_threshold(self): + mapper = SemanticMapper(match_threshold=0.7) + assert mapper.match_threshold == 0.7 + + def test_string_similarity(self): + mapper = SemanticMapper() + + # Exact match + assert mapper._string_similarity("invoice", "invoice") == 1.0 + + # Case insensitive + assert mapper._string_similarity("Invoice", "invoice") == 1.0 + + # Underscore vs space + assert mapper._string_similarity("invoice_id", "invoice id") == 1.0 + + # Partial match + similarity = mapper._string_similarity("invoice", "invoices") + assert 0.8 < similarity < 1.0 + + def test_map_to_semantic_model_no_match(self): + mapper = SemanticMapper(match_threshold=0.9) + + entity = Entity(id="random_entity", text="Random Thing", label="OTHER") + graph = RDFGraph(entities=[entity]) + + # Mock semantic model with no matching columns + mock_model = MagicMock() + mock_dataset = MagicMock() + mock_column = MagicMock() + mock_column.name = "unrelated_column" + mock_dataset.source.table.columns = [mock_column] + mock_model.datasets = {"table1": mock_dataset} + + results = mapper.map_to_semantic_model(graph, mock_model) + + assert len(results) == 1 + assert results[0].is_new is True + assert results[0].matched_table is None + + def test_map_to_semantic_model_with_match(self): + mapper = SemanticMapper(match_threshold=0.8) + + entity = Entity(id="customer_id", text="Customer ID", label="IDENTIFIER") + graph = RDFGraph(entities=[entity]) + + # Mock semantic model with matching column + mock_model = MagicMock() + 
mock_dataset = MagicMock() + mock_column = MagicMock() + mock_column.name = "customer_id" + mock_dataset.source.table.columns = [mock_column] + mock_model.datasets = {"customers": mock_dataset} + + results = mapper.map_to_semantic_model(graph, mock_model) + + assert len(results) == 1 + assert results[0].is_new is False + assert results[0].matched_table == "customers" + assert results[0].matched_column == "customer_id" + assert results[0].confidence >= 0.8 + + def test_suggest_new_nodes(self): + mapper = SemanticMapper() + + entity = Entity(id="new_concept", text="New Concept", label="CONCEPT") + result = MappingResult(entity=entity, is_new=True) + + suggestions = mapper.suggest_new_nodes([result]) + + assert len(suggestions) == 1 + assert suggestions[0]["suggested_name"] == "new_concept" + assert suggestions[0]["entity_type"] == "CONCEPT" + + def test_suggest_new_nodes_filters_matched(self): + mapper = SemanticMapper() + + matched_entity = Entity(id="matched", text="Matched", label="TEST") + new_entity = Entity(id="new", text="New", label="TEST") + + results = [ + MappingResult(entity=matched_entity, matched_table="t1", matched_column="c1", is_new=False), + MappingResult(entity=new_entity, is_new=True), + ] + + suggestions = mapper.suggest_new_nodes(results) + + # Only the new entity should be suggested + assert len(suggestions) == 1 + assert suggestions[0]["suggested_name"] == "new" + + +class TestMappingResult: + def test_to_dict(self): + entity = Entity(id="test_id", text="Test", label="TEST") + result = MappingResult( + entity=entity, + matched_table="table1", + matched_column="col1", + confidence=0.9, + is_new=False, + ) + + d = result.to_dict() + + assert d["entity_id"] == "test_id" + assert d["matched_table"] == "table1" + assert d["confidence"] == 0.9 + assert d["is_new"] is False diff --git a/tests/text_processor/test_models.py b/tests/text_processor/test_models.py new file mode 100644 index 0000000..d86716e --- /dev/null +++ b/tests/text_processor/test_models.py @@ -0,0 +1,117 @@ +"""Tests for text processor Pydantic models.""" + +from intugle.text_processor.models import Entity, RDFGraph, RDFTriple, Relationship + + +class TestEntity: + def test_entity_creation(self): + entity = Entity(id="Invoice_123", text="Invoice 123", label="DOCUMENT") + assert entity.id == "Invoice_123" + assert entity.text == "Invoice 123" + assert entity.label == "DOCUMENT" + assert entity.confidence == 1.0 + + def test_entity_with_attributes(self): + entity = Entity( + id="Amount_5400", + text="$5,400", + label="MONEY", + attributes={"currency": "USD", "value": 5400}, + ) + assert entity.attributes["currency"] == "USD" + assert entity.attributes["value"] == 5400 + + +class TestRelationship: + def test_relationship_creation(self): + rel = Relationship( + subject_id="Invoice_123", + predicate="hasAmount", + object_id="Amount_5400", + ) + assert rel.subject_id == "Invoice_123" + assert rel.predicate == "hasAmount" + assert rel.object_id == "Amount_5400" + assert rel.confidence == 1.0 + + +class TestRDFTriple: + def test_triple_creation(self): + triple = RDFTriple( + subject="http://example.org/Invoice_123", + predicate="http://example.org/hasAmount", + object="5400", + object_type="literal", + ) + assert triple.subject == "http://example.org/Invoice_123" + assert triple.object_type == "literal" + + def test_triple_to_turtle(self): + triple = RDFTriple( + subject="http://example.org/Invoice_123", + predicate="http://example.org/hasAmount", + object="5400", + object_type="literal", + ) + turtle = 
triple.to_turtle() + assert "" in turtle + assert '"5400"' in turtle + + def test_triple_with_uri_object(self): + triple = RDFTriple( + subject="http://example.org/Invoice_123", + predicate="http://example.org/issuedBy", + object="http://example.org/Vendor_A", + object_type="uri", + ) + turtle = triple.to_turtle() + assert "" in turtle + + +class TestRDFGraph: + def test_graph_creation(self): + graph = RDFGraph() + assert len(graph.entities) == 0 + assert len(graph.triples) == 0 + + def test_add_triple(self): + graph = RDFGraph() + graph.add_triple( + subject="http://example.org/Invoice_123", + predicate="http://example.org/hasAmount", + obj="5400", + ) + assert len(graph.triples) == 1 + + def test_to_turtle(self): + graph = RDFGraph() + graph.add_triple( + subject="http://example.org/Invoice_123", + predicate="http://example.org/hasAmount", + obj="5400", + ) + turtle = graph.to_turtle() + assert "@prefix" in turtle + assert "http://example.org/Invoice_123" in turtle + + def test_to_json_ld(self): + graph = RDFGraph() + graph.add_triple( + subject="http://example.org/Invoice_123", + predicate="http://example.org/hasAmount", + obj="5400", + ) + json_ld = graph.to_json_ld() + assert "@context" in json_ld + assert "@graph" in json_ld + assert len(json_ld["@graph"]) == 1 + + def test_get_entity_by_id(self): + entity = Entity(id="test_id", text="Test", label="TEST") + graph = RDFGraph(entities=[entity]) + found = graph.get_entity_by_id("test_id") + assert found is not None + assert found.text == "Test" + + not_found = graph.get_entity_by_id("nonexistent") + assert not_found is None diff --git a/tests/text_processor/test_processor.py b/tests/text_processor/test_processor.py new file mode 100644 index 0000000..aaa55e9 --- /dev/null +++ b/tests/text_processor/test_processor.py @@ -0,0 +1,84 @@ +"""Tests for the TextToSemanticProcessor.""" + +from unittest.mock import MagicMock + +from intugle.text_processor.models import Entity, RDFGraph, Relationship +from intugle.text_processor.processor import TextToSemanticProcessor + + +class TestTextToSemanticProcessor: + def test_processor_initialization(self): + processor = TextToSemanticProcessor() + assert processor.model == "gpt-4o-mini" + assert processor.output_format == "rdf_star" + + def test_processor_custom_config(self): + processor = TextToSemanticProcessor( + model="gpt-4", + output_format="rdf", + namespace="http://custom.org/", + ) + assert processor.model == "gpt-4" + assert processor.output_format == "rdf" + assert processor.rdf_builder.namespace == "http://custom.org/" + + def test_processor_with_mock_extractor(self): + # Create mock extractor + mock_extractor = MagicMock() + mock_extractor.extract.return_value = ( + [Entity(id="Invoice_123", text="Invoice 123", label="DOCUMENT")], + [Relationship(subject_id="Invoice_123", predicate="hasAmount", object_id="5400")], + ) + + processor = TextToSemanticProcessor(extractor=mock_extractor) + + result = processor.parse("Invoice 123 for $5,400") + + mock_extractor.extract.assert_called_once() + assert isinstance(result, RDFGraph) + assert len(result.entities) == 1 + assert len(result.relationships) == 1 + + def test_export_turtle(self): + mock_extractor = MagicMock() + mock_extractor.extract.return_value = ( + [Entity(id="Test", text="Test", label="TEST")], + [], + ) + + processor = TextToSemanticProcessor(extractor=mock_extractor) + graph = processor.parse("Test text") + + turtle = processor.export_turtle(graph) + assert "@prefix" in turtle + assert "Test" in turtle + + def 
test_export_json_ld(self): + mock_extractor = MagicMock() + mock_extractor.extract.return_value = ( + [Entity(id="Test", text="Test", label="TEST")], + [], + ) + + processor = TextToSemanticProcessor(extractor=mock_extractor) + graph = processor.parse("Test text") + + json_ld = processor.export_json_ld(graph) + assert "@context" in json_ld + assert "@graph" in json_ld + + def test_metadata_in_graph(self): + mock_extractor = MagicMock() + mock_extractor.extract.return_value = ([], []) + + processor = TextToSemanticProcessor( + model="test-model", + output_format="rdf_star", + extractor=mock_extractor, + ) + + graph = processor.parse("Test", metadata={"source": "test.txt"}) + + assert graph.metadata["model"] == "test-model" + assert graph.metadata["output_format"] == "rdf_star" + assert graph.metadata["source"] == "test.txt"
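
[Reviewer note] The new intugle = "intugle.cli:main" console script can be smoke-tested
without installing the entry point by patching argv. Hypothetical file name; requires
LLM credentials:

    import sys
    from unittest.mock import patch

    from intugle.cli import main

    argv = ["intugle", "text-to-semantic", "-i", "notes.txt", "--output-format", "turtle"]
    with patch.object(sys, "argv", argv):
        main()  # prints Turtle to stdout; exits non-zero on failure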