Skip to content

Commit 2cd9b89

Browse files
committed
Replace local ollama LLM with GLiNER
1 parent e6a238c commit 2cd9b89

File tree

7 files changed

+327
-95
lines changed

7 files changed

+327
-95
lines changed

build.sbt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ libraryDependencies ++= Seq(
141141
"dev.langchain4j" % "langchain4j" % langchain4jVersion,
142142
"dev.langchain4j" % "langchain4j-open-ai" % langchain4jVersion,
143143
"dev.langchain4j" % "langchain4j-anthropic" % langchain4jVersion,
144-
"dev.langchain4j" % "langchain4j-ollama" % langchain4jVersion,
145144

146145
// LangChain4j PgVector extension
147146
"dev.langchain4j" % "langchain4j-pgvector" % "1.8.0-beta15",

docker/docker-compose.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,5 +124,32 @@ services:
124124
networks:
125125
- runtime-net
126126

127+
# GLiNER NER service
128+
gliner-ner:
129+
build:
130+
context: ./gliner-service
131+
dockerfile: Dockerfile
132+
hostname: gliner-ner
133+
ports:
134+
- "8085:8085"
135+
environment:
136+
- TRANSFORMERS_CACHE=/app/model_cache
137+
- PYTHONUNBUFFERED=1
138+
volumes:
139+
# Cache downloaded GLiNER model to avoid re-downloading on restart
140+
- gliner_model_cache:/app/model_cache
141+
healthcheck:
142+
test: [ "CMD", "curl", "-f", "http://localhost:8085/health" ]
143+
interval: 30s
144+
timeout: 10s
145+
retries: 3
146+
start_period: 60s
147+
restart: unless-stopped
148+
networks:
149+
- runtime-net
150+
151+
volumes:
152+
gliner_model_cache:
153+
127154
networks:
128155
runtime-net:

docker/gliner-service/Dockerfile

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Use Python 3.11 slim image for better performance and smaller size
2+
FROM python:3.11-slim
3+
4+
# Set working directory
5+
WORKDIR /app
6+
7+
# Set environment variables
8+
ENV PYTHONUNBUFFERED=1
9+
ENV PYTHONDONTWRITEBYTECODE=1
10+
11+
# Install system dependencies
12+
RUN apt-get update && apt-get install -y \
13+
gcc \
14+
g++ \
15+
&& rm -rf /var/lib/apt/lists/*
16+
17+
# Copy requirements first (for better Docker layer caching)
18+
COPY requirements.txt .
19+
20+
# Install Python dependencies
21+
RUN pip install --no-cache-dir --upgrade pip && \
22+
pip install --no-cache-dir -r requirements.txt
23+
24+
# Copy application code
25+
COPY app.py .
26+
27+
# Create directory for model cache
28+
RUN mkdir -p /app/model_cache
29+
ENV TRANSFORMERS_CACHE=/app/model_cache
30+
31+
# Expose port
32+
EXPOSE 8085
33+
34+
# Health check
35+
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
36+
CMD curl -f http://localhost:8085/health || exit 1
37+
38+
# Run the application
39+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8085", "--workers", "1"]

docker/gliner-service/app.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
"""
2+
GLiNER NER Service
3+
FastAPI service for Named Entity Recognition using GLiNER model.
4+
Specifically designed to extract person names from text.
5+
"""
6+
7+
import logging
8+
import time
9+
from typing import List, Optional
10+
from contextlib import asynccontextmanager
11+
12+
from fastapi import FastAPI, HTTPException, status
13+
from pydantic import BaseModel, Field
14+
from gliner import GLiNER
15+
import uvicorn
16+
17+
# Configure logging
18+
logging.basicConfig(level=logging.INFO)
19+
logger = logging.getLogger(__name__)
20+
21+
# Global model instance
22+
model: Optional[GLiNER] = None
23+
24+
@asynccontextmanager
25+
async def lifespan(app: FastAPI):
26+
"""
27+
Application lifespan manager to load model on startup and cleanup on shutdown.
28+
"""
29+
global model
30+
logger.info("Loading GLiNER model...")
31+
start_time = time.time()
32+
33+
try:
34+
# Load GLiNER model - using medium version for balance of speed/accuracy
35+
model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1")
36+
load_time = time.time() - start_time
37+
logger.info(f"GLiNER model loaded successfully in {load_time:.2f}s")
38+
except Exception as e:
39+
logger.error(f"Failed to load GLiNER model: {e}")
40+
raise
41+
42+
yield
43+
44+
# Cleanup
45+
logger.info("Shutting down GLiNER service")
46+
model = None
47+
48+
# Initialize FastAPI app with lifespan manager
49+
app = FastAPI(
50+
title="GLiNER NER Service",
51+
description="Named Entity Recognition service using GLiNER for person extraction",
52+
version="1.0.0",
53+
lifespan=lifespan
54+
)
55+
56+
# Request/Response Models
57+
class PersonExtractionRequest(BaseModel):
58+
"""Request model for person extraction endpoint"""
59+
text: str = Field(..., description="Text content to extract persons from", min_length=1)
60+
threshold: float = Field(default=0.5, description="Confidence threshold for entity extraction", ge=0.0, le=1.0)
61+
62+
class PersonEntity(BaseModel):
63+
"""Represents a person entity found in text"""
64+
text: str = Field(..., description="The person name text")
65+
start: int = Field(..., description="Start position in original text")
66+
end: int = Field(..., description="End position in original text")
67+
score: float = Field(..., description="Confidence score for this entity")
68+
69+
class PersonExtractionResponse(BaseModel):
70+
"""Response model for person extraction"""
71+
persons: List[PersonEntity] = Field(..., description="List of person entities found")
72+
processing_time_ms: float = Field(..., description="Processing time in milliseconds")
73+
text_length: int = Field(..., description="Length of input text")
74+
75+
class HealthResponse(BaseModel):
76+
"""Health check response"""
77+
status: str
78+
model_loaded: bool
79+
service_name: str
80+
version: str
81+
82+
class ErrorResponse(BaseModel):
83+
"""Error response model"""
84+
error: str
85+
detail: Optional[str] = None
86+
87+
# API Endpoints
88+
89+
@app.get("/health", response_model=HealthResponse)
90+
async def health_check():
91+
"""Health check endpoint"""
92+
return HealthResponse(
93+
status="healthy" if model is not None else "unhealthy",
94+
model_loaded=model is not None,
95+
service_name="GLiNER NER Service",
96+
version="1.0.0"
97+
)
98+
99+
@app.post("/extract-persons", response_model=PersonExtractionResponse)
100+
async def extract_persons(request: PersonExtractionRequest):
101+
"""
102+
Extract person entities from text using GLiNER model.
103+
104+
This endpoint is specifically designed to work with the WikipediaEditsAnalyser
105+
Scala application for consistent person name extraction.
106+
"""
107+
if model is None:
108+
raise HTTPException(
109+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
110+
detail="GLiNER model not loaded"
111+
)
112+
113+
start_time = time.time()
114+
115+
try:
116+
# Extract person entities using GLiNER
117+
# GLiNER expects labels as a list - we specify "person" as the entity type
118+
entities = model.predict_entities(
119+
request.text,
120+
labels=["person"],
121+
threshold=request.threshold
122+
)
123+
124+
# Convert GLiNER output to our response format
125+
person_entities = []
126+
for entity in entities:
127+
person_entities.append(PersonEntity(
128+
text=entity["text"],
129+
start=entity["start"],
130+
end=entity["end"],
131+
score=entity["score"]
132+
))
133+
134+
processing_time = (time.time() - start_time) * 1000 # Convert to milliseconds
135+
136+
logger.info(f"Extracted {len(person_entities)} persons from text of length {len(request.text)} in {processing_time:.2f}ms")
137+
138+
return PersonExtractionResponse(
139+
persons=person_entities,
140+
processing_time_ms=processing_time,
141+
text_length=len(request.text)
142+
)
143+
144+
except Exception as e:
145+
logger.error(f"Error during person extraction: {e}")
146+
raise HTTPException(
147+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
148+
detail=f"Person extraction failed: {str(e)}"
149+
)
150+
151+
@app.get("/")
152+
async def root():
153+
"""Root endpoint with service information"""
154+
return {
155+
"service": "GLiNER NER Service",
156+
"version": "1.0.0",
157+
"description": "Named Entity Recognition service for person extraction",
158+
"endpoints": {
159+
"health": "/health",
160+
"extract_persons": "/extract-persons",
161+
"docs": "/docs"
162+
}
163+
}
164+
165+
if __name__ == "__main__":
166+
# For development only - in production, use docker with uvicorn
167+
uvicorn.run(
168+
"app:app",
169+
host="0.0.0.0",
170+
port=8085,
171+
log_level="info",
172+
reload=False # Set to True for development
173+
)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# FastAPI and web server dependencies
2+
fastapi==0.104.1
3+
uvicorn[standard]==0.24.0
4+
5+
# GLiNER and ML dependencies
6+
gliner==0.1.11
7+
torch>=2.0.0
8+
transformers>=4.30.0
9+
numpy>=1.24.0
10+
11+
# Additional utilities
12+
pydantic>=2.0.0

src/main/resources/application.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ pekko {
3232
max-open-requests = 64
3333
}
3434

35+
# Raised from the default of 4096 to accommodate longer SSE lines.
http.sse.max-line-size = 8192
# Kept one byte above max-line-size; presumably the event size must exceed
# the line size -- verify against the Pekko HTTP SSE settings docs.
http.sse.max-event-size = 8193
3538
}
3639

3740
# Custom dispatcher used to show blocking behaviour

0 commit comments

Comments
 (0)