Skip to content

Commit 2cd9b89

Browse files
committed
Replace local ollama LLM with GLiNER
1 parent e6a238c commit 2cd9b89

File tree

7 files changed

+327
-95
lines changed

7 files changed

+327
-95
lines changed

build.sbt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ libraryDependencies ++= Seq(
141141
"dev.langchain4j" % "langchain4j" % langchain4jVersion,
142142
"dev.langchain4j" % "langchain4j-open-ai" % langchain4jVersion,
143143
"dev.langchain4j" % "langchain4j-anthropic" % langchain4jVersion,
144-
"dev.langchain4j" % "langchain4j-ollama" % langchain4jVersion,
145144

146145
// LangChain4j PgVector extension
147146
"dev.langchain4j" % "langchain4j-pgvector" % "1.8.0-beta15",

docker/docker-compose.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,5 +124,32 @@ services:
124124
networks:
125125
- runtime-net
126126

127+
# GLiNER NER service
128+
gliner-ner:
129+
build:
130+
context: ./gliner-service
131+
dockerfile: Dockerfile
132+
hostname: gliner-ner
133+
ports:
134+
- "8085:8085"
135+
environment:
136+
- TRANSFORMERS_CACHE=/app/model_cache
137+
- PYTHONUNBUFFERED=1
138+
volumes:
139+
# Cache downloaded GLiNER model to avoid re-downloading on restart
140+
- gliner_model_cache:/app/model_cache
141+
healthcheck:
142+
test: [ "CMD", "curl", "-f", "http://localhost:8085/health" ]
143+
interval: 30s
144+
timeout: 10s
145+
retries: 3
146+
start_period: 60s
147+
restart: unless-stopped
148+
networks:
149+
- runtime-net
150+
151+
volumes:
152+
gliner_model_cache:
153+
127154
networks:
128155
runtime-net:

docker/gliner-service/Dockerfile

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Use Python 3.11 slim image for better performance and smaller size
2+
FROM python:3.11-slim
3+
4+
# Set working directory
5+
WORKDIR /app
6+
7+
# Set environment variables
8+
ENV PYTHONUNBUFFERED=1
9+
ENV PYTHONDONTWRITEBYTECODE=1
10+
11+
# Install system dependencies
12+
RUN apt-get update && apt-get install -y \
13+
gcc \
14+
g++ \
15+
&& rm -rf /var/lib/apt/lists/*
16+
17+
# Copy requirements first (for better Docker layer caching)
18+
COPY requirements.txt .
19+
20+
# Install Python dependencies
21+
RUN pip install --no-cache-dir --upgrade pip && \
22+
pip install --no-cache-dir -r requirements.txt
23+
24+
# Copy application code
25+
COPY app.py .
26+
27+
# Create directory for model cache
28+
RUN mkdir -p /app/model_cache
29+
ENV TRANSFORMERS_CACHE=/app/model_cache
30+
31+
# Expose port
32+
EXPOSE 8085
33+
34+
# Health check
35+
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
36+
CMD curl -f http://localhost:8085/health || exit 1
37+
38+
# Run the application
39+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8085", "--workers", "1"]

docker/gliner-service/app.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
"""
2+
GLiNER NER Service
3+
FastAPI service for Named Entity Recognition using GLiNER model.
4+
Specifically designed to extract person names from text.
5+
"""
6+
7+
import logging
8+
import time
9+
from typing import List, Optional
10+
from contextlib import asynccontextmanager
11+
12+
from fastapi import FastAPI, HTTPException, status
13+
from pydantic import BaseModel, Field
14+
from gliner import GLiNER
15+
import uvicorn
16+
17+
# Configure logging
18+
logging.basicConfig(level=logging.INFO)
19+
logger = logging.getLogger(__name__)
20+
21+
# Global model instance
22+
model: Optional[GLiNER] = None
23+
24+
@asynccontextmanager
25+
async def lifespan(app: FastAPI):
26+
"""
27+
Application lifespan manager to load model on startup and cleanup on shutdown.
28+
"""
29+
global model
30+
logger.info("Loading GLiNER model...")
31+
start_time = time.time()
32+
33+
try:
34+
# Load GLiNER model - using medium version for balance of speed/accuracy
35+
model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1")
36+
load_time = time.time() - start_time
37+
logger.info(f"GLiNER model loaded successfully in {load_time:.2f}s")
38+
except Exception as e:
39+
logger.error(f"Failed to load GLiNER model: {e}")
40+
raise
41+
42+
yield
43+
44+
# Cleanup
45+
logger.info("Shutting down GLiNER service")
46+
model = None
47+
48+
# Initialize FastAPI app with lifespan manager
49+
app = FastAPI(
50+
title="GLiNER NER Service",
51+
description="Named Entity Recognition service using GLiNER for person extraction",
52+
version="1.0.0",
53+
lifespan=lifespan
54+
)
55+
56+
# Request/Response Models
57+
class PersonExtractionRequest(BaseModel):
58+
"""Request model for person extraction endpoint"""
59+
text: str = Field(..., description="Text content to extract persons from", min_length=1)
60+
threshold: float = Field(default=0.5, description="Confidence threshold for entity extraction", ge=0.0, le=1.0)
61+
62+
class PersonEntity(BaseModel):
63+
"""Represents a person entity found in text"""
64+
text: str = Field(..., description="The person name text")
65+
start: int = Field(..., description="Start position in original text")
66+
end: int = Field(..., description="End position in original text")
67+
score: float = Field(..., description="Confidence score for this entity")
68+
69+
class PersonExtractionResponse(BaseModel):
70+
"""Response model for person extraction"""
71+
persons: List[PersonEntity] = Field(..., description="List of person entities found")
72+
processing_time_ms: float = Field(..., description="Processing time in milliseconds")
73+
text_length: int = Field(..., description="Length of input text")
74+
75+
class HealthResponse(BaseModel):
76+
"""Health check response"""
77+
status: str
78+
model_loaded: bool
79+
service_name: str
80+
version: str
81+
82+
class ErrorResponse(BaseModel):
83+
"""Error response model"""
84+
error: str
85+
detail: Optional[str] = None
86+
87+
# API Endpoints
88+
89+
@app.get("/health", response_model=HealthResponse)
90+
async def health_check():
91+
"""Health check endpoint"""
92+
return HealthResponse(
93+
status="healthy" if model is not None else "unhealthy",
94+
model_loaded=model is not None,
95+
service_name="GLiNER NER Service",
96+
version="1.0.0"
97+
)
98+
99+
@app.post("/extract-persons", response_model=PersonExtractionResponse)
100+
async def extract_persons(request: PersonExtractionRequest):
101+
"""
102+
Extract person entities from text using GLiNER model.
103+
104+
This endpoint is specifically designed to work with the WikipediaEditsAnalyser
105+
Scala application for consistent person name extraction.
106+
"""
107+
if model is None:
108+
raise HTTPException(
109+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
110+
detail="GLiNER model not loaded"
111+
)
112+
113+
start_time = time.time()
114+
115+
try:
116+
# Extract person entities using GLiNER
117+
# GLiNER expects labels as a list - we specify "person" as the entity type
118+
entities = model.predict_entities(
119+
request.text,
120+
labels=["person"],
121+
threshold=request.threshold
122+
)
123+
124+
# Convert GLiNER output to our response format
125+
person_entities = []
126+
for entity in entities:
127+
person_entities.append(PersonEntity(
128+
text=entity["text"],
129+
start=entity["start"],
130+
end=entity["end"],
131+
score=entity["score"]
132+
))
133+
134+
processing_time = (time.time() - start_time) * 1000 # Convert to milliseconds
135+
136+
logger.info(f"Extracted {len(person_entities)} persons from text of length {len(request.text)} in {processing_time:.2f}ms")
137+
138+
return PersonExtractionResponse(
139+
persons=person_entities,
140+
processing_time_ms=processing_time,
141+
text_length=len(request.text)
142+
)
143+
144+
except Exception as e:
145+
logger.error(f"Error during person extraction: {e}")
146+
raise HTTPException(
147+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
148+
detail=f"Person extraction failed: {str(e)}"
149+
)
150+
151+
@app.get("/")
152+
async def root():
153+
"""Root endpoint with service information"""
154+
return {
155+
"service": "GLiNER NER Service",
156+
"version": "1.0.0",
157+
"description": "Named Entity Recognition service for person extraction",
158+
"endpoints": {
159+
"health": "/health",
160+
"extract_persons": "/extract-persons",
161+
"docs": "/docs"
162+
}
163+
}
164+
165+
if __name__ == "__main__":
166+
# For development only - in production, use docker with uvicorn
167+
uvicorn.run(
168+
"app:app",
169+
host="0.0.0.0",
170+
port=8085,
171+
log_level="info",
172+
reload=False # Set to True for development
173+
)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# FastAPI and web server dependencies
2+
fastapi==0.104.1
3+
uvicorn[standard]==0.24.0
4+
5+
# GLiNER and ML dependencies
6+
gliner==0.1.11
7+
torch>=2.0.0
8+
transformers>=4.30.0
9+
numpy>=1.24.0
10+
11+
# Additional utilities
12+
pydantic>=2.0.0

src/main/resources/application.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ pekko {
3232
max-open-requests = 64
3333
}
3434

35+
# Raised from the default of 4096 to accommodate longer SSE lines.
http.sse.max-line-size = 8192
# Kept one byte above max-line-size; presumably the event size must exceed
# the line size -- verify against the Pekko HTTP SSE settings docs.
http.sse.max-event-size = 8193
3538
}
3639

3740
# Custom dispatcher used to show blocking behaviour

0 commit comments

Comments
 (0)