# Ragify All-in-One Container
# Includes Ollama, Qdrant, Apache Tika, and Python API
# Multi-stage build for Docker/Podman
34
45# ============================================
@@ -26,16 +27,24 @@ RUN pip install --no-cache-dir --upgrade pip && \
# Runtime stage, built on the slim CPython 3.12 image.
FROM python:3.12-slim

# Image metadata: maintainer, human-readable description, release version.
LABEL maintainer="Ragify" \
      description="All-in-one RAG documentation search with Ollama, Qdrant, MCP, and Apache Tika" \
      version="2.0.0"
3132
# Runtime OS packages (kept in alphabetical order).
# openjdk-21-jre-headless provides the Java runtime for the Tika server;
# tini is the init process referenced by ENTRYPOINT.
# The apt package lists are removed in the same layer that fetched them.
RUN set -eu; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        openjdk-21-jre-headless \
        tini \
    ; \
    rm -rf /var/lib/apt/lists/*
3840
# Java environment for the Tika server.
# The JRE is looked up via an architecture-agnostic glob and exposed under the
# stable path /usr/lib/jvm/java-21. The build aborts unless the glob resolves
# to exactly one existing directory, so a dangling or ambiguous link can never
# end up in the image.
RUN set -eu; \
    set -- /usr/lib/jvm/java-21-openjdk-*; \
    if [ "$#" -ne 1 ] || [ ! -d "$1" ]; then \
        echo "expected exactly one Java 21 installation, got: $*" >&2; \
        exit 1; \
    fi; \
    ln -s "$1" /usr/lib/jvm/java-21
ENV JAVA_HOME=/usr/lib/jvm/java-21
# Separate instruction on purpose: ${JAVA_HOME} is only visible to ENV
# instructions that come after the one defining it.
ENV PATH="${JAVA_HOME}/bin:${PATH}"
# Tika JAR path (used by entrypoint.sh to start server)
ENV TIKA_JAR_PATH=/tmp/tika-server.jar
47+
3948# Install Ollama - pinned to v0.11.0 to avoid embedding bugs in 0.12.x/0.13.x
4049# See: https://github.com/ollama/ollama/issues/13054
4150ENV OLLAMA_VERSION=0.11.0
@@ -78,13 +87,40 @@ COPY docker/ ./docker/
# Set the executable bit on every shell script under /app/docker
RUN chmod +x \
    /app/docker/*.sh
8089
# Pre-pull default Ollama model (larger image, faster container startup).
# A temporary `ollama serve` is started in the background for the pull:
#   - readiness is polled (up to 30s) instead of relying on a fixed sleep;
#   - a failed or timed-out pull fails the build rather than silently
#     producing an image without the model;
#   - the server is stopped via its PID, so no extra process tools are needed.
RUN set -eu; \
    ollama serve & \
    server_pid=$!; \
    ready=0; \
    for attempt in $(seq 1 30); do \
        if ollama list >/dev/null 2>&1; then \
            ready=1; \
            break; \
        fi; \
        sleep 1; \
    done; \
    if [ "$ready" -ne 1 ]; then \
        echo "ollama serve did not become ready (last attempt: ${attempt})" >&2; \
        exit 1; \
    fi; \
    ollama pull nomic-embed-text; \
    kill "$server_pid"; \
    wait "$server_pid" || true
8795
# Pre-download Tika JAR and ensure it's in the expected location.
# The embedded script fails the build (non-zero exit) when no usable JAR is
# present at /tmp/tika-server.jar afterwards.
RUN python3 <<'PYEOF'
import glob
import os

from tika import parser

EXPECTED_PATH = '/tmp/tika-server.jar'

# Parsing any buffer makes tika-python fetch the server JAR
# (this downloads the JAR to /tmp/).
parser.from_buffer(b'test', xmlContent=False)

if os.path.isfile(EXPECTED_PATH):
    print(f'Tika JAR found at {EXPECTED_PATH}')
else:
    # A dangling link left at the target would make os.symlink() below
    # raise FileExistsError, so clear it first.
    if os.path.islink(EXPECTED_PATH):
        os.unlink(EXPECTED_PATH)

    # Handles versioned names like tika-server-standard-3.1.0.jar.
    # glob order is arbitrary, so sort to make the pick reproducible.
    candidates = sorted(
        path for path in glob.glob('/tmp/tika-server*.jar')
        if os.path.isfile(path)
    )
    if not candidates:
        raise Exception('No Tika JAR found in /tmp/')

    chosen = candidates[0]
    print(f'Found JAR: {chosen}')
    os.symlink(chosen, EXPECTED_PATH)
    print(f'Created symlink: {EXPECTED_PATH} -> {chosen}')

# Final verification. An explicit raise is used instead of `assert` so the
# check still runs when Python is started with optimizations enabled.
if not os.path.isfile(EXPECTED_PATH) or os.path.getsize(EXPECTED_PATH) == 0:
    raise RuntimeError(f'Tika JAR not found at {EXPECTED_PATH}')
print(f'Tika JAR verified at {EXPECTED_PATH}')
PYEOF
123+
# Qdrant storage directory (parents are created as needed)
RUN mkdir --parents /data/qdrant
90126
@@ -100,7 +136,9 @@ ENV PYTHONUNBUFFERED=1 \
100136 # Qdrant (internal)
101137 QDRANT_URL=http://localhost:6333 \
102138 QDRANT_PATH=/data/qdrant \
103- # Auth (must be provided)
139+ # Tika server (internal - started by entrypoint.sh)
140+ TIKA_SERVER_ENDPOINT=http://localhost:9998 \
141+ # Auth (must be provided for production)
104142 AUTH_CONFIG="" \
105143 GITHUB_CLIENT_ID="" \
106144 GITHUB_CLIENT_SECRET="" \
@@ -112,12 +150,12 @@ VOLUME ["/data"]
# Declared container ports (TCP). EXPOSE is documentation only; ports still
# have to be published at run time.
EXPOSE 8080/tcp 6666/tcp
114152
# Container health probe. healthcheck.sh is expected to verify the API,
# Ollama, Qdrant and Tika. Timing: 90s start-up grace period, then a probe
# every 30s with a 10s timeout; 3 consecutive failures mark the container
# unhealthy.
HEALTHCHECK --start-period=90s --interval=30s --timeout=10s --retries=3 \
    CMD /app/docker/healthcheck.sh
118156
# tini runs as the init process in front of the actual command
ENTRYPOINT ["/usr/bin/tini", "--"]

# Default command: the entrypoint script (starts Qdrant, Ollama, Tika, API)
CMD ["/app/docker/entrypoint.sh"]
0 commit comments