Changes from 56 commits
57 commits
37c8c3d
feat: fastapi service
LaansDole Sep 6, 2025
b4dd200
feat: datasets and unified variables
LaansDole Sep 13, 2025
b7163e3
fix: stream incompatible
LaansDole Sep 13, 2025
20436b5
raganything with lmstudio
LaansDole Sep 15, 2025
1961c75
feat: excel processor with pandas
LaansDole Sep 16, 2025
3cfe4f9
chores: refactor
LaansDole Sep 18, 2025
5eb67b8
chores: refactor
LaansDole Sep 19, 2025
85af553
Merge branch 'HKUDS:main' into feat/service
LaansDole Sep 19, 2025
5d04203
feat: api
LaansDole Sep 20, 2025
961de52
feat: refactor
LaansDole Sep 20, 2025
25f08c2
feat: streamlit ui
LaansDole Sep 21, 2025
65db48a
merge 'main' branch
LaansDole Sep 22, 2025
dac776a
chores: update ui
LaansDole Sep 22, 2025
bed8d96
extend request timeout
LaansDole Sep 24, 2025
5be9b5f
fix: refactor
LaansDole Sep 24, 2025
5bc8289
feat: Add offline support for tiktoken models
LaansDole Sep 24, 2025
f126bc2
chores: refactor
LaansDole Sep 25, 2025
15fd608
Merge branch 'HKUDS:main' into feat/service
LaansDole Sep 26, 2025
07bef25
chores: refactor
LaansDole Oct 4, 2025
65a205a
Fix ruff linter error: change DataFrame type hint to Any
LaansDole Oct 4, 2025
84aa51a
chores: refactor
LaansDole Oct 4, 2025
79e9897
fix: parse response as json
LaansDole Oct 13, 2025
bbe8d60
chores: update makefile
LaansDole Oct 13, 2025
f7baf9e
Merge branch 'HKUDS:main' into feat/service
LaansDole Oct 13, 2025
3ed3ccf
chores:
LaansDole Oct 13, 2025
dc1e775
Merge branch 'HKUDS:main' into feat/service
LaansDole Oct 15, 2025
c201f28
chores: refactor
LaansDole Oct 15, 2025
5ba68cc
fix: update ui
LaansDole Oct 17, 2025
05ee89b
Merge branch 'HKUDS:main' into feat/service
LaansDole Oct 17, 2025
6451661
fix: dataset format
LaansDole Oct 17, 2025
32b99c4
chores: refactor
LaansDole Oct 23, 2025
8b04a45
Merge branch 'HKUDS:main' into feat/service
LaansDole Oct 25, 2025
8809024
feat: docker
LaansDole Oct 27, 2025
1da8187
chores: refactor
LaansDole Nov 1, 2025
527349b
Merge branch 'HKUDS:main' into feat/service
LaansDole Nov 22, 2025
b9b81d5
Merge branch 'feat/service' into main
LaansDole Dec 4, 2025
07694e3
Merge pull request #1 from LaansDole/main
LaansDole Dec 13, 2025
c3d9ccc
Merge branch 'HKUDS:main' into feat/service
LaansDole Dec 13, 2025
f889a29
feat: graceful shutdown
LaansDole Dec 13, 2025
3a5b90e
Merge branch 'HKUDS:main' into feat/service
LaansDole Jan 6, 2026
e600880
Merge branch 'HKUDS:main' into feat/service
LaansDole Jan 18, 2026
adb7659
chores: remove ui
LaansDole Jan 18, 2026
2a045ef
chores: refactor
LaansDole Jan 18, 2026
0eb20cc
chores: further remove ui
LaansDole Jan 18, 2026
0900f33
feat: implement slowapi for rate limit
LaansDole Jan 18, 2026
0dd5028
feat: file upload config
LaansDole Jan 18, 2026
4e01264
fix: ruff check
LaansDole Jan 18, 2026
94c3254
update readme
LaansDole Jan 18, 2026
224e00b
update readme
LaansDole Jan 18, 2026
e532c30
update readme
LaansDole Jan 18, 2026
da3e46b
update readme
LaansDole Jan 18, 2026
3a00ab9
update readme
LaansDole Jan 18, 2026
3e97e80
update readme
LaansDole Jan 18, 2026
1e6d8ae
update readme
LaansDole Jan 18, 2026
3906067
remove some datasets
LaansDole Jan 18, 2026
a9d6e35
update makefile
LaansDole Jan 18, 2026
90077f5
Merge branch 'HKUDS:main' into feat/service
LaansDole Jan 28, 2026
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -67,6 +67,7 @@ download_models_hf.py
lightrag-dev/
gui/
tiktoken_cache/
uv.lock

# unit-test files
test_*
9 changes: 5 additions & 4 deletions .pre-commit-config.yaml
@@ -1,17 +1,18 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: requirements-txt-fixer


  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.4
    rev: v0.14.0
    hooks:
      - id: ruff-format
      - id: ruff
      - id: ruff-check
        args: [--fix, --ignore=E402]
      - id: ruff-format


  - repo: https://github.com/mgedmin/check-manifest
19 changes: 19 additions & 0 deletions Makefile
@@ -0,0 +1,19 @@
.PHONY: mock-test
mock-test:
	uv run python api/core_endpoint_test.py api/datasets/patient_records_small.xlsx

.PHONY: server
server:
	@echo "Starting RAGAnything API server..."
	@echo "Server will be available at http://localhost:8000"
	@echo "Press Ctrl+C to gracefully stop the server"
	uv run uvicorn api.app:app --reload

.PHONY: stop
stop:
	@echo "Stopping all services..."
	@pkill -f "uvicorn api.app:app" || true

.PHONY: lint
lint:
	uv run ruff check . --fix --ignore=E402
38 changes: 38 additions & 0 deletions README.md
@@ -48,9 +48,16 @@
</a>
</div>

<div align="center">
<a href="#fastapi-service" style="text-decoration: none;">
<img src="https://img.shields.io/badge/FastAPI%20Service-Deploy%20Now-00d9ff?style=for-the-badge&logo=fastapi&logoColor=white&labelColor=1a1a2e">
</a>
</div>

---

## 🎉 News
- [X] [2026.01]🎯📢 🚀 Released [FastAPI Service](#fastapi-service) for RAG-Anything! Now includes a production-ready API server with Office document support, Excel processing, and seamless integration capabilities.
- [X] [2025.10]🎯📢 🚀 We have released the technical report of [RAG-Anything](http://arxiv.org/abs/2510.12323). Access it now to explore our latest research findings.
- [X] [2025.08]🎯📢 🔍 RAG-Anything now features **VLM-Enhanced Query** mode! When documents include images, the system seamlessly integrates them into VLM for advanced multimodal analysis, combining visual and textual context for deeper insights.
- [X] [2025.07]🎯📢 RAG-Anything now features a [context configuration module](docs/context_aware_processing.md), enabling intelligent integration of relevant contextual information to enhance multimodal content processing.
@@ -1012,6 +1019,35 @@ python examples/image_format_test.py --check-pillow --file dummy
python examples/text_format_test.py --check-reportlab --file dummy
```

---

## FastAPI Service

Spin up a minimal API server to query and process documents using RAG-Anything with any OpenAI-compatible backend (LM Studio, Ollama, vLLM, DeepSeek, etc.).

Quick start (using uv):

```bash
# Install the project dependencies (including FastAPI and Uvicorn) into the uv environment
uv sync

# Run the server (reload for dev)
uv run uvicorn api.app:app --reload
# or using make
make server
```

**Command Reference:**

| Action | Make Command | Full UV Command |
|--------|--------------|-----------------|
| **Start Server** | `make server` | `uv run uvicorn api.app:app --reload` |
| **Run Mock Test** | `make mock-test` | `uv run python api/core_endpoint_test.py api/datasets/patient_records_small.xlsx` |
| **Stop Server** | `make stop` | `pkill -f "uvicorn api.app:app"` |
| **Lint** | `make lint` | `uv run ruff check . --fix --ignore=E402` |


---

## 🔧 Configuration
@@ -1030,6 +1066,8 @@ PARSER=mineru # Parser selection: mineru or docling
PARSE_METHOD=auto # Parse method: auto, ocr, or txt
```

> **LLM Recommendation:** For optimal performance with extensive RAG queries, it is recommended to use a text generation LLM that can handle at least **262,144 tokens** (e.g., Qwen models). This large context window helps prevent context overflow errors when processing comprehensive multimodal knowledge graphs.
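A quick way to sanity-check a prompt against that 262,144-token window is a rough character-count heuristic (illustrative only; for exact counts use a real tokenizer such as tiktoken, which this service supports offline):

```python
def estimate_tokens(text: str) -> int:
    """Crude heuristic: roughly 4 characters per token for English text.
    A real tokenizer gives exact counts; this is only a quick sanity check."""
    return max(1, len(text) // 4)

CONTEXT_WINDOW = 262_144  # the 262k window recommended above

prompt = "example prompt " * 1000
print(estimate_tokens(prompt), "of", CONTEXT_WINDOW, "tokens")
```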

**Note:** For backward compatibility, legacy environment variable names are still supported:
- `MINERU_PARSE_METHOD` is deprecated, please use `PARSE_METHOD`

72 changes: 72 additions & 0 deletions api/README.md
@@ -0,0 +1,72 @@
# RAG-Anything FastAPI Service

**Simple Upload → Process → Q&A Pipeline** for Office documents and Excel files. Optimized for structured text processing without image/vision overhead.

## Quick Start

Using uv:
```bash
# Install dependencies
uv add fastapi 'uvicorn[standard]'

# Start the server
uv run uvicorn api.app:app --reload
```

The server will be available at http://127.0.0.1:8000, with interactive API docs at: http://127.0.0.1:8000/docs

## Environment Variables

Configure via `.env` file (see project root `.env.example` for details):
```env
# Support for any OpenAI-compatible backend (LM Studio, Ollama, vLLM, DeepSeek, etc.)
LLM_BINDING_HOST=http://localhost:1234/v1
LLM_BINDING_API_KEY=lm-studio
# Recommended: Use models with 262k+ context (e.g., Qwen) to avoid overflow
LLM_MODEL=openai/gpt-oss-20b
EMBEDDING_BINDING_HOST=http://localhost:1234/v1
EMBEDDING_BINDING_API_KEY=lm-studio
EMBEDDING_MODEL=text-embedding-nomic-embed-text-v1.5
EMBEDDING_DIM=768 # Ensure this matches your model (e.g. 1024 for bge-m3)
WORKING_DIR=./rag_storage_service
```

## Simple API Endpoints

### Core Pipeline
- `GET /health` - Health check
- `POST /process-file` - Upload Office documents (DOC, DOCX, PDF, etc.)
- `POST /process-excel` - Upload Excel files (XLS, XLSX)
- `POST /query` - Ask questions about processed documents

### Advanced Querying
- `POST /query-multimodal` - Query with structured content (tables, text)

## Usage Workflow

1. **Upload & Process**: Upload your document using `/process-file` or `/process-excel`
2. **Query**: Ask questions using `/query`

## Excel Processing Features

- Automatic conversion to natural language text
- Dataset summaries and column statistics
- Configurable row limits and chunk sizes
- Support for multiple sheets
- Direct RAG system integration
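The "automatic conversion to natural language text" step can be sketched roughly as follows (a hypothetical `sheet_to_text` helper for illustration, not the service's actual Excel processor; it assumes pandas is available, as the service uses it for Excel handling):

```python
import pandas as pd

def sheet_to_text(df: pd.DataFrame, name: str, max_rows: int = 500) -> str:
    """Render a sheet as a short summary plus one sentence-like line per row."""
    lines = [
        f"Sheet '{name}' has {len(df)} rows and {len(df.columns)} columns "
        f"({', '.join(map(str, df.columns))})."
    ]
    # Respect the configurable row limit, mirroring the service's max_rows option
    for _, row in df.head(max_rows).iterrows():
        lines.append("; ".join(f"{col}: {row[col]}" for col in df.columns))
    return "\n".join(lines)

df = pd.DataFrame({"patient": ["A", "B"], "age": [34, 57]})
print(sheet_to_text(df, "records"))
```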

## Example Usage

```bash
# 1. Upload Excel file
curl -X POST "http://localhost:8000/process-excel" \
-F "file=@data.xlsx" \
-F "max_rows=500"

# 2. Query the data
curl -X POST "http://localhost:8000/query" \
-H "Content-Type: application/json" \
-d '{"query": "What are the main patterns in this data?"}'
```
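The same query step from Python, using only the standard library (the endpoint and body shape are taken from the curl example above; the response schema depends on the service):

```python
import json
import urllib.request

API = "http://localhost:8000"  # assumed local server from the quick start

def build_query_body(question: str) -> bytes:
    """JSON body matching the curl example for POST /query."""
    return json.dumps({"query": question}).encode("utf-8")

def ask(question: str) -> dict:
    """Send the question to /query and decode the JSON response."""
    req = urllib.request.Request(
        f"{API}/query",
        data=build_query_body(question),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)
```

Uploading a file would use a multipart request (e.g. via `requests` or `httpx`), which the standard library does not build as conveniently; the curl commands above remain the simplest option for that step.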

**Note:** Vision/image processing has been removed to focus on Office document workflows.
1 change: 1 addition & 0 deletions api/__init__.py
@@ -0,0 +1 @@
# FastAPI service package for RAG-Anything
72 changes: 72 additions & 0 deletions api/app.py
@@ -0,0 +1,72 @@
import logging
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded

from .routes import router
from .core import initialize_rag, cleanup_rag
from .utils import cancel_all_background_tasks

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize rate limiter
limiter = Limiter(key_func=get_remote_address, default_limits=["100/minute"])


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application lifespan with proper cleanup"""
    logger.info("RAG-Anything Service starting up...")

    # Initialize RAG instance in app state
    try:
        app.state.rag_instance = await initialize_rag()
        logger.info("RAG instance initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize RAG instance: {e}")
        raise

    yield

    # Shutdown cleanup
    logger.info("RAG-Anything Service shutting down...")
    try:
        # Cancel all background tasks
        await cancel_all_background_tasks()

        # Cleanup RAG instance
        await cleanup_rag(app.state.rag_instance)

        logger.info("RAG-Anything Service shutdown complete")
    except Exception as e:
        logger.error(f"Error during shutdown: {e}")

app = FastAPI(title="RAG-Anything Service", version="0.1.0", lifespan=lifespan)

# Add rate limiter to app state
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# CORS for local UI dev
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(router)


@app.get("/", include_in_schema=False)
async def root():
    """Redirect root path to API documentation"""
    return RedirectResponse(url="/docs")