RooCodeInc · roomote · Aug 2, 2025 · Aug 3, 2025 · roomote · Aug 3, 2025
@@ -97,6 +97,15 @@ Roo Code comes with powerful [tools](https://docs.roocode.com/basic-usage/how-to
 
 MCP extends Roo Code's capabilities by allowing you to add unlimited custom tools. Integrate with external APIs, connect to databases, or create specialized development tools - MCP provides the framework to expand Roo Code's functionality to meet your specific needs.
 
+### Search Enhancement
+
+Roo Code now supports **semantic search reranking** to improve code search results:
+
+- **What is reranking:** Reranking uses a cross-encoder model to reorder search results by semantic relevance, so that the most contextually appropriate code appears first. This improves the accuracy of code discovery across your project.
+- **How to enable it:** Enable reranking through the Roo Code settings by configuring a reranking provider. Once enabled, all codebase searches will automatically benefit from improved result ordering.
+- **Supported providers:** Currently supports local (self-hosted) reranking models for privacy and offline use.
+- **Setup instructions:** For detailed setup and configuration instructions, see the [reranker service documentation](reranker-service/README.md).
+
 ### Customization
 
 Make Roo Code work your way with:

@@ -34,6 +34,14 @@ export const codebaseIndexConfigSchema = z.object({
 	// OpenAI Compatible specific fields
 	codebaseIndexOpenAiCompatibleBaseUrl: z.string().optional(),
 	codebaseIndexOpenAiCompatibleModelDimension: z.number().optional(),
+	// Reranker configuration
+	codebaseIndexRerankerEnabled: z.boolean().optional(),
+	codebaseIndexRerankerProvider: z.enum(["local", "cohere", "openai", "custom"]).optional(),
+	codebaseIndexRerankerUrl: z.string().optional(),
+	codebaseIndexRerankerModel: z.string().optional(),
+	codebaseIndexRerankerTopN: z.number().min(10).max(500).optional(),
+	codebaseIndexRerankerTopK: z.number().min(5).max(100).optional(),
+	codebaseIndexRerankerTimeout: z.number().min(1000).max(30000).optional(),
 })
 
 export type CodebaseIndexConfig = z.infer<typeof codebaseIndexConfigSchema>
@@ -64,6 +72,7 @@ export const codebaseIndexProviderSchema = z.object({
 	codebaseIndexOpenAiCompatibleModelDimension: z.number().optional(),
 	codebaseIndexGeminiApiKey: z.string().optional(),
 	codebaseIndexMistralApiKey: z.string().optional(),
+	codebaseIndexRerankerApiKey: z.string().optional(),
 })
 
 export type CodebaseIndexProvider = z.infer<typeof codebaseIndexProviderSchema>
@@ -189,6 +189,7 @@ export const SECRET_STATE_KEYS = [
 	"codebaseIndexOpenAiCompatibleApiKey",
 	"codebaseIndexGeminiApiKey",
 	"codebaseIndexMistralApiKey",
+	"codebaseIndexRerankerApiKey",
 	"huggingFaceApiKey",
 	"sambaNovaApiKey",
 ] as const satisfies readonly (keyof ProviderSettings)[]

@@ -0,0 +1,48 @@
+FROM python:3.10-slim
+
+# All subsequent relative paths (COPY, RUN, CMD) resolve against /app
+WORKDIR /app
+
+# Install build tools, git, and curl (curl is used by the HEALTHCHECK below); remove the apt lists to keep the layer small
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements.txt on its own so the dependency layer is reused when only application code changes
+COPY requirements.txt .
+
+# Upgrade pip, then install the Python dependencies without keeping pip's download cache
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the build context (the application code) into /app
+COPY . .
+
+# Create the directory that the model download below writes into
+RUN mkdir -p /app/.cache/models
+
+# Instantiate the default cross-encoder at build time so its files are cached in the image under /app/.cache/models
+RUN python -c "from sentence_transformers import CrossEncoder; CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2', cache_folder='/app/.cache/models')"
+
+# Create an unprivileged user (UID 1000) and give it ownership of /app, including the model cache
+RUN useradd -m -u 1000 appuser && \
+    chown -R appuser:appuser /app
+
+# Run the remaining instructions and the container process as appuser instead of root
+USER appuser
+
+# Document the port that uvicorn listens on (see CMD)
+EXPOSE 8080
+
+# PYTHONUNBUFFERED=1 disables Python's stdout/stderr buffering; MODEL_CACHE_DIR presumably tells the app where the pre-downloaded model lives (TODO confirm in app code)
+ENV PYTHONUNBUFFERED=1
+ENV MODEL_CACHE_DIR=/app/.cache/models
+
+# Probe /health every 30s (10s timeout, 40s startup grace period); 3 consecutive failures mark the container unhealthy
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -f http://localhost:8080/health || exit 1
+
+# Serve the `app` object from the `app` module with a single uvicorn worker, listening on all interfaces at port 8080
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080", "--workers", "1"]
@@ -0,0 +1,199 @@
+# Code Reranker Service
+
+A FastAPI-based service for reranking code search results using cross-encoder models. This service is designed to improve the relevance of search results in the Roo Code codebase indexing feature.
+
+## Overview
+
+The reranker service uses sentence-transformers with cross-encoder models to rerank code search results based on query-document relevance. It provides a simple REST API that accepts a query and a list of candidate documents, then returns them ordered by relevance.
+
+## Prerequisites
+
+- Python 3.10 or higher
+- Docker and Docker Compose (for containerized deployment)
+- CUDA-capable GPU (optional, for improved performance)
+
+## Quick Start
+
+### Using Docker Compose (Recommended)
+
+1. Navigate to the reranker service directory:
+
+    ```bash
+    cd reranker-service
+    ```
+
+2. Build and start the service:
+
+    ```bash
+    docker-compose up --build
+    ```
+
+3. The service will be available at `http://localhost:8080`
+
+### Using Python Directly
+
+1. Create a virtual environment:
+
+    ```bash
+    python -m venv venv
+    source venv/bin/activate  # On Windows: venv\Scripts\activate
+    ```
+
+2. Install dependencies:
+
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+3. Run the service:
+    ```bash
+    uvicorn app:app --host 0.0.0.0 --port 8080
+    ```
+
+## API Endpoints
+
+### Health Check
+
+```
+GET /health
+```
+
+Returns the service health status and model information.
+
+### Rerank
+
+```
+POST /rerank
+```
+
+Reranks documents based on query relevance.
+
+**Request Body:**
+
+```json
+{
+	"query": "implement user authentication",
+	"documents": [
+		{
+			"id": "doc1",
+			"content": "def authenticate_user(username, password):",
+			"metadata": {
+				"filePath": "src/auth.py",
+				"startLine": 10,
+				"endLine": 20
+			}
+		}
+	],
+	"max_results": 20
+}
+```
+
+**Response:**
+
+```json
+[
+	{
+		"id": "doc1",
+		"score": 0.95,
+		"rank": 1
+	}
+]
+```
+
+### API Documentation
+
+- Swagger UI: `http://localhost:8080/docs`
+- ReDoc: `http://localhost:8080/redoc`
+
+## Configuration
+
+The service can be configured using environment variables:
+
+| Variable          | Description                              | Default                                |
+| ----------------- | ---------------------------------------- | -------------------------------------- |
+| `MODEL_NAME`      | Cross-encoder model to use               | `cross-encoder/ms-marco-MiniLM-L-6-v2` |
+| `API_PORT`        | Port to run the service on               | `8080`                                 |
+| `API_WORKERS`     | Number of worker processes               | `1`                                    |
+| `REQUEST_TIMEOUT` | Request timeout in seconds               | `30`                                   |
+| `BATCH_SIZE`      | Batch size for model inference           | `32`                                   |
+| `LOG_LEVEL`       | Logging level                            | `INFO`                                 |
+| `FORCE_CPU`       | Force CPU usage even if GPU is available | `false`                                |
+| `WARMUP_ON_START` | Warm up model on startup                 | `true`                                 |
+
+## Development
+
+### Running Tests
+
+```bash
+pytest tests/
+```
+
+### Building Docker Image
+
+```bash
+docker build -t code-reranker .
+```
+
+### Development Mode
+
+For development, you can mount your local code into the container:
+
+```bash
+docker-compose -f docker-compose.yml up
+```
+
+This will mount the source files as volumes, allowing you to make changes without rebuilding the image.
+
+## Model Information
+
+The default model (`cross-encoder/ms-marco-MiniLM-L-6-v2`) is a lightweight cross-encoder optimized for passage reranking. It provides a good balance between inference speed and accuracy.
+
+### Supported Models
+
+- `cross-encoder/ms-marco-MiniLM-L-6-v2` (default)
+- `cross-encoder/ms-marco-MiniLM-L-12-v2` (higher accuracy, slower)
+- `cross-encoder/ms-marco-TinyBERT-L-2-v2` (faster, lower accuracy)
+
+## Performance Considerations
+
+1. **GPU Usage**: The service will automatically use CUDA if available. For CPU-only deployment, set `FORCE_CPU=true`.
+
+2. **Model Caching**: The default model is downloaded and cached in `/app/.cache/models` during the Docker build process, so it does not need to be fetched when the container starts.
+
+3. **Batch Processing**: Adjust `BATCH_SIZE` based on your hardware capabilities and memory constraints.
+
+4. **Resource Limits**: The Docker Compose configuration sets memory limits (2GB max, 1GB reserved). Adjust these based on your needs.
+
+## Troubleshooting
+
+### Service won't start
+
+- Check logs: `docker-compose logs reranker`
+- Ensure port 8080 is not already in use
+- Verify Docker daemon is running
+
+### Out of memory errors
+
+- Reduce `BATCH_SIZE`
+- Increase Docker memory limits in `docker-compose.yml`
+- Use a smaller model
+
+### Slow performance
+
+- Enable GPU support by ensuring CUDA is available
+- Use a smaller model for faster inference
+- Increase `API_WORKERS` for parallel processing
+
+## Next Steps
+
+This is a placeholder implementation. The actual implementation should:
+
+1. Integrate the real CrossEncoder model from sentence-transformers
+2. Add proper error handling and validation
+3. Implement request queuing for high load
+4. Add metrics and monitoring
+5. Implement model versioning and updates
+
+## License
+
+This service is part of the Roo Code project.