Skip to content

Commit 59b3e21

Browse files
authored
Merge pull request #18 from m1rl0k/Collection-Seamless
try to unify the collection process to make more seamless
2 parents 1be6846 + 9647f82 commit 59b3e21

40 files changed

+3700
-83
lines changed

.env

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
QDRANT_URL=http://qdrant:6333
44
# QDRANT_API_KEY= # not needed for local
55

6-
# Default collection used by the MCP server (auto-created if missing)
7-
# COLLECTION_NAME=my-collection # Use auto-detected default from .codebase/state.json
6+
# Single unified collection for seamless cross-repo search
7+
# Default: "codebase" - all your code in one collection for unified search
8+
# This enables searching across multiple repos/workspaces without fragmentation
9+
COLLECTION_NAME=codebase
810

911
# Embedding settings (FastEmbed model)
1012
EMBEDDING_MODEL=BAAI/bge-base-en-v1.5

.env.example

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# Qdrant connection
22
QDRANT_URL=http://localhost:6333
33
QDRANT_API_KEY=
4-
COLLECTION_NAME=my-collection
4+
# Single unified collection for seamless cross-repo search (default: "codebase")
5+
# Leave unset or use "codebase" for unified search across all your code
6+
COLLECTION_NAME=codebase
57

68
# Embeddings
79
EMBEDDING_MODEL=BAAI/bge-base-en-v1.5

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ tests/.codebase/cache.json
2424
tests/.codebase/state.json
2525
/scripts/.codebase
2626
/tests/.codebase
27+
.claude/settings.local.json
28+
.mcp.json

Dockerfile

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Unified Context-Engine image for Kubernetes deployment
2+
# Supports multiple roles: memory, indexer, watcher, llamacpp
3+
FROM python:3.11-slim
4+
5+
ENV PYTHONDONTWRITEBYTECODE=1 \
6+
PYTHONUNBUFFERED=1 \
7+
WORK_ROOTS="/work,/app"
8+
9+
# Install OS dependencies
10+
RUN apt-get update && apt-get install -y --no-install-recommends \
11+
git \
12+
ca-certificates \
13+
curl \
14+
&& rm -rf /var/lib/apt/lists/*
15+
16+
# Install Python dependencies for all services
17+
RUN pip install --no-cache-dir --upgrade \
18+
qdrant-client \
19+
fastembed \
20+
watchdog \
21+
onnxruntime \
22+
tokenizers \
23+
tree_sitter \
24+
tree_sitter_languages \
25+
mcp \
26+
fastmcp
27+
28+
# Copy scripts for all services
29+
COPY scripts /app/scripts
30+
31+
# Create directories
32+
WORKDIR /work
33+
34+
# Expose all necessary ports
35+
EXPOSE 8000 8001 8002 8003 18000 18001 18002 18003
36+
37+
# Default to memory server
38+
CMD ["python", "/app/scripts/mcp_memory_server.py"]

README.md

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ INDEX_MICRO_CHUNKS=1 MAX_MICRO_CHUNKS_PER_FILE=200 make reset-dev-dual
4040
```
4141
- Default ports: Memory MCP :8000, Indexer MCP :8001, Qdrant :6333, llama.cpp :8080
4242

43+
**🎯 Seamless Setup Note:**
44+
- The stack uses a **single unified `codebase` collection** by default
45+
- All your code goes into one collection for seamless cross-repo search
46+
- No per-workspace fragmentation - search across everything at once
47+
- Health checks auto-detect and fix cache/collection sync issues
48+
- Just run `make reset-dev-dual` on any machine and it works™
49+
4350
### Make targets: SSE, RMCP, and dual-compat
4451
- Legacy SSE only (default):
4552
- Ports: 8000 (/sse), 8001 (/sse)
@@ -96,9 +103,10 @@ INDEX_MICRO_CHUNKS=1 MAX_MICRO_CHUNKS_PER_FILE=200 make reset-dev-dual
96103
GLM_MODEL=glm-4.6 # Optional, defaults to glm-4.6
97104
```
98105

99-
5. **Custom collection name**:
106+
5. **Collection name** (unified by default):
100107
```bash
101-
COLLECTION_NAME=my-project # Defaults to auto-detected repo name
108+
COLLECTION_NAME=codebase # Default: single unified collection for all code
109+
# Only change this if you need isolated collections per project
102110
```
103111

104112
**After changing `.env`:**
@@ -280,7 +288,7 @@ Ports
280288

281289
| Name | Description | Default |
282290
|------|-------------|---------|
283-
| COLLECTION_NAME | Qdrant collection name used by both servers | my-collection |
291+
| COLLECTION_NAME | Qdrant collection name (unified across all repos) | codebase |
284292
| REPO_NAME | Logical repo tag stored in payload for filtering | auto-detect from git/folder |
285293
| HOST_INDEX_PATH | Host path mounted at /work in containers | current repo (.) |
286294
| QDRANT_URL | Qdrant base URL | container: http://qdrant:6333; local: http://localhost:6333 |
@@ -763,6 +771,50 @@ Notes:
763771
- Named vector remains aligned with the MCP server (fast-bge-base-en-v1.5). If you change EMBEDDING_MODEL, run `make reindex` to recreate the collection.
764772
- For very large repos, consider running `make index` on a schedule (or pre-commit) to keep Qdrant warm without full reingestion.
765773

774+
### Multi-repo indexing (unified search)
775+
776+
The stack uses a **single unified `codebase` collection** by default, making multi-repo search seamless:
777+
778+
**Index another repo into the same collection:**
779+
```bash
780+
# From the root of this repository (the directory containing the Makefile and docker-compose.yml)
781+
make index-here HOST_INDEX_PATH=/path/to/other/repo REPO_NAME=other-repo
782+
783+
# Or with full control:
784+
HOST_INDEX_PATH=/path/to/other/repo \
785+
COLLECTION_NAME=codebase \
786+
REPO_NAME=other-repo \
787+
docker compose run --rm indexer --root /work
788+
```
789+
790+
**What happens:**
791+
- Files from the other repo get indexed into the unified `codebase` collection
792+
- Each file is tagged with `metadata.repo = "other-repo"` for filtering
793+
- Search across all repos by default, or filter by specific repo
794+
795+
**Search examples:**
796+
```bash
797+
# Search across all indexed repos
798+
make hybrid QUERY="authentication logic"
799+
800+
# Filter by specific repo
801+
python scripts/hybrid_search.py \
802+
--query "authentication logic" \
803+
--repo other-repo
804+
805+
# Filter by repo + language
806+
python scripts/hybrid_search.py \
807+
--query "authentication logic" \
808+
--repo other-repo \
809+
--language python
810+
```
811+
812+
**Benefits:**
813+
- One collection = unified search across all your code
814+
- No fragmentation or collection management overhead
815+
- Filter by repo when you need isolation
816+
- All repos share the same vector space for better semantic search
817+
766818
### Multi-query re-ranker (no new deps)
767819

768820
- Run a fused query with several phrasings and metadata-aware boosts:
@@ -1296,6 +1348,39 @@ Client tips:
12961348
12971349
## Troubleshooting
12981350
1351+
### Collection Health & Cache Sync
1352+
1353+
The stack includes automatic health checks that detect and fix cache/collection sync issues:
1354+
1355+
**Check collection health:**
1356+
```bash
1357+
python scripts/collection_health.py --workspace . --collection codebase
1358+
```
1359+
1360+
**Auto-heal cache issues:**
1361+
```bash
1362+
python scripts/collection_health.py --workspace . --collection codebase --auto-heal
1363+
```
1364+
1365+
**What it detects:**
1366+
- Empty collection with cached files (cache thinks files are indexed but they're not)
1367+
- Significant mismatch between cached files and actual collection contents
1368+
- Missing metadata in collection points
1369+
1370+
**When to use:**
1371+
- After manually deleting collections
1372+
- If searches return no results despite indexing
1373+
- After Qdrant crashes or data loss
1374+
- When switching between collection names
1375+
1376+
**Automatic healing:**
1377+
- Health checks run automatically on watcher and indexer startup
1378+
- Cache is cleared when sync issues are detected
1379+
- Files are reindexed on next run
1380+
1381+
### General Issues
1382+
12991383
- If the MCP servers can’t reach Qdrant, confirm both containers are up: `make ps`.
13001384
- If the SSE port collides, change `FASTMCP_PORT` in `.env` and the mapped port in `docker-compose.yml`.
13011385
- If you customize tool descriptions, restart: `make restart`.
1386+
- If searches return no results, check collection health (see above).

deploy/kubernetes/Makefile

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# Context-Engine Kubernetes Deployment Makefile
2+
3+
# Configuration
4+
NAMESPACE ?= context-engine
5+
IMAGE_REGISTRY ?= context-engine
6+
IMAGE_TAG ?= latest
7+
8+
# Default target
9+
.PHONY: help
10+
# Self-documenting help: scans the parsed makefile(s) for lines of the form
# "target: ... ## description", sorts them, and prints each target name
# (colorized, padded to 20 columns) followed by its description.
# Target names must match [a-zA-Z_-]+ to be listed.
help: ## Show this help message
11+
@echo "Context-Engine Kubernetes Deployment Commands"
12+
@echo ""
13+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
14+
15+
# Prerequisites
16+
.PHONY: check-kubectl
17+
check-kubectl: ## Check if kubectl is available and cluster is accessible
18+
@which kubectl > /dev/null || (echo "kubectl not found. Please install kubectl." && exit 1)
19+
@kubectl cluster-info > /dev/null || (echo "Cannot connect to Kubernetes cluster." && exit 1)
20+
@echo "✓ Kubernetes connection verified"
21+
22+
# Deployment targets
23+
.PHONY: deploy
24+
deploy: check-kubectl ## Deploy all Context-Engine services
25+
./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG)
26+
27+
.PHONY: deploy-core
28+
deploy-core: check-kubectl ## Deploy only core services (Qdrant + MCP servers)
29+
@echo "Deploying core services..."
30+
kubectl apply -f namespace.yaml
31+
kubectl apply -f configmap.yaml
32+
kubectl apply -f qdrant.yaml
33+
kubectl apply -f mcp-memory.yaml
34+
kubectl apply -f mcp-indexer.yaml
35+
36+
.PHONY: deploy-full
37+
deploy-full: check-kubectl ## Deploy all services including optional ones
38+
./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG) --deploy-ingress
39+
40+
.PHONY: deploy-minimal
41+
deploy-minimal: check-kubectl ## Deploy minimal setup (skip Llama.cpp and Ingress)
42+
./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG) --skip-llamacpp
43+
44+
# Kustomize targets
45+
.PHONY: kustomize-build
46+
kustomize-build: ## Build manifests with Kustomize
47+
kustomize build .
48+
49+
.PHONY: kustomize-apply
50+
kustomize-apply: check-kubectl ## Apply manifests with Kustomize
51+
kustomize build . | kubectl apply -f -
52+
53+
.PHONY: kustomize-delete
54+
kustomize-delete: check-kubectl ## Delete manifests with Kustomize
55+
kustomize build . | kubectl delete -f -
56+
57+
# Management targets
58+
.PHONY: status
59+
status: check-kubectl ## Show deployment status
60+
@echo "=== Namespace Status ==="
61+
kubectl get namespace $(NAMESPACE) || echo "Namespace $(NAMESPACE) not found"
62+
@echo ""
63+
@echo "=== Pods ==="
64+
kubectl get pods -n $(NAMESPACE) -o wide || echo "No pods found"
65+
@echo ""
66+
@echo "=== Services ==="
67+
kubectl get services -n $(NAMESPACE) || echo "No services found"
68+
@echo ""
69+
@echo "=== Deployments ==="
70+
kubectl get deployments -n $(NAMESPACE) || echo "No deployments found"
71+
@echo ""
72+
@echo "=== StatefulSets ==="
73+
kubectl get statefulsets -n $(NAMESPACE) || echo "No statefulsets found"
74+
@echo ""
75+
@echo "=== PersistentVolumeClaims ==="
76+
kubectl get pvc -n $(NAMESPACE) || echo "No PVCs found"
77+
@echo ""
78+
@echo "=== Jobs ==="
79+
kubectl get jobs -n $(NAMESPACE) || echo "No jobs found"
80+
81+
.PHONY: logs
82+
# Print the most recent log lines of each core service, then return.
# Logs are deliberately NOT followed (-f): following the first service blocks
# forever, so the remaining services would never be shown. To follow a single
# service use `make logs-service SERVICE=<name>`.
# Each command is best-effort: a missing workload prints a notice instead of
# aborting the target.
# NOTE(review): assumes mcp-memory and mcp-indexer are Deployments (as the
# restart-service/scale usage examples imply) and qdrant is a StatefulSet.
logs: check-kubectl ## Show recent logs for all core services
83+
@echo "=== Qdrant Logs ==="
84+
kubectl logs statefulset/qdrant -n $(NAMESPACE) --tail=50 || echo "Qdrant logs not available"
@echo ""
@echo "=== MCP Memory Logs ==="
kubectl logs deployment/mcp-memory -n $(NAMESPACE) --tail=50 || echo "MCP Memory logs not available"
@echo ""
@echo "=== MCP Indexer Logs ==="
kubectl logs deployment/mcp-indexer -n $(NAMESPACE) --tail=50 || echo "MCP Indexer logs not available"
85+
86+
.PHONY: logs-service
87+
logs-service: check-kubectl ## Show logs for specific service (usage: make logs-service SERVICE=mcp-memory)
88+
@if [ -z "$(SERVICE)" ]; then echo "Usage: make logs-service SERVICE=<service-name>"; exit 1; fi
89+
kubectl logs -f deployment/$(SERVICE) -n $(NAMESPACE) --tail=100 || kubectl logs -f statefulset/$(SERVICE) -n $(NAMESPACE) --tail=100 || kubectl logs -f job/$(SERVICE) -n $(NAMESPACE) --tail=100 || echo "Service $(SERVICE) not found"
90+
91+
.PHONY: shell
92+
# Open an interactive shell in the pod named by POD.
# Prints usage plus the list of pods and exits 1 when POD is not set.
# bash is preferred; /bin/sh is used only when bash cannot be started in the
# container. The availability probe runs first so that a bash session which
# merely ends with a non-zero exit status does not open a second (sh) session.
shell: check-kubectl ## Get a shell in a running pod (usage: make shell POD=mcp-memory-xxx)
93+
@if [ -z "$(POD)" ]; then echo "Usage: make shell POD=<pod-name>"; echo "Available pods:"; kubectl get pods -n $(NAMESPACE); exit 1; fi
94+
if kubectl exec $(POD) -n $(NAMESPACE) -- /bin/bash -c 'exit 0' >/dev/null 2>&1; then \
kubectl exec -it $(POD) -n $(NAMESPACE) -- /bin/bash; \
else \
kubectl exec -it $(POD) -n $(NAMESPACE) -- /bin/sh; \
fi
95+
96+
# Cleanup targets
97+
.PHONY: cleanup
98+
cleanup: check-kubectl ## Remove all Context-Engine resources
99+
./cleanup.sh --namespace $(NAMESPACE)
100+
101+
.PHONY: clean-force
102+
clean-force: check-kubectl ## Force cleanup without confirmation
103+
./cleanup.sh --namespace $(NAMESPACE) --force
104+
105+
# Development targets
106+
.PHONY: restart
107+
restart: check-kubectl ## Restart all deployments
108+
kubectl rollout restart deployment -n $(NAMESPACE)
109+
kubectl rollout restart statefulset -n $(NAMESPACE)
110+
111+
.PHONY: restart-service
112+
restart-service: check-kubectl ## Restart specific service (usage: make restart-service SERVICE=mcp-memory)
113+
@if [ -z "$(SERVICE)" ]; then echo "Usage: make restart-service SERVICE=<service-name>"; exit 1; fi
114+
kubectl rollout restart deployment/$(SERVICE) -n $(NAMESPACE) || kubectl rollout restart statefulset/$(SERVICE) -n $(NAMESPACE)
115+
116+
.PHONY: scale
117+
scale: check-kubectl ## Scale a deployment (usage: make scale SERVICE=mcp-memory REPLICAS=3)
118+
@if [ -z "$(SERVICE)" ] || [ -z "$(REPLICAS)" ]; then echo "Usage: make scale SERVICE=<service-name> REPLICAS=<number>"; exit 1; fi
119+
kubectl scale deployment $(SERVICE) -n $(NAMESPACE) --replicas=$(REPLICAS)
120+
121+
# Port forwarding targets
122+
.PHONY: port-forward
123+
# Start three `kubectl port-forward` processes in the background
# (qdrant 6333, mcp-memory 8000, mcp-indexer 8001) and return immediately.
# The recipe does not wait for the forwards or check that they succeeded; any
# error from kubectl only shows up as terminal output after make has returned.
port-forward: check-kubectl ## Port forward all services
124+
@echo "Opening port forwards in background..."
125+
@kubectl port-forward -n $(NAMESPACE) service/qdrant 6333:6333 &
126+
@kubectl port-forward -n $(NAMESPACE) service/mcp-memory 8000:8000 &
127+
@kubectl port-forward -n $(NAMESPACE) service/mcp-indexer 8001:8001 &
128+
@echo "Port forwards started. Use 'make stop-port-forward' to stop."
129+
130+
.PHONY: port-forward-service
131+
port-forward-service: check-kubectl ## Port forward specific service (usage: make port-forward-service SERVICE=qdrant LOCAL=6333 REMOTE=6333)
132+
@if [ -z "$(SERVICE)" ] || [ -z "$(LOCAL)" ] || [ -z "$(REMOTE)" ]; then echo "Usage: make port-forward-service SERVICE=<service-name> LOCAL=<local-port> REMOTE=<remote-port>"; exit 1; fi
133+
kubectl port-forward -n $(NAMESPACE) service/$(SERVICE) $(LOCAL):$(REMOTE)
134+
135+
.PHONY: stop-port-forward
136+
# Stop port forwards by killing processes whose command line matches
# "kubectl port-forward". NOTE: pkill -f matches on the full command line, so
# this also stops port forwards that were started outside this Makefile
# (other namespaces or clusters included).
stop-port-forward: ## Stop all port forwards
137+
pkill -f "kubectl port-forward" || echo "No port forwards found"
138+
139+
# Build and push targets
140+
.PHONY: build-image
141+
build-image: ## Build Docker image
142+
docker build -t $(IMAGE_REGISTRY)/context-engine:$(IMAGE_TAG) ../../
143+
144+
.PHONY: push-image
145+
push-image: build-image ## Push Docker image to registry
146+
docker push $(IMAGE_REGISTRY)/context-engine:$(IMAGE_TAG)
147+
148+
# Test targets
149+
.PHONY: test-connection
150+
# Probe each service from inside the cluster using a throwaway curl pod
# (--rm --restart=Never, so the pod is deleted after the request).
# Qdrant is probed on /healthz: Qdrant exposes /healthz, /livez and /readyz for
# Kubernetes-style checks and has no /health route, so `curl -f .../health`
# would report a failure even when Qdrant is healthy.
# Each probe is best-effort: a failure prints a message but does not fail the
# target, so all three services are always checked.
test-connection: check-kubectl ## Test connectivity to all services
151+
@echo "Testing service connectivity..."
152+
@echo "Qdrant:"
153+
@kubectl run qdrant-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://qdrant.$(NAMESPACE).svc.cluster.local:6333/healthz || echo "Qdrant test failed"
154+
@echo "MCP Memory:"
155+
@kubectl run memory-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-memory.$(NAMESPACE).svc.cluster.local:18000/health || echo "MCP Memory test failed"
156+
@echo "MCP Indexer:"
157+
@kubectl run indexer-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-indexer.$(NAMESPACE).svc.cluster.local:18001/health || echo "MCP Indexer test failed"
158+
159+
# Configuration targets
160+
.PHONY: show-config
161+
show-config: ## Show current configuration
162+
@echo "Configuration:"
163+
@echo " NAMESPACE: $(NAMESPACE)"
164+
@echo " IMAGE_REGISTRY: $(IMAGE_REGISTRY)"
165+
@echo " IMAGE_TAG: $(IMAGE_TAG)"
166+
@echo ""
167+
@echo "Quick start commands:"
168+
@echo " make deploy # Deploy all services"
169+
@echo " make status # Show deployment status"
170+
@echo " make logs-service SERVICE=mcp-memory # Show logs"
171+
@echo " make cleanup # Remove everything"
172+
173+
.PHONY: show-urls
174+
show-urls: check-kubectl ## Show access URLs for services
175+
@echo "Service URLs (via NodePort):"
176+
@echo " Qdrant: http://<node-ip>:30333"
177+
@echo " MCP Memory (SSE): http://<node-ip>:30800"
178+
@echo " MCP Indexer (SSE): http://<node-ip>:30802"
179+
@echo " MCP Memory (HTTP): http://<node-ip>:30804"
180+
@echo " MCP Indexer (HTTP): http://<node-ip>:30806"
181+
@echo " Llama.cpp: http://<node-ip>:30808"
182+
@echo ""
183+
@echo "Service URLs (via port-forward):"
184+
@echo " make port-forward # Then access via localhost ports"
185+
186+
# Advanced targets
187+
.PHONY: watch-deployment
188+
watch-deployment: check-kubectl ## Watch deployment progress
189+
watch kubectl get pods,services,deployments -n $(NAMESPACE)
190+
191+
.PHONY: describe-service
192+
describe-service: check-kubectl ## Describe a service (usage: make describe-service SERVICE=mcp-memory)
193+
@if [ -z "$(SERVICE)" ]; then echo "Usage: make describe-service SERVICE=<service-name>"; echo "Available services:"; kubectl get services -n $(NAMESPACE); exit 1; fi
194+
kubectl describe service $(SERVICE) -n $(NAMESPACE)
195+
196+
.PHONY: events
197+
events: check-kubectl ## Show recent events
198+
kubectl get events -n $(NAMESPACE) --sort-by=.metadata.creationTimestamp
199+

0 commit comments

Comments
 (0)