
Commit 619e4f8

Merge pull request #20 from voarsh2/multi-repo-support-collections-11
Add multi-collection support and remote delta upload tooling
2 parents 1629c52 + b45b582


45 files changed: +7450 additions, -1046 deletions

.env

Lines changed: 9 additions & 1 deletion

@@ -3,6 +3,11 @@
 QDRANT_URL=http://qdrant:6333
 # QDRANT_API_KEY= # not needed for local
 
+# Repository mode: 0=single-repo (default), 1=multi-repo
+# Single-repo: All files go into one collection (COLLECTION_NAME)
+# Multi-repo: Each subdirectory gets its own collection
+MULTI_REPO_MODE=0
+
 # Single unified collection for seamless cross-repo search
 # Default: "codebase" - all your code in one collection for unified search
 # This enables searching across multiple repos/workspaces without fragmentation
@@ -144,7 +149,7 @@ MEMORY_COLLECTION_TTL_SECS=300
 # INDEX_UPSERT_BATCH=128
 # INDEX_UPSERT_RETRIES=5
 # INDEX_UPSERT_BACKOFF=0.5
-WATCH_DEBOUNCE_SECS=4
+WATCH_DEBOUNCE_SECS=4
 
 
 # Duplicate Streamable HTTP MCP instances (run alongside SSE)
@@ -161,3 +166,6 @@ HYBRID_RESULTS_CACHE_ENABLED=1
 INDEX_CHUNK_LINES=60
 INDEX_CHUNK_OVERLAP=10
 USE_GPU_DECODER=0
+
+# Development Remote Upload Configuration
+HOST_INDEX_PATH=./dev-workspace
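The new MULTI_REPO_MODE flag changes how files map to Qdrant collections. A minimal sketch of how an indexer might branch on it; the function name and the per-subdirectory naming rule here are illustrative, not code from this commit:

```python
import os
from pathlib import Path

def target_collections(root: str) -> list[str]:
    """Illustrative: choose collection names based on MULTI_REPO_MODE."""
    if os.environ.get("MULTI_REPO_MODE", "0") != "1":
        # Single-repo mode: everything lands in one collection (COLLECTION_NAME).
        return [os.environ.get("COLLECTION_NAME", "codebase")]
    # Multi-repo mode: one collection per immediate subdirectory.
    return sorted(p.name for p in Path(root).iterdir() if p.is_dir())
```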

.env.example

Lines changed: 11 additions & 0 deletions

@@ -1,10 +1,21 @@
 # Qdrant connection
 QDRANT_URL=http://localhost:6333
 QDRANT_API_KEY=
+
+# Multi-repo mode: 0=single-repo (default), 1=multi-repo
+# Single-repo: All files go into one collection (COLLECTION_NAME)
+# Multi-repo: Each subdirectory gets its own collection
+MULTI_REPO_MODE=0
+
 # Single unified collection for seamless cross-repo search (default: "codebase")
 # Leave unset or use "codebase" for unified search across all your code
 COLLECTION_NAME=codebase
 
+# Repository mode: 0=single-repo (default), 1=multi-repo
+# Single-repo: All files go into one collection (COLLECTION_NAME)
+# Multi-repo: Each subdirectory gets its own collection
+MULTI_REPO_MODE=0
+
 # Embeddings
 EMBEDDING_MODEL=BAAI/bge-base-en-v1.5
 EMBEDDING_PROVIDER=fastembed

Dockerfile.mcp

Lines changed: 6 additions & 1 deletion

@@ -3,11 +3,16 @@ FROM python:3.11-slim
 
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
-    WORK_ROOTS="/work,/app"
+    WORK_ROOTS="/work,/app" \
+    HF_HOME=/tmp/cache \
+    TRANSFORMERS_CACHE=/tmp/cache
 
 # Install latest FastMCP with Streamable HTTP (RMCP) support + deps
 RUN pip install --no-cache-dir --upgrade mcp fastmcp qdrant-client fastembed
 
+# Create cache directory with proper permissions
+RUN mkdir -p /tmp/cache && chmod 755 /tmp/cache
+
 # Bake scripts into image so server can run even when /work points elsewhere
 COPY scripts /app/scripts
 

Dockerfile.upload-service

Lines changed: 56 additions & 0 deletions

@@ -0,0 +1,56 @@
+# Dockerfile for Context-Engine Delta Upload Service
+FROM python:3.11-slim
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PYTHONPATH=/app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create app directory
+WORKDIR /app
+
+# Copy requirements first for better caching
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --upgrade pip && \
+    pip install -r requirements.txt
+
+# Copy application code
+COPY scripts/ ./scripts/
+COPY . .
+
+# Create work directory for repositories
+RUN mkdir -p /work && \
+    chmod 755 /work
+
+# Create non-root user for security
+RUN useradd --create-home --shell /bin/bash app && \
+    chown -R app:app /app /work
+USER app
+
+# Expose port
+EXPOSE 8002
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8002/health || exit 1
+
+# Default environment variables
+ENV UPLOAD_SERVICE_HOST=0.0.0.0 \
+    UPLOAD_SERVICE_PORT=8002 \
+    QDRANT_URL=http://qdrant:6333 \
+    WORK_DIR=/work \
+    MAX_BUNDLE_SIZE_MB=100 \
+    UPLOAD_TIMEOUT_SECS=300
+
+# Run the upload service
+CMD ["python", "scripts/upload_service.py"]
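The service caps bundles at MAX_BUNDLE_SIZE_MB. A sketch of the kind of pre-check an upload handler might run before unpacking a bundle; validate_bundle is a hypothetical helper, not shown in scripts/upload_service.py in this diff:

```python
import os
import tarfile

MAX_BUNDLE_SIZE_MB = int(os.environ.get("MAX_BUNDLE_SIZE_MB", "100"))

def validate_bundle(path: str) -> None:
    """Reject oversized or malformed delta bundles before extraction."""
    size_mb = os.path.getsize(path) / (1024 * 1024)
    if size_mb > MAX_BUNDLE_SIZE_MB:
        raise ValueError(f"bundle is {size_mb:.1f} MB; limit is {MAX_BUNDLE_SIZE_MB} MB")
    if not tarfile.is_tarfile(path):
        raise ValueError("bundle is not a valid tar archive")
```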

Makefile

Lines changed: 62 additions & 3 deletions

@@ -4,8 +4,8 @@ SHELL := /bin/bash
 # An empty export forces docker to use its default context/socket.
 export DOCKER_HOST =
 
-.PHONY: help up down logs ps restart rebuild index reindex watch env hybrid bootstrap history rerank-local setup-reranker prune warm health
-.PHONY: venv venv-install
+.PHONY: help up down logs ps restart rebuild index reindex watch watch-remote env hybrid bootstrap history rerank-local setup-reranker prune warm health test-e2e
+.PHONY: venv venv-install dev-remote-up dev-remote-down dev-remote-logs dev-remote-restart dev-remote-bootstrap dev-remote-test dev-remote-client dev-remote-clean
 
 .PHONY: qdrant-status qdrant-list qdrant-prune qdrant-index-root
 
@@ -77,6 +77,23 @@ index-here: ## index the current directory: make index-here [RECREATE=1] [REPO_N
 watch: ## watch mode: reindex changed files on save (Ctrl+C to stop)
 	docker compose run --rm --entrypoint python indexer /work/scripts/watch_index.py
 
+watch-remote: ## remote watch mode: upload delta bundles to remote server (Ctrl+C to stop)
+	@echo "Starting remote watch mode..."
+	@if [ -z "$(REMOTE_UPLOAD_ENDPOINT)" ]; then \
+		echo "Error: REMOTE_UPLOAD_ENDPOINT is required"; \
+		echo "Usage: make watch-remote REMOTE_UPLOAD_ENDPOINT=http://your-server:8080 [REMOTE_UPLOAD_MAX_RETRIES=3] [REMOTE_UPLOAD_TIMEOUT=30]"; \
+		exit 1; \
+	fi
+	@echo "Remote upload endpoint: $(REMOTE_UPLOAD_ENDPOINT)"
+	@echo "Max retries: $${REMOTE_UPLOAD_MAX_RETRIES:-3}"
+	@echo "Timeout: $${REMOTE_UPLOAD_TIMEOUT:-30} seconds"
+	docker compose run --rm --entrypoint python \
+		-e REMOTE_UPLOAD_ENABLED=1 \
+		-e REMOTE_UPLOAD_ENDPOINT=$(REMOTE_UPLOAD_ENDPOINT) \
+		-e REMOTE_UPLOAD_MAX_RETRIES=$${REMOTE_UPLOAD_MAX_RETRIES:-3} \
+		-e REMOTE_UPLOAD_TIMEOUT=$${REMOTE_UPLOAD_TIMEOUT:-30} \
+		indexer /work/scripts/watch_index.py
+
 rerank: ## multi-query re-ranker helper example
 	docker compose run --rm --entrypoint python indexer /work/scripts/rerank_query.py \
 		--query "chunk code by lines with overlap for indexing" \
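The watch-remote target forwards REMOTE_UPLOAD_MAX_RETRIES and REMOTE_UPLOAD_TIMEOUT into the watcher. A sketch of the retry loop those settings imply; upload_with_retries and the exponential backoff policy are assumptions, not code from this commit:

```python
import os
import time
import urllib.error
import urllib.request

def upload_with_retries(endpoint: str, data: bytes) -> int:
    """POST a delta bundle, retrying on failure (env names from the Makefile target)."""
    retries = int(os.environ.get("REMOTE_UPLOAD_MAX_RETRIES", "3"))
    timeout = int(os.environ.get("REMOTE_UPLOAD_TIMEOUT", "30"))
    for attempt in range(1, retries + 1):
        try:
            req = urllib.request.Request(endpoint, data=data, method="POST")
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                return resp.status
        except urllib.error.URLError:
            if attempt == retries:
                raise
            time.sleep(2 ** attempt)  # exponential backoff between attempts (assumed policy)
    raise RuntimeError("unreachable")
```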
@@ -216,12 +233,54 @@ llamacpp-build-image: ## build custom llama.cpp image with baked model (override
 # Download a tokenizer.json for micro-chunking (default: BAAI/bge-base-en-v1.5)
 TOKENIZER_URL ?= https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/tokenizer.json
 TOKENIZER_PATH ?= models/tokenizer.json
-
 tokenizer: ## download tokenizer.json to models/tokenizer.json (override with TOKENIZER_URL/TOKENIZER_PATH)
 	@mkdir -p $(dir $(TOKENIZER_PATH))
 	@echo "Downloading: $(TOKENIZER_URL) -> $(TOKENIZER_PATH)" && \
 	curl -L --fail --retry 3 -C - "$(TOKENIZER_URL)" -o "$(TOKENIZER_PATH)"
 
+# --- Development Remote Upload System Targets ---
+
+dev-remote-up: ## start dev-remote stack with upload service
+	@echo "Starting development remote upload system..."
+	@mkdir -p dev-workspace/.codebase
+	docker compose -f docker-compose.dev-remote.yml up -d --build
+
+dev-remote-down: ## stop dev-remote stack
+	@echo "Stopping development remote upload system..."
+	docker compose -f docker-compose.dev-remote.yml down
+
+dev-remote-logs: ## follow logs for dev-remote stack
+	docker compose -f docker-compose.dev-remote.yml logs -f --tail=100
+
+dev-remote-restart: ## restart dev-remote stack (rebuild)
+	docker compose -f docker-compose.dev-remote.yml down && docker compose -f docker-compose.dev-remote.yml up -d --build
+
+dev-remote-bootstrap: env dev-remote-up ## bootstrap dev-remote: up -> wait -> init -> index -> warm
+	@echo "Bootstrapping development remote upload system..."
+	./scripts/wait-for-qdrant.sh
+	docker compose -f docker-compose.dev-remote.yml run --rm init_payload || true
+	$(MAKE) tokenizer
+	docker compose -f docker-compose.dev-remote.yml run --rm indexer --root /work --recreate
+	$(MAKE) warm || true
+	$(MAKE) health
+
+dev-remote-test: ## test remote upload workflow
+	@echo "Testing remote upload workflow..."
+	@echo "Upload service should be accessible at http://localhost:8004"
+	@echo "Health check: curl http://localhost:8004/health"
+	@echo "Status check: curl 'http://localhost:8004/api/v1/delta/status?workspace_path=/work/test-repo'"
+	@echo "Test upload: curl -X POST -F 'bundle=@test-bundle.tar.gz' -F 'workspace_path=/work/test-repo' http://localhost:8004/api/v1/delta/upload"
+
+dev-remote-client: ## start remote upload client for testing
+	@echo "Starting remote upload client..."
+	docker compose -f docker-compose.dev-remote.yml --profile client up -d remote_upload_client
+
+dev-remote-clean: ## clean up dev-remote volumes and containers
+	@echo "Cleaning up development remote upload system..."
+	docker compose -f docker-compose.dev-remote.yml down -v
+	docker volume rm context-engine_shared_workspace context-engine_shared_codebase context-engine_upload_temp context-engine_qdrant_storage_dev_remote 2>/dev/null || true
+	rm -rf dev-workspace
+
 
 # Router helpers
 Q ?= what is hybrid search?
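The dev-remote-test target posts a test-bundle.tar.gz to the upload API. A sketch of packing such a bundle from a list of changed files; the bundle layout (flat tar.gz of file contents) is an assumption, as the diff does not show the real format:

```python
import pathlib
import tarfile

def make_delta_bundle(files: list[str], out_path: str) -> str:
    """Pack changed files into a tar.gz 'delta bundle' (layout assumed)."""
    with tarfile.open(out_path, "w:gz") as tar:
        for f in files:
            # Store each file under its bare name; a real client would
            # likely preserve workspace-relative paths instead.
            tar.add(f, arcname=pathlib.Path(f).name)
    return out_path
```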

README.md

Lines changed: 17 additions & 1 deletion

@@ -730,6 +730,8 @@ Indexer/Search MCP (8001 SSE, 8003 RMCP):
 - search_callers_for — intent wrapper for probable callers/usages
 - search_importers_for — intent wrapper for files importing a module/symbol
 - change_history_for_path(path) — summarize recent changes using stored metadata
+- collection_map - return collection↔repo mappings
+- default_collection - set the collection to use for the session
 
 Notes:
 - Most search tools accept filters like language, under, path_glob, kind, symbol, ext.
@@ -888,11 +890,25 @@ For production-grade backup/migration strategies, see the official Qdrant docume
 
 Operational notes:
 - Collection name comes from `COLLECTION_NAME` (see .env). This stack defaults to a single collection for both code and memories; filtering uses `metadata.kind`.
-- If you switch to a dedicated memory collection, update the MCP Memory server and the Indexers memory blending env to point at it.
+- If you switch to a dedicated memory collection, update the MCP Memory server and the Indexer's memory blending env to point at it.
 - Consider pruning expired memories by filtering `expires_at < now`.
 
 - Call `context_search` on :8001 (SSE) or :8003 (RMCP) with `{ "include_memories": true }` to return both memory and code results.
 
+### Collection Naming Strategies
+
+Different hash lengths are used for different workspace types:
+
+**Local Workspaces:** `repo-name-8charhash`
+- Example: `Anesidara-e8d0f5fc`
+- Used by local indexer/watcher
+- Assumes unique repo names within workspace
+
+**Remote Uploads:** `folder-name-16charhash-8charhash`
+- Example: `testupload2-04e680d5939dd035-b8b8d4cc`
+- Collision avoidance for duplicate folder names for different codebases
+- 16-char hash identifies workspace, 8-char hash identifies collection
+
 
 ### Enable memory blending (for context_search)
 
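The naming scheme added to the README can be reproduced with truncated digests. A sketch assuming SHA-256 over paths; the diff shows neither the hash function nor the exact inputs, so both are assumptions here:

```python
import hashlib

def short_hash(text: str, n: int) -> str:
    """First n hex chars of a digest (SHA-256 is an assumption)."""
    return hashlib.sha256(text.encode("utf-8")).hexdigest()[:n]

def local_collection(repo_name: str, repo_path: str) -> str:
    # Local workspaces: repo-name-8charhash, e.g. "Anesidara-e8d0f5fc"
    return f"{repo_name}-{short_hash(repo_path, 8)}"

def remote_collection(folder: str, workspace_path: str) -> str:
    # Remote uploads: folder-name-16charhash-8charhash. Per the README,
    # the 16-char hash identifies the workspace and the 8-char hash the
    # collection; the exact hash inputs are assumptions.
    return f"{folder}-{short_hash(workspace_path, 16)}-{short_hash(folder + workspace_path, 8)}"
```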