2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -43,7 +43,7 @@ jobs:
- name: Run Flake8
run: |
source venv/bin/activate
flake8
flake8 src/

actionlint:
runs-on: ubuntu-latest
2 changes: 2 additions & 0 deletions .gitignore
@@ -70,3 +70,5 @@ venv.bak/
ehthumbs.db
Thumbs.db
CLAUDE.md
db/
vtk-examples.json
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "rag-components"]
path = rag-components
url = [email protected]:christos.tsolakis/rag-components.git
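Since `rag-components` is now a git submodule, a fresh checkout needs it initialized before `rag-components/requirements.txt` (referenced in the README below) is available locally. A minimal sketch using standard git commands; the repository URL is taken from the image labels in `deploy.Dockerfile`:

```bash
# After cloning vtk-mcp, fetch the rag-components submodule
git submodule update --init --recursive

# Or clone with submodules in a single step
git clone --recurse-submodules https://github.com/kitware/vtk-mcp.git
```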
28 changes: 0 additions & 28 deletions Dockerfile

This file was deleted.

42 changes: 41 additions & 1 deletion README.md
@@ -51,10 +51,50 @@ vtk-mcp-client --host localhost --port 8000 info-cpp vtkActor

## MCP Tools

The server provides three MCP tools:
The server provides four MCP tools:
- `get_vtk_class_info_cpp(class_name)` - Get detailed C++ documentation for a VTK class from online documentation
- `get_vtk_class_info_python(class_name)` - Get Python API documentation using help() function
- `search_vtk_classes(search_term)` - Search for VTK classes containing a term
- `vector_search_vtk_examples(query)` - Search VTK examples using vector similarity (requires embeddings database)
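
As a quick illustration, two of these tools map directly onto bundled-client commands that appear elsewhere in this README (the client subcommands for the other two tools are not shown in this diff, so they are omitted here):

```bash
# C++ documentation lookup (get_vtk_class_info_cpp)
vtk-mcp-client --host localhost --port 8000 info-cpp vtkActor

# Semantic example search (vector_search_vtk_examples); needs the embeddings database
vtk-mcp-client vector-search "render a sphere"
```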

## Vector Search with RAG

The server supports semantic search over VTK Python examples using vector embeddings. This requires the embeddings database.

### Downloading the Embeddings Database

The pre-built embeddings database is available as a container image on GitHub Container Registry:

```bash
# Using Docker
docker create --name vtk-embeddings ghcr.io/kitware/vtk-mcp/embeddings-database:latest
docker cp vtk-embeddings:/vtk-examples-embeddings.tar.gz .
docker rm vtk-embeddings

# Using Podman
podman create --name vtk-embeddings ghcr.io/kitware/vtk-mcp/embeddings-database:latest
podman cp vtk-embeddings:/vtk-examples-embeddings.tar.gz .
podman rm vtk-embeddings

# Extract the database
tar -xzf vtk-examples-embeddings.tar.gz
```

### Using Vector Search

After downloading and extracting the database, start the server with the database path:

```bash
# Install RAG dependencies
pip install -r rag-components/requirements.txt

# Start server with vector search enabled
vtk-mcp-server --transport http --database-path ./db/vtk-examples

# Use vector search with the client
vtk-mcp-client vector-search "render a sphere"
vtk-mcp-client vector-search "read DICOM files" --top-k 10
```

## Docker

48 changes: 48 additions & 0 deletions deploy.Dockerfile
@@ -0,0 +1,48 @@
FROM python:3.12-slim AS embeddings

# Download embeddings database from GHCR
COPY --from=ghcr.io/kitware/vtk-mcp/embeddings-database:latest /vtk-examples-embeddings.tar.gz /tmp/

# Extract the database
RUN mkdir -p /app/db && \
tar -xzf /tmp/vtk-examples-embeddings.tar.gz -C /app/db && \
rm /tmp/vtk-examples-embeddings.tar.gz

FROM python:3.12-slim

# Image metadata (LABEL must come after a FROM instruction; labels set in the
# final stage are the ones carried into the published image)
LABEL org.opencontainers.image.title="VTK MCP Server with Embeddings"
LABEL org.opencontainers.image.description="Model Context Protocol server for VTK with vector search embeddings"
LABEL org.opencontainers.image.source="https://github.com/kitware/vtk-mcp"
LABEL org.opencontainers.image.authors="Vicente Adolfo Bolea Sanchez <[email protected]>"
LABEL org.opencontainers.image.licenses="MIT"
LABEL org.opencontainers.image.documentation="https://github.com/kitware/vtk-mcp/blob/main/README.md"

ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_NO_CACHE_DIR=1 \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1

# Install system dependencies for VTK
RUN apt update && \
apt install --no-install-recommends --no-install-suggests -y \
libgl1-mesa-dev \
libxrender-dev/stable && \
rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy application code
COPY . .

# Copy embeddings database from first stage
COPY --from=embeddings /app/db /app/db

# Install Python dependencies (including RAG dependencies)
RUN pip install --upgrade pip && \
pip install --verbose . && \
pip install -r rag-components/requirements.txt

EXPOSE 8000

# Start server with database path configured
CMD ["vtk-mcp-server", "--transport", "http", "--host", "0.0.0.0", "--port", "8000", "--database-path", "/app/db/vtk-examples"]
1 change: 1 addition & 0 deletions pyproject.toml
@@ -60,6 +60,7 @@ markers = [
"integration: Integration tests that require server/client interaction",
"http: HTTP transport integration tests",
"stdio: Stdio transport integration tests",
"vector_search: Vector search integration tests (requires podman and embeddings database)",
"slow: Tests that take longer to run",
]
filterwarnings = [
13 changes: 13 additions & 0 deletions rag-components/.gitignore
@@ -0,0 +1,13 @@
# OS files
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Environments
.env
.venv
env/
venv/
13 changes: 13 additions & 0 deletions rag-components/LICENSE
@@ -0,0 +1,13 @@
Copyright 2025 Kitware Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
67 changes: 67 additions & 0 deletions rag-components/README.md
@@ -0,0 +1,67 @@
# A simple RAG for VTK

This project builds a database from the existing VTK Python examples and lets you ask questions about VTK.

## Set up
1. By default it uses the OpenAI API. Make sure you get an API key and set
the corresponding environment variable. To use another model, see
[below](#supported-llm-models).

2. Get the vtk-examples source code. We will use it to generate our database.

```bash
git clone https://gitlab.kitware.com/vtk/vtk-examples
```

3. Create a virtual environment and install the dependencies.

```bash
python -m venv env
source env/bin/activate
pip install -r requirements.txt
```

4. Populate the database. This only needs to be done once, unless you want to experiment with a different embedding function.
It will take some time, depending on your hardware.

```bash
python populate_db.py --dir ./vtk-examples/src/Python
```

5. Now ask your question!

```bash
$ python chat.py --database ./db/codesage-codesage-large-v2
User: How to read a vti file
To read a VTK image data file (.vti), you can use the `vtkXMLImageDataReader` class. Here is a basic example:

import vtk

# Create a reader for your vti file
reader = vtk.vtkXMLImageDataReader()
reader.SetFileName('your_file.vti')
reader.Update()

# The output of reader.GetOutput() is your vtkImageData object
image_data = reader.GetOutput()

In this code, replace `'your_file.vti'` with the path to your .vti file. The
`reader.Update()` call is necessary to actually perform the reading operation.
After this, you can use `reader.GetOutput()` to get the `vtkImageData` object
that was read from the file.

References:
https://examples.vtk.org/site/Python/Medical/GenerateModelsFromLabels
https://examples.vtk.org/site/Python/ImageData/WriteReadVtkImageData
...
```

### Supported LLM models
`chat.py` uses the "gpt-4" model by default. To switch to a different one, pass the model name via the `--model=<model name>` parameter (see the example after the list below).
Currently supported models:
- OpenAI models. See exact model names [here](https://platform.openai.com/docs/models#current-model-aliases). To use them you need an OpenAI API [key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key).
- Anthropic models. See exact names [here](https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names). To use them you need an Anthropic API [key](https://docs.anthropic.com/en/api/getting-started#accessing-the-api).
- Models supported by the Ollama framework. To use these, make sure you have [ollama](https://github.com/ollama/ollama) installed, that it
is running in another terminal (via `ollama serve`), and that you have already
pulled the model you want to use (via `ollama pull <model-name>`). You can find available models [here](https://ollama.com/).
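
A short sketch of switching backends with `--model`; the model names below are illustrative, so check the lists linked above for current names:

```bash
# OpenAI backend (the default model is gpt-4); needs your OpenAI API key set up as in step 1
python chat.py --database ./db/codesage-codesage-large-v2 --model gpt-4o

# Local model via Ollama; requires `ollama serve` running and the model already pulled
ollama pull llama3
python chat.py --database ./db/codesage-codesage-large-v2 --model llama3
```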