IBM
diff --git a/‎Makefile
Lines changed: 1 addition & 1 deletion b/‎Makefile
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/docs/using/tool-annotations.md
Lines changed: 1 addition & 1 deletion b/‎docs/docs/using/tool-annotations.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎mcp-servers/python/mcp_eval_server/.env.example
Lines changed: 53 additions & 0 deletions b/‎mcp-servers/python/mcp_eval_server/.env.example
Lines changed: 53 additions & 0 deletions
diff --git a/‎mcp-servers/python/mcp_eval_server/.gitignore
Lines changed: 1 addition & 0 deletions b/‎mcp-servers/python/mcp_eval_server/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎mcp-servers/python/mcp_eval_server/Containerfile
Lines changed: 298 additions & 0 deletions b/‎mcp-servers/python/mcp_eval_server/Containerfile
Lines changed: 298 additions & 0 deletions
@@ -527,7 +527,7 @@ images:
 # help: vulture              - Dead code detection
 
 # Allow specific file/directory targeting
-DEFAULT_TARGETS := mcpgateway
+DEFAULT_TARGETS := mcpgateway mcp-servers/python
 TARGET ?= $(DEFAULT_TARGETS)
 
 # Add dummy targets for file arguments passed to lint commands only
 
@@ -254,4 +254,4 @@ Many MCP clients use annotations to:
 - **Cache results** from read-only tools
 - **Adjust UI presentation** based on safety hints
 
-Properly annotated tools provide better user experiences and safer AI agent interactions.
+Properly annotated tools provide better user experiences and safer AI agent interactions.
@@ -0,0 +1,53 @@
+# MCP Evaluation Server Environment Configuration
+# Copy this file to .env and configure your settings
+
+# OpenAI Configuration
+OPENAI_API_KEY=sk-your-openai-api-key-here
+# OPENAI_ORG_ID=org-your-organization-id  # Optional
+
+# Azure OpenAI Configuration
+# AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+# AZURE_OPENAI_KEY=your-azure-openai-key
+# AZURE_OPENAI_API_VERSION=2024-02-01
+
+# Anthropic Configuration (for future support)
+# ANTHROPIC_API_KEY=sk-ant-your-anthropic-key
+
+# Default Judge Model Selection
+DEFAULT_JUDGE_MODEL=gpt-4
+# Alternative options: gpt-3.5-turbo, gpt-4-turbo, gpt-4-azure, rule-based
+
+# Cache Configuration
+MCP_EVAL_CACHE_DIR=/app/data/cache
+MCP_EVAL_CACHE_TTL=3600  # Cache TTL in seconds (1 hour)
+MCP_EVAL_CACHE_SIZE=1000  # Maximum cached items
+
+# Database Configuration
+MCP_EVAL_RESULTS_DB=/app/data/results/evaluation_results.db
+
+# Logging Configuration
+LOG_LEVEL=INFO
+PYTHONUNBUFFERED=1
+
+# Performance Configuration
+MAX_CONCURRENT_EVALUATIONS=3
+EVALUATION_TIMEOUT=300  # seconds
+
+# Development Configuration
+# DEVELOPMENT_MODE=true  # Enable for development features
+
+# Model-specific settings
+GPT4_TEMPERATURE=0.3
+GPT4_MAX_TOKENS=2000
+GPT35_TEMPERATURE=0.2
+GPT35_MAX_TOKENS=2000
+
+# Evaluation defaults
+DEFAULT_CONSISTENCY_RUNS=3
+DEFAULT_TEMPERATURE_RANGE=0.1,0.5,0.9
+DEFAULT_RELEVANCE_THRESHOLD=0.7
+DEFAULT_CONFIDENCE_THRESHOLD=0.8
+
+# Security settings
+RATE_LIMIT_REQUESTS=100  # per hour
+RATE_LIMIT_TOKENS=50000  # per hour
@@ -0,0 +1 @@
+*.db
@@ -0,0 +1,298 @@
+# syntax=docker/dockerfile:1.7
+
+###############################################################################
+# MCP Evaluation Server - OCI-compliant container build
+#
+# This multi-stage Dockerfile produces an ultra-slim, scratch-based runtime
+# image that automatically tracks the latest Python 3.11.x patch release
+# from the RHEL 9 repositories and is fully patched on each rebuild.
+#
+# Key design points:
+#   - Builder stage has full DNF + devel headers for wheel compilation
+#   - Runtime stage is scratch: only the Python runtime and app
+#   - Both builder and runtime rootfs receive `dnf upgrade -y`
+#   - Development headers are dropped from the final image
+#   - Hadolint DL3041 is suppressed to allow "latest patch" RPM usage
+#   - Includes ML/AI dependencies for sentence transformers and embeddings
+###############################################################################
+
+###########################
+# Build-time arguments
+###########################
+# Temporary dir for assembling the scratch rootfs
+ARG ROOTFS_PATH=/tmp/rootfs
+# Python major.minor series to track
+ARG PYTHON_VERSION=3.11
+
+###########################
+# Builder stage
+###########################
+FROM registry.access.redhat.com/ubi9/ubi:9.6-1753978585 AS builder
+SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
+
+ARG PYTHON_VERSION
+ARG ROOTFS_PATH
+
+# ----------------------------------------------------------------------------
+# 1) Patch the OS
+# 2) Install Python + headers for building wheels (needed for ML dependencies)
+# 3) Install build tools for compiling scientific libraries
+# 4) Install binutils for strip command
+# 5) Register python3 alternative
+# 6) Clean caches to reduce layer size
+# ----------------------------------------------------------------------------
+# hadolint ignore=DL3041
+RUN set -euo pipefail \
+    && dnf upgrade -y \
+    && dnf install -y \
+         python${PYTHON_VERSION} \
+         python${PYTHON_VERSION}-devel \
+         python${PYTHON_VERSION}-pip \
+         gcc \
+         binutils \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+    && dnf clean all
+
+WORKDIR /app
+
+# ----------------------------------------------------------------------------
+# Copy only the files needed for dependency installation first
+# This maximizes Docker layer caching - dependencies change less often
+# ----------------------------------------------------------------------------
+COPY pyproject.toml /app/
+COPY README.md /app/
+
+# ----------------------------------------------------------------------------
+# Create and populate virtual environment
+#  - Upgrade pip, setuptools, wheel, pdm, uv
+#  - Install project dependencies and package
+#  - Include all evaluation dependencies (ML/AI packages)
+#  - Remove build tools but keep runtime dist-info
+#  - Remove build caches and build artifacts
+# ----------------------------------------------------------------------------
+RUN set -euo pipefail \
+    && python3 -m venv /app/.venv \
+    && /app/.venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && /app/.venv/bin/pip install --no-cache-dir -e ".[dev]" \
+    && /app/.venv/bin/pip uninstall --yes pip setuptools wheel \
+    && rm -rf /root/.cache /var/cache/dnf \
+    && find /app/.venv -name "*.dist-info" -type d \
+        \( -name "pip-*" -o -name "setuptools-*" -o -name "wheel-*" \) \
+        -exec rm -rf {} + 2>/dev/null || true \
+    && rm -rf /app/.venv/share/python-wheels \
+    && rm -rf /app/*.egg-info /app/build /app/dist /app/.eggs
+
+# ----------------------------------------------------------------------------
+# Now copy the application files needed for runtime
+# This ensures code changes don't invalidate the dependency layer
+# ----------------------------------------------------------------------------
+COPY mcp_eval_server/ /app/mcp_eval_server/
+COPY config/ /app/config/
+COPY __init__.py /app/
+
+# ----------------------------------------------------------------------------
+# Create runtime script for MCP server
+# ----------------------------------------------------------------------------
+RUN cat > /app/run-server.sh << 'EOF'
+#!/bin/bash
+set -euo pipefail
+
+# Set default values
+export OPENAI_API_KEY="${OPENAI_API_KEY:-}"
+export AZURE_OPENAI_ENDPOINT="${AZURE_OPENAI_ENDPOINT:-}"
+export AZURE_OPENAI_KEY="${AZURE_OPENAI_KEY:-}"
+
+# Log startup information
+echo "Starting MCP Evaluation Server..."
+echo "Available judges: $(python3 -c 'from mcp_eval_server.tools.judge_tools import JudgeTools; jt=JudgeTools(); print(jt.get_available_judges())')"
+
+# Run the MCP server
+exec python3 -m mcp_eval_server.server
+EOF
+
+# ----------------------------------------------------------------------------
+# Ensure executable permissions for scripts
+# ----------------------------------------------------------------------------
+RUN chmod +x /app/run-server.sh
+
+# ----------------------------------------------------------------------------
+# Pre-compile Python bytecode with -OO optimization
+#  - Strips docstrings and assertions
+#  - Improves startup performance
+#  - Must be done before copying to rootfs
+#  - Remove __pycache__ directories after compilation
+# ----------------------------------------------------------------------------
+RUN python3 -OO -m compileall -q /app/.venv /app/mcp_eval_server /app/config \
+    && find /app -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
+
+# ----------------------------------------------------------------------------
+# Build a minimal, fully-patched rootfs containing only the runtime Python
+# Include ca-certificates for HTTPS connections to OpenAI/Azure APIs
+# ----------------------------------------------------------------------------
+# hadolint ignore=DL3041
+RUN set -euo pipefail \
+    && mkdir -p "${ROOTFS_PATH:?}" \
+    && dnf --installroot="${ROOTFS_PATH:?}" --releasever=9 upgrade -y \
+    && dnf --installroot="${ROOTFS_PATH:?}" --releasever=9 install -y \
+         --setopt=install_weak_deps=0 \
+         --setopt=tsflags=nodocs \
+         python${PYTHON_VERSION} \
+         ca-certificates \
+    && dnf clean all --installroot="${ROOTFS_PATH:?}"
+
+# ----------------------------------------------------------------------------
+# Create `python3` symlink in the rootfs for compatibility
+# ----------------------------------------------------------------------------
+RUN ln -s /usr/bin/python${PYTHON_VERSION} ${ROOTFS_PATH:?}/usr/bin/python3
+
+# ----------------------------------------------------------------------------
+# Clean up unnecessary files from rootfs (if they exist)
+#  - Remove development headers, documentation
+#  - Use ${var:?} to prevent accidental deletion of host directories
+# ----------------------------------------------------------------------------
+RUN set -euo pipefail \
+    && rm -rf ${ROOTFS_PATH:?}/usr/include/* \
+    ${ROOTFS_PATH:?}/usr/share/man/* \
+    ${ROOTFS_PATH:?}/usr/share/doc/* \
+    ${ROOTFS_PATH:?}/usr/share/info/* \
+    ${ROOTFS_PATH:?}/usr/share/locale/* \
+    ${ROOTFS_PATH:?}/var/log/* \
+    ${ROOTFS_PATH:?}/boot \
+    ${ROOTFS_PATH:?}/media \
+    ${ROOTFS_PATH:?}/srv \
+    ${ROOTFS_PATH:?}/usr/games \
+    && find ${ROOTFS_PATH:?}/usr/lib*/python*/ -type d -name "test" -exec rm -rf {} + 2>/dev/null || true \
+    && find ${ROOTFS_PATH:?}/usr/lib*/python*/ -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true \
+    && find ${ROOTFS_PATH:?}/usr/lib*/python*/ -type d -name "idle_test" -exec rm -rf {} + 2>/dev/null || true \
+    && find ${ROOTFS_PATH:?}/usr/lib*/python*/ -name "*.mo" -delete 2>/dev/null || true \
+    && rm -rf ${ROOTFS_PATH:?}/usr/lib*/python*/ensurepip \
+    ${ROOTFS_PATH:?}/usr/lib*/python*/idlelib \
+    ${ROOTFS_PATH:?}/usr/lib*/python*/tkinter \
+    ${ROOTFS_PATH:?}/usr/lib*/python*/turtle* \
+    ${ROOTFS_PATH:?}/usr/lib*/python*/distutils/command/*.exe
+
+# ----------------------------------------------------------------------------
+# Remove package managers and unnecessary system tools from rootfs
+# - Keep essential tools for MCP server functionality
+# - Remove security-sensitive tools to minimize attack surface
+# ----------------------------------------------------------------------------
+RUN rm -rf ${ROOTFS_PATH:?}/usr/bin/dnf* \
+    ${ROOTFS_PATH:?}/usr/bin/yum* \
+    ${ROOTFS_PATH:?}/usr/bin/rpm* \
+    ${ROOTFS_PATH:?}/usr/bin/microdnf \
+    ${ROOTFS_PATH:?}/usr/lib/rpm \
+    ${ROOTFS_PATH:?}/usr/lib/dnf \
+    ${ROOTFS_PATH:?}/usr/lib/yum* \
+    ${ROOTFS_PATH:?}/etc/dnf \
+    ${ROOTFS_PATH:?}/etc/yum*
+
+# ----------------------------------------------------------------------------
+# Strip unneeded symbols from shared libraries and remove build tools
+# - This reduces the final image size and removes build tools in one step
+# ----------------------------------------------------------------------------
+RUN find "${ROOTFS_PATH:?}/usr/lib64" -name '*.so*' -exec strip --strip-unneeded {} + 2>/dev/null || true \
+    && dnf remove -y gcc binutils \
+    && dnf clean all
+
+# ----------------------------------------------------------------------------
+# Remove setuid/setgid binaries for security
+# ----------------------------------------------------------------------------
+RUN find ${ROOTFS_PATH:?} -perm /4000 -o -perm /2000 -type f -delete 2>/dev/null || true
+
+# ----------------------------------------------------------------------------
+# Create minimal passwd/group files for user 1001
+# - Using GID 1001 to match UID for consistency
+# - OpenShift compatible (accepts any UID in group 1001)
+# ----------------------------------------------------------------------------
+RUN printf 'mcp-eval:x:1001:1001:mcp-eval:/app:/sbin/nologin\n' > "${ROOTFS_PATH:?}/etc/passwd" && \
+    printf 'mcp-eval:x:1001:\n' > "${ROOTFS_PATH:?}/etc/group"
+
+# ----------------------------------------------------------------------------
+# Create necessary directories in the rootfs
+#  - /tmp and /var/tmp with sticky bit for security
+#  - Create directories for evaluation data and cache
+# ----------------------------------------------------------------------------
+RUN chmod 1777 ${ROOTFS_PATH:?}/tmp ${ROOTFS_PATH:?}/var/tmp 2>/dev/null || true \
+    && chown 1001:1001 ${ROOTFS_PATH:?}/tmp ${ROOTFS_PATH:?}/var/tmp \
+    && mkdir -p ${ROOTFS_PATH:?}/app/data/cache \
+    && mkdir -p ${ROOTFS_PATH:?}/app/data/results \
+    && chown -R 1001:1001 ${ROOTFS_PATH:?}/app/data
+
+# ----------------------------------------------------------------------------
+# Copy application directory into the rootfs and fix permissions for non-root
+# - Set ownership to 1001:1001 (matching passwd/group)
+# - Allow group write permissions for OpenShift compatibility
+# ----------------------------------------------------------------------------
+RUN cp -r /app ${ROOTFS_PATH:?}/app \
+    && chown -R 1001:1001 ${ROOTFS_PATH:?}/app \
+    && chmod -R g=u ${ROOTFS_PATH:?}/app
+
+###########################
+# Final runtime (squashed)
+###########################
+FROM scratch AS runtime
+
+ARG PYTHON_VERSION
+ARG ROOTFS_PATH
+
+# ----------------------------------------------------------------------------
+# OCI image metadata
+# ----------------------------------------------------------------------------
+LABEL maintainer="Mihai Criveti" \
+      org.opencontainers.image.title="mcp/mcp-eval-server" \
+      org.opencontainers.image.description="MCP Evaluation Server: Comprehensive agent and prompt evaluation using LLM-as-a-judge" \
+      org.opencontainers.image.licenses="Apache-2" \
+      org.opencontainers.image.version="0.1.0" \
+      org.opencontainers.image.source="https://github.com/contextforge/mcp-context-forge" \
+      org.opencontainers.image.documentation="https://github.com/contextforge/mcp-context-forge/mcp-servers/python/mcp-eval-server" \
+      org.opencontainers.image.vendor="MCP Context Forge"
+
+# ----------------------------------------------------------------------------
+# Copy the entire prepared root filesystem from the builder stage
+# ----------------------------------------------------------------------------
+COPY --from=builder ${ROOTFS_PATH}/ /
+
+# ----------------------------------------------------------------------------
+# Ensure our virtual environment binaries have priority in PATH
+# - Don't write bytecode files (we pre-compiled with -OO)
+# - Unbuffered output for better logging
+# - Random hash seed for security
+# - Disable pip cache to save space
+# - Set evaluation server specific environment variables
+# ----------------------------------------------------------------------------
+ENV PATH="/app/.venv/bin:${PATH}" \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONHASHSEED=random \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    MCP_EVAL_CACHE_DIR="/app/data/cache" \
+    MCP_EVAL_RESULTS_DB="/app/data/results/evaluation_results.db" \
+    DEFAULT_JUDGE_MODEL="gpt-4"
+
+# ----------------------------------------------------------------------------
+# Application working directory
+# ----------------------------------------------------------------------------
+WORKDIR /app
+
+# ----------------------------------------------------------------------------
+# Expose MCP server port (stdio by default, but useful for HTTP wrapper)
+# ----------------------------------------------------------------------------
+EXPOSE 8080
+
+# ----------------------------------------------------------------------------
+# Run as non-root user (1001)
+# ----------------------------------------------------------------------------
+USER 1001
+
+# ----------------------------------------------------------------------------
+# Health check for MCP server functionality
+# - Test that the server can import all modules and list tools
+# ----------------------------------------------------------------------------
+HEALTHCHECK --interval=60s --timeout=15s --start-period=120s --retries=3 \
+  CMD ["python3", "-c", "from mcp_eval_server.server import judge_tools; print('MCP Eval Server healthy:', len(judge_tools.get_available_judges()), 'judges available')"]
+
+# ----------------------------------------------------------------------------
+# Entrypoint - Run the MCP Evaluation Server
+# ----------------------------------------------------------------------------
+CMD ["./run-server.sh"]