Skip to content

Commit 357da65

Browse files
authored
Merge pull request #21 from voarsh2/ctx-glm-claude-hook-refrag
Fix refrag GLM mode, add GLM support to CTX, ctx based Claude Code hook to enhance user prompts
2 parents f424d90 + 8811a72 commit 357da65

File tree

9 files changed

+469
-108
lines changed

9 files changed

+469
-108
lines changed

.env.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ REFRAG_SENSE=heuristic
127127
LLAMACPP_URL=http://llamacpp:8080
128128
REFRAG_DECODER_MODE=prompt # prompt|soft
129129

130+
# GLM_API_BASE=https://api.z.ai/api/coding/paas/v4/
131+
# GLM_MODEL=glm-4.6
132+
130133
# GPU Performance Toggle
131134
# Set to 1 to use native GPU-accelerated server on localhost:8081
132135
# Set to 0 to use Docker CPU-only server (default, stable)

Dockerfile.indexer

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
88
# OS packages needed: git for history ingestion
99
RUN apt-get update && apt-get install -y --no-install-recommends git ca-certificates && rm -rf /var/lib/apt/lists/*
1010

11-
RUN pip install --no-cache-dir qdrant-client fastembed watchdog onnxruntime tokenizers tree_sitter tree_sitter_languages
11+
# Python deps: reuse shared requirements file
12+
COPY requirements.txt /tmp/requirements.txt
13+
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
1214

1315
# Bake scripts into the image so we can mount arbitrary code at /work
1416
COPY scripts /app/scripts

Dockerfile.mcp-indexer

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
1010
RUN apt-get update && apt-get install -y --no-install-recommends git ca-certificates \
1111
&& rm -rf /var/lib/apt/lists/*
1212

13-
# Python deps: include FastMCP with Streamable HTTP (RMCP) support
14-
RUN pip install --no-cache-dir --upgrade qdrant-client fastembed watchdog onnxruntime tokenizers \
15-
tree_sitter tree_sitter_languages mcp fastmcp
13+
# Python deps: reuse shared requirements (includes FastMCP + OpenAI SDK)
14+
COPY requirements.txt /tmp/requirements.txt
15+
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
1616

1717
# Bake scripts into the image so entrypoints don't rely on /work
1818
COPY scripts /app/scripts

ctx-hook-simple.sh

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
#!/bin/bash
# Simplified Claude Code UserPromptSubmit hook for ctx.py.
# Reads the hook's JSON payload on stdin and writes it back on stdout with
# .user_message replaced by a ctx-enhanced version when enhancement applies.

# Read the full JSON payload from stdin.
INPUT=$(cat)

# jq is needed both to extract and to re-inject the message; without it the
# only safe behavior is to pass the payload through untouched.
# printf (not echo) so a payload starting with "-n"/"-e" is emitted verbatim.
if ! command -v jq >/dev/null 2>&1; then
  printf '%s\n' "$INPUT"
  exit 0
fi

USER_MESSAGE=$(printf '%s' "$INPUT" | jq -r '.user_message')

# Skip empty or missing messages (jq -r renders a missing key as "null").
if [ -z "$USER_MESSAGE" ] || [ "$USER_MESSAGE" = "null" ]; then
  printf '%s\n' "$INPUT"
  exit 0
fi

# Easy bypass patterns - any of these skips ctx enhancement.
# NOTE: bash [[ =~ ]] uses POSIX ERE, which has no \s; [[:space:]] is the
# portable class (the original ^\?\s*$ silently never matched on most systems).
if [[ "$USER_MESSAGE" =~ ^(noctx|raw|bypass|skip|no-enhance): ]] || \
   [[ "$USER_MESSAGE" =~ ^\\ ]] || \
   [[ "$USER_MESSAGE" =~ ^\< ]] || \
   [[ "$USER_MESSAGE" =~ ^(/help|/clear|/exit|/quit) ]] || \
   [[ "$USER_MESSAGE" =~ ^\?[[:space:]]*$ ]] || \
   [ ${#USER_MESSAGE} -lt 12 ]; then
  printf '%s\n' "$INPUT"
  exit 0
fi

# Run from the hook script's own directory so ctx.py and its config resolve;
# if the cd fails, fall back to passing the payload through unchanged.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if ! cd "$SCRIPT_DIR"; then
  printf '%s\n' "$INPUT"
  exit 0
fi

# Read settings from ctx_config.json. jq is guaranteed present at this point,
# so use it instead of grep/sed scraping (which breaks on escapes/whitespace,
# and whose "|| echo default" fallbacks were dead code: the || bound to sed,
# which succeeds even on empty input). "// empty" yields "" for missing keys
# so the ${VAR:-default} expansions below still apply.
CONFIG_FILE="ctx_config.json"
if [ -f "$CONFIG_FILE" ]; then
  CTX_COLLECTION=$(jq -r '.default_collection // empty' "$CONFIG_FILE" 2>/dev/null)
  REFRAG_RUNTIME=$(jq -r '.refrag_runtime // empty' "$CONFIG_FILE" 2>/dev/null)
  GLM_API_KEY=$(jq -r '.glm_api_key // empty' "$CONFIG_FILE" 2>/dev/null)
  GLM_API_BASE=$(jq -r '.glm_api_base // empty' "$CONFIG_FILE" 2>/dev/null)
  GLM_MODEL=$(jq -r '.glm_model // empty' "$CONFIG_FILE" 2>/dev/null)
  CTX_DEFAULT_MODE=$(jq -r '.default_mode // empty' "$CONFIG_FILE" 2>/dev/null)
  CTX_REQUIRE_CONTEXT=$(jq -r '.require_context // empty' "$CONFIG_FILE" 2>/dev/null)
  CTX_RELEVANCE_GATE=$(jq -r '.relevance_gate_enabled // empty' "$CONFIG_FILE" 2>/dev/null)
  CTX_MIN_RELEVANCE=$(jq -r '.min_relevance // empty' "$CONFIG_FILE" 2>/dev/null)
fi

# Defaults for anything missing from (or unset by) the config.
CTX_COLLECTION=${CTX_COLLECTION:-"codebase"}
REFRAG_RUNTIME=${REFRAG_RUNTIME:-"glm"}
GLM_API_KEY=${GLM_API_KEY:-}
GLM_API_BASE=${GLM_API_BASE:-}
GLM_MODEL=${GLM_MODEL:-"glm-4.6"}
CTX_DEFAULT_MODE=${CTX_DEFAULT_MODE:-"default"}
CTX_REQUIRE_CONTEXT=${CTX_REQUIRE_CONTEXT:-true}
CTX_RELEVANCE_GATE=${CTX_RELEVANCE_GATE:-false}
CTX_MIN_RELEVANCE=${CTX_MIN_RELEVANCE:-0.1}

# Export GLM/context environment variables so ctx.py can read them.
export REFRAG_RUNTIME GLM_API_KEY GLM_API_BASE GLM_MODEL CTX_REQUIRE_CONTEXT CTX_RELEVANCE_GATE CTX_MIN_RELEVANCE

# Build the ctx command as an array so arguments containing spaces survive.
CTX_CMD=(python3 scripts/ctx.py)
case "${CTX_DEFAULT_MODE,,}" in
  unicorn)
    CTX_CMD+=("--unicorn")
    ;;
  detail)
    CTX_CMD+=("--detail")
    ;;
esac
CTX_CMD+=("$USER_MESSAGE" --collection "$CTX_COLLECTION")

# Run ctx with a hard timeout. On failure or timeout, discard any partial
# output and fall back to the original message (the old "|| echo" form could
# append the fallback AFTER partial output, corrupting the enhanced prompt).
if ! ENHANCED=$(timeout 30s "${CTX_CMD[@]}" 2>/dev/null); then
  ENHANCED="$USER_MESSAGE"
fi

# Re-inject the (possibly) enhanced message into the payload.
printf '%s' "$INPUT" | jq --arg enhanced "$ENHANCED" '.user_message = $enhanced'

ctx_config.example.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
11
{
2+
"default_collection": "codebase",
3+
"refrag_runtime": "glm",
4+
"glm_api_key": "",
5+
"glm_api_base": "https://api.z.ai/api/coding/paas/v4/",
6+
"glm_model": "glm-4.6",
27
"always_include_tests": true,
38
"prefer_bullet_commands": false,
49
"extra_instructions": "Always consider error handling and edge cases",
5-
"streaming": true
10+
"default_mode": "unicorn",
11+
"streaming": true,
12+
"require_context": true,
13+
"relevance_gate_enabled": false,
14+
"min_relevance": 0.1
615
}
716

docker-compose.dev-remote.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ services:
7575
- FASTMCP_HOST=${FASTMCP_HOST}
7676
- FASTMCP_INDEXER_PORT=${FASTMCP_INDEXER_PORT}
7777
- QDRANT_URL=${QDRANT_URL}
78+
- REFRAG_DECODER=${REFRAG_DECODER:-1}
79+
- REFRAG_RUNTIME=${REFRAG_RUNTIME:-llamacpp}
80+
- GLM_API_KEY=${GLM_API_KEY}
81+
- GLM_API_BASE=${GLM_API_BASE:-https://api.z.ai/api/paas/v4/}
82+
- GLM_MODEL=${GLM_MODEL:-glm-4.6}
83+
- LLAMACPP_URL=${LLAMACPP_URL:-http://llamacpp:8080}
7884
- COLLECTION_NAME=${COLLECTION_NAME}
7985
- PATH_EMIT_MODE=container
8086
- HF_HOME=/tmp/huggingface
@@ -156,6 +162,12 @@ services:
156162
- FASTMCP_INDEXER_PORT=8001
157163
- FASTMCP_TRANSPORT=${FASTMCP_HTTP_TRANSPORT}
158164
- QDRANT_URL=${QDRANT_URL}
165+
- REFRAG_DECODER=${REFRAG_DECODER:-1}
166+
- REFRAG_RUNTIME=${REFRAG_RUNTIME:-llamacpp}
167+
- GLM_API_KEY=${GLM_API_KEY}
168+
- GLM_API_BASE=${GLM_API_BASE:-https://api.z.ai/api/paas/v4/}
169+
- GLM_MODEL=${GLM_MODEL:-glm-4.6}
170+
- LLAMACPP_URL=${LLAMACPP_URL:-http://llamacpp:8080}
159171
- FASTMCP_HEALTH_PORT=18001
160172
- COLLECTION_NAME=${COLLECTION_NAME}
161173
- PATH_EMIT_MODE=container

0 commit comments

Comments
 (0)