Unify system prompt variables into a single SystemVars model and add shared response constraints to all system prompts

Zvi Fried · Zvi Fried · commit bd83fae62941 · 2025-09-11T22:08:42.000+03:00
diff --git a/src/mcp_as_a_judge/messaging/llm_provider.py b/src/mcp_as_a_judge/messaging/llm_provider.py
@@ -66,9 +66,7 @@ async def send_message(
             if provider.provider_type == "mcp_sampling":
                 # MCP sampling provider with SamplingMessage objects
                 # Type ignore because MCPSamplingProvider has send_message method
-                response = await provider.send_message(
-                    formatted_messages, config
-                )
+                response = await provider.send_message(formatted_messages, config)
             else:
                 # LLM API provider with universal Message objects
                 response = await provider.send_message(formatted_messages, config)
diff --git a/src/mcp_as_a_judge/models.py b/src/mcp_as_a_judge/models.py
@@ -7,6 +7,7 @@
 
 from pydantic import BaseModel, Field
 
+from mcp_as_a_judge.constants import MAX_TOKENS
 from mcp_as_a_judge.models.task_metadata import TaskMetadata
 from mcp_as_a_judge.workflow import WorkflowGuidance
 
@@ -176,14 +177,27 @@ class URLValidationResult(BaseModel):
 # Prompt variable models for type safety and validation
 
 
-class JudgeCodingPlanSystemVars(BaseModel):
-    """Variables for judge_coding_plan system prompt."""
+class SystemVars(BaseModel):
+    """Unified system variables for all system prompts.
+
+    This replaces all individual SystemVars models to reduce duplication.
+    All system prompts use the same basic structure with response schema and token limits.
+    """
 
     response_schema: str = Field(
-        description="JSON schema for the expected response format"
+        default="", description="JSON schema for the expected response format (optional)"
+    )
+    max_tokens: int = Field(
+        default=MAX_TOKENS, description="Maximum tokens available for response"
+    )
+    task_size_definitions: str = Field(
+        default="", description="Task size classifications and workflow routing rules (optional)"
     )
 
 
+
+
+
 class JudgeCodingPlanUserVars(BaseModel):
     """Variables for judge_coding_plan user prompt."""
 
@@ -246,12 +260,7 @@ class JudgeCodingPlanUserVars(BaseModel):
     )
 
 
-class JudgeCodeChangeSystemVars(BaseModel):
-    """Variables for judge_code_change system prompt."""
 
-    response_schema: str = Field(
-        description="JSON schema for the expected response format"
-    )
 
 
 class JudgeCodeChangeUserVars(BaseModel):
@@ -270,12 +279,7 @@ class JudgeCodeChangeUserVars(BaseModel):
     )
 
 
-class ResearchValidationSystemVars(BaseModel):
-    """Variables for research_validation system prompt."""
 
-    response_schema: str = Field(
-        description="JSON schema for the expected response format"
-    )
 
 
 class ResearchValidationUserVars(BaseModel):
@@ -296,12 +300,7 @@ class ResearchValidationUserVars(BaseModel):
     )
 
 
-class WorkflowGuidanceSystemVars(BaseModel):
-    """Variables for build_workflow system prompt."""
 
-    response_schema: str = Field(
-        description="JSON schema for the expected response format"
-    )
 
 
 class WorkflowGuidanceUserVars(BaseModel):
@@ -325,10 +324,7 @@ class WorkflowGuidanceUserVars(BaseModel):
     )
 
 
-class ValidationErrorSystemVars(BaseModel):
-    """Variables for validation_error system prompt."""
 
-    # No additional variables needed for system prompt
 
 
 class ValidationErrorUserVars(BaseModel):
@@ -340,10 +336,7 @@ class ValidationErrorUserVars(BaseModel):
     context: str = Field(description="Additional context about the validation failure")
 
 
-class DynamicSchemaSystemVars(BaseModel):
-    """Variables for dynamic_schema system prompt."""
 
-    # No additional variables needed for system prompt
 
 
 class DynamicSchemaUserVars(BaseModel):
@@ -374,12 +367,7 @@ class ElicitationFallbackUserVars(BaseModel):
     )
 
 
-class ResearchRequirementsAnalysisSystemVars(BaseModel):
-    """Variables for research_requirements_analysis system prompt."""
 
-    response_schema: str = Field(
-        description="JSON schema for the expected response format"
-    )
 
 
 class ResearchRequirementsAnalysisUserVars(BaseModel):
diff --git a/src/mcp_as_a_judge/models/__init__.py b/src/mcp_as_a_judge/models/__init__.py
@@ -23,12 +23,10 @@
 from .task_metadata import RequirementsVersion, TaskMetadata, TaskState
 
 __all__ = [
-    "DynamicSchemaSystemVars",
+    "DynamicSchemaUserVars",
     "ElicitationFallbackUserVars",
     "EnhancedResponseFactory",
-    "JudgeCodeChangeSystemVars",
     "JudgeCodeChangeUserVars",
-    "JudgeCodingPlanSystemVars",
     "JudgeCodingPlanUserVars",
     "JudgeResponse",
     "JudgeResponseWithTask",
@@ -37,18 +35,17 @@
     "RequirementsVersion",
     "ResearchComplexityFactors",
     "ResearchRequirementsAnalysis",
-    "ResearchRequirementsAnalysisSystemVars",
     "ResearchRequirementsAnalysisUserVars",
     "ResearchValidationResponse",
-    "ResearchValidationSystemVars",
     "ResearchValidationUserVars",
+    "SystemVars",
     "TaskAnalysisResult",
     "TaskCompletionResult",
     "TaskMetadata",
     "TaskState",
     "URLValidationResult",
+    "ValidationErrorUserVars",
     "WorkflowGuidance",
-    "WorkflowGuidanceSystemVars",
     "WorkflowGuidanceUserVars",
 ]
 
@@ -71,24 +68,18 @@
 
         # Import the models we need
         ElicitationFallbackUserVars = models_py.ElicitationFallbackUserVars
-        JudgeCodeChangeSystemVars = models_py.JudgeCodeChangeSystemVars
         JudgeCodeChangeUserVars = models_py.JudgeCodeChangeUserVars
-        JudgeCodingPlanSystemVars = models_py.JudgeCodingPlanSystemVars
         JudgeCodingPlanUserVars = models_py.JudgeCodingPlanUserVars
         ResearchValidationResponse = models_py.ResearchValidationResponse
-        ResearchValidationSystemVars = models_py.ResearchValidationSystemVars
         ResearchValidationUserVars = models_py.ResearchValidationUserVars
-        WorkflowGuidanceSystemVars = models_py.WorkflowGuidanceSystemVars
         WorkflowGuidanceUserVars = models_py.WorkflowGuidanceUserVars
-        DynamicSchemaSystemVars = models_py.DynamicSchemaSystemVars
         DynamicSchemaUserVars = models_py.DynamicSchemaUserVars
+        ValidationErrorUserVars = models_py.ValidationErrorUserVars
+        SystemVars = models_py.SystemVars
 
         # Import research-related models
         ResearchComplexityFactors = models_py.ResearchComplexityFactors
         ResearchRequirementsAnalysis = models_py.ResearchRequirementsAnalysis
-        ResearchRequirementsAnalysisSystemVars = (
-            models_py.ResearchRequirementsAnalysisSystemVars
-        )
         ResearchRequirementsAnalysisUserVars = (
             models_py.ResearchRequirementsAnalysisUserVars
         )
@@ -102,37 +93,25 @@
     class ElicitationFallbackUserVars(BaseModel):
         pass
 
-    class JudgeCodeChangeSystemVars(BaseModel):
-        pass
-
     class JudgeCodeChangeUserVars(BaseModel):
         pass
 
-    class JudgeCodingPlanSystemVars(BaseModel):
-        pass
-
     class JudgeCodingPlanUserVars(BaseModel):
         pass
 
     class ResearchValidationResponse(BaseModel):
         pass
 
-    class ResearchValidationSystemVars(BaseModel):
-        pass
-
     class ResearchValidationUserVars(BaseModel):
         pass
 
-    class WorkflowGuidanceSystemVars(BaseModel):
-        pass
-
     class WorkflowGuidanceUserVars(BaseModel):
         pass
 
-    class DynamicSchemaSystemVars(BaseModel):
+    class DynamicSchemaUserVars(BaseModel):
         pass
 
-    class DynamicSchemaUserVars(BaseModel):
+    class SystemVars(BaseModel):
         pass
 
     class ResearchComplexityFactors(BaseModel):
diff --git a/src/mcp_as_a_judge/models/enhanced_responses.py b/src/mcp_as_a_judge/models/enhanced_responses.py
@@ -6,7 +6,6 @@
 and intelligent next-step guidance.
 """
 
-
 from pydantic import BaseModel, Field
 
 from mcp_as_a_judge.models.task_metadata import TaskMetadata, TaskSize
diff --git a/src/mcp_as_a_judge/prompts/shared/response_constraints.md b/src/mcp_as_a_judge/prompts/shared/response_constraints.md
@@ -0,0 +1,10 @@
+## Response Constraints
+
+**CRITICAL: You have {{ max_tokens }} tokens maximum for your response.**
+
+**Be PRECISE and MINIMALISTIC:**
+- Transform all information without creating overhead
+- Use concise, direct language
+- Focus on essential analysis only
+- Avoid verbose explanations unless critical
+- Prioritize actionable insights over detailed descriptions
diff --git a/src/mcp_as_a_judge/prompts/system/dynamic_schema.md b/src/mcp_as_a_judge/prompts/system/dynamic_schema.md
@@ -2,6 +2,8 @@ You are an expert UX designer and software engineer specializing in creating dyn
 
 Your task is to generate field definitions for collecting specific information from a user through an interactive elicitation form.
 
+{% include 'shared/response_constraints.md' %}
+
 **KEY REQUIREMENTS:**
 1. **Always provide at least 1 required field** - there must be at least one essential piece of information
 2. **Create minimal fields based on the user query** - only generate fields that are actually needed for the specific request
diff --git a/src/mcp_as_a_judge/prompts/system/judge_code_change.md b/src/mcp_as_a_judge/prompts/system/judge_code_change.md
@@ -2,6 +2,8 @@
 
 You are an expert software engineering judge specializing in code review. Your role is to evaluate code changes and provide feedback on quality, security, and best practices.
 
+{% include 'shared/response_constraints.md' %}
+
 ## Your Expertise
 
 - Code quality assessment and best practices
diff --git a/src/mcp_as_a_judge/prompts/system/judge_coding_plan.md b/src/mcp_as_a_judge/prompts/system/judge_coding_plan.md
@@ -2,6 +2,8 @@
 
 You are an expert software engineering judge. Your role is to review coding plans and provide comprehensive feedback based on established software engineering best practices.
 
+{% include 'shared/response_constraints.md' %}
+
 ## Your Expertise
 
 - Deep knowledge of software architecture and design patterns
diff --git a/src/mcp_as_a_judge/prompts/system/research_requirements_analysis.md b/src/mcp_as_a_judge/prompts/system/research_requirements_analysis.md
@@ -2,6 +2,8 @@
 
 You are an expert at analyzing software development tasks to determine appropriate research requirements based on task complexity, domain specialization, and implementation risk.
 
+{% include 'shared/response_constraints.md' %}
+
 ## Your Expertise
 
 - Assessing task complexity across multiple dimensions
diff --git a/src/mcp_as_a_judge/prompts/system/research_validation.md b/src/mcp_as_a_judge/prompts/system/research_validation.md
@@ -2,6 +2,8 @@
 
 You are an expert at evaluating the comprehensiveness and quality of research for software development tasks.
 
+{% include 'shared/response_constraints.md' %}
+
 ## Your Expertise
 
 - Assessing research thoroughness and depth
diff --git a/src/mcp_as_a_judge/prompts/system/validation_error.md b/src/mcp_as_a_judge/prompts/system/validation_error.md
@@ -1,5 +1,7 @@
 You are an expert software engineering mentor helping an AI coding assistant understand what went wrong with their submission.
 
+{% include 'shared/response_constraints.md' %}
+
 Your task is to generate a clear, actionable error message that explains:
 1. What specific requirement was not met
 2. Why this requirement is important for software engineering best practices
diff --git a/src/mcp_as_a_judge/prompts/system/workflow_guidance.md b/src/mcp_as_a_judge/prompts/system/workflow_guidance.md
@@ -2,6 +2,8 @@
 
 You are an expert workflow navigator for coding tasks in the MCP as a Judge system. Your role is to analyze the current task state, conversation history, and context to determine the optimal next step in the coding workflow.
 
+{% include 'shared/response_constraints.md' %}
+
 ## Your Expertise
 
 - Task-centric workflow management and state transitions
diff --git a/src/mcp_as_a_judge/research_requirements_analyzer.py b/src/mcp_as_a_judge/research_requirements_analyzer.py
@@ -7,15 +7,16 @@
 """
 
 import json
+from typing import Any
 
 from mcp_as_a_judge.constants import MAX_TOKENS
 from mcp_as_a_judge.logging_config import get_logger
 from mcp_as_a_judge.messaging.llm_provider import llm_provider
 from mcp_as_a_judge.models import (
     ResearchComplexityFactors,
     ResearchRequirementsAnalysis,
-    ResearchRequirementsAnalysisSystemVars,
     ResearchRequirementsAnalysisUserVars,
+    SystemVars,
     URLValidationResult,
 )
 from mcp_as_a_judge.models.task_metadata import TaskMetadata
@@ -51,8 +52,11 @@ async def analyze_research_requirements(
 
     try:
         # Create system and user messages from templates
-        system_vars = ResearchRequirementsAnalysisSystemVars(
-            response_schema=json.dumps(ResearchRequirementsAnalysis.model_json_schema())
+        system_vars = SystemVars(
+            response_schema=json.dumps(
+                ResearchRequirementsAnalysis.model_json_schema()
+            ),
+            max_tokens=MAX_TOKENS,
         )
 
         user_vars = ResearchRequirementsAnalysisUserVars(
@@ -93,7 +97,9 @@ async def analyze_research_requirements(
         return _get_fallback_analysis(task_metadata)
 
 
-def _get_fallback_analysis(task_metadata: TaskMetadata) -> "ResearchRequirementsAnalysis":
+def _get_fallback_analysis(
+    task_metadata: TaskMetadata,
+) -> "ResearchRequirementsAnalysis":
     """
     Provide fallback analysis if LLM analysis fails.
 
diff --git a/src/mcp_as_a_judge/server.py b/src/mcp_as_a_judge/server.py
diff --git a/src/mcp_as_a_judge/server_helpers.py b/src/mcp_as_a_judge/server_helpers.py
diff --git a/src/mcp_as_a_judge/tool_description/local_storage_provider.py b/src/mcp_as_a_judge/tool_description/local_storage_provider.py
diff --git a/src/mcp_as_a_judge/workflow/workflow_guidance.py b/src/mcp_as_a_judge/workflow/workflow_guidance.py
diff --git a/tests/test_conversation_history_lifecycle.py b/tests/test_conversation_history_lifecycle.py
diff --git a/tests/test_prompt_loader.py b/tests/test_prompt_loader.py