Skip to content

Commit db9b510

Browse files
author
Zvi Fried
committed
approved changes, still under test
1 parent d80a11c commit db9b510

26 files changed

+1868
-434
lines changed

src/mcp_as_a_judge/coding_task_manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from mcp_as_a_judge.db.conversation_history_service import ConversationHistoryService
1212
from mcp_as_a_judge.logging_config import get_logger
13-
from mcp_as_a_judge.models.task_metadata import TaskMetadata, TaskState
13+
from mcp_as_a_judge.models.task_metadata import TaskMetadata, TaskSize, TaskState
1414

1515
# Set up logger using custom get_logger function
1616
logger = get_logger(__name__)
@@ -23,6 +23,7 @@ async def create_new_coding_task(
2323
user_requirements: str,
2424
tags: list[str],
2525
conversation_service: ConversationHistoryService,
26+
task_size: TaskSize,
2627
) -> TaskMetadata:
2728
"""
2829
Create a new coding task with auto-generated task_id.
@@ -34,10 +35,12 @@ async def create_new_coding_task(
3435
user_requirements: Initial requirements
3536
tags: Task tags
3637
conversation_service: Conversation service
38+
task_size: Task size classification (REQUIRED)
3739
3840
Returns:
3941
New TaskMetadata instance
4042
"""
43+
4144
logger.info(f"📝 Creating new coding task: {task_title}")
4245

4346
# Create new TaskMetadata with auto-generated UUID
@@ -46,6 +49,7 @@ async def create_new_coding_task(
4649
description=task_description,
4750
user_requirements=user_requirements,
4851
state=TaskState.CREATED, # Default state for new tasks
52+
task_size=task_size,
4953
tags=tags,
5054
)
5155

src/mcp_as_a_judge/logging_config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ def configure_application_loggers(level: int = logging.INFO) -> None:
8989
app_loggers = [
9090
"mcp_as_a_judge.server",
9191
"mcp_as_a_judge.server_helpers",
92-
"mcp_as_a_judge.conversation_history_service",
9392
"mcp_as_a_judge.db.conversation_history_service",
9493
"mcp_as_a_judge.db.providers.in_memory",
9594
"mcp_as_a_judge.db.providers.sqlite_provider",

src/mcp_as_a_judge/models.py

Lines changed: 120 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,74 @@ class ResearchValidationResponse(BaseModel):
103103
)
104104

105105

106+
class ResearchComplexityFactors(BaseModel):
107+
"""Analysis factors for determining research complexity."""
108+
109+
domain_specialization: str = Field(
110+
description="Level of domain specialization: 'general', 'specialized', 'highly_specialized'"
111+
)
112+
technology_maturity: str = Field(
113+
description="Maturity of required technologies: 'established', 'emerging', 'cutting_edge'"
114+
)
115+
integration_scope: str = Field(
116+
description="Scope of system integration: 'isolated', 'moderate', 'system_wide'"
117+
)
118+
existing_solutions: str = Field(
119+
description="Availability of existing solutions: 'abundant', 'limited', 'scarce'"
120+
)
121+
risk_level: str = Field(
122+
description="Implementation risk level: 'low', 'medium', 'high'"
123+
)
124+
125+
126+
class ResearchRequirementsAnalysis(BaseModel):
127+
"""LLM analysis of research requirements for a task."""
128+
129+
expected_url_count: int = Field(
130+
description="Recommended number of research URLs for optimal coverage",
131+
ge=0,
132+
le=10
133+
)
134+
minimum_url_count: int = Field(
135+
description="Minimum acceptable number of URLs for basic adequacy",
136+
ge=0,
137+
le=5
138+
)
139+
reasoning: str = Field(
140+
description="Detailed explanation of why these URL counts are appropriate"
141+
)
142+
complexity_factors: ResearchComplexityFactors = Field(
143+
description="Breakdown of complexity analysis factors"
144+
)
145+
quality_requirements: list[str] = Field(
146+
default_factory=list,
147+
description="Specific requirements for research source quality and types"
148+
)
149+
150+
151+
class URLValidationResult(BaseModel):
152+
"""Result of validating provided URLs against dynamic requirements."""
153+
154+
adequate: bool = Field(
155+
description="Whether the provided URLs meet the dynamic requirements"
156+
)
157+
provided_count: int = Field(
158+
description="Number of URLs actually provided"
159+
)
160+
expected_count: int = Field(
161+
description="Expected number of URLs based on analysis"
162+
)
163+
minimum_count: int = Field(
164+
description="Minimum acceptable number of URLs"
165+
)
166+
feedback: str = Field(
167+
description="Detailed feedback about URL adequacy and suggestions"
168+
)
169+
meets_quality_standards: bool = Field(
170+
description="Whether URLs meet quality requirements beyond just count"
171+
)
172+
173+
106174
# Database models for conversation history
107175
# ConversationRecord is now defined in db/interface.py using SQLModel
108176
# DatabaseConfig is now defined in constants.py
@@ -181,6 +249,20 @@ class JudgeCodingPlanUserVars(BaseModel):
181249
description="Strategies to mitigate identified risks"
182250
)
183251

252+
# Dynamic URL requirement fields: expected/minimum URL counts and reasoning determined by the LLM for this task
253+
expected_url_count: int = Field(
254+
default=0,
255+
description="LLM-determined expected number of research URLs for this task"
256+
)
257+
minimum_url_count: int = Field(
258+
default=0,
259+
description="LLM-determined minimum acceptable URL count"
260+
)
261+
url_requirement_reasoning: str = Field(
262+
default="",
263+
description="LLM explanation of why specific URL count is needed"
264+
)
265+
184266

185267
class JudgeCodeChangeSystemVars(BaseModel):
186268
"""Variables for judge_code_change system prompt."""
@@ -223,7 +305,7 @@ class ResearchValidationUserVars(BaseModel):
223305
research: str = Field(description="Research findings to be validated")
224306
research_urls: list[str] = Field(
225307
default_factory=list,
226-
description="URLs from MANDATORY online research - minimum 3 URLs required",
308+
description="URLs from online research - count determined dynamically based on task complexity",
227309
)
228310
context: str = Field(description="Additional context about the research validation")
229311
conversation_history: list = Field(
@@ -241,14 +323,22 @@ class WorkflowGuidanceSystemVars(BaseModel):
241323

242324

243325
class WorkflowGuidanceUserVars(BaseModel):
244-
"""Variables for build_workflow user prompt."""
326+
"""Variables for workflow_guidance user prompt."""
245327

328+
task_id: str = Field(description="Task ID")
329+
task_title: str = Field(description="Task title")
246330
task_description: str = Field(description="Description of the development task")
247-
context: str = Field(description="Additional context about the task")
248-
conversation_history: list = Field(
249-
default_factory=list,
250-
description="Previous conversation history as JSON array with timestamps",
251-
)
331+
user_requirements: str = Field(description="User requirements for the task")
332+
current_state: str = Field(description="Current task state")
333+
state_description: str = Field(description="Description of current state")
334+
current_operation: str = Field(description="Current operation being performed")
335+
task_size: str = Field(description="Task size classification")
336+
task_size_definitions: str = Field(description="Task size definitions")
337+
state_transitions: str = Field(description="Valid state transitions")
338+
tool_descriptions: str = Field(description="Available tool descriptions")
339+
conversation_context: str = Field(description="Conversation history context")
340+
operation_context: str = Field(description="Current operation context")
341+
response_schema: str = Field(description="JSON schema for the expected response format")
252342

253343

254344
class ValidationErrorSystemVars(BaseModel):
@@ -300,6 +390,29 @@ class ElicitationFallbackUserVars(BaseModel):
300390
)
301391

302392

393+
class ResearchRequirementsAnalysisSystemVars(BaseModel):
394+
"""Variables for research_requirements_analysis system prompt."""
395+
396+
response_schema: str = Field(
397+
description="JSON schema for the expected response format"
398+
)
399+
400+
401+
class ResearchRequirementsAnalysisUserVars(BaseModel):
402+
"""Variables for research_requirements_analysis user prompt."""
403+
404+
task_title: str = Field(description="Title of the coding task")
405+
task_description: str = Field(description="Detailed description of the task")
406+
user_requirements: str = Field(description="User requirements for the task")
407+
research_scope: str = Field(description="Current research scope (none/light/deep)")
408+
research_rationale: str = Field(
409+
description="Rationale for why research is needed at current scope"
410+
)
411+
context: str = Field(
412+
description="Additional context about the task and project"
413+
)
414+
415+
303416
class TestingEvaluationSystemVars(BaseModel):
304417
"""Variables for testing evaluation system prompt."""
305418

src/mcp_as_a_judge/models/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
"ResearchValidationUserVars",
5555
"WorkflowGuidanceSystemVars",
5656
"WorkflowGuidanceUserVars",
57+
"DynamicSchemaSystemVars",
58+
"DynamicSchemaUserVars",
5759
]
5860

5961
# Import additional models from the original models.py file
@@ -83,6 +85,8 @@
8385
ResearchValidationUserVars = models_py.ResearchValidationUserVars
8486
WorkflowGuidanceSystemVars = models_py.WorkflowGuidanceSystemVars
8587
WorkflowGuidanceUserVars = models_py.WorkflowGuidanceUserVars
88+
DynamicSchemaSystemVars = models_py.DynamicSchemaSystemVars
89+
DynamicSchemaUserVars = models_py.DynamicSchemaUserVars
8690

8791
except Exception:
8892
# Fallback if models.py doesn't exist or has issues

src/mcp_as_a_judge/models/enhanced_responses.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010

1111
from pydantic import BaseModel, Field
1212

13-
from mcp_as_a_judge.models.task_metadata import TaskMetadata
14-
from mcp_as_a_judge.workflow import WorkflowGuidance
13+
from mcp_as_a_judge.models.task_metadata import TaskMetadata, TaskSize
14+
from mcp_as_a_judge.workflow.workflow_guidance import WorkflowGuidance
1515

1616

17-
class JudgeResponseWithTask(BaseModel):
17+
class JudgeResponse(BaseModel):
1818
"""
1919
Enhanced JudgeResponse that ALWAYS includes current task metadata and workflow guidance.
2020
@@ -35,10 +35,22 @@ class JudgeResponseWithTask(BaseModel):
3535

3636
# Enhanced workflow v3 fields
3737
current_task_metadata: TaskMetadata = Field(
38-
description="ALWAYS current state of task metadata after operation"
38+
default_factory=lambda: TaskMetadata(
39+
title="Unknown Task",
40+
description="No metadata provided",
41+
user_requirements="",
42+
task_size=TaskSize.M,
43+
),
44+
description="ALWAYS current state of task metadata after operation",
3945
)
4046
workflow_guidance: WorkflowGuidance = Field(
41-
description="LLM-generated next steps and instructions from shared method"
47+
default_factory=lambda: WorkflowGuidance(
48+
next_tool="raise_obstacle",
49+
reasoning="Default guidance: insufficient context",
50+
preparation_needed=[],
51+
guidance="Provide required parameters and context",
52+
),
53+
description="LLM-generated next steps and instructions from shared method",
4254
)
4355

4456

@@ -149,10 +161,8 @@ class MissingRequirementsResult(BaseModel):
149161
description="LLM-generated next steps and instructions for requirements clarification"
150162
)
151163

152-
153-
# Backward compatibility aliases for existing code
154-
# These can be removed once all tools are migrated to enhanced responses
155-
JudgeResponse = JudgeResponseWithTask # Alias for backward compatibility
164+
# Backward compatibility alias
165+
JudgeResponseWithTask = JudgeResponse
156166

157167

158168
class EnhancedResponseFactory:
@@ -170,9 +180,9 @@ def create_judge_response(
170180
current_task_metadata: TaskMetadata,
171181
workflow_guidance: WorkflowGuidance,
172182
required_improvements: Optional[List[str]] = None,
173-
) -> JudgeResponseWithTask:
183+
) -> JudgeResponse:
174184
"""
175-
Create a JudgeResponseWithTask with consistent structure.
185+
Create a JudgeResponse with consistent structure.
176186
177187
Args:
178188
approved: Whether the validation passed
@@ -182,9 +192,9 @@ def create_judge_response(
182192
required_improvements: Optional list of required improvements
183193
184194
Returns:
185-
JudgeResponseWithTask instance
195+
JudgeResponse instance
186196
"""
187-
return JudgeResponseWithTask(
197+
return JudgeResponse(
188198
approved=approved,
189199
feedback=feedback,
190200
required_improvements=required_improvements or [],

0 commit comments

Comments
 (0)