diff --git a/edam_mcp/main.py b/edam_mcp/main.py
index 03339c5..78e19a0 100644
--- a/edam_mcp/main.py
+++ b/edam_mcp/main.py
@@ -9,7 +9,9 @@
 from .config import settings
 from .models.requests import MappingRequest, SuggestionRequest
 from .models.responses import MappingResponse, SuggestionResponse
+from .models.workflow import WorkflowSummaryRequest, WorkflowSummaryResponse
 from .tools import map_to_edam_concept, suggest_new_concept
+from .tools.workflow import get_workflow_summary
 
 # Configure logging
 logging.basicConfig(
@@ -29,7 +31,11 @@ def create_server() -> FastMCP:
     # Create server
     mcp = FastMCP("edam-mcp")
 
-    # Register tools using decorators
+    @mcp.tool
+    async def get_workflow_summary_tool(request: WorkflowSummaryRequest, context: Context) -> WorkflowSummaryResponse:
+        """Get comprehensive summary of the EDAM mapping workflow for copilot planning."""
+        return await get_workflow_summary(request, context)
+
     @mcp.tool
     async def map_to_edam_concept_tool(request: MappingRequest, context: Context) -> MappingResponse:
         return await map_to_edam_concept(request, context)
diff --git a/edam_mcp/models/workflow.py b/edam_mcp/models/workflow.py
new file mode 100644
index 0000000..312b2f8
--- /dev/null
+++ b/edam_mcp/models/workflow.py
@@ -0,0 +1,35 @@
+"""Workflow models for the EDAM MCP mapping workflow."""
+
+from pydantic import BaseModel, Field
+
+
+class WorkflowSummaryRequest(BaseModel):
+    """Request model for workflow summary (no parameters needed)."""
+
+    pass
+
+
+class WorkflowFunction(BaseModel):
+    """Model describing a workflow function."""
+
+    name: str = Field(..., description="Function name")
+    description: str = Field(..., description="Function description and purpose")
+    input_format: dict = Field(..., description="Expected input parameters and types")
+    output_format: dict = Field(..., description="Expected output structure and types")
+    configurable_options: list[str] = Field(default_factory=list, description="List of configurable options/parameters")
+    dependencies: list[str] = Field(default_factory=list, description="Other workflow functions this depends on")
+    existing_implementation: str | None = Field(
+        None,
+        description="Name of existing tool/function that provides partial or full implementation (if any)",
+    )
+
+
+class WorkflowSummaryResponse(BaseModel):
+    """Response model containing the complete workflow summary."""
+
+    workflow_name: str = Field(..., description="Name of the workflow")
+    workflow_description: str = Field(..., description="Overall workflow description")
+    workflow_steps: list[str] = Field(..., description="Ordered list of workflow step names")
+    functions: list[WorkflowFunction] = Field(..., description="Detailed description of all available functions")
+    configurable_options: dict = Field(..., description="Global configurable options and their defaults")
+    workflow_flow: str = Field(..., description="Textual description of the workflow flow")
diff --git a/edam_mcp/tools/workflow.py b/edam_mcp/tools/workflow.py
new file mode 100644
index 0000000..f411914
--- /dev/null
+++ b/edam_mcp/tools/workflow.py
@@ -0,0 +1,339 @@
+"""MCP tool for workflow summary and planning."""
+
+from fastmcp.server import Context
+
+from ..models.workflow import WorkflowFunction, WorkflowSummaryRequest, WorkflowSummaryResponse
+
+
+async def get_workflow_summary(request: WorkflowSummaryRequest, context: Context) -> WorkflowSummaryResponse:
+    """Get comprehensive summary of the EDAM mapping workflow.
+
+    This entry point function provides a complete overview of the EDAM ontology mapping
+    workflow, including all available functions, their expected inputs/outputs, configurable
+    options, and workflow steps. This is designed for copilot planning and assessment of
+    the current state of the mapping process.
+
+    Args:
+        request: Workflow summary request (no parameters needed).
+        context: MCP context for logging.
+
+    Returns:
+        Workflow summary response with complete workflow information.
+    """
+    await context.info("Generating workflow summary...")  # Context.info is a coroutine; it must be awaited
+
+    # Define all workflow functions
+    functions = [
+        WorkflowFunction(
+            name="segment_text",
+            description=(
+                "Detects whether input text represents a single unit or multiple concepts. "
+                "Analyzes text structure, length, and semantic content to determine if segmentation "
+                "is needed. Returns either a single string for simple inputs or a list of segmented "
+                "items for sequential processing."
+            ),
+            input_format={
+                "text": "str - Input text (can be single term, summary, paragraph, vignette, or structured metadata)",
+                "options": {
+                    "max_segment_length": "int (optional) - Maximum length for a single segment",
+                    "delimiter_patterns": "list[str] (optional) - Patterns to detect segment boundaries",
+                    "semantic_threshold": "float (optional) - Threshold for semantic similarity between segments",
+                },
+            },
+            output_format={
+                "is_segmented": "bool - Whether text was segmented",
+                "items": "str | list[str] - Single string or list of segmented items",
+                "segmentation_method": "str - Method used for segmentation (if segmented)",
+                "confidence": "float - Confidence in segmentation decision",
+            },
+            configurable_options=[
+                "max_segment_length",
+                "delimiter_patterns",
+                "semantic_threshold",
+            ],
+            dependencies=[],
+        ),
+        WorkflowFunction(
+            name="map_to_edam",
+            description=(
+                "Main mapping function for linking text items to EDAM ontology terms. "
+                "Performs semantic search using embeddings to find the most appropriate EDAM concepts. "
+                "Supports multiple embedding models, optional synthetic term generation to improve search, "
+                "and returns additional ontology statistics (e.g., concept depth, number of children, "
+                "hierarchy position) for downstream processing."
+            ),
+            existing_implementation=(
+                "map_to_edam_concept_tool (partial - provides basic semantic matching but lacks "
+                "synthetic term generation and ontology statistics features)"
+            ),
+            input_format={
+                "item": "str - Single text item to map to EDAM",
+                "context": "str | None (optional) - Additional context about the item",
+                "options": {
+                    "embedding_model": "str (optional) - Embedding model to use (e.g., 'all-MiniLM-L6-v2')",
+                    "generate_synthetic_terms": "bool (optional) - Whether to generate synthetic terms for better matching",
+                    "include_ontology_stats": "bool (optional) - Whether to include ontology statistics",
+                    "max_results": "int (optional) - Maximum number of matches to return",
+                    "min_confidence": "float (optional) - Minimum confidence threshold",
+                },
+            },
+            output_format={
+                "matches": "list[ConceptMatch] - List of matched EDAM concepts",
+                "best_match": "ConceptMatch | None - Best matching concept",
+                "ontology_stats": {
+                    "concept_depth": "int - Depth of concept in ontology hierarchy",
+                    "num_children": "int - Number of child concepts",
+                    "num_parents": "int - Number of parent concepts",
+                    "hierarchy_path": "list[str] - Path from root to concept",
+                },
+                "embedding_model_used": "str - Embedding model used for matching",
+                "synthetic_terms_generated": "list[str] - Synthetic terms generated (if enabled)",
+            },
+            configurable_options=[
+                "embedding_model",
+                "generate_synthetic_terms",
+                "include_ontology_stats",
+                "max_results",
+                "min_confidence",
+            ],
+            dependencies=["segment_text"],
+        ),
+        WorkflowFunction(
+            name="commonsense_check",
+            description=(
+                "Validates whether a mapped EDAM term faithfully represents the input text. "
+                "Uses unsupervised embedding comparison and ontology structure traversal to detect "
+                "overly generic or overly specific mappings. If the term is too generic, traverses "
+                "children to find more specific matches. If too specific, traverses parents to find "
+                "more general matches. Returns adjusted mapping with rationale."
+            ),
+            input_format={
+                "input_text": "str - Original input text that was mapped",
+                "mapped_concept": "ConceptMatch - The mapped EDAM concept to validate",
+                "options": {
+                    "similarity_threshold": "float (optional) - Minimum similarity threshold for validation",
+                    "max_traversal_depth": "int (optional) - Maximum depth for ontology traversal",
+                    "prefer_specific": "bool (optional) - Prefer more specific over generic terms",
+                },
+            },
+            output_format={
+                "is_valid": "bool - Whether the mapping is considered valid",
+                "adjusted_match": "ConceptMatch | None - Adjusted concept if validation failed",
+                "validation_confidence": "float - Confidence in the validation result",
+                "traversal_path": "list[str] - Path traversed in ontology (if adjusted)",
+                "rationale": "str - Explanation of validation result or adjustment",
+            },
+            configurable_options=[
+                "similarity_threshold",
+                "max_traversal_depth",
+                "prefer_specific",
+            ],
+            dependencies=["map_to_edam"],
+        ),
+        WorkflowFunction(
+            name="merge_results",
+            description=(
+                "Combines results from single or multiple item mappings into a unified structure. "
+                "Handles aggregation of confidence scores, deduplication of concepts, and merging "
+                "of ontology statistics. Includes flags for unresolved items and mapping quality metrics."
+            ),
+            input_format={
+                "mapping_results": "list[MappingResult] - Results from individual item mappings",
+                "options": {
+                    "deduplicate": "bool (optional) - Whether to deduplicate identical concepts",
+                    "confidence_aggregation": "str (optional) - Method for aggregating confidence ('max', 'mean', 'weighted')",
+                    "include_unresolved": "bool (optional) - Whether to include unresolved items in output",
+                },
+            },
+            output_format={
+                "merged_matches": "list[ConceptMatch] - Merged and deduplicated concept matches",
+                "unresolved_items": "list[str] - Items that could not be mapped",
+                "aggregated_confidence": "float - Overall confidence score",
+                "mapping_coverage": "float - Percentage of items successfully mapped",
+                "statistics": {
+                    "total_items": "int - Total number of items processed",
+                    "resolved_items": "int - Number of successfully mapped items",
+                    "unique_concepts": "int - Number of unique EDAM concepts found",
+                },
+            },
+            configurable_options=[
+                "deduplicate",
+                "confidence_aggregation",
+                "include_unresolved",
+            ],
+            dependencies=["map_to_edam", "commonsense_check"],
+        ),
+        WorkflowFunction(
+            name="report_summary",
+            description=(
+                "Generates a concise summary report of the mapping session. Provides statistics "
+                "on mapping coverage, confidence distribution, flagged items for review, and "
+                "overall mapping quality metrics. Useful for assessing the success of the mapping "
+                "process and identifying areas that need manual review."
+            ),
+            input_format={
+                "merged_results": "MergedResults - Results from merge_results function",
+                "options": {
+                    "include_confidence_distribution": "bool (optional) - Include confidence score distribution",
+                    "include_flagged_items": "bool (optional) - Include list of items flagged for review",
+                    "include_statistics": "bool (optional) - Include detailed statistics",
+                },
+            },
+            output_format={
+                "summary": {
+                    "total_items_processed": "int - Total number of items processed",
+                    "successfully_mapped": "int - Number of successfully mapped items",
+                    "mapping_coverage": "float - Percentage coverage",
+                    "average_confidence": "float - Average confidence score",
+                    "confidence_distribution": "dict[str, int] - Distribution of confidence scores by range",
+                },
+                "flagged_items": "list[dict] - Items flagged for manual review with reasons",
+                "recommendations": "list[str] - Recommendations for improving mapping quality",
+                "quality_metrics": {
+                    "high_confidence_mappings": "int - Number of high confidence mappings (>0.8)",
+                    "medium_confidence_mappings": "int - Number of medium confidence mappings (0.5-0.8)",
+                    "low_confidence_mappings": "int - Number of low confidence mappings (<0.5)",
+                },
+            },
+            configurable_options=[
+                "include_confidence_distribution",
+                "include_flagged_items",
+                "include_statistics",
+            ],
+            dependencies=["merge_results"],
+        ),
+        WorkflowFunction(
+            name="update_opts",
+            description=(
+                "Interface to modify mapping parameters and re-run selected workflow steps. "
+                "Allows dynamic adjustment of embedding models, confidence thresholds, traversal "
+                "behavior, and other mapping parameters. Supports re-running specific workflow steps "
+                "with updated parameters without restarting the entire workflow."
+            ),
+            input_format={
+                "parameter_updates": "dict - Dictionary of parameter names and new values",
+                "affected_steps": "list[str] (optional) - Workflow steps to re-run with new parameters",
+                "options": {
+                    "reset_cache": "bool (optional) - Whether to reset cached embeddings/results",
+                    "validate_updates": "bool (optional) - Whether to validate parameter updates",
+                },
+            },
+            output_format={
+                "updated_parameters": "dict - Confirmed updated parameters",
+                "re_run_results": "dict | None - Results from re-running affected steps (if specified)",
+                "validation_errors": "list[str] - Any validation errors encountered",
+            },
+            configurable_options=[
+                "reset_cache",
+                "validate_updates",
+            ],
+            dependencies=[],  # Can be called at any point in workflow
+        ),
+    ]
+
+    # Define global configurable options
+    configurable_options = {
+        "embedding_model": {
+            "type": "str",
+            "default": "all-MiniLM-L6-v2",
+            "description": "Sentence transformer model for generating embeddings",
+            "options": ["all-MiniLM-L6-v2", "all-mpnet-base-v2", "paraphrase-multilingual-MiniLM-L12-v2"],
+        },
+        "min_confidence_threshold": {
+            "type": "float",
+            "default": 0.5,
+            "description": "Minimum confidence threshold for accepting mappings",
+            "range": [0.0, 1.0],
+        },
+        "max_results": {
+            "type": "int",
+            "default": 5,
+            "description": "Maximum number of concept matches to return per item",
+            "range": [1, 20],
+        },
+        "generate_synthetic_terms": {
+            "type": "bool",
+            "default": False,
+            "description": "Whether to generate synthetic terms to improve semantic search",
+        },
+        "include_ontology_stats": {
+            "type": "bool",
+            "default": True,
+            "description": "Whether to include ontology statistics (depth, children count, etc.)",
+        },
+        "commonsense_check_enabled": {
+            "type": "bool",
+            "default": True,
+            "description": "Whether to perform commonsense validation of mappings",
+        },
+        "max_traversal_depth": {
+            "type": "int",
+            "default": 3,
+            "description": "Maximum depth for ontology traversal during commonsense check",
+            "range": [1, 10],
+        },
+        "prefer_specific_terms": {
+            "type": "bool",
+            "default": True,
+            "description": "Prefer more specific terms over generic ones during traversal",
+        },
+    }
+
+    # Define workflow flow
+    workflow_flow = """
+    EDAM Ontology Mapping Workflow:
+
+    1. **Input Processing (segment_text)**
+       - Accept text input (single term, summary, paragraph, vignette, or structured metadata)
+       - Determine if input represents single unit or multiple concepts
+       - Return single string or list of segmented items
+
+    2. **Mapping (map_to_edam)**
+       - For each item (or single item), perform semantic search against EDAM ontology
+       - Use selected embedding model to find similar concepts
+       - Optionally generate synthetic terms to improve search
+       - Return matched concepts with confidence scores and ontology statistics
+
+    3. **Validation (commonsense_check)**
+       - Validate that mapped terms faithfully represent input
+       - Use ontology traversal to adjust overly generic/specific mappings
+       - Return validated/adjusted mappings with rationale
+
+    4. **Result Aggregation (merge_results)**
+       - Combine results from single or multiple item mappings
+       - Deduplicate concepts and aggregate confidence scores
+       - Flag unresolved items
+
+    5. **Reporting (report_summary)**
+       - Generate summary statistics of mapping session
+       - Show mapping coverage, confidence distribution, and flagged items
+       - Provide recommendations for improvement
+
+    6. **Parameter Updates (update_opts)**
+       - Can be called at any point to modify mapping parameters
+       - Supports re-running specific workflow steps with updated parameters
+    """
+
+    response = WorkflowSummaryResponse(
+        workflow_name="EDAM Ontology Mapping Workflow",
+        workflow_description=(
+            "A comprehensive workflow for mapping text descriptions (terms, summaries, paragraphs, "
+            "vignettes, or structured metadata) to concepts in the EDAM ontology. The workflow supports "
+            "single and multi-item processing, semantic search with configurable embeddings, validation "
+            "through ontology traversal, result aggregation, and comprehensive reporting."
+        ),
+        workflow_steps=[
+            "segment_text",
+            "map_to_edam",
+            "commonsense_check",
+            "merge_results",
+            "report_summary",
+            "update_opts",
+        ],
+        functions=functions,
+        configurable_options=configurable_options,
+        workflow_flow=workflow_flow,
+    )
+
+    await context.info("Workflow summary generated successfully")
+    return response