Skip to content
6 changes: 5 additions & 1 deletion edam_mcp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .models.segmentation import SegmentationRequest, SegmentationResponse
from .models.suggestion import SuggestionRequest, SuggestionResponse
from .models.workflow import WorkflowSummaryRequest, WorkflowSummaryResponse
from .tools import map_to_edam_concept, suggest_new_concept
from .tools import map_to_edam_concept, map_to_edam_operation, suggest_new_concept
from .tools.segment_text import segment_text
from .tools.workflow import get_workflow_summary

Expand Down Expand Up @@ -47,6 +47,10 @@ async def segment_text_tool(request: SegmentationRequest, context: Context) -> S
async def map_to_edam_concept_tool(request: MappingRequest, context: Context) -> MappingResponse:
return await map_to_edam_concept(request, context)

@mcp.tool
async def map_to_edam_operation_tool(request: MappingRequest, context: Context) -> MappingResponse:
    """MCP tool wrapper that delegates to :func:`map_to_edam_operation`."""
    return await map_to_edam_operation(request, context)

@mcp.tool
async def suggest_new_concept_tool(request: SuggestionRequest, context: Context) -> SuggestionResponse:
    """MCP tool wrapper that delegates to :func:`suggest_new_concept`."""
    return await suggest_new_concept(request, context)
Expand Down
4 changes: 2 additions & 2 deletions edam_mcp/ontology/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""EDAM ontology handling modules."""

from .loader import OntologyLoader
from .matcher import ConceptMatcher
from .matcher import ConceptMatcher, EDAMConceptType
from .suggester import ConceptSuggester

__all__ = ["OntologyLoader", "ConceptMatcher", "ConceptSuggester"]
__all__ = ["OntologyLoader", "ConceptMatcher", "EDAMConceptType", "ConceptSuggester"]
73 changes: 67 additions & 6 deletions edam_mcp/ontology/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import os
from enum import Enum

import numpy as np

Expand All @@ -13,6 +14,15 @@
logger = logging.getLogger(__name__)


# define enum for EDAM concept types
class EDAMConceptType(Enum):
    """Top-level EDAM concept branches, plus ANY to match across all branches.

    Values mirror the ``type`` strings stored on loaded ontology concepts
    (e.g. ``concept["type"] == "Operation"``), so members can be compared
    against concept metadata via ``.value``.
    """

    TOPIC = "Topic"
    OPERATION = "Operation"
    DATA = "Data"
    FORMAT = "Format"
    ANY = "Any"

class ConceptMatcher:
"""Handles semantic matching of descriptions to EDAM concepts."""

Expand All @@ -25,6 +35,7 @@ def __init__(self, ontology_loader: OntologyLoader):
self.ontology_loader = ontology_loader
self.embedding_model = None
self.concept_embeddings: dict[str, np.ndarray] = {}
self.operation_embeddings: dict[str, np.ndarray] = {}
self.use_chromadb = settings.use_chromadb
self.chroma_db = os.path.join(settings.cache_dir, "default.db")
# Don't build embeddings immediately - do it lazily when needed
Expand All @@ -50,13 +61,20 @@ def _build_embeddings(self) -> None:
logger.error("chromadb not available. Install with: pip install chromadb")
return
client = chromadb.PersistentClient(path=self.chroma_db)

# Further details at: https://docs.trychroma.com/docs/collections/configure#hnsw-index-configuration
embedding_function = SentenceTransformerEmbeddingFunction(settings.embedding_model)
collection = client.get_or_create_collection(
name="concept_embeddings",
embedding_function=embedding_function,
configuration={"hnsw": {"space": "cosine", "ef_construction": 200}},
)
operation_collection = client.get_or_create_collection(
name="operation_embeddings",
embedding_function=embedding_function,
configuration={"hnsw": {"space": "cosine", "ef_construction": 200}},
)

logger.info("Building concept embeddings and storing in ChromaDB...")
else:
logger.info("Building concept embeddings and storing in memory...")
Expand Down Expand Up @@ -101,18 +119,42 @@ def _build_embeddings(self) -> None:
}
],
)
if concept["type"] == "Operation":
operation_collection.add(
ids=[uri],
embeddings=[embedding.tolist()],
documents=[processed_text],
metadatas=[
{
"label": concept["label"],
"definition": concept.get("definition"),
"synonyms": (
", ".join(concept["synonyms"])
if isinstance(concept.get("synonyms"), list)
else concept.get("synonyms")
),
}
],
)
else:
self.concept_embeddings[uri] = embedding
if concept["type"] == "Operation":
self.operation_embeddings[uri] = embedding

if self.use_chromadb:
logger.info(f"Stored embeddings for {len(self.ontology_loader.concepts)} concepts in ChromaDB")
else:
logger.info(f"Built embeddings for {len(self.concept_embeddings)} concepts")

# Show the size of the embeddings dictionary
logger.info(f"Concept embeddings size: {len(self.concept_embeddings)}")
logger.info(f"Operation embeddings size: {len(self.operation_embeddings)}")

def match_concepts(
self,
description: str,
context: str | None = None,
concept_type: EDAMConceptType = EDAMConceptType.ANY,
max_results: int = 5,
min_confidence: float = 0.5,
) -> list[ConceptMatch]:
Expand All @@ -121,6 +163,7 @@ def match_concepts(
Args:
description: Text description to match.
context: Additional context information.
concept_type: Type of EDAM concept to match.
max_results: Maximum number of matches to return.
min_confidence: Minimum confidence threshold.

Expand All @@ -142,7 +185,7 @@ def match_concepts(
description_embedding = self.embedding_model.encode(processed_description, show_progress_bar=False)

# Calculate similarities
similarities = self._calculate_similarities(description_embedding, max_results * 2)
similarities = self._calculate_similarities(description_embedding, concept_type, max_results * 2)

# Filter and sort results
matches = []
Expand All @@ -164,11 +207,15 @@ def match_concepts(
matches.sort(key=lambda x: x.confidence, reverse=True)
return matches[:max_results]

def _calculate_similarities(self, description_embedding: np.ndarray, max_results: int) -> list[tuple[str, float]]:
def _calculate_similarities(
self, description_embedding: np.ndarray, concept_type: EDAMConceptType, max_results: int
) -> list[tuple[str, float]]:
"""Calculate cosine similarities between description and all concepts.

Args:
description_embedding: Embedding of the description.
concept_type: Type of EDAM concept to match.
max_results: Maximum number of results to return.

Returns:
List of (concept_uri, similarity) tuples.
Expand All @@ -183,7 +230,14 @@ def _calculate_similarities(self, description_embedding: np.ndarray, max_results
return []

client = chromadb.PersistentClient(path=self.chroma_db)
collection = client.get_or_create_collection(name="concept_embeddings")
if concept_type == EDAMConceptType.OPERATION:
logger.info("Using ChromaDB to query operation embeddings")
collection = client.get_or_create_collection(name="operation_embeddings")
logger.info(f"Collection has {collection.count()} items")
else:
logger.info("Using ChromaDB to query concept embeddings")
collection = client.get_or_create_collection(name="concept_embeddings")
logger.info(f"Collection has {collection.count()} items")
# Use ChromaDB's default query for similarity search
query_results = collection.query(
query_embeddings=[description_embedding],
Expand All @@ -195,9 +249,16 @@ def _calculate_similarities(self, description_embedding: np.ndarray, max_results
similarity_scores = [1.0 - d for d in distances]
similarities = list(zip(ids, similarity_scores))
else:
for uri, concept_embedding in self.concept_embeddings.items():
similarity = self._cosine_similarity(description_embedding, concept_embedding)
similarities.append((uri, similarity))
if concept_type == EDAMConceptType.OPERATION:
logger.info("Calculating similarities using in-memory operation embeddings")
for uri, concept_embedding in self.operation_embeddings.items():
similarity = self._cosine_similarity(description_embedding, concept_embedding)
similarities.append((uri, similarity))
else:
logger.info("Calculating similarities using in-memory concept embeddings")
for uri, concept_embedding in self.concept_embeddings.items():
similarity = self._cosine_similarity(description_embedding, concept_embedding)
similarities.append((uri, similarity))

similarities.sort(key=lambda x: x[1], reverse=True)
return similarities
Expand Down
4 changes: 2 additions & 2 deletions edam_mcp/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""MCP tools for EDAM ontology operations."""

from .mapping import map_to_edam_concept
from .mapping import map_to_edam_concept, map_to_edam_operation
from .suggestion import suggest_new_concept

__all__ = ["map_to_edam_concept", "suggest_new_concept"]
__all__ = ["map_to_edam_concept", "map_to_edam_operation", "suggest_new_concept"]
64 changes: 63 additions & 1 deletion edam_mcp/tools/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fastmcp.server import Context

from ..models.mapping import MappingRequest, MappingResponse
from ..ontology import ConceptMatcher, OntologyLoader
from ..ontology import ConceptMatcher, EDAMConceptType, OntologyLoader
from ..utils.context import MockContext


Expand Down Expand Up @@ -67,6 +67,68 @@ async def map_to_edam_concept(request: MappingRequest, context: Context) -> Mapp
raise


async def map_to_edam_operation(request: MappingRequest, context: Context) -> MappingResponse:
    """Map a description to existing EDAM operations (i.e. bioinformatics data processing tasks).

    This tool takes a description (metadata, free text) and finds the most
    appropriate mappings to EDAM operations. It returns matches
    with confidence scores, indicating how well each operation matches the description.

    Args:
        request: Mapping request containing description and parameters.
        context: MCP context for logging and progress reporting.

    Returns:
        Mapping response with matched operations and confidence scores.

    Raises:
        RuntimeError: If the EDAM ontology cannot be loaded.
    """

    try:
        # Log the request (truncate long descriptions to keep logs readable)
        context.info(f"Mapping description: {request.description[:100]}...")

        # Initialize ontology components
        ontology_loader = OntologyLoader()
        if not ontology_loader.load_ontology():
            raise RuntimeError("Failed to load EDAM ontology")

        concept_matcher = ConceptMatcher(ontology_loader)

        # First try exact matches. find_exact_matches searches across all
        # concept types, so restrict the hits to Operation concepts here —
        # otherwise an exact Topic/Data/Format match would be reported by
        # the *operation* mapper with has_exact_match=True.
        exact_matches = [
            match
            for match in concept_matcher.find_exact_matches(request.description)
            if match.concept_type == EDAMConceptType.OPERATION.value
        ]

        if exact_matches:
            context.info(f"Found {len(exact_matches)} exact matches")
            return MappingResponse(
                matches=exact_matches,
                total_matches=len(exact_matches),
                has_exact_match=True,
                confidence_threshold=request.min_confidence,
            )

        # Fall back to semantic matching restricted to the Operation branch.
        context.info("Performing semantic matching...")
        matches = concept_matcher.match_concepts(
            description=request.description,
            context=request.context,
            concept_type=EDAMConceptType.OPERATION,
            max_results=request.max_results,
            min_confidence=request.min_confidence,
        )

        context.info(f"Found {len(matches)} semantic matches")

        return MappingResponse(
            matches=matches,
            total_matches=len(matches),
            has_exact_match=False,
            confidence_threshold=request.min_confidence,
        )

    except Exception as e:
        # Surface the failure to the MCP client, then re-raise so callers
        # see the original exception type.
        context.error(f"Error in concept mapping: {e}")
        raise


# Alternative function signature for direct use
async def map_description_to_concepts(
description: str,
Expand Down
80 changes: 80 additions & 0 deletions examples/basic_usage_operation_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Basic usage example for the EDAM MCP server."""

import asyncio
import logging
import sys
from pathlib import Path

# Add the project root to the Python path so the example runs from a
# source checkout without installation.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Import the package pieces used by the example; fail fast with install
# instructions when the package is not importable.
try:
    from edam_mcp.models.mapping import MappingRequest
    from edam_mcp.tools.mapping import map_to_edam_operation
    from edam_mcp.utils.context import MockContext
except ImportError as e:
    print(f"Error importing edam_mcp: {e}")
    print("Make sure you have installed the package in development mode:")
    print(" uv sync --dev")
    sys.exit(1)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


async def example_operation_mapping():
    """Demonstrate mapping free-text descriptions to EDAM operation concepts."""
    print("=== EDAM Operation Mapping Example ===\n")

    # Descriptions to run through the operation mapper.
    descriptions = [
        "The Spectra package defines an efficient infrastructure for storing "
        "and handling mass spectrometry spectra and functionality to subset, "
        "process, visualize and compare spectra data. It provides different implementations "
        "(backends) to store mass spectrometry data. These comprise backends tuned for fast "
        "data access and processing and backends for very large data sets ensuring a small memory footprint.",
    ]

    for description in descriptions:
        print(f"Mapping: {description}")

        try:
            mapping_request = MappingRequest(
                description=description,
                context="bioinformatics",
                max_results=5,
                min_confidence=0.5,
            )

            response = await map_to_edam_operation(mapping_request, context=MockContext())

            if not response.matches:
                print(" No matches found")
                continue

            print(f" Found {response.total_matches} matches:")
            for match in response.matches:
                print(f" - {match.concept_label} (confidence: {match.confidence:.2f})")
                print(f" URI: {match.concept_uri}")
                print(f" Type: {match.concept_type}")
                assert match.concept_type == "Operation"

        except Exception as e:
            print(f" Error: {e}")


async def main():
    """Entry point: print a banner, then run the operation-mapping example."""
    banner = "EDAM MCP Server - Bioconductor Spectra - Mapping"
    print(banner)
    print("=" * 50)

    await example_operation_mapping()


# Run the async example via the asyncio event loop when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())