77import time
88from typing import Any
99
10- from mcp .server .fastmcp import FastMCP
11- from mcp .shared . context import RequestContext
12- from mcp .types import TextContent
10+ from mcp .server .fastmcp import Context , FastMCP
11+ from mcp .server . session import ServerSession
12+ from mcp .types import SamplingMessage , TextContent
1313import typechat
1414
1515from typeagent .aitools import embeddings , utils
16- from typeagent .aitools .embeddings import AsyncEmbeddingModel
1716from typeagent .knowpro import answers , query , searchlang
1817from typeagent .knowpro .convsettings import ConversationSettings
1918from typeagent .knowpro .answer_response_schema import AnswerResponse
2019from typeagent .knowpro .search_query_schema import SearchQuery
2120from typeagent .podcasts import podcast
21+ from typeagent .storage .memory .semrefindex import TermToSemanticRefIndex
2222
2323
2424class MCPTypeChatModel (typechat .TypeChatLanguageModel ):
2525 """TypeChat language model that uses MCP sampling API instead of direct API calls."""
2626
27- def __init__ (self , session : Any ):
27+ def __init__ (self , session : ServerSession ):
2828 """Initialize with MCP session for sampling.
2929
3030 Args:
@@ -37,19 +37,29 @@ async def complete(
3737 ) -> typechat .Result [str ]:
3838 """Request completion from the MCP client's LLM."""
3939 try :
40- # Convert prompt to message format
40+ # Convert prompt to MCP SamplingMessage format
41+ sampling_messages : list [SamplingMessage ]
4142 if isinstance (prompt , str ):
42- messages = [{"role" : "user" , "content" : prompt }]
43+ sampling_messages = [
44+ SamplingMessage (
45+ role = "user" , content = TextContent (type = "text" , text = prompt )
46+ )
47+ ]
4348 else :
44- # PromptSection list: convert to messages
45- messages = []
49+ # PromptSection list: convert to SamplingMessage
50+ sampling_messages = []
4651 for section in prompt :
4752 role = "user" if section ["role" ] == "user" else "assistant"
48- messages .append ({"role" : role , "content" : section ["content" ]})
53+ sampling_messages .append (
54+ SamplingMessage (
55+ role = role ,
56+ content = TextContent (type = "text" , text = section ["content" ]),
57+ )
58+ )
4959
5060 # Use MCP sampling to request completion from client
5161 result = await self .session .create_message (
52- messages = messages , max_tokens = 16384
62+ messages = sampling_messages , max_tokens = 16384
5363 )
5464
5565 # Extract text content from response
@@ -58,7 +68,7 @@ async def complete(
5868 return typechat .Success (result .content .text )
5969 elif isinstance (result .content , list ):
6070 # Handle list of content items
61- text_parts = []
71+ text_parts : list [ str ] = []
6272 for item in result .content :
6373 if isinstance (item , TextContent ):
6474 text_parts .append (item .text )
@@ -77,19 +87,21 @@ async def complete(
class ProcessingContext:
    """Bundles everything needed to answer one question over the podcast index.

    Holds the search/answer option objects, the query-evaluation context for
    the loaded podcast conversation, the embedding model, and the two TypeChat
    translators (question -> SearchQuery, context -> AnswerResponse).

    NOTE(review): the attributes below are bare annotations with no __init__
    visible; presumably this class is decorated with @dataclass just above
    this view -- confirm before relying on a generated constructor.
    """

    # Options controlling the natural-language search stages.
    lang_search_options: searchlang.LanguageSearchOptions
    # Options controlling how answer context is assembled for the LLM.
    answer_context_options: answers.AnswerContextOptions
    # Evaluation context parameterized on the podcast message and index types.
    query_context: query.QueryEvalContext[
        podcast.PodcastMessage, TermToSemanticRefIndex
    ]
    # Embedding model used for semantic-similarity lookups.
    embedding_model: embeddings.AsyncEmbeddingModel
    # TypeChat translator that turns a user question into a SearchQuery.
    query_translator: typechat.TypeChatJsonTranslator[SearchQuery]
    # TypeChat translator that turns retrieved context into an AnswerResponse.
    answer_translator: typechat.TypeChatJsonTranslator[AnswerResponse]

    def __repr__(self) -> str:
        # Summarize only the two lightweight option objects; the heavyweight
        # members (query context, model, translators) are deliberately omitted.
        parts: list[str] = []
        parts.append(f"{self.lang_search_options}")
        parts.append(f"{self.answer_context_options}")
        return f"Context({', '.join(parts)})"
90102
91103
92- async def make_context (session : Any ) -> ProcessingContext :
104+ async def make_context (session : ServerSession ) -> ProcessingContext :
93105 """Create processing context using MCP-based language model.
94106
95107 Args:
@@ -135,7 +147,7 @@ async def make_context(session: Any) -> ProcessingContext:
135147
136148async def load_podcast_index (
137149 podcast_file_prefix : str , settings : ConversationSettings
138- ) -> query .QueryEvalContext :
150+ ) -> query .QueryEvalContext [ podcast . PodcastMessage , Any ] :
139151 conversation = await podcast .Podcast .read_from_file (podcast_file_prefix , settings )
140152 assert (
141153 conversation is not None
@@ -155,7 +167,9 @@ class QuestionResponse:
155167
156168
157169@mcp .tool ()
158- async def query_conversation (question : str , ctx : RequestContext ) -> QuestionResponse :
170+ async def query_conversation (
171+ question : str , ctx : Context [ServerSession , Any , Any ]
172+ ) -> QuestionResponse :
159173 """Send a question to the memory server and get an answer back"""
160174 t0 = time .time ()
161175 question = question .strip ()
@@ -164,7 +178,7 @@ async def query_conversation(question: str, ctx: RequestContext) -> QuestionResp
164178 return QuestionResponse (
165179 success = False , answer = "No question provided" , time_used = dt
166180 )
167- context = await make_context (ctx .session )
181+ context = await make_context (ctx .request_context . session )
168182
169183 # Stages 1, 2, 3 (LLM -> proto-query, compile, execute query)
170184 result = await searchlang .search_conversation_with_language (
0 commit comments