diff --git a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Evaluators/ExpectedToolInputEvaluator.cs b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Evaluators/ExpectedToolInputEvaluator.cs index a84b3747fbe..03d640c3330 100644 --- a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Evaluators/ExpectedToolInputEvaluator.cs +++ b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Evaluators/ExpectedToolInputEvaluator.cs @@ -104,7 +104,7 @@ public ValueTask EvaluateAsync( if (!string.Equals(expectedJson, actualJson, StringComparison.OrdinalIgnoreCase)) { - MetricError($"Tool call arguments did not match. This was tool call #{countCalls}\nExpected Argument JSON:{expectedJson}\nActual Argument JSON:{actualJson}", metric); + MetricError($"Tool call arguments did not match. This was tool call #{countCalls}: {expToolName}\nExpected Argument JSON:{expectedJson}\nActual Argument JSON:{actualJson}", metric); return result; } } diff --git a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/AssemblyInfo.cs b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/AssemblyInfo.cs index 3eea85e5674..0d7bd1f0220 100644 --- a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/AssemblyInfo.cs +++ b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/AssemblyInfo.cs @@ -4,4 +4,4 @@ [assembly: Parallelizable(ParallelScope.All)] // Set conservative parallelism -[assembly: LevelOfParallelism(5)] \ No newline at end of file +[assembly: LevelOfParallelism(10)] \ No newline at end of file diff --git a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/ChatCompletion.cs b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/ChatCompletion.cs index f270c8e7d27..2e9a855767e 100644 --- a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/ChatCompletion.cs +++ b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Helpers/ChatCompletion.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.AI; using ModelContextProtocol.Client; +using OpenAI.Responses; namespace 
Azure.Sdk.Tools.Cli.Evaluations.Helpers { @@ -14,7 +15,10 @@ public ChatCompletion(IChatClient chatClient, IMcpClient mcpClient) _mcpClient = mcpClient; } - public async Task GetChatResponseWithExpectedResponseAsync(IEnumerable chat, Dictionary expectedToolResults) + public async Task GetChatResponseWithExpectedResponseAsync( + IEnumerable chat, + Dictionary expectedToolResults, + IEnumerable optionalToolNames) { var tools = await _mcpClient.ListToolsAsync(); var conversationMessages = chat.ToList(); @@ -25,6 +29,7 @@ public async Task GetChatResponseWithExpectedResponseAsync(IEnumer }; var response = await _chatClient.GetResponseAsync(chat, chatOptions); var chatInitialIndex = conversationMessages.Count; + var optionalCallIds = new HashSet(); while (response.FinishReason == ChatFinishReason.ToolCalls) { @@ -67,12 +72,49 @@ public async Task GetChatResponseWithExpectedResponseAsync(IEnumer conversationMessages.Add(errorResponseMessage); } + + if(optionalToolNames.Contains(functionCall.Name)) + { + optionalCallIds.Add(functionCall.CallId); + } } response = await _chatClient.GetResponseAsync(conversationMessages, chatOptions); } - return new ChatResponse([.. conversationMessages.Skip(chatInitialIndex)]); + // Add the final assistant message (when there are no further tool calls) + var finalAssistantMessage = response.Messages.FirstOrDefault(); + if (finalAssistantMessage != null) + { + conversationMessages.Add(finalAssistantMessage); + } + + // Filter out any optional tool calls and their corresponding tool results + var conversation = conversationMessages.Skip(chatInitialIndex); + if (optionalCallIds.Count == 0) + { + return new ChatResponse([.. conversation]); + } + + var filtered = FilterOptionalToolResponses(conversation, optionalCallIds); + return new ChatResponse([.. 
/// <summary>
/// Removes optional tool calls and their matching tool results from a conversation.
/// </summary>
/// <remarks>
/// This is a lazy iterator: nothing happens until the result is enumerated.
/// Each message's <c>Contents</c> list is reassigned in place, so the
/// <see cref="ChatMessage"/> instances passed in are modified as a side effect.
/// Messages whose contents become empty after filtering are not yielded.
/// </remarks>
/// <param name="messages">Conversation messages to filter.</param>
/// <param name="optionalCallIds">Call ids of the tool invocations that must be hidden.</param>
/// <returns>The messages that still carry at least one content item after filtering.</returns>
private static IEnumerable<ChatMessage> FilterOptionalToolResponses(
    IEnumerable<ChatMessage> messages,
    HashSet<string> optionalCallIds)
{
    foreach (var message in messages)
    {
        // Keep everything except function calls / function results whose
        // call id was recorded as optional.
        message.Contents =
        [
            .. message.Contents.Where(content => content switch
            {
                FunctionCallContent call => !IsOptional(call.CallId),
                FunctionResultContent result => !IsOptional(result.CallId),
                _ => true,
            })
        ];

        if (message.Contents.Count > 0)
        {
            yield return message;
        }
    }

    // A content item is optional only when it has a non-empty call id that
    // was collected while the conversation was being driven.
    bool IsOptional(string? callId) =>
        !string.IsNullOrEmpty(callId) && optionalCallIds.Contains(callId);
}
/// <summary>
/// Builds the list of tool names that are treated as optional for a scenario.
/// </summary>
/// <remarks>
/// The verify-setup tool is always appended to the caller-supplied names, and any
/// name that is also expected by the scenario is removed. The result is
/// materialized once so that callers can enumerate it (or call <c>Contains</c>
/// on it) repeatedly without re-running the <c>Except</c> query each time.
/// </remarks>
/// <param name="optionalToolNames">Tool names the caller marked as optional.</param>
/// <param name="expectedToolNames">Tool names the scenario expects to be called.</param>
/// <returns>Distinct optional tool names with every expected name excluded.</returns>
private static IEnumerable<string> GetOptionalToolNames(
    IEnumerable<string> optionalToolNames,
    IEnumerable<string> expectedToolNames)
{
    return optionalToolNames
        .Append(verifySetupToolName)   // verify setup is always optional
        .Except(expectedToolNames)     // an expected tool can never be optional
        .ToArray();                    // evaluate once; callers enumerate this many times
}
namespace Azure.Sdk.Tools.Cli.Evaluations.Scenarios
{
    public partial class Scenario
    {
        /// <summary>
        /// Runs a tool-input scenario whose prompt asks whether a TypeSpec project is in
        /// the public repo, expecting a single call to
        /// <c>azsdk_typespec_check_project_in_public_repo</c>.
        /// </summary>
        [Test]
        [Category(RepositoryCategories.AzureRestApiSpecs)]
        public async Task Evaluate_CheckPublicRepo()
        {
            // Tool arguments are compared as well as tool names.
            const bool verifyToolArguments = true;

            // NOTE(review): this literal yields doubled backslashes in the path at runtime,
            // unlike the single-backslash paths used by sibling scenarios - confirm it
            // matches the mock's expected argument.
            const string userPrompt = "Check if my TypeSpec project is in the public repo. My setup has already been verified, do not run azsdk_verify_setup. Project root: C:\\\\azure-rest-api-specs\\\\specification\\\\contosowidgetmanager\\\\Contoso.WidgetManager.";

            string[] toolsToExpect = ["azsdk_typespec_check_project_in_public_repo"];
            var scenario = ChatMessageHelper.LoadScenarioFromPrompt(userPrompt, toolsToExpect);

            EvaluationContext[] contexts =
            [
                new ExpectedToolInputEvaluatorContext(scenario.ExpectedOutcome, s_toolNames!, verifyToolArguments)
            ];

            var evaluation = await EvaluationHelper.RunToolInputScenarioAsync(
                scenarioName: this.ScenarioName,
                scenarioData: scenario,
                chatCompletion: s_chatCompletion!,
                chatConfig: s_chatConfig!,
                executionName: s_executionName,
                reportingPath: ReportingPath,
                toolNames: s_toolNames!,
                evaluators: [new ExpectedToolInputEvaluator()],
                enableResponseCaching: true,
                additionalContexts: contexts);

            EvaluationHelper.ValidateToolInputsEvaluator(evaluation);
        }
    }
}
namespace Azure.Sdk.Tools.Cli.Evaluations.Scenarios
{
    public partial class Scenario
    {
        /// <summary>
        /// Runs a tool-input scenario whose prompt asks to confirm the TypeSpec project is
        /// in the public repo and then run TypeSpec validation. Two tools are expected:
        /// <c>azsdk_typespec_check_project_in_public_repo</c> and
        /// <c>azsdk_run_typespec_validation</c>.
        /// </summary>
        [Test]
        [Category(RepositoryCategories.AzureRestApiSpecs)]
        public async Task Evaluate_CheckPublicRepoThenValidate()
        {
            // The drive prefix must be "C:\" - the previous literal escaped a backslash
            // before the colon ("C\:\..."), which is not a valid Windows path.
            const string prompt =
                "Confirm the TypeSpec project is in the public repo, then run TypeSpec validation. " +
                "Project path: C:\\azure-rest-api-specs\\specification\\contosowidgetmanager\\Contoso.WidgetManager. " +
                "My setup has already been verified, do not run azsdk_verify_setup.";

            string[] expectedTools =
            [
                "azsdk_typespec_check_project_in_public_repo",
                "azsdk_run_typespec_validation"
            ];

            var scenarioData = ChatMessageHelper.LoadScenarioFromPrompt(prompt, expectedTools);

            // Compare tool arguments as well as tool names.
            const bool checkInputs = true;

            var result = await EvaluationHelper.RunToolInputScenarioAsync(
                scenarioName: this.ScenarioName,
                scenarioData: scenarioData,
                chatCompletion: s_chatCompletion!,
                chatConfig: s_chatConfig!,
                executionName: s_executionName,
                reportingPath: ReportingPath,
                toolNames: s_toolNames!,
                evaluators: [new ExpectedToolInputEvaluator()],
                enableResponseCaching: true,
                additionalContexts: new EvaluationContext[]
                {
                    new ExpectedToolInputEvaluatorContext(scenarioData.ExpectedOutcome, s_toolNames!, checkInputs)
                });

            EvaluationHelper.ValidateToolInputsEvaluator(result);
        }
    }
}
namespace Azure.Sdk.Tools.Cli.Evaluations.Scenarios
{
    public partial class Scenario
    {
        /// <summary>
        /// Runs a tool-input scenario whose prompt asks for the TypeSpec projects modified
        /// in a repo, expecting a single call to <c>azsdk_get_modified_typespec_projects</c>.
        /// </summary>
        [Test]
        [Category(RepositoryCategories.AzureRestApiSpecs)]
        public async Task Evaluate_GetModifiedTypespecProjects()
        {
            // Tool arguments are compared as well as tool names.
            const bool verifyToolArguments = true;

            const string userPrompt = "List the TypeSpec projects modified in my repo. My setup has already been verified, do not run azsdk_verify_setup. Path to root: C:\\azure-rest-api-specs. Compare against main.";

            string[] toolsToExpect = ["azsdk_get_modified_typespec_projects"];
            var scenario = ChatMessageHelper.LoadScenarioFromPrompt(userPrompt, toolsToExpect);

            EvaluationContext[] contexts =
            [
                new ExpectedToolInputEvaluatorContext(scenario.ExpectedOutcome, s_toolNames!, verifyToolArguments)
            ];

            var evaluation = await EvaluationHelper.RunToolInputScenarioAsync(
                scenarioName: this.ScenarioName,
                scenarioData: scenario,
                chatCompletion: s_chatCompletion!,
                chatConfig: s_chatConfig!,
                executionName: s_executionName,
                reportingPath: ReportingPath,
                toolNames: s_toolNames!,
                evaluators: [new ExpectedToolInputEvaluator()],
                enableResponseCaching: true,
                additionalContexts: contexts);

            EvaluationHelper.ValidateToolInputsEvaluator(evaluation);
        }
    }
}
a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Scenarios/General/Evaluate_LinkNamespaceApprovalIssue.cs b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Scenarios/General/Evaluate_LinkNamespaceApprovalIssue.cs new file mode 100644 index 00000000000..1c2458a6eb4 --- /dev/null +++ b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/Scenarios/General/Evaluate_LinkNamespaceApprovalIssue.cs @@ -0,0 +1,41 @@ +using Azure.Sdk.Tools.Cli.Evaluations.Evaluators; +using Azure.Sdk.Tools.Cli.Evaluations.Helpers; +using Azure.Sdk.Tools.Cli.Evaluations.Models; +using Microsoft.Extensions.AI.Evaluation; +using NUnit.Framework; + +namespace Azure.Sdk.Tools.Cli.Evaluations.Scenarios +{ + public partial class Scenario + { + [Test] + public async Task Evaluate_LinkNamespaceApprovalIssue() + { + const string prompt = "Link namespace approval issue https://github.com/Azure/azure-sdk/issues/1234 to release plan 12345. My setup has already been verified, do not run azsdk_verify_setup."; + string[] expectedTools = + [ + "azsdk_link_namespace_approval_issue" + ]; + + var scenarioData = ChatMessageHelper.LoadScenarioFromPrompt(prompt, expectedTools); + bool checkInputs = true; + + var result = await EvaluationHelper.RunToolInputScenarioAsync( + scenarioName: this.ScenarioName, + scenarioData: scenarioData, + chatCompletion: s_chatCompletion!, + chatConfig: s_chatConfig!, + executionName: s_executionName, + reportingPath: ReportingPath, + toolNames: s_toolNames!, + evaluators: [new ExpectedToolInputEvaluator()], + enableResponseCaching: true, + additionalContexts: new EvaluationContext[] + { + new ExpectedToolInputEvaluatorContext(scenarioData.ExpectedOutcome, s_toolNames!, checkInputs) + }); + + EvaluationHelper.ValidateToolInputsEvaluator(result); + } + } +} diff --git a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/TestData/example2.json b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/TestData/example2.json index b02f4e9c4db..ec45a76def4 100644 --- 
a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/TestData/example2.json +++ b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/TestData/example2.json @@ -345,7 +345,7 @@ }, { "role": "user", - "content": "\nThe current date is October 29, 2025.\nTerminals:\nTerminal: pwsh\n\n# Todo List\n\n- [x] Confirm start of SDK process\n - Ask user if they want to begin the TypeSpec → SDK generation workflow now (yes/no).\n- [x] Identify TypeSpec project root\n - Use open file `main.tsp` parent directory as project root; verify presence of `tspconfig.yaml`.\n- [x] Check spec merge status\n - Ask if TypeSpec spec is already merged in Azure/azure-rest-api-specs main branch.\n- [x] Run TypeSpec validation\n - If not merged, run validation tool and handle any errors.\n- [x] Review & commit changes\n - List modified TypeSpec files, stage, commit, push on non-main branch.\n- [x] Create or verify spec PR\n - Check if PR exists for current branch; create if missing.\n- [-] Generate SDKs via pipeline\n - Trigger SDK generation pipeline for appropriate languages (Mgmt or Data Plane) and monitor status.\n- [ ] Create release plan (optional)\n - If user intends to publish, gather required IDs, API version, release type and create plan.\n- [ ] Link SDK PRs to release plan\n - Update release plan with language/package names and link PRs.\n- [ ] Check release readiness\n - Run package readiness checks for each language.\n- [ ] Release SDK packages\n - If ready and approved by user, trigger release for each merged SDK PR.\n- [ ] Provide summary & next steps\n - Summarize PRs, release plan status, remaining actions.\n\n\n\n\nThe user's current file is c:\\\\azure-rest-api-specs\\.vscode\\mcp.json. \n\n\nBelow is the information about the current repository. 
You can use this information when you need to calculate diffs or compare changes with the default branch.\nRepository name: azure-rest-api-specs\nOwner: jeo02\nCurrent branch: testing-contoso-sdk-gen\n\n\n\nYou are an agent—keep going until the user's query is completely resolved before ending your turn. ONLY stop if solved or genuinely blocked.\nTake action when possible; the user expects you to do useful work without unnecessary questions.\nAfter any parallel, read-only context gathering, give a concise progress update and what's next.\nAvoid repetition across turns: don't restate unchanged plans or sections (like the todo list) verbatim; provide delta updates or only the parts that changed.\nTool batches: You MUST preface each batch with a one-sentence why/what/outcome preamble.\nProgress cadence: After 3 to 5 tool calls, or when you create/edit > ~3 files in a burst, report progress.\nRequirements coverage: Read the user's ask in full and think carefully. Do not omit a requirement. If something cannot be done with available tools, note why briefly and propose a viable alternative.\nWhen using the insert_edit_into_file tool, avoid repeating existing code, instead use a line comment with \\`...existing code...\\` to represent regions of unchanged code.\nSkip filler acknowledgements like \"Sounds good\" or \"Okay, I will…\". Open with a purposeful one-liner about what you're doing next.\nWhen sharing setup or run steps, present terminal commands in fenced code blocks with the correct language tag. Keep commands copyable and on separate lines.\nAvoid definitive claims about the build or runtime setup unless verified from the provided context (or quick tool checks). 
If uncertain, state what's known from attachments and proceed with minimal steps you can adapt later.\nWhen you create or edit runnable code, run a test yourself to confirm it works; then share optional fenced commands for more advanced runs.\nFor non-trivial code generation, produce a complete, runnable solution: necessary source files, a tiny runner or test/benchmark harness, a minimal `README.md`, and updated dependency manifests (e.g., `package.json`, `requirements.txt`, `pyproject.toml`). Offer quick \"try it\" commands and optional platform-specific speed-ups when relevant.\nYour goal is to act like a pair programmer: be friendly and helpful. If you can do more, do more. Be proactive with your solutions, think about what the user needs and what they want, and implement it proactively.\n\nBefore starting a task, review and follow the guidance in , , and .\nStart your response with a brief acknowledgement, followed by a concise high-level plan outlining your approach.\nDO NOT state your identity or model name unless the user explicitly asks you to. \nYou MUST use the todo list tool to plan and track your progress. NEVER skip this step, and START with this step whenever the task is multi-step. This is essential for maintaining visibility and proper execution of large tasks. Follow the todoListToolInstructions strictly.\nWhen referring to a filename or symbol in the user's workspace, wrap it in backticks.\n\n\n\n\n\nCheck the status of the four sdks that were generated. (See above for file contents. 
You may not need to search or read the file again.)\n" + "content": "\nThe current date is October 29, 2025.\nTerminals:\nTerminal: pwsh\n\n# Todo List\n\n- [x] Confirm start of SDK process\n - Ask user if they want to begin the TypeSpec → SDK generation workflow now (yes/no).\n- [x] Identify TypeSpec project root\n - Use open file `main.tsp` parent directory as project root; verify presence of `tspconfig.yaml`.\n- [x] Check spec merge status\n - Ask if TypeSpec spec is already merged in Azure/azure-rest-api-specs main branch.\n- [x] Run TypeSpec validation\n - If not merged, run validation tool and handle any errors.\n- [x] Review & commit changes\n - List modified TypeSpec files, stage, commit, push on non-main branch.\n- [x] Create or verify spec PR\n - Check if PR exists for current branch; create if missing.\n- [-] Generate SDKs via pipeline\n - Trigger SDK generation pipeline for appropriate languages (Mgmt or Data Plane) and monitor status.\n- [ ] Create release plan (optional)\n - If user intends to publish, gather required IDs, API version, release type and create plan.\n- [ ] Link SDK PRs to release plan\n - Update release plan with language/package names and link PRs.\n- [ ] Check release readiness\n - Run package readiness checks for each language.\n- [ ] Release SDK packages\n - If ready and approved by user, trigger release for each merged SDK PR.\n- [ ] Provide summary & next steps\n - Summarize PRs, release plan status, remaining actions.\n\n\n\n\nThe user's current file is c:\\\\azure-rest-api-specs\\.vscode\\mcp.json. \n\n\nBelow is the information about the current repository. You can use this information when you need to calculate diffs or compare changes with the default branch.\nRepository name: azure-rest-api-specs\nOwner: jeo02\nCurrent branch: testing-contoso-sdk-gen\n\n\n\nYou are an agent—keep going until the user's query is completely resolved before ending your turn. 
ONLY stop if solved or genuinely blocked.\nTake action when possible; the user expects you to do useful work without unnecessary questions.\nAfter any parallel, read-only context gathering, give a concise progress update and what's next.\nAvoid repetition across turns: don't restate unchanged plans or sections (like the todo list) verbatim; provide delta updates or only the parts that changed.\nTool batches: You MUST preface each batch with a one-sentence why/what/outcome preamble.\nProgress cadence: After 3 to 5 tool calls, or when you create/edit > ~3 files in a burst, report progress.\nRequirements coverage: Read the user's ask in full and think carefully. Do not omit a requirement. If something cannot be done with available tools, note why briefly and propose a viable alternative.\nWhen using the insert_edit_into_file tool, avoid repeating existing code, instead use a line comment with \\`...existing code...\\` to represent regions of unchanged code.\nSkip filler acknowledgements like \"Sounds good\" or \"Okay, I will…\". Open with a purposeful one-liner about what you're doing next.\nWhen sharing setup or run steps, present terminal commands in fenced code blocks with the correct language tag. Keep commands copyable and on separate lines.\nAvoid definitive claims about the build or runtime setup unless verified from the provided context (or quick tool checks). If uncertain, state what's known from attachments and proceed with minimal steps you can adapt later.\nWhen you create or edit runnable code, run a test yourself to confirm it works; then share optional fenced commands for more advanced runs.\nFor non-trivial code generation, produce a complete, runnable solution: necessary source files, a tiny runner or test/benchmark harness, a minimal `README.md`, and updated dependency manifests (e.g., `package.json`, `requirements.txt`, `pyproject.toml`). 
Offer quick \"try it\" commands and optional platform-specific speed-ups when relevant.\nYour goal is to act like a pair programmer: be friendly and helpful. If you can do more, do more. Be proactive with your solutions, think about what the user needs and what they want, and implement it proactively.\n\nBefore starting a task, review and follow the guidance in , , and .\nStart your response with a brief acknowledgement, followed by a concise high-level plan outlining your approach.\nDO NOT state your identity or model name unless the user explicitly asks you to. \nYou MUST use the todo list tool to plan and track your progress. NEVER skip this step, and START with this step whenever the task is multi-step. This is essential for maintaining visibility and proper execution of large tasks. Follow the todoListToolInstructions strictly.\nWhen referring to a filename or symbol in the user's workspace, wrap it in backticks.\n\n\n\n\n\nCheck the status of the four sdks that were generated. Only check each pipeline once. (See above for file contents. 
namespace Azure.Sdk.Tools.Cli.Evaluations.ToolMocks
{
    /// <summary>
    /// Tool mock for <c>azsdk_link_namespace_approval_issue</c>: supplies the assistant
    /// message containing the expected call and the tool message containing a canned result.
    /// </summary>
    public class LinkNamespaceApprovalIssue : IToolMock
    {
        // Canned JSON payload returned as the tool result.
        private const string ToolResult = """{"result":"Linked namespace approval issue"}""";

        /// <summary>Name of the tool this mock stands in for.</summary>
        public string ToolName => "azsdk_link_namespace_approval_issue";

        /// <summary>Call id used by the mocked call message.</summary>
        public string CallId => "tooluse_link_namespace_001";

        /// <summary>
        /// Builds the tool-role message carrying the canned result for the given call.
        /// </summary>
        /// <param name="callid">Call id the result is associated with.</param>
        /// <returns>A tool message with a single function result content item.</returns>
        public ChatMessage GetMockResponse(string callid)
        {
            return new ChatMessage(
                ChatRole.Tool,
                [
                    new FunctionResultContent(callid, ToolResult)
                ]
            );
        }

        /// <summary>
        /// Builds the assistant-role message carrying the expected call and its arguments.
        /// </summary>
        /// <returns>An assistant message with a single function call content item.</returns>
        public ChatMessage GetMockCall()
        {
            var arguments = new Dictionary<string, object?>
            {
                ["releasePlanWorkItemId"] = 12345,
                ["namespaceApprovalIssue"] = "https://github.com/Azure/azure-sdk/issues/1234"
            };

            return new ChatMessage(
                ChatRole.Assistant,
                [
                    new FunctionCallContent(CallId, ToolName, arguments)
                ]
            );
        }
    }
}
a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/ToolMocks/ToolMocks.cs b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/ToolMocks/ToolMocks.cs index 26842ba8993..87ce603340b 100644 --- a/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/ToolMocks/ToolMocks.cs +++ b/tools/azsdk-cli/Azure.Sdk.Tools.Cli.Evaluations/ToolMocks/ToolMocks.cs @@ -27,6 +27,7 @@ private static void RegisterMocks() new CreatePullRequest(), new CreateReleasePlan(), new VerifySetup(), + new LinkNamespaceApprovalIssue(), }; foreach (var mock in mockInstances)