Add core Beast Mode functionality to GPT-4.1/5 agent prompt (#467)

burkeholland · roblourens · pierceboggan · commit 78dd74303833 · 2025-08-11T13:50:54.000-06:00
* Add GPT-4.1 agent prompt and enhance tool capabilities

Implemented the GPT-4.1 specific agent prompt to improve multi-step task execution and problem-solving. Updated the DefaultAgentPrompt to conditionally render the new GPT41AgentPrompt based on the model family. Enhanced the tool capabilities detection function to streamline tool usage and added a new tool name for managing todo lists.

* Add setting for new prompt, tweaks

* Cleanup

---------

Co-authored-by: Rob Lourens <roblourens@gmail.com>
diff --git a/package.json b/package.json
@@ -2762,6 +2762,14 @@
 							"experimental"
 						],
 						"description": "%github.copilot.config.retryAfterFilteredResponse.enabled%"
+					},
+					"github.copilot.chat.alternateGptPrompt.enabled": {
+						"type": "boolean",
+						"default": false,
+						"tags": [
+							"experimental"
+						],
+						"description": "%github.copilot.config.alternateGptPrompt.enabled%"
 					}
 				}
 			}
diff --git a/package.nls.json b/package.nls.json
@@ -135,6 +135,7 @@
 	"github.copilot.config.byok.ollamaEndpoint": "The endpoint to use for the Ollama when accessed via bring your own key. Defaults to localhost.",
 	"github.copilot.config.virtualTools.threshold": "This setting defines the tool count over which virtual tools should be used. Virtual tools group similar sets of tools together and they allow the model to activate them on-demand. Certain tool groups will optimistically be pre-activated. We are actively developing this feature and you experience degraded tool calling once the threshold is hit.\n\nMay be set to `0` to disable virtual tools.",
 	"github.copilot.config.retryAfterFilteredResponse.enabled": "Enables retrying after a filtered response. If enabled, Copilot Chat will retry the request after a content filter blocks the response.",
+	"github.copilot.config.alternateGptPrompt.enabled": "Enables an experimental alternate prompt for GPT models instead of the default prompt.",
 	"github.copilot.command.fixTestFailure": "Fix Test Failure",
 	"copilot.description": "Ask or edit in context",
 	"copilot.edits.description": "Edit files in your workspace",
diff --git a/src/extension/prompts/node/agent/agentInstructions.tsx b/src/extension/prompts/node/agent/agentInstructions.tsx
diff --git a/src/extension/prompts/node/agent/agentPrompt.tsx b/src/extension/prompts/node/agent/agentPrompt.tsx
@@ -18,6 +18,7 @@ import { IAlternativeNotebookContentService } from '../../../../platform/noteboo
 import { IPromptPathRepresentationService } from '../../../../platform/prompts/common/promptPathRepresentationService';
 import { ITabsAndEditorsService } from '../../../../platform/tabs/common/tabsAndEditorsService';
 import { ITasksService } from '../../../../platform/tasks/common/tasksService';
+import { IExperimentationService } from '../../../../platform/telemetry/common/nullExperimentationService';
 import { IWorkspaceService } from '../../../../platform/workspace/common/workspaceService';
 import { coalesce } from '../../../../util/vs/base/common/arrays';
 import { basename } from '../../../../util/vs/base/common/path';
@@ -44,7 +45,7 @@ import { UserPreferences } from '../panel/preferences';
 import { ChatToolCalls } from '../panel/toolCalling';
 import { MultirootWorkspaceStructure } from '../panel/workspace/workspaceStructure';
 import { AgentConversationHistory } from './agentConversationHistory';
-import { DefaultAgentPrompt, SweBenchAgentPrompt } from './agentInstructions';
+import { AlternateGPTPrompt, DefaultAgentPrompt, SweBenchAgentPrompt } from './agentInstructions';
 import { SummarizedConversationHistory } from './summarizedConversationHistory';
 
 export interface AgentPromptProps extends GenericBasePromptElementProps {
@@ -75,6 +76,7 @@ export class AgentPrompt extends PromptElement<AgentPromptProps> {
 		props: AgentPromptProps,
 		@IConfigurationService private readonly configurationService: IConfigurationService,
 		@IInstantiationService private readonly instantiationService: IInstantiationService,
+		@IExperimentationService private readonly experimentationService: IExperimentationService,
 		@IPromptEndpoint private readonly promptEndpoint: IPromptEndpoint,
 	) {
 		super(props);
@@ -83,11 +85,17 @@ export class AgentPrompt extends PromptElement<AgentPromptProps> {
 	async render(state: void, sizing: PromptSizing) {
 		const instructions = this.configurationService.getConfig(ConfigKey.Internal.SweBenchAgentPrompt) ?
 			<SweBenchAgentPrompt availableTools={this.props.promptContext.tools?.availableTools} modelFamily={this.props.endpoint.family} codesearchMode={undefined} /> :
-			<DefaultAgentPrompt
-				availableTools={this.props.promptContext.tools?.availableTools}
-				modelFamily={this.props.endpoint.family}
-				codesearchMode={this.props.codesearchMode}
-			/>;
+			this.props.endpoint.family.startsWith('gpt-') && this.configurationService.getExperimentBasedConfig(ConfigKey.EnableAlternateGptPrompt, this.experimentationService) ?
+				<AlternateGPTPrompt
+					availableTools={this.props.promptContext.tools?.availableTools}
+					modelFamily={this.props.endpoint.family}
+					codesearchMode={this.props.codesearchMode}
+				/> :
+				<DefaultAgentPrompt
+					availableTools={this.props.promptContext.tools?.availableTools}
+					modelFamily={this.props.endpoint.family}
+					codesearchMode={this.props.codesearchMode}
+				/>;
 
 		const omitBaseAgentInstructions = this.configurationService.getConfig(ConfigKey.Internal.OmitBaseAgentInstructions);
 		const baseAgentInstructions = <>
@@ -304,7 +312,7 @@ export class AgentUserMessage extends PromptElement<AgentUserMessageProps> {
 					<RepoContext />
 					<Tag name='reminderInstructions'>
 						{/* Critical reminders that are effective when repeated right next to the user message */}
-						{getKeepGoingReminder(this.props.endpoint.family)}
+						<KeepGoingReminder modelFamily={this.props.endpoint.family} />
 						{getEditingReminder(hasEditFileTool, hasReplaceStringTool, modelNeedsStrongReplaceStringHint(this.props.endpoint))}
 						<NotebookReminderInstructions chatVariables={this.props.chatVariables} query={this.props.request} />
 						{getExplanationReminder(this.props.endpoint.family, hasTodoTool)}
@@ -639,26 +647,52 @@ export function getEditingReminder(hasEditFileTool: boolean, hasReplaceStringToo
 	return lines;
 }
 
-/**
- * Remind gpt-4.1 to keep going and not stop to ask questions...
- */
-export function getKeepGoingReminder(modelFamily: string | undefined) {
-	return modelFamily === 'gpt-4.1' ?
-		<>
-			You are an agent - you must keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. ONLY terminate your turn when you are sure that the problem is solved, or you absolutely cannot continue.<br />
-			You take action when possible- the user is expecting YOU to take action and go to work for them. Don't ask unnecessary questions about the details if you can simply DO something useful instead.<br />
-		</>
-		: modelFamily === 'gpt-5' ?
-			<>
-				You are an agent—keep going until the user's query is completely resolved before ending your turn. ONLY stop if solved or genuinely blocked.<br />
-				Take action when possible; the user expects you to do useful work without unnecessary questions.<br />
-				After any parallel, read-only context gathering, give a concise progress update and what's next.<br />
-				Avoid repetition across turns: don't restate unchanged plans or sections (like the todo list) verbatim; provide delta updates or only the parts that changed.<br />
-				Tool batches: You MUST preface each batch with a one-sentence why/what/outcome preamble.<br />
-				Progress cadence: After 3 to 5 tool calls, or when you create/edit &gt; ~3 files in a burst, pause and post a compact checkpoint.<br />
-				Requirements coverage: Read the user's ask in full, extract each requirement into checklist items, and keep them visible. Do not omit a requirement. If something cannot be done with available tools, note why briefly and propose a viable alternative.<br />
-			</>
-			: undefined;
+export interface IKeepGoingReminderProps extends BasePromptElementProps {
+	modelFamily: string | undefined;
+}
+
+export class KeepGoingReminder extends PromptElement<IKeepGoingReminderProps> {
+	constructor(
+		props: IKeepGoingReminderProps,
+		@IConfigurationService private readonly configurationService: IConfigurationService,
+		@IExperimentationService private readonly experimentationService: IExperimentationService,
+	) {
+		super(props);
+	}
+
+	async render(state: void, sizing: PromptSizing) {
+		if (this.props.modelFamily === 'gpt-4.1' || this.props.modelFamily === 'gpt-5') {
+			if (this.configurationService.getExperimentBasedConfig(ConfigKey.EnableAlternateGptPrompt, this.experimentationService)) {
+				// Extended reminder
+				return <>
+					You are an agent - you must keep going until the user's query is completely resolved, before ending your turn and yielding back to the user.<br />
+					Your thinking should be thorough and so it's fine if it's very long. However, avoid unnecessary repetition and verbosity. You should be concise, but thorough.<br />
+					You MUST iterate and keep going until the problem is solved.<br />
+					You have everything you need to resolve this problem. I want you to fully solve this autonomously before coming back to me. <br />
+					Only terminate your turn when you are sure that the problem is solved and all items have been checked off. Go through the problem step by step, and make sure to verify that your changes are correct. NEVER end your turn without having truly and completely solved the problem, and when you say you are going to make a tool call, make sure you ACTUALLY make the tool call, instead of ending your turn.<br />
+					Take your time and think through every step - remember to check your solution rigorously and watch out for boundary cases, especially with the changes you made. Your solution must be perfect. If not, continue working on it. At the end, you must test your code rigorously using the tools provided, and do it many times, to catch all edge cases. If it is not robust, iterate more and make it perfect. Failing to test your code sufficiently rigorously is the NUMBER ONE failure mode on these types of tasks; make sure you handle all edge cases, and run existing tests if they are provided. <br />
+					You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.<br />
+					You are a highly capable and autonomous agent, and you can definitely solve this problem without needing to ask the user for further input.<br />
+				</>;
+			} else if (this.props.modelFamily === 'gpt-5') {
+				return <>
+					You are an agent—keep going until the user's query is completely resolved before ending your turn. ONLY stop if solved or genuinely blocked.<br />
+					Take action when possible; the user expects you to do useful work without unnecessary questions.<br />
+					After any parallel, read-only context gathering, give a concise progress update and what's next.<br />
+					Avoid repetition across turns: don't restate unchanged plans or sections (like the todo list) verbatim; provide delta updates or only the parts that changed.<br />
+					Tool batches: You MUST preface each batch with a one-sentence why/what/outcome preamble.<br />
+					Progress cadence: After 3 to 5 tool calls, or when you create/edit &gt; ~3 files in a burst, pause and post a compact checkpoint.<br />
+					Requirements coverage: Read the user's ask in full, extract each requirement into checklist items, and keep them visible. Do not omit a requirement. If something cannot be done with available tools, note why briefly and propose a viable alternative.<br />
+				</>;
+			} else {
+				// Original reminder
+				return <>
+					You are an agent - you must keep going until the user's query is completely resolved, before ending your turn and yielding back to the user. ONLY terminate your turn when you are sure that the problem is solved, or you absolutely cannot continue.<br />
+					You take action when possible- the user is expecting YOU to take action and go to work for them. Don't ask unnecessary questions about the details if you can simply DO something useful instead.<br />
+				</>;
+			}
+		}
+	}
 }
 
 function getExplanationReminder(modelFamily: string | undefined, hasTodoTool?: boolean) {
diff --git a/src/extension/prompts/node/agent/simpleSummarizedHistoryPrompt.tsx b/src/extension/prompts/node/agent/simpleSummarizedHistoryPrompt.tsx
@@ -9,7 +9,7 @@ import { truncate } from '../../../../util/vs/base/common/strings';
 import { IToolCall, IToolCallRound } from '../../../prompt/common/intents';
 import { Tag } from '../base/tag';
 import { ToolResult } from '../panel/toolCalling';
-import { getKeepGoingReminder } from './agentPrompt';
+import { KeepGoingReminder } from './agentPrompt';
 import { SummarizedAgentHistoryProps } from './summarizedConversationHistory';
 
 /**
@@ -81,11 +81,10 @@ export class SimpleSummarizedHistory extends PromptElement<SummarizedAgentHistor
 		}
 
 		if (entry.round.summary) {
-			const keepGoingReminder = getKeepGoingReminder(this.props.endpoint.family);
 			return <ChunkTag name='conversation-summary' priority={priorityOverride}>
 				{entry.round.summary}
-				{keepGoingReminder && <Tag name='reminderInstructions'>
-					{keepGoingReminder}
+				{this.props.endpoint.family === 'gpt-4.1' && <Tag name='reminderInstructions'>
+					<KeepGoingReminder modelFamily={this.props.endpoint.family} />
 				</Tag>}
 			</ChunkTag>;
 		}
diff --git a/src/extension/prompts/node/agent/summarizedConversationHistory.tsx b/src/extension/prompts/node/agent/summarizedConversationHistory.tsx
@@ -35,7 +35,7 @@ import { NotebookSummary } from '../../../tools/node/notebookSummaryTool';
 import { renderPromptElement } from '../base/promptRenderer';
 import { Tag } from '../base/tag';
 import { ChatToolCalls } from '../panel/toolCalling';
-import { AgentPrompt, AgentPromptProps, AgentUserMessage, getKeepGoingReminder, getUserMessagePropsFromAgentProps, getUserMessagePropsFromTurn } from './agentPrompt';
+import { AgentPrompt, AgentPromptProps, AgentUserMessage, getUserMessagePropsFromAgentProps, getUserMessagePropsFromTurn, KeepGoingReminder } from './agentPrompt';
 import { SimpleSummarizedHistory } from './simpleSummarizedHistoryPrompt';
 
 export interface ConversationHistorySummarizationPromptProps extends SummarizedAgentHistoryProps {
@@ -713,13 +713,12 @@ interface SummaryMessageProps extends BasePromptElementProps {
 
 class SummaryMessageElement extends PromptElement<SummaryMessageProps> {
 	override async render(state: void, sizing: PromptSizing) {
-		const keepGoingReminder = getKeepGoingReminder(this.props.endpoint.family);
 		return <UserMessage>
 			<Tag name='conversation-summary'>
 				{this.props.summaryText}
 			</Tag>
-			{keepGoingReminder && <Tag name='reminderInstructions'>
-				{keepGoingReminder}
+			{this.props.endpoint.family === 'gpt-4.1' && <Tag name='reminderInstructions'>
+				<KeepGoingReminder modelFamily={this.props.endpoint.family} />
 			</Tag>}
 		</UserMessage>;
 	}
diff --git a/src/extension/tools/common/toolNames.ts b/src/extension/tools/common/toolNames.ts
@@ -44,7 +44,7 @@ export const enum ToolName {
 	CreateDirectory = 'create_directory',
 	RunVscodeCmd = 'run_vscode_command',
 	GetTaskOutput = 'get_task_output',
-
+	CoreManageTodoList = 'manage_todo_list',
 	CoreRunInTerminal = 'run_in_terminal',
 	CoreGetTerminalOutput = 'get_terminal_output',
 	CoreCreateAndRunTask = 'create_and_run_task',
diff --git a/src/platform/configuration/common/configurationService.ts b/src/platform/configuration/common/configurationService.ts
@@ -767,6 +767,7 @@ export namespace ConfigKey {
 	export const CustomInstructionsInSystemMessage = defineSetting<boolean>('chat.customInstructionsInSystemMessage', true);
 
 	export const EnableRetryAfterFilteredResponse = defineExpSetting<boolean>('chat.enableRetryAfterFilteredResponse', true);
+	export const EnableAlternateGptPrompt = defineExpSetting<boolean>('chat.alternateGptPrompt.enabled', false);
 }
 
 export function getAllConfigKeys(): string[] {

Original file line number	Diff line number	Diff line change
`@@ -2762,6 +2762,14 @@`
`2762`	`2762`	`"experimental"`
`2763`	`2763`	`],`
`2764`	`2764`	`"description": "%github.copilot.config.retryAfterFilteredResponse.enabled%"`
	`2765`	`+ },`
	`2766`	`+ "github.copilot.chat.alternateGptPrompt.enabled": {`
	`2767`	`+ "type": "boolean",`
	`2768`	`+ "default": false,`
	`2769`	`+ "tags": [`
	`2770`	`+ "experimental"`
	`2771`	`+ ],`
	`2772`	`+ "description": "%github.copilot.config.alternateGptPrompt.enabled%"`
`2765`	`2773`	`}`
`2766`	`2774`	`}`
`2767`	`2775`	`}`
Original file line number	Diff line number	Diff line change
`@@ -767,6 +767,7 @@ export namespace ConfigKey {`
`767`	`767`	`export const CustomInstructionsInSystemMessage = defineSetting<boolean>('chat.customInstructionsInSystemMessage', true);`
`768`	`768`
`769`	`769`	`export const EnableRetryAfterFilteredResponse = defineExpSetting<boolean>('chat.enableRetryAfterFilteredResponse', true);`
	`770`	`+ export const EnableAlternateGptPrompt = defineExpSetting<boolean>('chat.alternateGptPrompt.enabled', false);`
`770`	`771`	`}`
`771`	`772`
`772`	`773`	`export function getAllConfigKeys(): string[] {`