Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions src/core/prompts/sections/__tests__/objective.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,19 @@ describe("getObjectiveSection", () => {
describe("when codebase_search is available", () => {
it("should include codebase_search first enforcement in thinking process", () => {
const objective = getObjectiveSection(mockCodeIndexManagerEnabled)

// Check that the objective includes the codebase_search enforcement
expect(objective).toContain("if the task involves understanding existing code or functionality, you MUST use the `codebase_search` tool")
expect(objective).toContain(
"if the task involves understanding existing code or functionality, you MUST use the `codebase_search` tool",
)
expect(objective).toContain("BEFORE using any other search or file exploration tools")
})
})

describe("when codebase_search is not available", () => {
it("should not include codebase_search enforcement", () => {
const objective = getObjectiveSection(mockCodeIndexManagerDisabled)

// Check that the objective does not include the codebase_search enforcement
expect(objective).not.toContain("you MUST use the `codebase_search` tool")
expect(objective).not.toContain("BEFORE using any other search or file exploration tools")
Expand All @@ -39,7 +41,7 @@ describe("getObjectiveSection", () => {
it("should maintain proper structure regardless of codebase_search availability", () => {
const objectiveEnabled = getObjectiveSection(mockCodeIndexManagerEnabled)
const objectiveDisabled = getObjectiveSection(mockCodeIndexManagerDisabled)

// Check that all numbered items are present in both cases
for (const objective of [objectiveEnabled, objectiveDisabled]) {
expect(objective).toContain("1. Analyze the user's task")
Expand All @@ -53,7 +55,7 @@ describe("getObjectiveSection", () => {
it("should include thinking tags guidance regardless of codebase_search availability", () => {
const objectiveEnabled = getObjectiveSection(mockCodeIndexManagerEnabled)
const objectiveDisabled = getObjectiveSection(mockCodeIndexManagerDisabled)

// Check that thinking tags guidance is included in both cases
for (const objective of [objectiveEnabled, objectiveDisabled]) {
expect(objective).toContain("<thinking></thinking> tags")
Expand All @@ -65,13 +67,15 @@ describe("getObjectiveSection", () => {
it("should include parameter inference guidance regardless of codebase_search availability", () => {
const objectiveEnabled = getObjectiveSection(mockCodeIndexManagerEnabled)
const objectiveDisabled = getObjectiveSection(mockCodeIndexManagerDisabled)

// Check parameter inference guidance in both cases
for (const objective of [objectiveEnabled, objectiveDisabled]) {
expect(objective).toContain("Go through each of the required parameters")
expect(objective).toContain("determine if the user has directly provided or given enough information to infer a value")
expect(objective).toContain(
"determine if the user has directly provided or given enough information to infer a value",
)
expect(objective).toContain("DO NOT invoke the tool (not even with fillers for the missing params)")
expect(objective).toContain("ask_followup_question tool")
}
})
})
})
24 changes: 15 additions & 9 deletions src/core/prompts/sections/__tests__/tool-use-guidelines.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,20 @@ describe("getToolUseGuidelinesSection", () => {
describe("when codebase_search is available", () => {
it("should include codebase_search first enforcement", () => {
const guidelines = getToolUseGuidelinesSection(mockCodeIndexManagerEnabled)

// Check that the guidelines include the codebase_search enforcement
expect(guidelines).toContain("IMPORTANT: When starting a new task or when you need to understand existing code/functionality, you MUST use the `codebase_search` tool FIRST")
expect(guidelines).toContain(
"IMPORTANT: When starting a new task or when you need to understand existing code/functionality, you MUST use the `codebase_search` tool FIRST",
)
expect(guidelines).toContain("before any other search tools")
expect(guidelines).toContain("semantic search tool helps you find relevant code based on meaning rather than just keywords")
expect(guidelines).toContain(
"semantic search tool helps you find relevant code based on meaning rather than just keywords",
)
})

it("should maintain proper numbering with codebase_search", () => {
const guidelines = getToolUseGuidelinesSection(mockCodeIndexManagerEnabled)

// Check that all numbered items are present
expect(guidelines).toContain("1. In <thinking> tags")
expect(guidelines).toContain("2. **IMPORTANT:")
Expand All @@ -43,15 +47,17 @@ describe("getToolUseGuidelinesSection", () => {
describe("when codebase_search is not available", () => {
it("should not include codebase_search enforcement", () => {
const guidelines = getToolUseGuidelinesSection(mockCodeIndexManagerDisabled)

// Check that the guidelines do not include the codebase_search enforcement
expect(guidelines).not.toContain("IMPORTANT: When starting a new task or when you need to understand existing code/functionality, you MUST use the `codebase_search` tool FIRST")
expect(guidelines).not.toContain(
"IMPORTANT: When starting a new task or when you need to understand existing code/functionality, you MUST use the `codebase_search` tool FIRST",
)
expect(guidelines).not.toContain("semantic search tool helps you find relevant code based on meaning")
})

it("should maintain proper numbering without codebase_search", () => {
const guidelines = getToolUseGuidelinesSection(mockCodeIndexManagerDisabled)

// Check that all numbered items are present with correct numbering
expect(guidelines).toContain("1. In <thinking> tags")
expect(guidelines).toContain("2. Choose the most appropriate tool")
Expand All @@ -65,7 +71,7 @@ describe("getToolUseGuidelinesSection", () => {
it("should include iterative process guidelines regardless of codebase_search availability", () => {
const guidelinesEnabled = getToolUseGuidelinesSection(mockCodeIndexManagerEnabled)
const guidelinesDisabled = getToolUseGuidelinesSection(mockCodeIndexManagerDisabled)

// Check that the iterative process section is included in both cases
for (const guidelines of [guidelinesEnabled, guidelinesDisabled]) {
expect(guidelines).toContain("It is crucial to proceed step-by-step")
Expand All @@ -75,4 +81,4 @@ describe("getToolUseGuidelinesSection", () => {
expect(guidelines).toContain("4. Ensure that each action builds correctly")
}
})
})
})
12 changes: 8 additions & 4 deletions src/core/prompts/sections/objective.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
import { EXPERIMENT_IDS, experiments } from "../../../shared/experiments"
import { CodeIndexManager } from "../../../services/code-index/manager"

export function getObjectiveSection(codeIndexManager?: CodeIndexManager, experimentsConfig?: Record<string, boolean>): string {
const isCodebaseSearchAvailable = codeIndexManager &&
export function getObjectiveSection(
codeIndexManager?: CodeIndexManager,
experimentsConfig?: Record<string, boolean>,
): string {
const isCodebaseSearchAvailable =
codeIndexManager &&
codeIndexManager.isFeatureEnabled &&
codeIndexManager.isFeatureConfigured &&
codeIndexManager.isInitialized

const codebaseSearchInstruction = isCodebaseSearchAvailable
? "First, if the task involves understanding existing code or functionality, you MUST use the `codebase_search` tool to search for relevant code based on the task's intent BEFORE using any other search or file exploration tools. Then, "
: "First, "

// Check if command execution is disabled via experiment
const isCommandDisabled = experimentsConfig && experimentsConfig[EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND]

const commandInstruction = !isCommandDisabled
? " You may also provide a CLI command to showcase the result of your task; this can be particularly useful for web development tasks, where you can run e.g. \`open index.html\` to show the website you've built."
: ""
Expand Down
10 changes: 8 additions & 2 deletions src/core/prompts/sections/rules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,14 @@ function getEditingInstructions(diffStrategy?: DiffStrategy): string {
return instructions.join("\n")
}

export function getRulesSection(cwd: string, supportsComputerUse: boolean, diffStrategy?: DiffStrategy, codeIndexManager?: CodeIndexManager): string {
const isCodebaseSearchAvailable = codeIndexManager &&
export function getRulesSection(
cwd: string,
supportsComputerUse: boolean,
diffStrategy?: DiffStrategy,
codeIndexManager?: CodeIndexManager,
): string {
const isCodebaseSearchAvailable =
codeIndexManager &&
codeIndexManager.isFeatureEnabled &&
codeIndexManager.isFeatureConfigured &&
codeIndexManager.isInitialized
Expand Down
37 changes: 25 additions & 12 deletions src/core/prompts/sections/tool-use-guidelines.ts
Original file line number Diff line number Diff line change
@@ -1,40 +1,53 @@
import { CodeIndexManager } from "../../../services/code-index/manager"

export function getToolUseGuidelinesSection(codeIndexManager?: CodeIndexManager): string {
const isCodebaseSearchAvailable = codeIndexManager &&
const isCodebaseSearchAvailable =
codeIndexManager &&
codeIndexManager.isFeatureEnabled &&
codeIndexManager.isFeatureConfigured &&
codeIndexManager.isInitialized

// Build guidelines array with automatic numbering
let itemNumber = 1;
const guidelinesList: string[] = [];
let itemNumber = 1
const guidelinesList: string[] = []

// First guideline is always the same
guidelinesList.push(`${itemNumber++}. In <thinking> tags, assess what information you already have and what information you need to proceed with the task.`);
guidelinesList.push(
`${itemNumber++}. In <thinking> tags, assess what information you already have and what information you need to proceed with the task.`,
)

// Conditional codebase search guideline
if (isCodebaseSearchAvailable) {
guidelinesList.push(`${itemNumber++}. **IMPORTANT: When starting a new task or when you need to understand existing code/functionality, you MUST use the \`codebase_search\` tool FIRST before any other search tools.** This semantic search tool helps you find relevant code based on meaning rather than just keywords. Only after using codebase_search should you use other tools like search_files, list_files, or read_file for more specific exploration.`);
guidelinesList.push(`${itemNumber++}. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like \`ls\` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task.`);
guidelinesList.push(
`${itemNumber++}. **IMPORTANT: When starting a new task or when you need to understand existing code/functionality, you MUST use the \`codebase_search\` tool FIRST before any other search tools.** This semantic search tool helps you find relevant code based on meaning rather than just keywords. Only after using codebase_search should you use other tools like search_files, list_files, or read_file for more specific exploration.`,
)
guidelinesList.push(
`${itemNumber++}. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like \`ls\` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task.`,
)
} else {
guidelinesList.push(`${itemNumber++}. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like \`ls\` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task.`);
guidelinesList.push(
`${itemNumber++}. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like \`ls\` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task.`,
)
}

// Remaining guidelines
guidelinesList.push(`${itemNumber++}. If multiple actions are needed, use one tool at a time per message to accomplish the task iteratively, with each tool use being informed by the result of the previous tool use. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.`);
guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`);
guidelinesList.push(
`${itemNumber++}. If multiple actions are needed, use one tool at a time per message to accomplish the task iteratively, with each tool use being informed by the result of the previous tool use. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result.`,
)
guidelinesList.push(`${itemNumber++}. Formulate your tool use using the XML format specified for each tool.`)
guidelinesList.push(`${itemNumber++}. After each tool use, the user will respond with the result of that tool use. This result will provide you with the necessary information to continue your task or make further decisions. This response may include:
- Information about whether the tool succeeded or failed, along with any reasons for failure.
- Linter errors that may have arisen due to the changes you made, which you'll need to address.
- New terminal output in reaction to the changes, which you may need to consider or act upon.
- Any other relevant feedback or information related to the tool use.`);
guidelinesList.push(`${itemNumber++}. ALWAYS wait for user confirmation after each tool use before proceeding. Never assume the success of a tool use without explicit confirmation of the result from the user.`);
- Any other relevant feedback or information related to the tool use.`)
guidelinesList.push(
`${itemNumber++}. ALWAYS wait for user confirmation after each tool use before proceeding. Never assume the success of a tool use without explicit confirmation of the result from the user.`,
)

// Join guidelines and add the footer
return `# Tool Use Guidelines

${guidelinesList.join('\n')}
${guidelinesList.join("\n")}

It is crucial to proceed step-by-step, waiting for the user's message after each tool use before moving forward with the task. This approach allows you to:
1. Confirm the success of each step before proceeding.
Expand Down
22 changes: 12 additions & 10 deletions src/core/prompts/tools/attempt-completion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,39 @@ import { ToolArgs } from "./types"

export function getAttemptCompletionDescription(args?: ToolArgs): string {
// Check if command execution is disabled via experiment
const isCommandDisabled = args?.experiments && experiments.isEnabled(
args.experiments,
EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND
)
const isCommandDisabled =
args?.experiments && experiments.isEnabled(args.experiments, EXPERIMENT_IDS.DISABLE_COMPLETION_COMMAND)

const baseDescription = `## attempt_completion
Description: After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user.${!isCommandDisabled ? ' Optionally you may provide a CLI command to showcase the result of your work.' : ''} The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
Description: After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user.${!isCommandDisabled ? " Optionally you may provide a CLI command to showcase the result of your work." : ""} The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
IMPORTANT NOTE: This tool CANNOT be used until you've confirmed from the user that any previous tool uses were successful. Failure to do so will result in code corruption and system failure. Before using this tool, you must ask yourself in <thinking></thinking> tags if you've confirmed from the user that any previous tool uses were successful. If not, then DO NOT use this tool.
Parameters:
- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance.`

const commandParameter = !isCommandDisabled ? `
- command: (optional) A CLI command to execute to show a live demo of the result to the user. For example, use \`open index.html\` to display a created html website, or \`open localhost:3000\` to display a locally running development server. But DO NOT use commands like \`echo\` or \`cat\` that merely print text. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.` : ''
const commandParameter = !isCommandDisabled
? `
- command: (optional) A CLI command to execute to show a live demo of the result to the user. For example, use \`open index.html\` to display a created html website, or \`open localhost:3000\` to display a locally running development server. But DO NOT use commands like \`echo\` or \`cat\` that merely print text. This command should be valid for the current operating system. Ensure the command is properly formatted and does not contain any harmful instructions.`
: ""

const usage = `
Usage:
<attempt_completion>
<result>
Your final result description here
</result>${!isCommandDisabled ? '\n<command>Command to demonstrate result (optional)</command>' : ''}
</result>${!isCommandDisabled ? "\n<command>Command to demonstrate result (optional)</command>" : ""}
</attempt_completion>`

const example = !isCommandDisabled ? `
const example = !isCommandDisabled
? `

Example: Requesting to attempt completion with a result and command
<attempt_completion>
<result>
I've updated the CSS
</result>
<command>open index.html</command>
</attempt_completion>` : `
</attempt_completion>`
: `

Example: Requesting to attempt completion with a result
<attempt_completion>
Expand Down
Loading
Loading