Skip to content

Commit dbb6f8b

Browse files
fix: delegate get input to orchestrator tool
1 parent e87d017 commit dbb6f8b

File tree

7 files changed

+386
-12
lines changed

7 files changed

+386
-12
lines changed

docs/9_mcp_workflow_engine_extraction/design.md

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,12 @@ export interface MCPToolInvocationData<TWorkflowInputSchema extends z.ZodObject<
781781
inputSchema: TWorkflowInputSchema;
782782
};
783783
input: Omit<z.infer<TWorkflowInputSchema>, 'workflowStateData'>;
784+
/**
785+
* Flag indicating the orchestrator should handle user input collection directly
786+
* instead of delegating to a separate get-input tool.
787+
* When true, the orchestrator generates a user input collection prompt directly.
788+
*/
789+
directUserInputCollection?: boolean;
784790
}
785791
```
786792

@@ -797,6 +803,60 @@ While the orchestrator and base classes provide the workflow infrastructure, man
797803

798804
Rather than force every consumer to implement these common patterns, we provide them as part of the framework.
799805

806+
##### 5.0 Direct User Input Collection (Orchestrator-Handled)
807+
808+
**Purpose**: Allows the orchestrator to handle user input collection directly, eliminating the need for an intermediate tool call.
809+
810+
**Background**: The standard workflow for gathering user input takes three steps and involves two tool calls (one to the get-input tool, then one back to the orchestrator):
811+
1. The workflow interrupts and instructs the LLM to invoke a get-input tool
812+
2. The get-input tool returns a prompt, and the LLM gathers user input
813+
3. The LLM returns the result to the orchestrator
814+
815+
The extra round-trip through the get-input tool adds latency and complexity. The **direct user input collection** feature streamlines this by having the orchestrator generate the user input prompt directly.
816+
817+
**How It Works**:
818+
819+
When a workflow node needs user input, it creates an interrupt with the `directUserInputCollection` flag set to `true`:
820+
821+
```typescript
822+
const mcpToolData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA> = {
823+
llmMetadata: {
824+
name: 'get-input-tool',
825+
description: 'Get user input',
826+
inputSchema: GET_INPUT_WORKFLOW_INPUT_SCHEMA,
827+
},
828+
input: {
829+
propertiesRequiringInput: [
830+
{ propertyName: 'platform', friendlyName: 'Platform', description: 'Target platform' },
831+
],
832+
},
833+
directUserInputCollection: true, // Flag for direct handling
834+
};
835+
return interrupt(mcpToolData);
836+
```
837+
838+
When the orchestrator detects this flag, instead of instructing the LLM to call a separate get-input tool, it generates a user input collection prompt directly. The LLM then:
839+
1. Presents the prompt to the user
840+
2. Waits for user input
841+
3. Returns the result (conforming to the `{ userUtterance: ... }` schema) directly to the orchestrator
842+
843+
**Flow Comparison**:
844+
845+
```
846+
Standard Flow (without flag):
847+
GetUserInputNode → interrupt → Orchestrator → LLM calls get-input tool → Tool returns prompt → LLM gathers input → Returns to Orchestrator
848+
849+
Direct Flow (with flag):
850+
GetUserInputNode → interrupt (with flag) → Orchestrator generates prompt directly → LLM gathers input → Returns to Orchestrator
851+
```
852+
853+
**Benefits**:
854+
- Reduced latency (eliminates one tool call round-trip)
855+
- Simpler flow for common user input scenarios
856+
- Same result schema (`{ userUtterance: ... }`) preserved for compatibility
857+
858+
**Implementation**: The `GetInputService` automatically sets this flag, so existing `GetUserInputNode` usage benefits without code changes.
859+
800860
##### 5.1 Get Input Tool
801861

802862
**Purpose**: Prompts the user to provide input for a set of required properties.

packages/mcp-workflow/src/common/metadata.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ export interface MCPToolInvocationData<TWorkflowInputSchema extends z.ZodObject<
5050
};
5151
/** Input parameters for the tool invocation - typed to business logic schema only */
5252
input: Omit<z.infer<TWorkflowInputSchema>, 'workflowStateData'>;
53+
/**
54+
* Flag indicating the orchestrator should handle user input collection directly
55+
* instead of delegating to a separate get-input tool.
56+
* When true, the orchestrator generates a user input collection prompt directly.
57+
*/
58+
directUserInputCollection?: boolean;
5359
}
5460

5561
/**

packages/mcp-workflow/src/services/getInputService.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ export class GetInputService extends AbstractService implements GetInputServiceP
7070
});
7171

7272
const metadata = createGetInputMetadata(this.toolId);
73-
// Create tool invocation data
73+
// Create tool invocation data with directUserInputCollection flag
74+
// This tells the orchestrator to handle user input collection directly
75+
// instead of delegating to a separate get-input tool
7476
const toolInvocationData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA> = {
7577
llmMetadata: {
7678
name: metadata.toolId,
@@ -80,6 +82,7 @@ export class GetInputService extends AbstractService implements GetInputServiceP
8082
input: {
8183
propertiesRequiringInput: unfulfilledProperties,
8284
},
85+
directUserInputCollection: true,
8386
};
8487

8588
// Execute tool with logging and validation

packages/mcp-workflow/src/tools/orchestrator/metadata.ts

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,47 @@ import {
1313
} from '../../common/metadata.js';
1414
import type { OrchestratorConfig } from './config.js';
1515

16+
/**
17+
* Schema for the initial user request when starting a new workflow.
18+
* This is the expected format when calling the orchestrator for the first time.
19+
*/
20+
export const INITIAL_USER_REQUEST_SCHEMA = z.object({
21+
request: z.string().describe("The user's initial request to start the workflow"),
22+
});
23+
24+
/**
25+
* Schema for resumption user input when continuing an existing workflow.
26+
* This allows any structured data returned from previous tool executions.
27+
*/
28+
export const RESUMPTION_USER_INPUT_SCHEMA = z.record(z.string(), z.unknown());
29+
30+
/**
31+
* Combined user input schema that accepts either:
32+
* - Initial request format: { request: "user's request string" } - for starting new workflows
33+
* - Resumption format: any object - for continuing workflows with tool results
34+
*/
35+
export const USER_INPUT_SCHEMA = z
36+
.union([INITIAL_USER_REQUEST_SCHEMA, RESUMPTION_USER_INPUT_SCHEMA])
37+
.describe(
38+
'User input - for initial calls use { request: "your request" }, for resumption calls use the structured output from the previous tool'
39+
);
40+
1641
/**
1742
* Orchestrator input schema
1843
*
1944
* Note: The workflow state data is optional/defaulted because the orchestrator
2045
* can start new workflows (where it doesn't exist yet) or continue existing ones.
46+
*
47+
* For initial calls (starting a new workflow):
48+
* - userInput should be { request: "your request string" }
49+
* - workflowStateData should be omitted or have empty thread_id
50+
*
51+
* For resumption calls (continuing an existing workflow):
52+
* - userInput should contain the structured output from the previous tool execution
53+
* - workflowStateData must contain the thread_id from the previous response
2154
*/
2255
export const ORCHESTRATOR_INPUT_SCHEMA = z.object({
23-
[WORKFLOW_PROPERTY_NAMES.userInput]: z
24-
.record(z.string(), z.unknown())
25-
.optional()
26-
.describe(
27-
'User input - can be any data structure from initial request or previously executed MCP tool'
28-
),
56+
[WORKFLOW_PROPERTY_NAMES.userInput]: USER_INPUT_SCHEMA.optional(),
2957
[WORKFLOW_PROPERTY_NAMES.workflowStateData]: WORKFLOW_STATE_DATA_SCHEMA.default({
3058
thread_id: '',
3159
}).describe('Opaque workflow state data. Do not populate unless explicitly instructed to do so.'),

packages/mcp-workflow/src/tools/orchestrator/orchestratorTool.ts

Lines changed: 123 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ import {
1616
WORKFLOW_PROPERTY_NAMES,
1717
WorkflowStateData,
1818
} from '../../common/metadata.js';
19+
import {
20+
GET_INPUT_WORKFLOW_INPUT_SCHEMA,
21+
GET_INPUT_WORKFLOW_RESULT_SCHEMA,
22+
} from '../utilities/getInput/metadata.js';
1923
import { WorkflowStateManager } from '../../checkpointing/workflowStateManager.js';
2024
import { OrchestratorConfig } from './config.js';
2125
import {
@@ -165,13 +169,16 @@ export class OrchestratorTool extends AbstractTool<OrchestratorToolMetadata> {
165169

166170
this.logger.info('Invoking next MCP tool', {
167171
toolName: mcpToolInvocationData.llmMetadata?.name,
172+
directUserInputCollection: mcpToolInvocationData.directUserInputCollection,
168173
});
169174

170-
// Create orchestration prompt
171-
const orchestrationPrompt = this.createOrchestrationPrompt(
172-
mcpToolInvocationData,
173-
workflowStateData
174-
);
175+
// Create orchestration prompt - use direct user input collection if flagged
176+
const orchestrationPrompt = mcpToolInvocationData.directUserInputCollection
177+
? this.createDirectUserInputCollectionPrompt(
178+
mcpToolInvocationData as MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA>,
179+
workflowStateData
180+
)
181+
: this.createOrchestrationPrompt(mcpToolInvocationData, workflowStateData);
175182

176183
// Save the workflow state.
177184
await this.stateManager.saveCheckpointerState(checkpointer);
@@ -236,4 +243,115 @@ The MCP server tool you invoke will respond with its output, along with further
236243
instructions for continuing the workflow.
237244
`;
238245
}
246+
247+
/**
248+
* Create a direct user input collection prompt.
249+
*
250+
* This method generates a prompt that instructs the LLM to gather user input
251+
* directly, without requiring an intermediate tool call to a separate get-input tool.
252+
* The LLM should then return the user's response back to this orchestrator.
253+
*
254+
* @param mcpToolInvocationData - The tool invocation data containing properties requiring input
255+
* @param workflowStateData - The workflow state data to round-trip back to the orchestrator
256+
* @returns A prompt instructing the LLM to gather user input and return to the orchestrator
257+
*/
258+
private createDirectUserInputCollectionPrompt(
259+
mcpToolInvocationData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA>,
260+
workflowStateData: WorkflowStateData
261+
): string {
262+
const propertiesDescription = this.generatePropertiesDescription(mcpToolInvocationData);
263+
const resultSchema = JSON.stringify(zodToJsonSchema(GET_INPUT_WORKFLOW_RESULT_SCHEMA));
264+
265+
return `
266+
# ROLE
267+
268+
You are an input gathering assistant, responsible for explicitly requesting and gathering the
269+
user's input for a set of unfulfilled properties.
270+
271+
# TASK
272+
273+
Your job is to provide a prompt to the user that outlines the details for a set of properties
274+
that require the user's input. The prompt should be polite and conversational.
275+
276+
# CONTEXT
277+
278+
Here is the list of properties that require the user's input, along with their describing
279+
metadata:
280+
281+
${propertiesDescription}
282+
283+
# INSTRUCTIONS
284+
285+
1. Based on the properties listed in "CONTEXT", generate a prompt that outlines the details
286+
for each property.
287+
2. Present the prompt to the user and instruct the user to provide their input.
288+
3. **IMPORTANT:** YOU MUST NOW WAIT for the user to provide a follow-up response to your prompt.
289+
1. You CANNOT PROCEED FROM THIS STEP until the user has provided THEIR OWN INPUT VALUE.
290+
4. Follow the "Post-Input-Collection" instructions below, to return the user's
291+
response to the orchestrator for further processing.
292+
293+
# Post-Input-Collection Instructions
294+
295+
## 1. Format the results from the user's input
296+
297+
The user's response data MUST be wrapped in a \`userUtterance\` property. The structure should be:
298+
299+
\`\`\`json
300+
{
301+
"userUtterance": <the user's response data here>
302+
}
303+
\`\`\`
304+
305+
For example, if the user provides values for properties like \`platform\` and \`projectName\`, the formatted result should be:
306+
307+
\`\`\`json
308+
{
309+
"userUtterance": {
310+
"platform": "iOS",
311+
"projectName": "MyApp"
312+
}
313+
}
314+
\`\`\`
315+
316+
**JSON Schema for reference:**
317+
\`\`\`json
318+
${resultSchema}
319+
\`\`\`
320+
321+
## 2. Invoke the orchestrator tool to continue the workflow
322+
323+
You MUST initiate the following actions to proceed with the in-progress workflow you are
324+
participating in.
325+
326+
### 2.1. Invoke the \`${this.toolMetadata.toolId}\` tool
327+
328+
Invoke the \`${this.toolMetadata.toolId}\` tool to continue the workflow.
329+
330+
### 2.2 Provide input values to the tool
331+
332+
Provide the following input values to the \`${this.toolMetadata.toolId}\` tool:
333+
334+
- \`${WORKFLOW_PROPERTY_NAMES.userInput}\`: The formatted result from step 1 (an object with a \`userUtterance\` property containing the user's response data).
335+
- \`${WORKFLOW_PROPERTY_NAMES.workflowStateData}\`: ${JSON.stringify(workflowStateData)}
336+
337+
This will continue the workflow orchestration process.
338+
`;
339+
}
340+
341+
/**
342+
* Creates a "prompt-friendly" description of the properties requiring input.
343+
*
344+
* @param mcpToolInvocationData - The tool invocation data containing properties requiring input
345+
* @returns A formatted description of the properties requiring input
346+
*/
347+
private generatePropertiesDescription(
348+
mcpToolInvocationData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA>
349+
): string {
350+
return mcpToolInvocationData.input.propertiesRequiringInput
351+
.map(
352+
property =>
353+
`- Property Name: ${property.propertyName}\n- Friendly Name: ${property.friendlyName}\n- Description: ${property.description}`
354+
)
355+
.join('\n\n');
356+
}
239357
}

packages/mcp-workflow/tests/services/getInputService.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,28 @@ describe('GetInputService', () => {
8484
).toEqual(unfulfilledProperties);
8585
});
8686

87+
it('should set directUserInputCollection flag to true', () => {
88+
const userResponse = 'test response';
89+
mockToolExecutor.setResult(toolId, {
90+
userUtterance: userResponse,
91+
});
92+
93+
const unfulfilledProperties = [
94+
{
95+
propertyName: 'platform',
96+
friendlyName: 'platform',
97+
description: 'Target platform',
98+
},
99+
];
100+
101+
service.getInput(unfulfilledProperties);
102+
103+
const callHistory = mockToolExecutor.getCallHistory();
104+
expect(callHistory.length).toBe(1);
105+
const call = callHistory[0];
106+
expect(call.directUserInputCollection).toBe(true);
107+
});
108+
87109
it('should log debug message with properties', () => {
88110
mockToolExecutor.setResult(toolId, {
89111
userUtterance: 'test',

0 commit comments

Comments
 (0)