fix: delegate get input to orchestrator tool

haifeng-li-at-salesforce · haifeng-li-at-salesforce · commit dbb6f8b29292 · 2026-01-13T12:09:47.000-08:00
diff --git a/docs/9_mcp_workflow_engine_extraction/design.md b/docs/9_mcp_workflow_engine_extraction/design.md
@@ -781,6 +781,12 @@ export interface MCPToolInvocationData<TWorkflowInputSchema extends z.ZodObject<
     inputSchema: TWorkflowInputSchema;
   };
   input: Omit<z.infer<TWorkflowInputSchema>, 'workflowStateData'>;
+  /**
+   * Flag indicating the orchestrator should handle user input collection directly
+   * instead of delegating to a separate get-input tool.
+   * When true, the orchestrator generates a user input collection prompt directly.
+   */
+  directUserInputCollection?: boolean;
 }
 ```
 
@@ -797,6 +803,60 @@ While the orchestrator and base classes provide the workflow infrastructure, man
 
 Rather than force every consumer to implement these common patterns, we provide them as part of the framework.
 
+##### 5.0 Direct User Input Collection (Orchestrator-Handled)
+
+**Purpose**: Allows the orchestrator to handle user input collection directly, eliminating the need for an intermediate tool call.
+
+**Background**: The standard workflow for gathering user input involves two tool calls (the get-input tool, then the orchestrator), carried out in three steps:
+1. The workflow interrupts and instructs the LLM to invoke a get-input tool
+2. The get-input tool returns a prompt, and the LLM gathers user input
+3. The LLM returns the result to the orchestrator
+
+This adds latency and complexity. The **direct user input collection** feature streamlines this by having the orchestrator generate the user input prompt directly.
+
+**How It Works**:
+
+When a workflow node needs user input, it creates an interrupt with the `directUserInputCollection` flag set to `true`:
+
+```typescript
+const mcpToolData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA> = {
+  llmMetadata: {
+    name: 'get-input-tool',
+    description: 'Get user input',
+    inputSchema: GET_INPUT_WORKFLOW_INPUT_SCHEMA,
+  },
+  input: {
+    propertiesRequiringInput: [
+      { propertyName: 'platform', friendlyName: 'Platform', description: 'Target platform' },
+    ],
+  },
+  directUserInputCollection: true, // Flag for direct handling
+};
+return interrupt(mcpToolData);
+```
+
+When the orchestrator detects this flag, instead of instructing the LLM to call a separate get-input tool, it generates a user input collection prompt directly. The LLM then:
+1. Presents the prompt to the user
+2. Waits for user input
+3. Returns the result (conforming to the `{ userUtterance: ... }` schema) directly to the orchestrator
+
+**Flow Comparison**:
+
+```
+Standard Flow (without flag):
+  GetUserInputNode → interrupt → Orchestrator → LLM calls get-input tool → Tool returns prompt → LLM gathers input → Returns to Orchestrator
+
+Direct Flow (with flag):
+  GetUserInputNode → interrupt (with flag) → Orchestrator generates prompt directly → LLM gathers input → Returns to Orchestrator
+```
+
+**Benefits**:
+- Reduced latency (eliminates one tool call round-trip)
+- Simpler flow for common user input scenarios
+- Same result schema (`{ userUtterance: ... }`) preserved for compatibility
+
+**Implementation**: The `GetInputService` automatically sets this flag, so existing `GetUserInputNode` usage benefits without code changes.
+
 ##### 5.1 Get Input Tool
 
 **Purpose**: Prompts the user to provide input for a set of required properties.
diff --git a/packages/mcp-workflow/src/common/metadata.ts b/packages/mcp-workflow/src/common/metadata.ts
@@ -50,6 +50,12 @@ export interface MCPToolInvocationData<TWorkflowInputSchema extends z.ZodObject<
   };
   /** Input parameters for the tool invocation - typed to business logic schema only */
   input: Omit<z.infer<TWorkflowInputSchema>, 'workflowStateData'>;
+  /**
+   * Flag indicating the orchestrator should handle user input collection directly
+   * instead of delegating to a separate get-input tool.
+   * When true, the orchestrator generates a user input collection prompt directly.
+   */
+  directUserInputCollection?: boolean;
 }
 
 /**
diff --git a/packages/mcp-workflow/src/services/getInputService.ts b/packages/mcp-workflow/src/services/getInputService.ts
@@ -70,7 +70,9 @@ export class GetInputService extends AbstractService implements GetInputServiceP
     });
 
     const metadata = createGetInputMetadata(this.toolId);
-    // Create tool invocation data
+    // Create tool invocation data with directUserInputCollection flag
+    // This tells the orchestrator to handle user input collection directly
+    // instead of delegating to a separate get-input tool
     const toolInvocationData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA> = {
       llmMetadata: {
         name: metadata.toolId,
@@ -80,6 +82,7 @@ export class GetInputService extends AbstractService implements GetInputServiceP
       input: {
         propertiesRequiringInput: unfulfilledProperties,
       },
+      directUserInputCollection: true,
     };
 
     // Execute tool with logging and validation
diff --git a/packages/mcp-workflow/src/tools/orchestrator/metadata.ts b/packages/mcp-workflow/src/tools/orchestrator/metadata.ts
@@ -13,19 +13,47 @@ import {
 } from '../../common/metadata.js';
 import type { OrchestratorConfig } from './config.js';
 
+/**
+ * Schema for the initial user request when starting a new workflow.
+ * This is the expected format when calling the orchestrator for the first time.
+ *
+ * Shape: an object with one required string property, `request`.
+ */
+export const INITIAL_USER_REQUEST_SCHEMA = z.object({
+  request: z.string().describe("The user's initial request to start the workflow"),
+});
+
+/**
+ * Schema for resumption user input when continuing an existing workflow.
+ * This allows any structured data returned from previous tool executions.
+ *
+ * Shape: a record with string keys and values of unknown type, so no particular
+ * properties are required of the payload.
+ */
+export const RESUMPTION_USER_INPUT_SCHEMA = z.record(z.string(), z.unknown());
+
+/**
+ * Combined user input schema that accepts either:
+ * - Initial request format: { request: "user's request string" } - for starting new workflows
+ * - Resumption format: any object - for continuing workflows with tool results
+ *
+ * Zod evaluates union members in order and returns the first successful parse, and a plain
+ * `z.object` strips unrecognized keys. The initial-request member is therefore made
+ * pass-through here, so that a resumption payload which happens to include a string `request`
+ * field keeps all of its other fields instead of being reduced to `{ request }`.
+ */
+export const USER_INPUT_SCHEMA = z
+  .union([INITIAL_USER_REQUEST_SCHEMA.passthrough(), RESUMPTION_USER_INPUT_SCHEMA])
+  .describe(
+    'User input - for initial calls use { request: "your request" }, for resumption calls use the structured output from the previous tool'
+  );
+
 /**
  * Orchestrator input schema
  *
  * Note: The workflow state data is optional/defaulted because the orchestrator
  * can start new workflows (where it doesn't exist yet) or continue existing ones.
+ *
+ * For initial calls (starting a new workflow):
+ *   - userInput should be { request: "your request string" }
+ *   - workflowStateData should be omitted or have empty thread_id
+ *
+ * For resumption calls (continuing an existing workflow):
+ *   - userInput should contain the structured output from the previous tool execution
+ *   - workflowStateData must contain the thread_id from the previous response
  */
 export const ORCHESTRATOR_INPUT_SCHEMA = z.object({
-  [WORKFLOW_PROPERTY_NAMES.userInput]: z
-    .record(z.string(), z.unknown())
-    .optional()
-    .describe(
-      'User input - can be any data structure from initial request or previously executed MCP tool'
-    ),
+  [WORKFLOW_PROPERTY_NAMES.userInput]: USER_INPUT_SCHEMA.optional(),
   [WORKFLOW_PROPERTY_NAMES.workflowStateData]: WORKFLOW_STATE_DATA_SCHEMA.default({
     thread_id: '',
   }).describe('Opaque workflow state data. Do not populate unless explicitly instructed to do so.'),
diff --git a/packages/mcp-workflow/src/tools/orchestrator/orchestratorTool.ts b/packages/mcp-workflow/src/tools/orchestrator/orchestratorTool.ts
@@ -16,6 +16,10 @@ import {
   WORKFLOW_PROPERTY_NAMES,
   WorkflowStateData,
 } from '../../common/metadata.js';
+import {
+  GET_INPUT_WORKFLOW_INPUT_SCHEMA,
+  GET_INPUT_WORKFLOW_RESULT_SCHEMA,
+} from '../utilities/getInput/metadata.js';
 import { WorkflowStateManager } from '../../checkpointing/workflowStateManager.js';
 import { OrchestratorConfig } from './config.js';
 import {
@@ -165,13 +169,16 @@ export class OrchestratorTool extends AbstractTool<OrchestratorToolMetadata> {
 
       this.logger.info('Invoking next MCP tool', {
         toolName: mcpToolInvocationData.llmMetadata?.name,
+        directUserInputCollection: mcpToolInvocationData.directUserInputCollection,
       });
 
-      // Create orchestration prompt
-      const orchestrationPrompt = this.createOrchestrationPrompt(
-        mcpToolInvocationData,
-        workflowStateData
-      );
+      // Create orchestration prompt - use direct user input collection if flagged
+      const orchestrationPrompt = mcpToolInvocationData.directUserInputCollection
+        ? this.createDirectUserInputCollectionPrompt(
+            mcpToolInvocationData as MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA>,
+            workflowStateData
+          )
+        : this.createOrchestrationPrompt(mcpToolInvocationData, workflowStateData);
 
       // Save the workflow state.
       await this.stateManager.saveCheckpointerState(checkpointer);
@@ -236,4 +243,115 @@ The MCP server tool you invoke will respond with its output, along with further
 instructions for continuing the workflow.
 `;
   }
+
+  /**
+   * Build the prompt used when the orchestrator collects user input itself.
+   *
+   * The returned text tells the LLM to ask the user for the listed properties, wait for
+   * the reply, wrap that reply in a `userUtterance` object, and call this orchestrator
+   * tool again with the wrapped reply and the supplied workflow state. No separate
+   * get-input tool is referenced in the prompt.
+   *
+   * @param mcpToolInvocationData - Invocation data whose `input.propertiesRequiringInput`
+   *   lists the properties to ask the user about
+   * @param workflowStateData - Workflow state serialized into the prompt so the LLM can
+   *   pass it back on the follow-up orchestrator call
+   * @returns The complete prompt text
+   */
+  private createDirectUserInputCollectionPrompt(
+    mcpToolInvocationData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA>,
+    workflowStateData: WorkflowStateData
+  ): string {
+    // Resolve every interpolated value up front so the template below is pure text layout.
+    const requestedProperties = this.generatePropertiesDescription(mcpToolInvocationData);
+    const resultJsonSchema = JSON.stringify(zodToJsonSchema(GET_INPUT_WORKFLOW_RESULT_SCHEMA));
+    const orchestratorToolId = this.toolMetadata.toolId;
+    const userInputKey = WORKFLOW_PROPERTY_NAMES.userInput;
+    const stateDataKey = WORKFLOW_PROPERTY_NAMES.workflowStateData;
+    const serializedState = JSON.stringify(workflowStateData);
+
+    return `
+# ROLE
+
+You are an input gathering assistant, responsible for explicitly requesting and gathering the
+user's input for a set of unfulfilled properties.
+
+# TASK
+
+Your job is to provide a prompt to the user that outlines the details for a set of properties
+that require the user's input. The prompt should be polite and conversational.
+
+# CONTEXT
+
+Here is the list of properties that require the user's input, along with their describing
+metadata:
+
+${requestedProperties}
+
+# INSTRUCTIONS
+
+1. Based on the properties listed in "CONTEXT", generate a prompt that outlines the details
+   for each property.
+2. Present the prompt to the user and instruct the user to provide their input.
+3. **IMPORTANT:** YOU MUST NOW WAIT for the user to provide a follow-up response to your prompt.
+   1. You CANNOT PROCEED FROM THIS STEP until the user has provided THEIR OWN INPUT VALUE.
+4. Follow the "Post-Input-Collection" instructions below, to return the user's
+   response to the orchestrator for further processing.
+
+# Post-Input-Collection Instructions
+
+## 1. Format the results from the user's input
+
+The user's response data MUST be wrapped in a \`userUtterance\` property. The structure should be:
+
+\`\`\`json
+{
+  "userUtterance": <the user's response data here>
+}
+\`\`\`
+
+For example, if the user provides values for properties like \`platform\` and \`projectName\`, the formatted result should be:
+
+\`\`\`json
+{
+  "userUtterance": {
+    "platform": "iOS",
+    "projectName": "MyApp"
+  }
+}
+\`\`\`
+
+**JSON Schema for reference:**
+\`\`\`json
+${resultJsonSchema}
+\`\`\`
+
+## 2. Invoke the orchestrator tool to continue the workflow
+
+You MUST initiate the following actions to proceed with the in-progress workflow you are
+participating in.
+
+### 2.1. Invoke the \`${orchestratorToolId}\` tool
+
+Invoke the \`${orchestratorToolId}\` tool to continue the workflow.
+
+### 2.2 Provide input values to the tool
+
+Provide the following input values to the \`${orchestratorToolId}\` tool:
+
+- \`${userInputKey}\`: The formatted result from step 1 (an object with a \`userUtterance\` property containing the user's response data).
+- \`${stateDataKey}\`: ${serializedState}
+
+This will continue the workflow orchestration process.
+`;
+  }
+
+  /**
+   * Render the properties that still need user input as plain text for a prompt.
+   *
+   * Each property becomes three bullet lines (property name, friendly name, description);
+   * consecutive properties are separated by one blank line.
+   *
+   * @param mcpToolInvocationData - Invocation data whose `input.propertiesRequiringInput`
+   *   supplies the properties to render
+   * @returns The rendered text, or an empty string when the list is empty
+   */
+  private generatePropertiesDescription(
+    mcpToolInvocationData: MCPToolInvocationData<typeof GET_INPUT_WORKFLOW_INPUT_SCHEMA>
+  ): string {
+    const renderedProperties: string[] = [];
+    for (const { propertyName, friendlyName, description } of mcpToolInvocationData.input
+      .propertiesRequiringInput) {
+      const bulletLines = [
+        `- Property Name: ${propertyName}`,
+        `- Friendly Name: ${friendlyName}`,
+        `- Description: ${description}`,
+      ];
+      renderedProperties.push(bulletLines.join('\n'));
+    }
+    return renderedProperties.join('\n\n');
+  }
 }
diff --git a/packages/mcp-workflow/tests/services/getInputService.test.ts b/packages/mcp-workflow/tests/services/getInputService.test.ts
@@ -84,6 +84,28 @@ describe('GetInputService', () => {
       ).toEqual(unfulfilledProperties);
     });
 
+    it('should set directUserInputCollection flag to true', () => {
+      // Arrange: register a canned result for the get-input tool id.
+      mockToolExecutor.setResult(toolId, { userUtterance: 'test response' });
+      const platformProperty = {
+        propertyName: 'platform',
+        friendlyName: 'platform',
+        description: 'Target platform',
+      };
+
+      // Act
+      service.getInput([platformProperty]);
+
+      // Assert: exactly one invocation was recorded, and it carries the flag.
+      const recordedCalls = mockToolExecutor.getCallHistory();
+      expect(recordedCalls.length).toBe(1);
+      const [onlyCall] = recordedCalls;
+      expect(onlyCall.directUserInputCollection).toBe(true);
+    });
+
     it('should log debug message with properties', () => {
       mockToolExecutor.setResult(toolId, {
         userUtterance: 'test',
diff --git a/packages/mcp-workflow/tests/tools/orchestrator/orchestratorTool.test.ts b/packages/mcp-workflow/tests/tools/orchestrator/orchestratorTool.test.ts