Optimize Planner's reasoning capability (#450)

liqul · web-flow · commit b11d4055c110 · 2024-12-23T14:48:18.000+08:00
diff --git a/README.md b/README.md
@@ -23,6 +23,7 @@ Unlike many agent frameworks that only track the chat history with LLMs in text,
 
 
 ## 🆕 News
+- 📅2024-12-23: TaskWeaver has been integrated with [AgentOps](https://microsoft.github.io/TaskWeaver/docs/observability) for better observability and monitoring.🔍
 - 📅2024-09-13: We introduce the shared memory to store information that is shared between the roles in TaskWeaver. Please check the [memory](https://microsoft.github.io/TaskWeaver/docs/memory) for more details.🧠
 - 📅2024-09-13: We have enhanced the experience feature by allowing static and dynamic experience selection. Please check the [experience](https://microsoft.github.io/TaskWeaver/blog/experience) for more details.📚 
 - 📅2024-07-02: We have optimized TaskWeaver to support not-that-large language models served locally. Please check this [post](https://microsoft.github.io/TaskWeaver/blog/local_llm) for more details.🔗
@@ -31,7 +32,7 @@ Unlike many agent frameworks that only track the chat history with LLMs in text,
 - 📅2024-03-27: TaskWeaver now switches to `container` mode by default for code execution. Please check the [code execution](https://microsoft.github.io/TaskWeaver/docs/code_execution) for more details.🐳
 - 📅2024-03-07: TaskWeaver now supports configuration of different LLMs for various components, such as the Planner and CodeInterpreter. Please check the [multi-llm](https://microsoft.github.io/TaskWeaver/docs/llms/multi-llm) for more details.🔗
 - 📅2024-03-04: TaskWeaver now supports a [container](https://microsoft.github.io/TaskWeaver/docs/code_execution) mode, which provides a more secure environment for code execution.🐳
-- 📅2024-02-28: TaskWeaver now offers a [CLI-only](https://microsoft.github.io/TaskWeaver/docs/advanced/cli_only) mode, enabling users to interact seamlessly with the Command Line Interface (CLI) using natural language.📟
+<!-- - 📅2024-02-28: TaskWeaver now offers a [CLI-only](https://microsoft.github.io/TaskWeaver/docs/advanced/cli_only) mode, enabling users to interact seamlessly with the Command Line Interface (CLI) using natural language.📟 -->
 <!-- - 📅2024-02-01: TaskWeaver now has a plugin [document_retriever](https://github.com/microsoft/TaskWeaver/blob/main/project/plugins/README.md#document_retriever) for RAG based on a knowledge base.📚 -->
 <!-- - 📅2024-01-30: TaskWeaver introduces a new plugin-only mode that securely generates calls to specified plugins without producing extraneous code.🪡 --> 
 <!-- - 📅2024-01-23: TaskWeaver can now be personalized by transforming your chat histories into enduring [experiences](https://microsoft.github.io/TaskWeaver/docs/customization/experience) 🎉 -->
diff --git a/project/examples/planner_examples/example-planner-2.yaml b/project/examples/planner_examples/example-planner-2.yaml
@@ -11,11 +11,14 @@ rounds:
         send_from: Planner
         send_to: User
         attachment_list:
-        - type: init_plan
-          content: |-
-            1. Respond to the user's greeting
-        - type: plan
-          content: |-
-            1. Respond to the user's greeting
-        - type: current_plan_step
-          content: 1. Respond to the user's greeting
+          - type: reasoning
+            content: |-
+                The user greets the Planner
+          - type: init_plan
+            content: |-
+              1. Respond to the user's greeting
+          - type: plan
+            content: |-
+              1. Respond to the user's greeting
+          - type: current_plan_step
+            content: 1. Respond to the user's greeting
diff --git a/project/examples/planner_examples/example-planner-echo.yaml b/project/examples/planner_examples/example-planner-echo.yaml
@@ -11,6 +11,9 @@ rounds:
         send_from: Planner
         send_to: Echo
         attachment_list:
+        - type: reasoning
+          content: |-
+            The user wants to echo the input 'Hello World'
         - type: init_plan
           content: |-
             1. Ask Echo to echo the user's input, 'Hello World'
@@ -27,6 +30,9 @@ rounds:
         send_from: Planner
         send_to: User
         attachment_list:
+          - type: reasoning
+            content: |-
+              The user query is successfully answered
           - type: init_plan
             content: |-
               1. Ask Echo to echo the user's input, 'Hello World'
@@ -35,7 +41,4 @@ rounds:
               1. Ask Echo to echo the user's input, 'Hello World'
           - type: current_plan_step
             content: 1. Ask Echo to echo the user's input, 'Hello World'
-          - type: review
-            content: |-
-              The user query is successfully answered
               
diff --git a/project/examples/planner_examples/example-planner.yaml b/project/examples/planner_examples/example-planner.yaml
@@ -11,17 +11,20 @@ rounds:
         send_from: Planner
         send_to: CodeInterpreter
         attachment_list:
-        - type: init_plan
-          content: |-
-            1. Load the data file
-            2. Count the rows of the loaded data <sequentially depends on 1>
-            3. Check the execution result and report the result to the user <interactively depends on 2>
-        - type: plan
-          content: |-
-            1. Instruct CodeInterpreter to load the data file and count the rows of the loaded data
-            2. Check the execution result and report the result to the user if it is correct
-        - type: current_plan_step
-          content: 1. Instruct CodeInterpreter to load the data file and count the rows of the loaded data
+          - type: reasoning
+            content: |-
+              The user wants to count the rows of the data file /home/data.csv. The first step is to load the data file and count the rows of the loaded data.
+          - type: init_plan
+            content: |-
+              1. Load the data file
+              2. Count the rows of the loaded data <sequentially depends on 1>
+              3. Check the execution result and report the result to the user <interactively depends on 2>
+          - type: plan
+            content: |-
+              1. Instruct CodeInterpreter to load the data file and count the rows of the loaded data
+              2. Check the execution result and report the result to the user if it is correct
+          - type: current_plan_step
+            content: 1. Instruct CodeInterpreter to load the data file and count the rows of the loaded data
       - message: Load the data file /home/data.csv successfully and there are 100 rows in the data file
         send_from: CodeInterpreter
         send_to: Planner
@@ -30,6 +33,11 @@ rounds:
         send_from: Planner
         send_to: User
         attachment_list:
+          - type: reasoning
+            content: |-
+              The data file /home/data.csv is loaded and there are 100 rows in the data file
+              The execution result is correct
+              The user query is successfully answered
           - type: init_plan
             content: |-
               1. Load the data file
@@ -41,8 +49,3 @@ rounds:
               2. Check the execution result and report the result to the user if it is correct
           - type: current_plan_step
             content: 2. report the result to the user
-          - type: review
-            content: |-
-              The data file /home/data.csv is loaded and there are 100 rows in the data file
-              The execution result is correct
-              The user query is successfully answered
diff --git a/taskweaver/memory/attachment.py b/taskweaver/memory/attachment.py
@@ -12,7 +12,7 @@ class AttachmentType(Enum):
     init_plan = "init_plan"
     plan = "plan"
     current_plan_step = "current_plan_step"
-    review = "review"
+    reasoning = "reasoning"
 
     # CodeInterpreter - generate code
     thought = "thought"
diff --git a/taskweaver/planner/planner_prompt.yaml b/taskweaver/planner/planner_prompt.yaml
@@ -21,15 +21,14 @@ instruction_template: |-
   
   ## Planner Character
   - Planner's main job is to make planning and to instruct Workers to resolve the request from the User.
-  - Planner can conduct basic analysis (e.g., comprehension, extraction, etc.) to solve simple problems after reading the messages from the User and the Workers. 
-  - Planner should first try to solve the task by itself before reaching out to the Workers for their special expertise.
+  - Planner should independently handle basic tasks such as information extraction from text files, using its reasoning and comprehension skills before considering the involvement of Workers.
   - Planner can assign different subtasks to different Workers, and each subtask should be assigned to only one Worker.
   - Planner must reject the User's request if it contains potential security risks or illegal activities.
   - Planner should ask the User to provide additional information critical for problem solving, but only after trying the best.
   - Planner can talk to the User and Workers by specifying the `send_to` field in the response, but MUST NOT talk to the Planner itself.
   - Planner should refine the plan according to its observations from the replies of the Workers or the new requests of User.
   - Planner needs to inform Workers on the User's request, the current step, and necessary information to complete the task.
-  - Planner must check the Worker's response and provide feedback to the Worker if the response is incorrect or incomplete.
+  - Planner must thoroughly review the Worker's response and provide feedback to the Worker if the response is incorrect or incomplete.
   - Planner can ignore the permission or file access issues since Workers are powerful and can handle them.
   
   ## Planner's planning process
@@ -120,6 +119,10 @@ response_json_schema: |-
         "response": {
             "type": "object",
             "properties": {
+                "reasoning": {
+                    "type": "string",
+                    "description": "The reasoning behind the Planner's decision. It should include the analysis of the User's request, the Workers' responses, and the current environment context."
+                },
                 "init_plan": {
                     "type": "string",
                     "description": "The initial plan to decompose the User's task into subtasks and list them as the detailed subtask steps. The initial plan must contain dependency annotations for sequential and interactive dependencies."
@@ -132,10 +135,6 @@ response_json_schema: |-
                     "type": "string",
                     "description": "The current step Planner is executing."
                 },
-                "review": {
-                    "type": "string",
-                    "description": "The review of the current step. If the Worker's response is incorrect or incomplete, Planner should provide feedback to the Worker."
-                },
                 "send_to": {
                     "type": "string",
                     "description": "The name of character (User or name of the Worker) that Planner wants to speak to."
@@ -146,12 +145,12 @@ response_json_schema: |-
                 }
             },
             "required": [
+                "reasoning",
                 "init_plan",
                 "plan",
                 "current_plan_step",
                 "send_to",
-                "message",
-                "review"
+                "message"
             ],
             "additionalProperties": false
         }