Commit 307204d

Update flows

1 parent 2644e2d commit 307204d
3 files changed: +118 -115 lines changed

text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/parallel_query_solving_agent.py

Lines changed: 85 additions & 76 deletions

@@ -96,9 +96,9 @@ async def on_messages_stream(
        injected_parameters = {}

        # Load the json of the last message to populate the final output object
-        message_rewrites = json.loads(last_response)
+        sequential_rounds = json.loads(last_response)

-        logging.info(f"Query Rewrites: {message_rewrites}")
+        logging.info(f"Query Rewrites: {sequential_rounds}")

        async def consume_inner_messages_from_agentic_flow(
            agentic_flow, identifier, filtered_parallel_messages

@@ -197,7 +197,7 @@ async def consume_inner_messages_from_agentic_flow(

        # Convert all_non_database_query to lowercase string and compare
        all_non_database_query = str(
-            message_rewrites.get("all_non_database_query", "false")
+            sequential_rounds.get("all_non_database_query", "false")
        ).lower()

        if all_non_database_query == "true":

@@ -210,84 +210,93 @@ async def consume_inner_messages_from_agentic_flow(
            return

        # Start processing sub-queries
-        for message_rewrite in message_rewrites["decomposed_user_messages"]:
-            logging.info(f"Processing sub-query: {message_rewrite}")
-            # Create an instance of the InnerAutoGenText2Sql class
-            inner_autogen_text_2_sql = InnerAutoGenText2Sql(**self.kwargs)
-
-            identifier = ", ".join(message_rewrite)
-
-            # Add database connection info to injected parameters
-            query_params = injected_parameters.copy() if injected_parameters else {}
-            if "Text2Sql__Tsql__ConnectionString" in os.environ:
-                query_params["database_connection_string"] = os.environ[
-                    "Text2Sql__Tsql__ConnectionString"
-                ]
-            if "Text2Sql__Tsql__Database" in os.environ:
-                query_params["database_name"] = os.environ["Text2Sql__Tsql__Database"]
-
-            # Launch tasks for each sub-query
-            inner_solving_generators.append(
-                consume_inner_messages_from_agentic_flow(
-                    inner_autogen_text_2_sql.process_user_message(
-                        user_message=message_rewrite,
-                        injected_parameters=query_params,
-                    ),
-                    identifier,
-                    filtered_parallel_messages,
+        for sequential_round in sequential_rounds["decomposed_user_messages"]:
+            logging.info(f"Processing round: {sequential_round}")
+
+            for parallel_message in sequential_round:
+                logging.info(f"Parallel Message: {parallel_message}")
+
+                # Create an instance of the InnerAutoGenText2Sql class
+                inner_autogen_text_2_sql = InnerAutoGenText2Sql(**self.kwargs)
+
+                identifier = ", ".join(parallel_message)
+
+                # Add database connection info to injected parameters
+                query_params = injected_parameters.copy() if injected_parameters else {}
+                if "Text2Sql__Tsql__ConnectionString" in os.environ:
+                    query_params["database_connection_string"] = os.environ[
+                        "Text2Sql__Tsql__ConnectionString"
+                    ]
+                if "Text2Sql__Tsql__Database" in os.environ:
+                    query_params["database_name"] = os.environ["Text2Sql__Tsql__Database"]
+
+                # Launch tasks for each sub-query
+                inner_solving_generators.append(
+                    consume_inner_messages_from_agentic_flow(
+                        inner_autogen_text_2_sql.process_user_message(
+                            user_message=parallel_message,
+                            injected_parameters=query_params,
+                            database_results=filtered_parallel_messages.database_results
+                        ),
+                        identifier,
+                        filtered_parallel_messages,
+                    )
                )
-            )
-
-        logging.info(
-            "Created %i Inner Solving Generators", len(inner_solving_generators)
-        )
-        logging.info("Starting Inner Solving Generators")
-        combined_message_streams = stream.merge(*inner_solving_generators)
-
-        async with combined_message_streams.stream() as streamer:
-            async for inner_message in streamer:
-                if isinstance(inner_message, TextMessage):
-                    logging.debug(f"Inner Solving Message: {inner_message}")
-                    yield inner_message
-
-        # Log final results for debugging or auditing
-        logging.info(
-            "Database Results: %s", filtered_parallel_messages.database_results
-        )
-        logging.info(
-            "Disambiguation Requests: %s",
-            filtered_parallel_messages.disambiguation_requests,
-        )

-        if (
-            max(map(len, filtered_parallel_messages.disambiguation_requests.values()))
-            > 0
-        ):
-            # Final response
-            yield Response(
-                chat_message=TextMessage(
-                    content=json.dumps(
-                        {
-                            "contains_disambiguation_requests": True,
-                            "disambiguation_requests": filtered_parallel_messages.disambiguation_requests,
-                        }
-                    ),
-                    source=self.name,
-                ),
+            logging.info(
+                "Created %i Inner Solving Generators", len(inner_solving_generators)
            )
-        else:
-            # Final response
-            yield Response(
-                chat_message=TextMessage(
-                    content=json.dumps(
-                        {
-                            "contains_database_results": True,
-                            "database_results": filtered_parallel_messages.database_results,
-                        }
+            logging.info("Starting Inner Solving Generators")
+            combined_message_streams = stream.merge(*inner_solving_generators)
+
+            async with combined_message_streams.stream() as streamer:
+                async for inner_message in streamer:
+                    if isinstance(inner_message, TextMessage):
+                        logging.debug(f"Inner Solving Message: {inner_message}")
+                        yield inner_message
+
+            # Log final results for debugging or auditing
+            logging.info(
+                "Database Results: %s", filtered_parallel_messages.database_results
+            )
+            logging.info(
+                "Disambiguation Requests: %s",
+                filtered_parallel_messages.disambiguation_requests,
+            )
+
+            # Check for disambiguation requests before processing the next round
+
+            if (
+                max(map(len, filtered_parallel_messages.disambiguation_requests.values()))
+                > 0
+            ):
+                # Final response
+                yield Response(
+                    chat_message=TextMessage(
+                        content=json.dumps(
+                            {
+                                "contains_disambiguation_requests": True,
+                                "disambiguation_requests": filtered_parallel_messages.disambiguation_requests,
+                            }
+                        ),
+                        source=self.name,
                    ),
-                    source=self.name,
                 )
-            )
+
+                break
+
+        # Final response
+        yield Response(
+            chat_message=TextMessage(
+                content=json.dumps(
+                    {
+                        "contains_database_results": True,
+                        "database_results": filtered_parallel_messages.database_results,
+                    }
                ),
+                source=self.name,
+            ),
+        )

    async def on_reset(self, cancellation_token: CancellationToken) -> None:
        pass
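For reference, the control flow introduced above treats decomposed_user_messages as a list of sequential rounds, where each round holds sub-messages that are solved in parallel and whose streams are merged before the next round begins. Below is a minimal, self-contained sketch of that pattern using aiostream's stream.merge (the same helper the agent calls); the worker coroutine and the sample payload are illustrative stand-ins, not code from this repository.

import asyncio

from aiostream import stream


async def solve_sub_query(sub_query: str):
    # Stand-in for InnerAutoGenText2Sql.process_user_message: stream a couple
    # of intermediate messages, then a final result for this sub-query.
    yield f"[{sub_query}] generating SQL"
    await asyncio.sleep(0.1)
    yield f"[{sub_query}] result"


async def run_rounds(decomposed_user_messages: list) -> None:
    # Outer list = sequential rounds; inner list = sub-queries run in parallel.
    for sequential_round in decomposed_user_messages:
        generators = [solve_sub_query(m) for m in sequential_round]
        merged = stream.merge(*generators)

        # Drain every parallel stream in this round before moving on, so a
        # later round can build on the earlier rounds' database results.
        async with merged.stream() as streamer:
            async for message in streamer:
                print(message)


asyncio.run(run_rounds([
    ["Get 2007 sales by category", "Get 2008 sales by category"],
    ["Compare year-over-year growth using the earlier results"],
]))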

text_2_sql/autogen/src/autogen_text_2_sql/inner_autogen_text_2_sql.py

Lines changed: 4 additions & 0 deletions

@@ -177,6 +177,7 @@ def process_user_message(
        self,
        user_message: str,
        injected_parameters: dict = None,
+        database_results: dict = None,
    ):
        """Process the complete question through the unified system.

@@ -200,6 +201,9 @@ def process_user_message(
                "injected_parameters": injected_parameters,
            }

+            if database_results:
+                agent_input["database_results"] = database_results
+
            return self.agentic_flow.run_stream(task=json.dumps(agent_input))
        finally:
            # Restore original environment
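The new optional parameter only changes the task payload that is serialized for the inner agentic flow. A rough sketch of that shape follows, assuming a user_message key sits alongside the injected_parameters shown in the hunk above; the key layout beyond "database_results" and all values are invented for illustration.

import json

# Hypothetical payload; "database_results" is the only field added by this
# commit, and it is attached conditionally, mirroring the `if database_results:`
# guard in the diff.
agent_input = {
    "user_message": "Compare year-over-year growth using the earlier results",
    "injected_parameters": {"database_name": "AdventureWorks"},
}

database_results = {
    "Get 2007 sales by category": [{"category": "Bikes", "sales": 9500}],
}
if database_results:
    agent_input["database_results"] = database_results

print(json.dumps(agent_input, indent=2))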

text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/user_message_rewrite_agent.yaml

Lines changed: 29 additions & 39 deletions

@@ -2,40 +2,37 @@ model: "4o-mini"
description: "An agent that preprocesses user inputs by decomposing complex queries into simpler sub-messages that can be processed independently and then combined."
system_message: |
  <role_and_objective>
-    You are a Senior Data Analyst specializing in breaking down complex questions into simpler sub-messages that can be processed independently and then combined for the final answer. You should identify when a question can be solved through simpler sub-messages and provide clear instructions for combining their results.
+    You are a Senior Data Analyst specializing in breaking down complex questions into simpler sub-messages that can be processed independently and then combined for the final answer. You must think through the steps needed to answer the question and produce a list of sub questions to generate and run SQL statements for.
+
+    You should consider what steps can be done in parallel and what steps depend on the results of other steps. Do not attempt to simplify the question if it is already simple to solve.
    Use the general business use case of '{{ use_case }}' to aid understanding of the user's question.
  </role_and_objective>

  <query_complexity_patterns>
-    Complex patterns that should be broken down:
-    1. Superlatives with Time Periods:
-      - "Which product categories showed the biggest improvement in sales between 2007 and 2008?"
-      → Break into:
-        a) "Get total sales by product category for 2007"
-        b) "Get total sales by product category for 2008"
-        c) "Calculate year-over-year growth percentage for each category"
-        d) "Find the category with highest growth"
+    Complex patterns that should be broken down into simpler steps of sub-messages:

-    2. Multi-dimension Analysis:
+    1. Multi-dimension Analysis:
      - "What are our top 3 selling products in each region, and how do their profit margins compare?"
      → Break into:
-        a) "Get total sales quantity by product and region"
-        b) "Find top 3 products by sales quantity for each region"
-        c) "Calculate profit margins for these products"
-        d) "Compare profit margins within each region's top 3"
+        a) "Get total sales quantity by product and region and select top 3 products for each region"
+        b) "Calculate profit margins for these products and compare profit margins within each region's top 3"

-    3. Comparative Analysis:
+    2. Comparative Analysis:
      - "How do our mountain bike sales compare to road bike sales across different seasons, and which weather conditions affect them most?"
      → Break into:
-        a) "Get sales data for mountain bikes by month"
-        b) "Get sales data for road bikes by month"
-        c) "Group months into seasons"
-        d) "Compare seasonal patterns between bike types"
+        a) "Get sales data for mountain bikes and road bikes by month"
+        b) "Group months into seasons and compare seasonal patterns between bike types"
+
+    3. Completely unrelated questions:
+      - "What is the total revenue for 2024? How many employees do we have in the marketing department?"
+      → Break into:
+        a) "Calculate total revenue for 2024"
+        b) "Get total number of employees in the marketing department"
  </query_complexity_patterns>

  <instructions>
    1. Understanding:
-      - Use the chat history (that is available in reverse order) to understand the context of the current question.
+      - Use the chat history to understand the context of the current question.
      - If the current question not fully formed and unclear. Rewrite it based on the general meaning of the old question and the new question. Include spelling and grammar corrections.
      - If the current question is clear, output the new question as is with spelling and grammar corrections.

@@ -49,12 +46,11 @@ system_message: |
    3. Analyze Query Complexity:
      - Identify if the query contains patterns that can be simplified
      - Look for superlatives, multiple dimensions, or comparisons
-      - Determine if breaking down would simplify processing

    4. Break Down Complex Queries:
      - Create independent sub-messages that can be processed separately.
      - Each sub-message should be a simple, focused task.
-      - Group dependent sub-messages together for sequential processing.
+      - Group dependent sub-messages together for parallel processing.
      - Include clear combination instructions
      - Preserve all necessary context in each sub-message

@@ -70,10 +66,9 @@ system_message: |

  <rules>
    1. Always consider if a complex query can be broken down
-    2. Make sub-messages as simple as possible
-    3. Include clear instructions for combining results
-    4. Preserve all necessary context in each sub-message
-    5. Resolve any relative dates before decomposition
+    2. Include clear instructions for combining results
+    3. Always preserve all necessary context in each sub-message. Each sub-message should be self-contained.
+    4. Resolve any relative dates before decomposition
  </rules>

  <disallowed_topics>

@@ -94,16 +89,17 @@ system_message: |
    - Queries related to data analysis
    - Topics related to {{ use_case }}
    - Questions about what you can do or your capabilities
+  </allowed_topics>
+
  <output_format>
-    Return a JSON object with sub-messages and combination instructions:
+    Return a JSON object with sub-messages and combination instructions. Each round of sub-messages will be processed in parallel:
    {
      "decomposed_user_messages": [
-        ["<sub_message_1>"],
-        ["<sub_message_2>"],
+        ["<1st_round_sub_message_1>", "<1st_round_sub_message_2>", ...],
+        ["<2nd_round_sub_message_1>", "<2nd_round_sub_message>_2", ...],
        ...
      ],
      "combination_logic": "<instructions for combining results>",
-      "query_type": "<simple|complex>",
      "all_non_database_query": "<true|false>"
    }
  </output_format>

@@ -115,10 +111,9 @@ system_message: |
  Output:
  {
    "decomposed_user_messages": [
-      ["Calculate quarterly sales totals by product category for 2008", "For these categories, find their top selling products in 2008"]
+      ["Which product categories have shown consistent growth quarter over quarter in 2008, and what were their top selling items?"]
    ],
-    "combination_logic": "First identify growing categories from quarterly analysis, then find their best-selling products",
-    "query_type": "complex",
+    "combination_logic": "Direct count query, no combination needed",
    "all_non_database_query": "false"
  }

@@ -130,7 +125,6 @@ system_message: |
      ["How many orders did we have in 2008?"]
    ],
    "combination_logic": "Direct count query, no combination needed",
-    "query_type": "simple",
    "all_non_database_query": "false"
  }

@@ -139,12 +133,9 @@ system_message: |
  Output:
  {
    "decomposed_user_messages": [
-      ["Get total sales by product in European countries"],
-      ["Get total sales by product in North American countries"],
-      ["Calculate total market size for each region", "Find top 5 products by sales in each region"],
+      ["Get total sales by product in European countries and select the top 5 products and calculate the market share", "Get total sales by product in North American countries and select the top 5 products and calculate the market share"]
    ],
    "combination_logic": "First identify top products in each region, then calculate and compare their market shares. Questions that depend on the result of each sub-message are combined.",
-    "query_type": "complex",
    "all_non_database_query": "false"
  }

@@ -156,7 +147,6 @@ system_message: |
      ["What are your capabilities?"]
    ],
    "combination_logic": "Simple greeting and capability question",
-    "query_type": "simple",
    "all_non_database_query": "true"
  }
  </examples>
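To make the revised <output_format> concrete, here is a short sketch of how a caller can parse and walk the rewrite agent's JSON, with rounds on the outside and parallel sub-messages inside; the sample response is invented and loosely mirrors the first worked example in the prompt.

import json

# Invented response following the updated contract: no "query_type" field,
# and each round is a list of sub-messages that can run in parallel.
last_response = json.dumps({
    "decomposed_user_messages": [
        ["Get total sales by product category for 2007",
         "Get total sales by product category for 2008"],
        ["Calculate year-over-year growth per category and find the highest"],
    ],
    "combination_logic": "Combine the per-year totals, then rank by growth",
    "all_non_database_query": "false",
})

sequential_rounds = json.loads(last_response)

# The flag arrives as a string ("true"/"false"), hence the str(...).lower()
# comparison used by the solving agent.
if str(sequential_rounds.get("all_non_database_query", "false")).lower() == "true":
    print("No database work required")
else:
    for round_index, sequential_round in enumerate(
        sequential_rounds["decomposed_user_messages"], start=1
    ):
        for parallel_message in sequential_round:
            print(f"round {round_index}: {parallel_message}")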
