Skip to content

Commit 50b07e1

Browse files
committed
Encourage the LLM to run small exploratory queries with results returned directly, and then to run the 'final' query with results saved rather than returned directly (because large result sets overflow the context limit)
1 parent e2d5ec5 commit 50b07e1

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed

lib/idp_common_pkg/idp_common/agents/analytics/agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def create_analytics_agent(
6868
When generating SQL:
6969
- ALWAYS put ALL column names in double quotes when including ANYHWERE inside of a query.
7070
- Use standard SQL syntax compatible with Amazon Athena, for example use standard date arithmetic that's compatible with Athena.
71-
- Do not guess at table or column names. Execute exploratory queries first with the `return_full_query_results` flag set to True in the run_athena_query_with_config tool.
71+
- Do not guess at table or column names. Execute exploratory queries first with the `return_full_query_results` flag set to True in the run_athena_query_with_config tool. Your final query should use `return_full_query_results` set to False. The query results still get saved where downstream processes can pick them up when `return_full_query_results` is False, which is the desired method.
7272
- Use a "SHOW TABLES" query to list all dynamic tables available to you.
7373
- Use a "DESCRIBE" query to see the precise names of columns and their associated data types, before writing any of your own queries.
7474
- Include appropriate table joins when needed
@@ -113,6 +113,7 @@ def run_athena_query_with_config(
113113
return_full_query_results: If True, includes the full query results as CSV string in the response.
114114
WARNING: This can return very large strings and should only be used for small exploratory
115115
queries like DESCRIBE, SHOW TABLES, or queries with LIMIT clauses. Default is False.
116+
Use False whenever possible.
116117
117118
Returns:
118119
Dict containing either query results or error information

lib/idp_common_pkg/idp_common/agents/analytics/tools/athena_tool.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
logger = logging.getLogger(__name__)
1616

17+
# Maximum number of rows that can be returned directly when return_full_query_results=True
18+
MAX_ROWS_TO_RETURN_DIRECTLY = 100
19+
1720

1821
@tool
1922
def run_athena_query(
@@ -24,7 +27,7 @@ def run_athena_query(
2427
2528
Uses boto3 to execute the query on Athena. Query results are stored in s3.
2629
Successful execution will return a dict with result_column_metadata,
27-
result_csv_s3_uri, rows_returned, and original_query.
30+
result_csv_s3_uri, number of rows_returned, and original_query.
2831
2932
Args:
3033
query: SQL query string to execute
@@ -101,6 +104,24 @@ def run_athena_query(
101104
# For queries with headers (like SELECT), Athena typically includes headers in the first row
102105
total_rows = len(results["ResultSet"]["Rows"])
103106

107+
# Check if return_full_query_results is True and we have too many rows
108+
if return_full_query_results and total_rows > MAX_ROWS_TO_RETURN_DIRECTLY:
109+
logger.warning(
110+
f"Query returned {total_rows} rows, which exceeds the limit of {MAX_ROWS_TO_RETURN_DIRECTLY} "
111+
f"for return_full_query_results=True"
112+
)
113+
return {
114+
"success": False,
115+
"error": (
116+
f"More than {MAX_ROWS_TO_RETURN_DIRECTLY} rows were retrieved when the tool was called with "
117+
"`return_full_query_results` set to True. This flag should only be used for small queries "
118+
"returning a few rows. Please try again with `return_full_query_results` set to False, "
119+
"in which case the query results will be saved rather than returned directly."
120+
),
121+
"query": query,
122+
"rows_returned": total_rows,
123+
}
124+
104125
result_dict = {
105126
"success": True,
106127
"result_column_metadata": column_metadata,

lib/idp_common_pkg/idp_common/agents/common/monitoring.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def on_before_tool_invocation(self, event) -> None:
180180
tool_name = event.tool_use.get("name", "unknown")
181181
tool_input = event.tool_use.get("input", {})
182182

183-
self.monitor_logger.info(f"🔧 Invoking tool: {tool_name}")
183+
self.monitor_logger.info(f"🔧 Invoking tool: {tool_name} --> {event}")
184184
if self.enable_detailed_logging:
185185
self.monitor_logger.debug(f"Tool input: {json.dumps(tool_input, indent=2)}")
186186

0 commit comments

Comments
 (0)