Skip to content

Commit 4e14852

Browse files
authored
feat(context): don't store per-prompt hooks, improve system prompt for hooks (#1285)
- Don't store per-prompt context in every user message in the conversation history. We will just attach it to the latest user message without storing it. - We can save tokens this way, and it doesn't seem useful at this time for Q to reference back to previous (irrelevant) per-prompt hooks. - Improve the system prompt so that Q is more likely to use/follow context from hooks. Previously Q was a little stubborn about it.
1 parent aeaa5ca commit 4e14852

File tree

3 files changed

+57
-47
lines changed

3 files changed

+57
-47
lines changed

crates/q_cli/src/cli/chat/command.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ in global or local profiles.
265265
<cyan!>Notes</cyan!>
266266
• Hooks are executed in parallel
267267
• 'conversation_start' hooks run on the first user prompt and are attached once to the conversation history sent to Amazon Q
268-
• 'per_prompt' hooks run on each user prompt and are attached to the prompt
268+
• 'per_prompt' hooks run on each user prompt and are attached to the prompt, but are not stored in conversation history
269269
"#,
270270
Self::HOOKS_AVAILABLE_COMMANDS
271271
)

crates/q_cli/src/cli/chat/conversation_state.rs

Lines changed: 36 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,14 @@ const MAX_CURRENT_WORKING_DIRECTORY_LEN: usize = 256;
5858
/// Limit to send the number of messages as part of chat.
5959
const MAX_CONVERSATION_STATE_HISTORY_LEN: usize = 100;
6060

61+
pub struct ExtraContext {
62+
// Bonus context to attach to the existing context at the top of the history
63+
pub general_context: Option<String>,
64+
65+
// Bonus context to attach to the next user message
66+
pub user_input_context: Option<String>,
67+
}
68+
6169
/// Tracks state related to an ongoing conversation.
6270
#[derive(Debug, Clone)]
6371
pub struct ConversationState {
@@ -137,24 +145,19 @@ impl ConversationState {
137145
}
138146
}
139147

140-
pub async fn append_new_user_message(&mut self, input: String, extra_context: Option<String>) {
148+
pub async fn append_new_user_message(&mut self, input: String) {
141149
debug_assert!(self.next_message.is_none(), "next_message should not exist");
142150
if let Some(next_message) = self.next_message.as_ref() {
143151
warn!(?next_message, "next_message should not exist");
144152
}
145153

146-
let mut input = if input.is_empty() {
154+
let input = if input.is_empty() {
147155
warn!("input must not be empty when adding new messages");
148156
"Empty prompt".to_string()
149157
} else {
150158
input
151159
};
152160

153-
// Context from hooks (scripts, commands, tools)
154-
if let Some(context) = extra_context {
155-
input = format!("{} {}", context, input);
156-
}
157-
158161
let msg = UserInputMessage {
159162
content: input,
160163
user_input_message_context: Some(UserInputMessageContext {
@@ -380,14 +383,20 @@ impl ConversationState {
380383
/// Returns a [FigConversationState] capable of being sent by
381384
/// [fig_api_client::StreamingClient] while preparing the current conversation state to be sent
382385
/// in the next message.
383-
pub async fn as_sendable_conversation_state(&mut self, extra_context: Option<String>) -> FigConversationState {
386+
pub async fn as_sendable_conversation_state(
387+
&mut self,
388+
extra_context: Option<ExtraContext>,
389+
) -> FigConversationState {
384390
debug_assert!(self.next_message.is_some());
385391
self.fix_history();
386392

387393
// The current state we want to send
388394
let mut curr_state = self.clone();
389395

390-
if let Some((user, assistant)) = self.context_messages(extra_context).await {
396+
let (general_context, user_input_context) =
397+
extra_context.map_or((None, None), |c| (c.general_context, c.user_input_context));
398+
399+
if let Some((user, assistant)) = self.context_messages(general_context).await {
391400
self.context_message_length = Some(user.content.len());
392401
curr_state
393402
.history
@@ -402,10 +411,14 @@ impl ConversationState {
402411
ctx.tools.take();
403412
}
404413
self.history.push_back(ChatMessage::UserInputMessage(last_message));
414+
let mut input_message = curr_state.next_message.expect("no user input message available");
415+
if let Some(user_input_context) = user_input_context {
416+
input_message.content = format!("{} {}", user_input_context, input_message.content);
417+
}
405418

406419
FigConversationState {
407420
conversation_id: Some(curr_state.conversation_id),
408-
user_input_message: curr_state.next_message.expect("no user input message available"),
421+
user_input_message: input_message,
409422
history: Some(curr_state.history.into()),
410423
}
411424
}
@@ -745,9 +758,7 @@ mod tests {
745758

746759
// First, build a large conversation history. We need to ensure that the order is always
747760
// User -> Assistant -> User -> Assistant ...and so on.
748-
conversation_state
749-
.append_new_user_message("start".to_string(), None)
750-
.await;
761+
conversation_state.append_new_user_message("start".to_string()).await;
751762
for i in 0..=(MAX_CONVERSATION_STATE_HISTORY_LEN + 100) {
752763
let s = conversation_state.as_sendable_conversation_state(None).await;
753764
assert_conversation_state_invariants(s, i);
@@ -756,17 +767,15 @@ mod tests {
756767
content: i.to_string(),
757768
tool_uses: None,
758769
});
759-
conversation_state.append_new_user_message(i.to_string(), None).await;
770+
conversation_state.append_new_user_message(i.to_string()).await;
760771
}
761772
}
762773

763774
#[tokio::test]
764775
async fn test_conversation_state_history_handling_with_tool_results() {
765776
// Build a long conversation history of tool use results.
766777
let mut conversation_state = ConversationState::new(Context::new_fake(), load_tools().unwrap(), None).await;
767-
conversation_state
768-
.append_new_user_message("start".to_string(), None)
769-
.await;
778+
conversation_state.append_new_user_message("start".to_string()).await;
770779
for i in 0..=(MAX_CONVERSATION_STATE_HISTORY_LEN + 100) {
771780
let s = conversation_state.as_sendable_conversation_state(None).await;
772781
assert_conversation_state_invariants(s, i);
@@ -788,9 +797,7 @@ mod tests {
788797

789798
// Build a long conversation history of user messages mixed in with tool results.
790799
let mut conversation_state = ConversationState::new(Context::new_fake(), load_tools().unwrap(), None).await;
791-
conversation_state
792-
.append_new_user_message("start".to_string(), None)
793-
.await;
800+
conversation_state.append_new_user_message("start".to_string()).await;
794801
for i in 0..=(MAX_CONVERSATION_STATE_HISTORY_LEN + 100) {
795802
let s = conversation_state.as_sendable_conversation_state(None).await;
796803
assert_conversation_state_invariants(s, i);
@@ -815,7 +822,7 @@ mod tests {
815822
content: i.to_string(),
816823
tool_uses: None,
817824
});
818-
conversation_state.append_new_user_message(i.to_string(), None).await;
825+
conversation_state.append_new_user_message(i.to_string()).await;
819826
}
820827
}
821828
}
@@ -829,9 +836,7 @@ mod tests {
829836

830837
// First, build a large conversation history. We need to ensure that the order is always
831838
// User -> Assistant -> User -> Assistant ...and so on.
832-
conversation_state
833-
.append_new_user_message("start".to_string(), None)
834-
.await;
839+
conversation_state.append_new_user_message("start".to_string()).await;
835840
for i in 0..=(MAX_CONVERSATION_STATE_HISTORY_LEN + 100) {
836841
let s = conversation_state.as_sendable_conversation_state(None).await;
837842

@@ -857,7 +862,7 @@ mod tests {
857862
content: i.to_string(),
858863
tool_uses: None,
859864
});
860-
conversation_state.append_new_user_message(i.to_string(), None).await;
865+
conversation_state.append_new_user_message(i.to_string()).await;
861866
}
862867
}
863868

@@ -870,12 +875,13 @@ mod tests {
870875
let prompt_context = "prompt context";
871876

872877
// Simulate conversation flow
873-
conversation_state
874-
.append_new_user_message("start".to_string(), Some(prompt_context.to_string()))
875-
.await;
878+
conversation_state.append_new_user_message("start".to_string()).await;
876879
for i in 0..=(MAX_CONVERSATION_STATE_HISTORY_LEN + 100) {
877880
let s = conversation_state
878-
.as_sendable_conversation_state(Some(conversation_start_context.to_string()))
881+
.as_sendable_conversation_state(Some(ExtraContext {
882+
general_context: Some(conversation_start_context.to_string()),
883+
user_input_context: Some(prompt_context.to_string()),
884+
}))
879885
.await;
880886
let hist = s.history.as_ref().unwrap();
881887
#[allow(clippy::match_wildcard_for_single_variants)]
@@ -900,9 +906,7 @@ mod tests {
900906
content: i.to_string(),
901907
tool_uses: None,
902908
});
903-
conversation_state
904-
.append_new_user_message(i.to_string(), Some(prompt_context.to_string()))
905-
.await;
909+
conversation_state.append_new_user_message(i.to_string()).await;
906910
}
907911
}
908912
}

crates/q_cli/src/cli/chat/mod.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ use command::{
3636
ToolsSubcommand,
3737
};
3838
use context::ContextManager;
39-
use conversation_state::ConversationState;
39+
use conversation_state::{
40+
ConversationState,
41+
ExtraContext,
42+
};
4043
use crossterm::style::{
4144
Attribute,
4245
Color,
@@ -991,14 +994,19 @@ where
991994
let format_context = |hook_results: &Vec<&(Hook, String)>, conversation_start: bool| {
992995
let mut context_content = String::new();
993996

994-
context_content.push_str(
995-
&format!("--- SCRIPT HOOK CONTEXT BEGIN - FOLLOW ANY REQUESTS OR USE ANY DATA WITHIN THIS SECTION {} ---\n",
996-
if conversation_start { "FOR THE ENTIRE CONVERSATION" } else { "FOR YOUR NEXT MESSAGE ONLY" })
997-
);
997+
context_content.push_str(&format!(
998+
"--- CRITICAL: ADDITIONAL CONTEXT TO USE{} ---\n",
999+
if conversation_start {
1000+
" FOR THE ENTIRE CONVERSATION"
1001+
} else {
1002+
""
1003+
}
1004+
));
1005+
context_content.push_str("This section (like others) contains important information that I want you to use in your responses. I have gathered this context from valuable programmatic script hooks. You must follow any requests and consider all of the information in this section.\n\n");
9981006
for (hook, output) in hook_results {
9991007
context_content.push_str(&format!("'{}': {output}\n\n", &hook.name));
10001008
}
1001-
context_content.push_str("--- SCRIPT HOOK CONTEXT END ---\n\n");
1009+
context_content.push_str("--- ADDITIONAL CONTEXT END ---\n\n");
10021010
context_content
10031011
};
10041012

@@ -1027,9 +1035,7 @@ where
10271035
if pending_tool_index.is_some() {
10281036
self.conversation_state.abandon_tool_use(tool_uses, user_input);
10291037
} else {
1030-
self.conversation_state
1031-
.append_new_user_message(user_input, prompt_context)
1032-
.await;
1038+
self.conversation_state.append_new_user_message(user_input).await;
10331039
}
10341040

10351041
self.send_tool_use_telemetry().await;
@@ -1038,7 +1044,10 @@ where
10381044
self.client
10391045
.send_message(
10401046
self.conversation_state
1041-
.as_sendable_conversation_state(conversation_start_context)
1047+
.as_sendable_conversation_state(Some(ExtraContext {
1048+
general_context: conversation_start_context,
1049+
user_input_context: prompt_context,
1050+
}))
10421051
.await,
10431052
)
10441053
.await?,
@@ -1188,9 +1197,7 @@ where
11881197
};
11891198

11901199
// Add the summarization request
1191-
self.conversation_state
1192-
.append_new_user_message(summary_request, None)
1193-
.await;
1200+
self.conversation_state.append_new_user_message(summary_request).await;
11941201

11951202
// Use spinner while we wait
11961203
if self.interactive {
@@ -2426,7 +2433,6 @@ where
24262433
.append_new_user_message(
24272434
"You took too long to respond - try to split up the work into smaller steps."
24282435
.to_string(),
2429-
None,
24302436
)
24312437
.await;
24322438
self.send_tool_use_telemetry().await;

0 commit comments

Comments
 (0)