Skip to content

Commit 02e7741

Browse files
authored
Merge pull request #132 from aws/hayemaxi-telemetry
feat(telemetry): error, cancelled, auth metrics
2 parents 6170098 + 94535a3 commit 02e7741

File tree

9 files changed

+269
-59
lines changed

9 files changed

+269
-59
lines changed

crates/chat-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ clap = { version = "4.5.32", features = [
5454
] }
5555
clap_complete = "4.5.46"
5656
clap_complete_fig = "4.4.0"
57-
color-eyre = "0.6.2"
57+
color-eyre = "0.6.5"
5858
color-print = "0.3.5"
5959
convert_case = "0.8.0"
6060
cookie = "0.18.1"

crates/chat-cli/src/auth/builder_id.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,16 @@ pub async fn logout(database: &mut Database) -> Result<(), AuthError> {
536536
Ok(())
537537
}
538538

539+
pub async fn get_start_url_and_region(database: &Database) -> (Option<String>, Option<String>) {
540+
// NOTE: Database provides direct methods to access the start_url and region, but they are not
541+
// guaranteed to be up to date in the chat session. Example: login is changed mid-chat session.
542+
let token = BuilderIdToken::load(database).await;
543+
match token {
544+
Ok(Some(t)) => (t.start_url, t.region),
545+
_ => (None, None),
546+
}
547+
}
548+
539549
#[derive(Debug, Clone)]
540550
pub struct BearerResolver;
541551

crates/chat-cli/src/cli/chat/mod.rs

Lines changed: 128 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use std::collections::{
2424
HashSet,
2525
VecDeque,
2626
};
27+
use std::error::Error;
2728
use std::io::{
2829
IsTerminal,
2930
Read,
@@ -66,6 +67,7 @@ use crossterm::{
6667
terminal,
6768
};
6869
use eyre::{
70+
Chain,
6971
ErrReport,
7072
Result,
7173
bail,
@@ -160,8 +162,11 @@ use crate::mcp_client::{
160162
PromptGetResult,
161163
};
162164
use crate::platform::Context;
163-
use crate::telemetry::TelemetryThread;
164165
use crate::telemetry::core::ToolUseEventBuilder;
166+
use crate::telemetry::{
167+
TelemetryResult,
168+
TelemetryThread,
169+
};
165170
use crate::util::CLI_BINARY_NAME;
166171

167172
/// Help text for the compact command
@@ -843,7 +848,7 @@ impl ChatContext {
843848
} => {
844849
let tool_uses_clone = tool_uses.clone();
845850
tokio::select! {
846-
res = self.handle_input(telemetry, input, tool_uses, pending_tool_index) => res,
851+
res = self.handle_input(telemetry, database, input, tool_uses, pending_tool_index) => res,
847852
Ok(_) = ctrl_c_stream => Err(ChatError::Interrupted { tool_uses: tool_uses_clone })
848853
}
849854
},
@@ -856,7 +861,7 @@ impl ChatContext {
856861
} => {
857862
let tool_uses_clone = tool_uses.clone();
858863
tokio::select! {
859-
res = self.compact_history(telemetry, tool_uses, pending_tool_index, prompt, show_summary, help) => res,
864+
res = self.compact_history(telemetry, database, tool_uses, pending_tool_index, prompt, show_summary, help) => res,
860865
Ok(_) = ctrl_c_stream => Err(ChatError::Interrupted { tool_uses: tool_uses_clone })
861866
}
862867
},
@@ -875,19 +880,24 @@ impl ChatContext {
875880
},
876881
ChatState::HandleResponseStream(response) => tokio::select! {
877882
res = self.handle_response(database, telemetry, response) => res,
878-
Ok(_) = ctrl_c_stream => Err(ChatError::Interrupted { tool_uses: None })
883+
Ok(_) = ctrl_c_stream => {
884+
self.send_chat_telemetry(database, telemetry, None, TelemetryResult::Cancelled, None).await;
885+
886+
Err(ChatError::Interrupted { tool_uses: None })
887+
}
879888
},
880889
ChatState::Exit => return Ok(()),
881890
};
882891

883-
next_state = Some(self.handle_state_execution_result(database, result).await?);
892+
next_state = Some(self.handle_state_execution_result(telemetry, database, result).await?);
884893
}
885894
}
886895

887896
/// Handles the result of processing a [ChatState], returning the next [ChatState] to change
888897
/// to.
889898
async fn handle_state_execution_result(
890899
&mut self,
900+
telemetry: &TelemetryThread,
891901
database: &mut Database,
892902
result: Result<ChatState, ChatError>,
893903
) -> Result<ChatState, ChatError> {
@@ -896,6 +906,9 @@ impl ChatContext {
896906
match result {
897907
Ok(state) => Ok(state),
898908
Err(e) => {
909+
self.send_error_telemetry(database, telemetry, get_error_string(&e))
910+
.await;
911+
899912
macro_rules! print_err {
900913
($prepend_msg:expr, $err:expr) => {{
901914
queue!(
@@ -1027,9 +1040,11 @@ impl ChatContext {
10271040
/// model.
10281041
///
10291042
/// The last two user messages in the history are not included in the compaction process.
1043+
#[allow(clippy::too_many_arguments)]
10301044
async fn compact_history(
10311045
&mut self,
10321046
telemetry: &TelemetryThread,
1047+
database: &mut Database,
10331048
tool_uses: Option<Vec<QueuedTool>>,
10341049
pending_tool_index: Option<usize>,
10351050
custom_prompt: Option<String>,
@@ -1085,32 +1100,43 @@ impl ChatContext {
10851100
// retry except with less context included.
10861101
let response = match response {
10871102
Ok(res) => res,
1088-
Err(e) => match e {
1089-
crate::api_client::ApiClientError::ContextWindowOverflow => {
1090-
self.conversation_state.clear(true);
1091-
if self.interactive {
1092-
self.spinner.take();
1093-
execute!(
1094-
self.output,
1095-
terminal::Clear(terminal::ClearType::CurrentLine),
1096-
cursor::MoveToColumn(0),
1097-
style::SetForegroundColor(Color::Yellow),
1098-
style::Print(
1099-
"The context window usage has overflowed. Clearing the conversation history.\n\n"
1100-
),
1101-
style::SetAttribute(Attribute::Reset)
1102-
)?;
1103-
}
1104-
return Ok(ChatState::PromptUser {
1105-
tool_uses,
1106-
pending_tool_index,
1107-
skip_printing_tools: true,
1108-
});
1109-
},
1110-
e => return Err(e.into()),
1103+
Err(e) => {
1104+
self.send_chat_telemetry(
1105+
database,
1106+
telemetry,
1107+
None,
1108+
TelemetryResult::Failed,
1109+
Some(get_error_string(&e)),
1110+
)
1111+
.await;
1112+
match e {
1113+
crate::api_client::ApiClientError::ContextWindowOverflow => {
1114+
self.conversation_state.clear(true);
1115+
if self.interactive {
1116+
self.spinner.take();
1117+
execute!(
1118+
self.output,
1119+
terminal::Clear(terminal::ClearType::CurrentLine),
1120+
cursor::MoveToColumn(0),
1121+
style::SetForegroundColor(Color::Yellow),
1122+
style::Print(
1123+
"The context window usage has overflowed. Clearing the conversation history.\n\n"
1124+
),
1125+
style::SetAttribute(Attribute::Reset)
1126+
)?;
1127+
}
1128+
return Ok(ChatState::PromptUser {
1129+
tool_uses,
1130+
pending_tool_index,
1131+
skip_printing_tools: true,
1132+
});
1133+
},
1134+
e => return Err(e.into()),
1135+
}
11111136
},
11121137
};
11131138

1139+
let request_id = response.request_id().map(|s| s.to_string());
11141140
let summary = {
11151141
let mut parser = ResponseParser::new(response);
11161142
loop {
@@ -1123,6 +1149,14 @@ impl ChatContext {
11231149
if let Some(request_id) = &err.request_id {
11241150
self.failed_request_ids.push(request_id.clone());
11251151
};
1152+
self.send_chat_telemetry(
1153+
database,
1154+
telemetry,
1155+
err.request_id.clone(),
1156+
TelemetryResult::Failed,
1157+
Some(get_error_string(&err)),
1158+
)
1159+
.await;
11261160
return Err(err.into());
11271161
},
11281162
}
@@ -1138,16 +1172,8 @@ impl ChatContext {
11381172
cursor::Show
11391173
)?;
11401174
}
1141-
1142-
if let Some(message_id) = self.conversation_state.message_id() {
1143-
telemetry
1144-
.send_chat_added_message(
1145-
self.conversation_state.conversation_id().to_owned(),
1146-
message_id.to_owned(),
1147-
self.conversation_state.context_message_length(),
1148-
)
1149-
.ok();
1150-
}
1175+
self.send_chat_telemetry(database, telemetry, request_id, TelemetryResult::Succeeded, None)
1176+
.await;
11511177

11521178
self.conversation_state.replace_history_with_summary(summary.clone());
11531179

@@ -1308,6 +1334,7 @@ impl ChatContext {
13081334
async fn handle_input(
13091335
&mut self,
13101336
telemetry: &TelemetryThread,
1337+
database: &mut Database,
13111338
mut user_input: String,
13121339
tool_uses: Option<Vec<QueuedTool>>,
13131340
pending_tool_index: Option<usize>,
@@ -1440,6 +1467,7 @@ impl ChatContext {
14401467
} => {
14411468
self.compact_history(
14421469
telemetry,
1470+
database,
14431471
Some(tool_uses),
14441472
pending_tool_index,
14451473
prompt,
@@ -3261,6 +3289,15 @@ impl ChatContext {
32613289
self.failed_request_ids.push(request_id.clone());
32623290
};
32633291

3292+
self.send_chat_telemetry(
3293+
database,
3294+
telemetry,
3295+
recv_error.request_id.clone(),
3296+
TelemetryResult::Failed,
3297+
Some(get_error_string(&recv_error)),
3298+
)
3299+
.await;
3300+
32643301
match recv_error.source {
32653302
RecvErrorKind::StreamTimeout { source, duration } => {
32663303
error!(
@@ -3395,15 +3432,8 @@ impl ChatContext {
33953432
}
33963433

33973434
if ended {
3398-
if let Some(message_id) = self.conversation_state.message_id() {
3399-
telemetry
3400-
.send_chat_added_message(
3401-
self.conversation_state.conversation_id().to_owned(),
3402-
message_id.to_owned(),
3403-
self.conversation_state.context_message_length(),
3404-
)
3405-
.ok();
3406-
}
3435+
self.send_chat_telemetry(database, telemetry, request_id, TelemetryResult::Succeeded, None)
3436+
.await;
34073437

34083438
if self.interactive
34093439
&& database
@@ -3691,6 +3721,41 @@ impl ChatContext {
36913721

36923722
Ok(())
36933723
}
3724+
3725+
async fn send_chat_telemetry(
3726+
&self,
3727+
database: &Database,
3728+
telemetry: &TelemetryThread,
3729+
request_id: Option<String>,
3730+
result: TelemetryResult,
3731+
reason: Option<String>,
3732+
) {
3733+
telemetry
3734+
.send_chat_added_message(
3735+
database,
3736+
self.conversation_state.conversation_id().to_owned(),
3737+
self.conversation_state.message_id().map(|s| s.to_owned()),
3738+
request_id,
3739+
self.conversation_state.context_message_length(),
3740+
result,
3741+
reason,
3742+
)
3743+
.await
3744+
.ok();
3745+
}
3746+
3747+
async fn send_error_telemetry(&self, database: &Database, telemetry: &TelemetryThread, reason: String) {
3748+
telemetry
3749+
.send_response_error(
3750+
database,
3751+
self.conversation_state.conversation_id().to_owned(),
3752+
self.conversation_state.context_message_length(),
3753+
TelemetryResult::Failed,
3754+
Some(reason),
3755+
)
3756+
.await
3757+
.ok();
3758+
}
36943759
}
36953760

36963761
/// Prints hook configuration grouped by trigger: conversation session start or per user message
@@ -3789,6 +3854,22 @@ fn create_stream(model_responses: serde_json::Value) -> StreamingClient {
37893854
StreamingClient::mock(mock)
37903855
}
37913856

3857+
/// Returns surface error + root cause as a string. If there is only one error
3858+
/// in the chain, return that as a string.
3859+
fn get_error_string(error: &(dyn Error + 'static)) -> String {
3860+
let err_chain = Chain::new(error);
3861+
3862+
if err_chain.len() > 1 {
3863+
format!(
3864+
"'{}' caused by: {}",
3865+
error,
3866+
err_chain.last().map_or("UNKNOWN".to_string(), |e| e.to_string())
3867+
)
3868+
} else {
3869+
error.to_string()
3870+
}
3871+
}
3872+
37923873
#[cfg(test)]
37933874
mod tests {
37943875
use super::*;

crates/chat-cli/src/database/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ impl Database {
279279
}
280280

281281
/// Get the start URL used for IdC login.
282-
pub fn get_start_url(&mut self) -> Result<Option<String>, DatabaseError> {
282+
pub fn get_start_url(&self) -> Result<Option<String>, DatabaseError> {
283283
self.get_json_entry::<String>(Table::State, START_URL_KEY)
284284
}
285285

@@ -289,7 +289,7 @@ impl Database {
289289
}
290290

291291
/// Get the region used for IdC login.
292-
pub fn get_idc_region(&mut self) -> Result<Option<String>, DatabaseError> {
292+
pub fn get_idc_region(&self) -> Result<Option<String>, DatabaseError> {
293293
// Annoyingly, this is encoded as a JSON string on older clients
294294
self.get_json_entry::<String>(Table::State, IDC_REGION_KEY)
295295
}

0 commit comments

Comments
 (0)