Skip to content

Commit a0e9698

Browse files
feat: add experimental model for amazon users (#2495)
1 parent a3a1777 commit a0e9698

File tree

10 files changed

+141
-55
lines changed

10 files changed

+141
-55
lines changed

crates/chat-cli/src/auth/builder_id.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,20 @@ impl BuilderIdToken {
303303

304304
/// Load the token from the keychain, refresh the token if it is expired and return it
305305
pub async fn load(database: &Database) -> Result<Option<Self>, AuthError> {
306+
// Can't use #[cfg(test)] without breaking lints, and we don't want to require
307+
// authentication in order to run ChatSession tests. Hence, adding this here with cfg!(test)
308+
if cfg!(test) {
309+
return Ok(Some(Self {
310+
access_token: Secret("test_access_token".to_string()),
311+
expires_at: time::OffsetDateTime::now_utc() + time::Duration::minutes(60),
312+
refresh_token: Some(Secret("test_refresh_token".to_string())),
313+
region: Some(OIDC_BUILDER_ID_REGION.to_string()),
314+
start_url: Some(START_URL.to_string()),
315+
oauth_flow: OAuthFlow::DeviceCode,
316+
scopes: Some(SCOPES.iter().map(|s| (*s).to_owned()).collect()),
317+
}));
318+
}
319+
306320
trace!("loading builder id token from the secret store");
307321
match database.get_secret(Self::SECRET_KEY).await {
308322
Ok(Some(secret)) => {

crates/chat-cli/src/cli/chat/cli/context.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ use crossterm::{
1010
style,
1111
};
1212

13-
use crate::cli::chat::consts::{
14-
AGENT_FORMAT_HOOKS_DOC_URL,
15-
CONTEXT_FILES_MAX_SIZE,
13+
use crate::cli::chat::consts::AGENT_FORMAT_HOOKS_DOC_URL;
14+
use crate::cli::chat::context::{
15+
ContextFilePath,
16+
calc_max_context_files_size,
1617
};
17-
use crate::cli::chat::context::ContextFilePath;
1818
use crate::cli::chat::token_counter::TokenCounter;
1919
use crate::cli::chat::util::drop_matched_context_files;
2020
use crate::cli::chat::{
@@ -222,11 +222,12 @@ impl ContextSubcommand {
222222
execute!(session.stderr, style::Print(format!("{}\n\n", "▔".repeat(3))),)?;
223223
}
224224

225+
let context_files_max_size = calc_max_context_files_size(session.conversation.model.as_deref());
225226
let mut files_as_vec = profile_context_files
226227
.iter()
227228
.map(|(path, content, _)| (path.clone(), content.clone()))
228229
.collect::<Vec<_>>();
229-
let dropped_files = drop_matched_context_files(&mut files_as_vec, CONTEXT_FILES_MAX_SIZE).ok();
230+
let dropped_files = drop_matched_context_files(&mut files_as_vec, context_files_max_size).ok();
230231

231232
execute!(
232233
session.stderr,
@@ -240,7 +241,7 @@ impl ContextSubcommand {
240241
style::SetForegroundColor(Color::DarkYellow),
241242
style::Print(format!(
242243
"Total token count exceeds limit: {}. The following files will be automatically dropped when interacting with Q. Consider removing them. \n\n",
243-
CONTEXT_FILES_MAX_SIZE
244+
context_files_max_size
244245
)),
245246
style::SetForegroundColor(Color::Reset)
246247
)?;

crates/chat-cli/src/cli/chat/cli/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ impl SlashCommand {
134134
Self::Hooks(args) => args.execute(session).await,
135135
Self::Usage(args) => args.execute(os, session).await,
136136
Self::Mcp(args) => args.execute(session).await,
137-
Self::Model(args) => args.execute(session).await,
137+
Self::Model(args) => args.execute(os, session).await,
138138
Self::Subscribe(args) => args.execute(os, session).await,
139139
Self::Persist(subcommand) => subcommand.execute(os, session).await,
140140
// Self::Root(subcommand) => {

crates/chat-cli/src/cli/chat/cli/model.rs

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crossterm::{
99
};
1010
use dialoguer::Select;
1111

12+
use crate::auth::AuthError;
1213
use crate::auth::builder_id::{
1314
BuilderIdToken,
1415
TokenType,
@@ -21,18 +22,37 @@ use crate::cli::chat::{
2122
use crate::os::Os;
2223

2324
pub struct ModelOption {
25+
/// Display name
2426
pub name: &'static str,
27+
/// Actual model id to send in the API
2528
pub model_id: &'static str,
29+
/// Size of the model's context window, in tokens
30+
pub context_window_tokens: usize,
2631
}
2732

28-
pub const MODEL_OPTIONS: [ModelOption; 2] = [
33+
const MODEL_OPTIONS: [ModelOption; 2] = [
2934
ModelOption {
3035
name: "claude-4-sonnet",
3136
model_id: "CLAUDE_SONNET_4_20250514_V1_0",
37+
context_window_tokens: 200_000,
3238
},
3339
ModelOption {
3440
name: "claude-3.7-sonnet",
3541
model_id: "CLAUDE_3_7_SONNET_20250219_V1_0",
42+
context_window_tokens: 200_000,
43+
},
44+
];
45+
46+
const OPENAI_MODEL_OPTIONS: [ModelOption; 2] = [
47+
ModelOption {
48+
name: "experimental-gpt-oss-120b",
49+
model_id: "OPENAI_GPT_OSS_120B_1_0",
50+
context_window_tokens: 128_000,
51+
},
52+
ModelOption {
53+
name: "experimental-gpt-oss-20b",
54+
model_id: "OPENAI_GPT_OSS_20B_1_0",
55+
context_window_tokens: 128_000,
3656
},
3757
];
3858

@@ -41,17 +61,19 @@ pub const MODEL_OPTIONS: [ModelOption; 2] = [
4161
pub struct ModelArgs;
4262

4363
impl ModelArgs {
44-
pub async fn execute(self, session: &mut ChatSession) -> Result<ChatState, ChatError> {
45-
Ok(select_model(session)?.unwrap_or(ChatState::PromptUser {
64+
pub async fn execute(self, os: &Os, session: &mut ChatSession) -> Result<ChatState, ChatError> {
65+
Ok(select_model(os, session).await?.unwrap_or(ChatState::PromptUser {
4666
skip_printing_tools: false,
4767
}))
4868
}
4969
}
5070

51-
pub fn select_model(session: &mut ChatSession) -> Result<Option<ChatState>, ChatError> {
71+
pub async fn select_model(os: &Os, session: &mut ChatSession) -> Result<Option<ChatState>, ChatError> {
5272
queue!(session.stderr, style::Print("\n"))?;
5373
let active_model_id = session.conversation.model.as_deref();
54-
let labels: Vec<String> = MODEL_OPTIONS
74+
let model_options = get_model_options(os).await?;
75+
76+
let labels: Vec<String> = model_options
5577
.iter()
5678
.map(|opt| {
5779
if (opt.model_id.is_empty() && active_model_id.is_none()) || Some(opt.model_id) == active_model_id {
@@ -83,7 +105,7 @@ pub fn select_model(session: &mut ChatSession) -> Result<Option<ChatState>, Chat
83105
queue!(session.stderr, style::ResetColor)?;
84106

85107
if let Some(index) = selection {
86-
let selected = &MODEL_OPTIONS[index];
108+
let selected = &model_options[index];
87109
let model_id_str = selected.model_id.to_string();
88110
session.conversation.model = Some(model_id_str);
89111

@@ -104,6 +126,8 @@ pub fn select_model(session: &mut ChatSession) -> Result<Option<ChatState>, Chat
104126
}))
105127
}
106128

129+
/// Returns a default model id to use if none has been otherwise provided.
130+
///
107131
/// Returns Claude 3.7 for: Amazon IDC users, FRA region users
108132
/// Returns Claude 4.0 for: Builder ID users, other regions
109133
pub async fn default_model_id(os: &Os) -> &'static str {
@@ -124,3 +148,35 @@ pub async fn default_model_id(os: &Os) -> &'static str {
124148
// Default to 4.0
125149
"CLAUDE_SONNET_4_20250514_V1_0"
126150
}
151+
152+
/// Returns the available models for use.
153+
pub async fn get_model_options(os: &Os) -> Result<Vec<ModelOption>, ChatError> {
154+
let is_amzn_user = BuilderIdToken::load(&os.database)
155+
.await?
156+
.ok_or(AuthError::NoToken)?
157+
.is_amzn_user();
158+
159+
let mut model_options = MODEL_OPTIONS.into_iter().collect::<Vec<_>>();
160+
if is_amzn_user {
161+
for opt in OPENAI_MODEL_OPTIONS {
162+
model_options.push(opt);
163+
}
164+
}
165+
166+
Ok(model_options)
167+
}
168+
169+
/// Returns the context window length in tokens for the given model_id.
170+
pub fn context_window_tokens(model_id: Option<&str>) -> usize {
171+
const DEFAULT_CONTEXT_WINDOW_LENGTH: usize = 200_000;
172+
173+
let Some(model_id) = model_id else {
174+
return DEFAULT_CONTEXT_WINDOW_LENGTH;
175+
};
176+
177+
MODEL_OPTIONS
178+
.iter()
179+
.chain(OPENAI_MODEL_OPTIONS.iter())
180+
.find(|m| m.model_id == model_id)
181+
.map_or(DEFAULT_CONTEXT_WINDOW_LENGTH, |m| m.context_window_tokens)
182+
}

crates/chat-cli/src/cli/chat/cli/usage.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use crossterm::{
99
style,
1010
};
1111

12-
use crate::cli::chat::consts::CONTEXT_WINDOW_SIZE;
12+
use super::model::context_window_tokens;
1313
use crate::cli::chat::token_counter::{
1414
CharCount,
1515
TokenCount,
@@ -62,14 +62,16 @@ impl UsageArgs {
6262
// set a max width for the progress bar for better aesthetic
6363
let progress_bar_width = std::cmp::min(window_width, 80);
6464

65+
let context_window_size = context_window_tokens(session.conversation.model.as_deref());
66+
6567
let context_width =
66-
((context_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
68+
((context_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
6769
let assistant_width =
68-
((assistant_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
70+
((assistant_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
6971
let tools_width =
70-
((tools_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
72+
((tools_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
7173
let user_width =
72-
((user_token_count.value() as f64 / CONTEXT_WINDOW_SIZE as f64) * progress_bar_width as f64) as usize;
74+
((user_token_count.value() as f64 / context_window_size as f64) * progress_bar_width as f64) as usize;
7375

7476
let left_over_width = progress_bar_width
7577
- std::cmp::min(
@@ -85,15 +87,15 @@ impl UsageArgs {
8587
style::Print(format!(
8688
"\nCurrent context window ({} of {}k tokens used)\n",
8789
total_token_used,
88-
CONTEXT_WINDOW_SIZE / 1000
90+
context_window_size / 1000
8991
)),
9092
style::SetForegroundColor(Color::DarkRed),
9193
style::Print("█".repeat(progress_bar_width)),
9294
style::SetForegroundColor(Color::Reset),
9395
style::Print(" "),
9496
style::Print(format!(
9597
"{:.2}%",
96-
(total_token_used.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
98+
(total_token_used.value() as f32 / context_window_size as f32) * 100.0
9799
)),
98100
)?;
99101
} else {
@@ -102,7 +104,7 @@ impl UsageArgs {
102104
style::Print(format!(
103105
"\nCurrent context window ({} of {}k tokens used)\n",
104106
total_token_used,
105-
CONTEXT_WINDOW_SIZE / 1000
107+
context_window_size / 1000
106108
)),
107109
// Context files
108110
style::SetForegroundColor(Color::DarkCyan),
@@ -140,7 +142,7 @@ impl UsageArgs {
140142
style::SetForegroundColor(Color::Reset),
141143
style::Print(format!(
142144
"{:.2}%",
143-
(total_token_used.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
145+
(total_token_used.value() as f32 / context_window_size as f32) * 100.0
144146
)),
145147
)?;
146148
}
@@ -155,31 +157,31 @@ impl UsageArgs {
155157
style::Print(format!(
156158
"~{} tokens ({:.2}%)\n",
157159
context_token_count,
158-
(context_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
160+
(context_token_count.value() as f32 / context_window_size as f32) * 100.0
159161
)),
160162
style::SetForegroundColor(Color::DarkRed),
161163
style::Print("█ Tools: "),
162164
style::SetForegroundColor(Color::Reset),
163165
style::Print(format!(
164166
" ~{} tokens ({:.2}%)\n",
165167
tools_token_count,
166-
(tools_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
168+
(tools_token_count.value() as f32 / context_window_size as f32) * 100.0
167169
)),
168170
style::SetForegroundColor(Color::Blue),
169171
style::Print("█ Q responses: "),
170172
style::SetForegroundColor(Color::Reset),
171173
style::Print(format!(
172174
" ~{} tokens ({:.2}%)\n",
173175
assistant_token_count,
174-
(assistant_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
176+
(assistant_token_count.value() as f32 / context_window_size as f32) * 100.0
175177
)),
176178
style::SetForegroundColor(Color::Magenta),
177179
style::Print("█ Your prompts: "),
178180
style::SetForegroundColor(Color::Reset),
179181
style::Print(format!(
180182
" ~{} tokens ({:.2}%)\n\n",
181183
user_token_count,
182-
(user_token_count.value() as f32 / CONTEXT_WINDOW_SIZE as f32) * 100.0
184+
(user_token_count.value() as f32 / context_window_size as f32) * 100.0
183185
)),
184186
)?;
185187

crates/chat-cli/src/cli/chat/consts.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use super::token_counter::TokenCounter;
2-
31
// These limits are the internal undocumented values from the service for each item
42

53
pub const MAX_CURRENT_WORKING_DIRECTORY_LEN: usize = 256;
@@ -13,13 +11,6 @@ pub const MAX_TOOL_RESPONSE_SIZE: usize = 400_000;
1311
/// Actual service limit is 600_000
1412
pub const MAX_USER_MESSAGE_SIZE: usize = 400_000;
1513

16-
/// In tokens
17-
pub const CONTEXT_WINDOW_SIZE: usize = 200_000;
18-
19-
pub const CONTEXT_FILES_MAX_SIZE: usize = 150_000;
20-
21-
pub const MAX_CHARS: usize = TokenCounter::token_to_chars(CONTEXT_WINDOW_SIZE); // Character-based warning threshold
22-
2314
pub const DUMMY_TOOL_NAME: &str = "dummy";
2415

2516
pub const MAX_NUMBER_OF_IMAGES_PER_REQUEST: usize = 10;

crates/chat-cli/src/cli/chat/context.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use serde::{
1414
Serializer,
1515
};
1616

17-
use super::consts::CONTEXT_FILES_MAX_SIZE;
17+
use super::cli::model::context_window_tokens;
1818
use super::util::drop_matched_context_files;
1919
use crate::cli::agent::Agent;
2020
use crate::cli::agent::hook::{
@@ -103,7 +103,7 @@ pub struct ContextManager {
103103
}
104104

105105
impl ContextManager {
106-
pub fn from_agent(agent: &Agent, max_context_files_size: Option<usize>) -> Result<Self> {
106+
pub fn from_agent(agent: &Agent, max_context_files_size: usize) -> Result<Self> {
107107
let paths = agent
108108
.resources
109109
.iter()
@@ -112,7 +112,7 @@ impl ContextManager {
112112
.collect::<Vec<_>>();
113113

114114
Ok(Self {
115-
max_context_files_size: max_context_files_size.unwrap_or(CONTEXT_FILES_MAX_SIZE),
115+
max_context_files_size,
116116
current_profile: agent.name.clone(),
117117
paths,
118118
hooks: agent.hooks.clone(),
@@ -254,6 +254,12 @@ impl ContextManager {
254254
}
255255
}
256256

257+
/// Calculates the maximum context files size to use for the given model id.
258+
pub fn calc_max_context_files_size(model_id: Option<&str>) -> usize {
259+
// Sets the max as 75% of the context window
260+
context_window_tokens(model_id).saturating_mul(3) / 4
261+
}
262+
257263
/// Process a path, handling glob patterns and file types.
258264
///
259265
/// This method:
@@ -424,4 +430,13 @@ mod tests {
424430

425431
Ok(())
426432
}
433+
434+
#[test]
435+
fn test_calc_max_context_files_size() {
436+
assert_eq!(
437+
calc_max_context_files_size(Some("CLAUDE_SONNET_4_20250514_V1_0")),
438+
150_000
439+
);
440+
assert_eq!(calc_max_context_files_size(Some("OPENAI_GPT_OSS_120B_1_0")), 96_000);
441+
}
427442
}

0 commit comments

Comments (0)