Skip to content

Commit 9170221

Browse files
authored
Support o3/o4 models (#110)
1 parent af52be1 commit 9170221

File tree

7 files changed

+16
-5
lines changed

7 files changed

+16
-5
lines changed

tiktoken-rs/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ println!("max_tokens: {}", max_tokens);
105105

106106
| Encoding name | OpenAI models |
107107
| ----------------------- | ------------------------------------------------------------------------- |
108-
| `o200k_base` | GPT-4o models, o1 models |
108+
| `o200k_base` | GPT-4o models, GPT-4.1, o1, o3, and o4 models |
109109
| `cl100k_base` | ChatGPT models, `text-embedding-ada-002` |
110110
| `p50k_base` | Code models, `text-davinci-002`, `text-davinci-003` |
111111
| `p50k_edit` | Use for edit models like `text-davinci-edit-001`, `code-davinci-edit-001` |

tiktoken-rs/src/model.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,11 @@ pub fn get_context_size(model: &str) -> usize {
3636
let base = rest.split(':').next().unwrap_or(rest);
3737
return get_context_size(base);
3838
}
39-
if starts_with_any!(model, "o1-") {
40-
return 128_000;
39+
if starts_with_any!(model, "o1", "o3", "o4") {
40+
return 200_000;
41+
}
42+
if starts_with_any!(model, "gpt-4.1") {
43+
return 1_047_576;
4144
}
4245
if starts_with_any!(model, "gpt-4o") {
4346
return 128_000;

tiktoken-rs/src/singleton.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pub fn cl100k_base_singleton() -> &'static CoreBPE {
4949
}
5050

5151
/// Returns a singleton instance of the o200k_base tokenizer.
52-
/// Use for GPT-4o models.
52+
/// Use for GPT-4o models and other `o` series models like `o1`, `o3`, and `o4`.
5353
///
5454
/// This function will only initialize the tokenizer once, and then return a reference to the tokenizer
5555
pub fn o200k_base_singleton() -> &'static CoreBPE {

tiktoken-rs/src/tiktoken_ext/openai_public.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ pub fn cl100k_base() -> Result<CoreBPE> {
118118
Ok(bpe)
119119
}
120120

121-
/// Use for GPT-4o models.
121+
/// Use for GPT-4o models and other `o` series models like `o1`, `o3`, and `o4`.
122122
/// Initializes and returns a new instance of the o200k_base tokenizer.
123123
pub fn o200k_base() -> Result<CoreBPE> {
124124
let o200k_base = include_str!("../../assets/o200k_base.tiktoken");

tiktoken-rs/src/tokenizer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ const MODEL_TO_TOKENIZER: &[(&str, Tokenizer)] = &[
5555
// reasoning
5656
("o1", Tokenizer::O200kBase),
5757
("o3", Tokenizer::O200kBase),
58+
("o4", Tokenizer::O200kBase),
5859
// chat
5960
("gpt-4.1", Tokenizer::O200kBase),
6061
("chatgpt-4o-latest", Tokenizer::O200kBase),

tiktoken-rs/src/vendor_tiktoken.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ impl std::fmt::Display for DecodeKeyError {
164164
impl std::error::Error for DecodeKeyError {}
165165

166166
#[derive(Debug, Clone)]
167+
#[allow(dead_code)]
167168
pub struct DecodeError {
168169
pub message: String,
169170
}

tiktoken-rs/tests/model.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,9 @@ fn test_finetuned_context_size() {
1111
get_context_size("gpt-4o")
1212
);
1313
}
14+
15+
#[test]
16+
fn test_o_series_context_size() {
17+
assert_eq!(get_context_size("o3-small"), 200_000);
18+
assert_eq!(get_context_size("o4"), 200_000);
19+
}

0 commit comments

Comments
 (0)