From 33f2e762b99f480610823abd4a9aa3b0e79e9f42 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Sat, 9 Aug 2025 16:28:02 -0700 Subject: [PATCH 1/2] Support OpenAI OSS Models with Tiktoken tokenizer --- .../Model/TiktokenTokenizer.cs | 94 ++++++++++++-- .../TiktokenTests.cs | 120 ++++++++++++++---- 2 files changed, 175 insertions(+), 39 deletions(-) diff --git a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs index 69cc3dfb9c..48af382c7a 100644 --- a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs @@ -1013,6 +1013,15 @@ public override OperationStatus Decode(IEnumerable ids, Span destinat private const string IMStart = "<|im_start|>"; private const string IMEnd = "<|im_end|>"; private const string IMSep = "<|im_sep|>"; + private const string StartOfText = "<|startoftext|>"; + private const string Return = "<|return|>"; + private const string Constrain = "<|constrain|>"; + private const string Channel = "<|channel|>"; + private const string Start = "<|start|>"; + private const string End = "<|end|>"; + private const string Message = "<|message|>"; + private const string Call = "<|call|>"; + private const string ReservedPrefix = "<|reserved_"; private enum ModelEncoding { @@ -1022,40 +1031,67 @@ private enum ModelEncoding P50kEdit, R50kBase, GPT2, - O200kBase + O200kBase, + O200kHarmony } private const string Phi4ModelName = "phi-4"; private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixToEncoding = [ - // chat ( "o1-", ModelEncoding.O200kBase ), // e.g. o1-mini ( "o3-", ModelEncoding.O200kBase ), // e.g. o3-mini + ( "o4-mini-", ModelEncoding.O200kBase ), // e.g. o4-mini + + // chat ( "gpt-4.1-", ModelEncoding.O200kBase), // e.g., gpt-4.1-mini + ( "gpt-4.5-", ModelEncoding.O200kBase), // e.g., gpt-4.5 ( "gpt-4o-", ModelEncoding.O200kBase), // e.g., gpt-4o-2024-05-13 + ( "chatgpt-4o-", ModelEncoding.O200kBase), ( "gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k ( "gpt-3.5-", ModelEncoding.Cl100kBase), // e.g, gpt-3.5-turbo-0301, -0401, etc. - ( "gpt-35-", ModelEncoding.Cl100kBase ) // Azure deployment name + ( "gpt-35-", ModelEncoding.Cl100kBase ), // Azure deployment name + ( "gpt-oss-", ModelEncoding.O200kHarmony ), + + // fine-tuned + ( "ft:gpt-4o", ModelEncoding.O200kBase ), + ( "ft:gpt-4", ModelEncoding.Cl100kBase ), + ( "ft:gpt-3.5-turbo", ModelEncoding.Cl100kBase ), + ( "ft:davinci-002", ModelEncoding.Cl100kBase ), + ( "ft:babbage-002", ModelEncoding.Cl100kBase ), ]; private static readonly Dictionary _modelToEncoding = new Dictionary(StringComparer.OrdinalIgnoreCase) { - // chat - { "gpt-4o", ModelEncoding.O200kBase }, + // reasoning { "o1", ModelEncoding.O200kBase }, { "o3", ModelEncoding.O200kBase }, { "o4-mini", ModelEncoding.O200kBase }, + + // chat { "gpt-4.1", ModelEncoding.O200kBase }, + { "gpt-4o", ModelEncoding.O200kBase }, { "gpt-4", ModelEncoding.Cl100kBase }, { "gpt-3.5-turbo", ModelEncoding.Cl100kBase }, + { "gpt-3.5", ModelEncoding.Cl100kBase }, { "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase }, { "gpt-35", ModelEncoding.Cl100kBase }, // Azure deployment name { "gpt-35-turbo", ModelEncoding.Cl100kBase }, // Azure deployment name { "gpt-35-turbo-16k", ModelEncoding.Cl100kBase }, // Azure deployment name - // text + // Base + { "davinci-002", ModelEncoding.Cl100kBase }, + { "babbage-002", ModelEncoding.Cl100kBase }, + + // embeddings + // https://platform.openai.com/docs/guides/embeddings/what-are-embeddings + { "text-embedding-ada-002", ModelEncoding.Cl100kBase }, + { "text-embedding-3-small", ModelEncoding.Cl100kBase }, + { "text-embedding-3-large", ModelEncoding.Cl100kBase }, + + // DEPRECATED MODELS + // text (DEPRECATED) { "text-davinci-003", ModelEncoding.P50kBase }, { "text-davinci-002", ModelEncoding.P50kBase }, { "text-davinci-001", ModelEncoding.R50kBase }, @@ -1067,7 +1103,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo { "babbage", ModelEncoding.R50kBase }, { "ada", ModelEncoding.R50kBase }, - // code + // code (DEPRECATED) { "code-davinci-002", ModelEncoding.P50kBase }, { "code-davinci-001", ModelEncoding.P50kBase }, { "code-cushman-002", ModelEncoding.P50kBase }, @@ -1075,17 +1111,12 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo { "davinci-codex", ModelEncoding.P50kBase }, { "cushman-codex", ModelEncoding.P50kBase }, - // edit + // edit (DEPRECATED) { "text-davinci-edit-001", ModelEncoding.P50kEdit }, { "code-davinci-edit-001", ModelEncoding.P50kEdit }, - // embeddings - // https://platform.openai.com/docs/guides/embeddings/what-are-embeddings - { "text-embedding-ada-002", ModelEncoding.Cl100kBase }, - { "text-embedding-3-small", ModelEncoding.Cl100kBase }, - { "text-embedding-3-large", ModelEncoding.Cl100kBase }, - // old embeddings + // old embeddings (DEPRECATED) { "text-similarity-davinci-001", ModelEncoding.R50kBase }, { "text-similarity-curie-001", ModelEncoding.R50kBase }, { "text-similarity-babbage-001", ModelEncoding.R50kBase }, @@ -1099,6 +1130,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo // open source { "gpt2", ModelEncoding.GPT2 }, + { "gpt-2", ModelEncoding.GPT2 }, // phi-4 { Phi4ModelName, ModelEncoding.Cl100kBase }, @@ -1126,6 +1158,32 @@ private static ModelEncoding GetModelEncoding(string modelName) return encoder; } + private static Dictionary CreateHarmonyEncodingSpecialTokens() => + new Dictionary + { + { StartOfText, 199998 }, + { EndOfText, 199999 }, + { $"{ReservedPrefix}200000|>", 200000 }, + { $"{ReservedPrefix}200001|>", 200001 }, + { Return, 200002 }, + { Constrain, 200003 }, + { $"{ReservedPrefix}200004|>", 200004 }, + { Channel, 200005 }, + { Start, 200006 }, + { End, 200007 }, + { Message, 200008 }, + { $"{ReservedPrefix}200009|>", 200009 }, + { $"{ReservedPrefix}200010|>", 200010 }, + { $"{ReservedPrefix}200011|>", 200011 }, + { Call, 200012 }, + { $"{ReservedPrefix}200013|>", 200013 }, + { $"{ReservedPrefix}200014|>", 200014 }, + { $"{ReservedPrefix}200015|>", 200015 }, + { $"{ReservedPrefix}200016|>", 200016 }, + { $"{ReservedPrefix}200017|>", 200017 }, + { EndOfPrompt, 200018 }, + }; + private static (Dictionary SpecialTokens, Regex Regex, string VocabFile, Type? DataType, string PackageName) GetTiktokenConfigurations(string modelName) => GetTiktokenConfigurations(GetModelEncoding(modelName), modelName); private static (Dictionary SpecialTokens, Regex Regex, string VocabFile, Type? DataType, string PackageName) GetTiktokenConfigurations(ModelEncoding modelEncoding, string? modelName = null) @@ -1157,6 +1215,9 @@ private static (Dictionary SpecialTokens, Regex Regex, string Vocab case ModelEncoding.R50kBase: return (new Dictionary { { EndOfText, 50256 } }, P50kBaseRegex(), R50RanksFile, Type.GetType(R50kBaseTypeName), R50kBasePackageName); + case ModelEncoding.O200kHarmony: + return (CreateHarmonyEncodingSpecialTokens(), O200kBaseRegex(), O200kBaseFile, Type.GetType(O200kBaseTypeName), O200kBasePackageName); + default: throw new NotSupportedException($"The model '{modelName ?? modelEncoding.ToString()}' is not supported."); } @@ -1179,6 +1240,7 @@ private static (Dictionary SpecialTokens, Regex Regex, string Vocab internal const string P50kEditEncodingName = "p50k_edit"; internal const string R50kBaseEncodingName = "r50k_base"; internal const string O200kBaseEncodingName = "o200k_base"; + internal const string O200kHarmonyEncodingName = "o200k_harmony"; internal const string Cl100kBasePackageName = "Microsoft.ML.Tokenizers.Data.Cl100kBase"; internal const string Gpt2PackageName = "Microsoft.ML.Tokenizers.Data.Gpt2"; @@ -1474,6 +1536,10 @@ public static TiktokenTokenizer CreateForEncoding(string encodingName, IReadOnly { modelEncoding = ModelEncoding.O200kBase; } + else if (encodingName.Equals(O200kHarmonyEncodingName, StringComparison.OrdinalIgnoreCase)) + { + modelEncoding = ModelEncoding.O200kHarmony; + } else if (encodingName.Equals(P50kBaseEncodingName, StringComparison.OrdinalIgnoreCase)) { modelEncoding = ModelEncoding.P50kBase; diff --git a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs index 1ace27f1fb..ac0175eea7 100644 --- a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs +++ b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs @@ -36,6 +36,7 @@ public class TiktokenTests public static Tokenizer P50kEdit { get; } = TiktokenTokenizer.CreateForModel("text-davinci-edit-001"); public static Tokenizer GPT4o { get; } = TiktokenTokenizer.CreateForModel("gpt-4o"); public static Tokenizer Phi4 { get; } = TiktokenTokenizer.CreateForModel("phi-4"); + public static TiktokenTokenizer GptOss { get; } = TiktokenTokenizer.CreateForModel("gpt-oss-20b"); [Fact] public async Task TestTokenizerCreation() @@ -282,40 +283,43 @@ public void TestEncode5() } [Fact] - public void TestEncodeGpt4o() + public void TestEncodeO200kBaseEncoding() { - string text = ReadAndSanitizeFile("./Data/lib.rs.txt"); - IReadOnlyList encoded = GPT4o.EncodeToIds(text); - int idsCount = GPT4o.CountTokens(text); + foreach (TiktokenTokenizer tokenizer in new[] { GPT4o, GptOss }) + { + string text = ReadAndSanitizeFile("./Data/lib.rs.txt"); + IReadOnlyList encoded = tokenizer.EncodeToIds(text); + int idsCount = tokenizer.CountTokens(text); - Assert.Equal(5609, encoded.Count); - Assert.Equal(encoded.Count, idsCount); + Assert.Equal(5609, encoded.Count); + Assert.Equal(encoded.Count, idsCount); - using (Stream stream = File.OpenRead("./Data/tokens_gpt4o.json")) - { - int[]? expected = JsonSerializer.Deserialize(stream) as int[]; - Assert.Equal(expected!, encoded); - } + using (Stream stream = File.OpenRead("./Data/tokens_gpt4o.json")) + { + int[]? expected = JsonSerializer.Deserialize(stream) as int[]; + Assert.Equal(expected!, encoded); + } - Assert.Equal(text, GPT4o.Decode(encoded)); - TestDecodingWithSpan((GPT4o as TiktokenTokenizer)!, encoded.ToArray(), text); + Assert.Equal(text, tokenizer.Decode(encoded)); + TestDecodingWithSpan(tokenizer, encoded.ToArray(), text); - text = "<|endoftext|>Hello ⭐ World<|endofprompt|>"; + text = "<|endoftext|>Hello ⭐ World<|endofprompt|>"; - encoded = GPT4o.EncodeToIds(text); - idsCount = GPT4o.CountTokens(text); - Assert.Equal(new List() { 199999, 13225, 161181, 5922, 200018 }, encoded); - Assert.Equal(text, GPT4o.Decode(encoded)); - TestDecodingWithSpan((GPT4o as TiktokenTokenizer)!, encoded.ToArray(), text); + encoded = tokenizer.EncodeToIds(text); + idsCount = tokenizer.CountTokens(text); + Assert.Equal(new List() { 199999, 13225, 161181, 5922, 200018 }, encoded); + Assert.Equal(text, tokenizer.Decode(encoded)); + TestDecodingWithSpan(tokenizer, encoded.ToArray(), text); - IReadOnlyList result = GPT4o.EncodeToTokens(text, out string? normalizedText); + IReadOnlyList result = tokenizer.EncodeToTokens(text, out string? normalizedText); - Assert.Equal(encoded, result.Select(token => token.Id).ToArray()); - Assert.Equal(encoded.Count, idsCount); - Assert.Equal(new string[] { "<|endoftext|>", "Hello", " ⭐", " World", "<|endofprompt|>" }, result.Select(token => token.Value).ToArray()); - Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); + Assert.Equal(encoded, result.Select(token => token.Id).ToArray()); + Assert.Equal(encoded.Count, idsCount); + Assert.Equal(new string[] { "<|endoftext|>", "Hello", " ⭐", " World", "<|endofprompt|>" }, result.Select(token => token.Value).ToArray()); + Assert.Equal(new List<(int, int)> { (0, 13), (13, 5), (18, 2), (20, 6), (26, 15) }, result.Select(token => (token.Offset.Start.Value, token.Offset.End.Value - token.Offset.Start.Value)).ToArray()); - TokenizerTests.TestTokenLimits(GPT4o); + TokenizerTests.TestTokenLimits(tokenizer); + } } [Fact] @@ -398,16 +402,20 @@ public void TestEncodeR50kBase() [InlineData("o1")] [InlineData("o1-")] [InlineData("o1-mini")] + [InlineData("o4-mini-")] [InlineData("o3")] [InlineData("o3-")] [InlineData("o3-mini")] [InlineData("o4-mini")] [InlineData("gpt-4.1")] [InlineData("gpt-4.1-mini")] + [InlineData("gpt-4.5-")] [InlineData("gpt-4o")] [InlineData("gpt-4o-")] + [InlineData("chatgpt-4o-")] [InlineData("gpt-4")] [InlineData("gpt-4-")] + [InlineData("gpt-3.5")] [InlineData("gpt-3.5-")] [InlineData("gpt-3.5-turbo")] [InlineData("gpt-3.5-turbo-")] @@ -424,8 +432,10 @@ public void TestEncodeR50kBase() [InlineData("text-babbage-001")] [InlineData("text-ada-001")] [InlineData("davinci")] + [InlineData("davinci-002")] [InlineData("curie")] [InlineData("babbage")] + [InlineData("babbage-002")] [InlineData("ada")] [InlineData("code-davinci-002")] [InlineData("code-davinci-001")] @@ -449,7 +459,16 @@ public void TestEncodeR50kBase() [InlineData("code-search-babbage-code-001")] [InlineData("code-search-ada-code-001")] [InlineData("gpt2")] + [InlineData("gpt-2")] [InlineData("phi-4")] + [InlineData("gpt-oss-")] + [InlineData("gpt-oss-120b")] + [InlineData("gpt-oss-20b")] + [InlineData("ft:gpt-4o")] + [InlineData("ft:gpt-4")] + [InlineData("ft:gpt-3.5-turbo")] + [InlineData("ft:davinci-002")] + [InlineData("ft:babbage-002")] public void TestAllSupportedModelNames(string modelName) { Tokenizer tokenizer = TiktokenTokenizer.CreateForModel(modelName); @@ -463,6 +482,7 @@ public void TestAllSupportedModelNames(string modelName) [InlineData("p50k_edit")] [InlineData("cl100k_base")] [InlineData("o200k_base")] + [InlineData("o200k_harmony")] public void TestAllSupportedEncodingNames(string encodingName) { Tokenizer tokenizer = TiktokenTokenizer.CreateForEncoding(encodingName); @@ -476,6 +496,7 @@ public void TestAllSupportedEncodingNames(string encodingName) "p50k_edit" => "text-davinci-edit-001", "cl100k_base" => "gpt-4", "o200k_base" => "gpt-4o", + "o200k_harmony" => "gpt-oss-120b", _ => throw new ArgumentException("Invalid encoding name"), }; @@ -502,6 +523,7 @@ public void TestEncodingNamesNegativeCases() Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("p50k_edit_")); Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("cl100k_base_")); Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("o200k_base_")); + Assert.Throws(() => TiktokenTokenizer.CreateForEncoding("o200k_harmony_")); } [InlineData("gpt-4")] @@ -514,6 +536,7 @@ public void TestEncodingNamesNegativeCases() [InlineData("text-curie-001")] [InlineData("text-davinci-edit-001")] [InlineData("phi-4")] + [InlineData("gpt-oss-20b")] [ConditionalTheory(typeof(RemoteExecutor), nameof(RemoteExecutor.IsSupported))] public void TestCreationUsingModel(string modelName) { @@ -757,6 +780,53 @@ public void TestPhi4SpecialCases() Assert.Equal(text, Phi4.Decode(encoded)); } + [Fact] + public void TestOss() + { + Assert.Equal( + new Dictionary + { + { "<|startoftext|>", 199998 }, + { "<|endoftext|>", 199999 }, + { "<|reserved_200000|>", 200000 }, + { "<|reserved_200001|>", 200001 }, + { "<|return|>", 200002 }, + { "<|constrain|>", 200003 }, + { "<|reserved_200004|>", 200004 }, + { "<|channel|>", 200005 }, + { "<|start|>", 200006 }, + { "<|end|>", 200007 }, + { "<|message|>", 200008 }, + { "<|reserved_200009|>", 200009 }, + { "<|reserved_200010|>", 200010 }, + { "<|reserved_200011|>", 200011 }, + { "<|call|>", 200012 }, + { "<|reserved_200013|>", 200013 }, + { "<|reserved_200014|>", 200014 }, + { "<|reserved_200015|>", 200015 }, + { "<|reserved_200016|>", 200016 }, + { "<|reserved_200017|>", 200017 }, + { "<|endofprompt|>", 200018 }, + }, GptOss.SpecialTokens); + + string text = "<|startoftext|><|start|><|message|>Hello World<|end|><|endoftext|>"; + + IReadOnlyList ids = GptOss.EncodeToIds(text); + + Assert.Equal( + new List { 199998, 200006, 200008, 13225, 5922, 200007, 199999 }, + ids); + Assert.Equal(text, GptOss.Decode(ids)); + + Assert.Equal(new string[] { "<|startoftext|>", "<|start|>", "<|message|>", "Hello", " World", "<|end|>", "<|endoftext|>" }, + GptOss.EncodeToTokens(text, out _).Select(t => t.Value).ToArray()); + + Assert.Equal(new List<(int, int)> { (0, 15), (15, 24), (24, 35), (35, 40), (40, 46), (46, 53), (53, 66) }, + GptOss.EncodeToTokens(text, out _).Select(t => (t.Offset.Start.Value, t.Offset.End.Value)).ToList()); + + Assert.Equal(ids, GptOss.EncodeToTokens(text, out _).Select(t => t.Id).ToList()); + } + // We are not exposing the Encoder, Decoder, or Vocabulary so far. For now, use reflection to test it. private static IReadOnlyDictionary, int>? GetEncoder(TiktokenTokenizer tiktoken) => typeof(TiktokenTokenizer).GetProperty("Encoder", BindingFlags.Instance | BindingFlags.NonPublic)?.GetValue(tiktoken) as IReadOnlyDictionary, int>; From 805c08d02d9728456e7c918aef5302d42e574f57 Mon Sep 17 00:00:00 2001 From: Tarek Mahmoud Sayed Date: Sun, 10 Aug 2025 18:11:58 -0700 Subject: [PATCH 2/2] Add gpt-5 support --- src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs | 2 ++ test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs index 48af382c7a..0b9e64cec9 100644 --- a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs @@ -1044,6 +1044,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo ( "o4-mini-", ModelEncoding.O200kBase ), // e.g. o4-mini // chat + ( "gpt-5-", ModelEncoding.O200kBase), ( "gpt-4.1-", ModelEncoding.O200kBase), // e.g., gpt-4.1-mini ( "gpt-4.5-", ModelEncoding.O200kBase), // e.g., gpt-4.5 ( "gpt-4o-", ModelEncoding.O200kBase), // e.g., gpt-4o-2024-05-13 @@ -1070,6 +1071,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo { "o4-mini", ModelEncoding.O200kBase }, // chat + { "gpt-5", ModelEncoding.O200kBase }, { "gpt-4.1", ModelEncoding.O200kBase }, { "gpt-4o", ModelEncoding.O200kBase }, { "gpt-4", ModelEncoding.Cl100kBase }, diff --git a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs index ac0175eea7..e7a0bf5acc 100644 --- a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs +++ b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs @@ -35,6 +35,7 @@ public class TiktokenTests public static Tokenizer R50kBase { get; } = TiktokenTokenizer.CreateForModel("ada"); public static Tokenizer P50kEdit { get; } = TiktokenTokenizer.CreateForModel("text-davinci-edit-001"); public static Tokenizer GPT4o { get; } = TiktokenTokenizer.CreateForModel("gpt-4o"); + public static Tokenizer GPT5 { get; } = TiktokenTokenizer.CreateForModel("gpt-5"); public static Tokenizer Phi4 { get; } = TiktokenTokenizer.CreateForModel("phi-4"); public static TiktokenTokenizer GptOss { get; } = TiktokenTokenizer.CreateForModel("gpt-oss-20b"); @@ -285,7 +286,7 @@ public void TestEncode5() [Fact] public void TestEncodeO200kBaseEncoding() { - foreach (TiktokenTokenizer tokenizer in new[] { GPT4o, GptOss }) + foreach (TiktokenTokenizer tokenizer in new[] { GPT4o, GptOss, GPT5 }) { string text = ReadAndSanitizeFile("./Data/lib.rs.txt"); IReadOnlyList encoded = tokenizer.EncodeToIds(text); @@ -412,6 +413,8 @@ public void TestEncodeR50kBase() [InlineData("gpt-4.5-")] [InlineData("gpt-4o")] [InlineData("gpt-4o-")] + [InlineData("gpt-5")] + [InlineData("gpt-5-chat")] [InlineData("chatgpt-4o-")] [InlineData("gpt-4")] [InlineData("gpt-4-")] @@ -529,6 +532,7 @@ public void TestEncodingNamesNegativeCases() [InlineData("gpt-4")] [InlineData("gpt-4.1")] [InlineData("gpt-4o")] + [InlineData("gpt-5")] [InlineData("o1")] [InlineData("o3")] [InlineData("o4-mini")]