@@ -228,7 +228,7 @@ enum TokenizerModel {
228228 tokenizerData: Config ,
229229 addedTokens: [ String : Int ] ,
230230 tokenizerVocab: TokenizerVocab ? ,
231- tokenizerMerges: [ Any ] ? ,
231+ tokenizerMerges: TokenizerMerges ? ,
232232 strict: Bool = true
233233 ) throws -> TokenizingModel {
234234 guard let tokenizerClassName = tokenizerConfig. tokenizerClass. string ( ) else {
@@ -250,7 +250,7 @@ enum TokenizerModel {
250250 // Note: includes empty subclasses (creates BPETokenizer instance)
251251 if tokenizerClass is BPETokenizer . Type ,
252252 case . bpe( let rawVocab) = tokenizerVocab,
253- let rawMerges = tokenizerMerges
253+ let rawMerges = tokenizerMerges? . rules
254254 {
255255 return try BPETokenizer (
256256 tokenizerConfig: tokenizerConfig,
@@ -285,7 +285,7 @@ enum TokenizerModel {
285285 tokenizerData: Config ,
286286 addedTokens: [ String : Int ] ,
287287 tokenizerVocab: TokenizerVocab ? ,
288- tokenizerMerges: [ Any ] ? ,
288+ tokenizerMerges: TokenizerMerges ? ,
289289 strict: Bool = true
290290 ) async throws -> TokenizingModel {
291291 guard let tokenizerClassName = tokenizerConfig. tokenizerClass. string ( ) else {
@@ -307,7 +307,7 @@ enum TokenizerModel {
307307 // Note: includes empty subclasses (creates BPETokenizer instance)
308308 if tokenizerClass is BPETokenizer . Type ,
309309 case . bpe( let rawVocab) = tokenizerVocab,
310- let rawMerges = tokenizerMerges
310+ let rawMerges = tokenizerMerges? . rules
311311 {
312312 return await BPETokenizer . createAsync (
313313 tokenizerConfig: tokenizerConfig,
@@ -628,7 +628,7 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
628628 tokenizerConfig: Config ,
629629 tokenizerData: Config ,
630630 tokenizerVocab: TokenizerVocab ? = nil ,
631- tokenizerMerges: [ Any ] ? = nil ,
631+ tokenizerMerges: TokenizerMerges ? = nil ,
632632 strict: Bool = true
633633 ) throws {
634634 var addedTokens : [ String : Int ] = [ : ]
@@ -751,7 +751,7 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
751751 tokenizerConfig: Config ,
752752 tokenizerData: Config ,
753753 tokenizerVocab: TokenizerVocab ? ,
754- tokenizerMerges: [ Any ] ? ,
754+ tokenizerMerges: TokenizerMerges ? ,
755755 strict: Bool = true
756756 ) async throws -> PreTrainedTokenizer {
757757 // Parse addedTokens (small data, used for model init)
@@ -1202,7 +1202,7 @@ public extension AutoTokenizer {
12021202 tokenizerConfig: Config ,
12031203 tokenizerData: Config ,
12041204 tokenizerVocab: TokenizerVocab ? ,
1205- tokenizerMerges: [ Any ] ? ,
1205+ tokenizerMerges: TokenizerMerges ? ,
12061206 strict: Bool = true
12071207 ) throws -> Tokenizer {
12081208 let tokenizerClass = tokenizerClass ( for: tokenizerConfig)
@@ -1220,7 +1220,7 @@ public extension AutoTokenizer {
12201220 tokenizerConfig: Config ,
12211221 tokenizerData: Config ,
12221222 tokenizerVocab: TokenizerVocab ? ,
1223- tokenizerMerges: [ Any ] ? ,
1223+ tokenizerMerges: TokenizerMerges ? ,
12241224 strict: Bool = true
12251225 ) async throws -> Tokenizer {
12261226 let selectedClass = tokenizerClass ( for: tokenizerConfig)
@@ -1294,7 +1294,7 @@ class LlamaPreTrainedTokenizer: PreTrainedTokenizer, @unchecked Sendable {
12941294 tokenizerConfig: Config ,
12951295 tokenizerData: Config ,
12961296 tokenizerVocab: TokenizerVocab ? = nil ,
1297- tokenizerMerges: [ Any ] ? = nil ,
1297+ tokenizerMerges: TokenizerMerges ? = nil ,
12981298 strict: Bool = true
12991299 ) throws {
13001300 isLegacy = tokenizerConfig. legacy. boolean ( or: true )
@@ -1328,7 +1328,7 @@ class LlamaPreTrainedTokenizer: PreTrainedTokenizer, @unchecked Sendable {
13281328 tokenizerConfig: Config ,
13291329 tokenizerData: Config ,
13301330 tokenizerVocab: TokenizerVocab ? ,
1331- tokenizerMerges: [ Any ] ? ,
1331+ tokenizerMerges: TokenizerMerges ? ,
13321332 strict: Bool = true
13331333 ) async throws -> PreTrainedTokenizer {
13341334 let isLegacy = tokenizerConfig. legacy. boolean ( or: true )
0 commit comments