@@ -176,7 +176,8 @@ def build_model_from_huggingface_ckpt(
176
176
177
177
t5_model_state_dict = {
178
178
"token_embeddings.weight" : hf_weights ["shared.weight" ],
179
- "norm1.weight" : hf_weights ["encoder.final_layer_norm.weight" ],
179
+ "encoder.token_embeddings.weight" : hf_weights ["shared.weight" ],
180
+ "encoder.norm.weight" : hf_weights ["encoder.final_layer_norm.weight" ],
180
181
"encoder.layers.0.self_attn.relative_attention_bias.weight" : hf_weights [
181
182
"encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"
182
183
],
@@ -210,7 +211,7 @@ def build_model_from_huggingface_ckpt(
210
211
211
212
# Convert decoder layers if model is encoder-decoder
212
213
if not config .encoder_only :
213
- t5_model_state_dict ["norm2.weight" ] = hf_weights ["decoder.final_layer_norm.weight" ]
214
+ t5_model_state_dict ["decoder.norm.weight" ] = hf_weights ["decoder.final_layer_norm.weight" ]
214
215
t5_model_state_dict ["decoder.layers.0.self_attn.relative_attention_bias.weight" ] = hf_weights [
215
216
"decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight"
216
217
]
@@ -331,7 +332,7 @@ def config(self) -> T5Conf:
331
332
"""
332
333
333
334
T5_BASE_ENCODER = T5Bundle (
334
- _path = urljoin (_TEXT_BUCKET , "t5.base.encoder.pt" ),
335
+ _path = urljoin (_TEXT_BUCKET , "t5.base.encoder.v2.pt" ),
335
336
_config = T5Conf (encoder_only = True ),
336
337
transform = lambda : T5Transform (
337
338
urljoin (_TEXT_BUCKET , "t5_tokenizer_base.model" ),
@@ -344,7 +345,7 @@ def config(self) -> T5Conf:
344
345
T5_BASE_ENCODER .__doc__ = ENCODER_DOC .format ("BASE" , "base" )
345
346
346
347
T5_BASE = T5Bundle (
347
- _path = urljoin (_TEXT_BUCKET , "t5.base.pt" ),
348
+ _path = urljoin (_TEXT_BUCKET , "t5.base.v2.pt" ),
348
349
_config = T5Conf (encoder_only = False ),
349
350
transform = lambda : T5Transform (
350
351
urljoin (_TEXT_BUCKET , "t5_tokenizer_base.model" ),
@@ -357,7 +358,7 @@ def config(self) -> T5Conf:
357
358
T5_BASE .__doc__ = MODEL_DOC .format ("BASE" , "base" )
358
359
359
360
T5_BASE_GENERATION = T5Bundle (
360
- _path = urljoin (_TEXT_BUCKET , "t5.base.generation.pt" ),
361
+ _path = urljoin (_TEXT_BUCKET , "t5.base.generation.v2.pt" ),
361
362
_config = T5Conf (encoder_only = False , linear_head = True ),
362
363
transform = lambda : T5Transform (
363
364
urljoin (_TEXT_BUCKET , "t5_tokenizer_base.model" ),
@@ -370,7 +371,7 @@ def config(self) -> T5Conf:
370
371
T5_BASE_GENERATION .__doc__ = GENERATION_DOC .format ("BASE" , "base" )
371
372
372
373
T5_SMALL_ENCODER = T5Bundle (
373
- _path = urljoin (_TEXT_BUCKET , "t5.small.encoder.pt" ),
374
+ _path = urljoin (_TEXT_BUCKET , "t5.small.encoder.v2.pt" ),
374
375
_config = T5Conf (
375
376
encoder_only = True ,
376
377
embedding_dim = 512 ,
@@ -391,7 +392,7 @@ def config(self) -> T5Conf:
391
392
392
393
393
394
T5_SMALL = T5Bundle (
394
- _path = urljoin (_TEXT_BUCKET , "t5.small.pt" ),
395
+ _path = urljoin (_TEXT_BUCKET , "t5.small.v2.pt" ),
395
396
_config = T5Conf (
396
397
encoder_only = False ,
397
398
embedding_dim = 512 ,
@@ -411,7 +412,7 @@ def config(self) -> T5Conf:
411
412
T5_SMALL .__doc__ = MODEL_DOC .format ("SMALL" , "small" )
412
413
413
414
T5_SMALL_GENERATION = T5Bundle (
414
- _path = urljoin (_TEXT_BUCKET , "t5.small.generation.pt" ),
415
+ _path = urljoin (_TEXT_BUCKET , "t5.small.generation.v2.pt" ),
415
416
_config = T5Conf (
416
417
encoder_only = False ,
417
418
linear_head = True ,
@@ -432,7 +433,7 @@ def config(self) -> T5Conf:
432
433
T5_SMALL_GENERATION .__doc__ = GENERATION_DOC .format ("SMALL" , "small" )
433
434
434
435
T5_LARGE_ENCODER = T5Bundle (
435
- _path = urljoin (_TEXT_BUCKET , "t5.large.encoder.pt" ),
436
+ _path = urljoin (_TEXT_BUCKET , "t5.large.encoder.v2.pt" ),
436
437
_config = T5Conf (
437
438
encoder_only = True ,
438
439
embedding_dim = 1024 ,
@@ -452,7 +453,7 @@ def config(self) -> T5Conf:
452
453
T5_LARGE_ENCODER .__doc__ = ENCODER_DOC .format ("LARGE" , "large" )
453
454
454
455
T5_LARGE = T5Bundle (
455
- _path = urljoin (_TEXT_BUCKET , "t5.large.pt" ),
456
+ _path = urljoin (_TEXT_BUCKET , "t5.large.v2.pt" ),
456
457
_config = T5Conf (
457
458
encoder_only = False ,
458
459
embedding_dim = 1024 ,
@@ -472,7 +473,7 @@ def config(self) -> T5Conf:
472
473
T5_LARGE .__doc__ = MODEL_DOC .format ("LARGE" , "large" )
473
474
474
475
T5_LARGE_GENERATION = T5Bundle (
475
- _path = urljoin (_TEXT_BUCKET , "t5.large.generation.pt" ),
476
+ _path = urljoin (_TEXT_BUCKET , "t5.large.generation.v2.pt" ),
476
477
_config = T5Conf (
477
478
encoder_only = False ,
478
479
linear_head = True ,
@@ -493,7 +494,7 @@ def config(self) -> T5Conf:
493
494
T5_LARGE_GENERATION .__doc__ = GENERATION_DOC .format ("LARGE" , "large" )
494
495
495
496
T5_3B_ENCODER = T5Bundle (
496
- _path = urljoin (_TEXT_BUCKET , "t5.3b.encoder.pt" ),
497
+ _path = urljoin (_TEXT_BUCKET , "t5.3b.encoder.v2.pt" ),
497
498
_config = T5Conf (
498
499
encoder_only = True ,
499
500
embedding_dim = 1024 ,
@@ -514,7 +515,7 @@ def config(self) -> T5Conf:
514
515
T5_3B_ENCODER .__doc__ = ENCODER_DOC .format ("3B" , "3B" )
515
516
516
517
T5_3B = T5Bundle (
517
- _path = urljoin (_TEXT_BUCKET , "t5.3b.pt" ),
518
+ _path = urljoin (_TEXT_BUCKET , "t5.3b.v2.pt" ),
518
519
_config = T5Conf (
519
520
encoder_only = False ,
520
521
embedding_dim = 1024 ,
@@ -535,7 +536,7 @@ def config(self) -> T5Conf:
535
536
T5_3B .__doc__ = MODEL_DOC .format ("3B" , "3B" )
536
537
537
538
T5_3B_GENERATION = T5Bundle (
538
- _path = urljoin (_TEXT_BUCKET , "t5.3b.generation.pt" ),
539
+ _path = urljoin (_TEXT_BUCKET , "t5.3b.generation.v2.pt" ),
539
540
_config = T5Conf (
540
541
encoder_only = False ,
541
542
linear_head = True ,
@@ -557,7 +558,7 @@ def config(self) -> T5Conf:
557
558
T5_3B_GENERATION .__doc__ = GENERATION_DOC .format ("3B" , "3B" )
558
559
559
560
T5_11B_ENCODER = T5Bundle (
560
- _path = urljoin (_TEXT_BUCKET , "t5.11b.encoder.pt" ),
561
+ _path = urljoin (_TEXT_BUCKET , "t5.11b.encoder.v2.pt" ),
561
562
_config = T5Conf (
562
563
encoder_only = True ,
563
564
embedding_dim = 1024 ,
@@ -578,7 +579,7 @@ def config(self) -> T5Conf:
578
579
T5_11B_ENCODER .__doc__ = ENCODER_DOC .format ("11B" , "11B" )
579
580
580
581
T5_11B = T5Bundle (
581
- _path = urljoin (_TEXT_BUCKET , "t5.11b.pt" ),
582
+ _path = urljoin (_TEXT_BUCKET , "t5.11b.v2.pt" ),
582
583
_config = T5Conf (
583
584
encoder_only = False ,
584
585
embedding_dim = 1024 ,
@@ -599,7 +600,7 @@ def config(self) -> T5Conf:
599
600
T5_11B .__doc__ = MODEL_DOC .format ("11B" , "11B" )
600
601
601
602
T5_11B_GENERATION = T5Bundle (
602
- _path = urljoin (_TEXT_BUCKET , "t5.11b.generation.pt" ),
603
+ _path = urljoin (_TEXT_BUCKET , "t5.11b.generation.v2.pt" ),
603
604
_config = T5Conf (
604
605
encoder_only = False ,
605
606
linear_head = True ,
0 commit comments