Skip to content

Commit 2129e7d

Browse files
Fix mistral 3 tokenizer code failing on latest transformers version and other breakage. (#12095)
* Fix Mistral 3 tokenizer code failing on the latest transformers version.
* Add requests to the requirements.
1 parent 7ee77ff commit 2129e7d

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

comfy/sd1_clip.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No
466466
return embed_out
467467

468468
class SDTokenizer:
469-
def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=True, min_length=None, pad_token=None, end_token=None, min_padding=None, pad_left=False, disable_weights=False, tokenizer_data={}, tokenizer_args={}):
469+
def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=True, min_length=None, pad_token=None, end_token=None, start_token=None, min_padding=None, pad_left=False, disable_weights=False, tokenizer_data={}, tokenizer_args={}):
470470
if tokenizer_path is None:
471471
tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer")
472472
self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path, **tokenizer_args)
@@ -479,16 +479,23 @@ def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedd
479479
empty = self.tokenizer('')["input_ids"]
480480
self.tokenizer_adds_end_token = has_end_token
481481
if has_start_token:
482-
self.tokens_start = 1
483-
self.start_token = empty[0]
482+
if len(empty) > 0:
483+
self.tokens_start = 1
484+
self.start_token = empty[0]
485+
else:
486+
self.tokens_start = 0
487+
self.start_token = start_token
488+
if start_token is None:
489+
logging.warning("WARNING: There's something wrong with your tokenizers.'")
490+
484491
if end_token is not None:
485492
self.end_token = end_token
486493
else:
487494
if has_end_token:
488495
self.end_token = empty[1]
489496
else:
490497
self.tokens_start = 0
491-
self.start_token = None
498+
self.start_token = start_token
492499
if end_token is not None:
493500
self.end_token = end_token
494501
else:

comfy/text_encoders/flux.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def from_pretrained(path, **kwargs):
118118
class Mistral3Tokenizer(sd1_clip.SDTokenizer):
119119
def __init__(self, embedding_directory=None, tokenizer_data={}):
120120
self.tekken_data = tokenizer_data.get("tekken_model", None)
121-
super().__init__("", pad_with_end=False, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, max_length=99999999, min_length=1, pad_left=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data)
121+
super().__init__("", pad_with_end=False, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, start_token=1, max_length=99999999, min_length=1, pad_left=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data)
122122

123123
def state_dict(self):
124124
return {"tekken_model": self.tekken_data}

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ alembic
2222
SQLAlchemy
2323
av>=14.2.0
2424
comfy-kitchen>=0.2.7
25+
requests
2526

2627
#non essential dependencies:
2728
kornia>=0.7.1

0 commit comments

Comments (0)