Commit 65042f7
Make it easier to set a custom template for hunyuan video.
1 parent 7c7c70c commit 65042f7

7 files changed: +13 -10 lines changed

comfy/sd.py

Lines changed: 2 additions & 2 deletions
@@ -134,8 +134,8 @@ def add_patches(self, patches, strength_patch=1.0, strength_model=1.0):
     def clip_layer(self, layer_idx):
         self.layer_idx = layer_idx
 
-    def tokenize(self, text, return_word_ids=False):
-        return self.tokenizer.tokenize_with_weights(text, return_word_ids)
+    def tokenize(self, text, return_word_ids=False, **kwargs):
+        return self.tokenizer.tokenize_with_weights(text, return_word_ids, **kwargs)
 
     def add_hooks_to_dict(self, pooled_dict: dict[str]):
         if self.apply_hooks_to_conds:
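With this change, keyword arguments passed to CLIP.tokenize are forwarded unchanged to the active tokenizer's tokenize_with_weights. A minimal usage sketch, assuming clip is an already-loaded CLIP instance (the variable name is illustrative):

    # Existing calls are unaffected:
    tokens = clip.tokenize("a cat in the rain")
    # Model-specific options now pass straight through via **kwargs; they only
    # take effect if the active tokenizer declares the matching parameter:
    tokens = clip.tokenize("a cat in the rain", llama_template="custom template here")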

comfy/sd1_clip.py

Lines changed: 2 additions & 2 deletions
@@ -482,7 +482,7 @@ def _try_get_embedding(self, embedding_name:str):
         return (embed, leftover)
 
 
-    def tokenize_with_weights(self, text:str, return_word_ids=False):
+    def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
         '''
         Takes a prompt and converts it to a list of (token, weight, word id) elements.
         Tokens can both be integer tokens and pre computed CLIP tensors.
@@ -596,7 +596,7 @@ def __init__(self, embedding_directory=None, tokenizer_data={}, clip_name="l", t
         tokenizer = tokenizer_data.get("{}_tokenizer_class".format(self.clip), tokenizer)
         setattr(self, self.clip, tokenizer(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data))
 
-    def tokenize_with_weights(self, text:str, return_word_ids=False):
+    def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
         out = {}
         out[self.clip_name] = getattr(self, self.clip).tokenize_with_weights(text, return_word_ids)
         return out
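Adding **kwargs to the base tokenizer and to each wrapper keeps every tokenize_with_weights implementation on one signature: callers can always pass model-specific options, and tokenizers that do not use an option simply ignore it. A minimal sketch of the pattern (both classes are hypothetical, not from the repository):

    # A tokenizer with no extra options lets unknown kwargs land and be ignored:
    class PlainTokenizer:
        def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
            return {"l": []}  # placeholder output

    # A tokenizer that supports an option declares it explicitly:
    class TemplatedTokenizer:
        def tokenize_with_weights(self, text, return_word_ids=False, template=None, **kwargs):
            prompt = "{}{}".format(template if template is not None else "default: ", text)
            return {"llama": prompt}  # placeholder output

    # The same call works against both classes:
    print(PlainTokenizer().tokenize_with_weights("hi", template="sys: "))
    print(TemplatedTokenizer().tokenize_with_weights("hi", template="sys: "))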

comfy/sdxl_clip.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory)
         self.clip_g = SDXLClipGTokenizer(embedding_directory=embedding_directory)
 
-    def tokenize_with_weights(self, text:str, return_word_ids=False):
+    def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
         out = {}
         out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids)
         out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)

comfy/text_encoders/flux.py

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.clip_l = clip_l_tokenizer_class(embedding_directory=embedding_directory)
         self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory)
 
-    def tokenize_with_weights(self, text:str, return_word_ids=False):
+    def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
         out = {}
         out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)
         out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids)

comfy/text_encoders/hunyuan_video.py

Lines changed: 5 additions & 2 deletions
@@ -41,11 +41,14 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.llama_template = """<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n""" # 95 tokens
         self.llama = LLAMA3Tokenizer(embedding_directory=embedding_directory, min_length=1)
 
-    def tokenize_with_weights(self, text:str, return_word_ids=False):
+    def tokenize_with_weights(self, text:str, return_word_ids=False, llama_template=None, **kwargs):
         out = {}
         out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)
 
-        llama_text = "{}{}".format(self.llama_template, text)
+        if llama_template is None:
+            llama_text = "{}{}".format(self.llama_template, text)
+        else:
+            llama_text = "{}{}".format(llama_template, text)
         out["llama"] = self.llama.tokenize_with_weights(llama_text, return_word_ids)
         return out
 
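This is the file the commit title targets: the HunyuanVideo tokenizer now takes an explicit llama_template keyword and falls back to the built-in self.llama_template when it is omitted. A hedged usage sketch, assuming clip is a loaded HunyuanVideo CLIP object (the template string is illustrative, not from the repository):

    # Override the default system prompt fed to the LLaMA text encoder:
    custom_template = "<|start_header_id|>system<|end_header_id|>\n\nDescribe the video briefly.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
    tokens = clip.tokenize("a red fox running through snow", llama_template=custom_template)
    # Omitting llama_template keeps the previous behavior:
    tokens = clip.tokenize("a red fox running through snow")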

comfy/text_encoders/hydit.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.hydit_clip = HyditBertTokenizer(embedding_directory=embedding_directory)
         self.mt5xl = MT5XLTokenizer(tokenizer_data={"spiece_model": mt5_tokenizer_data}, embedding_directory=embedding_directory)
 
-    def tokenize_with_weights(self, text:str, return_word_ids=False):
+    def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
         out = {}
         out["hydit_clip"] = self.hydit_clip.tokenize_with_weights(text, return_word_ids)
         out["mt5xl"] = self.mt5xl.tokenize_with_weights(text, return_word_ids)

comfy/text_encoders/sd3_clip.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.clip_g = sdxl_clip.SDXLClipGTokenizer(embedding_directory=embedding_directory)
         self.t5xxl = T5XXLTokenizer(embedding_directory=embedding_directory)
 
-    def tokenize_with_weights(self, text:str, return_word_ids=False):
+    def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs):
         out = {}
         out["g"] = self.clip_g.tokenize_with_weights(text, return_word_ids)
         out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)
