Skip to content

Commit 566641a

Browse files
committed
ft fixes: speaker selection, weight mismatch
Fix an error when loading the fine-tuned model caused by an incorrect vocab size. Add an option to the prompt node to set a speaker. Move tokenizer initialization into the prompt node's initializer: it is only used there, and initializing it eagerly produces excessive console spam when Orpheus workflows are not in use.
1 parent 136ad36 commit 566641a

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

nodes.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,6 @@ def __call__(self, input_ids, score):
137137
new_score[:,code_base:code_base+4096] = score[:,code_base:code_base+4096]
138138
return new_score
139139

140-
141-
142-
#TODO: properly load this
143-
tok = LLAMA3Tokenizer()
144-
145140
class LoadOrpheus:
146141
@classmethod
147142
def INPUT_TYPES(s):
@@ -156,6 +151,8 @@ def loadorpheus(self, model):
156151
conf = os.path.join(os.path.split(__file__)[0], 'orpheus-config.json')
157152
config = PretrainedConfig.from_json_file(conf)
158153
sd = safetensors.torch.load_file(model)
154+
#TODO: use this to detect pt/ft and add further tweaks?
155+
config.vocab_size = sd['lm_head.weight'].size(0)
159156
model = LlamaForCausalLM.from_pretrained(None, config=config, state_dict=sd)
160157
return model,
161158

@@ -206,14 +203,21 @@ def sample(self, model, prompt, add_start_token, seed=None):
206203
class OrpheusPrompt:
207204
@classmethod
208205
def INPUT_TYPES(s):
209-
return {"required": {"text": ("STRING", {"multiline": True})},}
206+
return {"required": {"text": ("STRING", {"multiline": True})},
207+
"optional": {"speaker": (['None','tara','leah','jess','leo','dan','mia','zac','zoe'],)}}
210208
FUNCTION = "encodeprompt"
211209
RETURN_TYPES = ("ORPH_TOKENS",)
212210
CATEOGRY = "Orpheus"
213-
def encodeprompt(self, text):
211+
def __init__(self):
212+
#TODO: properly load this
213+
tok = LLAMA3Tokenizer()
214+
self.tokenizer = tok.tokenizer
215+
def encodeprompt(self, text, speaker='None'):
216+
if speaker != 'None':
217+
text = speaker + ": " + text
214218
#start_of_text is included during tokenization automatically
215219
tokens = [TOKENS['start_of_human']] \
216-
+ tok.tokenizer(text).input_ids \
220+
+ self.tokenizer(text).input_ids \
217221
+ [TOKENS['end_of_text'], TOKENS['end_of_human']]
218222
return tokens,
219223

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "comfyui-orpheus"
33
description = "Nodes for using Orpheus-TTS in ComfyUI"
4-
version = "0.1.2"
4+
version = "0.1.3"
55
classifiers = [
66
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)"
77
]

0 commit comments

Comments
 (0)