@@ -44,13 +44,18 @@ def define_schema(cls):
4444 io .Combo .Input ("timesignature" , options = ['2' , '3' , '4' , '6' ]),
4545 io .Combo .Input ("language" , options = ["en" , "ja" , "zh" , "es" , "de" , "fr" , "pt" , "ru" , "it" , "nl" , "pl" , "tr" , "vi" , "cs" , "fa" , "id" , "ko" , "uk" , "hu" , "ar" , "sv" , "ro" , "el" ]),
4646 io .Combo .Input ("keyscale" , options = [f"{ root } { quality } " for quality in ["major" , "minor" ] for root in ["C" , "C#" , "Db" , "D" , "D#" , "Eb" , "E" , "F" , "F#" , "Gb" , "G" , "G#" , "Ab" , "A" , "A#" , "Bb" , "B" ]]),
47+ io .Boolean .Input ("generate_audio_codes" , default = True , tooltip = "Enable the LLM that generates audio codes. This can be slow but will increase the quality of the generated audio. Turn this off if you are giving the model an audio reference." , advanced = True ),
48+ io .Float .Input ("cfg_scale" , default = 2.0 , min = 0.0 , max = 100.0 , step = 0.1 , advanced = True ),
49+ io .Float .Input ("temperature" , default = 0.85 , min = 0.0 , max = 2.0 , step = 0.01 , advanced = True ),
50+ io .Float .Input ("top_p" , default = 0.9 , min = 0.0 , max = 2000.0 , step = 0.01 , advanced = True ),
51+ io .Int .Input ("top_k" , default = 0 , min = 0 , max = 100 , advanced = True ),
4752 ],
4853 outputs = [io .Conditioning .Output ()],
4954 )
5055
5156 @classmethod
52- def execute (cls , clip , tags , lyrics , seed , bpm , duration , timesignature , language , keyscale ) -> io .NodeOutput :
53- tokens = clip .tokenize (tags , lyrics = lyrics , bpm = bpm , duration = duration , timesignature = int (timesignature ), language = language , keyscale = keyscale , seed = seed )
def execute(
    cls,
    clip,
    tags,
    lyrics,
    seed,
    bpm,
    duration,
    timesignature,
    language,
    keyscale,
    generate_audio_codes,
    cfg_scale,
    temperature,
    top_p,
    top_k,
) -> io.NodeOutput:
    """Tokenize the tags and song metadata, then encode them into conditioning.

    Every node input except ``clip`` is forwarded to ``clip.tokenize``:
    ``tags`` positionally, the rest as keyword arguments under the same
    names. The resulting tokens are encoded with
    ``clip.encode_from_tokens_scheduled`` and wrapped in an ``io.NodeOutput``.

    Args:
        clip: Text encoder object providing ``tokenize`` and
            ``encode_from_tokens_scheduled``.
        tags: Tag prompt, passed as the positional argument of ``tokenize``.
        lyrics, seed, bpm, duration, language, keyscale: Forwarded unchanged.
        timesignature: Combo selection; converted with ``int()`` before
            being forwarded.
        generate_audio_codes: Per the input tooltip, enables the LLM that
            generates audio codes.
        cfg_scale, temperature, top_p, top_k: Forwarded unchanged;
            presumably sampling controls for that LLM -- TODO confirm
            against the tokenizer implementation.

    Returns:
        ``io.NodeOutput`` holding the encoded conditioning.
    """
    token_data = clip.tokenize(
        tags,
        lyrics=lyrics,
        bpm=bpm,
        duration=duration,
        # The combo widget offers string options, so convert to an integer here.
        timesignature=int(timesignature),
        language=language,
        keyscale=keyscale,
        seed=seed,
        generate_audio_codes=generate_audio_codes,
        cfg_scale=cfg_scale,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )
    encoded = clip.encode_from_tokens_scheduled(token_data)
    return io.NodeOutput(encoded)
5661
@@ -100,14 +105,15 @@ def execute(cls, seconds, batch_size) -> io.NodeOutput:
100105 latent = torch .zeros ([batch_size , 64 , length ], device = comfy .model_management .intermediate_device ())
101106 return io .NodeOutput ({"samples" : latent , "type" : "audio" })
102107
103- class ReferenceTimbreAudio (io .ComfyNode ):
108+ class ReferenceAudio (io .ComfyNode ):
104109 @classmethod
105110 def define_schema (cls ):
106111 return io .Schema (
107112 node_id = "ReferenceTimbreAudio" ,
113+ display_name = "Reference Audio" ,
108114 category = "advanced/conditioning/audio" ,
109115 is_experimental = True ,
110- description = "This node sets the reference audio for timbre (for ace step 1.5) " ,
116+ description = "This node sets the reference audio for ace step 1.5" ,
111117 inputs = [
112118 io .Conditioning .Input ("conditioning" ),
113119 io .Latent .Input ("latent" , optional = True ),
@@ -131,7 +137,7 @@ async def get_node_list(self) -> list[type[io.ComfyNode]]:
131137 EmptyAceStepLatentAudio ,
132138 TextEncodeAceStepAudio15 ,
133139 EmptyAceStep15LatentAudio ,
134- ReferenceTimbreAudio ,
140+ ReferenceAudio ,
135141 ]
136142
137143async def comfy_entrypoint () -> AceExtension :
0 commit comments