@@ -19,27 +19,27 @@ def estimate_autoregressive_vram(
    max_seq_len: int,
    batch_size: int = 1,
    dtype=torch.float16,
-    intermediate_factor: float = 4.0,
+    intermediate_factor: float = 4.0,
    device=torch.device('cuda')
) -> bool:
-
+
    dtype_size = torch.finfo(dtype).bits // 8
    kv_cache_bytes = num_layers * max_seq_len * hidden_dim * 2 * batch_size * dtype_size

-    # we only calculate hidden states in cuda graphs, so we don't care about the output logits
-    input_bytes = output_bytes = batch_size * max_seq_len * hidden_dim * dtype_size
-
+    # we only calculate hidden states in cuda graphs, so we don't care about the output logits
+    input_bytes = output_bytes = batch_size * max_seq_len * hidden_dim * dtype_size
+
    # rough calculation for activation sizes
    intermediate_bytes = intermediate_factor * output_bytes
-
+
    total_estimated = kv_cache_bytes + input_bytes + output_bytes + intermediate_bytes
-
+
    # get vram info
    free_vram = get_free_memory(device)
    minimum_vram = minimum_inference_memory()
-
+
    enough_vram = free_vram - minimum_vram >= total_estimated
-
+
    return enough_vram

class TopKLogits:
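As a rough sanity check of the KV-cache term in `estimate_autoregressive_vram` above, the snippet below plugs in made-up model dimensions (assumed values, not from this PR): 32 layers with a 4096-wide hidden state and an 8192-token fp16 cache come to about 4 GiB before activations.

```python
import torch

# hypothetical model dimensions, chosen only to illustrate the formula above
num_layers, hidden_dim, max_seq_len, batch_size = 32, 4096, 8192, 1
dtype_size = torch.finfo(torch.float16).bits // 8  # 2 bytes per element

# keys + values (the factor of 2) for every layer and position of the cache
kv_cache_bytes = num_layers * max_seq_len * hidden_dim * 2 * batch_size * dtype_size
print(kv_cache_bytes / 1024**3)  # 4.0 (GiB)
```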
@@ -64,7 +64,7 @@ def __init__(self, temperature: float):
    def __call__(self, scores: torch.FloatTensor) -> torch.FloatTensor:
        scores_processed = scores / self.temperature
        return scores_processed
-
+
class TopPLogitsWarper:
    def __init__(self, top_p: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
        top_p = float(top_p)
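For context, a top-p (nucleus) filter with this signature is commonly implemented along the lines of the sketch below; this is a generic formulation to show the idea, not the body of this PR's `TopPLogitsWarper`.

```python
import torch

def top_p_filter(scores: torch.Tensor, top_p: float = 0.9,
                 filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1) -> torch.Tensor:
    # sort logits descending and find the smallest prefix whose probability mass exceeds top_p
    sorted_logits, sorted_indices = torch.sort(scores, descending=True, dim=-1)
    cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)
    sorted_indices_to_remove = cumulative_probs > top_p
    # shift right so the token that crosses the threshold is kept, and honor min_tokens_to_keep
    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
    sorted_indices_to_remove[..., :min_tokens_to_keep] = False
    # map the mask back to the unsorted vocabulary order and mask out the tail
    indices_to_remove = sorted_indices_to_remove.scatter(-1, sorted_indices, sorted_indices_to_remove)
    return scores.masked_fill(indices_to_remove, filter_value)
```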
@@ -175,7 +175,7 @@ def from_model_config(cls, config_dict: dict, **kwargs) -> GenerationConfig:

        config_dict = {key: value for key, value in config_dict.items() if value is not None}
        valid_fields = {f.name for f in fields(cls)}
-
+
        filtered_args = {k: v for k, v in {**config_dict, **kwargs}.items() if k in valid_fields}

        generation_config = cls(**filtered_args)
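The `from_model_config` pattern above (drop `None` values, then keep only keys that are actual dataclass fields) can be exercised in isolation; `ToyConfig` below is a made-up stand-in, not this PR's `GenerationConfig`.

```python
from dataclasses import dataclass, fields

@dataclass
class ToyConfig:
    max_new_length: int = 1024
    top_p: float = 1.0

raw = {"max_new_length": 256, "top_p": None, "unknown_key": 42}
raw = {k: v for k, v in raw.items() if v is not None}   # drop unset values
valid = {f.name for f in fields(ToyConfig)}              # names the dataclass accepts
print(ToyConfig(**{k: v for k, v in raw.items() if k in valid}))
# ToyConfig(max_new_length=256, top_p=1.0)
```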
@@ -216,7 +216,7 @@ def __init__(self, model, device, kv_cache_lengths: list = [1024, 4096, 8192]):
        self.model.cache_config = self.cache_config

        self.kv_caches = {
-
+
            length: StaticCache(
                config=self.cache_config,
                max_batch_size=self.cache_config.max_batch,
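The dict above pre-allocates one `StaticCache` per bucket length, so picking a cache at generation time reduces to "smallest bucket that fits the full sequence". A minimal, self-contained illustration of that selection (hypothetical helper, not part of the PR):

```python
def pick_kv_bucket(needed_len: int, bucket_lengths=(1024, 4096, 8192)) -> int:
    # choose the smallest pre-allocated cache that can hold prompt + new tokens
    for length in sorted(bucket_lengths):
        if needed_len <= length:
            return length
    raise ValueError(f"sequence of {needed_len} tokens exceeds the largest bucket")

print(pick_kv_bucket(600))   # 1024
print(pick_kv_bucket(3000))  # 4096
```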
@@ -234,8 +234,8 @@ def __init__(self, model, device, kv_cache_lengths: list = [1024, 4096, 8192]):

        # cuda graphs only help if input shapes are constant
        if (
-            device == "cuda"
-            and hasattr(model, "capture_model")
+            device == "cuda"
+            and hasattr(model, "capture_model")
            and self.model.cache_implementation == "static"
            and self.model.use_kv_buckets
            and enough_vram
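The "constant input shapes" requirement comes from how CUDA graph replays reuse fixed buffers. A generic capture/replay sketch in plain PyTorch (a standalone illustration of the idea, not this PR's `capture_model` implementation):

```python
import torch

if torch.cuda.is_available():
    layer = torch.nn.Linear(16, 16, device="cuda")
    static_input = torch.zeros(1, 16, device="cuda")   # buffer reused for every replay

    # warm up on a side stream before capture, as CUDA graphs require
    s = torch.cuda.Stream()
    s.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(s):
        for _ in range(3):
            static_output = layer(static_input)
    torch.cuda.current_stream().wait_stream(s)

    g = torch.cuda.CUDAGraph()
    with torch.cuda.graph(g):
        static_output = layer(static_input)

    # replays reuse the captured kernels, so shapes (and buffers) must never change
    static_input.copy_(torch.randn(1, 16, device="cuda"))
    g.replay()
```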
@@ -247,7 +247,7 @@ def __init__(self, model, device, kv_cache_lengths: list = [1024, 4096, 8192]):
    @torch.inference_mode()
    def generate(self, input_ids: Optional[torch.LongTensor] = None, max_new_length: int = 1024, min_new_length=0,
                 top_k: int = 50, top_p: float = 1.0, temperature: float = 1.0, do_sample: bool = False, seed=None, **kwargs):
-
+
        if seed is not None:
            torch_generator = torch.Generator(device=input_ids.device).manual_seed(seed)
        else:
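A per-call `torch.Generator` is what makes `seed` reproducible without disturbing global RNG state; a tiny CPU example of the same idea (toy probabilities, not this PR's sampling loop):

```python
import torch

probs = torch.tensor([[0.1, 0.2, 0.3, 0.4]])

gen_a = torch.Generator().manual_seed(1234)
gen_b = torch.Generator().manual_seed(1234)

# identical seeds give identical draws, independent of torch.manual_seed / global state
tok_a = torch.multinomial(probs, num_samples=1, generator=gen_a)
tok_b = torch.multinomial(probs, num_samples=1, generator=gen_b)
assert torch.equal(tok_a, tok_b)
```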
@@ -335,7 +335,7 @@ def generate(self, input_ids: Optional[torch.LongTensor] = None, max_new_length:
        # TODO: have a default self._sample fn and a default check if the model supports autoregGen or not
        if not hasattr(self.model, "_sample"):
            raise ValueError("Model doesn't support AutoRegressive Generation!")
-
+
        self._prepare_kv_caches()

        result = self.model._sample(
@@ -347,7 +347,7 @@ def generate(self, input_ids: Optional[torch.LongTensor] = None, max_new_length:
        )

        return result
-
+
    def _prepare_kv_caches(self):
        for kv_cache in self.kv_caches.values():
            kv_cache.reset()
@@ -357,13 +357,13 @@ def get_generation_mode(self, config: GenerationConfig):
            return GenerationSampling.BEAM_SAMPLING
        else:
            return GenerationSampling.GREEDY_SEARCH
-
+
    def _prepare_generated_length(
        self,
        generation_config: GenerationConfig,
        input_ids_length,
    ):
-
+
        """ max_length = user_input_id_tokens + generation_max_length """

        if generation_config.max_new_length is not None:
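The docstring's rule is plain additive bookkeeping; with an assumed 32-token prompt:

```python
# hypothetical numbers mirroring _prepare_generated_length's bookkeeping
input_ids_length = 32
max_new_length, min_new_length = 1024, 0
max_length = max_new_length + input_ids_length  # 1056
min_length = min_new_length + input_ids_length  # 32
```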
@@ -374,11 +374,11 @@ def _prepare_generated_length(
            generation_config.min_length = generation_config.min_new_length + input_ids_length

        return generation_config
-
+
    def _get_cache(
        self, cache_implementation: str, batch_size: int, max_cache_len: int, device: torch.device, model_kwargs
    ) -> Cache:
-
+
        assert cache_implementation == "static", f"Only 'static' cache is supported, got {cache_implementation}"

        cache_cls: Cache = NEED_SETUP_CACHE_CLASSES_MAPPING[cache_implementation]
@@ -412,7 +412,7 @@ def _get_cache(

        return self.model._cache

-
+
    def _prepare_cache_for_generation(
        self,
        generation_config: GenerationConfig,
@@ -466,7 +466,7 @@ def _prepare_generation_config(self, generation_config: GenerationConfig, **kwar
        model_kwargs = generation_config.update(**kwargs)

        return generation_config, model_kwargs
-
+
    def _validate_generated_length(self, generation_config: GenerationConfig, input_ids_length):
        """Performs validation related to the resulting generated length"""

@@ -498,7 +498,7 @@ def _validate_generated_length(self, generation_config: GenerationConfig, input_
                f" the maximum possible length ({generation_config.max_length})." + min_length_error_suffix,
                UserWarning,
            )
-
+
    def _expand_inputs_for_generation(
        self,
        expand_size: int = 1,
@@ -526,13 +526,13 @@ def _expand_dict_for_generation(dict_to_expand):
        model_kwargs = _expand_dict_for_generation(model_kwargs)

        return input_ids, model_kwargs
-
+
    def _prepare_special_tokens(
        self,
        generation_config: GenerationConfig,
        device: Optional[Union[torch.device, str]] = None,
    ):
-
+
        def _tensor_or_none(token, device=None):
            if token is None:
                return token
@@ -564,7 +564,7 @@ def _prepare_attention_mask_for_generation(
        generation_config: GenerationConfig,
        model_kwargs: dict[str, Any],
    ) -> torch.LongTensor:
-
+
        pad_token_id = generation_config._pad_token_tensor
        eos_token_id = generation_config._eos_token_tensor

@@ -593,12 +593,12 @@ def _prepare_attention_mask_for_generation(
            attention_mask_from_padding * can_infer_attention_mask + default_attention_mask * ~can_infer_attention_mask
        )
        return attention_mask
-
+
def auto_sample(node, patcher, input_ids, max_new_length=1024, min_new_length=0, top_k=50, top_p=1.0, temperature=1.0, do_sample=False, seed=None, **kwargs):
    # to work with BaseModel
    if hasattr(patcher, "model") and hasattr(patcher.model, "diffusion_model"):
        model = patcher.model.diffusion_model
-
+
    if node._cached_autoregressive_sampler is None or node._cached_autoregressive_sampler.model is not model:
        if model.device != patcher.load_device:
            model = model.to(patcher.load_device, dtype=model.dtype)
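The masked blend above is the usual "only infer the attention mask from padding when it is unambiguous" trick; in isolation it behaves like this (toy tensors and a hypothetical pad id, not the PR's helper):

```python
import torch

input_ids = torch.tensor([[7, 7, 5, 9, 2]])
pad_token_id = 7

default_attention_mask = torch.ones_like(input_ids)
attention_mask_from_padding = (input_ids != pad_token_id).long()

# trust the padding-derived mask only when pad cannot be confused with real content;
# here the flag is simply set by hand to show both branches of the blend
can_infer_attention_mask = torch.tensor(True)
attention_mask = (
    attention_mask_from_padding * can_infer_attention_mask
    + default_attention_mask * ~can_infer_attention_mask
)
print(attention_mask)  # tensor([[0, 0, 1, 1, 1]])
```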
@@ -610,7 +610,7 @@ def auto_sample(node, patcher, input_ids, max_new_length=1024, min_new_length=0,
        kwargs.update({k: v for k, v in input_ids.items() if k != "input_ids"})
    else:
        main_input_ids = input_ids
-
+
    device = node._cached_autoregressive_sampler.device

    main_input_ids = main_input_ids.to(device)