Fix black formatting in model.py and text.py

yoshphys · claude · yoshphys · commit 7d8698f3bd8d · 2026-03-22T11:39:19.000+09:00
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/mlx_audio/tts/models/irodori_tts/model.py b/mlx_audio/tts/models/irodori_tts/model.py
@@ -113,9 +113,7 @@ def __init__(self, model_dim: int, rank: int, eps: float):
         self.scale_up = nn.Linear(rank, model_dim, bias=True)
         self.gate_up = nn.Linear(rank, model_dim, bias=True)
 
-    def __call__(
-        self, x: mx.array, cond_embed: mx.array
-    ) -> Tuple[mx.array, mx.array]:
+    def __call__(self, x: mx.array, cond_embed: mx.array) -> Tuple[mx.array, mx.array]:
         shift, scale, gate = mx.split(cond_embed, 3, axis=-1)
         shift = self.shift_up(self.shift_down(nn.silu(shift))) + shift
         scale = self.scale_up(self.scale_down(nn.silu(scale))) + scale
@@ -324,7 +322,9 @@ def __init__(self, dim: int, heads: int, mlp_hidden_dim: int, norm_eps: float):
     def __call__(
         self, x: mx.array, mask: Optional[mx.array], freqs_cis: RotaryCache
     ) -> mx.array:
-        x = x + self.attention(self.attention_norm(x), key_mask=mask, freqs_cis=freqs_cis)
+        x = x + self.attention(
+            self.attention_norm(x), key_mask=mask, freqs_cis=freqs_cis
+        )
         x = x + self.mlp(self.mlp_norm(x))
         return x
 
@@ -394,9 +394,7 @@ def __init__(
             TextBlock(dim, heads, mlp_hidden, norm_eps) for _ in range(num_layers)
         ]
 
-    def __call__(
-        self, latent: mx.array, mask: Optional[mx.array] = None
-    ) -> mx.array:
+    def __call__(self, latent: mx.array, mask: Optional[mx.array] = None) -> mx.array:
         x = self.in_proj(latent) / 6.0
         freqs_cis = precompute_freqs_cis(self.head_dim, x.shape[1])
         if mask is not None:
@@ -453,8 +451,13 @@ def __call__(
     ) -> mx.array:
         x_norm, attn_gate = self.attention_adaln(x, cond_embed)
         x = x + attn_gate * self.attention(
-            x_norm, text_mask, speaker_mask, freqs_cis,
-            kv_cache_text, kv_cache_speaker, start_pos,
+            x_norm,
+            text_mask,
+            speaker_mask,
+            freqs_cis,
+            kv_cache_text,
+            kv_cache_speaker,
+            start_pos,
         )
         x_norm, mlp_gate = self.mlp_adaln(x, cond_embed)
         x = x + mlp_gate * self.mlp(x_norm)
@@ -554,7 +557,9 @@ def build_kv_cache(
         speaker_state: mx.array,
     ) -> Tuple[List[KVCache], List[KVCache]]:
         """Pre-compute per-layer text/speaker KV projections for fast sampling."""
-        kv_text = [block.attention.get_kv_cache_text(text_state) for block in self.blocks]
+        kv_text = [
+            block.attention.get_kv_cache_text(text_state) for block in self.blocks
+        ]
         kv_speaker = [
             block.attention.get_kv_cache_speaker(speaker_state) for block in self.blocks
         ]
@@ -576,7 +581,9 @@ def forward_with_conditions(
         kv_speaker: Optional[List[KVCache]] = None,
         start_pos: int = 0,
     ) -> mx.array:
-        t_embed = get_timestep_embedding(t, self.cfg.timestep_embed_dim).astype(x_t.dtype)
+        t_embed = get_timestep_embedding(t, self.cfg.timestep_embed_dim).astype(
+            x_t.dtype
+        )
         cond_embed = self.cond_module(t_embed)[:, None, :]  # (B, 1, 3*model_dim)
 
         x = self.in_proj(x_t)
@@ -594,8 +601,14 @@ def forward_with_conditions(
                 else block.attention.get_kv_cache_speaker(speaker_state)
             )
             x = block(
-                x, cond_embed, text_mask, speaker_mask,
-                freqs_cis, kv_t, kv_s, start_pos,
+                x,
+                cond_embed,
+                text_mask,
+                speaker_mask,
+                freqs_cis,
+                kv_t,
+                kv_s,
+                start_pos,
             )
 
         x = self.out_norm(x)
diff --git a/mlx_audio/tts/models/irodori_tts/text.py b/mlx_audio/tts/models/irodori_tts/text.py
@@ -14,8 +14,8 @@
 _REPLACE_MAP: dict[str, str] = {
     r"\t": "",
     r"\[n\]": "",
-    r" ": "",       # narrow no-break space (U+202F) / ideographic space handled below
-    r"　": "",       # ideographic space
+    r" ": "",  # narrow no-break space (U+202F) / ideographic space handled below
+    r"　": "",  # ideographic space
     r"[;▼♀♂《》≪≫①②③④⑤⑥]": "",
     r"[\u02d7\u2010-\u2015\u2043\u2212\u23af\u23e4\u2500\u2501\u2e3a\u2e3b]": "",
     r"[\uff5e\u301C]": "ー",
@@ -38,7 +38,10 @@
 
 # Fullwidth 0-9 → halfwidth
 _FULLWIDTH_DIGITS_TO_HALFWIDTH = str.maketrans(
-    {chr(full): chr(half) for full, half in zip(range(0xFF10, 0xFF1A), range(0x30, 0x3A))}
+    {
+        chr(full): chr(half)
+        for full, half in zip(range(0xFF10, 0xFF1A), range(0x30, 0x3A))
+    }
 )
 
 # Halfwidth katakana → fullwidth katakana
@@ -67,7 +70,11 @@ def normalize_text(text: str) -> str:
 
     # Strip surrounding bracket pairs
     for open_br, close_br in [
-        ("「", "」"), ("『", "』"), ("（", "）"), ("【", "】"), ("(", ")")
+        ("「", "」"),
+        ("『", "』"),
+        ("（", "）"),
+        ("【", "】"),
+        ("(", ")"),
     ]:
         if text.startswith(open_br) and text.endswith(close_br):
             text = text[1:-1]