- Use an asyncio.Lock to manage model access so concurrent requests are handled safely
- Load and manage speaker embedding files to support personalized speech synthesis
"""

import io
import os
import sys
import asyncio
import time
from typing import Optional, Dict
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse, JSONResponse
# Allowed audio formats
ALLOWED_FORMATS = {"mp3", "wav", "ogg"}


@app.on_event("startup")
async def startup_event():
    """Load ChatTTS model and default speaker embedding when the application starts"""
    # Initialize ChatTTS and the async lock
    app.state.chat = ChatTTS.Chat(get_logger("ChatTTS"))
    app.state.model_lock = asyncio.Lock()  # Use an async lock instead of a thread lock
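    # A single model instance is shared by all requests; awaiting the lock makes
    # each inference exclusive without blocking the event loop while waiting.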

    # Register text normalizers
    app.state.chat.normalizer.register("en", normalizer_en_nemo_text())
    app.state.chat.normalizer.register("zh", normalizer_zh_tn())

    logger.info("Initializing ChatTTS...")
    if app.state.chat.load(source="huggingface"):
        logger.info("Model loaded successfully.")
    else:
        logger.error("Model loading failed, exiting application.")
        raise RuntimeError("Failed to load ChatTTS model")
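    # load(source="huggingface") fetches the ChatTTS weights from the Hugging
    # Face hub (typically cached locally after the first download).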

    # Preload all supported speaker embeddings into memory at startup so that
    # requests never reload them from disk at runtime
    app.state.spk_emb_map = {}
    for voice, spk_path in VOICE_MAP.items():
        if os.path.exists(spk_path):
            app.state.spk_emb_map[voice] = torch.load(
                spk_path, map_location=torch.device("cpu")
            )
            logger.info(f"Preloading speaker embedding: {voice} -> {spk_path}")
        else:
            logger.warning(f"Speaker embedding not found: {spk_path}, skipping preload")
    app.state.spk_emb = app.state.spk_emb_map.get("default")  # Default embedding


# Request parameter whitelist
ALLOWED_PARAMS = {
    "model",
    "input",
    "voice",
    "response_format",
    "speed",
    "stream",
    "output_format",
}


class OpenAITTSRequest(BaseModel):
    """OpenAI TTS request data model"""

    model: str = Field(..., description="Speech synthesis model, fixed as 'tts-1'")
    input: str = Field(
        ..., description="Text content to synthesize", max_length=2048
    )  # Length limit
    voice: Optional[str] = Field(
        "default", description="Voice selection, supports: default, alloy, echo"
    )
    response_format: Optional[str] = Field(
        "mp3", description="Audio format: mp3, wav, ogg"
    )
    speed: Optional[float] = Field(
        1.0, ge=0.5, le=2.0, description="Speed, range 0.5-2.0"
    )
    stream: Optional[bool] = Field(False, description="Whether to stream")
    output_format: Optional[str] = "mp3"  # Optional formats: mp3, wav, ogg
    extra_params: Dict[str, Optional[str]] = Field(
        default_factory=dict, description="Unsupported extra parameters"
    )

    @classmethod
    def validate_request(cls, request_data: Dict):
        # Collect keys outside the whitelist (an assumed reconstruction of the
        # elided check implied by the warning below)
        unsupported_params = set(request_data) - ALLOWED_PARAMS
        if unsupported_params:
            logger.warning(f"Ignoring unsupported parameters: {unsupported_params}")
        return {key: request_data[key] for key in ALLOWED_PARAMS if key in request_data}


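# Example (hypothetical payload): unknown keys are logged and dropped, known keys kept.
#   OpenAITTSRequest.validate_request({"model": "tts-1", "input": "hi", "pitch": "+2"})
#   -> {"model": "tts-1", "input": "hi"}

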
# Unified error response
@app.exception_handler(Exception)
async def custom_exception_handler(request, exc):
    """Custom exception handler"""
    logger.error(f"Error: {str(exc)}")
    return JSONResponse(
        # HTTPException carries its own status_code; anything else becomes a 500
        status_code=getattr(exc, "status_code", 500),
        content={"error": {"message": str(exc), "type": exc.__class__.__name__}},
    )


@app.post("/v1/audio/speech")
async def generate_voice(request_data: Dict):
    """Handle speech synthesis request"""
    request_data = OpenAITTSRequest.validate_request(request_data)
    request = OpenAITTSRequest(**request_data)

    logger.info(
        f"Received request: text={request.input}..., voice={request.voice}, stream={request.stream}"
    )

    # Validate audio format
    if request.response_format not in ALLOWED_FORMATS:
        raise HTTPException(
            400,
            detail=f"Unsupported audio format: {request.response_format}, supported formats: {', '.join(ALLOWED_FORMATS)}",
        )

    # Look up the speaker embedding for the requested voice, falling back to the default
    spk_emb = app.state.spk_emb_map.get(request.voice, app.state.spk_emb)

    # Inference parameters
    params_infer_main = {
        "text": [request.input],
        # The next four entries are not shown in this excerpt; the values below
        # are assumptions matching how they are read in the infer() call
        "stream": request.stream,
        "lang": None,  # let ChatTTS auto-detect the language
        "skip_refine_text": False,
        "use_decoder": True,
        "audio_seed": 12345678,  # Random seed for audio generation
        # "text_seed": 87654321,  # Random seed for text processing, used to control text refinement
        "do_text_normalization": True,  # Perform text normalization
        "do_homophone_replacement": True,  # Perform homophone replacement
    }

    # Inference code parameters
    params_infer_code = app.state.chat.InferCodeParams(
        # prompt=f"[speed_{int(request.speed * 10)}]",  # Convert speed to the ChatTTS prompt format
        prompt="[speed_5]",  # Fixed speed for now; request.speed is not applied
        top_P=0.5,
        top_K=10,
        temperature=0.1,
        # ... (further arguments elided here, presumably including spk_emb=spk_emb) ...
        txt_smp=None,
        stream_batch=24,
        stream_speed=12000,
        pass_first_n_batches=2,
    )

    try:
        async with app.state.model_lock:
            wavs = app.state.chat.infer(
                text=params_infer_main["text"],
                stream=params_infer_main["stream"],
                lang=params_infer_main["lang"],
                skip_refine_text=params_infer_main["skip_refine_text"],
                use_decoder=params_infer_main["use_decoder"],
                do_text_normalization=params_infer_main["do_text_normalization"],
                do_homophone_replacement=params_infer_main["do_homophone_replacement"],
                # params_refine_text=params_refine_text,
                params_infer_code=params_infer_code,
            )
    except Exception as e:
        raise HTTPException(500, detail=f"Speech synthesis failed: {str(e)}")
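    # When stream=True, infer() yields audio chunks as they are generated;
    # otherwise it returns a list with one complete waveform per input text
    # (hence wavs[0] in the non-streaming branch below).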

    def generate_wav_header(sample_rate=24000, bits_per_sample=16, channels=1):
        """Generate WAV file header (without data length)"""
        header = bytearray()
        header.extend(b"RIFF")
        header.extend(b"\xff\xff\xff\xff")  # File size unknown
        header.extend(b"WAVEfmt ")
        header.extend((16).to_bytes(4, "little"))  # fmt chunk size
        header.extend((1).to_bytes(2, "little"))  # PCM format
        # The next five lines were elided in this excerpt; they are reconstructed
        # here from the standard WAV fmt-chunk layout
        header.extend((channels).to_bytes(2, "little"))  # Channel count
        header.extend((sample_rate).to_bytes(4, "little"))  # Sample rate
        byte_rate = sample_rate * channels * bits_per_sample // 8
        header.extend((byte_rate).to_bytes(4, "little"))  # Byte rate
        block_align = channels * bits_per_sample // 8
        header.extend((block_align).to_bytes(2, "little"))  # Block align
        header.extend((bits_per_sample).to_bytes(2, "little"))  # Bits per sample
        header.extend(b"data")
        header.extend(b"\xff\xff\xff\xff")  # Data size unknown
        return bytes(header)
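
    # Writing 0xFFFFFFFF for both size fields is a common convention for live
    # WAV streams: players then read audio until the stream ends.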

    # Handle audio output format
    def convert_audio(wav, format):
        if format == "mp3":
            return pcm_arr_to_mp3_view(wav)
        elif format == "wav":
            return pcm_arr_to_wav_view(
                wav, include_header=False
            )  # No header in streaming chunks
        elif format == "ogg":
            return pcm_arr_to_ogg_view(wav)
        return pcm_arr_to_mp3_view(wav)  # Fallback: default to MP3

    # Map the response format to its MIME type
    media_type = {"mp3": "audio/mpeg", "wav": "audio/wav", "ogg": "audio/ogg"}.get(
        request.response_format, "audio/mpeg"
    )

    # Return streaming audio data
    if request.stream:
        first_chunk = True

        async def audio_stream():
            nonlocal first_chunk
            for wav in wavs:
                if request.response_format == "wav" and first_chunk:
                    yield generate_wav_header()  # Send the WAV header before the first chunk
                    first_chunk = False
                yield convert_audio(wav, request.response_format)

        return StreamingResponse(audio_stream(), media_type=media_type)

    # Return the audio file directly
    if request.response_format == "wav":
        music_data = pcm_arr_to_wav_view(wavs[0])
    else:
        music_data = convert_audio(wavs[0], request.response_format)

    return StreamingResponse(
        io.BytesIO(music_data),
        media_type=media_type,
        headers={
            "Content-Disposition": f"attachment; filename=output.{request.response_format}"
        },
    )


@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {"status": "healthy", "model_loaded": bool(app.state.chat)}
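

# Example client call (a minimal sketch; the host, port, and output filename
# are assumptions, not part of this module):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/v1/audio/speech",
#       json={
#           "model": "tts-1",
#           "input": "Hello, world",
#           "voice": "default",
#           "response_format": "mp3",
#       },
#   )
#   with open("output.mp3", "wb") as f:
#       f.write(resp.content)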