v1.1.12: bump gradio from 5.0 to 6.0, with several fixes to ensure compatibility with the new gradio version

SWivid · SWivid · commit 39617fcf7afa · 2025-12-20T18:44:43.000+08:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "f5-tts"
-version = "1.1.11"
+version = "1.1.12"
 description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
 readme = "README.md"
 license = {text = "MIT License"}
diff --git a/src/f5_tts/infer/infer_gradio.py b/src/f5_tts/infer/infer_gradio.py
@@ -816,7 +816,9 @@ def load_chat_model(chat_model_name):
                         lines=2,
                     )
 
-        chatbot_interface = gr.Chatbot(label="Conversation", type="messages")
+        chatbot_interface = gr.Chatbot(
+            label="Conversation"
+        )  # type="messages" is hard-coded since gradio 6.0, so it no longer needs to be passed in
 
         with gr.Row():
             with gr.Column():
@@ -853,6 +855,10 @@ def process_audio_input(conv_state, audio_path, text):
         @gpu_decorator
         def generate_text_response(conv_state, system_prompt):
             """Generate text response from AI"""
+            for single_state in conv_state:
+                if isinstance(single_state["content"], list):
+                    assert len(single_state["content"]) == 1 and single_state["content"][0]["type"] == "text"
+                    single_state["content"] = single_state["content"][0]["text"]
 
             system_prompt_state = [{"role": "system", "content": system_prompt}]
             response = chat_model_inference(system_prompt_state + conv_state, chat_model_state, chat_tokenizer_state)
@@ -866,7 +872,7 @@ def generate_audio_response(conv_state, ref_audio, ref_text, remove_silence, ran
             if not conv_state or not ref_audio:
                 return None, ref_text, seed_input
 
-            last_ai_response = conv_state[-1]["content"]
+            last_ai_response = conv_state[-1]["content"][0]["text"]
             if not last_ai_response or conv_state[-1]["role"] != "assistant":
                 return None, ref_text, seed_input
 
@@ -1108,7 +1114,6 @@ def main(port, host, share, api, root_path, inbrowser):
         server_name=host,
         server_port=port,
         share=share,
-        show_api=api,
         root_path=root_path,
         inbrowser=inbrowser,
     )