# Source: .env.example from groxaxo/Qwen3-TTS-Openai-Fastapi (main branch)
# Qwen3-TTS API Server Configuration
# Copy this file to .env and adjust values for your system

# ============================================================================
# Backend Selection
# ============================================================================

# Inference backend. Allowed values: official | vllm | pytorch | openvino
# - official: auto-detects GPU/CPU (default)
# - vllm: vLLM-Omni, optimized for GPU inference
# - pytorch: CPU-optimized PyTorch backend (see "CPU Performance Tuning" below)
# - openvino: experimental Intel CPU/NPU backend (see "OpenVINO Settings" below)
TTS_BACKEND=official

# ============================================================================
# Model Selection
# ============================================================================

# HuggingFace model ID to load.
# Options:
# - Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice (default, best quality)
# - Qwen/Qwen3-TTS-12Hz-1.7B-Base (supports voice cloning)
# - Qwen/Qwen3-TTS-12Hz-0.6B-Base (smaller, faster on CPU; used by the CPU
#   example configurations at the end of this file)
TTS_MODEL_ID=Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice

# ============================================================================
# Device and Precision Settings
# ============================================================================

# Compute device. Allowed values: auto | cpu | cuda | cuda:0
# (cuda:N presumably selects GPU N, as in PyTorch device strings - confirm.)
TTS_DEVICE=auto

# Model data type. Allowed values: auto | float32 | float16 | bfloat16
# Recommended: auto (bfloat16 on GPU, float32 on CPU)
TTS_DTYPE=auto

# Attention implementation. Allowed values: auto | flash_attention_2 | sdpa | eager
# Recommended: auto (tries flash_attention_2 first, then falls back to sdpa,
# then to eager)
TTS_ATTN=auto

# ============================================================================
# CPU Performance Tuning
# ============================================================================

# Number of threads for PyTorch CPU operations
# (presumably the intra-op thread pool, in contrast to CPU_INTEROP below).
# Set to the number of physical cores, not logical (hyper-threaded) cores.
# The value below suits a 12-core CPU; adjust it for your machine.
# Examples:
# - i5-1240P: 12 (4 P-cores + 8 E-cores)
# - i7-12700K: 12 (8 P-cores + 4 E-cores)
# - Generic 4-core: 4
CPU_THREADS=12

# Number of threads for inter-op parallelism (recommended: 1-2)
CPU_INTEROP=2

# Use Intel Extension for PyTorch (IPEX): true | false
# Requires: pip install intel-extension-for-pytorch
# Intel CPUs only; the project states a 20-40% speedup (hardware-dependent).
USE_IPEX=false

# ============================================================================
# OpenVINO Settings (Experimental)
# ============================================================================

# OpenVINO device target: CPU | GPU | AUTO
# NOTE(review): the OV_* settings presumably apply only when
# TTS_BACKEND=openvino - confirm against the server code.
OV_DEVICE=CPU

# Directory for the OpenVINO compilation cache
OV_CACHE_DIR=./.ov_cache

# Directory containing the exported OpenVINO IR models
OV_MODEL_DIR=./.ov_models

# ============================================================================
# Server Settings
# ============================================================================

# Address and port the server listens on.
# 0.0.0.0 accepts connections on all network interfaces; use 127.0.0.1 to
# restrict access to the local machine.
HOST=0.0.0.0
PORT=8880

# Number of workers (default: 1)
# NOTE(review): the original note here said multiple workers share model
# memory. Worker processes normally each load their own copy of the model, so
# RAM/VRAM use may grow with the worker count - confirm before raising this.
WORKERS=1

# Warm up the model when the server starts: true | false
# Recommended for production: the first request after warmup is faster.
TTS_WARMUP_ON_START=false

# ============================================================================
# CORS Settings
# ============================================================================

# Allowed CORS origins, comma-separated
# (e.g. https://app.example.com,https://admin.example.com).
# Use * to allow all origins (not recommended for production).
CORS_ORIGINS=*

# ============================================================================
# Voice Studio (Optional)
# ============================================================================

# Enable the Voice Studio web interface: true | false
ENABLE_VOICE_STUDIO=false

# Directory for the voice library
VOICE_LIBRARY_DIR=./voice_library

# ============================================================================
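# Example Configurations (reference only; to use one, set the matching keys
# above rather than uncommenting these lines, which would duplicate keys)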
# ============================================================================

# ──────────────────────────────────────────────────────────────────────────
# Configuration 1: GPU (Default)
# ──────────────────────────────────────────────────────────────────────────
# TTS_BACKEND=official
# TTS_MODEL_ID=Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
# TTS_DEVICE=auto
# TTS_DTYPE=auto
# TTS_ATTN=auto

# ──────────────────────────────────────────────────────────────────────────
# Configuration 2: CPU - i5-1240P (Recommended)
# ──────────────────────────────────────────────────────────────────────────
# TTS_BACKEND=pytorch
# TTS_MODEL_ID=Qwen/Qwen3-TTS-12Hz-0.6B-Base
# TTS_DEVICE=cpu
# TTS_DTYPE=float32
# TTS_ATTN=sdpa
# CPU_THREADS=12
# CPU_INTEROP=2
# USE_IPEX=false

# ──────────────────────────────────────────────────────────────────────────
# Configuration 3: CPU - i5-1240P with IPEX (Maximum Performance)
# ──────────────────────────────────────────────────────────────────────────
# TTS_BACKEND=pytorch
# TTS_MODEL_ID=Qwen/Qwen3-TTS-12Hz-0.6B-Base
# TTS_DEVICE=cpu
# TTS_DTYPE=float32
# TTS_ATTN=sdpa
# CPU_THREADS=12
# CPU_INTEROP=2
# USE_IPEX=true

# ──────────────────────────────────────────────────────────────────────────
# Configuration 4: Generic CPU (4 cores)
# ──────────────────────────────────────────────────────────────────────────
# TTS_BACKEND=pytorch
# TTS_MODEL_ID=Qwen/Qwen3-TTS-12Hz-0.6B-Base
# TTS_DEVICE=cpu
# TTS_DTYPE=float32
# TTS_ATTN=sdpa
# CPU_THREADS=4
# CPU_INTEROP=1

# ──────────────────────────────────────────────────────────────────────────
# Configuration 5: vLLM-Omni Backend (GPU)
# ──────────────────────────────────────────────────────────────────────────
# TTS_BACKEND=vllm
# TTS_MODEL_ID=Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice