-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path.env.example
More file actions
77 lines (64 loc) · 1.76 KB
/
.env.example
File metadata and controls
77 lines (64 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Server Configuration
# NOTE(review): HOST/PORT are presumably the listen address and port; 0.0.0.0
# conventionally means "all IPv4 interfaces" - confirm against the settings loader.
HOST=0.0.0.0
PORT=8000
# NOTE(review): presumably the number of server worker processes - confirm.
WORKERS=1
DEBUG=false
# Security Configuration
# In this example authentication is switched off and API_KEY is left blank.
# NOTE(review): presumably API_KEY must be set when AUTH_ENABLED=true - confirm.
# Never commit a real key to this example file.
AUTH_ENABLED=false
API_KEY=
# Model Configuration
# NOTE(review): large-v3 / small look like Whisper model size names - confirm
# which identifiers the model loader accepts.
DEFAULT_MODEL=large-v3
DETECTOR_MODEL=small
# NOTE(review): cuda presumably requires a GPU; verify whether cpu is accepted.
DEVICE=cuda
BATCH_SIZE=8
# Language Detection
# NOTE(review): MIN_PROB looks like a probability threshold in [0, 1] - confirm.
MIN_PROB=0.6
# DETECTOR_BATCH_SIZE overrides BATCH_SIZE for language detection when set
DETECTOR_BATCH_SIZE=4
# NOTE(review): int8 presumably selects a quantized compute type for the
# detector model - verify the accepted values.
DETECTOR_COMPUTE_TYPE=int8
# Language Detection Weighting
CONFIDENCE_WEIGHT=2.0
# Enhanced Language Detection
# NOTE(review): the two ratio/confidence values below look like fractions in
# [0, 1]; their exact use is not visible in this file - confirm before tuning.
NUM_LANGUAGE_CHUNKS=6
MIN_LANGUAGE_CONFIDENCE=0.3
DYNAMIC_THRESHOLD_ENABLED=true
MIN_CONSENSUS_RATIO=0.5
# Audio Quality Filtering
ENABLE_QUALITY_FILTERING=true
# NOTE(review): the scale of the RMS and amplitude thresholds is not stated
# here (possibly 16-bit PCM sample units) - TODO confirm.
MIN_CHUNK_RMS=100
# Milliseconds, per the _MS suffix in the key name.
MIN_CHUNK_DURATION_MS=1000
MIN_CHUNK_AMPLITUDE=1000
# NOTE(review): looks like a fraction in [0, 1] - confirm.
QUALITY_FALLBACK_THRESHOLD=0.5
# File Upload
# 209715200 = 200 * 1024 * 1024, i.e. 200 MiB if the unit is bytes.
# NOTE(review): unit is not stated in this file - confirm.
MAX_FILE_SIZE=209715200
# Audio Processing
# NOTE(review): units are not stated for these keys. The durations/offset look
# like milliseconds and the threshold like a dB level - TODO confirm.
SILENCE_MIN_DURATION=300
SILENCE_THRESHOLD=-35
CHUNK_DURATION=10000
CHUNK_OFFSET=5000
# ASR Options
ASR_BEAM_SIZE=2
ASR_CONDITION_ON_PREVIOUS_TEXT=false
# List value written in bracket syntax.
# NOTE(review): presumably parsed as a JSON array by the settings loader, so
# multiple temperatures would be written like [0.0, 0.2] - confirm.
ASR_TEMPERATURES=[0.0]
ASR_NO_SPEECH_THRESHOLD=0.6
# VAD Options
# Both values are in milliseconds, per the _MS suffix in the key names.
VAD_MIN_SILENCE_DURATION_MS=500
VAD_SPEECH_PAD_MS=200
# Suppression Tokens
# Comma-separated list of phrases to suppress
# The value contains spaces and Cyrillic text, so keep this file UTF-8 encoded.
# NOTE(review): whether the surrounding double quotes are stripped depends on
# the loader that reads this file - confirm.
SUPPRESS_PHRASES="дима торжок,dima torzok,dima torzhok,субтитры подогнал"
# Speaker Diarization
# Enable speaker diarization by default for all requests
ENABLE_DIARIZATION=false
# HuggingFace token for accessing pyannote models (required for diarization)
# Get token at: https://huggingface.co/settings/tokens
# Accept agreements: https://huggingface.co/pyannote/speaker-diarization-3.1
# Left blank in this example; treat the real token as a secret and do not commit it.
HF_TOKEN=
# Diarization model to use
DIARIZATION_MODEL=pyannote/speaker-diarization-3.1
# Default speaker count constraints
# NOTE(review): presumably DEFAULT_MIN_SPEAKERS must not exceed
# DEFAULT_MAX_SPEAKERS - confirm whether the loader validates this.
DEFAULT_MIN_SPEAKERS=1
DEFAULT_MAX_SPEAKERS=10
# Logging
# NOTE(review): INFO looks like a Python logging level name - confirm which
# level names the application accepts.
LOG_LEVEL=INFO
# The %(asctime)s / %(name)s / %(levelname)s / %(message)s placeholders follow
# Python logging's %-style record attributes.
# NOTE(review): confirm the loader passes this string to logging unchanged.
LOG_FORMAT="%(asctime)s - %(name)s - %(levelname)s - %(message)s"