### Source: voiceflow/deepgram-self-hosted-chart, common/standard_deploy/api.aura-2-es.toml
###
### Keep in mind that all paths are in-container paths, and do not need to exist
### on the host machine.


### Configure license validation by passing in a DEEPGRAM_API_KEY environment variable.
### See https://developers.deepgram.com/docs/deploy-deepgram-services#credentials
[license]
### License server endpoints. `license-proxy` is an app (container) name, not a
### public hostname: Docker Compose and Podman Compose create a dedicated network
### that allows inter-container communication by app name.
### See [Networking in Compose](https://docs.docker.com/compose/networking/) for details.
### NOTE(review): presumably the URLs are tried in the order listed (proxy first,
### then license.deepgram.com) -- confirm against the Deepgram documentation.
server_url = ["https://license-proxy:8443", "https://license.deepgram.com"]


### Configure how the API will listen for your requests.
[server]
### The base URL (prefix) for requests to the API.
base_url = "/v1"
### The IP address to listen on. "0.0.0.0" listens on all interfaces, which is
### what you will probably want, since this is likely running in a container.
host = "0.0.0.0"
### The port to listen on.
port = 8080

### The timeouts below are duration strings with a unit suffix (e.g. "1s").
### How long to wait for a connection to a callback URL.
callback_conn_timeout = "1s"
### How long to wait for a response to a callback URL.
callback_timeout = "10s"

### How long to wait for a connection to a fetch URL.
fetch_conn_timeout = "1s"
### How long to wait for a response to a fetch URL.
fetch_timeout = "60s"


### By default, the API listens over HTTP. When BOTH a certificate file and a
### key file are provided, the API will instead listen over HTTPS.
###
### This performs TLS termination only, and does not provide any
### additional authentication.
###
### Both keys are commented out below, so this table is currently empty and
### the API listens over plain HTTP. Uncomment and set both to enable HTTPS.
[server.https]
# cert_file = "/path/to/cert.pem"
# key_file = "/path/to/key.pem"


### Specify custom DNS resolution options. Both keys are optional and are
### commented out here, so this table is currently empty.
[resolver]
### Specify custom domain name server(s), one string per server.
### Format is "{IP} {PORT} {PROTOCOL (tcp or udp)}"
# nameservers = ["127.0.0.1 53 udp"]

### If specifying a custom DNS nameserver, set the DNS TTL value.
### NOTE(review): the unit is not stated here; presumably seconds -- confirm.
# max_ttl = 10


### Limit the number of active requests handled by a single API container.
### If additional requests beyond the limit are sent, API will return
### a 429 HTTP status code. Default is no limit.
###
### To enable the limit, uncomment the key below AND supply a value: as written
### the placeholder has no value, so uncommenting it alone is not valid TOML.
[concurrency_limit]
# active_requests =


### Enable ancillary features. Most toggles below only take effect *if* the
### corresponding model is available, as noted on each key.
[features]
### Enables topic detection *if* a valid topic detection model is available
topic_detection = true # or false

### Enables summarization *if* a valid summarization model is available
summarization = true # or false

### Enables pre-recorded entity detection *if* a valid entity detection model is available
entity_detection = false # or true

### Enables pre-recorded entity-based redaction *if* a valid entity detection model is available
entity_redaction = false # or true

### Enables pre-recorded entity formatting *if* a valid NER model is available
format_entity_tags = false # or true

### If API is receiving requests faster than Engine can process them, a request
### queue will form. By default, this queue is stored in memory. Under high load,
### the queue may grow too large and cause Out-Of-Memory errors. To avoid this,
### set a disk_buffer_path to buffer the overflow of the request queue to disk.
###
### WARN: This is only meant to buffer requests temporarily during high load.
### If there is not enough Engine capacity to process the queued requests over time,
### the queue (and response time) will grow indefinitely.
# disk_buffer_path = "/path/to/disk/buffer/directory"

### Enables streaming TTS *if* a valid Aura TTS model is available
speak_streaming = true # or false

### Toggles redaction of usage data. Defaults to true if not present;
### set to false to disable redaction.
# redact_usage = true # or false

### Enables Flux turn-based streaming STT
listen_v2 = false # or true

### Configure the backend pool of speech engines (generically referred to as
### "drivers" here). The API will load-balance among drivers in the standard
### pool; if one standard driver fails, the next one will be tried.
###
### Each driver URL will have its hostname resolved to an IP address. If a domain
### name resolves to multiple IP addresses, the API will load-balance across each
### IP address.
###
### This behavior is provided for convenience, and in a production environment
### other tools can be used, such as HAProxy.
###
### Below is a new Speech Engine ("driver") in the "standard" pool. Each
### additional `[[driver_pool.standard]]` header appends another driver.
[[driver_pool.standard]]
### Host to connect to. If you are using a different method of orchestrating,
### then adjust the address accordingly.
###
### WARN: This must be HTTPS.
### NOTE(review): confirm that the Engine accepts TLS connections on port 8080
### in this deployment.
###
### Docker Compose and Podman Compose create a dedicated network that allows inter-container communication by app name.
### See [Networking in Compose](https://docs.docker.com/compose/networking/) for details.
url = "https://engine-es:8080/v2"
### Factor to increase the timeout by for each additional retry (for
### exponential backoff).
timeout_backoff = 1.2

### Before attempting a retry, sleep for this long. The value is a duration
### string with a unit suffix ("2s" = 2 seconds).
retry_sleep = "2s"
### Factor to increase the retry sleep by for each additional retry (for
### exponential backoff).
retry_backoff = 1.6

### Maximum response to deserialize from Driver (in bytes)
max_response_size = 1_073_741_824 # 1 GiB (2^30 bytes)