Skip to content

Commit c2469c0

Browse files
feat: configurable whisper recording command (#91)
* Add option to default config * Override automatic detection * chore: deprecate whisper_max_time * feat: fully configurable whisper_rec_cmd * chore: formating * fix: typo
1 parent 62254bd commit c2469c0

File tree

2 files changed

+50
-23
lines changed

2 files changed

+50
-23
lines changed

lua/gp/config.lua

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,21 @@ local config = {
179179
-- decrease this number to pick up only louder sounds as possible speech
180180
-- you can disable silence trimming by setting this to a very high number (like 1000.0)
181181
whisper_silence = "1.75",
182-
-- whisper max recording time (mm:ss)
183-
whisper_max_time = "05:00",
184182
-- whisper tempo (1.0 is normal speed)
185183
whisper_tempo = "1.75",
186184
-- The language of the input audio, in ISO-639-1 format.
187185
whisper_language = "en",
186+
-- command to use for recording; can be nil (unset) for automatic selection,
187+
-- string ("sox", "arecord", "ffmpeg") or table with command and arguments:
188+
-- sox is the most universal, but can have start/end cropping issues caused by latency
189+
-- arecord is linux only, but has no cropping issues and is faster
190+
-- ffmpeg in the default configuration is macos only, but can be used on any platform
191+
-- (see https://trac.ffmpeg.org/wiki/Capture/Desktop for more info)
192+
-- below is the default configuration for all three commands:
193+
-- whisper_rec_cmd = {"sox", "-c", "1", "--buffer", "32", "-d", "rec.wav", "trim", "0", "3600"},
194+
-- whisper_rec_cmd = {"arecord", "-c", "1", "-f", "S16_LE", "-r", "48000", "-d", "3600", "rec.wav"},
195+
-- whisper_rec_cmd = {"ffmpeg", "-y", "-f", "avfoundation", "-i", ":0", "-t", "3600", "rec.wav"},
196+
whisper_rec_cmd = nil,
188197

189198
-- image generation settings
190199
-- image prompt prefix for asking user for input (supports {{agent}} template variable)

lua/gp/init.lua

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ local deprecated = {
1717
chat_system_prompt = "`chat_system_prompt`\n" .. switch_to_agent,
1818
command_prompt_prefix = "`command_prompt_prefix`\nPlease use `command_prompt_prefix_template`"
1919
.. " with support for \n`{{agent}}` variable so you know which agent is currently active",
20+
whisper_max_time = "`whisper_max_time`\nPlease use fully customizable `whisper_rec_cmd`",
2021
}
2122

2223
--------------------------------------------------------------------------------
@@ -2784,41 +2785,35 @@ M.Whisper = function(callback)
27842785
return
27852786
end
27862787

2788+
local rec_file = M.config.whisper_dir .. "/rec.wav"
27872789
local rec_options = {
27882790
sox = {
27892791
cmd = "sox",
27902792
opts = {
2791-
-- single channel
27922793
"-c",
27932794
"1",
2794-
-- small buffer
27952795
"--buffer",
27962796
"32",
27972797
"-d",
2798-
-- output file
2799-
M.config.whisper_dir .. "/rec.wav",
2800-
-- max recording time
2798+
"rec.wav",
28012799
"trim",
28022800
"0",
2803-
M.config.whisper_max_time,
2801+
"3600",
28042802
},
28052803
exit_code = 0,
28062804
},
28072805
arecord = {
28082806
cmd = "arecord",
28092807
opts = {
2810-
-- single channel
28112808
"-c",
28122809
"1",
28132810
"-f",
28142811
"S16_LE",
28152812
"-r",
28162813
"48000",
2817-
-- max recording time
28182814
"-d",
28192815
3600,
2820-
-- output file
2821-
M.config.whisper_dir .. "/rec.wav",
2816+
"rec.wav",
28222817
},
28232818
exit_code = 1,
28242819
},
@@ -2832,7 +2827,7 @@ M.Whisper = function(callback)
28322827
":0",
28332828
"-t",
28342829
"3600",
2835-
M.config.whisper_dir .. "/rec.wav",
2830+
"rec.wav",
28362831
},
28372832
exit_code = 255,
28382833
},
@@ -2968,25 +2963,48 @@ M.Whisper = function(callback)
29682963
end)
29692964
end
29702965

2971-
local rec_cmd = "sox"
2972-
if vim.fn.executable("ffmpeg") == 1 then
2973-
local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l")
2974-
devices = string.gsub(devices, "^%s*(.-)%s*$", "%1")
2975-
if devices == "1" then
2976-
rec_cmd = "ffmpeg"
2966+
local cmd = {}
2967+
2968+
local rec_cmd = M.config.whisper_rec_cmd
2969+
-- if rec_cmd not set explicitly, try to autodetect
2970+
if not rec_cmd then
2971+
rec_cmd = "sox"
2972+
if vim.fn.executable("ffmpeg") == 1 then
2973+
local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l")
2974+
devices = string.gsub(devices, "^%s*(.-)%s*$", "%1")
2975+
if devices == "1" then
2976+
rec_cmd = "ffmpeg"
2977+
end
29772978
end
2979+
if vim.fn.executable("arecord") == 1 then
2980+
rec_cmd = "arecord"
2981+
end
2982+
end
2983+
2984+
if type(rec_cmd) == "table" and rec_cmd[1] and rec_options[rec_cmd[1]] then
2985+
rec_cmd = vim.deepcopy(rec_cmd)
2986+
cmd.cmd = table.remove(rec_cmd, 1)
2987+
cmd.exit_code = rec_options[cmd.cmd].exit_code
2988+
cmd.opts = rec_cmd
2989+
elseif type(rec_cmd) == "string" and rec_options[rec_cmd] then
2990+
cmd = rec_options[rec_cmd]
2991+
else
2992+
M.error(string.format("Whisper got invalid recording command: %s", rec_cmd))
2993+
close()
2994+
return
29782995
end
2979-
if vim.fn.executable("arecord") == 1 then
2980-
rec_cmd = "arecord"
2996+
for i, v in ipairs(cmd.opts) do
2997+
if v == "rec.wav" then
2998+
cmd.opts[i] = rec_file
2999+
end
29813000
end
29823001

2983-
local cmd = rec_options[rec_cmd]
29843002
M._H.process(nil, cmd.cmd, cmd.opts, function(code, signal, stdout, stderr)
29853003
close()
29863004

29873005
if code and code ~= cmd.exit_code then
29883006
M.error(
2989-
rec_cmd
3007+
cmd.cmd
29903008
.. " exited with code and signal:\ncode: "
29913009
.. code
29923010
.. ", signal: "

0 commit comments

Comments
 (0)