Skip to content

Commit f364d00

Browse files
committed
Add "Use GPU" checkbox to Speech to Text
The checkbox defaults to on. If a job run with the GPU fails, it is automatically retried with --no-gpu; if that retry succeeds, the checkbox's default value in settings is changed to off.
1 parent 9d05d8d commit f364d00

File tree

7 files changed

+71
-16
lines changed

7 files changed

+71
-16
lines changed

src/dialogs/transcribeaudiodialog.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,14 @@ TranscribeAudioDialog::TranscribeAudioDialog(const QString &trackName, QWidget *
138138
grid->addWidget(m_nonspoken, 4, 0, Qt::AlignRight);
139139
grid->addWidget(new QLabel(tr("Include non-spoken sounds")), 4, 1, Qt::AlignLeft);
140140

141+
m_useGpu = new QCheckBox(this);
142+
m_useGpu->setCheckState(Settings.whisperUseGpu() ? Qt::Checked : Qt::Unchecked);
143+
grid->addWidget(m_useGpu, 5, 0, Qt::AlignRight);
144+
grid->addWidget(new QLabel(tr("Use GPU")), 5, 1, Qt::AlignLeft);
145+
141146
QLabel *tracksLabel = new QLabel(tr("Tracks with speech"));
142147
tracksLabel->setToolTip(tr("Select tracks that contain speech to be transcribed."));
143-
grid->addWidget(tracksLabel, 5, 0, Qt::AlignRight);
148+
grid->addWidget(tracksLabel, 6, 0, Qt::AlignRight);
144149
m_trackList = new QListWidget(this);
145150
m_trackList->setSelectionMode(QAbstractItemView::NoSelection);
146151
m_trackList->setSizeAdjustPolicy(QAbstractScrollArea::AdjustToContentsOnFirstShow);
@@ -172,7 +177,7 @@ TranscribeAudioDialog::TranscribeAudioDialog(const QString &trackName, QWidget *
172177
}
173178
}
174179
}
175-
grid->addWidget(m_trackList, 5, 1, Qt::AlignLeft);
180+
grid->addWidget(m_trackList, 6, 1, Qt::AlignLeft);
176181

177182
// The config section is a single widget with a unique grid layout inside of it.
178183
// The config section is hidden by hiding the config widget (and the layout it contains)
@@ -274,7 +279,7 @@ TranscribeAudioDialog::TranscribeAudioDialog(const QString &trackName, QWidget *
274279

275280
configLayout->addWidget(m_table, 4, 0, 1, 3);
276281

277-
grid->addWidget(m_configWidget, 6, 0, 1, 2);
282+
grid->addWidget(m_configWidget, 7, 0, 1, 2);
278283

279284
// Add a button box to the dialog
280285
m_buttonBox = new QDialogButtonBox(QDialogButtonBox::Ok | QDialogButtonBox::Cancel);
@@ -294,7 +299,7 @@ TranscribeAudioDialog::TranscribeAudioDialog(const QString &trackName, QWidget *
294299
m_configWidget->setVisible(false);
295300
}
296301
m_buttonBox->addButton(configButton, QDialogButtonBox::ActionRole);
297-
grid->addWidget(m_buttonBox, 7, 0, 1, 2);
302+
grid->addWidget(m_buttonBox, 8, 0, 1, 2);
298303
connect(m_buttonBox,
299304
SIGNAL(clicked(QAbstractButton *)),
300305
this,
@@ -321,6 +326,7 @@ void TranscribeAudioDialog::onButtonClicked(QAbstractButton *button)
321326
{
322327
QDialogButtonBox::ButtonRole role = m_buttonBox->buttonRole(button);
323328
if (role == QDialogButtonBox::AcceptRole) {
329+
Settings.setWhisperUseGpu(m_useGpu->checkState() == Qt::Checked);
324330
LOG_DEBUG() << "Accept";
325331
accept();
326332
} else if (role == QDialogButtonBox::RejectRole) {
@@ -378,6 +384,11 @@ bool TranscribeAudioDialog::includeNonspoken()
378384
return m_nonspoken->checkState() == Qt::Checked;
379385
}
380386

387+
// Returns true when the "Use GPU" checkbox (m_useGpu) is in the Qt::Checked state.
bool TranscribeAudioDialog::useGpu()
388+
{
389+
return m_useGpu->checkState() == Qt::Checked;
390+
}
391+
381392
void TranscribeAudioDialog::showEvent(QShowEvent *event)
382393
{
383394
QDialog::showEvent(event);

src/dialogs/transcribeaudiodialog.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class TranscribeAudioDialog : public QDialog
4343
bool translate();
4444
int maxLineLength();
4545
bool includeNonspoken();
46+
bool useGpu();
4647

4748
protected:
4849
virtual void showEvent(QShowEvent *event) override;
@@ -63,6 +64,7 @@ private slots:
6364
QCheckBox *m_translate;
6465
QSpinBox *m_maxLength;
6566
QCheckBox *m_nonspoken;
67+
QCheckBox *m_useGpu;
6668
QListWidget *m_trackList;
6769
QWidget *m_configWidget;
6870
QLineEdit *m_exeLabel;

src/docks/subtitlesdock.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1341,7 +1341,8 @@ void SubtitlesDock::speechToText()
13411341
tmpSrt->fileName(),
13421342
dialog.language(),
13431343
dialog.translate(),
1344-
dialog.maxLineLength());
1344+
dialog.maxLineLength(),
1345+
dialog.useGpu());
13451346
// Ensure the language code is 3 character (part 2)
13461347
QString langCode = dialog.language();
13471348
QLocale::Language lang = QLocale::codeToLanguage(langCode);

src/jobs/whisperjob.cpp

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024 Meltytech, LLC
2+
* Copyright (c) 2024-2026 Meltytech, LLC
33
*
44
* This program is free software: you can redistribute it and/or modify
55
* it under the terms of the GNU General Public License as published by
@@ -33,13 +33,16 @@ WhisperJob::WhisperJob(const QString &name,
3333
const QString &lang,
3434
bool translate,
3535
int maxLength,
36+
bool useGpu,
3637
QThread::Priority priority)
3738
: AbstractJob(name, priority)
3839
, m_iWavFile(iWavFile)
3940
, m_oSrtFile(oSrtFile)
4041
, m_lang(lang)
4142
, m_translate(translate)
4243
, m_maxLength(maxLength)
44+
, m_useGpu(useGpu)
45+
, m_retryingWithoutGpu(false)
4346
, m_previousPercent(0)
4447
{
4548
setTarget(oSrtFile);
@@ -60,25 +63,29 @@ void WhisperJob::start()
6063
QString of = m_oSrtFile;
6164
of.remove(".srt");
6265
QStringList args;
63-
args << "-f" << m_iWavFile;
64-
args << "-m" << modelPath;
65-
args << "-l" << m_lang;
66+
args << "--file" << m_iWavFile;
67+
args << "--model" << modelPath;
68+
args << "--language" << m_lang;
6669
if (m_translate) {
67-
args << "-tr";
70+
args << "--translate";
6871
}
69-
args << "-of" << of;
70-
args << "-osrt";
71-
args << "-pp";
72-
args << "-ml" << QString::number(m_maxLength);
73-
args << "-sow";
72+
args << "--output-file" << of;
73+
args << "--output-srt";
74+
args << "--print-progress";
75+
args << "--max-len" << QString::number(m_maxLength);
76+
args << "--split-on-word";
7477

7578
#if QT_POINTER_SIZE == 4
7679
// Limit to 1 rendering thread on 32-bit process to reduce memory usage.
7780
auto threadCount = 1;
7881
#else
7982
auto threadCount = qMax(1, QThread::idealThreadCount() - 1);
8083
#endif
81-
args << "-t" << QString::number(threadCount);
84+
args << "--threads" << QString::number(threadCount);
85+
86+
if (!m_useGpu || m_retryingWithoutGpu) {
87+
args << "--no-gpu";
88+
}
8289

8390
LOG_DEBUG() << whisperPath + " " + args.join(' ');
8491
AbstractJob::start(whisperPath, args);
@@ -116,3 +123,21 @@ void WhisperJob::onReadyRead()
116123
}
117124
} while (!msg.isEmpty());
118125
}
126+
127+
// Handles process completion, retrying once without the GPU when a GPU run fails.
//
// exitCode / exitStatus describe how the process ended. A run counts as failed
// when it did not exit normally or returned a non-zero exit code.
void WhisperJob::onFinished(int exitCode, QProcess::ExitStatus exitStatus)
128+
{
129+
// Retry only if the run failed, the job was not stopped, the GPU was requested,
// and no GPU-less retry has been attempted yet (so at most one retry happens).
if ((exitStatus != QProcess::NormalExit || exitCode != 0) && !stopped() && m_useGpu
130+
&& !m_retryingWithoutGpu) {
131+
// With this flag set, start() appends "--no-gpu" to the arguments.
m_retryingWithoutGpu = true;
132+
// Reset the remembered percentage for the new run
// (presumably used for progress reporting in onReadyRead -- confirm).
m_previousPercent = 0;
133+
QString message(tr("Speech to Text job failed; trying again without GPU."));
134+
MAIN.showStatusMessage(message);
135+
appendToLog(message.append("\n"));
136+
// Schedule start() through a zero-delay timer instead of calling it directly here.
QTimer::singleShot(0, this, &WhisperJob::start);
137+
// Return without calling AbstractJob::onFinished() for the failed attempt.
return;
138+
}
139+
// The GPU-less retry succeeded: store false in the setting that the dialog
// reads to initialize its "Use GPU" checkbox.
if (m_retryingWithoutGpu && exitStatus == QProcess::NormalExit && exitCode == 0) {
140+
Settings.setWhisperUseGpu(false);
141+
}
142+
AbstractJob::onFinished(exitCode, exitStatus);
143+
}

src/jobs/whisperjob.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class WhisperJob : public AbstractJob
3232
const QString &lang,
3333
bool translate,
3434
int maxLength,
35+
bool useGpu = true,
3536
QThread::Priority priority = Settings.jobPriority());
3637
virtual ~WhisperJob();
3738

@@ -41,13 +42,16 @@ public slots:
4142

4243
protected slots:
4344
void onReadyRead();
45+
void onFinished(int exitCode, QProcess::ExitStatus exitStatus) override;
4446

4547
private:
4648
const QString m_iWavFile;
4749
const QString m_oSrtFile;
4850
const QString m_lang;
4951
const bool m_translate;
5052
const int m_maxLength;
53+
const bool m_useGpu;
54+
bool m_retryingWithoutGpu;
5155
int m_previousPercent;
5256
};
5357

src/settings.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1699,6 +1699,16 @@ QString ShotcutSettings::whisperModel()
16991699
return settings.value("subtitles/whisperModel", "").toString();
17001700
}
17011701

1702+
// Stores the "use GPU" flag b under the settings key "subtitles/whisperUseGpu".
void ShotcutSettings::setWhisperUseGpu(bool b)
1703+
{
1704+
settings.setValue("subtitles/whisperUseGpu", b);
1705+
}
1706+
1707+
// Returns the value stored under "subtitles/whisperUseGpu"; defaults to true
// when the key has not been set.
bool ShotcutSettings::whisperUseGpu() const
1708+
{
1709+
return settings.value("subtitles/whisperUseGpu", true).toBool();
1710+
}
1711+
17021712
void ShotcutSettings::setNotesZoom(int zoom)
17031713
{
17041714
settings.setValue("notes/zoom", zoom);

src/settings.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,8 @@ class ShotcutSettings : public QObject
370370
QString whisperExe();
371371
void setWhisperModel(const QString &path);
372372
QString whisperModel();
373+
void setWhisperUseGpu(bool b);
374+
bool whisperUseGpu() const;
373375

374376
// Notes
375377
void setNotesZoom(int zoom);

0 commit comments

Comments
 (0)