Skip to content

Commit d91d08e

Browse files
committed
Add more output formats to pass to whisper.cpp
1 parent 1c414a6 commit d91d08e

File tree

3 files changed

+37
-21
lines changed

3 files changed

+37
-21
lines changed

README.md

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ Node.js bindings for OpenAI's Whisper model.
77
## Features
88

99
- Automatically convert the audio to WAV format with a 16000 Hz frequency to support the whisper model.
10-
- Output transcripts to (.txt .srt .vtt)
10+
- Output transcripts to (.txt .srt .vtt .json .wts .lrc)
1111
- Optimized for CPU (Including Apple Silicon ARM)
1212
- Timestamp precision to single word
1313
- Split on word rather than on token (Optional)
1414
- Translate from source language to english (Optional)
15-
- Convert audio formet to wav to support whisper model
15+
- Convert audio format to wav to support whisper model
1616

1717

1818
## Installation
@@ -21,16 +21,16 @@ Node.js bindings for OpenAI's Whisper model.
2121

2222
```bash
2323
sudo apt update
24-
sudo apt install build-essential
24+
sudo apt install build-essential
2525
```
2626

27-
1. Install nodejs-whisper with npm
27+
2. Install nodejs-whisper with npm
2828

2929
```bash
3030
npm i nodejs-whisper
3131
```
3232

33-
2. Download whisper model
33+
3. Download whisper model
3434

3535
```bash
3636
npx nodejs-whisper download
@@ -50,18 +50,22 @@ const filePath = path.resolve(__dirname, 'YourAudioFileName')
5050
await nodewhisper(filePath, {
5151
modelName: 'base.en', //Downloaded models name
5252
autoDownloadModelName: 'base.en', // (optional) autodownload a model if model is not present
53-
verbose?: boolean
54-
removeWavFileAfterTranscription?: boolean
55-
withCuda?: boolean // (optional) use cuda for faster processing
53+
verbose: false, // (optional) output more debugging information
54+
removeWavFileAfterTranscription: false, // (optional) remove wav file once transcribed
55+
withCuda: false, // (optional) use cuda for faster processing
5656
whisperOptions: {
57+
outputInCsv: false, // get output result in csv file
58+
outputInJson: false, // get output result in json file
59+
outputInJsonFull: false, // get output result in json file including more information
60+
outputInLrc: false, // get output result in lrc file
61+
outputInSrt: true, // get output result in srt file
5762
outputInText: false, // get output result in txt file
5863
outputInVtt: false, // get output result in vtt file
59-
outputInSrt: true, // get output result in srt file
60-
outputInCsv: false, // get output result in csv file
61-
translateToEnglish: false, //translate from source language to english
62-
wordTimestamps: false, // Word-level timestamps
64+
outputInWords: false, // get output result in wts file for karaoke
65+
translateToEnglish: false, // translate from source language to english
66+
wordTimestamps: false, // word-level timestamps
6367
timestamps_length: 20, // amount of dialogue per timestamp pair
64-
splitOnWord: true, //split on word rather than on token
68+
splitOnWord: true, // split on word rather than on token
6569
},
6670
})
6771

@@ -93,10 +97,14 @@ const MODELS_LIST = [
9397
}
9498
9599
interface WhisperOptions {
100+
outputInCsv?: boolean
101+
outputInJson?: boolean
102+
outputInJsonFull?: boolean
103+
outputInLrc?: boolean
104+
outputInSrt?: boolean
96105
outputInText?: boolean
97106
outputInVtt?: boolean
98-
outputInSrt?: boolean
99-
outputInCsv?: boolean
107+
outputInWords?: boolean
100108
translateToEnglish?: boolean
101109
timestamps_length?: number
102110
wordTimestamps?: boolean
@@ -105,7 +113,7 @@ const MODELS_LIST = [
105113
106114
```
107115

108-
## Run Locally
116+
## Run locally
109117

110118
Clone the project
111119

@@ -131,7 +139,7 @@ Start the server
131139
npm run dev
132140
```
133141

134-
Build Project
142+
Build project
135143

136144
```bash
137145
npm run build

src/WhisperHelper.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,14 @@ export const constructCommand = (filePath: string, args: IOptions): string => {
3434

3535
const constructOptionsFlags = (args: IOptions): string => {
3636
let flags = [
37+
args.whisperOptions?.outputInCsv ? '-ocsv ' : '',
38+
args.whisperOptions?.outputInJson ? '-oj ' : '',
39+
args.whisperOptions?.outputInJsonFull ? '-ojf ' : '',
40+
args.whisperOptions?.outputInLrc ? '-olrc ' : '',
41+
args.whisperOptions?.outputInSrt ? '-osrt ' : '',
3742
args.whisperOptions?.outputInText ? '-otxt ' : '',
3843
args.whisperOptions?.outputInVtt ? '-ovtt ' : '',
39-
args.whisperOptions?.outputInSrt ? '-osrt ' : '',
40-
args.whisperOptions?.outputInCsv ? '-ocsv ' : '',
44+
args.whisperOptions?.outputInWords ? '-owts ' : '',
4145
args.whisperOptions?.translateToEnglish ? '-tr ' : '',
4246
args.whisperOptions?.wordTimestamps ? '-ml 1 ' : '',
4347
args.whisperOptions?.timestamps_length ? `-ml ${args.whisperOptions.timestamps_length} ` : '',

src/types.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
export interface WhisperOptions {
2+
outputInCsv?: boolean
3+
outputInJson?: boolean
4+
outputInJsonFull?: boolean
5+
outputInLrc?: boolean
6+
outputInSrt?: boolean
27
outputInText?: boolean
38
outputInVtt?: boolean
4-
outputInSrt?: boolean
5-
outputInCsv?: boolean
9+
outputInWords?: boolean
610
translateToEnglish?: boolean
711
language?: string
812
timestamps_length?: number

0 commit comments

Comments
 (0)