Skip to content

Commit c008d29

Browse files
committed
examples : add wer cli example
This commit adds, as a suggestion, a Word Error Rate (WER) calculation example. The motivation is that it could be used for WER testing. Usage: ```console $ ./build/bin/whisper-wer Usage: ./build/bin/whisper-wer [options] Options: -r, --reference PATH Full path to reference transcriptions directory -a, --actual PATH Full path to actual transcriptions directory --help Display this help message ``` Example usage: ```console $ ./build/bin/whisper-wer -r examples/wer/reference_transcriptions/ \ -a examples/wer/actual_transcriptions/ Word Error Rate for : jfk.wav.txt Reference words: 22 Actual words: 22 Substitutions: 1 Deletions: 0 Insertions: 0 Total edits: 1 WER: 0.045455 ``` A more detailed description can be found in examples/wer/README.md. A unit test is provided in tests/test-wer.cpp. ```console $ cmake --build build --target test-wer && \ ctest -R test-wer --test-dir build --output-on-failure ```
1 parent f31b404 commit c008d29

File tree

10 files changed

+359
-0
lines changed

10 files changed

+359
-0
lines changed

examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ add_library(${TARGET} STATIC
5656
common-whisper.cpp
5757
grammar-parser.h
5858
grammar-parser.cpp
59+
wer.h
60+
wer.cpp
5961
${COMMON_SOURCES_FFMPEG}
6062
)
6163

@@ -114,6 +116,7 @@ else()
114116
add_subdirectory(sycl)
115117
endif()
116118
endif (WHISPER_SDL2)
119+
add_subdirectory(wer)
117120

118121
add_subdirectory(deprecation-warning)
119122
endif()

examples/wer.cpp

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#include "wer.h"
2+
3+
#include <cstdio>
4+
#include <iostream>
5+
#include <vector>
6+
#include <string>
7+
#include <algorithm>
8+
#include <sstream>
9+
10+
// Breaks `text` into whitespace-separated tokens and returns them in the order
// they appear. Tokens are taken verbatim: punctuation stays attached to the
// neighbouring word and no case folding is applied. An empty or
// whitespace-only input yields an empty vector.
std::vector<std::string> split_into_words(const std::string& text) {
    std::istringstream input(text);
    std::vector<std::string> tokens;

    // Stream extraction skips any run of whitespace between tokens.
    for (std::string token; input >> token; ) {
        tokens.push_back(token);
    }

    return tokens;
}
21+
22+
// Aligns `reference` against `actual` word by word (Levenshtein distance with
// unit costs) and returns {substitutions, deletions, insertions} counted along
// one minimal-cost edit path.
//
// A deletion is a reference word missing from `actual`; an insertion is an
// extra word in `actual`. When several minimal paths exist, the traceback
// prefers, in this order: match, substitution, deletion, insertion.
std::tuple<int, int, int> count_edit_ops(const std::vector<std::string>& reference,
                                         const std::vector<std::string>& actual) {
    const int n_ref = static_cast<int>(reference.size());
    const int n_act = static_cast<int>(actual.size());

    // cost[r][c]: fewest edits that turn the first r reference words into the
    // first c actual words.
    std::vector<std::vector<int>> cost(n_ref + 1, std::vector<int>(n_act + 1, 0));

    // Against an empty prefix every word is a deletion (column 0) or an
    // insertion (row 0).
    for (int r = 0; r <= n_ref; ++r) {
        cost[r][0] = r;
    }
    for (int c = 0; c <= n_act; ++c) {
        cost[0][c] = c;
    }

    for (int r = 1; r <= n_ref; ++r) {
        for (int c = 1; c <= n_act; ++c) {
            if (reference[r - 1] == actual[c - 1]) {
                // Equal words cost nothing.
                cost[r][c] = cost[r - 1][c - 1];
                continue;
            }
            const int cheapest_neighbour = std::min({
                cost[r - 1][c],     // deletion
                cost[r][c - 1],     // insertion
                cost[r - 1][c - 1]  // substitution
            });
            cost[r][c] = cheapest_neighbour + 1;
        }
    }

    int n_sub = 0;
    int n_del = 0;
    int n_ins = 0;

    // Walk back from the bottom-right corner and classify every step.
    int r = n_ref;
    int c = n_act;
    while (r > 0 || c > 0) {
        const bool can_step_diagonally = r > 0 && c > 0;

        if (can_step_diagonally && reference[r - 1] == actual[c - 1]) {
            // Match: nothing to count.
            --r;
            --c;
            continue;
        }
        if (can_step_diagonally && cost[r][c] == cost[r - 1][c - 1] + 1) {
            ++n_sub;
            --r;
            --c;
            continue;
        }
        if (r > 0 && cost[r][c] == cost[r - 1][c] + 1) {
            ++n_del;
            --r;
            continue;
        }
        // Only an insertion can explain the current cell.
        ++n_ins;
        --c;
    }

    return std::tuple<int, int, int>{n_sub, n_del, n_ins};
}
81+
82+
// Computes the word error rate of `actual_text` measured against
// `reference_text`.
//
// Both texts are split on whitespace, the word sequences are aligned with
// count_edit_ops(), and the result carries the word counts, the individual
// edit counts, their sum and
//     wer = (substitutions + deletions + insertions) / reference word count.
// When the reference contains no words the rate is reported as 0.0 so that no
// division by zero takes place (insertions are still counted in n_ins).
wer_result calculate_wer(const std::string& reference_text, const std::string& actual_text) {
    const std::vector<std::string> reference = split_into_words(reference_text);
    const std::vector<std::string> actual    = split_into_words(actual_text);

    const auto [n_sub, n_del, n_ins] = count_edit_ops(reference, actual);
    const int n_edits = n_sub + n_del + n_ins;

    // Fill the fields one by one instead of using designated initializers:
    // those are a C++20 feature and only a compiler extension in earlier
    // standards, so some compilers reject them outright.
    wer_result result;
    result.n_ref_words = reference.size();
    result.n_act_words = actual.size();
    result.n_sub       = n_sub;
    result.n_del       = n_del;
    result.n_ins       = n_ins;
    result.n_edits     = n_edits;
    result.wer         = reference.empty()
        ? 0.0
        : static_cast<double>(n_edits) / static_cast<double>(reference.size());

    return result;
}

examples/wer.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#ifndef WER_H
2+
#define WER_H
3+
#include <vector>
4+
#include <string>
5+
6+
// Statistics produced by comparing one reference transcription with one
// actual transcription. Every member is value-initialized so that a
// default-constructed wer_result never exposes indeterminate values.
struct wer_result {
    size_t n_ref_words = 0;   // Number of words in the reference text.
    size_t n_act_words = 0;   // Number of words in the actual (transcribed) text.
    int    n_sub       = 0;   // Number of substitutions.
    int    n_del       = 0;   // Number of deletions.
    int    n_ins       = 0;   // Number of insertions.
    int    n_edits     = 0;   // Total number of edits (n_sub + n_del + n_ins).
    double wer         = 0.0; // The word error rate (n_edits / n_ref_words).
};
15+
16+
// Computes the word error rate of `actual_text` against `reference_text`.
// Both texts are split into whitespace-separated words; the result holds the
// two word counts, the number of substitutions, deletions and insertions,
// their total, and wer = total edits / reference word count (0.0 when the
// reference contains no words).
wer_result calculate_wer(const std::string& reference_text, const std::string& actual_text);
17+
18+
#endif // WER_H

examples/wer/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
set(TARGET whisper-wer)
2+
add_executable(${TARGET} cli.cpp)
3+
4+
include(DefaultTargetOptions)
5+
6+
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
7+
8+
install(TARGETS ${TARGET} RUNTIME)

examples/wer/README.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# whisper.cpp/examples/wer
2+
3+
This is a command line tool for calculating the Word Error Rate (WER). This tool
4+
expects that reference transcriptions (the known correct transcriptions)
5+
and actual transcriptions from whisper.cpp are available in two separate
6+
directories where the file names are identical.
7+
8+
### Usage
9+
```console
10+
$ ./build/bin/whisper-wer
11+
Usage: ./build/bin/whisper-wer [options]
12+
Options:
13+
-r, --reference PATH Full path to reference transcriptions directory
14+
-a, --actual PATH Full path to actual transcriptions directory
15+
--help Display this help message
16+
```
17+
18+
### Example Usage with whisper-cli
19+
First, generate transcription(s) using whisper-cli:
20+
```
21+
./build/bin/whisper-cli \
22+
-m models/ggml-base.en.bin \
23+
-f samples/jfk.wav \
24+
--output-txt
25+
...
26+
output_txt: saving output to 'samples/jfk.wav.txt'
27+
```
28+
Next, copy the transcription to a directory where the actual transcriptions
29+
are stored. In this example we will use a directory called `actual_transcriptions`
30+
in this examples directory:
31+
```console
32+
$ cp samples/jfk.wav.txt examples/wer/actual_transcriptions
33+
```
34+
In a real world scenario the reference transcriptions would be available
35+
representing the known correct text. In this case we have already placed a file
36+
in `examples/wer/reference_transcriptions` that can be used for testing, where
37+
only a single word was changed (`Americans` -> `Swedes`).
38+
39+
Finally, run the whisper-wer tool:
40+
```console
41+
$ ./build/bin/whisper-wer -r examples/wer/reference_transcriptions/ -a examples/wer/actual_transcriptions/
42+
Word Error Rate for : jfk.wav.txt
43+
Reference words: 22
44+
Actual words: 22
45+
Substitutions: 1
46+
Deletions: 0
47+
Insertions: 0
48+
Total edits: 1
49+
WER: 0.045455
50+
```
51+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.

examples/wer/cli.cpp

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#include "wer.h"
2+
3+
#include <cstdio>
4+
#include <vector>
5+
#include <string>
6+
#include <filesystem>
7+
#include <fstream>
8+
#include <cstring>
9+
#include <map>
10+
11+
// Returns the paths of all regular files with a ".txt" extension located
// directly inside `dir_path`; sub-directories are not descended into.
// If the directory cannot be iterated, the filesystem error is printed and
// whatever was collected up to that point (possibly nothing) is returned.
std::vector<std::string> read_files_from_directory(const std::string& dir_path) {
    namespace fs = std::filesystem;

    std::vector<std::string> txt_files;
    try {
        for (const fs::directory_entry& item : fs::directory_iterator(dir_path)) {
            if (!item.is_regular_file()) {
                continue;
            }
            const bool is_txt = item.path().extension() == ".txt";
            if (is_txt) {
                txt_files.push_back(item.path().string());
            }
        }
    } catch (const fs::filesystem_error& err) {
        printf("Error reading directory %s: %s\n", dir_path.c_str(), err.what());
    }
    return txt_files;
}
24+
25+
// Reads the text file at `file_path` line by line and returns its content.
// Every line read is terminated with '\n' in the result, so a file whose last
// line lacks a trailing newline gains one. If the file cannot be opened, a
// message is printed and an empty string is returned.
std::string read_file_content(const std::string& file_path) {
    std::string text;
    std::ifstream input(file_path);

    if (!input.is_open()) {
        printf("Unable to open file: %s\n", file_path.c_str());
        return text;
    }

    for (std::string row; std::getline(input, row); ) {
        text += row;
        text += "\n";
    }
    input.close();

    return text;
}
41+
42+
// Returns the last component of `path` (the file name including its
// extension), with all leading directories removed.
std::string get_base_filename(const std::string& path) {
    const std::filesystem::path full_path(path);
    return full_path.filename().string();
}
45+
46+
// Prints the command line help to stdout. `program_name` is inserted into the
// "Usage:" line, followed by the fixed list of supported options.
void print_usage(const char* program_name) {
    printf("Usage: %s [options]\n", program_name);

    static const char* const option_lines[] = {
        "Options:\n",
        " -r, --reference PATH Full path to reference transcriptions directory\n",
        " -a, --actual PATH Full path to actual transcriptions directory\n",
        " --help Display this help message\n",
    };
    for (const char* line : option_lines) {
        printf("%s", line);
    }
}
53+
54+
int main(int argc, char** argv) {
55+
if (argc == 1) {
56+
print_usage(argv[0]);
57+
return 0;
58+
}
59+
60+
std::string reference_path;
61+
std::string actual_path;
62+
bool reference_set = false;
63+
bool actual_set = false;
64+
65+
for (int i = 1; i < argc; i++) {
66+
if (strcmp(argv[i], "--help") == 0) {
67+
print_usage(argv[0]);
68+
return 0;
69+
} else if (strcmp(argv[i], "-r") == 0 || strcmp(argv[i], "--reference") == 0) {
70+
if (i + 1 < argc) {
71+
reference_path = argv[++i];
72+
reference_set = true;
73+
} else {
74+
printf("Error: Missing path after %s\n", argv[i]);
75+
print_usage(argv[0]);
76+
return 1;
77+
}
78+
} else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--actual") == 0) {
79+
if (i + 1 < argc) {
80+
actual_path = argv[++i];
81+
actual_set = true;
82+
} else {
83+
printf("Error: Missing path after %s\n", argv[i]);
84+
print_usage(argv[0]);
85+
return 1;
86+
}
87+
} else {
88+
printf("Error: Unknown option: %s\n", argv[i]);
89+
print_usage(argv[0]);
90+
return 1;
91+
}
92+
}
93+
94+
if (!reference_set || !actual_set) {
95+
printf("Error: Both reference and actual paths must be provided\n");
96+
print_usage(argv[0]);
97+
return 1;
98+
}
99+
100+
if (!std::filesystem::exists(reference_path) || !std::filesystem::is_directory(reference_path)) {
101+
printf("Error: Reference path '%s' does not exist or is not a directory\n", reference_path.c_str());
102+
return 1;
103+
}
104+
105+
if (!std::filesystem::exists(actual_path) || !std::filesystem::is_directory(actual_path)) {
106+
printf("Error: Actual path '%s' does not exist or is not a directory\n", actual_path.c_str());
107+
return 1;
108+
}
109+
110+
std::vector<std::string> reference_files = read_files_from_directory(reference_path);
111+
std::vector<std::string> actual_files = read_files_from_directory(actual_path);
112+
113+
//printf("Found %zu reference files in %s\n", reference_files.size(), reference_path.c_str());
114+
//printf("Found %zu actual files in %s\n", actual_files.size(), actual_path.c_str());
115+
116+
std::map<std::string, std::string> reference_map;
117+
std::map<std::string, std::string> actual_map;
118+
119+
for (const auto& file : reference_files) {
120+
reference_map[get_base_filename(file)] = file;
121+
}
122+
123+
for (const auto& file : actual_files) {
124+
actual_map[get_base_filename(file)] = file;
125+
}
126+
127+
for (const auto& [filename, ref_path] : reference_map) {
128+
auto actual_it = actual_map.find(filename);
129+
if (actual_it != actual_map.end()) {
130+
std::string reference_content = read_file_content(ref_path);
131+
std::string actual_content = read_file_content(actual_it->second);
132+
133+
wer_result result = calculate_wer(reference_content, actual_content);
134+
printf("Word Error Rate for : %s\n", filename.c_str());
135+
printf(" Reference words: %ld\n", result.n_ref_words);
136+
printf(" Actual words: %ld\n", result.n_act_words);
137+
printf(" Substitutions: %d\n", result.n_sub);
138+
printf(" Deletions: %d\n", result.n_del);
139+
printf(" Insertions: %d\n", result.n_ins);
140+
printf(" Total edits: %d\n", result.n_edits);
141+
printf(" WER: %f\n", result.wer);
142+
} else {
143+
printf("Warning: No matching actual file found for reference file: %s\n", filename.c_str());
144+
}
145+
}
146+
147+
return 0;
148+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
And so my fellow Swedes, ask not what your country can do for you, ask what you can do for your country.

tests/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,9 @@ if (WHISPER_FFMPEG)
8585
set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "tiny;mp3")
8686
endif()
8787

88+
# WER Unit Test
89+
add_executable(test-wer test-wer.cpp)
90+
target_include_directories(test-wer PRIVATE ../examples)
91+
target_link_libraries(test-wer PRIVATE common)
92+
add_test(NAME test-wer COMMAND test-wer)
93+
set_tests_properties(test-wer PROPERTIES LABELS "unit")

tests/test-wer.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#include "wer.h"
2+
3+
#include <cassert>
4+
#include <cstdio>
5+
6+
int main() {
7+
std::string reference = "the cat sat on the mat";
8+
std::string actual = "the cat sat mat";
9+
10+
wer_result result = calculate_wer(reference, actual);
11+
assert(result.n_ref_words == 6);
12+
assert(result.n_act_words == 4);
13+
assert(result.n_sub == 0);
14+
assert(result.n_del == 2);
15+
assert(result.n_ins == 0);
16+
assert(result.n_edits == 2);
17+
assert(std::abs(result.wer - 0.333333) < 0.0001);
18+
19+
return 0;
20+
}

0 commit comments

Comments
 (0)