Skip to content

Commit a7a5f79

Browse files
authored
Add update on change feature and threshold setting (#4)
* Add update-on-change feature and threshold setting. This commit adds the "UpdateOnChange" feature and the "UpdateOnChangeThreshold" setting to the application. The "UpdateOnChange" feature lets the application run an update only when the input image has changed. The "UpdateOnChangeThreshold" setting determines the percentage of the image that must change to trigger an update. These new features improve the efficiency of the application by reducing unnecessary updates.
* lint
* Remove unnecessary log statements and add error handling in tesseract_thread function
* Refactor tesseract-ocr-utils.cpp
* Add Output Formatting option to en-US.ini
1 parent e554661 commit a7a5f79

File tree

6 files changed

+109
-25
lines changed

6 files changed

+109
-25
lines changed

cmake/FetchOpenCV.cmake

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ if(MSVC)
7575
${opencv_SOURCE_DIR}/x64/vc17/staticlib/zlib.lib)
7676
target_include_directories(OpenCV SYSTEM INTERFACE ${opencv_SOURCE_DIR}/include)
7777
else()
78-
target_link_libraries(OpenCV INTERFACE ${opencv_SOURCE_DIR}/lib/libopencv_imgproc.a
79-
${opencv_SOURCE_DIR}/lib/libopencv_core.a)
78+
target_link_libraries(
79+
OpenCV INTERFACE ${opencv_SOURCE_DIR}/lib/libopencv_imgproc.a ${opencv_SOURCE_DIR}/lib/libopencv_core.a
80+
${opencv_SOURCE_DIR}/lib/opencv4/3rdparty/libzlib.a)
8081
target_include_directories(OpenCV SYSTEM INTERFACE ${opencv_SOURCE_DIR}/include/opencv4)
8182
endif()

data/locale/en-US.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ OutputTextSource="Output Text Source"
1111
NoOutput="No Output"
1212
UpdateTimer="Update Timer (ms)"
1313
AdvancedSettings="Advanced Settings"
14+
UpdateOnChange="Update Only on Image Change"
15+
UpdateOnChangeThreshold="Change Threshold %"
16+
OutputFormatting="Output Formatting"

src/filter-data.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct filter_data {
2727
gs_stagesurf_t *stagesurface;
2828

2929
cv::Mat inputBGRA;
30+
cv::Mat lastInputBGRA;
3031
tesseract::TessBaseAPI *tesseract_model;
3132
std::string language;
3233
int pageSegmentationMode;
@@ -39,6 +40,8 @@ struct filter_data {
3940
size_t window_size;
4041
uint32_t update_timer_ms;
4142
std::string output_format_template;
43+
bool update_on_change;
44+
int update_on_change_threshold;
4245

4346
bool isDisabled;
4447

src/ocr-filter.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ const char *ocr_filter_getname(void *unused)
2828

2929
/** PROPERTIES */
3030

31+
/**
 * Property-modified callback for the "update_on_change" checkbox.
 *
 * Shows the "update_on_change_threshold" property when the
 * "update_on_change" setting is true and hides it otherwise.
 *
 * @param props    Property set that contains the threshold property.
 * @param property Unused; may be nullptr (the advanced-settings callback
 *                 invokes this function that way).
 * @param settings Settings object the "update_on_change" flag is read from.
 * @return Always true.
 */
bool update_on_change_modified(obs_properties_t *props, obs_property_t *property,
			       obs_data_t *settings)
{
	UNUSED_PARAMETER(property);

	// The threshold slider is only meaningful while change detection is on.
	const bool show_threshold = obs_data_get_bool(settings, "update_on_change");
	obs_property_t *threshold_prop = obs_properties_get(props, "update_on_change_threshold");
	obs_property_set_visible(threshold_prop, show_threshold);

	return true;
}
40+
3141
// Change the type of enable_smoothing_modified to obs_property_modified_t
3242
bool enable_smoothing_modified(obs_properties_t *props, obs_property_t *property,
3343
obs_data_t *settings)
@@ -67,19 +77,30 @@ obs_properties_t *ocr_filter_properties(void *data)
6777
// add advanced settings checkbox
6878
obs_properties_add_bool(props, "advanced_settings", obs_module_text("AdvancedSettings"));
6979

80+
// Add property for "update on change" checkbox
81+
obs_properties_add_bool(props, "update_on_change", obs_module_text("UpdateOnChange"));
82+
// Add update threshold property
83+
obs_properties_add_int_slider(props, "update_on_change_threshold",
84+
obs_module_text("UpdateOnChangeThreshold"), 1, 100, 1);
85+
// Add a callback to enable or disable the update threshold property
86+
obs_property_set_modified_callback(obs_properties_get(props, "update_on_change"),
87+
update_on_change_modified);
88+
7089
obs_property_set_modified_callback(
7190
obs_properties_get(props, "advanced_settings"),
7291
[](obs_properties_t *props_modified, obs_property_t *property,
7392
obs_data_t *settings) {
7493
bool advanced_settings = obs_data_get_bool(settings, "advanced_settings");
7594
for (const char *prop :
7695
{"page_segmentation_mode", "char_whitelist", "conf_threshold",
77-
"user_patterns", "enable_smoothing", "word_length", "window_size"}) {
96+
"user_patterns", "enable_smoothing", "word_length", "window_size",
97+
"update_on_change_threshold", "update_on_change"}) {
7898
obs_property_set_visible(obs_properties_get(props_modified, prop),
7999
advanced_settings);
80100
}
81101
if (advanced_settings) {
82102
enable_smoothing_modified(props_modified, nullptr, settings);
103+
update_on_change_modified(props_modified, nullptr, settings);
83104
}
84105
UNUSED_PARAMETER(property);
85106
return true;
@@ -154,6 +175,8 @@ obs_properties_t *ocr_filter_properties(void *data)
154175
void ocr_filter_defaults(obs_data_t *settings)
155176
{
156177
obs_data_set_default_int(settings, "update_timer", 100);
178+
obs_data_set_default_bool(settings, "update_on_change", true);
179+
obs_data_set_default_int(settings, "update_on_change_threshold", 15);
157180
obs_data_set_default_string(settings, "language", "eng");
158181
obs_data_set_default_bool(settings, "advanced_settings", false);
159182
obs_data_set_default_int(settings, "page_segmentation_mode", tesseract::PSM_SINGLE_WORD);
@@ -176,8 +199,16 @@ void ocr_filter_update(void *data, obs_data_t *settings)
176199
// Update the output text source
177200
update_text_source_on_settings(tf, settings);
178201

202+
bool hard_tesseract_init_required = false;
203+
204+
std::string new_language = obs_data_get_string(settings, "language");
205+
if (new_language != tf->language) {
206+
// if the language changed, we need to reinitialize the tesseract model
207+
hard_tesseract_init_required = true;
208+
}
209+
tf->language = new_language;
210+
179211
tf->pageSegmentationMode = (int)obs_data_get_int(settings, "page_segmentation_mode");
180-
tf->language = obs_data_get_string(settings, "language");
181212
tf->char_whitelist = obs_data_get_string(settings, "char_whitelist");
182213
tf->user_patterns = obs_data_get_string(settings, "user_patterns");
183214
tf->conf_threshold = (int)obs_data_get_int(settings, "conf_threshold");
@@ -186,9 +217,12 @@ void ocr_filter_update(void *data, obs_data_t *settings)
186217
tf->window_size = obs_data_get_int(settings, "window_size");
187218
tf->update_timer_ms = (uint32_t)obs_data_get_int(settings, "update_timer");
188219
tf->output_format_template = obs_data_get_string(settings, "output_formatting");
220+
tf->update_on_change = obs_data_get_bool(settings, "update_on_change");
221+
tf->update_on_change_threshold =
222+
(int)obs_data_get_int(settings, "update_on_change_threshold");
189223

190224
// Initialize the Tesseract OCR model
191-
initialize_tesseract_ocr(tf);
225+
initialize_tesseract_ocr(tf, hard_tesseract_init_required);
192226
}
193227

194228
void ocr_filter_activate(void *data)
@@ -242,6 +276,8 @@ void ocr_filter_destroy(void *data)
242276

243277
stop_and_join_tesseract_thread(tf);
244278

279+
cleanup_config_files(tf->unique_id);
280+
245281
if (tf->tesseractTraineddataFilepath != nullptr) {
246282
bfree(tf->tesseractTraineddataFilepath);
247283
}

src/tesseract-ocr-utils.cpp

Lines changed: 59 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
#include <obs-module.h>
66

7-
#include <opencv2/core/mat.hpp>
7+
#include <opencv2/core.hpp>
8+
#include <opencv2/imgproc.hpp>
89

910
#include <tesseract/baseapi.h>
1011

@@ -24,17 +25,34 @@ inline uint64_t get_time_ns(void)
2425
.count();
2526
}
2627

27-
void initialize_tesseract_ocr(filter_data *tf)
28+
void cleanup_config_files(const std::string &unique_id)
29+
{
30+
check_plugin_config_folder_exists();
31+
32+
// delete the user patterns file
33+
std::string filename = "user-patterns-" + unique_id + ".txt";
34+
std::string user_patterns_filepath = obs_module_config_path(filename.c_str());
35+
std::remove(user_patterns_filepath.c_str());
36+
37+
// delete the user patterns config file
38+
filename = "user-patterns" + unique_id + ".config";
39+
std::string patterns_config_filepath = obs_module_config_path(filename.c_str());
40+
std::remove(patterns_config_filepath.c_str());
41+
}
42+
43+
void initialize_tesseract_ocr(filter_data *tf, bool hard_tesseract_init_required)
2844
{
2945
// Load model
3046
obs_log(LOG_INFO, "Loading tesseract model from: %s", tf->tesseractTraineddataFilepath);
3147
try {
3248
stop_and_join_tesseract_thread(tf);
3349

34-
if (tf->tesseract_model != nullptr) {
35-
tf->tesseract_model->End();
36-
delete tf->tesseract_model;
37-
tf->tesseract_model = nullptr;
50+
if (hard_tesseract_init_required) {
51+
if (tf->tesseract_model != nullptr) {
52+
tf->tesseract_model->End();
53+
delete tf->tesseract_model;
54+
tf->tesseract_model = nullptr;
55+
}
3856
}
3957

4058
char **configs = nullptr;
@@ -71,16 +89,20 @@ void initialize_tesseract_ocr(filter_data *tf)
7189
strcpy(configs[0], patterns_config_filepath.c_str());
7290
}
7391

74-
tf->tesseract_model = new tesseract::TessBaseAPI();
92+
if (hard_tesseract_init_required) {
93+
tf->tesseract_model = new tesseract::TessBaseAPI();
7594

76-
// set tesseract page segmentation mode to single word
77-
int retval = tf->tesseract_model->Init(tf->tesseractTraineddataFilepath,
78-
tf->language.c_str(),
79-
tesseract::OEM_LSTM_ONLY, configs,
80-
configs_size, nullptr, nullptr, false);
81-
if (retval != 0) {
82-
throw std::runtime_error("Failed to initialize tesseract model");
95+
int retval = tf->tesseract_model->Init(tf->tesseractTraineddataFilepath,
96+
tf->language.c_str(),
97+
tesseract::OEM_LSTM_ONLY, configs,
98+
configs_size, nullptr, nullptr,
99+
false);
100+
if (retval != 0) {
101+
throw std::runtime_error("Failed to initialize tesseract model");
102+
}
83103
}
104+
105+
// set tesseract page segmentation mode
84106
tf->tesseract_model->SetPageSegMode(
85107
static_cast<tesseract::PageSegMode>(tf->pageSegmentationMode));
86108

@@ -139,8 +161,6 @@ std::string run_tesseract_ocr(filter_data *tf, const cv::Mat &imageBGRA)
139161
recognitionResult = tf->smoothing_filter->add_reading(recognitionResult);
140162
}
141163

142-
obs_log(LOG_DEBUG, "OCR result: %s", recognitionResult.c_str());
143-
144164
return recognitionResult;
145165
}
146166

@@ -237,14 +257,34 @@ void tesseract_thread(void *data)
237257
cv::Mat imageBGRA;
238258
{
239259
std::unique_lock<std::mutex> lock(tf->inputBGRALock, std::try_to_lock);
240-
if (!lock.owns_lock()) {
241-
return;
260+
if (lock.owns_lock()) {
261+
imageBGRA = tf->inputBGRA.clone();
242262
}
243-
imageBGRA = tf->inputBGRA.clone();
244263
}
245264

246265
if (!imageBGRA.empty()) {
247266
try {
267+
// if update on change is true check if the image has changed
268+
if (tf->update_on_change &&
269+
imageBGRA.size() == tf->lastInputBGRA.size()) {
270+
const int change_threshold_from_image_area =
271+
(int)((float)tf->update_on_change_threshold /
272+
100.0f *
273+
(float)(imageBGRA.cols * imageBGRA.rows));
274+
// if the image has not changed, skip the processing
275+
// take the absolute difference between the images, convert to gray and count the non-zero pixels
276+
cv::Mat diff;
277+
cv::absdiff(imageBGRA, tf->lastInputBGRA, diff);
278+
cv::cvtColor(diff, diff, cv::COLOR_BGRA2GRAY);
279+
if (cv::countNonZero(diff) <
280+
change_threshold_from_image_area) {
281+
// obs_log(LOG_INFO, "Image has not changed, skipping processing");
282+
// skip the processing
283+
continue;
284+
}
285+
}
286+
tf->lastInputBGRA = imageBGRA.clone();
287+
248288
// Process the image
249289
std::string ocr_result = run_tesseract_ocr(tf, imageBGRA);
250290

src/tesseract-ocr-utils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
#include <deque>
77
#include <string>
88

9-
void initialize_tesseract_ocr(filter_data *tf);
9+
void cleanup_config_files(const std::string &unique_id);
10+
void initialize_tesseract_ocr(filter_data *tf, bool hard_tesseract_init_required = false);
1011
std::string run_tesseract_ocr(filter_data *tf, const cv::Mat &imageBGRA);
1112
std::string strip(const std::string &str);
1213
void stop_and_join_tesseract_thread(struct filter_data *tf);

0 commit comments

Comments
 (0)