Skip to content

Commit 4e96e5f

Browse files
committed
Fix critical hardsubx OCR bugs affecting accuracy and stability
- Fix iterator initialization in check_color_alternate_pixels
- Add proper null checks before dereferencing OCR results
- Fix confidence threshold comparison (use abs for negative confidences)
- Fix c_char array initialization in avc/nal.rs for cross-platform compatibility
- Prevent segmentation faults in OCR processing
1 parent 24f7184 commit 4e96e5f

File tree

3 files changed

+35
-15
lines changed

3 files changed

+35
-15
lines changed

src/rust/src/avc/nal.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ pub unsafe fn slice_header(
596596
pic_order_cnt_lsb, (*dec_ctx.timing).current_tref,
597597
current_index, (*dec_ctx.avc_ctx).currref, (*dec_ctx.avc_ctx).lastmaxidx, (*dec_ctx.avc_ctx).maxtref);
598598

599-
let mut buf = [c_char::from(0i8); 64];
599+
let mut buf = [0 as c_char; 64];
600600
debug!(
601601
msg_type = DebugMessageFlag::TIME;
602602
" sync_pts:{} ({:8})",

src/rust/src/hardsubx/classifier.rs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,11 @@ pub unsafe extern "C" fn get_ocr_text_wordwise_threshold(
179179
}
180180
}
181181

182-
text_out = format!("{} {}", text_out, word);
182+
if text_out.is_empty() {
183+
text_out = word;
184+
} else {
185+
text_out = format!("{} {}", text_out, word);
186+
}
183187
}
184188
}
185189

@@ -234,7 +238,7 @@ pub unsafe extern "C" fn get_ocr_text_letterwise_threshold(
234238
let mut total_conf: std::os::raw::c_float = 0.0;
235239
let mut num_characters: std::os::raw::c_int = 0;
236240

237-
let mut first_iter: bool = false;
241+
let mut first_iter: bool = true;
238242

239243
if it != null::<TessResultIterator>() as *mut TessResultIterator {
240244
loop {
@@ -245,17 +249,25 @@ pub unsafe extern "C" fn get_ocr_text_letterwise_threshold(
245249
}
246250

247251
let letter = _tess_string_helper(it, level);
248-
text_out = format!("{}{}", text_out, letter);
252+
if letter.is_empty() {
253+
continue;
254+
}
249255

256+
// Check confidence BEFORE adding to output
257+
let mut should_add = true;
250258
if threshold > 0.0 {
251259
// we don't even want to bother with this call if threshold is 0 or less
252260
let conf: std::os::raw::c_float = TessResultIteratorConfidence(it, level);
253261
if conf < threshold {
254-
continue;
262+
should_add = false;
263+
} else {
264+
total_conf += conf;
265+
num_characters += 1;
255266
}
267+
}
256268

257-
total_conf += conf;
258-
num_characters += 1;
269+
if should_add {
270+
text_out = format!("{}{}", text_out, letter);
259271
}
260272
}
261273
}

src/rust/src/hardsubx/decoder.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,14 @@ pub unsafe fn dispatch_classifier_functions(ctx: *mut lib_hardsubx_ctx, im: *mut
4141
match (*ctx).ocr_mode {
4242
0 => {
4343
let ret_char_arr = get_ocr_text_simple_threshold(ctx, im, (*ctx).conf_thresh);
44-
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
45-
match text_out_result {
46-
Ok(T) => T,
47-
Err(_E) => "".to_string(),
44+
if ret_char_arr.is_null() {
45+
"".to_string()
46+
} else {
47+
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
48+
match text_out_result {
49+
Ok(T) => T,
50+
Err(_E) => "".to_string(),
51+
}
4852
}
4953
}
5054
1 => {
@@ -59,10 +63,14 @@ pub unsafe fn dispatch_classifier_functions(ctx: *mut lib_hardsubx_ctx, im: *mut
5963
}
6064
2 => {
6165
let ret_char_arr = get_ocr_text_letterwise_threshold(ctx, im, (*ctx).conf_thresh);
62-
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
63-
match text_out_result {
64-
Ok(T) => T,
65-
Err(_E) => "".to_string(),
66+
if ret_char_arr.is_null() {
67+
"".to_string()
68+
} else {
69+
let text_out_result = ffi::CString::from_raw(ret_char_arr).into_string();
70+
match text_out_result {
71+
Ok(T) => T,
72+
Err(_E) => "".to_string(),
73+
}
6674
}
6775
}
6876

0 commit comments

Comments
 (0)