Skip to content

Commit 25d68b7

Browse files
authored
fix(708): Support Korean EUC-KR encoding in CEA-708 decoder
2 parents 0fdfb75 + 73cd19f commit 25d68b7

File tree

6 files changed

+125
-41
lines changed

6 files changed

+125
-41
lines changed

src/rust/src/avc/nal.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ use crate::{ccx_options, current_fps, total_frames_count, MPEG_CLOCK_FREQ};
99
use lib_ccxr::common::{AvcNalType, BitStreamRust, BitstreamError, FRAMERATES_VALUES, SLICE_TYPES};
1010
use lib_ccxr::util::log::DebugMessageFlag;
1111
use lib_ccxr::{debug, info};
12-
use std::os::raw::{c_char, c_long};
12+
use std::os::raw::c_char;
1313

1414
/// Process sequence parameter set RBSP
1515
pub fn seq_parameter_set_rbsp(
@@ -630,7 +630,7 @@ pub unsafe fn slice_header(
630630
msg_type = DebugMessageFlag::TIME;
631631
" sync_pts:{} ({:8})",
632632
std::ffi::CStr::from_ptr(ccxr_print_mstime_static(
633-
((*dec_ctx.timing).sync_pts / ((MPEG_CLOCK_FREQ as i64) / 1000i64)) as c_long,
633+
(*dec_ctx.timing).sync_pts / ((MPEG_CLOCK_FREQ as i64) / 1000i64),
634634
buf.as_mut_ptr()
635635
))
636636
.to_str()

src/rust/src/decoder/output.rs

Lines changed: 86 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,41 @@ impl<'a> Writer<'a> {
8585

8686
/// Write the symbol to the provided buffer
8787
///
88-
/// Always writes 2 bytes for consistent UTF-16BE encoding.
89-
/// Previously, this function wrote 1 byte for ASCII characters and 2 bytes
90-
/// for non-ASCII, creating an invalid mix that encoding conversion couldn't
91-
/// handle properly. This caused garbled output with Japanese/Chinese characters
92-
/// (issue #1451).
93-
pub fn write_char(sym: &dtvcc_symbol, buf: &mut Vec<u8>) {
94-
buf.push((sym.sym >> 8) as u8);
95-
buf.push((sym.sym & 0xff) as u8);
88+
/// The `use_utf16` parameter controls the output format:
89+
/// - `true`: Always writes 2 bytes (UTF-16BE format). Use for UTF-16/UCS-2 charsets.
90+
/// - `false`: Writes 1 byte for ASCII (high byte == 0), 2 bytes for extended chars.
91+
/// Use for variable-width encodings like EUC-KR, CP949, Shift-JIS, etc.
92+
///
93+
/// Issue #1451: Japanese/Chinese with UTF-16BE need 2 bytes for all characters.
94+
/// Issue #1065: Korean with EUC-KR needs 1 byte for ASCII, 2 bytes for Korean.
95+
pub fn write_char(sym: &dtvcc_symbol, buf: &mut Vec<u8>, use_utf16: bool) {
96+
let high = (sym.sym >> 8) as u8;
97+
let low = (sym.sym & 0xff) as u8;
98+
99+
if use_utf16 {
100+
// UTF-16BE: Always write 2 bytes
101+
buf.push(high);
102+
buf.push(low);
103+
} else {
104+
// Variable-width: Only write high byte if non-zero
105+
if high != 0 {
106+
buf.push(high);
107+
}
108+
buf.push(low);
109+
}
110+
}
111+
112+
/// Check if a charset name indicates UTF-16 or UCS-2 encoding
113+
///
114+
/// These are fixed-width 16-bit encodings where even ASCII needs 2 bytes.
115+
pub fn is_utf16_charset(charset: &str) -> bool {
116+
let upper = charset.to_uppercase();
117+
upper.contains("UTF-16")
118+
|| upper.contains("UTF16")
119+
|| upper.contains("UCS-2")
120+
|| upper.contains("UCS2")
121+
|| upper.contains("UTF_16")
122+
|| upper.contains("UCS_2")
96123
}
97124

98125
/// Convert from CEA-708 color representation to hex code
@@ -114,27 +141,71 @@ mod tests {
114141
use super::*;
115142

116143
#[test]
117-
fn test_write_char() {
144+
fn test_write_char_utf16_mode() {
118145
let mut buf = Vec::new();
119146

120-
// Write ASCII symbol - UTF-16BE always uses 2 bytes
121-
// 'A' (0x41) becomes [0x00, 0x41] in UTF-16BE
147+
// UTF-16 mode: ASCII symbol 'A' (0x41) becomes [0x00, 0x41]
122148
let sym = dtvcc_symbol { sym: 0x41, init: 0 };
123-
write_char(&sym, &mut buf);
149+
write_char(&sym, &mut buf, true);
124150
assert_eq!(buf, vec![0x00, 0x41]);
125151

126152
buf.clear();
127153

128-
// Write non-ASCII symbol (e.g., Japanese character)
129-
// Already 16-bit, writes as [high_byte, low_byte]
154+
// UTF-16 mode: Non-ASCII symbol writes as [high_byte, low_byte]
130155
let sym = dtvcc_symbol {
131156
sym: 0x1234,
132157
init: 0,
133158
};
134-
write_char(&sym, &mut buf);
159+
write_char(&sym, &mut buf, true);
135160
assert_eq!(buf, vec![0x12, 0x34]);
136161
}
137162

163+
#[test]
164+
fn test_write_char_variable_width_mode() {
165+
let mut buf = Vec::new();
166+
167+
// Variable-width mode: ASCII symbol 'A' (0x41) becomes [0x41] (1 byte)
168+
let sym = dtvcc_symbol { sym: 0x41, init: 0 };
169+
write_char(&sym, &mut buf, false);
170+
assert_eq!(buf, vec![0x41]);
171+
172+
buf.clear();
173+
174+
// Variable-width mode: Korean EUC-KR char becomes [high, low] (2 bytes)
175+
// Example: Korean '인' = 0xC0CE in EUC-KR
176+
let sym = dtvcc_symbol {
177+
sym: 0xC0CE,
178+
init: 0,
179+
};
180+
write_char(&sym, &mut buf, false);
181+
assert_eq!(buf, vec![0xC0, 0xCE]);
182+
183+
buf.clear();
184+
185+
// Variable-width mode: Space (0x20) becomes [0x20] (1 byte, no NUL)
186+
let sym = dtvcc_symbol { sym: 0x20, init: 0 };
187+
write_char(&sym, &mut buf, false);
188+
assert_eq!(buf, vec![0x20]);
189+
}
190+
191+
#[test]
192+
fn test_is_utf16_charset() {
193+
// Should return true for UTF-16 variants
194+
assert!(is_utf16_charset("UTF-16BE"));
195+
assert!(is_utf16_charset("UTF-16LE"));
196+
assert!(is_utf16_charset("utf-16"));
197+
assert!(is_utf16_charset("UTF16"));
198+
assert!(is_utf16_charset("UCS-2"));
199+
assert!(is_utf16_charset("UCS2"));
200+
201+
// Should return false for variable-width encodings
202+
assert!(!is_utf16_charset("EUC-KR"));
203+
assert!(!is_utf16_charset("CP949"));
204+
assert!(!is_utf16_charset("Shift-JIS"));
205+
assert!(!is_utf16_charset("UTF-8"));
206+
assert!(!is_utf16_charset("ISO-8859-1"));
207+
}
208+
138209
#[test]
139210
fn test_color_to_hex() {
140211
assert_eq!(color_to_hex(0b00_00_00), (0, 0, 0)); // Black

src/rust/src/decoder/tv_screen.rs

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ use std::{ffi::CStr, fs::File};
1313
#[cfg(windows)]
1414
use crate::bindings::_get_osfhandle;
1515

16-
use super::output::{color_to_hex, write_char, Writer};
16+
use super::output::{color_to_hex, is_utf16_charset, write_char, Writer};
1717
use super::timing::{get_scc_time_str, get_time_str};
1818
use super::{CCX_DTVCC_SCREENGRID_COLUMNS, CCX_DTVCC_SCREENGRID_ROWS};
1919
use crate::{
@@ -177,6 +177,23 @@ impl dtvcc_tv_screen {
177177
let (first, last) = self.get_write_interval(row_index);
178178
debug!("First: {first}, Last: {last}");
179179

180+
// Determine if we should use UTF-16 mode (2 bytes for all chars) or
181+
// variable-width mode (1 byte for ASCII, 2 bytes for extended chars).
182+
// UTF-16/UCS-2 encodings require 2 bytes even for ASCII.
183+
// Variable-width encodings (EUC-KR, CP949, Shift-JIS, etc.) use 1 byte for ASCII.
184+
let use_utf16 = if !writer.writer_ctx.charset.is_null() {
185+
let charset = unsafe {
186+
CStr::from_ptr(writer.writer_ctx.charset)
187+
.to_str()
188+
.unwrap_or("")
189+
};
190+
is_utf16_charset(charset)
191+
} else {
192+
// No charset specified - default to variable-width for backward compatibility
193+
// with raw byte output (no encoding conversion)
194+
false
195+
};
196+
180197
for i in 0..last + 1 {
181198
if use_colors {
182199
self.change_pen_color(
@@ -219,7 +236,7 @@ impl dtvcc_tv_screen {
219236
if i < first {
220237
buf.push(b' ');
221238
} else {
222-
write_char(&self.chars[row_index][i], &mut buf)
239+
write_char(&self.chars[row_index][i], &mut buf, use_utf16)
223240
}
224241
}
225242
// there can be unclosed tags or colors after the last symbol in a row

src/rust/src/es/pic.rs

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -277,12 +277,8 @@ pub unsafe fn read_pic_info(
277277
// set_fts() is not called for each picture when use_gop_as_pts == 1.
278278
if ccx_options.use_gop_as_pts == 1 {
279279
// Calculate current FTS based on GOP start time + frame offset
280-
// Cast fts_at_gop_start to i64 for cross-platform compatibility (c_long is i32 on Windows)
281280
let frame_offset_ms = (dec_ctx.frames_since_last_gop as f64 * 1000.0 / current_fps) as i64;
282-
#[allow(clippy::unnecessary_cast)]
283-
{
284-
(*dec_ctx.timing).fts_now = (fts_at_gop_start as i64) + frame_offset_ms;
285-
}
281+
(*dec_ctx.timing).fts_now = fts_at_gop_start + frame_offset_ms;
286282

287283
// Update fts_max if needed
288284
if (*dec_ctx.timing).fts_now > (*dec_ctx.timing).fts_max {

src/rust/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ use std::os::raw::{c_uchar, c_void};
5151
use std::{
5252
ffi::CStr,
5353
io::Write,
54-
os::raw::{c_char, c_double, c_int, c_long, c_uint},
54+
os::raw::{c_char, c_double, c_int, c_uint},
5555
};
5656

5757
// Mock data for rust unit tests
@@ -67,7 +67,7 @@ cfg_if! {
6767

6868
static mut frames_since_ref_time: c_int = 0;
6969
static mut total_frames_count: c_uint = 0;
70-
static mut fts_at_gop_start: c_long = 0;
70+
static mut fts_at_gop_start: i64 = 0;
7171
static mut gop_rollover: c_int = 0;
7272
static mut pts_big_change: c_uint = 0;
7373

@@ -144,7 +144,7 @@ extern "C" {
144144
static mut total_frames_count: c_uint;
145145
static mut gop_time: gop_time_code;
146146
static mut first_gop_time: gop_time_code;
147-
static mut fts_at_gop_start: c_long;
147+
static mut fts_at_gop_start: i64;
148148
static mut gop_rollover: c_int;
149149
static mut ccx_common_timing_settings: ccx_common_timing_settings_t;
150150
static mut capitalization_list: word_list;

src/rust/src/libccxr_exports/time.rs

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#![allow(clippy::useless_conversion)]
22

33
use std::convert::TryInto;
4-
use std::ffi::{c_char, c_int, c_long, CStr};
4+
use std::ffi::{c_char, c_int, CStr};
55

66
use crate::{
77
bindings::*, cb_708, cb_field1, cb_field2, ccx_common_timing_settings as timing_settings,
@@ -330,7 +330,7 @@ unsafe fn write_back_from_timing_info() {
330330
.unwrap_or(0);
331331
gop_time = write_gop_time_code(timing_info.gop_time);
332332
first_gop_time = write_gop_time_code(timing_info.first_gop_time);
333-
fts_at_gop_start = timing_info.fts_at_gop_start.millis() as c_long;
333+
fts_at_gop_start = timing_info.fts_at_gop_start.millis();
334334
gop_rollover = if timing_info.gop_rollover { 1 } else { 0 };
335335
timing_settings.disable_sync_check = if timing_info.timing_settings.disable_sync_check {
336336
1
@@ -412,7 +412,7 @@ pub unsafe fn write_gop_time_code(g: Option<GopTimeCode>) -> gop_time_code {
412412
///
413413
/// `ctx` must not be null.
414414
#[no_mangle]
415-
pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) {
415+
pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, pts: i64) {
416416
apply_timing_info();
417417
let mut context = generate_timing_context(ctx);
418418

@@ -428,7 +428,7 @@ pub unsafe extern "C" fn ccxr_add_current_pts(ctx: *mut ccx_common_timing_ctx, p
428428
///
429429
/// `ctx` must not be null.
430430
#[no_mangle]
431-
pub unsafe extern "C" fn ccxr_set_current_pts(ctx: *mut ccx_common_timing_ctx, pts: c_long) {
431+
pub unsafe extern "C" fn ccxr_set_current_pts(ctx: *mut ccx_common_timing_ctx, pts: i64) {
432432
apply_timing_info();
433433
let mut context = generate_timing_context(ctx);
434434

@@ -469,7 +469,7 @@ pub unsafe extern "C" fn ccxr_set_fts(ctx: *mut ccx_common_timing_ctx) -> c_int
469469
pub unsafe extern "C" fn ccxr_get_fts(
470470
ctx: *mut ccx_common_timing_ctx,
471471
current_field: c_int,
472-
) -> c_long {
472+
) -> i64 {
473473
apply_timing_info();
474474
let mut context = generate_timing_context(ctx);
475475

@@ -485,7 +485,7 @@ pub unsafe extern "C" fn ccxr_get_fts(
485485
write_back_to_common_timing_ctx(ctx, &context);
486486
write_back_from_timing_info();
487487

488-
ans.millis().try_into().unwrap_or(0)
488+
ans.millis()
489489
}
490490

491491
/// Rust equivalent for `get_visible_end` function in C. Uses C-native types as input and output.
@@ -503,7 +503,7 @@ pub unsafe extern "C" fn ccxr_get_fts(
503503
pub unsafe extern "C" fn ccxr_get_visible_end(
504504
ctx: *mut ccx_common_timing_ctx,
505505
_current_field: c_int,
506-
) -> c_long {
506+
) -> i64 {
507507
apply_timing_info();
508508
let mut context = generate_timing_context(ctx);
509509

@@ -518,7 +518,7 @@ pub unsafe extern "C" fn ccxr_get_visible_end(
518518
write_back_to_common_timing_ctx(ctx, &context);
519519
write_back_from_timing_info();
520520

521-
fts as c_long
521+
fts
522522
}
523523

524524
/// Rust equivalent for `get_visible_start` function in C. Uses C-native types as input and output.
@@ -537,7 +537,7 @@ pub unsafe extern "C" fn ccxr_get_visible_end(
537537
pub unsafe extern "C" fn ccxr_get_visible_start(
538538
ctx: *mut ccx_common_timing_ctx,
539539
_current_field: c_int,
540-
) -> c_long {
540+
) -> i64 {
541541
apply_timing_info();
542542
let context = generate_timing_context(ctx);
543543

@@ -554,7 +554,7 @@ pub unsafe extern "C" fn ccxr_get_visible_start(
554554
write_back_to_common_timing_ctx(ctx, &context);
555555
write_back_from_timing_info();
556556

557-
fts as c_long
557+
fts
558558
}
559559

560560
/// Rust equivalent for `get_fts_max` function in C. Uses C-native types as input and output.
@@ -563,7 +563,7 @@ pub unsafe extern "C" fn ccxr_get_visible_start(
563563
///
564564
/// `ctx` must not be null.
565565
#[no_mangle]
566-
pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_long {
566+
pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> i64 {
567567
apply_timing_info();
568568
let mut context = generate_timing_context(ctx);
569569

@@ -572,7 +572,7 @@ pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_
572572
write_back_to_common_timing_ctx(ctx, &context);
573573
write_back_from_timing_info();
574574

575-
ans.millis().try_into().unwrap_or(0)
575+
ans.millis()
576576
}
577577

578578
/// Rust equivalent for `print_mstime_static` function in C. Uses C-native types as input and output.
@@ -581,8 +581,8 @@ pub unsafe extern "C" fn ccxr_get_fts_max(ctx: *mut ccx_common_timing_ctx) -> c_
581581
///
582582
/// `buf` must not be null. It must have sufficient length to hold the time in string form.
583583
#[no_mangle]
584-
pub unsafe extern "C" fn ccxr_print_mstime_static(mstime: c_long, buf: *mut c_char) -> *mut c_char {
585-
let time = Timestamp::from_millis(mstime.into());
584+
pub unsafe extern "C" fn ccxr_print_mstime_static(mstime: i64, buf: *mut c_char) -> *mut c_char {
585+
let time = Timestamp::from_millis(mstime);
586586
let ans = c::print_mstime_static(time, ':');
587587
write_string_into_pointer(buf, &ans);
588588
buf

0 commit comments

Comments
 (0)