Skip to content

Commit ef42736

Browse files
Merge pull request #370 from joshrotenberg/fix/unicode-truncation-182
fix: add Unicode-safe string truncation in table formatting
2 parents 01567fd + a1b7c86 commit ef42736

File tree

3 files changed

+94
-4
lines changed

3 files changed

+94
-4
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/redisctl/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ colored = "2.1"
3838
tabled = { version = "0.17", features = ["ansi"] }
3939
terminal_size = "0.4"
4040
indicatif = "0.17"
41+
unicode-segmentation = "1.12"
4142

4243
# Shared utility dependencies
4344
thiserror = { workspace = true }

crates/redisctl/src/commands/cloud/utils.rs

Lines changed: 92 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use serde_json::Value;
77
use std::fs;
88
use std::io::{self, Write};
99
use tabled::Tabled;
10+
use unicode_segmentation::UnicodeSegmentation;
1011

1112
#[cfg(unix)]
1213
use std::io::IsTerminal;
@@ -25,14 +26,17 @@ pub struct DetailRow {
2526
pub value: String,
2627
}
2728

28-
/// Truncate string to max length with ellipsis
29+
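/// Truncate a string to at most `max_len` grapheme clusters (Unicode-safe); when `max_len > 3`, a truncated result ends in "..." and the ellipsis counts toward the limit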
2930
pub fn truncate_string(s: &str, max_len: usize) -> String {
30-
if s.len() <= max_len {
31+
let graphemes: Vec<&str> = s.graphemes(true).collect();
32+
33+
if graphemes.len() <= max_len {
3134
s.to_string()
3235
} else if max_len > 3 {
33-
format!("{}...", &s[..max_len - 3])
36+
let truncated: String = graphemes[..max_len - 3].join("");
37+
format!("{}...", truncated)
3438
} else {
35-
s[..max_len].to_string()
39+
graphemes[..max_len].join("")
3640
}
3741
}
3842

@@ -265,3 +269,87 @@ pub fn read_file_input(input: &str) -> CliResult<String> {
265269
Ok(input.to_string())
266270
}
267271
}
272+
273+
#[cfg(test)]
274+
mod tests {
275+
use super::*;
276+
277+
#[test]
278+
fn test_truncate_string_ascii() {
279+
// Test basic ASCII truncation
280+
assert_eq!(truncate_string("hello", 10), "hello");
281+
assert_eq!(truncate_string("hello world", 8), "hello...");
282+
assert_eq!(truncate_string("hello", 5), "hello");
283+
assert_eq!(truncate_string("hello", 4), "h...");
284+
assert_eq!(truncate_string("abc", 2), "ab");
285+
}
286+
287+
#[test]
288+
fn test_truncate_string_unicode() {
289+
// Test with emoji (each emoji is one grapheme cluster)
290+
assert_eq!(truncate_string("Hello 👋 World", 10), "Hello 👋...");
291+
assert_eq!(truncate_string("🚀🎉🎊🎈", 6), "🚀🎉🎊🎈");
292+
assert_eq!(truncate_string("🚀🎉🎊🎈", 3), "🚀🎉🎊");
293+
assert_eq!(truncate_string("🚀🎉🎊🎈", 2), "🚀🎉");
294+
295+
// Test with combined emoji (family emoji is one grapheme)
296+
assert_eq!(truncate_string("👨‍👩‍👧‍👦👋", 2), "👨‍👩‍👧‍👦👋");
297+
assert_eq!(truncate_string("👨‍👩‍👧‍👦👋🎉", 3), "👨‍👩‍👧‍👦👋🎉");
298+
assert_eq!(truncate_string("👨‍👩‍👧‍👦👋🎉", 2), "👨‍👩‍👧‍👦👋");
299+
}
300+
301+
#[test]
302+
fn test_truncate_string_cjk() {
303+
// Test with Chinese characters
304+
assert_eq!(truncate_string("你好世界", 10), "你好世界");
305+
assert_eq!(truncate_string("你好世界", 3), "你好世");
306+
assert_eq!(truncate_string("你好世界", 2), "你好");
307+
308+
// Test with Japanese
309+
assert_eq!(truncate_string("こんにちは", 10), "こんにちは");
310+
assert_eq!(truncate_string("こんにちは", 4), "こ...");
311+
312+
// Test with Korean
313+
assert_eq!(truncate_string("안녕하세요", 10), "안녕하세요");
314+
assert_eq!(truncate_string("안녕하세요", 4), "안...");
315+
}
316+
317+
#[test]
318+
fn test_truncate_string_mixed() {
319+
// Test with mixed ASCII and Unicode
320+
assert_eq!(truncate_string("Hello 世界", 10), "Hello 世界");
321+
assert_eq!(truncate_string("Hello 世界", 8), "Hello 世界");
322+
assert_eq!(truncate_string("Hello 世界", 7), "Hell...");
323+
assert_eq!(truncate_string("Redis🚀Fast", 10), "Redis🚀Fast");
324+
}
325+
326+
#[test]
327+
fn test_truncate_string_edge_cases() {
328+
// Empty string
329+
assert_eq!(truncate_string("", 10), "");
330+
331+
// Very short max length (3 or less): no room for an ellipsis, so the string is cut without one
332+
assert_eq!(truncate_string("hello", 0), "");
333+
assert_eq!(truncate_string("hello", 1), "h");
334+
assert_eq!(truncate_string("hello", 2), "he");
335+
assert_eq!(truncate_string("hello", 3), "hel");
336+
337+
// Exactly at boundary
338+
assert_eq!(truncate_string("abc", 3), "abc");
339+
assert_eq!(truncate_string("abcd", 4), "abcd");
340+
}
341+
342+
#[test]
343+
fn test_truncate_string_doesnt_panic() {
344+
// Several of these used to panic with the old byte-based implementation, which could slice in the middle of a multi-byte character
345+
let _ = truncate_string("Hello 👋 World 🌍", 10);
346+
let _ = truncate_string("🚀", 5);
347+
let _ = truncate_string("你好世界", 3);
348+
let _ = truncate_string("👨‍👩‍👧‍👦", 2);
349+
350+
// Complex Unicode that could cause issues
351+
let _ = truncate_string("é", 1); // combining character
352+
let _ = truncate_string("🇺🇸", 1); // flag emoji (two code points)
353+
let _ = truncate_string("👍🏽", 1); // emoji with skin tone modifier
354+
}
355+
}

0 commit comments

Comments
 (0)