Skip to content

Commit bf9778c

Browse files
committed
date(locale): use actual locale format strings, add comprehensive tests
Replace hardcoded format string selection with direct nl_langinfo(D_T_FMT) usage. This ensures locale-specific formatting details (leading zeros, component ordering, hour formats) are properly respected from system locale data instead of detecting 12/24-hour preference and returning hardcoded alternatives. Changes to locale.rs: - Remove detect_12_hour_format() and uses_12_hour_format() - Simplify get_locale_default_format() to use D_T_FMT directly - Add timezone injection if %Z missing from locale format - Add use nix::libc import Add test coverage (tests/by-util/test_date.rs): - 4 new unit tests for locale format structure validation - 7 new integration tests verifying locale-specific behavior - Tests prevent regression to hardcoded format strings Addresses feedback from PR uutils#9654 comment #3676971020
1 parent 1fca829 commit bf9778c

File tree

2 files changed

+316
-102
lines changed

2 files changed

+316
-102
lines changed

src/uu/date/src/locale.rs

Lines changed: 139 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -28,114 +28,58 @@ macro_rules! cfg_langinfo {
2828
cfg_langinfo! {
2929
use std::ffi::CStr;
3030
use std::sync::OnceLock;
31+
use nix::libc;
3132
}
3233

3334
cfg_langinfo! {
34-
/// Cached result of locale time format detection
35-
static TIME_FORMAT_CACHE: OnceLock<bool> = OnceLock::new();
36-
37-
/// Safe wrapper around libc setlocale
38-
fn set_time_locale() {
39-
unsafe {
40-
nix::libc::setlocale(nix::libc::LC_TIME, c"".as_ptr());
41-
}
42-
}
43-
44-
/// Safe wrapper around libc nl_langinfo that returns `Option<String>`
45-
fn get_locale_info(item: nix::libc::nl_item) -> Option<String> {
46-
unsafe {
47-
let ptr = nix::libc::nl_langinfo(item);
48-
if ptr.is_null() {
49-
None
50-
} else {
51-
CStr::from_ptr(ptr).to_str().ok().map(String::from)
52-
}
53-
}
54-
}
55-
56-
/// Internal function that performs the actual locale detection
57-
fn detect_12_hour_format() -> bool {
58-
// Helper function to check for 12-hour format indicators
59-
fn has_12_hour_indicators(format_str: &str) -> bool {
60-
const INDICATORS: &[&str] = &["%I", "%l", "%r"];
61-
INDICATORS.iter().any(|&indicator| format_str.contains(indicator))
62-
}
63-
64-
// Helper function to check for 24-hour format indicators
65-
fn has_24_hour_indicators(format_str: &str) -> bool {
66-
const INDICATORS: &[&str] = &["%H", "%k", "%R", "%T"];
67-
INDICATORS.iter().any(|&indicator| format_str.contains(indicator))
68-
}
69-
70-
// Set locale from environment variables (empty string = use LC_TIME/LANG env vars)
71-
set_time_locale();
72-
73-
// Get locale format strings using safe wrappers
74-
let d_t_fmt = get_locale_info(nix::libc::D_T_FMT);
75-
let t_fmt_opt = get_locale_info(nix::libc::T_FMT);
76-
let t_fmt_ampm_opt = get_locale_info(nix::libc::T_FMT_AMPM);
77-
78-
// Check D_T_FMT first
79-
if let Some(ref format) = d_t_fmt {
80-
// Check for 12-hour indicators first (higher priority)
81-
if has_12_hour_indicators(format) {
82-
return true;
83-
}
84-
85-
// If we find 24-hour indicators, it's definitely not 12-hour
86-
if has_24_hour_indicators(format) {
87-
return false;
88-
}
89-
}
90-
91-
// Also check the time-only format as a fallback
92-
if let Some(ref time_format) = t_fmt_opt {
93-
if has_12_hour_indicators(time_format) {
94-
return true;
95-
}
96-
}
97-
98-
// Check if there's a specific 12-hour format defined
99-
if let Some(ref ampm_format) = t_fmt_ampm_opt {
100-
// If T_FMT_AMPM is non-empty and different from T_FMT, locale supports 12-hour
101-
if !ampm_format.is_empty() {
102-
if let Some(ref time_format) = t_fmt_opt {
103-
if ampm_format != time_format {
104-
return true;
105-
}
106-
} else {
107-
return true;
108-
}
109-
}
110-
}
111-
112-
// Default to 24-hour format if we can't determine
113-
false
114-
}
115-
}
116-
117-
cfg_langinfo! {
118-
/// Detects whether the current locale prefers 12-hour or 24-hour time format
119-
/// Results are cached for performance
120-
pub fn uses_12_hour_format() -> bool {
121-
*TIME_FORMAT_CACHE.get_or_init(detect_12_hour_format)
122-
}
123-
124-
/// Cached default format string
35+
/// Cached locale date/time format string
12536
static DEFAULT_FORMAT_CACHE: OnceLock<&'static str> = OnceLock::new();
12637

127-
/// Get the locale-appropriate default format string for date output
128-
/// This respects the locale's preference for 12-hour vs 24-hour time
129-
/// Results are cached for performance (following uucore patterns)
38+
/// Returns the default date format string for the current locale.
39+
///
40+
/// The format respects locale preferences for time display (12-hour vs 24-hour),
41+
/// component ordering, and numeric formatting conventions. Ensures timezone
42+
/// information is included in the output.
13043
pub fn get_locale_default_format() -> &'static str {
13144
DEFAULT_FORMAT_CACHE.get_or_init(|| {
132-
if uses_12_hour_format() {
133-
// Use 12-hour format with AM/PM
134-
"%a %b %e %r %Z %Y"
135-
} else {
136-
// Use 24-hour format
137-
"%a %b %e %X %Z %Y"
45+
unsafe {
46+
// Set locale from environment variables (empty string = use LC_TIME/LANG env vars)
47+
libc::setlocale(libc::LC_TIME, c"".as_ptr());
48+
49+
// Get the actual date/time format string from the locale
50+
let d_t_fmt_ptr = libc::nl_langinfo(libc::D_T_FMT);
51+
if !d_t_fmt_ptr.is_null() {
52+
if let Ok(format) = CStr::from_ptr(d_t_fmt_ptr).to_str() {
53+
if !format.is_empty() {
54+
// Use locale format, but ensure it includes timezone if not present
55+
let final_format = if format.contains("%Z") {
56+
format.to_string()
57+
} else {
58+
// Insert %Z before %Y if year is present, otherwise append it
59+
if let Some(pos) = format.find("%Y") {
60+
let mut result = String::with_capacity(format.len() + 3);
61+
result.push_str(&format[..pos]);
62+
result.push_str("%Z ");
63+
result.push_str(&format[pos..]);
64+
result
65+
} else if let Some(pos) = format.find("%y") {
66+
let mut result = String::with_capacity(format.len() + 3);
67+
result.push_str(&format[..pos]);
68+
result.push_str("%Z ");
69+
result.push_str(&format[pos..]);
70+
result
71+
} else {
72+
format.to_string() + " %Z"
73+
}
74+
};
75+
return Box::leak(final_format.into_boxed_str());
76+
}
77+
}
78+
}
13879
}
80+
// Fallback only if locale info unavailable
81+
// Use 24-hour format as safe default
82+
"%a %b %e %X %Z %Y"
13983
})
14084
}
14185
}
@@ -161,7 +105,6 @@ mod tests {
161105
#[test]
162106
fn test_locale_detection() {
163107
// Just verify the function doesn't panic
164-
let _ = uses_12_hour_format();
165108
let _ = get_locale_default_format();
166109
}
167110

@@ -170,8 +113,102 @@ mod tests {
170113
let format = get_locale_default_format();
171114
assert!(format.contains("%a")); // abbreviated weekday
172115
assert!(format.contains("%b")); // abbreviated month
173-
assert!(format.contains("%Y")); // year
116+
assert!(format.contains("%Y") || format.contains("%y")); // year (4-digit or 2-digit)
174117
assert!(format.contains("%Z")); // timezone
175118
}
119+
120+
#[test]
121+
fn test_locale_format_structure() {
122+
// Verify we're using actual locale format strings, not hardcoded ones
123+
let format = get_locale_default_format();
124+
125+
// The format should not be empty
126+
assert!(!format.is_empty(), "Locale format should not be empty");
127+
128+
// Should contain date/time components
129+
let has_date_component = format.contains("%a")
130+
|| format.contains("%A")
131+
|| format.contains("%b")
132+
|| format.contains("%B")
133+
|| format.contains("%d")
134+
|| format.contains("%e");
135+
assert!(has_date_component, "Format should contain date components");
136+
137+
// Should contain time component (hour)
138+
let has_time_component = format.contains("%H")
139+
|| format.contains("%I")
140+
|| format.contains("%k")
141+
|| format.contains("%l")
142+
|| format.contains("%r")
143+
|| format.contains("%R")
144+
|| format.contains("%T")
145+
|| format.contains("%X");
146+
assert!(has_time_component, "Format should contain time components");
147+
}
148+
149+
#[test]
150+
fn test_c_locale_format() {
151+
// Save original locale
152+
let original_lc_all = std::env::var("LC_ALL").ok();
153+
let original_lc_time = std::env::var("LC_TIME").ok();
154+
let original_lang = std::env::var("LANG").ok();
155+
156+
unsafe {
157+
// Set C locale
158+
std::env::set_var("LC_ALL", "C");
159+
std::env::remove_var("LC_TIME");
160+
std::env::remove_var("LANG");
161+
}
162+
163+
// Get the locale format
164+
let format = unsafe {
165+
libc::setlocale(libc::LC_TIME, c"C".as_ptr());
166+
let d_t_fmt_ptr = libc::nl_langinfo(libc::D_T_FMT);
167+
if !d_t_fmt_ptr.is_null() {
168+
std::ffi::CStr::from_ptr(d_t_fmt_ptr).to_str().ok()
169+
} else {
170+
None
171+
}
172+
};
173+
174+
if let Some(locale_format) = format {
175+
// C locale typically uses 24-hour format
176+
// Common patterns: %H (24-hour with leading zero) or %T (HH:MM:SS)
177+
let uses_24_hour = locale_format.contains("%H")
178+
|| locale_format.contains("%T")
179+
|| locale_format.contains("%R");
180+
assert!(uses_24_hour, "C locale should use 24-hour format, got: {}", locale_format);
181+
}
182+
183+
// Restore original locale
184+
unsafe {
185+
if let Some(val) = original_lc_all {
186+
std::env::set_var("LC_ALL", val);
187+
} else {
188+
std::env::remove_var("LC_ALL");
189+
}
190+
if let Some(val) = original_lc_time {
191+
std::env::set_var("LC_TIME", val);
192+
} else {
193+
std::env::remove_var("LC_TIME");
194+
}
195+
if let Some(val) = original_lang {
196+
std::env::set_var("LANG", val);
197+
} else {
198+
std::env::remove_var("LANG");
199+
}
200+
}
201+
}
202+
203+
#[test]
204+
fn test_timezone_included_in_format() {
205+
// The implementation should ensure %Z is present
206+
let format = get_locale_default_format();
207+
assert!(
208+
format.contains("%Z") || format.contains("%z"),
209+
"Format should contain timezone indicator: {}",
210+
format
211+
);
212+
}
176213
}
177214
}

0 commit comments

Comments
 (0)