Skip to content

Commit 585f46a

Browse files
naoNao89sylvestre
authored andcommitted
date(locale): use actual locale format strings, add comprehensive tests
Replace hardcoded format string selection with direct nl_langinfo(D_T_FMT) usage. This ensures locale-specific formatting details (leading zeros, component ordering, hour formats) are properly respected from system locale data instead of detecting 12/24-hour preference and returning hardcoded alternatives. Changes to locale.rs: - Remove detect_12_hour_format() and uses_12_hour_format() - Simplify get_locale_default_format() to use D_T_FMT directly - Add timezone injection if %Z missing from locale format - Add use nix::libc import Add test coverage (tests/by-util/test_date.rs): - 4 new unit tests for locale format structure validation - 7 new integration tests verifying locale-specific behavior - Tests prevent regression to hardcoded format strings Addresses feedback from PR #9654 comment #3676971020
1 parent 6f696b9 commit 585f46a

File tree

2 files changed

+359
-95
lines changed

2 files changed

+359
-95
lines changed

src/uu/date/src/locale.rs

Lines changed: 141 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -28,115 +28,69 @@ macro_rules! cfg_langinfo {
2828
cfg_langinfo! {
2929
use std::ffi::CStr;
3030
use std::sync::OnceLock;
31+
use nix::libc;
3132
}
3233

3334
cfg_langinfo! {
34-
/// Cached result of locale time format detection
35-
static TIME_FORMAT_CACHE: OnceLock<bool> = OnceLock::new();
36-
37-
/// Safe wrapper around libc setlocale
38-
fn set_time_locale() {
39-
unsafe {
40-
nix::libc::setlocale(nix::libc::LC_TIME, c"".as_ptr());
41-
}
42-
}
35+
/// Cached locale date/time format string
36+
static DEFAULT_FORMAT_CACHE: OnceLock<&'static str> = OnceLock::new();
4337

44-
/// Safe wrapper around libc nl_langinfo that returns `Option<String>`
45-
fn get_locale_info(item: nix::libc::nl_item) -> Option<String> {
46-
unsafe {
47-
let ptr = nix::libc::nl_langinfo(item);
48-
if ptr.is_null() {
49-
None
50-
} else {
51-
CStr::from_ptr(ptr).to_str().ok().map(String::from)
38+
/// Returns the default date format string for the current locale.
39+
///
40+
/// The format respects locale preferences for time display (12-hour vs 24-hour),
41+
/// component ordering, and numeric formatting conventions. Ensures timezone
42+
/// information is included in the output.
43+
pub fn get_locale_default_format() -> &'static str {
44+
DEFAULT_FORMAT_CACHE.get_or_init(|| {
45+
// Try to get locale format string
46+
if let Some(format) = get_locale_format_string() {
47+
let format_with_tz = ensure_timezone_in_format(&format);
48+
return Box::leak(format_with_tz.into_boxed_str());
5249
}
53-
}
54-
}
55-
56-
/// Internal function that performs the actual locale detection
57-
fn detect_12_hour_format() -> bool {
58-
// Helper function to check for 12-hour format indicators
59-
fn has_12_hour_indicators(format_str: &str) -> bool {
60-
const INDICATORS: &[&str] = &["%I", "%l", "%r"];
61-
INDICATORS.iter().any(|&indicator| format_str.contains(indicator))
62-
}
6350

64-
// Helper function to check for 24-hour format indicators
65-
fn has_24_hour_indicators(format_str: &str) -> bool {
66-
const INDICATORS: &[&str] = &["%H", "%k", "%R", "%T"];
67-
INDICATORS.iter().any(|&indicator| format_str.contains(indicator))
68-
}
69-
70-
// Set locale from environment variables (empty string = use LC_TIME/LANG env vars)
71-
set_time_locale();
72-
73-
// Get locale format strings using safe wrappers
74-
let d_t_fmt = get_locale_info(nix::libc::D_T_FMT);
75-
let t_fmt_opt = get_locale_info(nix::libc::T_FMT);
76-
let t_fmt_ampm_opt = get_locale_info(nix::libc::T_FMT_AMPM);
51+
// Fallback: use 24-hour format as safe default
52+
"%a %b %e %X %Z %Y"
53+
})
54+
}
7755

78-
// Check D_T_FMT first
79-
if let Some(ref format) = d_t_fmt {
80-
// Check for 12-hour indicators first (higher priority)
81-
if has_12_hour_indicators(format) {
82-
return true;
83-
}
56+
/// Retrieves the date/time format string from the system locale
57+
fn get_locale_format_string() -> Option<String> {
58+
unsafe {
59+
// Set locale from environment variables
60+
libc::setlocale(libc::LC_TIME, c"".as_ptr());
8461

85-
// If we find 24-hour indicators, it's definitely not 12-hour
86-
if has_24_hour_indicators(format) {
87-
return false;
62+
// Get the date/time format string
63+
let d_t_fmt_ptr = libc::nl_langinfo(libc::D_T_FMT);
64+
if d_t_fmt_ptr.is_null() {
65+
return None;
8866
}
89-
}
9067

91-
// Also check the time-only format as a fallback
92-
if let Some(ref time_format) = t_fmt_opt {
93-
if has_12_hour_indicators(time_format) {
94-
return true;
68+
let format = CStr::from_ptr(d_t_fmt_ptr).to_str().ok()?;
69+
if format.is_empty() {
70+
return None;
9571
}
96-
}
9772

98-
// Check if there's a specific 12-hour format defined
99-
if let Some(ref ampm_format) = t_fmt_ampm_opt {
100-
// If T_FMT_AMPM is non-empty and different from T_FMT, locale supports 12-hour
101-
if !ampm_format.is_empty() {
102-
if let Some(ref time_format) = t_fmt_opt {
103-
if ampm_format != time_format {
104-
return true;
105-
}
106-
} else {
107-
return true;
108-
}
109-
}
73+
Some(format.to_string())
11074
}
111-
112-
// Default to 24-hour format if we can't determine
113-
false
11475
}
115-
}
11676

117-
cfg_langinfo! {
118-
/// Detects whether the current locale prefers 12-hour or 24-hour time format
119-
/// Results are cached for performance
120-
pub fn uses_12_hour_format() -> bool {
121-
*TIME_FORMAT_CACHE.get_or_init(detect_12_hour_format)
122-
}
123-
124-
/// Cached default format string
125-
static DEFAULT_FORMAT_CACHE: OnceLock<&'static str> = OnceLock::new();
77+
/// Ensures the format string includes timezone (%Z)
78+
fn ensure_timezone_in_format(format: &str) -> String {
79+
if format.contains("%Z") {
80+
return format.to_string();
81+
}
12682

127-
/// Get the locale-appropriate default format string for date output
128-
/// This respects the locale's preference for 12-hour vs 24-hour time
129-
/// Results are cached for performance (following uucore patterns)
130-
pub fn get_locale_default_format() -> &'static str {
131-
DEFAULT_FORMAT_CACHE.get_or_init(|| {
132-
if uses_12_hour_format() {
133-
// Use 12-hour format with AM/PM
134-
"%a %b %e %r %Z %Y"
135-
} else {
136-
// Use 24-hour format
137-
"%a %b %e %X %Z %Y"
138-
}
139-
})
83+
// Try to insert %Z before year specifier (%Y or %y)
84+
if let Some(pos) = format.find("%Y").or_else(|| format.find("%y")) {
85+
let mut result = String::with_capacity(format.len() + 3);
86+
result.push_str(&format[..pos]);
87+
result.push_str("%Z ");
88+
result.push_str(&format[pos..]);
89+
result
90+
} else {
91+
// No year found, append %Z at the end
92+
format.to_string() + " %Z"
93+
}
14094
}
14195
}
14296

@@ -161,7 +115,6 @@ mod tests {
161115
#[test]
162116
fn test_locale_detection() {
163117
// Just verify the function doesn't panic
164-
let _ = uses_12_hour_format();
165118
let _ = get_locale_default_format();
166119
}
167120

@@ -170,8 +123,101 @@ mod tests {
170123
let format = get_locale_default_format();
171124
assert!(format.contains("%a")); // abbreviated weekday
172125
assert!(format.contains("%b")); // abbreviated month
173-
assert!(format.contains("%Y")); // year
126+
assert!(format.contains("%Y") || format.contains("%y")); // year (4-digit or 2-digit)
174127
assert!(format.contains("%Z")); // timezone
175128
}
129+
130+
#[test]
131+
fn test_locale_format_structure() {
132+
// Verify we're using actual locale format strings, not hardcoded ones
133+
let format = get_locale_default_format();
134+
135+
// The format should not be empty
136+
assert!(!format.is_empty(), "Locale format should not be empty");
137+
138+
// Should contain date/time components
139+
let has_date_component = format.contains("%a")
140+
|| format.contains("%A")
141+
|| format.contains("%b")
142+
|| format.contains("%B")
143+
|| format.contains("%d")
144+
|| format.contains("%e");
145+
assert!(has_date_component, "Format should contain date components");
146+
147+
// Should contain time component (hour)
148+
let has_time_component = format.contains("%H")
149+
|| format.contains("%I")
150+
|| format.contains("%k")
151+
|| format.contains("%l")
152+
|| format.contains("%r")
153+
|| format.contains("%R")
154+
|| format.contains("%T")
155+
|| format.contains("%X");
156+
assert!(has_time_component, "Format should contain time components");
157+
}
158+
159+
#[test]
160+
fn test_c_locale_format() {
161+
// Save original locale
162+
let original_lc_all = std::env::var("LC_ALL").ok();
163+
let original_lc_time = std::env::var("LC_TIME").ok();
164+
let original_lang = std::env::var("LANG").ok();
165+
166+
unsafe {
167+
// Set C locale
168+
std::env::set_var("LC_ALL", "C");
169+
std::env::remove_var("LC_TIME");
170+
std::env::remove_var("LANG");
171+
}
172+
173+
// Get the locale format
174+
let format = unsafe {
175+
libc::setlocale(libc::LC_TIME, c"C".as_ptr());
176+
let d_t_fmt_ptr = libc::nl_langinfo(libc::D_T_FMT);
177+
if d_t_fmt_ptr.is_null() {
178+
None
179+
} else {
180+
std::ffi::CStr::from_ptr(d_t_fmt_ptr).to_str().ok()
181+
}
182+
};
183+
184+
if let Some(locale_format) = format {
185+
// C locale typically uses 24-hour format
186+
// Common patterns: %H (24-hour with leading zero) or %T (HH:MM:SS)
187+
let uses_24_hour = locale_format.contains("%H")
188+
|| locale_format.contains("%T")
189+
|| locale_format.contains("%R");
190+
assert!(uses_24_hour, "C locale should use 24-hour format, got: {locale_format}");
191+
}
192+
193+
// Restore original locale
194+
unsafe {
195+
if let Some(val) = original_lc_all {
196+
std::env::set_var("LC_ALL", val);
197+
} else {
198+
std::env::remove_var("LC_ALL");
199+
}
200+
if let Some(val) = original_lc_time {
201+
std::env::set_var("LC_TIME", val);
202+
} else {
203+
std::env::remove_var("LC_TIME");
204+
}
205+
if let Some(val) = original_lang {
206+
std::env::set_var("LANG", val);
207+
} else {
208+
std::env::remove_var("LANG");
209+
}
210+
}
211+
}
212+
213+
#[test]
214+
fn test_timezone_included_in_format() {
215+
// The implementation should ensure %Z is present
216+
let format = get_locale_default_format();
217+
assert!(
218+
format.contains("%Z") || format.contains("%z"),
219+
"Format should contain timezone indicator: {format}"
220+
);
221+
}
176222
}
177223
}

0 commit comments

Comments
 (0)