Skip to content

Commit ad1344f

Browse files
Fix GH-11952: better locale strings canonicalization for IntlDateFormatter and NumberFormatter
1 parent 96c0bc5 commit ad1344f

File tree

7 files changed

+116
-6
lines changed

7 files changed

+116
-6
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ PHP NEWS
1010
. Fixed bug GH-19544 (GC treats ZEND_WEAKREF_TAG_MAP references as WeakMap
1111
references). (Arnaud, timwolla)
1212

13+
- Intl:
14+
. Fixed bug GH-11952 (Fix locale strings canonicalization for IntlDateFormatter
15+
and NumberFormatter). (alexandre-daubois)
16+
1317
- OpenSSL:
1418
. Fixed bug GH-19245 (Success error message on TLS stream accept failure).
1519
(Jakub Zelenka)

ext/intl/dateformat/dateformat_create.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
extern "C" {
2323
#include <unicode/ustring.h>
2424
#include <unicode/udat.h>
25+
#include <unicode/uloc.h>
2526

2627
#include "php_intl.h"
2728
#include "dateformat_create.h"
@@ -110,7 +111,12 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
110111
if (locale_len == 0) {
111112
locale_str = (char *) intl_locale_get_default();
112113
}
113-
locale = Locale::createFromName(locale_str);
114+
115+
char* canonicalized_locale = canonicalize_locale_string(locale_str);
116+
const char* final_locale = canonicalized_locale ? canonicalized_locale : locale_str;
117+
const char* stored_locale = canonicalized_locale ? canonicalized_locale : locale_str;
118+
119+
locale = Locale::createFromName(final_locale);
114120
/* get*Name accessors being set does not preclude being bogus */
115121
if (locale.isBogus() || ((locale_len == 1 && locale_str[0] != 'C') || (locale_len > 1 && strlen(locale.getISO3Language()) == 0))) {
116122
goto error;
@@ -148,7 +154,7 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
148154
}
149155

150156
DATE_FORMAT_OBJECT(dfo) = udat_open((UDateFormatStyle)time_type,
151-
(UDateFormatStyle)date_type, locale_str, NULL, 0, svalue,
157+
(UDateFormatStyle)date_type, final_locale, NULL, 0, svalue,
152158
slength, &INTL_DATA_ERROR_CODE(dfo));
153159

154160
if (pattern_str && pattern_str_len > 0) {
@@ -181,9 +187,13 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
181187
dfo->date_type = date_type;
182188
dfo->time_type = time_type;
183189
dfo->calendar = calendar_type;
184-
dfo->requested_locale = estrdup(locale_str);
190+
/* Store the canonicalized locale, or fallback to original if canonicalization failed */
191+
dfo->requested_locale = estrdup(stored_locale);
185192

186193
error:
194+
if (canonicalized_locale) {
195+
efree(canonicalized_locale);
196+
}
187197
if (svalue) {
188198
efree(svalue);
189199
}

ext/intl/formatter/formatter_main.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#endif
1818

1919
#include <unicode/ustring.h>
20+
#include <unicode/uloc.h>
2021

2122
#include "php_intl.h"
2223
#include "formatter_class.h"
@@ -63,12 +64,18 @@ static int numfmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_
6364
locale = intl_locale_get_default();
6465
}
6566

66-
/* Create an ICU number formatter. */
67-
FORMATTER_OBJECT(nfo) = unum_open(style, spattern, spattern_len, locale, NULL, &INTL_DATA_ERROR_CODE(nfo));
67+
char* canonicalized_locale = canonicalize_locale_string(locale);
68+
const char* final_locale = canonicalized_locale ? canonicalized_locale : locale;
6869

69-
if(spattern) {
70+
FORMATTER_OBJECT(nfo) = unum_open(style, spattern, spattern_len, final_locale, NULL, &INTL_DATA_ERROR_CODE(nfo));
71+
72+
if (spattern) {
7073
efree(spattern);
7174
}
75+
76+
if (canonicalized_locale) {
77+
efree(canonicalized_locale);
78+
}
7279

7380
INTL_CTOR_CHECK_STATUS(nfo, "numfmt_create: number formatter creation failed");
7481
return SUCCESS;

ext/intl/php_intl.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,20 @@ const char *intl_locale_get_default( void )
9494
return INTL_G(default_locale);
9595
}
9696

97+
static char* canonicalize_locale_string(const char* locale) {
98+
char canonicalized[ULOC_FULLNAME_CAPACITY];
99+
UErrorCode status = U_ZERO_ERROR;
100+
int32_t canonicalized_len;
101+
102+
canonicalized_len = uloc_canonicalize(locale, canonicalized, sizeof(canonicalized), &status);
103+
104+
if (U_FAILURE(status) || canonicalized_len <= 0) {
105+
return NULL;
106+
}
107+
108+
return estrdup(canonicalized);
109+
}
110+
97111
/* {{{ INI Settings */
98112
PHP_INI_BEGIN()
99113
STD_PHP_INI_ENTRY(LOCALE_INI_NAME, NULL, PHP_INI_ALL, OnUpdateStringUnempty, default_locale, zend_intl_globals, intl_globals)

ext/intl/php_intl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ PHP_RSHUTDOWN_FUNCTION(intl);
6868
PHP_MINFO_FUNCTION(intl);
6969

7070
const char *intl_locale_get_default( void );
71+
/*
 * Canonicalizes `locale` through ICU and returns a newly allocated copy that
 * the caller releases with efree(), or NULL when canonicalization fails.
 * NOTE(review): `static` in a header gives every including file its own
 * internal-linkage declaration -- confirm that the callers outside php_intl.c
 * (dateformat_create.cpp, formatter_main.c) can resolve the definition.
 */
static char* canonicalize_locale_string(const char* locale);
7172

7273
#define PHP_INTL_VERSION PHP_VERSION
7374

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Fix GH-11942: IntlDateFormatter should canonicalize locale strings
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php

// Each entry pairs a locale string given to the constructor with the locale
// name the formatter is expected to report back.
$locale_pairs = [
    ['pt', 'pt'],
    ['pt-PT', 'pt_PT'],
    ['pt_PT.utf8', 'pt_PT'],
    ['fr_CA@euro', 'fr_CA'],
];

echo "Testing IntlDateFormatter locale canonicalization:\n";
foreach ($locale_pairs as [$input, $expected]) {
    $fmt = new IntlDateFormatter($input, IntlDateFormatter::SHORT, IntlDateFormatter::NONE, 'UTC');
    $actual = $fmt->getLocale();

    if ($actual === $expected) {
        $status = 'PASS';
    } else {
        $status = 'FAIL';
    }
    echo "Input: $input -> Expected: $expected -> Actual: $actual -> $status\n";
}

// Formatting must also work when the locale carries a POSIX-style codeset suffix.
$posix_fmt = new IntlDateFormatter('pt_PT.utf8', IntlDateFormatter::SHORT, IntlDateFormatter::NONE, 'UTC');
$formatted = $posix_fmt->format(1691585260);
echo "\nDateFormatter with pt_PT.utf8: " . $formatted . "\n";
?>
30+
--EXPECT--
31+
Testing IntlDateFormatter locale canonicalization:
32+
Input: pt -> Expected: pt -> Actual: pt -> PASS
33+
Input: pt-PT -> Expected: pt_PT -> Actual: pt_PT -> PASS
34+
Input: pt_PT.utf8 -> Expected: pt_PT -> Actual: pt_PT -> PASS
35+
Input: fr_CA@euro -> Expected: fr_CA -> Actual: fr_CA -> PASS
36+
37+
DateFormatter with pt_PT.utf8: 09/08/23
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Fix GH-11942: NumberFormatter should canonicalize locale strings
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php

// Each entry pairs a locale string given to the constructor with the locale
// name the formatter is expected to report back.
$locale_pairs = [
    ['pt', 'pt'],
    ['pt-PT', 'pt_PT'],
    ['pt_PT.utf8', 'pt_PT'],
    ['fr_CA@euro', 'fr_CA'],
];

echo "Testing NumberFormatter locale canonicalization:\n";
foreach ($locale_pairs as [$input, $expected]) {
    $fmt = new NumberFormatter($input, NumberFormatter::DECIMAL);
    $actual = $fmt->getLocale();

    if ($actual === $expected) {
        $status = 'PASS';
    } else {
        $status = 'FAIL';
    }
    echo "Input: $input -> Expected: $expected -> Actual: $actual -> $status\n";
}

// Formatting must also work when the locale carries a POSIX-style codeset suffix.
$posix_fmt = new NumberFormatter('pt_PT.utf8', NumberFormatter::DECIMAL);
$formatted = $posix_fmt->format(1234.56);
echo "\nNumberFormatter with pt_PT.utf8: " . $formatted . "\n";
?>
30+
--EXPECT--
31+
Testing NumberFormatter locale canonicalization:
32+
Input: pt -> Expected: pt -> Actual: pt -> PASS
33+
Input: pt-PT -> Expected: pt_PT -> Actual: pt_PT -> PASS
34+
Input: pt_PT.utf8 -> Expected: pt_PT -> Actual: pt_PT -> PASS
35+
Input: fr_CA@euro -> Expected: fr_CA -> Actual: fr_CA -> PASS
36+
37+
NumberFormatter with pt_PT.utf8: 1 234,56

0 commit comments

Comments
 (0)