Skip to content

Commit 7a493fd

Browse files
Fix GH-11952: better locale strings canonicalization for IntlDateFormatter and NumberFormatter
1 parent 96c0bc5 commit 7a493fd

File tree

5 files changed

+133
-6
lines changed

5 files changed

+133
-6
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ PHP NEWS
1010
. Fixed bug GH-19544 (GC treats ZEND_WEAKREF_TAG_MAP references as WeakMap
1111
references). (Arnaud, timwolla)
1212

13+
- Intl:
14+
. Fixed bug GH-11952 (Fix locale strings canonicalization for IntlDateFormatter
15+
and NumberFormatter). (alexandre-daubois)
16+
1317
- OpenSSL:
1418
. Fixed bug GH-19245 (Success error message on TLS stream accept failure).
1519
(Jakub Zelenka)

ext/intl/dateformat/dateformat_create.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
extern "C" {
2323
#include <unicode/ustring.h>
2424
#include <unicode/udat.h>
25+
#include <unicode/uloc.h>
2526

2627
#include "php_intl.h"
2728
#include "dateformat_create.h"
@@ -36,6 +37,20 @@ extern "C" {
3637
#include "dateformat_helpers.h"
3738
#include "zend_exceptions.h"
3839

40+
static char* canonicalize_locale_string(const char* locale) {
41+
char canonicalized[ULOC_FULLNAME_CAPACITY];
42+
UErrorCode status = U_ZERO_ERROR;
43+
int32_t canonicalized_len;
44+
45+
canonicalized_len = uloc_canonicalize(locale, canonicalized, sizeof(canonicalized), &status);
46+
47+
if (U_FAILURE(status) || canonicalized_len <= 0) {
48+
return NULL;
49+
}
50+
51+
return estrdup(canonicalized);
52+
}
53+
3954
#define INTL_UDATE_FMT_OK(i) \
4055
(UDAT_FULL == (i) || UDAT_LONG == (i) || \
4156
UDAT_MEDIUM == (i) || UDAT_SHORT == (i) || \
@@ -110,7 +125,16 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
110125
if (locale_len == 0) {
111126
locale_str = (char *) intl_locale_get_default();
112127
}
113-
locale = Locale::createFromName(locale_str);
128+
129+
char* canonicalized_locale = NULL;
130+
const char* final_locale;
131+
const char* stored_locale;
132+
133+
canonicalized_locale = canonicalize_locale_string(locale_str);
134+
final_locale = canonicalized_locale ? canonicalized_locale : locale_str;
135+
stored_locale = canonicalized_locale ? canonicalized_locale : locale_str;
136+
137+
locale = Locale::createFromName(final_locale);
114138
/* get*Name accessors being set does not preclude being bogus */
115139
if (locale.isBogus() || ((locale_len == 1 && locale_str[0] != 'C') || (locale_len > 1 && strlen(locale.getISO3Language()) == 0))) {
116140
goto error;
@@ -148,7 +172,7 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
148172
}
149173

150174
DATE_FORMAT_OBJECT(dfo) = udat_open((UDateFormatStyle)time_type,
151-
(UDateFormatStyle)date_type, locale_str, NULL, 0, svalue,
175+
(UDateFormatStyle)date_type, final_locale, NULL, 0, svalue,
152176
slength, &INTL_DATA_ERROR_CODE(dfo));
153177

154178
if (pattern_str && pattern_str_len > 0) {
@@ -181,9 +205,13 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
181205
dfo->date_type = date_type;
182206
dfo->time_type = time_type;
183207
dfo->calendar = calendar_type;
184-
dfo->requested_locale = estrdup(locale_str);
208+
/* Store the canonicalized locale, or fall back to the original if canonicalization failed */
209+
dfo->requested_locale = estrdup(stored_locale);
185210

186211
error:
212+
if (canonicalized_locale) {
213+
efree(canonicalized_locale);
214+
}
187215
if (svalue) {
188216
efree(svalue);
189217
}

ext/intl/formatter/formatter_main.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,26 @@
1717
#endif
1818

1919
#include <unicode/ustring.h>
20+
#include <unicode/uloc.h>
2021

2122
#include "php_intl.h"
2223
#include "formatter_class.h"
2324
#include "intl_convert.h"
2425

26+
static char* canonicalize_locale_string(const char* locale) {
27+
char canonicalized[ULOC_FULLNAME_CAPACITY];
28+
UErrorCode status = U_ZERO_ERROR;
29+
int32_t canonicalized_len;
30+
31+
canonicalized_len = uloc_canonicalize(locale, canonicalized, sizeof(canonicalized), &status);
32+
33+
if (U_FAILURE(status) || canonicalized_len <= 0) {
34+
return NULL;
35+
}
36+
37+
return estrdup(canonicalized);
38+
}
39+
2540
/* {{{ */
2641
static int numfmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_handling, bool *error_handling_replaced)
2742
{
@@ -63,12 +78,18 @@ static int numfmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_
6378
locale = intl_locale_get_default();
6479
}
6580

66-
/* Create an ICU number formatter. */
67-
FORMATTER_OBJECT(nfo) = unum_open(style, spattern, spattern_len, locale, NULL, &INTL_DATA_ERROR_CODE(nfo));
81+
char* canonicalized_locale = canonicalize_locale_string(locale);
82+
const char* final_locale = canonicalized_locale ? canonicalized_locale : locale;
6883

69-
if(spattern) {
84+
FORMATTER_OBJECT(nfo) = unum_open(style, spattern, spattern_len, final_locale, NULL, &INTL_DATA_ERROR_CODE(nfo));
85+
86+
if (spattern) {
7087
efree(spattern);
7188
}
89+
90+
if (canonicalized_locale) {
91+
efree(canonicalized_locale);
92+
}
7293

7394
INTL_CTOR_CHECK_STATUS(nfo, "numfmt_create: number formatter creation failed");
7495
return SUCCESS;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Fix GH-11942: IntlDateFormatter should canonicalize locale strings
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
8+
$test_cases = [
9+
['pt', 'pt'],
10+
['pt-PT', 'pt_PT'],
11+
['pt_PT.utf8', 'pt_PT'],
12+
['fr_CA@euro', 'fr_CA'],
13+
];
14+
15+
echo "Testing IntlDateFormatter locale canonicalization:\n";
16+
foreach ($test_cases as $test_case) {
17+
[$input, $expected] = $test_case;
18+
19+
$formatter = new IntlDateFormatter($input, IntlDateFormatter::SHORT, IntlDateFormatter::NONE, 'UTC');
20+
$actual = $formatter->getLocale();
21+
22+
$status = ($actual === $expected) ? 'PASS' : 'FAIL';
23+
echo "Input: $input -> Expected: $expected -> Actual: $actual -> $status\n";
24+
}
25+
26+
$dateFormatter = new IntlDateFormatter('pt_PT.utf8', IntlDateFormatter::SHORT, IntlDateFormatter::NONE, 'UTC');
27+
$dateResult = $dateFormatter->format(1691585260);
28+
echo "\nDateFormatter with pt_PT.utf8: " . $dateResult . "\n";
29+
?>
30+
--EXPECT--
31+
Testing IntlDateFormatter locale canonicalization:
32+
Input: pt -> Expected: pt -> Actual: pt -> PASS
33+
Input: pt-PT -> Expected: pt_PT -> Actual: pt_PT -> PASS
34+
Input: pt_PT.utf8 -> Expected: pt_PT -> Actual: pt_PT -> PASS
35+
Input: fr_CA@euro -> Expected: fr_CA -> Actual: fr_CA -> PASS
36+
37+
DateFormatter with pt_PT.utf8: 09/08/23
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Fix GH-11942: NumberFormatter should canonicalize locale strings
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
8+
$test_cases = [
9+
['pt', 'pt'],
10+
['pt-PT', 'pt_PT'],
11+
['pt_PT.utf8', 'pt_PT'],
12+
['fr_CA@euro', 'fr_CA'],
13+
];
14+
15+
echo "Testing NumberFormatter locale canonicalization:\n";
16+
foreach ($test_cases as $test_case) {
17+
[$input, $expected] = $test_case;
18+
19+
$formatter = new NumberFormatter($input, NumberFormatter::DECIMAL);
20+
$actual = $formatter->getLocale();
21+
22+
$status = ($actual === $expected) ? 'PASS' : 'FAIL';
23+
echo "Input: $input -> Expected: $expected -> Actual: $actual -> $status\n";
24+
}
25+
26+
$numFormatter = new NumberFormatter('pt_PT.utf8', NumberFormatter::DECIMAL);
27+
$numResult = $numFormatter->format(1234.56);
28+
echo "\nNumberFormatter with pt_PT.utf8: " . $numResult . "\n";
29+
?>
30+
--EXPECT--
31+
Testing NumberFormatter locale canonicalization:
32+
Input: pt -> Expected: pt -> Actual: pt -> PASS
33+
Input: pt-PT -> Expected: pt_PT -> Actual: pt_PT -> PASS
34+
Input: pt_PT.utf8 -> Expected: pt_PT -> Actual: pt_PT -> PASS
35+
Input: fr_CA@euro -> Expected: fr_CA -> Actual: fr_CA -> PASS
36+
37+
NumberFormatter with pt_PT.utf8: 1 234,56

0 commit comments

Comments
 (0)