Skip to content

Commit 8564487

Browse files
Fix GH-11952: better locale strings canonicalization for IntlDateFormatter and NumberFormatter
1 parent 96c0bc5 commit 8564487

File tree

5 files changed

+141
-6
lines changed

5 files changed

+141
-6
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ PHP NEWS
1010
. Fixed bug GH-19544 (GC treats ZEND_WEAKREF_TAG_MAP references as WeakMap
1111
references). (Arnaud, timwolla)
1212

13+
- Intl:
14+
. Fixed bug GH-11952 (Fix locale strings canonicalization for IntlDateFormatter
15+
and NumberFormatter). (alexandre-daubois)
16+
1317
- OpenSSL:
1418
. Fixed bug GH-19245 (Success error message on TLS stream accept failure).
1519
(Jakub Zelenka)

ext/intl/dateformat/dateformat_create.cpp

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
extern "C" {
2323
#include <unicode/ustring.h>
2424
#include <unicode/udat.h>
25+
#include <unicode/uloc.h>
2526

2627
#include "php_intl.h"
2728
#include "dateformat_create.h"
@@ -36,6 +37,24 @@ extern "C" {
3637
#include "dateformat_helpers.h"
3738
#include "zend_exceptions.h"
3839

40+
static char* canonicalize_locale_string(const char* locale) {
41+
char canonicalized[ULOC_FULLNAME_CAPACITY];
42+
UErrorCode status = U_ZERO_ERROR;
43+
int32_t canonicalized_len;
44+
45+
if (!locale || strlen(locale) == 0) {
46+
return NULL;
47+
}
48+
49+
canonicalized_len = uloc_canonicalize(locale, canonicalized, sizeof(canonicalized), &status);
50+
51+
if (U_FAILURE(status) || canonicalized_len <= 0) {
52+
return NULL;
53+
}
54+
55+
return estrdup(canonicalized);
56+
}
57+
3958
#define INTL_UDATE_FMT_OK(i) \
4059
(UDAT_FULL == (i) || UDAT_LONG == (i) || \
4160
UDAT_MEDIUM == (i) || UDAT_SHORT == (i) || \
@@ -110,7 +129,16 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
110129
if (locale_len == 0) {
111130
locale_str = (char *) intl_locale_get_default();
112131
}
113-
locale = Locale::createFromName(locale_str);
132+
133+
char* canonicalized_locale = NULL;
134+
const char* final_locale;
135+
const char* stored_locale;
136+
137+
canonicalized_locale = canonicalize_locale_string(locale_str);
138+
final_locale = canonicalized_locale ? canonicalized_locale : locale_str;
139+
stored_locale = canonicalized_locale ? canonicalized_locale : locale_str;
140+
141+
locale = Locale::createFromName(final_locale);
114142
/* get*Name accessors being set does not preclude being bogus */
115143
if (locale.isBogus() || ((locale_len == 1 && locale_str[0] != 'C') || (locale_len > 1 && strlen(locale.getISO3Language()) == 0))) {
116144
goto error;
@@ -148,7 +176,7 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
148176
}
149177

150178
DATE_FORMAT_OBJECT(dfo) = udat_open((UDateFormatStyle)time_type,
151-
(UDateFormatStyle)date_type, locale_str, NULL, 0, svalue,
179+
(UDateFormatStyle)date_type, final_locale, NULL, 0, svalue,
152180
slength, &INTL_DATA_ERROR_CODE(dfo));
153181

154182
if (pattern_str && pattern_str_len > 0) {
@@ -181,9 +209,13 @@ static zend_result datefmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handlin
181209
dfo->date_type = date_type;
182210
dfo->time_type = time_type;
183211
dfo->calendar = calendar_type;
184-
dfo->requested_locale = estrdup(locale_str);
212+
/* Store the canonicalized locale, or fallback to original if canonicalization failed */
213+
dfo->requested_locale = estrdup(stored_locale);
185214

186215
error:
216+
if (canonicalized_locale) {
217+
efree(canonicalized_locale);
218+
}
187219
if (svalue) {
188220
efree(svalue);
189221
}

ext/intl/formatter/formatter_main.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,30 @@
1717
#endif
1818

1919
#include <unicode/ustring.h>
20+
#include <unicode/uloc.h>
2021

2122
#include "php_intl.h"
2223
#include "formatter_class.h"
2324
#include "intl_convert.h"
2425

26+
static char* canonicalize_locale_string(const char* locale) {
27+
char canonicalized[ULOC_FULLNAME_CAPACITY];
28+
UErrorCode status = U_ZERO_ERROR;
29+
int32_t canonicalized_len;
30+
31+
if (!locale || strlen(locale) == 0) {
32+
return NULL;
33+
}
34+
35+
canonicalized_len = uloc_canonicalize(locale, canonicalized, sizeof(canonicalized), &status);
36+
37+
if (U_FAILURE(status) || canonicalized_len <= 0) {
38+
return NULL;
39+
}
40+
41+
return estrdup(canonicalized);
42+
}
43+
2544
/* {{{ */
2645
static int numfmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_handling, bool *error_handling_replaced)
2746
{
@@ -63,12 +82,18 @@ static int numfmt_ctor(INTERNAL_FUNCTION_PARAMETERS, zend_error_handling *error_
6382
locale = intl_locale_get_default();
6483
}
6584

66-
/* Create an ICU number formatter. */
67-
FORMATTER_OBJECT(nfo) = unum_open(style, spattern, spattern_len, locale, NULL, &INTL_DATA_ERROR_CODE(nfo));
85+
char* canonicalized_locale = canonicalize_locale_string(locale);
86+
const char* final_locale = canonicalized_locale ? canonicalized_locale : locale;
6887

69-
if(spattern) {
88+
FORMATTER_OBJECT(nfo) = unum_open(style, spattern, spattern_len, final_locale, NULL, &INTL_DATA_ERROR_CODE(nfo));
89+
90+
if (spattern) {
7091
efree(spattern);
7192
}
93+
94+
if (canonicalized_locale) {
95+
efree(canonicalized_locale);
96+
}
7297

7398
INTL_CTOR_CHECK_STATUS(nfo, "numfmt_create: number formatter creation failed");
7499
return SUCCESS;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Fix GH-11952: IntlDateFormatter should canonicalize locale strings
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
8+
$test_cases = [
9+
['pt', 'pt'],
10+
['pt-PT', 'pt_PT'],
11+
['pt_PT.utf8', 'pt_PT'],
12+
['fr_CA@euro', 'fr_CA'],
13+
];
14+
15+
echo "Testing IntlDateFormatter locale canonicalization:\n";
16+
foreach ($test_cases as $test_case) {
17+
[$input, $expected] = $test_case;
18+
19+
$formatter = new IntlDateFormatter($input, IntlDateFormatter::SHORT, IntlDateFormatter::NONE, 'UTC');
20+
$actual = $formatter->getLocale();
21+
22+
$status = ($actual === $expected) ? 'PASS' : 'FAIL';
23+
echo "Input: $input -> Expected: $expected -> Actual: $actual -> $status\n";
24+
}
25+
26+
$dateFormatter = new IntlDateFormatter('pt_PT.utf8', IntlDateFormatter::SHORT, IntlDateFormatter::NONE, 'UTC');
27+
$dateResult = $dateFormatter->format(1691585260);
28+
echo "\nDateFormatter with pt_PT.utf8: " . $dateResult . "\n";
29+
?>
30+
--EXPECT--
31+
Testing IntlDateFormatter locale canonicalization:
32+
Input: pt -> Expected: pt -> Actual: pt -> PASS
33+
Input: pt-PT -> Expected: pt_PT -> Actual: pt_PT -> PASS
34+
Input: pt_PT.utf8 -> Expected: pt_PT -> Actual: pt_PT -> PASS
35+
Input: fr_CA@euro -> Expected: fr_CA -> Actual: fr_CA -> PASS
36+
37+
DateFormatter with pt_PT.utf8: 09/08/23
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
Fix GH-11952: NumberFormatter should canonicalize locale strings
3+
--EXTENSIONS--
4+
intl
5+
--FILE--
6+
<?php
7+
8+
$test_cases = [
9+
['pt', 'pt'],
10+
['pt-PT', 'pt_PT'],
11+
['pt_PT.utf8', 'pt_PT'],
12+
['fr_CA@euro', 'fr_CA'],
13+
];
14+
15+
echo "Testing NumberFormatter locale canonicalization:\n";
16+
foreach ($test_cases as $test_case) {
17+
[$input, $expected] = $test_case;
18+
19+
$formatter = new NumberFormatter($input, NumberFormatter::DECIMAL);
20+
$actual = $formatter->getLocale();
21+
22+
$status = ($actual === $expected) ? 'PASS' : 'FAIL';
23+
echo "Input: $input -> Expected: $expected -> Actual: $actual -> $status\n";
24+
}
25+
26+
$numFormatter = new NumberFormatter('pt_PT.utf8', NumberFormatter::DECIMAL);
27+
$numResult = $numFormatter->format(1234.56);
28+
echo "\nNumberFormatter with pt_PT.utf8: " . $numResult . "\n";
29+
?>
30+
--EXPECT--
31+
Testing NumberFormatter locale canonicalization:
32+
Input: pt -> Expected: pt -> Actual: pt -> PASS
33+
Input: pt-PT -> Expected: pt_PT -> Actual: pt_PT -> PASS
34+
Input: pt_PT.utf8 -> Expected: pt_PT -> Actual: pt_PT -> PASS
35+
Input: fr_CA@euro -> Expected: fr_CA -> Actual: fr_CA -> PASS
36+
37+
NumberFormatter with pt_PT.utf8: 1 234,56

0 commit comments

Comments
 (0)