Skip to content

Commit c9fea7d

Browse files
committed
Convert U+00AF (MACRON) to 0x8150 (FULLWIDTH MACRON) in some SJIS variants
The exception is vanilla Shift-JIS, where 0x7E is a halfwidth overline/macron, so U+00AF is converted to 0x7E there instead. As for Shift-JIS-2004, it has an added character (byte sequence 0x854A) which was defined as a halfwidth macron in JIS X 0213:2000, so we use that.
1 parent ecf7184 commit c9fea7d

File tree

7 files changed

+14
-3
lines changed

7 files changed

+14
-3
lines changed

ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,9 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
194194
{
195195
int s = 0;
196196

197-
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
197+
if (c == 0xAF) { /* U+00AF is MACRON */
198+
s = 0xA2B4; /* Use JIS X 0212 macron */
199+
} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
198200
s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
199201
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
200202
s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];

ext/mbstring/libmbfl/filters/mbfilter_sjis.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
211211
/* Unicode 0x7E is a tilde, but Shift-JIS uses 0x7E for overline (or
212212
* macron). JIS X 0208 kuten 0x2141 is 'WAVE DASH' */
213213
s1 = 0x2141;
214-
} else if (c == 0x203E) { /* U+203E is OVERLINE */
214+
} else if (c == 0xAF || c == 0x203E) { /* U+00AF is MACRON, U+203E is OVERLINE */
215215
s1 = 0x7E; /* Halfwidth overline/macron */
216216
} else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
217217
s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];

ext/mbstring/libmbfl/filters/unicode_table_jis.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2303,7 +2303,7 @@ const unsigned short ucs_a1_jis_table[] = {
23032303
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
23042304
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
23052305
0x0000,0xA2C2,0x2171,0x2172,0xA2F0,0x0000,0xA2C3,0x2178,
2306-
0x212F,0xA2ED,0xA2EC,0x0000,0x224C,0x0000,0xA2EE,0xA2B4,
2306+
0x212F,0xA2ED,0xA2EC,0x0000,0x224C,0x0000,0xA2EE,0x2131,
23072307
0x216B,0x215E,0x0000,0x0000,0x212D,0x0000,0x2279,0x0000,
23082308
0xA2B1,0x0000,0xA2EB,0x0000,0x0000,0x0000,0x0000,0xA2C4,
23092309
0xAAA2,0xAAA1,0xAAA4,0xAAAA,0xAAA3,0xAAA9,0xA9A1,0xAAAE,

ext/mbstring/tests/cp51932_encoding.phpt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ for ($i = 0; $i <= 0x7F; $i++)
8888
$fromUnicode["\x00\xA5"] = "\xA1\xEF";
8989
/* U+203E is OVERLINE; convert to FULLWIDTH MACRON */
9090
$fromUnicode["\x20\x3E"] = "\xA1\xB1";
91+
/* U+00AF is MACRON; convert to FULLWIDTH MACRON */
92+
$fromUnicode["\x00\xAF"] = "\xA1\xB1";
9193

9294
testAllValidChars($validChars, 'CP51932', 'UTF-16BE', false);
9395
testAllValidChars($fromUnicode, 'UTF-16BE', 'CP51932', false);

ext/mbstring/tests/cp932_encoding.phpt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ $fromUnicode["\x00\xAC"] = "\x81\xCA";
5050
/* U+203E is OVERLINE; convert to JIS X 0208 FULLWIDTH MACRON */
5151
$fromUnicode["\x20\x3E"] = "\x81\x50";
5252

53+
/* U+00AF is MACRON; convert it to JIS X 0208 FULLWIDTH MACRON as well */
54+
$fromUnicode["\x00\xAF"] = "\x81\x50";
55+
5356
findInvalidChars($validChars, $invalidChars, $truncated, array_fill_keys(range(0x81, 0x9F), 2) + array_fill_keys(range(0xE0, 0xFC), 2));
5457

5558
findInvalidChars($fromUnicode, $invalidCodepoints, $unused, array_fill_keys(range(0, 0xFF), 2));

ext/mbstring/tests/sjis_encoding.phpt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ $fromUnicode["\x00\x7E"] = "\x81\x60";
2424
/* DEL character */
2525
$validChars["\x7F"] = "\x00\x7F";
2626
$fromUnicode["\x00\x7F"] = "\x7F";
27+
/* U+00AF is MACRON; Shift-JIS 0x7E is overline */
28+
$fromUnicode["\x00\xAF"] = "\x7E";
2729
/* Use fullwidth reverse solidus, not (halfwidth) backslash (0x5C) */
2830
$validChars["\x81\x5F"] = "\xFF\x3C";
2931
$fromUnicode["\xFF\x3C"] = "\x81\x5F";

ext/mbstring/tests/sjismac_encoding.phpt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ $fromUnicode["\x20\x15"] = "\x81\x5C";
6464

6565
/* Convert U+203E (OVERLINE) to 0x8150 (FULLWIDTH MACRON) */
6666
$fromUnicode["\x20\x3E"] = "\x81\x50";
67+
/* Likewise convert U+00AF (MACRON) to 0x8150 (FULLWIDTH MACRON) */
68+
$fromUnicode["\x00\xAF"] = "\x81\x50";
6769

6870
/* Convert U+FF5E (FULLWIDTH TILDE) to 0x8160 (WAVE DASH) */
6971
$fromUnicode["\xFF\x5E"] = "\x81\x60";

0 commit comments

Comments
 (0)