Skip to content

Commit 8ae0473

Browse files
committed
Don't pass invalid JIS X 0208 characters through
Many Japanese encodings, such as JIS7/8, Shift JIS, ISO-2022-JP, EUC-JP, and so on, encode characters from the JIS X 0208 character set. JIS X 0208 is based on the concept of a 94x94 table, with numbered rows and columns. However, more than a thousand of the cells in that table are empty; JIS X 0208 does not actually use all 94x94=8,836 possible kuten codes.

mbstring had a dubious feature whereby, if a Japanese string contained one of these 'unmapped' kuten codes, and it was being converted to another Japanese encoding which was also based on JIS X 0208, the non-existent character would be silently passed through, and the unmapped kuten code would be re-encoded using the normal encoding method of the target text encoding.

Again, this _only_ happened if converting the text with the funky kuten code to a Japanese encoding. If one tried converting it to Unicode, mbstring would treat that as an error.

If somebody, somewhere, made their own private extension to JIS X 0208, and used the regular Japanese encodings like Shift JIS and EUC-JP to encode this private character set, then this feature might conceivably be useful. But how likely is that? If someone is using Shift JIS, EUC-JP, ISO-2022-JP, etc. to encode a funky version of JIS X 0208 with extra characters added, then that should be treated as a separate text encoding.

The code which flags such characters with MBFL_WCSPLANE_JIS0208 is retained solely for error reporting in `mbfl_filt_conv_illegal_output`.
1 parent 2759874 commit 8ae0473

10 files changed

+2
-31
lines changed

ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -409,9 +409,7 @@ mbfl_filt_conv_wchar_jis_ms(int c, mbfl_convert_filter *filter)
409409
/* do some transliteration */
410410
if (s <= 0) {
411411
c1 = c & ~MBFL_WCSPLANE_MASK;
412-
if (c1 == MBFL_WCSPLANE_JIS0208) {
413-
s = c & MBFL_WCSPLANE_MASK;
414-
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
412+
if (c1 == MBFL_WCSPLANE_JIS0212) {
415413
s = c & MBFL_WCSPLANE_MASK;
416414
s |= 0x8080;
417415
} else if (c == 0xa5) { /* YEN SIGN */

ext/mbstring/libmbfl/filters/mbfilter_cp51932.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -216,14 +216,6 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter)
216216
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */
217217
s1 = -1;
218218
}
219-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
220-
s1 = c & MBFL_WCSPLANE_MASK;
221-
if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */
222-
s1 <= ((88 + 0x20) << 8)) ||/* IBM extension */
223-
(s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */
224-
s1 <= ((94 + 0x20) << 8))) {
225-
s1 = -1;
226-
}
227219
} else if (c == 0xa5) { /* YEN SIGN */
228220
s1 = 0x005c; /* YEN SIGN */
229221
} else if (c == 0x203e) { /* OVER LINE */

ext/mbstring/libmbfl/filters/mbfilter_cp932.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,6 @@ mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter)
255255
if (c1 == MBFL_WCSPLANE_WINCP932) {
256256
s1 = c & MBFL_WCSPLANE_MASK;
257257
s2 = 1;
258-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
259-
s1 = c & MBFL_WCSPLANE_MASK;
260258
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
261259
s1 = c & MBFL_WCSPLANE_MASK;
262260
s1 |= 0x8080;

ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,6 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
279279
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */
280280
s1 = -1;
281281
}
282-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
283-
s1 = c & MBFL_WCSPLANE_MASK;
284-
if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 94ku */
285-
s1 = -1;
286-
}
287282
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
288283
s1 = c & MBFL_WCSPLANE_MASK;
289284
if (s1 >= ((83 + 0x20) << 8)) { /* 83ku - 94ku */

ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,6 @@ mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter)
306306
if (c1 == MBFL_WCSPLANE_WINCP932) {
307307
s1 = c & MBFL_WCSPLANE_MASK;
308308
s2 = 1;
309-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
310-
s1 = c & MBFL_WCSPLANE_MASK;
311309
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
312310
s1 = c & MBFL_WCSPLANE_MASK;
313311
s1 |= 0x8080;

ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,8 +334,6 @@ mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
334334
if (c1 == MBFL_WCSPLANE_WINCP932) {
335335
s1 = c & MBFL_WCSPLANE_MASK;
336336
s2 = 1;
337-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
338-
s1 = c & MBFL_WCSPLANE_MASK;
339337
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
340338
s1 = c & MBFL_WCSPLANE_MASK;
341339
s1 |= 0x8080;

ext/mbstring/libmbfl/filters/mbfilter_jis.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -288,9 +288,7 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
288288
}
289289
if (s <= 0) {
290290
c1 = c & ~MBFL_WCSPLANE_MASK;
291-
if (c1 == MBFL_WCSPLANE_JIS0208) {
292-
s = c & MBFL_WCSPLANE_MASK;
293-
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
291+
if (c1 == MBFL_WCSPLANE_JIS0212) {
294292
s = c & MBFL_WCSPLANE_MASK;
295293
s |= 0x8080;
296294
} else if (c == 0xa5) { /* YEN SIGN */

ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,6 @@ mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter)
420420
if (c1 == MBFL_WCSPLANE_WINCP932) {
421421
s1 = c & MBFL_WCSPLANE_MASK;
422422
s2 = 1;
423-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
424-
s1 = c & MBFL_WCSPLANE_MASK;
425423
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
426424
s1 = c & MBFL_WCSPLANE_MASK;
427425
s1 |= 0x8080;

ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -794,8 +794,6 @@ mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter)
794794
if (c1 == MBFL_WCSPLANE_WINCP932) {
795795
s1 = c & MBFL_WCSPLANE_MASK;
796796
s2 = 1;
797-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
798-
s1 = c & MBFL_WCSPLANE_MASK;
799797
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
800798
s1 = c & MBFL_WCSPLANE_MASK;
801799
s1 |= 0x8080;

ext/mbstring/libmbfl/filters/mbfilter_sjis_open.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,6 @@ mbfl_filt_conv_wchar_sjis_open(int c, mbfl_convert_filter *filter)
245245
if (c1 == MBFL_WCSPLANE_WINCP932) {
246246
s1 = c & MBFL_WCSPLANE_MASK;
247247
s2 = 1;
248-
} else if (c1 == MBFL_WCSPLANE_JIS0208) {
249-
s1 = c & MBFL_WCSPLANE_MASK;
250248
} else if (c1 == MBFL_WCSPLANE_JIS0212) {
251249
s1 = c & MBFL_WCSPLANE_MASK;
252250
s1 |= 0x8080;

0 commit comments

Comments
 (0)