27
27
*
28
28
*/
29
29
30
+ /* ISO-2022-KR is defined in RFC 1557.
31
+ * The RFC says that _each_ line which uses KS X 1001 characters must start
32
+ * with the escape sequence ESC $ ) C.
33
+ * We don't enforce that for ISO-2022-KR input. */
34
+
30
35
#include "mbfilter.h"
31
36
#include "mbfilter_iso2022_kr.h"
32
37
#include "unicode_table_uhc.h"
33
38
39
+ static int mbfl_filt_conv_2022kr_wchar_flush (mbfl_convert_filter * filter ); /* flush slot of vtbl_2022kr_wchar: reports a truncated 2-byte character or escape sequence */
40
+ static int mbfl_filt_conv_any_2022kr_flush (mbfl_convert_filter * filter ); /* encoder-side flush: emits shift in (0x0f) to get back to ASCII, then clears status and cache */
41
+
34
42
const mbfl_encoding mbfl_encoding_2022kr = {
35
43
mbfl_no_encoding_2022kr ,
36
44
"ISO-2022-KR" ,
@@ -58,61 +66,58 @@ const struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
58
66
mbfl_filt_conv_common_ctor ,
59
67
NULL ,
60
68
mbfl_filt_conv_2022kr_wchar ,
61
- mbfl_filt_conv_common_flush ,
69
+ mbfl_filt_conv_2022kr_wchar_flush ,
62
70
NULL ,
63
71
};
64
72
65
73
/* Evaluate `statement`; if its value is negative, return -1 from the enclosing function.
 * There must be no whitespace between the macro name and its parameter list,
 * otherwise the preprocessor treats CK as an object-like macro. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
66
74
67
- /*
68
- * ISO-2022-KR => wchar
69
- */
70
- int
71
- mbfl_filt_conv_2022kr_wchar (int c , mbfl_convert_filter * filter )
75
+ int mbfl_filt_conv_2022kr_wchar (int c , mbfl_convert_filter * filter )
72
76
{
73
- int c1 , w , flag ;
77
+ int w = 0 ;
74
78
75
- retry :
76
79
switch (filter -> status & 0xf ) {
77
- /* case 0x00: ASCII */
78
- /* case 0x10: KSC5601 */
80
+ /* case 0x00: ASCII */
81
+ /* case 0x10: KSC5601 */
79
82
case 0 :
80
83
if (c == 0x1b ) { /* ESC */
81
84
filter -> status += 2 ;
82
- } else if (c == 0x0f ) { /* SI (ASCII) */
83
- filter -> status &= ~ 0xff ;
84
- } else if (c == 0x0e ) { /* SO (KSC5601) */
85
- filter -> status | = 0x10 ;
86
- } else if ((filter -> status & 0x10 ) != 0 && c > 0x20 && c < 0x7f ) {
85
+ } else if (c == 0x0f ) { /* shift in (ASCII) */
86
+ filter -> status = 0 ;
87
+ } else if (c == 0x0e ) { /* shift out (KSC5601) */
88
+ filter -> status = 0x10 ;
89
+ } else if ((filter -> status & 0x10 ) && c > 0x20 && c < 0x7f ) {
87
90
/* KSC5601 lead byte */
88
91
filter -> cache = c ;
89
- filter -> status += 1 ;
90
- } else if ((filter -> status & 0x10 ) == 0 && c >= 0 && c < 0x80 ) {
92
+ filter -> status = 0x11 ;
93
+ } else if ((filter -> status & 0x10 ) == 0 && c >= 0 && c < 0x80 ) {
91
94
/* latin, CTLs */
92
95
CK ((* filter -> output_function )(c , filter -> data ));
93
96
} else {
94
- w = c & MBFL_WCSGROUP_MASK ;
95
- w |= MBFL_WCSGROUP_THROUGH ;
96
- CK ((* filter -> output_function )(w , filter -> data ));
97
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
97
98
}
98
99
break ;
99
100
100
- case 1 : /* dbcs second byte */
101
- filter -> status &= ~0xf ;
102
- c1 = filter -> cache ;
103
- flag = 0 ;
101
+ case 1 : /* dbcs second byte */
102
+ filter -> status = 0x10 ;
103
+ int c1 = filter -> cache ;
104
+ int flag = 0 ;
105
+
104
106
if (c1 > 0x20 && c1 < 0x47 ) {
105
107
flag = 1 ;
106
108
} else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49 ) {
107
109
flag = 2 ;
108
110
}
111
+
109
112
if (flag > 0 && c > 0x20 && c < 0x7f ) {
110
- if (flag == 1 ){
111
- w = (c1 - 0x21 )* 190 + (c - 0x41 ) + 0x80 ;
112
- if (w >= 0 && w < uhc2_ucs_table_size ) {
113
- w = uhc2_ucs_table [w ];
114
- } else {
115
- w = 0 ;
113
+ if (flag == 1 ) {
114
+ if (c1 != 0x22 || c <= 0x65 ) {
115
+ w = (c1 - 0x21 )* 190 + (c - 0x41 ) + 0x80 ;
116
+ if (w >= 0 && w < uhc2_ucs_table_size ) {
117
+ w = uhc2_ucs_table [w ];
118
+ } else {
119
+ w = 0 ;
120
+ }
116
121
}
117
122
} else {
118
123
w = (c1 - 0x47 )* 94 + (c - 0x21 );
@@ -124,54 +129,40 @@ mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
124
129
}
125
130
126
131
if (w <= 0 ) {
127
- w = (c1 << 8 ) | c ;
128
- w &= MBFL_WCSPLANE_MASK ;
129
- w |= MBFL_WCSPLANE_KSC5601 ;
132
+ w = (c1 << 8 ) | c | MBFL_WCSPLANE_KSC5601 ;
130
133
}
131
134
CK ((* filter -> output_function )(w , filter -> data ));
132
- } else if (c == 0x1b ) { /* ESC */
133
- filter -> status ++ ;
134
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
135
- CK ((* filter -> output_function )(c , filter -> data ));
136
135
} else {
137
- w = (c1 << 8 ) | c ;
138
- w &= MBFL_WCSGROUP_MASK ;
139
- w |= MBFL_WCSGROUP_THROUGH ;
136
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
140
137
CK ((* filter -> output_function )(w , filter -> data ));
141
138
}
142
139
break ;
143
140
144
- case 2 : /* ESC */
145
- if (c == 0x24 ) { /* '$' */
141
+ case 2 : /* ESC */
142
+ if (c == '$' ) {
146
143
filter -> status ++ ;
147
144
} else {
148
- filter -> status &= ~0xf ;
149
- CK ((* filter -> output_function )(0x1b , filter -> data ));
150
- goto retry ;
145
+ filter -> status = 0 ;
146
+ CK ((* filter -> output_function )(0x1b | MBFL_WCSGROUP_THROUGH , filter -> data ));
151
147
}
152
148
break ;
153
- case 3 : /* ESC $ */
154
- if (c == 0x29 ) { /* ')' */
149
+
150
+ case 3 : /* ESC $ */
151
+ if (c == ')' ) {
155
152
filter -> status ++ ;
156
153
} else {
157
- filter -> status &= ~0xf ;
158
- CK ((* filter -> output_function )(0x1b , filter -> data ));
159
- CK ((* filter -> output_function )(0x24 , filter -> data ));
160
- goto retry ;
154
+ filter -> status = 0 ;
155
+ CK ((* filter -> output_function )(0x1b24 | MBFL_WCSGROUP_THROUGH , filter -> data ));
161
156
}
162
157
break ;
163
- case 4 : /* ESC $ ) */
164
- if (c == 0x43 ) { /* 'C' */
165
- filter -> status &= ~0xf ;
166
- filter -> status |= 0x100 ;
167
- } else {
168
- filter -> status &= ~0xf ;
169
- CK ((* filter -> output_function )(0x1b , filter -> data ));
170
- CK ((* filter -> output_function )(0x24 , filter -> data ));
171
- CK ((* filter -> output_function )(0x29 , filter -> data ));
172
- goto retry ;
158
+
159
+ case 4 : /* ESC $ ) */
160
+ filter -> status = 0 ;
161
+ if (c != 'C' ) {
162
+ CK ((* filter -> output_function )(0x1b2429 | MBFL_WCSGROUP_THROUGH , filter -> data ));
173
163
}
174
164
break ;
165
+
175
166
default :
176
167
filter -> status = 0 ;
177
168
break ;
@@ -180,15 +171,23 @@ mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter)
180
171
return c ;
181
172
}
182
173
183
- /*
184
- * wchar => ISO-2022-KR
185
- */
186
- int
187
- mbfl_filt_conv_wchar_2022kr (int c , mbfl_convert_filter * filter )
174
+ static int mbfl_filt_conv_2022kr_wchar_flush (mbfl_convert_filter * filter )
188
175
{
189
- int c1 , c2 , s ;
176
+ if (filter -> status & 0xF ) {
177
+ /* 2-byte character or escape sequence was truncated */
178
+ CK ((* filter -> output_function )(filter -> cache | MBFL_WCSGROUP_THROUGH , filter -> data ));
179
+ }
180
+
181
+ if (filter -> flush_function ) {
182
+ (* filter -> flush_function )(filter -> data );
183
+ }
184
+
185
+ return 0 ;
186
+ }
190
187
191
- s = 0 ;
188
+ int mbfl_filt_conv_wchar_2022kr (int c , mbfl_convert_filter * filter )
189
+ {
190
+ int c1 , c2 , s = 0 ;
192
191
193
192
if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max ) {
194
193
s = ucs_a1_uhc_table [c - ucs_a1_uhc_table_min ];
@@ -209,43 +208,41 @@ mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
209
208
c1 = (s >> 8 ) & 0xff ;
210
209
c2 = s & 0xff ;
211
210
/* exclude UHC extension area */
212
- if (c1 < 0xa1 || c2 < 0xa1 ){
211
+ if (c1 < 0xa1 || c2 < 0xa1 ) {
213
212
s = c ;
214
213
}
214
+
215
215
if (s & 0x8000 ) {
216
216
s -= 0x8080 ;
217
217
}
218
218
219
219
if (s <= 0 ) {
220
- c1 = c & ~MBFL_WCSPLANE_MASK ;
221
- if (c1 == MBFL_WCSPLANE_KSC5601 ) {
222
- s = c & MBFL_WCSPLANE_MASK ;
223
- }
224
220
if (c == 0 ) {
225
221
s = 0 ;
226
- } else if ( s <= 0 ) {
222
+ } else {
227
223
s = -1 ;
228
224
}
229
225
} else if ((s >= 0x80 && s < 0x2121 ) || (s > 0x8080 )) {
230
226
s = -1 ;
231
227
}
228
+
232
229
if (s >= 0 ) {
233
- if (s < 0x80 && s > 0 ) { /* ASCII */
234
- if (( filter -> status & 0x10 ) != 0 ) {
235
- CK ((* filter -> output_function )(0x0f , filter -> data )); /* SI */
230
+ if (s < 0x80 && s >= 0 ) { /* ASCII */
231
+ if (filter -> status & 0x10 ) {
232
+ CK ((* filter -> output_function )(0x0f , filter -> data )); /* shift in */
236
233
filter -> status &= ~0x10 ;
237
234
}
238
235
CK ((* filter -> output_function )(s , filter -> data ));
239
236
} else {
240
- if ( (filter -> status & 0x100 ) == 0 ) {
241
- CK ((* filter -> output_function )(0x1b , filter -> data )); /* ESC */
242
- CK ((* filter -> output_function )(0x24 , filter -> data )); /* '$' */
243
- CK ((* filter -> output_function )(0x29 , filter -> data )); /* ')' */
244
- CK ((* filter -> output_function )(0x43 , filter -> data )); /* 'C' */
237
+ if ((filter -> status & 0x100 ) == 0 ) {
238
+ CK ((* filter -> output_function )(0x1b , filter -> data )); /* ESC */
239
+ CK ((* filter -> output_function )('$' , filter -> data ));
240
+ CK ((* filter -> output_function )(')' , filter -> data ));
241
+ CK ((* filter -> output_function )('C' , filter -> data ));
245
242
filter -> status |= 0x100 ;
246
243
}
247
244
if ((filter -> status & 0x10 ) == 0 ) {
248
- CK ((* filter -> output_function )(0x0e , filter -> data )); /* SO */
245
+ CK ((* filter -> output_function )(0x0e , filter -> data )); /* shift out */
249
246
filter -> status |= 0x10 ;
250
247
}
251
248
CK ((* filter -> output_function )((s >> 8 ) & 0xff , filter -> data ));
@@ -258,17 +255,16 @@ mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
258
255
return c ;
259
256
}
260
257
261
- int
262
- mbfl_filt_conv_any_2022kr_flush (mbfl_convert_filter * filter )
258
+ static int mbfl_filt_conv_any_2022kr_flush (mbfl_convert_filter * filter )
263
259
{
264
260
/* back to ascii */
265
- if (( filter -> status & 0xff00 ) != 0 ) {
266
- CK ((* filter -> output_function )(0x0f , filter -> data )); /* SI */
261
+ if (filter -> status & 0xff00 ) {
262
+ CK ((* filter -> output_function )(0x0f , filter -> data )); /* shift in */
267
263
}
268
264
269
- filter -> status &= 0xff ;
265
+ filter -> status = filter -> cache = 0 ;
270
266
271
- if (filter -> flush_function != NULL ) {
267
+ if (filter -> flush_function ) {
272
268
return (* filter -> flush_function )(filter -> data );
273
269
}
274
270
0 commit comments