|
1 | 1 | #!/usr/bin/env python3
|
2 | 2 |
|
3 | 3 | encodings = [
|
4 |
| - 'windows-1250', 'windows-1251', 'windows-1252', 'windows-1253', 'windows-1254', |
5 |
| - 'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258', |
6 |
| - 'iso-8859-1', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5', 'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9', 'iso-8859-10', |
7 |
| - 'iso-8859-11', 'iso-8859-13', 'iso-8859-14', 'iso-8859-15', 'iso-8859-16' |
8 |
| - ] |
| 4 | + 'windows-1250', 'windows-1251', 'windows-1252', |
| 5 | + 'windows-1253', 'windows-1254', 'windows-1255', |
| 6 | + 'windows-1256', 'windows-1257', 'windows-1258', |
| 7 | + 'iso-8859-1', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', |
| 8 | + 'iso-8859-5', 'iso-8859-6', 'iso-8859-7', 'iso-8859-8', |
| 9 | + 'iso-8859-9', 'iso-8859-10', 'iso-8859-11', 'iso-8859-13', |
| 10 | + 'iso-8859-14', 'iso-8859-15', 'iso-8859-16' |
| 11 | +] |
9 | 12 |
|
10 | 13 | with open('include/tutf8e.h', 'w') as include:
|
11 | 14 |
|
|
61 | 64 | name = e.replace('-', '_').lower()
|
62 | 65 | include.write('extern const TUTF8encoder tutf8e_encoder_%s;\n'%(name))
|
63 | 66 |
|
64 |
| - # include.write('\n/* UTF8 Tables */\n') |
65 |
| - # for e in sorted(encodings): |
66 |
| - # name = e.replace('-', '_').lower() |
67 |
| - # include.write('extern const uint16_t tutf8e_%s_utf8[256];\n'%(name)) |
68 |
| - |
69 |
| - # include.write('\n/* Encode NUL-terminated string to UTF8 */\n') |
70 |
| - # for e in sorted(encodings): |
71 |
| - # name = e.replace('-', '_').lower() |
72 |
| - # include.write('extern int % -33s(char *output, size_t olen, const char *input);\n'%('tutf8e_string_encode_%s'%(name))) |
73 |
| - |
74 |
| - # include.write('\n/* Encode NUL-terminated string to UTF8, realloc as necessary */\n') |
75 |
| - # for e in sorted(encodings): |
76 |
| - # name = e.replace('-', '_').lower() |
77 |
| - # include.write('extern char * % -33s(char *input);\n'%('tutf8e_string_encode_%s_realloc'%(name))) |
78 |
| - |
79 |
| - # include.write('\n/* Buffer length as UTF8 */\n') |
80 |
| - # for e in sorted(encodings): |
81 |
| - # name = e.replace('-', '_').lower() |
82 |
| - # include.write('extern int % -33s(const char *i, size_t ilen, size_t *length);\n'%('tutf8e_buffer_length_%s'%(name))) |
83 |
| - |
84 |
| - # include.write('\n/* Encode buffer to UTF8 */\n') |
85 |
| - # for e in sorted(encodings): |
86 |
| - # name = e.replace('-', '_').lower() |
87 |
| - # include.write('extern int % -33s(char *output, size_t *olen, const char *input, size_t ilen);\n'%('tutf8e_buffer_encode_%s'%(name))) |
88 |
| - |
89 | 67 | include.write('\n')
|
90 | 68 | include.write('#endif\n')
|
91 | 69 |
|
|
208 | 186 | return TUTF8E_OK;
|
209 | 187 | }
|
210 | 188 | ''')
|
| 189 | + |
| 190 | + for e in sorted(encodings): |
| 191 | + |
| 192 | + mapping = {} |
| 193 | + domain = [] |
| 194 | + |
| 195 | + name = e.replace('-', '_').lower() |
| 196 | + |
| 197 | + v = [] |
| 198 | + for i in range(0,256): |
| 199 | + try: |
| 200 | + v.append(ord(bytes([i]).decode(e)[0])) |
| 201 | + except: |
| 202 | + v.append(0xffff) |
| 203 | + pass |
| 204 | + |
| 205 | + src.write('\n') |
| 206 | + src.write('const uint16_t tutf8e_%s_utf8[256] =\n'%(name)) |
| 207 | + src.write('{\n') |
| 208 | + for i in range(0,256,16): |
| 209 | + src.write(' %s,\n'%(', '.join([ '0x%04x'%(i) for i in v[i:i+16]]))) |
| 210 | + src.write('};\n') |
| 211 | + |
| 212 | + src.write('\n') |
| 213 | + for e in sorted(encodings): |
| 214 | + name = e.replace('-', '_').lower() |
| 215 | + src.write('const TUTF8encoder tutf8e_encoder_%s = (TUTF8encoder) tutf8e_%s_utf8;\n'%(name, name)) |
| 216 | + |
211 | 217 | src.write('''
|
212 | 218 | TUTF8encoder tutf8e_encoder(const char *encoding)
|
213 | 219 | {
|
|
220 | 226 | }
|
221 | 227 | ''')
|
222 | 228 |
|
223 |
| - |
224 | 229 | for e in sorted(encodings):
|
225 | 230 |
|
226 | 231 | mapping = {}
|
227 | 232 | domain = []
|
228 | 233 |
|
229 | 234 | name = e.replace('-', '_').lower()
|
230 |
| - |
231 |
| - print('Encoding: %s'%(e)) |
232 |
| - |
233 | 235 | with open('include/tutf8e/%s.h'%(name), 'w') as include:
|
234 | 236 |
|
235 | 237 | include.write('''
|
|
270 | 272 | include.write('\n')
|
271 | 273 | include.write('#endif\n')
|
272 | 274 |
|
273 |
| -# include.write('\n/* %s */\n'%(e)) |
274 |
| -# include.write('extern char * encode_%s_to_utf8(const char *input);\n'%(name)) |
275 |
| -# include.write('extern int % -33s(char *output, size_t olen, const char *input);\n'%('tutf8e_string_encode_%s'%(name))) |
276 |
| - |
277 |
| - with open('src/%s.c'%(name), 'w') as src: |
278 |
| - |
279 |
| - # Emit code |
280 |
| - |
281 |
| - src.write('#include <tutf8e.h>\n') |
282 |
| - src.write('\n') |
283 |
| - # src.write('#include <string.h> /* strlen */\n') |
284 |
| - # src.write('#include <stdlib.h> /* malloc/free */\n') |
285 |
| - # src.write('\n') |
286 |
| - |
287 |
| - v = [] |
288 |
| - for i in range(0,256): |
289 |
| - try: |
290 |
| - v.append(ord(bytes([i]).decode(e)[0])) |
291 |
| - except: |
292 |
| - v.append(0xffff) |
293 |
| - pass |
294 |
| - |
295 |
| - src.write('static const uint16_t tutf8e_%s_utf8[256] =\n'%(name)) |
296 |
| - src.write('{\n') |
297 |
| - for i in range(0,256,16): |
298 |
| - src.write(' %s,\n'%(', '.join([ '0x%04x'%(i) for i in v[i:i+16]]))) |
299 |
| - src.write('};\n') |
300 |
| - |
301 |
| - src.write('\n') |
302 |
| - src.write('const TUTF8encoder tutf8e_encoder_%s = (TUTF8encoder) tutf8e_%s_utf8;\n'%(name, name)) |
303 |
| - |
304 |
| -# src.write('\n') |
305 |
| -# src.write('int tutf8e_string_encode_%s(char *output, size_t olen, const char *input)\n'%(name)) |
306 |
| -# src.write('{\n') |
307 |
| -# src.write(' size_t len = strlen(input) + 1;\n') |
308 |
| -# src.write(' return tutf8e_buffer_encode(tutf8e_%s_utf8, input, len, output, &olen);\n'%(name)) |
309 |
| -# src.write('}\n') |
310 |
| - |
311 |
| -# src.write(''' |
312 |
| -# int tutf8e_buffer_length_%s(const char *i, size_t ilen, size_t *length) |
313 |
| -# { |
314 |
| -# return tutf8e_buffer_length(tutf8e_%s_utf8, i, ilen, length); |
315 |
| -# } |
316 |
| -# '''%(name, name)) |
317 |
| - |
318 |
| -# src.write('\n') |
319 |
| -# src.write('int tutf8e_buffer_encode_%s(char *output, size_t *olen, const char *input, size_t ilen)\n'%(name)) |
320 |
| -# src.write('{\n') |
321 |
| -# src.write(' return tutf8e_buffer_encode(tutf8e_%s_utf8, input, ilen, output, olen);\n'%(name)) |
322 |
| -# src.write('}\n') |
323 |
| - |
324 |
| -# src.write('\n') |
325 |
| -# src.write('char * tutf8e_string_encode_%s_realloc(char *input)\n'%(name)) |
326 |
| -# src.write('{\n') |
327 |
| -# src.write(' size_t ilen = 0;\n') |
328 |
| -# src.write(' size_t olen = 0;\n') |
329 |
| -# src.write(' if (input && !tutf8e_string_length(tutf8e_%s_utf8, input, &ilen, &olen) && ilen && olen && ilen!=olen) {\n'%(name)) |
330 |
| -# src.write(' char * output = malloc(olen + 1);\n') |
331 |
| -# src.write(' if (output && !tutf8e_buffer_encode(tutf8e_%s_utf8, input, ilen, output, &olen)) {\n'%(name)) |
332 |
| -# src.write(' free(input);\n') |
333 |
| -# src.write(' output[olen] = 0;\n') |
334 |
| -# src.write(' return output;\n') |
335 |
| -# src.write(' }\n') |
336 |
| -# src.write(' free(output);\n') |
337 |
| -# src.write(' }\n') |
338 |
| -# src.write(' return input;\n') |
339 |
| -# src.write('}\n') |
340 |
| - |
341 | 275 | # TESTS
|
342 | 276 |
|
343 | 277 | # List of pangrams
|
|
428 | 362 | test.write(' }\n')
|
429 | 363 | test.write('\n')
|
430 | 364 |
|
431 |
| - # test.write('\n /* string encode with possible re-allocation to UTF8 */\n') |
432 |
| - # for i in tests: |
433 |
| - # if i[1] in encodings: |
434 |
| - # name = i[1].replace('-', '_').lower() |
435 |
| - # test.write(' encoded = tutf8e_string_encode_%s_realloc(strdup(%s));\n'%(name, i[0])) |
436 |
| - # test.write(' if (encoded && !strcmp(encoded, %sUTF8)) {\n'%(i[0])) |
437 |
| - # test.write(' printf("%s\\n", encoded);\n') |
438 |
| - # test.write(' pass++;\n') |
439 |
| - # test.write(' } else {\n') |
440 |
| - # test.write(' printf("Failed to encode %s test\\n");\n'%(i[0])) |
441 |
| - # test.write(' fail++;\n') |
442 |
| - # test.write(' }\n') |
443 |
| - # test.write(' free(encoded);\n') |
444 |
| - # test.write('\n') |
445 |
| - |
446 | 365 | test.write(' printf("%d passed, %d failed tests\\n", pass, fail);\n')
|
447 | 366 |
|
448 | 367 | test.write('}\n')
|
0 commit comments