Skip to content

Commit 58ef500

Browse files
committed
tutf8e: Refresh from upstream, simplified
Signed-off-by: Nigel Stewart <[email protected]>
1 parent 71af5b7 commit 58ef500

27 files changed, with 542 additions and 671 deletions.

lib/tutf8e/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,7 @@ project(tutf8e)
 4 |  4 |   set(CMAKE_C_FLAGS "-Os -Wall")
 5 |  5 |
 6 |  6 |   include_directories(include)
 7 |    | - file(GLOB src src/*.c)
 8 |    | - add_library(tutf8e STATIC ${src})
   |  7 | + add_library(tutf8e STATIC src/tutf8e.c)
 9 |  8 |
10 |  9 |   add_executable(tutf8e-test test/test.c)
11 | 10 |   target_link_libraries(tutf8e-test tutf8e)

lib/tutf8e/codegen.py

Lines changed: 36 additions & 117 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
11
#!/usr/bin/env python3
22

33
encodings = [
4-
'windows-1250', 'windows-1251', 'windows-1252', 'windows-1253', 'windows-1254',
5-
'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258',
6-
'iso-8859-1', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5', 'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9', 'iso-8859-10',
7-
'iso-8859-11', 'iso-8859-13', 'iso-8859-14', 'iso-8859-15', 'iso-8859-16'
8-
]
4+
'windows-1250', 'windows-1251', 'windows-1252',
5+
'windows-1253', 'windows-1254', 'windows-1255',
6+
'windows-1256', 'windows-1257', 'windows-1258',
7+
'iso-8859-1', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4',
8+
'iso-8859-5', 'iso-8859-6', 'iso-8859-7', 'iso-8859-8',
9+
'iso-8859-9', 'iso-8859-10', 'iso-8859-11', 'iso-8859-13',
10+
'iso-8859-14', 'iso-8859-15', 'iso-8859-16'
11+
]
912

1013
with open('include/tutf8e.h', 'w') as include:
1114

@@ -61,31 +64,6 @@
6164
name = e.replace('-', '_').lower()
6265
include.write('extern const TUTF8encoder tutf8e_encoder_%s;\n'%(name))
6366

64-
# include.write('\n/* UTF8 Tables */\n')
65-
# for e in sorted(encodings):
66-
# name = e.replace('-', '_').lower()
67-
# include.write('extern const uint16_t tutf8e_%s_utf8[256];\n'%(name))
68-
69-
# include.write('\n/* Encode NUL-terminated string to UTF8 */\n')
70-
# for e in sorted(encodings):
71-
# name = e.replace('-', '_').lower()
72-
# include.write('extern int % -33s(char *output, size_t olen, const char *input);\n'%('tutf8e_string_encode_%s'%(name)))
73-
74-
# include.write('\n/* Encode NUL-terminated string to UTF8, realloc as necessary */\n')
75-
# for e in sorted(encodings):
76-
# name = e.replace('-', '_').lower()
77-
# include.write('extern char * % -33s(char *input);\n'%('tutf8e_string_encode_%s_realloc'%(name)))
78-
79-
# include.write('\n/* Buffer length as UTF8 */\n')
80-
# for e in sorted(encodings):
81-
# name = e.replace('-', '_').lower()
82-
# include.write('extern int % -33s(const char *i, size_t ilen, size_t *length);\n'%('tutf8e_buffer_length_%s'%(name)))
83-
84-
# include.write('\n/* Encode buffer to UTF8 */\n')
85-
# for e in sorted(encodings):
86-
# name = e.replace('-', '_').lower()
87-
# include.write('extern int % -33s(char *output, size_t *olen, const char *input, size_t ilen);\n'%('tutf8e_buffer_encode_%s'%(name)))
88-
8967
include.write('\n')
9068
include.write('#endif\n')
9169

@@ -208,6 +186,34 @@
208186
return TUTF8E_OK;
209187
}
210188
''')
189+
190+
for e in sorted(encodings):
191+
192+
mapping = {}
193+
domain = []
194+
195+
name = e.replace('-', '_').lower()
196+
197+
v = []
198+
for i in range(0,256):
199+
try:
200+
v.append(ord(bytes([i]).decode(e)[0]))
201+
except:
202+
v.append(0xffff)
203+
pass
204+
205+
src.write('\n')
206+
src.write('const uint16_t tutf8e_%s_utf8[256] =\n'%(name))
207+
src.write('{\n')
208+
for i in range(0,256,16):
209+
src.write(' %s,\n'%(', '.join([ '0x%04x'%(i) for i in v[i:i+16]])))
210+
src.write('};\n')
211+
212+
src.write('\n')
213+
for e in sorted(encodings):
214+
name = e.replace('-', '_').lower()
215+
src.write('const TUTF8encoder tutf8e_encoder_%s = (TUTF8encoder) tutf8e_%s_utf8;\n'%(name, name))
216+
211217
src.write('''
212218
TUTF8encoder tutf8e_encoder(const char *encoding)
213219
{
@@ -220,16 +226,12 @@
220226
}
221227
''')
222228

223-
224229
for e in sorted(encodings):
225230

226231
mapping = {}
227232
domain = []
228233

229234
name = e.replace('-', '_').lower()
230-
231-
print('Encoding: %s'%(e))
232-
233235
with open('include/tutf8e/%s.h'%(name), 'w') as include:
234236

235237
include.write('''
@@ -270,74 +272,6 @@
270272
include.write('\n')
271273
include.write('#endif\n')
272274

273-
# include.write('\n/* %s */\n'%(e))
274-
# include.write('extern char * encode_%s_to_utf8(const char *input);\n'%(name))
275-
# include.write('extern int % -33s(char *output, size_t olen, const char *input);\n'%('tutf8e_string_encode_%s'%(name)))
276-
277-
with open('src/%s.c'%(name), 'w') as src:
278-
279-
# Emit code
280-
281-
src.write('#include <tutf8e.h>\n')
282-
src.write('\n')
283-
# src.write('#include <string.h> /* strlen */\n')
284-
# src.write('#include <stdlib.h> /* malloc/free */\n')
285-
# src.write('\n')
286-
287-
v = []
288-
for i in range(0,256):
289-
try:
290-
v.append(ord(bytes([i]).decode(e)[0]))
291-
except:
292-
v.append(0xffff)
293-
pass
294-
295-
src.write('static const uint16_t tutf8e_%s_utf8[256] =\n'%(name))
296-
src.write('{\n')
297-
for i in range(0,256,16):
298-
src.write(' %s,\n'%(', '.join([ '0x%04x'%(i) for i in v[i:i+16]])))
299-
src.write('};\n')
300-
301-
src.write('\n')
302-
src.write('const TUTF8encoder tutf8e_encoder_%s = (TUTF8encoder) tutf8e_%s_utf8;\n'%(name, name))
303-
304-
# src.write('\n')
305-
# src.write('int tutf8e_string_encode_%s(char *output, size_t olen, const char *input)\n'%(name))
306-
# src.write('{\n')
307-
# src.write(' size_t len = strlen(input) + 1;\n')
308-
# src.write(' return tutf8e_buffer_encode(tutf8e_%s_utf8, input, len, output, &olen);\n'%(name))
309-
# src.write('}\n')
310-
311-
# src.write('''
312-
# int tutf8e_buffer_length_%s(const char *i, size_t ilen, size_t *length)
313-
# {
314-
# return tutf8e_buffer_length(tutf8e_%s_utf8, i, ilen, length);
315-
# }
316-
# '''%(name, name))
317-
318-
# src.write('\n')
319-
# src.write('int tutf8e_buffer_encode_%s(char *output, size_t *olen, const char *input, size_t ilen)\n'%(name))
320-
# src.write('{\n')
321-
# src.write(' return tutf8e_buffer_encode(tutf8e_%s_utf8, input, ilen, output, olen);\n'%(name))
322-
# src.write('}\n')
323-
324-
# src.write('\n')
325-
# src.write('char * tutf8e_string_encode_%s_realloc(char *input)\n'%(name))
326-
# src.write('{\n')
327-
# src.write(' size_t ilen = 0;\n')
328-
# src.write(' size_t olen = 0;\n')
329-
# src.write(' if (input && !tutf8e_string_length(tutf8e_%s_utf8, input, &ilen, &olen) && ilen && olen && ilen!=olen) {\n'%(name))
330-
# src.write(' char * output = malloc(olen + 1);\n')
331-
# src.write(' if (output && !tutf8e_buffer_encode(tutf8e_%s_utf8, input, ilen, output, &olen)) {\n'%(name))
332-
# src.write(' free(input);\n')
333-
# src.write(' output[olen] = 0;\n')
334-
# src.write(' return output;\n')
335-
# src.write(' }\n')
336-
# src.write(' free(output);\n')
337-
# src.write(' }\n')
338-
# src.write(' return input;\n')
339-
# src.write('}\n')
340-
341275
# TESTS
342276

343277
# List of pangrams
@@ -428,21 +362,6 @@
428362
test.write(' }\n')
429363
test.write('\n')
430364

431-
# test.write('\n /* string encode with possible re-allocation to UTF8 */\n')
432-
# for i in tests:
433-
# if i[1] in encodings:
434-
# name = i[1].replace('-', '_').lower()
435-
# test.write(' encoded = tutf8e_string_encode_%s_realloc(strdup(%s));\n'%(name, i[0]))
436-
# test.write(' if (encoded && !strcmp(encoded, %sUTF8)) {\n'%(i[0]))
437-
# test.write(' printf("%s\\n", encoded);\n')
438-
# test.write(' pass++;\n')
439-
# test.write(' } else {\n')
440-
# test.write(' printf("Failed to encode %s test\\n");\n'%(i[0]))
441-
# test.write(' fail++;\n')
442-
# test.write(' }\n')
443-
# test.write(' free(encoded);\n')
444-
# test.write('\n')
445-
446365
test.write(' printf("%d passed, %d failed tests\\n", pass, fail);\n')
447366

448367
test.write('}\n')

lib/tutf8e/src/iso_8859_1.c

Lines changed: 0 additions & 23 deletions
This file was deleted.

lib/tutf8e/src/iso_8859_10.c

Lines changed: 0 additions & 23 deletions
This file was deleted.

lib/tutf8e/src/iso_8859_11.c

Lines changed: 0 additions & 23 deletions
This file was deleted.

lib/tutf8e/src/iso_8859_13.c

Lines changed: 0 additions & 23 deletions
This file was deleted.

lib/tutf8e/src/iso_8859_14.c

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments (0)