26
26
#include " unicode/ucharstriebuilder.h"
27
27
#include " ucase.h"
28
28
#include " unicode/normalizer2.h"
29
+ #include " uprops.h"
29
30
#include " normalizer2impl.h"
30
31
#include " writesrc.h"
31
32
@@ -46,90 +47,6 @@ int16_t DATAEXPORT_SCRIPT_X_WITH_COMMON = 0x0400;
46
47
int16_t DATAEXPORT_SCRIPT_X_WITH_INHERITED = 0x0800 ;
47
48
int16_t DATAEXPORT_SCRIPT_X_WITH_OTHER = 0x0c00 ;
48
49
49
- // TODO(ICU-21821): Replace this with a call to a library function
50
- // This is an array of all code points with explicit scx values, and can be generated the quick and dirty
51
- // way with this script:
52
- //
53
- // # <ScriptExtensions.txt python script.py
54
- //
55
- // import sys
56
- // for line in sys.stdin:
57
- // line = line.strip()
58
- // if len(line) == 0 or line.startswith("#"):
59
- // continue
60
- // entry = line.split(" ")[0]
61
- // # Either it is a range
62
- // if ".." in entry:
63
- // split = entry.split("..")
64
- // start = int(split[0], 16)
65
- // end = int(split[1], 16)
66
- // # +
67
- // for ch in range(start, end + 1):
68
- // print("0x%04x, " % ch, end="")
69
- // # or a single code point
70
- // else:
71
- // print("0x%s, " % entry.lower(), end="")
72
-
73
- int32_t scxCodePoints[] = {
74
- 0x00b7 , 0x02bc , 0x02c7 , 0x02c9 , 0x02ca , 0x02cb , 0x02cd , 0x02d7 , 0x02d9 , 0x0300 , 0x0301 , 0x0302 ,
75
- 0x0303 , 0x0304 , 0x0305 , 0x0306 , 0x0307 , 0x0308 , 0x0309 , 0x030a , 0x030b , 0x030c , 0x030d , 0x030e ,
76
- 0x0310 , 0x0311 , 0x0313 , 0x0320 , 0x0323 , 0x0324 , 0x0325 , 0x032d , 0x032e , 0x0330 , 0x0331 , 0x0342 ,
77
- 0x0345 , 0x0358 , 0x035e , 0x0363 , 0x0364 , 0x0365 , 0x0366 , 0x0367 , 0x0368 , 0x0369 , 0x036a , 0x036b ,
78
- 0x036c , 0x036d , 0x036e , 0x036f , 0x0374 , 0x0375 , 0x0483 , 0x0484 , 0x0485 , 0x0486 , 0x0487 , 0x0589 ,
79
- 0x060c , 0x061b , 0x061c , 0x061f , 0x0640 , 0x064b , 0x064c , 0x064d , 0x064e , 0x064f , 0x0650 , 0x0651 ,
80
- 0x0652 , 0x0653 , 0x0654 , 0x0655 , 0x0660 , 0x0661 , 0x0662 , 0x0663 , 0x0664 , 0x0665 , 0x0666 , 0x0667 ,
81
- 0x0668 , 0x0669 , 0x0670 , 0x06d4 , 0x0951 , 0x0952 , 0x0964 , 0x0965 , 0x0966 , 0x0967 , 0x0968 , 0x0969 ,
82
- 0x096a , 0x096b , 0x096c , 0x096d , 0x096e , 0x096f , 0x09e6 , 0x09e7 , 0x09e8 , 0x09e9 , 0x09ea , 0x09eb ,
83
- 0x09ec , 0x09ed , 0x09ee , 0x09ef , 0x0a66 , 0x0a67 , 0x0a68 , 0x0a69 , 0x0a6a , 0x0a6b , 0x0a6c , 0x0a6d ,
84
- 0x0a6e , 0x0a6f , 0x0ae6 , 0x0ae7 , 0x0ae8 , 0x0ae9 , 0x0aea , 0x0aeb , 0x0aec , 0x0aed , 0x0aee , 0x0aef ,
85
- 0x0be6 , 0x0be7 , 0x0be8 , 0x0be9 , 0x0bea , 0x0beb , 0x0bec , 0x0bed , 0x0bee , 0x0bef , 0x0bf0 , 0x0bf1 ,
86
- 0x0bf2 , 0x0bf3 , 0x0ce6 , 0x0ce7 , 0x0ce8 , 0x0ce9 , 0x0cea , 0x0ceb , 0x0cec , 0x0ced , 0x0cee , 0x0cef ,
87
- 0x1040 , 0x1041 , 0x1042 , 0x1043 , 0x1044 , 0x1045 , 0x1046 , 0x1047 , 0x1048 , 0x1049 , 0x10fb , 0x16eb ,
88
- 0x16ec , 0x16ed , 0x1735 , 0x1736 , 0x1802 , 0x1803 , 0x1805 , 0x1cd0 , 0x1cd1 , 0x1cd2 , 0x1cd3 , 0x1cd4 ,
89
- 0x1cd5 , 0x1cd6 , 0x1cd7 , 0x1cd8 , 0x1cd9 , 0x1cda , 0x1cdb , 0x1cdc , 0x1cdd , 0x1cde , 0x1cdf , 0x1ce0 ,
90
- 0x1ce1 , 0x1ce2 , 0x1ce3 , 0x1ce4 , 0x1ce5 , 0x1ce6 , 0x1ce7 , 0x1ce8 , 0x1ce9 , 0x1cea , 0x1ceb , 0x1cec ,
91
- 0x1ced , 0x1cee , 0x1cef , 0x1cf0 , 0x1cf1 , 0x1cf2 , 0x1cf3 , 0x1cf4 , 0x1cf5 , 0x1cf6 , 0x1cf7 , 0x1cf8 ,
92
- 0x1cf9 , 0x1cfa , 0x1dc0 , 0x1dc1 , 0x1df8 , 0x1dfa , 0x202f , 0x204f , 0x205a , 0x205d , 0x20f0 , 0x2e17 ,
93
- 0x2e30 , 0x2e31 , 0x2e3c , 0x2e41 , 0x2e43 , 0x2ff0 , 0x2ff1 , 0x2ff2 , 0x2ff3 , 0x2ff4 , 0x2ff5 , 0x2ff6 ,
94
- 0x2ff7 , 0x2ff8 , 0x2ff9 , 0x2ffa , 0x2ffb , 0x2ffc , 0x2ffd , 0x2ffe , 0x2fff , 0x3001 , 0x3002 , 0x3003 ,
95
- 0x3006 , 0x3008 , 0x3009 , 0x300a , 0x300b , 0x300c , 0x300d , 0x300e , 0x300f , 0x3010 , 0x3011 , 0x3013 ,
96
- 0x3014 , 0x3015 , 0x3016 , 0x3017 , 0x3018 , 0x3019 , 0x301a , 0x301b , 0x301c , 0x301d , 0x301e , 0x301f ,
97
- 0x302a , 0x302b , 0x302c , 0x302d , 0x3030 , 0x3031 , 0x3032 , 0x3033 , 0x3034 , 0x3035 , 0x3037 , 0x303c ,
98
- 0x303d , 0x303e , 0x303f , 0x3099 , 0x309a , 0x309b , 0x309c , 0x30a0 , 0x30fb , 0x30fc , 0x3190 , 0x3191 ,
99
- 0x3192 , 0x3193 , 0x3194 , 0x3195 , 0x3196 , 0x3197 , 0x3198 , 0x3199 , 0x319a , 0x319b , 0x319c , 0x319d ,
100
- 0x319e , 0x319f , 0x31c0 , 0x31c1 , 0x31c2 , 0x31c3 , 0x31c4 , 0x31c5 , 0x31c6 , 0x31c7 , 0x31c8 , 0x31c9 ,
101
- 0x31ca , 0x31cb , 0x31cc , 0x31cd , 0x31ce , 0x31cf , 0x31d0 , 0x31d1 , 0x31d2 , 0x31d3 , 0x31d4 , 0x31d5 ,
102
- 0x31d6 , 0x31d7 , 0x31d8 , 0x31d9 , 0x31da , 0x31db , 0x31dc , 0x31dd , 0x31de , 0x31df , 0x31e0 , 0x31e1 ,
103
- 0x31e2 , 0x31e3 , 0x31e4 , 0x31e5 , 0x31ef , 0x3220 , 0x3221 , 0x3222 , 0x3223 , 0x3224 , 0x3225 , 0x3226 ,
104
- 0x3227 , 0x3228 , 0x3229 , 0x322a , 0x322b , 0x322c , 0x322d , 0x322e , 0x322f , 0x3230 , 0x3231 , 0x3232 ,
105
- 0x3233 , 0x3234 , 0x3235 , 0x3236 , 0x3237 , 0x3238 , 0x3239 , 0x323a , 0x323b , 0x323c , 0x323d , 0x323e ,
106
- 0x323f , 0x3240 , 0x3241 , 0x3242 , 0x3243 , 0x3244 , 0x3245 , 0x3246 , 0x3247 , 0x3280 , 0x3281 , 0x3282 ,
107
- 0x3283 , 0x3284 , 0x3285 , 0x3286 , 0x3287 , 0x3288 , 0x3289 , 0x328a , 0x328b , 0x328c , 0x328d , 0x328e ,
108
- 0x328f , 0x3290 , 0x3291 , 0x3292 , 0x3293 , 0x3294 , 0x3295 , 0x3296 , 0x3297 , 0x3298 , 0x3299 , 0x329a ,
109
- 0x329b , 0x329c , 0x329d , 0x329e , 0x329f , 0x32a0 , 0x32a1 , 0x32a2 , 0x32a3 , 0x32a4 , 0x32a5 , 0x32a6 ,
110
- 0x32a7 , 0x32a8 , 0x32a9 , 0x32aa , 0x32ab , 0x32ac , 0x32ad , 0x32ae , 0x32af , 0x32b0 , 0x32c0 , 0x32c1 ,
111
- 0x32c2 , 0x32c3 , 0x32c4 , 0x32c5 , 0x32c6 , 0x32c7 , 0x32c8 , 0x32c9 , 0x32ca , 0x32cb , 0x32ff , 0x3358 ,
112
- 0x3359 , 0x335a , 0x335b , 0x335c , 0x335d , 0x335e , 0x335f , 0x3360 , 0x3361 , 0x3362 , 0x3363 , 0x3364 ,
113
- 0x3365 , 0x3366 , 0x3367 , 0x3368 , 0x3369 , 0x336a , 0x336b , 0x336c , 0x336d , 0x336e , 0x336f , 0x3370 ,
114
- 0x337b , 0x337c , 0x337d , 0x337e , 0x337f , 0x33e0 , 0x33e1 , 0x33e2 , 0x33e3 , 0x33e4 , 0x33e5 , 0x33e6 ,
115
- 0x33e7 , 0x33e8 , 0x33e9 , 0x33ea , 0x33eb , 0x33ec , 0x33ed , 0x33ee , 0x33ef , 0x33f0 , 0x33f1 , 0x33f2 ,
116
- 0x33f3 , 0x33f4 , 0x33f5 , 0x33f6 , 0x33f7 , 0x33f8 , 0x33f9 , 0x33fa , 0x33fb , 0x33fc , 0x33fd , 0x33fe ,
117
- 0xa66f , 0xa700 , 0xa701 , 0xa702 , 0xa703 , 0xa704 , 0xa705 , 0xa706 , 0xa707 , 0xa830 , 0xa831 , 0xa832 ,
118
- 0xa833 , 0xa834 , 0xa835 , 0xa836 , 0xa837 , 0xa838 , 0xa839 , 0xa8f1 , 0xa8f3 , 0xa92e , 0xa9cf , 0xfd3e ,
119
- 0xfd3f , 0xfdf2 , 0xfdfd , 0xfe45 , 0xfe46 , 0xff61 , 0xff62 , 0xff63 , 0xff64 , 0xff65 , 0xff70 , 0xff9e ,
120
- 0xff9f , 0x10100 , 0x10101 , 0x10102 , 0x10107 , 0x10108 , 0x10109 , 0x1010a , 0x1010b , 0x1010c , 0x1010d ,
121
- 0x1010e , 0x1010f , 0x10110 , 0x10111 , 0x10112 , 0x10113 , 0x10114 , 0x10115 , 0x10116 , 0x10117 , 0x10118 ,
122
- 0x10119 , 0x1011a , 0x1011b , 0x1011c , 0x1011d , 0x1011e , 0x1011f , 0x10120 , 0x10121 , 0x10122 , 0x10123 ,
123
- 0x10124 , 0x10125 , 0x10126 , 0x10127 , 0x10128 , 0x10129 , 0x1012a , 0x1012b , 0x1012c , 0x1012d , 0x1012e ,
124
- 0x1012f , 0x10130 , 0x10131 , 0x10132 , 0x10133 , 0x10137 , 0x10138 , 0x10139 , 0x1013a , 0x1013b , 0x1013c ,
125
- 0x1013d , 0x1013e , 0x1013f , 0x102e0 , 0x102e1 , 0x102e2 , 0x102e3 , 0x102e4 , 0x102e5 , 0x102e6 , 0x102e7 ,
126
- 0x102e8 , 0x102e9 , 0x102ea , 0x102eb , 0x102ec , 0x102ed , 0x102ee , 0x102ef , 0x102f0 , 0x102f1 , 0x102f2 ,
127
- 0x102f3 , 0x102f4 , 0x102f5 , 0x102f6 , 0x102f7 , 0x102f8 , 0x102f9 , 0x102fa , 0x102fb , 0x10af2 , 0x11301 ,
128
- 0x11303 , 0x1133b , 0x1133c , 0x11fd0 , 0x11fd1 , 0x11fd3 , 0x1bca0 , 0x1bca1 , 0x1bca2 , 0x1bca3 , 0x1d360 ,
129
- 0x1d361 , 0x1d362 , 0x1d363 , 0x1d364 , 0x1d365 , 0x1d366 , 0x1d367 , 0x1d368 , 0x1d369 , 0x1d36a , 0x1d36b ,
130
- 0x1d36c , 0x1d36d , 0x1d36e , 0x1d36f , 0x1d370 , 0x1d371 , 0x1f250 , 0x1f251 ,
131
- };
132
-
133
50
void handleError (ErrorCode& status, int line, const char * context) {
134
51
if (status.isFailure ()) {
135
52
std::cerr << " Error[" << line << " ]: " << context << " : " << status.errorName () << std::endl;
@@ -374,6 +291,34 @@ void dumpGeneralCategoryMask(FILE* f) {
374
291
fprintf (f, " ]\n " );
375
292
}
376
293
294
+ namespace {
295
+
296
+ void U_CALLCONV
297
+ set_add (USet *set, UChar32 c) {
298
+ UnicodeSet::fromUSet (set)->add (c);
299
+ }
300
+
301
+ void U_CALLCONV
302
+ set_addRange (USet *set, UChar32 start, UChar32 end) {
303
+ UnicodeSet::fromUSet (set)->add (start, end);
304
+ }
305
+
306
+ }
307
+
308
+ UnicodeSet getScriptExtensionsCodePoints (IcuToolErrorCode &errorCode) {
309
+ UnicodeSet scxCPs;
310
+ USetAdder sa = {
311
+ scxCPs.toUSet (),
312
+ set_add,
313
+ set_addRange,
314
+ nullptr , // don't need addString,
315
+ nullptr , // don't need remove()
316
+ nullptr // don't need removeRange()
317
+ };
318
+ uprv_addScriptExtensionsCodePoints (&sa, errorCode);
319
+ return scxCPs;
320
+ }
321
+
377
322
void dumpScriptExtensions (FILE* f) {
378
323
IcuToolErrorCode status (" icuexportdata: dumpScriptExtensions" );
379
324
@@ -402,7 +347,8 @@ void dumpScriptExtensions(FILE* f) {
402
347
403
348
// The sc/scx companion array is an array of arrays (of script codes)
404
349
fputs (" script_code_array = [\n " , f);
405
- for (const UChar32 cp : scxCodePoints) {
350
+ UnicodeSet scxCodePoints = getScriptExtensionsCodePoints (status);
351
+ for (const UChar32 cp : scxCodePoints.codePoints ()) {
406
352
// Get the Script value
407
353
uint32_t scVal = umutablecptrie_get (builder.getAlias (), cp);
408
354
// Get the Script_Extensions value (array of Script codes)
0 commit comments