Skip to content

Commit 231825b

Browse files
committed
Revert "unicode: Don't special case ignorable code points"
This reverts commit 5c26d2f. It turns out that we can't do this, because while the old behavior of ignoring ignorable code points was most definitely wrong, we have case-folding filesystems with on-disk hash values with that wrong behavior. So now you can't look up those names, because they hash to something different. Of course, it's also entirely possible that in the meantime people have created *new* files with the new ("more correct") case folding logic, and reverting will just make other things break. The correct solution is to not do case folding in filesystems, but sadly, people seem to never really understand that. People still see it as a feature, not a bug. Reported-by: Qi Han <[email protected]> Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586 Cc: Gabriel Krisman Bertazi <[email protected]> Requested-by: Jaegeuk Kim <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent ec8e2d3 commit 231825b

File tree

2 files changed

+3427
-3346
lines changed

2 files changed

+3427
-3346
lines changed

fs/unicode/mkutf8data.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2230,6 +2230,75 @@ static void nfdicf_init(void)
22302230
file_fail(fold_name);
22312231
}
22322232

2233+
static void ignore_init(void)
2234+
{
2235+
FILE *file;
2236+
unsigned int unichar;
2237+
unsigned int first;
2238+
unsigned int last;
2239+
unsigned int *um;
2240+
int count;
2241+
int ret;
2242+
2243+
if (verbose > 0)
2244+
printf("Parsing %s\n", prop_name);
2245+
file = fopen(prop_name, "r");
2246+
if (!file)
2247+
open_fail(prop_name, errno);
2248+
assert(file);
2249+
count = 0;
2250+
while (fgets(line, LINESIZE, file)) {
2251+
ret = sscanf(line, "%X..%X ; %s # ", &first, &last, buf0);
2252+
if (ret == 3) {
2253+
if (strcmp(buf0, "Default_Ignorable_Code_Point"))
2254+
continue;
2255+
if (!utf32valid(first) || !utf32valid(last))
2256+
line_fail(prop_name, line);
2257+
for (unichar = first; unichar <= last; unichar++) {
2258+
free(unicode_data[unichar].utf32nfdi);
2259+
um = malloc(sizeof(unsigned int));
2260+
*um = 0;
2261+
unicode_data[unichar].utf32nfdi = um;
2262+
free(unicode_data[unichar].utf32nfdicf);
2263+
um = malloc(sizeof(unsigned int));
2264+
*um = 0;
2265+
unicode_data[unichar].utf32nfdicf = um;
2266+
count++;
2267+
}
2268+
if (verbose > 1)
2269+
printf(" %X..%X Default_Ignorable_Code_Point\n",
2270+
first, last);
2271+
continue;
2272+
}
2273+
ret = sscanf(line, "%X ; %s # ", &unichar, buf0);
2274+
if (ret == 2) {
2275+
if (strcmp(buf0, "Default_Ignorable_Code_Point"))
2276+
continue;
2277+
if (!utf32valid(unichar))
2278+
line_fail(prop_name, line);
2279+
free(unicode_data[unichar].utf32nfdi);
2280+
um = malloc(sizeof(unsigned int));
2281+
*um = 0;
2282+
unicode_data[unichar].utf32nfdi = um;
2283+
free(unicode_data[unichar].utf32nfdicf);
2284+
um = malloc(sizeof(unsigned int));
2285+
*um = 0;
2286+
unicode_data[unichar].utf32nfdicf = um;
2287+
if (verbose > 1)
2288+
printf(" %X Default_Ignorable_Code_Point\n",
2289+
unichar);
2290+
count++;
2291+
continue;
2292+
}
2293+
}
2294+
fclose(file);
2295+
2296+
if (verbose > 0)
2297+
printf("Found %d entries\n", count);
2298+
if (count == 0)
2299+
file_fail(prop_name);
2300+
}
2301+
22332302
static void corrections_init(void)
22342303
{
22352304
FILE *file;
@@ -3342,6 +3411,7 @@ int main(int argc, char *argv[])
33423411
ccc_init();
33433412
nfdi_init();
33443413
nfdicf_init();
3414+
ignore_init();
33453415
corrections_init();
33463416
hangul_decompose();
33473417
nfdi_decompose();

0 commit comments

Comments
 (0)