Skip to content

Commit 137b654

Browse files
committed
Correct confusable pair for U+1D9F from U+1D4B to U+1D4C
See unicode-org/properties#500. This needs a temporary patch to the code to skip the suppression of the new pair.
1 parent b77635b commit 137b654

File tree

7 files changed

+24
-13
lines changed

7 files changed

+24
-13
lines changed

unicodetools/data/security/dev/confusables.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# confusables.txt
2-
# Date: 2026-02-06, 21:41:45 GMT
2+
# Date: 2026-02-20, 00:35:25 GMT
33
# © 2026 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -2389,13 +2389,13 @@ A613 ; 0190 ; MA # ( ꘓ → Ɛ ) VAI SYMBOL FEENG → LATIN CAPITAL LETTER OPEN
23892389
16F2D ; 0190 ; MA # ( 𖼭 → Ɛ ) MIAO LETTER NYHA → LATIN CAPITAL LETTER OPEN E #
23902390
10401 ; 0190 ; MA # ( 𐐁 → Ɛ ) DESERET CAPITAL LETTER LONG E → LATIN CAPITAL LETTER OPEN E #
23912391

2392-
1D9F ; 1D4B ; MA # ( ᶟ → ᵋ ) MODIFIER LETTER SMALL REVERSED OPEN E → MODIFIER LETTER SMALL OPEN E #
2393-
23942392
1D08 ; 025C ; MA # ( ᴈ → ɜ ) LATIN SMALL LETTER TURNED OPEN E → LATIN SMALL LETTER REVERSED OPEN E #
23952393
0437 ; 025C ; MA # ( з → ɜ ) CYRILLIC SMALL LETTER ZE → LATIN SMALL LETTER REVERSED OPEN E #
23962394

23972395
0499 ; 025C 0326 ; MA # ( ҙ → ɜ̦ ) CYRILLIC SMALL LETTER ZE WITH DESCENDER → LATIN SMALL LETTER REVERSED OPEN E, COMBINING COMMA BELOW # →з̡→
23982396

2397+
1D9F ; 1D4C ; MA # ( ᶟ → ᵌ ) MODIFIER LETTER SMALL REVERSED OPEN E → MODIFIER LETTER SMALL TURNED OPEN E #
2398+
23992399
10442 ; 025E ; MA # ( 𐑂 → ɞ ) DESERET SMALL LETTER VEE → LATIN SMALL LETTER CLOSED REVERSED OPEN E #
24002400

24012401
A79D ; 029A ; MA # ( ꞝ → ʚ ) LATIN SMALL LETTER VOLAPUK OE → LATIN SMALL LETTER CLOSED OPEN E #

unicodetools/data/security/dev/confusablesSummary.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# confusablesSummary.txt
2-
# Date: 2026-02-06, 21:41:44 GMT
2+
# Date: 2026-02-20, 00:35:24 GMT
33
# © 2026 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -12802,8 +12802,8 @@
1280212802
(‎ ᴴ ‎) 1D34 MODIFIER LETTER CAPITAL H
1280312803
← (‎ ᵸ ‎) 1D78 MODIFIER LETTER CYRILLIC EN
1280412804

12805-
#
12806-
(‎ ᵋ ‎) 1D4B MODIFIER LETTER SMALL OPEN E
12805+
#
12806+
(‎ ᵌ ‎) 1D4C MODIFIER LETTER SMALL TURNED OPEN E
1280712807
← (‎ ᶟ ‎) 1D9F MODIFIER LETTER SMALL REVERSED OPEN E
1280812808

1280912809
# ᵍ ᶢ

unicodetools/data/security/dev/data/source/confusables-macFonts.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,8 +1204,6 @@
12041204

12051205
1D34 ; 1D78 # ( ᴴ ~ ᵸ ) MODIFIER LETTER CAPITAL H ~ MODIFIER LETTER CYRILLIC EN
12061206

1207-
1D4B ; 1D9F # ( ᵋ ~ ᶟ ) MODIFIER LETTER SMALL OPEN E ~ MODIFIER LETTER SMALL REVERSED OPEN E
1208-
12091207
1D4C ; 1D9F # ( ᵌ ~ ᶟ ) MODIFIER LETTER SMALL TURNED OPEN E ~ MODIFIER LETTER SMALL REVERSED OPEN E
12101208

12111209
1D4D ; 1DA2 # ( ᵍ ~ ᶢ ) MODIFIER LETTER SMALL G ~ MODIFIER LETTER SMALL SCRIPT G

unicodetools/data/security/dev/data/source/confusables-source.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6151,3 +6151,6 @@ ABEC ; 002E
61516151
1F675 ; 0026
61526152
206A4 ; 3005
61536153
27144 ; 8641
6154+
6155+
# Correction (PAG ref #500)
6156+
1D9F ; 1D4C

unicodetools/data/security/dev/data/source/formatted-macFonts.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# formatted-macFonts.txt
2-
# Date: 2025-11-13, 04:42:21 GMT
3-
# © 2025 Unicode®, Inc.
2+
# Date: 2026-02-20, 00:35:23 GMT
3+
# © 2026 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
@@ -883,7 +883,7 @@
883883

884884
1D34 ; 1D78 # ( ᴴ ~ ᵸ ) MODIFIER LETTER CAPITAL H ~ MODIFIER LETTER CYRILLIC EN
885885

886-
1D4B ; 1D9F # ( ᵋ ~ ᶟ ) MODIFIER LETTER SMALL OPEN E ~ MODIFIER LETTER SMALL REVERSED OPEN E
886+
1D4C ; 1D9F # ( ᵌ ~ ᶟ ) MODIFIER LETTER SMALL TURNED OPEN E ~ MODIFIER LETTER SMALL REVERSED OPEN E
887887

888888
1D4D ; 1DA2 # ( ᵍ ~ ᶢ ) MODIFIER LETTER SMALL G ~ MODIFIER LETTER SMALL SCRIPT G
889889

unicodetools/data/security/dev/data/source/formatted-source.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# formatted-source.txt
2-
# Date: 2026-02-06, 21:41:43 GMT
2+
# Date: 2026-02-20, 00:35:22 GMT
33
# © 2026 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -4414,6 +4414,8 @@
44144414

44154415
1D43 2044 209B ; 2101 #* ( ᵃ⁄ₛ ~ ℁ ) MODIFIER LETTER SMALL A, FRACTION SLASH, LATIN SUBSCRIPT SMALL LETTER S ~ ADDRESSED TO THE SUBJECT
44164416

4417+
1D4C ; 1D9F # ( ᵌ ~ ᶟ ) MODIFIER LETTER SMALL TURNED OPEN E ~ MODIFIER LETTER SMALL REVERSED OPEN E
4418+
44174419
1D52 ; 2070 # ( ᵒ ~ ⁰ ) MODIFIER LETTER SMALL O ~ SUPERSCRIPT ZERO
44184420

44194421
1D9C 2044 1D64 ; 2106 #* ( ᶜ⁄ᵤ ~ ℆ ) MODIFIER LETTER SMALL C, FRACTION SLASH, LATIN SUBSCRIPT SMALL LETTER U ~ CADA UNA

unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusables.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1384,7 +1384,15 @@ private void add2(String source, String target, String type, int count, String l
13841384
+ ";\t"
13851385
+ line);
13861386
}
1387-
} else if (suppress_NFKC && nfkdSource.equals(nfkdTarget)) {
1387+
} else if (suppress_NFKC
1388+
&& nfkdSource.equals(nfkdTarget)
1389+
&& !source.equals("\u1D4C")
1390+
&& !target.equals("\u1D4C")) {
1391+
// Skipping the suppression of U+1D4C temporarily so that we can
1392+
// add the confusable pair (U+1D9F, U+1D4C) that have the same
1393+
// compatibility decomposition, but not the same canonical decomposition.
1394+
// In the future, suppress_NFKC should be set to false and all
1395+
// resulting additions reviewed.
13881396
if (SHOW_SUPPRESS) {
13891397
System.out.println(
13901398
"*** Suppressing nfkc for:\t"

0 commit comments

Comments
 (0)