diff --git a/unicodetools/data/security/dev/confusables.txt b/unicodetools/data/security/dev/confusables.txt index d4a7422a4..8aa89ccef 100644 --- a/unicodetools/data/security/dev/confusables.txt +++ b/unicodetools/data/security/dev/confusables.txt @@ -1,5 +1,5 @@ # confusables.txt -# Date: 2025-10-17, 00:06:13 GMT +# Date: 2025-10-25, 07:52:31 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -35,6 +35,12 @@ 0341 ; 0301 ; MA # ( ́ → ́ ) COMBINING ACUTE TONE MARK → COMBINING ACUTE ACCENT # 0954 ; 0301 ; MA # ( ॔ → ́ ) DEVANAGARI ACUTE ACCENT → COMBINING ACUTE ACCENT # 064E ; 0301 ; MA # ( َ → ́ ) ARABIC FATHA → COMBINING ACUTE ACCENT # +07A6 ; 0301 ; MA # ( ަ → ́ ) THAANA ABAFILI → COMBINING ACUTE ACCENT # + +030B ; 0301 0301 ; MA # ( ̋ → ́́ ) COMBINING DOUBLE ACUTE ACCENT → COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT # +064B ; 0301 0301 ; MA # ( ً → ́́ ) ARABIC FATHATAN → COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT # →̋→ +08F0 ; 0301 0301 ; MA # ( ࣰ → ́́ ) ARABIC OPEN FATHATAN → COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT # →ً→→̋→ +07A7 ; 0301 0301 ; MA # ( ާ → ́́ ) THAANA AABAAFILI → COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT # →ަަ→ 0340 ; 0300 ; MA # ( ̀ → ̀ ) COMBINING GRAVE TONE MARK → COMBINING GRAVE ACCENT # 0953 ; 0300 ; MA # ( ॓ → ̀ ) DEVANAGARI GRAVE ACCENT → COMBINING GRAVE ACCENT # @@ -70,6 +76,7 @@ A6F0 ; 0302 ; MA # ( ꛰ → ̂ ) BAMUM COMBINING MARK KOQNDON → COMBINING CIR 309A ; 030A ; MA # ( ゚ → ̊ ) COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK → COMBINING RING ABOVE # 0652 ; 030A ; MA # ( ْ → ̊ ) ARABIC SUKUN → COMBINING RING ABOVE # 0B82 ; 030A ; MA # ( ஂ → ̊ ) TAMIL SIGN ANUSVARA → COMBINING RING ABOVE # +0F7E ; 030A ; MA # ( ཾ → ̊ ) TIBETAN SIGN RJES SU NGA RO → COMBINING RING ABOVE # 1036 ; 030A ; MA # ( ံ → ̊ ) MYANMAR SIGN ANUSVARA → COMBINING RING ABOVE # 17C6 ; 030A ; MA # ( ំ → ̊ ) KHMER SIGN NIKAHIT → COMBINING RING ABOVE # 11300 ; 030A ; MA # ( 𑌀 → ̊ ) GRANTHA SIGN COMBINING ANUSVARA ABOVE → COMBINING RING ABOVE # →ஂ→ @@ -78,13 +85,11 @@ A6F0 ; 0302 ; MA # ( ꛰ → ̂ ) BAMUM COMBINING MARK KOQNDON → COMBINING CIR 302C ; 030A ; MA # ( 〬 → ̊ ) IDEOGRAPHIC DEPARTING TONE MARK → COMBINING RING ABOVE # →゚→ 0366 ; 030A ; MA # ( ͦ → ̊ ) COMBINING LATIN SMALL LETTER O → COMBINING RING ABOVE # 2DEA ; 030A ; MA # ( ⷪ → ̊ ) COMBINING CYRILLIC LETTER O → COMBINING RING ABOVE # →ͦ→ +07B0 ; 030A ; MA # ( ް → ̊ ) THAANA SUKUN → COMBINING RING ABOVE # 08EB ; 0308 ; MA # ( ࣫ → ̈ ) ARABIC TONE TWO DOTS ABOVE → COMBINING DIAERESIS # 07F3 ; 0308 ; MA # ( ߳ → ̈ ) NKO COMBINING DOUBLE DOT ABOVE → COMBINING DIAERESIS # -064B ; 030B ; MA # ( ً → ̋ ) ARABIC FATHATAN → COMBINING DOUBLE ACUTE ACCENT # -08F0 ; 030B ; MA # ( ࣰ → ̋ ) ARABIC OPEN FATHATAN → COMBINING DOUBLE ACUTE ACCENT # →ً→ - 0342 ; 0303 ; MA # ( ͂ → ̃ ) COMBINING GREEK PERISPOMENI → COMBINING TILDE # 0653 ; 0303 ; MA # ( ٓ → ̃ ) ARABIC MADDAH ABOVE → COMBINING TILDE # @@ -126,6 +131,7 @@ A6F1 ; 0304 ; MA # ( ꛱ → ̄ ) BAMUM COMBINING MARK TUKWENTIS → COMBINING M 0357 ; 0350 ; MA # ( ͗ → ͐ ) COMBINING RIGHT HALF RING ABOVE → COMBINING RIGHT ARROWHEAD ABOVE # →ࣿ→→ࣸ→ 08FF ; 0350 ; MA # ( ࣿ → ͐ ) ARABIC MARK SIDEWAYS NOON GHUNNA → COMBINING RIGHT ARROWHEAD ABOVE # →ࣸ→ 08F8 ; 0350 ; MA # ( ࣸ → ͐ ) ARABIC RIGHT ARROWHEAD ABOVE → COMBINING RIGHT ARROWHEAD ABOVE # +07AA ; 0350 ; MA # ( ު → ͐ ) THAANA UBUFILI → COMBINING RIGHT ARROWHEAD ABOVE # 0900 ; 0352 ; MA # ( ऀ → ͒ ) DEVANAGARI SIGN INVERTED CANDRABINDU → COMBINING FERMATA # @@ -141,6 +147,8 @@ A6F1 ; 0304 ; MA # ( ꛱ → ̄ ) BAMUM COMBINING MARK TUKWENTIS → COMBINING M 031A ; 1AE9 ; MA # ( ̚ → ᫩ ) COMBINING LEFT ANGLE ABOVE → COMBINING LEFT ANGLE CENTRED ABOVE # +07AB ; 0350 0350 ; MA # ( ޫ → ͐͐ ) THAANA OOBOOFILI → COMBINING RIGHT ARROWHEAD ABOVE, COMBINING RIGHT ARROWHEAD ABOVE # →ުު→ + 1CED ; 0316 ; MA # ( ᳭ → ̖ ) VEDIC SIGN TIRYAK → COMBINING GRAVE ACCENT BELOW # 1CDC ; 0329 ; MA # ( ᳜ → ̩ ) VEDIC TONE KATHAKA ANUDATTA → COMBINING VERTICAL LINE BELOW # @@ -173,6 +181,7 @@ A6F1 ; 0304 ; MA # ( ꛱ → ̄ ) BAMUM COMBINING MARK TUKWENTIS → COMBINING M 111CA ; 0323 ; MA # ( 𑇊 → ̣ ) SHARADA SIGN NUKTA → COMBINING DOT BELOW # →़→ 114C3 ; 0323 ; MA # ( 𑓃 → ̣ ) TIRHUTA SIGN NUKTA → COMBINING DOT BELOW # →়→ 10A3A ; 0323 ; MA # ( 𐨺 → ̣ ) KHAROSHTHI SIGN DOT BELOW → COMBINING DOT BELOW # +0E3A ; 0323 ; MA # ( ฺ → ̣ ) THAI CHARACTER PHINTHU → COMBINING DOT BELOW # 08EE ; 0324 ; MA # ( ࣮ → ̤ ) ARABIC TONE TWO DOTS BELOW → COMBINING DIAERESIS BELOW # 1CDE ; 0324 ; MA # ( ᳞ → ̤ ) VEDIC TONE TWO DOTS BELOW → COMBINING DIAERESIS BELOW # @@ -209,6 +218,9 @@ FC61 ; FE78 0651 ; MA #* ( ‎ﱡ‎ → ‎ﹸّ‎ ) ARABIC LIGATURE SHADDA WI 061A ; 0650 ; MA # ( ؚ → ِ ) ARABIC SMALL KASRA → ARABIC KASRA # 0317 ; 0650 ; MA # ( ̗ → ِ ) COMBINING ACUTE ACCENT BELOW → ARABIC KASRA # +07A8 ; 0650 ; MA # ( ި → ِ ) THAANA IBIFILI → ARABIC KASRA # →̗→ + +07A9 ; 0650 0650 ; MA # ( ީ → ِِ ) THAANA EEBEEFILI → ARABIC KASRA, ARABIC KASRA # →ިި→ FCF4 ; FE7B 0651 ; MA # ( ‎ﳴ‎ → ‎ﹻّ‎ ) ARABIC LIGATURE SHADDA WITH KASRA MEDIAL FORM → ARABIC KASRA MEDIAL FORM, ARABIC SHADDA # @@ -230,6 +242,8 @@ FC63 ; FE7C 0670 ; MA #* ( ‎ﱣ‎ → ‎ﹼٰ‎ ) ARABIC LIGATURE SHADDA WI 0D03 ; 0983 ; MA # ( ഃ → ঃ ) MALAYALAM SIGN VISARGA → BENGALI SIGN VISARGA # →ಃ→→ః→→ਃ→ 0D83 ; 0983 ; MA # ( ඃ → ঃ ) SINHALA SIGN VISARGAYA → BENGALI SIGN VISARGA # →ഃ→→ಃ→→ః→→ਃ→ 1038 ; 0983 ; MA # ( း → ঃ ) MYANMAR SIGN VISARGA → BENGALI SIGN VISARGA # →ඃ→→ഃ→→ಃ→→ః→→ਃ→ +17C7 ; 0983 ; MA # ( ះ → ঃ ) KHMER SIGN REAHMUK → BENGALI SIGN VISARGA # +11303 ; 0983 ; MA # ( 𑌃 → ঃ ) GRANTHA SIGN VISARGA → BENGALI SIGN VISARGA # 114C1 ; 0983 ; MA # ( 𑓁 → ঃ ) TIRHUTA SIGN VISARGA → BENGALI SIGN VISARGA # 17CB ; 0E48 ; MA # ( ់ → ่ ) KHMER SIGN BANTOC → THAI CHARACTER MAI EK # @@ -2724,8 +2738,8 @@ A4E1 ; 004C ; MA # ( ꓡ → L ) LISU LETTER LA → LATIN CAPITAL LETTER L # 1041B ; 004C ; MA # ( 𐐛 → L ) DESERET CAPITAL LETTER ETH → LATIN CAPITAL LETTER L # 10526 ; 004C ; MA # ( 𐔦 → L ) ELBASAN LETTER GHAMMA → LATIN CAPITAL LETTER L # -FD3C ; 006C 030B ; MA # ( ‎ﴼ‎ → l̋ ) ARABIC LIGATURE ALEF WITH FATHATAN FINAL FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →‎اً‎→ -FD3D ; 006C 030B ; MA # ( ‎ﴽ‎ → l̋ ) ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM → LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →‎اً‎→ +FD3C ; 006C 0301 0301 ; MA # ( ‎ﴼ‎ → ĺ́ ) ARABIC LIGATURE ALEF WITH FATHATAN FINAL FORM → LATIN SMALL LETTER L, COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT # →‎اً‎→ +FD3D ; 006C 0301 0301 ; MA # ( ‎ﴽ‎ → ĺ́ ) ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM → LATIN SMALL LETTER L, COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT # →‎اً‎→ FE82 ; 006C 0303 ; MA # ( ‎ﺂ‎ → l̃ ) ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM → LATIN SMALL LETTER L, COMBINING TILDE # →‎آ‎→ FE81 ; 006C 0303 ; MA # ( ‎ﺁ‎ → l̃ ) ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM → LATIN SMALL LETTER L, COMBINING TILDE # →‎آ‎→ @@ -5759,6 +5773,12 @@ FC03 ; 0649 0654 0649 ; MA # ( ‎ﰃ‎ → ‎ىٔى‎ ) ARABIC LIGATURE YEH FC69 ; 0649 0654 0649 ; MA # ( ‎ﱩ‎ → ‎ىٔى‎ ) ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH YEH FINAL FORM → ARABIC LETTER ALEF MAKSURA, ARABIC HAMZA ABOVE, ARABIC LETTER ALEF MAKSURA # →‎ئي‎→ FC04 ; 0649 0654 0649 ; MA # ( ‎ﰄ‎ → ‎ىٔى‎ ) ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH YEH ISOLATED FORM → ARABIC LETTER ALEF MAKSURA, ARABIC HAMZA ABOVE, ARABIC LETTER ALEF MAKSURA # →‎ئي‎→ +1DFE ; 07AC ; MA # ( ᷾ → ެ ) COMBINING LEFT ARROWHEAD ABOVE → THAANA EBEFILI # + +07AE ; 07AC 0350 ; MA # ( ޮ → ެ͐ ) THAANA OBOFILI → THAANA EBEFILI, COMBINING RIGHT ARROWHEAD ABOVE # →ެު→ + +07AD ; 07AC 07AC ; MA # ( ޭ → ެެ ) THAANA EYBEYFILI → THAANA EBEFILI, THAANA EBEFILI # + 102B8 ; 2D40 ; MA # ( 𐊸 → ⵀ ) CARIAN LETTER SS → TIFINAGH LETTER YAH # 205E ; 2D42 ; MA #* ( ⁞ → ⵂ ) VERTICAL FOUR DOTS → TIFINAGH LETTER TUAREG YAH # @@ -6154,6 +6174,10 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 0D8D ; 0DC3 0DD8 ; MA # ( ඍ → සෘ ) SINHALA LETTER IRUYANNA → SINHALA LETTER DANTAJA SAYANNA, SINHALA VOWEL SIGN GAETTA-PILLA # +0DF2 ; 0DD8 0DD8 ; MA # ( ෲ → ෘෘ ) SINHALA VOWEL SIGN DIGA GAETTA-PILLA → SINHALA VOWEL SIGN GAETTA-PILLA, SINHALA VOWEL SIGN GAETTA-PILLA # + +0DDB ; 0DD9 0DD9 ; MA # ( ෛ → ෙෙ ) SINHALA VOWEL SIGN KOMBU DEKA → SINHALA VOWEL SIGN KOMBUVA, SINHALA VOWEL SIGN KOMBUVA # + 1CD8 ; 11B62 ; MA # ( ᳘ → 𑭢 ) VEDIC TONE CANDRA BELOW → SHARADA VOWEL SIGN UE # →̮→→ॖ→ 032E ; 11B62 ; MA # ( ̮ → 𑭢 ) COMBINING BREVE BELOW → SHARADA VOWEL SIGN UE # →ॖ→ 0956 ; 11B62 ; MA # ( ॖ → 𑭢 ) DEVANAGARI VOWEL SIGN UE → SHARADA VOWEL SIGN UE # @@ -6244,13 +6268,15 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 0EB3 ; 030A 0EB2 ; MA # ( ຳ → ̊າ ) LAO VOWEL SIGN AM → COMBINING RING ABOVE, LAO VOWEL SIGN AA # →ໍາ→ +0EC1 ; 0EC0 0EC0 ; MA # ( ແ → ເເ ) LAO VOWEL SIGN EI → LAO VOWEL SIGN E, LAO VOWEL SIGN E # + 0F02 ; 0F60 0F74 0F82 0F7F ; MA #* ( ༂ → འུྂཿ ) TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA → TIBETAN LETTER -A, TIBETAN VOWEL SIGN U, TIBETAN SIGN NYI ZLA NAA DA, TIBETAN SIGN RNAM BCAD # 0F03 ; 0F60 0F74 0F82 0F14 ; MA #* ( ༃ → འུྂ༔ ) TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA → TIBETAN LETTER -A, TIBETAN VOWEL SIGN U, TIBETAN SIGN NYI ZLA NAA DA, TIBETAN MARK GTER TSHEG # 0F6A ; 0F62 ; MA # ( ཪ → ར ) TIBETAN LETTER FIXED-FORM RA → TIBETAN LETTER RA # -0F00 ; 0F68 0F7C 0F7E ; MA # ( ༀ → ཨོཾ ) TIBETAN SYLLABLE OM → TIBETAN LETTER A, TIBETAN VOWEL SIGN O, TIBETAN SIGN RJES SU NGA RO # +0F00 ; 0F68 0F7C 030A ; MA # ( ༀ → ཨོ̊ ) TIBETAN SYLLABLE OM → TIBETAN LETTER A, TIBETAN VOWEL SIGN O, COMBINING RING ABOVE # →ཨོཾ→ 0F77 ; 0FB2 0F71 0F80 ; MA # ( ཷ → ྲཱྀ ) TIBETAN VOWEL SIGN VOCALIC RR → TIBETAN SUBJOINED LETTER RA, TIBETAN VOWEL SIGN AA, TIBETAN VOWEL SIGN REVERSED I # @@ -6264,6 +6290,8 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 1734 ; 1715 ; MA # ( ᜴ → ᜕ ) HANUNOO SIGN PAMUDPOD → TAGALOG SIGN PAMUDPOD # +1022 ; 1075 102C ; MA # ( ဢ → ၵာ ) MYANMAR LETTER SHAN A → MYANMAR LETTER SHAN KA, MYANMAR VOWEL SIGN AA # + 1070 ; 1003 103E ; MA # ( ၰ → ဃှ ) MYANMAR LETTER EASTERN PWO KAREN GHWA → MYANMAR LETTER GHA, MYANMAR CONSONANT SIGN MEDIAL HA # 1066 ; 1015 103E ; MA # ( ၦ → ပှ ) MYANMAR LETTER WESTERN PWO KAREN PWA → MYANMAR LETTER PA, MYANMAR CONSONANT SIGN MEDIAL HA # @@ -6284,8 +6312,16 @@ FE19 ; 2D57 ; MA #* ( ︙ → ⵗ ) PRESENTATION FORM FOR VERTICAL HORIZONTAL EL 178F ; 178A ; MA # ( ត → ដ ) KHMER LETTER TA → KHMER LETTER DA # +17A1 ; 1791 17D2 1794 ; MA # ( ឡ → ទ្ប ) KHMER LETTER LA → KHMER LETTER TO, KHMER SIGN COENG, KHMER LETTER BA # + +17B0 ; 1796 17D2 1792 ; MA # ( ឰ → ព្ធ ) KHMER INDEPENDENT VOWEL QAI → KHMER LETTER PO, KHMER SIGN COENG, KHMER LETTER THO # + 17A3 ; 17A2 ; MA # ( ឣ → អ ) KHMER INDEPENDENT VOWEL QAQ → KHMER LETTER QA # +17BE ; 17C1 0E35 ; MA # ( ើ → េี ) KHMER VOWEL SIGN OE → KHMER VOWEL SIGN E, THAI CHARACTER SARA II # →េី→ + +17C4 ; 17C1 17B6 ; MA # ( ោ → េា ) KHMER VOWEL SIGN OO → KHMER VOWEL SIGN E, KHMER VOWEL SIGN AA # + 19D0 ; 199E ; MA # ( ᧐ → ᦞ ) NEW TAI LUE DIGIT ZERO → NEW TAI LUE LETTER LOW VA # 19D1 ; 19B1 ; MA # ( ᧑ → ᦱ ) NEW TAI LUE DIGIT ONE → NEW TAI LUE VOWEL SIGN AA # @@ -9978,5 +10014,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF 2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 # -# total: 6582 +# total: 6605 diff --git a/unicodetools/data/security/dev/confusablesSummary.txt b/unicodetools/data/security/dev/confusablesSummary.txt index 675a54008..c9840f047 100644 --- a/unicodetools/data/security/dev/confusablesSummary.txt +++ b/unicodetools/data/security/dev/confusablesSummary.txt @@ -1,5 +1,5 @@ # confusablesSummary.txt -# Date: 2025-10-17, 00:06:13 GMT +# Date: 2025-10-25, 07:52:31 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1388,9 +1388,9 @@ ← (‎ الله ‎) 0627 0644 0644 0647 ARABIC LETTER ALEF, ARABIC LETTER LAM, ARABIC LETTER LAM, ARABIC LETTER HEH ← (‎ ﷲ ‎) FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM # →‎الله‎→ -# l̋ lً 1ً اً ﴼ ﴽ +# ĺ́ lً 1ً اً ﴼ ﴽ (‎ 1ً ‎) 0031 064B DIGIT ONE, ARABIC FATHATAN -← (‎ l̋ ‎) 006C 030B LATIN SMALL LETTER L, COMBINING DOUBLE ACUTE ACCENT # →‎اً‎→ +← (‎ ĺ́ ‎) 006C 0301 0301 LATIN SMALL LETTER L, COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT # →‎اً‎→ ← (‎ lً ‎) 006C 064B LATIN SMALL LETTER L, ARABIC FATHATAN ← (‎ اً ‎) 0627 064B ARABIC LETTER ALEF, ARABIC FATHATAN ← (‎ ﴼ ‎) FD3C ARABIC LIGATURE ALEF WITH FATHATAN FINAL FORM # →‎اً‎→ @@ -5651,9 +5651,10 @@ ← (‎ ॓ ‎) 0953 DEVANAGARI GRAVE ACCENT ← (‎ ̀ ‎) 0340 COMBINING GRAVE TONE MARK -# ́ َ ֜ ֝ ؘ ݇ ॔ ́ +# ́ َ ަ ֜ ֝ ؘ ݇ ॔ ́ (‎ ́ ‎) 0301 COMBINING ACUTE ACCENT ← (‎ َ ‎) 064E ARABIC FATHA +← (‎ ަ ‎) 07A6 THAANA ABAFILI ← (‎ ֜ ‎) 059C HEBREW ACCENT GERESH ← (‎ ֝ ‎) 059D HEBREW ACCENT GERESH MUQDAM # →֜→ ← (‎ ؘ ‎) 0618 ARABIC SMALL FATHA # →َ→ @@ -5661,6 +5662,14 @@ ← (‎ ॔ ‎) 0954 DEVANAGARI ACUTE ACCENT ← (‎ ́ ‎) 0341 COMBINING ACUTE TONE MARK +# ́́ ަަ ̋ ً ާ ࣰ + (‎ ́́ ‎) 0301 0301 COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT +← (‎ ަަ ‎) 07A6 07A6 THAANA ABAFILI, THAANA ABAFILI +← (‎ ̋ ‎) 030B COMBINING DOUBLE ACUTE ACCENT +← (‎ ً ‎) 064B ARABIC FATHATAN # →̋→ +← (‎ ާ ‎) 07A7 THAANA AABAAFILI # →ަަ→ +← (‎ ࣰ ‎) 08F0 ARABIC OPEN FATHATAN # →ً→→̋→ + # ̂ ̑ ٛ ߮ ᳐ ꛰ (‎ ̂ ‎) 0302 COMBINING CIRCUMFLEX ACCENT ← (‎ ̑ ‎) 0311 COMBINING INVERTED BREVE @@ -5733,11 +5742,13 @@ ← (‎ ߳ ‎) 07F3 NKO COMBINING DOUBLE DOT ABOVE ← (‎ ࣫ ‎) 08EB ARABIC TONE TWO DOTS ABOVE -# ̊ ْ ํ ໍ ံ ំ ͦ ֯ ۟ ஂ ៓ ⷪ 〬 ゚ 𑌀 +# ̊ ْ ް ํ ໍ ཾ ံ ំ ͦ ֯ ۟ ஂ ៓ ⷪ 〬 ゚ 𑌀 (‎ ̊ ‎) 030A COMBINING RING ABOVE ← (‎ ْ ‎) 0652 ARABIC SUKUN +← (‎ ް ‎) 07B0 THAANA SUKUN ← (‎ ํ ‎) 0E4D THAI CHARACTER NIKHAHIT ← (‎ ໍ ‎) 0ECD LAO NIGGAHITA +← (‎ ཾ ‎) 0F7E TIBETAN SIGN RJES SU NGA RO ← (‎ ံ ‎) 1036 MYANMAR SIGN ANUSVARA ← (‎ ំ ‎) 17C6 KHMER SIGN NIKAHIT ← (‎ ͦ ‎) 0366 COMBINING LATIN SMALL LETTER O @@ -5760,11 +5771,6 @@ ← (‎ ໍາ ‎) 0ECD 0EB2 LAO NIGGAHITA, LAO VOWEL SIGN AA ← (‎ ຳ ‎) 0EB3 LAO VOWEL SIGN AM # →ໍາ→ -# ̋ ً ࣰ - (‎ ̋ ‎) 030B COMBINING DOUBLE ACUTE ACCENT -← (‎ ً ‎) 064B ARABIC FATHATAN -← (‎ ࣰ ‎) 08F0 ARABIC OPEN FATHATAN # →ً→ - # ٰ ̍ (‎ ̍ ‎) 030D COMBINING VERTICAL LINE ABOVE ← (‎ ٰ ‎) 0670 ARABIC LETTER SUPERSCRIPT ALEF @@ -5793,9 +5799,10 @@ (‎ ̖ ‎) 0316 COMBINING GRAVE ACCENT BELOW ← (‎ ᳭ ‎) 1CED VEDIC SIGN TIRYAK -# ِ ̗ ؚ +# ِ ި ̗ ؚ (‎ ̗ ‎) 0317 COMBINING ACUTE ACCENT BELOW ← (‎ ِ ‎) 0650 ARABIC KASRA +← (‎ ި ‎) 07A8 THAANA IBIFILI ← (‎ ؚ ‎) 061A ARABIC SMALL KASRA # →ِ→ # ᫩ ̚ @@ -5819,13 +5826,14 @@ ← (‎ ͅ ‎) 0345 COMBINING GREEK YPOGEGRAMMENI # →̨→ ← (‎ ᪷ ‎) 1AB7 COMBINING OPEN MARK BELOW # →̨→ -# ̣ ़ ় ਼ ઼ ଼ ִ ׅ ٜ ࣭ ᳝ 𐨺 𑓃 𑇊 +# ̣ ़ ় ਼ ઼ ଼ ฺ ִ ׅ ٜ ࣭ ᳝ 𐨺 𑓃 𑇊 (‎ ̣ ‎) 0323 COMBINING DOT BELOW ← (‎ ़ ‎) 093C DEVANAGARI SIGN NUKTA ← (‎ ় ‎) 09BC BENGALI SIGN NUKTA ← (‎ ਼ ‎) 0A3C GURMUKHI SIGN NUKTA ← (‎ ઼ ‎) 0ABC GUJARATI SIGN NUKTA ← (‎ ଼ ‎) 0B3C ORIYA SIGN NUKTA +← (‎ ฺ ‎) 0E3A THAI CHARACTER PHINTHU ← (‎ ִ ‎) 05B4 HEBREW POINT HIRIQ ← (‎ ׅ ‎) 05C5 HEBREW MARK LOWER DOT ← (‎ ٜ ‎) 065C ARABIC VOWEL SIGN DOT BELOW @@ -5888,12 +5896,18 @@ (‎ ͈ ‎) 0348 COMBINING DOUBLE VERTICAL LINE BELOW ← (‎ 𐻺 ‎) 10EFA ARABIC DOUBLE VERTICAL BAR BELOW -# ͐ ͗ ࣸ ࣿ +# ͐ ު ͗ ࣸ ࣿ (‎ ͐ ‎) 0350 COMBINING RIGHT ARROWHEAD ABOVE +← (‎ ު ‎) 07AA THAANA UBUFILI ← (‎ ͗ ‎) 0357 COMBINING RIGHT HALF RING ABOVE # →ࣿ→→ࣸ→ ← (‎ ࣸ ‎) 08F8 ARABIC RIGHT ARROWHEAD ABOVE ← (‎ ࣿ ‎) 08FF ARABIC MARK SIDEWAYS NOON GHUNNA # →ࣸ→ +# ުު ͐͐ ޫ + (‎ ͐͐ ‎) 0350 0350 COMBINING RIGHT ARROWHEAD ABOVE, COMBINING RIGHT ARROWHEAD ABOVE +← (‎ ުު ‎) 07AA 07AA THAANA UBUFILI, THAANA UBUFILI +← (‎ ޫ ‎) 07AB THAANA OOBOOFILI # →ުު→ + # ͒ ऀ (‎ ͒ ‎) 0352 COMBINING FERMATA ← (‎ ऀ ‎) 0900 DEVANAGARI SIGN INVERTED CANDRABINDU @@ -8398,6 +8412,11 @@ (‎ ٍ ‎) 064D ARABIC KASRATAN ← (‎ ࣲ ‎) 08F2 ARABIC OPEN KASRATAN +# ِِ ިި ީ + (‎ ِِ ‎) 0650 0650 ARABIC KASRA, ARABIC KASRA +← (‎ ިި ‎) 07A8 07A8 THAANA IBIFILI, THAANA IBIFILI +← (‎ ީ ‎) 07A9 THAANA EEBEEFILI # →ިި→ + # ٕ ٟ (‎ ٕ ‎) 0655 ARABIC HAMZA BELOW ← (‎ ٟ ‎) 065F ARABIC WAVY HAMZA BELOW @@ -8590,6 +8609,19 @@ ← (‎ ݧ ‎) 0767 ARABIC LETTER NOON WITH TWO DOTS BELOW ← (‎ ࢩ ‎) 08A9 ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +# ެ ᷾ + (‎ ެ ‎) 07AC THAANA EBEFILI +← (‎ ᷾ ‎) 1DFE COMBINING LEFT ARROWHEAD ABOVE + +# ެު ެ͐ ޮ + (‎ ެ͐ ‎) 07AC 0350 THAANA EBEFILI, COMBINING RIGHT ARROWHEAD ABOVE +← (‎ ެު ‎) 07AC 07AA THAANA EBEFILI, THAANA UBUFILI +← (‎ ޮ ‎) 07AE THAANA OBOFILI # →ެު→ + +# ެެ ޭ + (‎ ެެ ‎) 07AC 07AC THAANA EBEFILI, THAANA EBEFILI +← (‎ ޭ ‎) 07AD THAANA EYBEYFILI + # अॆ ऄ (‎ ऄ ‎) 0904 DEVANAGARI LETTER SHORT A ← (‎ अॆ ‎) 0905 0946 DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E @@ -8781,14 +8813,16 @@ (‎ ८ ‎) 096E DEVANAGARI DIGIT EIGHT ← (‎ ૮ ‎) 0AEE GUJARATI DIGIT EIGHT -# ঃ ః ಃ ഃ ඃ း ਃ 𑓁 +# ঃ ః ಃ ഃ ඃ း ះ ਃ 𑌃 𑓁 (‎ ঃ ‎) 0983 BENGALI SIGN VISARGA ← (‎ ః ‎) 0C03 TELUGU SIGN VISARGA # →ਃ→ ← (‎ ಃ ‎) 0C83 KANNADA SIGN VISARGA # →ః→→ਃ→ ← (‎ ഃ ‎) 0D03 MALAYALAM SIGN VISARGA # →ಃ→→ః→→ਃ→ ← (‎ ඃ ‎) 0D83 SINHALA SIGN VISARGAYA # →ഃ→→ಃ→→ః→→ਃ→ ← (‎ း ‎) 1038 MYANMAR SIGN VISARGA # →ඃ→→ഃ→→ಃ→→ః→→ਃ→ +← (‎ ះ ‎) 17C7 KHMER SIGN REAHMUK ← (‎ ਃ ‎) 0A03 GURMUKHI SIGN VISARGA +← (‎ 𑌃 ‎) 11303 GRANTHA SIGN VISARGA ← (‎ 𑓁 ‎) 114C1 TIRHUTA SIGN VISARGA # অা আ @@ -9416,6 +9450,14 @@ (‎ භ ‎) 0DB7 SINHALA LETTER MAHAAPRAANA BAYANNA ← (‎ හ ‎) 0DC4 SINHALA LETTER HAYANNA +# ෘෘ ෲ + (‎ ෘෘ ‎) 0DD8 0DD8 SINHALA VOWEL SIGN GAETTA-PILLA, SINHALA VOWEL SIGN GAETTA-PILLA +← (‎ ෲ ‎) 0DF2 SINHALA VOWEL SIGN DIGA GAETTA-PILLA + +# ෙෙ ෛ + (‎ ෙෙ ‎) 0DD9 0DD9 SINHALA VOWEL SIGN KOMBUVA, SINHALA VOWEL SIGN KOMBUVA +← (‎ ෛ ‎) 0DDB SINHALA VOWEL SIGN KOMBU DEKA + # ෨ා ෩ (‎ ෨ා ‎) 0DE8 0DCF SINHALA LITH DIGIT TWO, SINHALA VOWEL SIGN AELA-PILLA ← (‎ ෩ ‎) 0DE9 SINHALA LITH DIGIT THREE @@ -9554,9 +9596,14 @@ (‎ ຫມ ‎) 0EAB 0EA1 LAO LETTER HO SUNG, LAO LETTER MO ← (‎ ໝ ‎) 0EDD LAO HO MO -# ཨོཾ ༀ +# ເເ ແ + (‎ ເເ ‎) 0EC0 0EC0 LAO VOWEL SIGN E, LAO VOWEL SIGN E +← (‎ ແ ‎) 0EC1 LAO VOWEL SIGN EI + +# ཨོཾ ཨོ̊ ༀ (‎ ༀ ‎) 0F00 TIBETAN SYLLABLE OM ← (‎ ཨོཾ ‎) 0F68 0F7C 0F7E TIBETAN LETTER A, TIBETAN VOWEL SIGN O, TIBETAN SIGN RJES SU NGA RO +← (‎ ཨོ̊ ‎) 0F68 0F7C 030A TIBETAN LETTER A, TIBETAN VOWEL SIGN O, COMBINING RING ABOVE # →ཨོཾ→ # འུྂཿ ༂ (‎ ༂ ‎) 0F02 TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA @@ -9649,6 +9696,10 @@ ← (‎ ဩော် ‎) 1029 1031 102C 103A MYANMAR LETTER O, MYANMAR VOWEL SIGN E, MYANMAR VOWEL SIGN AA, MYANMAR SIGN ASAT ← (‎ ဪ ‎) 102A MYANMAR LETTER AU # →ဩော်→ +# ၵာ ဢ + (‎ ဢ ‎) 1022 MYANMAR LETTER SHAN A +← (‎ ၵာ ‎) 1075 102C MYANMAR LETTER SHAN KA, MYANMAR VOWEL SIGN AA + # ၁ ၥ (‎ ၁ ‎) 1041 MYANMAR DIGIT ONE ← (‎ ၥ ‎) 1065 MYANMAR LETTER WESTERN PWO KAREN THA @@ -11800,10 +11851,27 @@ (‎ ដ ‎) 178A KHMER LETTER DA ← (‎ ត ‎) 178F KHMER LETTER TA +# ទ្ប ឡ + (‎ ទ្ប ‎) 1791 17D2 1794 KHMER LETTER TO, KHMER SIGN COENG, KHMER LETTER BA +← (‎ ឡ ‎) 17A1 KHMER LETTER LA + +# ព្ធ ឰ + (‎ ព្ធ ‎) 1796 17D2 1792 KHMER LETTER PO, KHMER SIGN COENG, KHMER LETTER THO +← (‎ ឰ ‎) 17B0 KHMER INDEPENDENT VOWEL QAI + # អ ឣ (‎ អ ‎) 17A2 KHMER LETTER QA ← (‎ ឣ ‎) 17A3 KHMER INDEPENDENT VOWEL QAQ +# េี េី ើ + (‎ ើ ‎) 17BE KHMER VOWEL SIGN OE +← (‎ េี ‎) 17C1 0E35 KHMER VOWEL SIGN E, THAI CHARACTER SARA II # →េី→ +← (‎ េី ‎) 17C1 17B8 KHMER VOWEL SIGN E, KHMER VOWEL SIGN II + +# េា ោ + (‎ េា ‎) 17C1 17B6 KHMER VOWEL SIGN E, KHMER VOWEL SIGN AA +← (‎ ោ ‎) 17C4 KHMER VOWEL SIGN OO + # ᠵ ᡕ (‎ ᠵ ‎) 1835 MONGOLIAN LETTER JA ← (‎ ᡕ ‎) 1855 MONGOLIAN LETTER TODO YA @@ -17804,5 +17872,5 @@ (‎ 𪘀 ‎) 2A600 CJK UNIFIED IDEOGRAPH-2A600 ← (‎ 𪘀 ‎) 2FA1D CJK COMPATIBILITY IDEOGRAPH-2FA1D -# total : 7630 +# total : 7659 diff --git a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt index 4e7f5698f..5ead5c5e6 100644 --- a/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt +++ b/unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt @@ -1,5 +1,5 @@ # confusablesSummaryIdentifier.txt -# Date: 2025-10-11, 02:30:37 GMT +# Date: 2025-10-25, 07:52:31 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -459,9 +459,17 @@ (‎ φ ‎) 03C6 GREEK SMALL LETTER PHI ← (‎ ф ‎) 0444 CYRILLIC SMALL LETTER EF # →ɸ→ -# ́ َ +# ́ َ ަ (‎ ́ ‎) 0301 COMBINING ACUTE ACCENT ← (‎ َ ‎) 064E ARABIC FATHA +← (‎ ަ ‎) 07A6 THAANA ABAFILI + +# ́́ ަަ ̋ ً ާ + (‎ ́́ ‎) 0301 0301 COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT +← (‎ ަަ ‎) 07A6 07A6 THAANA ABAFILI, THAANA ABAFILI +← (‎ ̋ ‎) 030B COMBINING DOUBLE ACUTE ACCENT +← (‎ ً ‎) 064B ARABIC FATHATAN # →̋→ +← (‎ ާ ‎) 07A7 THAANA AABAAFILI # →ަަ→ # ̆ ̌ ॅ (‎ ̆ ‎) 0306 COMBINING BREVE @@ -481,29 +489,32 @@ ← (‎ ં ‎) 0A82 GUJARATI SIGN ANUSVARA ← (‎ ் ‎) 0BCD TAMIL SIGN VIRAMA -# ̊ ْ ํ ໍ ံ ំ +# ̊ ْ ް ํ ໍ ཾ ံ ំ (‎ ̊ ‎) 030A COMBINING RING ABOVE ← (‎ ْ ‎) 0652 ARABIC SUKUN +← (‎ ް ‎) 07B0 THAANA SUKUN ← (‎ ํ ‎) 0E4D THAI CHARACTER NIKHAHIT ← (‎ ໍ ‎) 0ECD LAO NIGGAHITA +← (‎ ཾ ‎) 0F7E TIBETAN SIGN RJES SU NGA RO ← (‎ ံ ‎) 1036 MYANMAR SIGN ANUSVARA ← (‎ ំ ‎) 17C6 KHMER SIGN NIKAHIT -# ̋ ً - (‎ ̋ ‎) 030B COMBINING DOUBLE ACUTE ACCENT -← (‎ ً ‎) 064B ARABIC FATHATAN +# ِ ި + (‎ ِ ‎) 0650 ARABIC KASRA +← (‎ ި ‎) 07A8 THAANA IBIFILI # →̗→ # ̦ ̧ (‎ ̦ ‎) 0326 COMBINING COMMA BELOW ← (‎ ̧ ‎) 0327 COMBINING CEDILLA # →̡→ -# ̣ ़ ় ਼ ઼ ଼ +# ̣ ़ ় ਼ ઼ ଼ ฺ (‎ ̣ ‎) 0323 COMBINING DOT BELOW ← (‎ ़ ‎) 093C DEVANAGARI SIGN NUKTA ← (‎ ় ‎) 09BC BENGALI SIGN NUKTA ← (‎ ਼ ‎) 0A3C GURMUKHI SIGN NUKTA ← (‎ ઼ ‎) 0ABC GUJARATI SIGN NUKTA ← (‎ ଼ ‎) 0B3C ORIYA SIGN NUKTA +← (‎ ฺ ‎) 0E3A THAI CHARACTER PHINTHU # ॖ ੁ (‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE @@ -513,6 +524,10 @@ (‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE ← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU +# ުު ޫ + (‎ ުު ‎) 07AA 07AA THAANA UBUFILI, THAANA UBUFILI +← (‎ ޫ ‎) 07AB THAANA OOBOOFILI + # Γ Г (‎ Γ ‎) 0393 GREEK CAPITAL LETTER GAMMA ← (‎ Г ‎) 0413 CYRILLIC CAPITAL LETTER GHE @@ -684,6 +699,11 @@ (‎ پ̆ ‎) 067E 0306 ARABIC LETTER PEH, COMBINING BREVE ← (‎ ࢾ ‎) 08BE ARABIC LETTER PEH WITH SMALL V # →‎پٚ‎→ +# ِِ ިި ީ + (‎ ِِ ‎) 0650 0650 ARABIC KASRA, ARABIC KASRA +← (‎ ިި ‎) 07A8 07A8 THAANA IBIFILI, THAANA IBIFILI +← (‎ ީ ‎) 07A9 THAANA EEBEEFILI # →ިި→ + # ٢ ۲ (‎ ٢ ‎) 0662 ARABIC-INDIC DIGIT TWO ← (‎ ۲ ‎) 06F2 EXTENDED ARABIC-INDIC DIGIT TWO @@ -725,6 +745,14 @@ (‎ ݧ ‎) 0767 ARABIC LETTER NOON WITH TWO DOTS BELOW ← (‎ ࢩ ‎) 08A9 ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE # →‎ݔ‎→ +# ެު ޮ + (‎ ެު ‎) 07AC 07AA THAANA EBEFILI, THAANA UBUFILI +← (‎ ޮ ‎) 07AE THAANA OBOFILI + +# ެެ ޭ + (‎ ެެ ‎) 07AC 07AC THAANA EBEFILI, THAANA EBEFILI +← (‎ ޭ ‎) 07AD THAANA EYBEYFILI + # अ̆ अॅ ॲ (‎ अ̆ ‎) 0905 0306 DEVANAGARI LETTER A, COMBINING BREVE ← (‎ अॅ ‎) 0905 0945 DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E @@ -888,13 +916,15 @@ (‎ ८ ‎) 096E DEVANAGARI DIGIT EIGHT ← (‎ ૮ ‎) 0AEE GUJARATI DIGIT EIGHT -# ঃ ః ಃ ഃ ඃ း +# ঃ ః ಃ ഃ ඃ း ះ 𑌃 (‎ ঃ ‎) 0983 BENGALI SIGN VISARGA ← (‎ ః ‎) 0C03 TELUGU SIGN VISARGA # →ਃ→ ← (‎ ಃ ‎) 0C83 KANNADA SIGN VISARGA # →ః→→ਃ→ ← (‎ ഃ ‎) 0D03 MALAYALAM SIGN VISARGA # →ಃ→→ః→→ਃ→ ← (‎ ඃ ‎) 0D83 SINHALA SIGN VISARGAYA # →ഃ→→ಃ→→ః→→ਃ→ ← (‎ း ‎) 1038 MYANMAR SIGN VISARGA # →ඃ→→ഃ→→ಃ→→ః→→ਃ→ +← (‎ ះ ‎) 17C7 KHMER SIGN REAHMUK +← (‎ 𑌃 ‎) 11303 GRANTHA SIGN VISARGA # অা আ (‎ অা ‎) 0985 09BE BENGALI LETTER A, BENGALI VOWEL SIGN AA @@ -1255,6 +1285,14 @@ (‎ භ ‎) 0DB7 SINHALA LETTER MAHAAPRAANA BAYANNA ← (‎ හ ‎) 0DC4 SINHALA LETTER HAYANNA +# ෘෘ ෲ + (‎ ෘෘ ‎) 0DD8 0DD8 SINHALA VOWEL SIGN GAETTA-PILLA, SINHALA VOWEL SIGN GAETTA-PILLA +← (‎ ෲ ‎) 0DF2 SINHALA VOWEL SIGN DIGA GAETTA-PILLA + +# ෙෙ ෛ + (‎ ෙෙ ‎) 0DD9 0DD9 SINHALA VOWEL SIGN KOMBUVA, SINHALA VOWEL SIGN KOMBUVA +← (‎ ෛ ‎) 0DDB SINHALA VOWEL SIGN KOMBU DEKA + # ข ฃ (‎ ข ‎) 0E02 THAI CHARACTER KHO KHAI ← (‎ ฃ ‎) 0E03 THAI CHARACTER KHO KHUAT @@ -1360,6 +1398,10 @@ (‎ ๋ ‎) 0E4B THAI CHARACTER MAI CHATTAWA ← (‎ ໋ ‎) 0ECB LAO TONE MAI CATAWA +# ເເ ແ + (‎ ເເ ‎) 0EC0 0EC0 LAO VOWEL SIGN E, LAO VOWEL SIGN E +← (‎ ແ ‎) 0EC1 LAO VOWEL SIGN EI + # ེེ ཻ (‎ ེེ ‎) 0F7A 0F7A TIBETAN VOWEL SIGN E, TIBETAN VOWEL SIGN E ← (‎ ཻ ‎) 0F7B TIBETAN VOWEL SIGN EE @@ -1386,6 +1428,10 @@ ← (‎ ဩော် ‎) 1029 1031 102C 103A MYANMAR LETTER O, MYANMAR VOWEL SIGN E, MYANMAR VOWEL SIGN AA, MYANMAR SIGN ASAT ← (‎ ဪ ‎) 102A MYANMAR LETTER AU # →ဩော်→ +# ၵာ ဢ + (‎ ဢ ‎) 1022 MYANMAR LETTER SHAN A +← (‎ ၵာ ‎) 1075 102C MYANMAR LETTER SHAN KA, MYANMAR VOWEL SIGN AA + # ၽှ ၾ (‎ ၽှ ‎) 107D 103E MYANMAR LETTER SHAN PHA, MYANMAR CONSONANT SIGN MEDIAL HA ← (‎ ၾ ‎) 107E MYANMAR LETTER SHAN FA @@ -1394,9 +1440,26 @@ (‎ ដ ‎) 178A KHMER LETTER DA ← (‎ ត ‎) 178F KHMER LETTER TA +# ទ្ប ឡ + (‎ ទ្ប ‎) 1791 17D2 1794 KHMER LETTER TO, KHMER SIGN COENG, KHMER LETTER BA +← (‎ ឡ ‎) 17A1 KHMER LETTER LA + +# ព្ធ ឰ + (‎ ព្ធ ‎) 1796 17D2 1792 KHMER LETTER PO, KHMER SIGN COENG, KHMER LETTER THO +← (‎ ឰ ‎) 17B0 KHMER INDEPENDENT VOWEL QAI + +# េี េី ើ + (‎ ើ ‎) 17BE KHMER VOWEL SIGN OE +← (‎ េี ‎) 17C1 0E35 KHMER VOWEL SIGN E, THAI CHARACTER SARA II # →េី→ +← (‎ េី ‎) 17C1 17B8 KHMER VOWEL SIGN E, KHMER VOWEL SIGN II + +# េា ោ + (‎ េា ‎) 17C1 17B6 KHMER VOWEL SIGN E, KHMER VOWEL SIGN AA +← (‎ ោ ‎) 17C4 KHMER VOWEL SIGN OO + # へ ヘ (‎ へ ‎) 3078 HIRAGANA LETTER HE ← (‎ ヘ ‎) 30D8 KATAKANA LETTER HE -# total : 500 +# total : 524 diff --git a/unicodetools/data/security/dev/data/source/confusables-source.txt b/unicodetools/data/security/dev/data/source/confusables-source.txt index 69573a6b0..b4f4a70eb 100644 --- a/unicodetools/data/security/dev/data/source/confusables-source.txt +++ b/unicodetools/data/security/dev/data/source/confusables-source.txt @@ -5786,3 +5786,28 @@ A8CF ; 007C 007C # SAURASHTRA DOUBLE DANDA 11642 ; 007C 007C # MODI DOUBLE DANDA 11C42 ; 007C 007C # BHAIKSUKI DOUBLE DANDA 113D5 ; 007C 007C # TULU-TIGALARI DOUBLE DANDA + +# High-priority confusables data (PAG ref #458) +030B ; 0301 0301 +07A6 ; 0301 +07A7 ; 07A6 07A6 +07A8 ; 0317 +07A9 ; 07A8 07A8 +07AA ; 0350 +07AB ; 07AA 07AA +07AC ; 1DFE +07AD ; 07AC 07AC +07AE ; 07AC 07AA +07B0 ; 030A +0DDB ; 0DD9 0DD9 +0DF2 ; 0DD8 0DD8 +0E3A ; 0323 +0EC1 ; 0EC0 0EC0 +0F7E ; 030A +1022 ; 1075 102C +17A1 ; 1791 17D2 1794 +17B0 ; 1796 17D2 1792 +17BE ; 17C1 17B8 +17C4 ; 17C1 17B6 +17C7 ; 0983 +11303 ; 0983 diff --git a/unicodetools/data/security/dev/data/source/formatted-source.txt b/unicodetools/data/security/dev/data/source/formatted-source.txt index b7b6606bf..db36120b1 100644 --- a/unicodetools/data/security/dev/data/source/formatted-source.txt +++ b/unicodetools/data/security/dev/data/source/formatted-source.txt @@ -1,5 +1,5 @@ # formatted-source.txt -# Date: 2025-10-17, 00:06:11 GMT +# Date: 2025-10-25, 07:52:30 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1179,10 +1179,13 @@ 0300 ; 0953 # ( ̀ ~ ॓ ) COMBINING GRAVE ACCENT ~ DEVANAGARI GRAVE ACCENT 0301 ; 064E # ( ́ ~ َ ) COMBINING ACUTE ACCENT ~ ARABIC FATHA +0301 ; 07A6 # ( ́ ~ ަ ) COMBINING ACUTE ACCENT ~ THAANA ABAFILI 0301 ; 059C # ( ́ ~ ֜ ) COMBINING ACUTE ACCENT ~ HEBREW ACCENT GERESH 0301 ; 0747 # ( ́ ~ ݇ ) COMBINING ACUTE ACCENT ~ SYRIAC OBLIQUE LINE ABOVE 0301 ; 0954 # ( ́ ~ ॔ ) COMBINING ACUTE ACCENT ~ DEVANAGARI ACUTE ACCENT +0301 0301 ; 030B # ( ́́ ~ ̋ ) COMBINING ACUTE ACCENT, COMBINING ACUTE ACCENT ~ COMBINING DOUBLE ACUTE ACCENT + 0302 ; 0311 # ( ̂ ~ ̑ ) COMBINING CIRCUMFLEX ACCENT ~ COMBINING INVERTED BREVE 0302 ; 065B # ( ̂ ~ ٛ ) COMBINING CIRCUMFLEX ACCENT ~ ARABIC VOWEL SIGN INVERTED SMALL V ABOVE 0302 ; 1CD0 # ( ̂ ~ ᳐ ) COMBINING CIRCUMFLEX ACCENT ~ VEDIC TONE KARSHANA @@ -1222,8 +1225,10 @@ 0308 ; 08EB # ( ̈ ~ ࣫ ) COMBINING DIAERESIS ~ ARABIC TONE TWO DOTS ABOVE 030A ; 0652 # ( ̊ ~ ْ ) COMBINING RING ABOVE ~ ARABIC SUKUN +030A ; 07B0 # ( ̊ ~ ް ) COMBINING RING ABOVE ~ THAANA SUKUN 030A ; 0E4D # ( ̊ ~ ํ ) COMBINING RING ABOVE ~ THAI CHARACTER NIKHAHIT 030A ; 0ECD # ( ̊ ~ ໍ ) COMBINING RING ABOVE ~ LAO NIGGAHITA +030A ; 0F7E # ( ̊ ~ ཾ ) COMBINING RING ABOVE ~ TIBETAN SIGN RJES SU NGA RO 030A ; 1036 # ( ̊ ~ ံ ) COMBINING RING ABOVE ~ MYANMAR SIGN ANUSVARA 030A ; 17C6 # ( ̊ ~ ំ ) COMBINING RING ABOVE ~ KHMER SIGN NIKAHIT 030A ; 0366 # ( ̊ ~ ͦ ) COMBINING RING ABOVE ~ COMBINING LATIN SMALL LETTER O @@ -1257,6 +1262,7 @@ 0323 ; 0A3C # ( ̣ ~ ਼ ) COMBINING DOT BELOW ~ GURMUKHI SIGN NUKTA 0323 ; 0ABC # ( ̣ ~ ઼ ) COMBINING DOT BELOW ~ GUJARATI SIGN NUKTA 0323 ; 0B3C # ( ̣ ~ ଼ ) COMBINING DOT BELOW ~ ORIYA SIGN NUKTA +0323 ; 0E3A # ( ̣ ~ ฺ ) COMBINING DOT BELOW ~ THAI CHARACTER PHINTHU 0323 ; 05B4 # ( ̣ ~ ִ ) COMBINING DOT BELOW ~ HEBREW POINT HIRIQ 0323 ; 05C5 # ( ̣ ~ ׅ ) COMBINING DOT BELOW ~ HEBREW MARK LOWER DOT 0323 ; 065C # ( ̣ ~ ٜ ) COMBINING DOT BELOW ~ ARABIC VOWEL SIGN DOT BELOW @@ -1302,6 +1308,7 @@ 0345 ; 1AB7 # ( ͅ ~ ᪷ ) COMBINING GREEK YPOGEGRAMMENI ~ COMBINING OPEN MARK BELOW +0350 ; 07AA # ( ͐ ~ ު ) COMBINING RIGHT ARROWHEAD ABOVE ~ THAANA UBUFILI 0350 ; 08F8 # ( ͐ ~ ࣸ ) COMBINING RIGHT ARROWHEAD ABOVE ~ ARABIC RIGHT ARROWHEAD ABOVE 0352 ; 0900 # ( ͒ ~ ऀ ) COMBINING FERMATA ~ DEVANAGARI SIGN INVERTED CANDRABINDU @@ -1943,6 +1950,20 @@ 0754 ; 0767 # ( ‎ݔ‎ ~ ‎ݧ‎ ) ARABIC LETTER BEH WITH TWO DOTS BELOW AND DOT ABOVE ~ ARABIC LETTER NOON WITH TWO DOTS BELOW 0754 ; 08A9 # ( ‎ݔ‎ ~ ‎ࢩ‎ ) ARABIC LETTER BEH WITH TWO DOTS BELOW AND DOT ABOVE ~ ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +07A6 07A6 ; 07A7 # ( ަަ ~ ާ ) THAANA ABAFILI, THAANA ABAFILI ~ THAANA AABAAFILI + +07A8 ; 0317 # ( ި ~ ̗ ) THAANA IBIFILI ~ COMBINING ACUTE ACCENT BELOW + +07A8 07A8 ; 07A9 # ( ިި ~ ީ ) THAANA IBIFILI, THAANA IBIFILI ~ THAANA EEBEEFILI + +07AA 07AA ; 07AB # ( ުު ~ ޫ ) THAANA UBUFILI, THAANA UBUFILI ~ THAANA OOBOOFILI + +07AC ; 1DFE # ( ެ ~ ᷾ ) THAANA EBEFILI ~ COMBINING LEFT ARROWHEAD ABOVE + +07AC 07AA ; 07AE # ( ެު ~ ޮ ) THAANA EBEFILI, THAANA UBUFILI ~ THAANA OBOFILI + +07AC 07AC ; 07AD # ( ެެ ~ ޭ ) THAANA EBEFILI, THAANA EBEFILI ~ THAANA EYBEYFILI + 07F4 ; 2019 # ( ‎ߴ‎ ~ ’ ) NKO HIGH TONE APOSTROPHE ~ RIGHT SINGLE QUOTATION MARK 07F5 ; 2018 # ( ‎ߵ‎ ~ ‘ ) NKO LOW TONE APOSTROPHE ~ LEFT SINGLE QUOTATION MARK @@ -2069,7 +2090,9 @@ 0981 ; 0310 # ( ঁ ~ ̐ ) BENGALI SIGN CANDRABINDU ~ COMBINING CANDRABINDU 0981 ; 114BF # ( ঁ ~ 𑒿 ) BENGALI SIGN CANDRABINDU ~ TIRHUTA SIGN CANDRABINDU +0983 ; 17C7 # ( ঃ ~ ះ ) BENGALI SIGN VISARGA ~ KHMER SIGN REAHMUK 0983 ; 0A03 # ( ঃ ~ ਃ ) BENGALI SIGN VISARGA ~ GURMUKHI SIGN VISARGA +0983 ; 11303 # ( ঃ ~ 𑌃 ) BENGALI SIGN VISARGA ~ GRANTHA SIGN VISARGA 0983 ; 114C1 # ( ঃ ~ 𑓁 ) BENGALI SIGN VISARGA ~ TIRHUTA SIGN VISARGA 0985 09BE ; 0986 # ( অা ~ আ ) BENGALI LETTER A, BENGALI VOWEL SIGN AA ~ BENGALI LETTER AA @@ -2447,6 +2470,10 @@ 0DC3 0DD8 ; 0D8D # ( සෘ ~ ඍ ) SINHALA LETTER DANTAJA SAYANNA, SINHALA VOWEL SIGN GAETTA-PILLA ~ SINHALA LETTER IRUYANNA +0DD8 0DD8 ; 0DF2 # ( ෘෘ ~ ෲ ) SINHALA VOWEL SIGN GAETTA-PILLA, SINHALA VOWEL SIGN GAETTA-PILLA ~ SINHALA VOWEL SIGN DIGA GAETTA-PILLA + +0DD9 0DD9 ; 0DDB # ( ෙෙ ~ ෛ ) SINHALA VOWEL SIGN KOMBUVA, SINHALA VOWEL SIGN KOMBUVA ~ SINHALA VOWEL SIGN KOMBU DEKA + 0DE8 0DCF ; 0DE9 # ( ෨ා ~ ෩ ) SINHALA LITH DIGIT TWO, SINHALA VOWEL SIGN AELA-PILLA ~ SINHALA LITH DIGIT THREE 0DE8 0DD3 ; 0DEF # ( ෨ී ~ ෯ ) SINHALA LITH DIGIT TWO, SINHALA VOWEL SIGN DIGA IS-PILLA ~ SINHALA LITH DIGIT NINE @@ -2515,6 +2542,8 @@ 0E5B ; 17DA #* ( ๛ ~ ៚ ) THAI CHARACTER KHOMUT ~ KHMER SIGN KOOMUUT +0EC0 0EC0 ; 0EC1 # ( ເເ ~ ແ ) LAO VOWEL SIGN E, LAO VOWEL SIGN E ~ LAO VOWEL SIGN EI + 0F0D 0F0D ; 0F0E #* ( །། ~ ༎ ) TIBETAN MARK SHAD, TIBETAN MARK SHAD ~ TIBETAN MARK NYIS SHAD 0F1A 0F1A ; 0F1B #* ( ༚༚ ~ ༛ ) TIBETAN SIGN RDEL DKAR GCIG, TIBETAN SIGN RDEL DKAR GCIG ~ TIBETAN SIGN RDEL DKAR GNYIS @@ -2565,6 +2594,8 @@ 104A 104A ; 104B #* ( ၊၊ ~ ။ ) MYANMAR SIGN LITTLE SECTION, MYANMAR SIGN LITTLE SECTION ~ MYANMAR SIGN SECTION +1075 102C ; 1022 # ( ၵာ ~ ဢ ) MYANMAR LETTER SHAN KA, MYANMAR VOWEL SIGN AA ~ MYANMAR LETTER SHAN A + 107D 103E ; 107E # ( ၽှ ~ ၾ ) MYANMAR LETTER SHAN PHA, MYANMAR CONSONANT SIGN MEDIAL HA ~ MYANMAR LETTER SHAN FA 1083 1036 ; 109E # ( ႃံ ~ ႞ ) MYANMAR VOWEL SIGN SHAN AA, MYANMAR SIGN ANUSVARA ~ MYANMAR SYMBOL SHAN ONE @@ -3864,8 +3895,16 @@ 178A ; 178F # ( ដ ~ ត ) KHMER LETTER DA ~ KHMER LETTER TA +1791 17D2 1794 ; 17A1 # ( ទ្ប ~ ឡ ) KHMER LETTER TO, KHMER SIGN COENG, KHMER LETTER BA ~ KHMER LETTER LA + +1796 17D2 1792 ; 17B0 # ( ព្ធ ~ ឰ ) KHMER LETTER PO, KHMER SIGN COENG, KHMER LETTER THO ~ KHMER INDEPENDENT VOWEL QAI + 17A2 ; 17A3 # ( អ ~ ឣ ) KHMER LETTER QA ~ KHMER INDEPENDENT VOWEL QAQ +17C1 17B6 ; 17C4 # ( េា ~ ោ ) KHMER VOWEL SIGN E, KHMER VOWEL SIGN AA ~ KHMER VOWEL SIGN OO + +17C1 17B8 ; 17BE # ( េី ~ ើ ) KHMER VOWEL SIGN E, KHMER VOWEL SIGN II ~ KHMER VOWEL SIGN OE + 185C ; 1896 # ( ᡜ ~ ᢖ ) MONGOLIAN LETTER TODO DZA ~ MONGOLIAN LETTER ALI GALI ZA 18D4 ; 1DBA # ( ᣔ ~ ᶺ ) CANADIAN SYLLABICS OJIBWAY P ~ MODIFIER LETTER SMALL TURNED V