Skip to content

Commit d522205

Browse files
committed
fix BiDi to affect RandALcat labels only
1 parent 1115ce8 commit d522205

File tree

10 files changed

+24
-33
lines changed

10 files changed

+24
-33
lines changed

#/tests/0/50.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"description": "KATAKANA MIDDLE DOT with Hiragana valid for RFC 5890-5892, but fails Bidi rule 1",
2+
"description": "KATAKANA MIDDLE DOT with Hiragana",
33
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
44
"data": "・ぁ",
5-
"valid": false
5+
"valid": true
66
}

#/tests/0/51.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"description": "KATAKANA MIDDLE DOT with Katakana valid for RFC 5890-5892, but fails Bidi rule 1",
2+
"description": "KATAKANA MIDDLE DOT with Katakana",
33
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
44
"data": "・ァ",
5-
"valid": false
5+
"valid": true
66
}

#/tests/0/52.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"description": "KATAKANA MIDDLE DOT with Han valid for RFC 5890-5892, but fails Bidi rule 1",
2+
"description": "KATAKANA MIDDLE DOT with Han",
33
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.7",
44
"data": "・丈",
5-
"valid": false
5+
"valid": true
66
}

#/tests/0/55.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"description": "Extended Arabic-Indic digits not mixed with Arabic-Indic digits, but fail Bidi rule 1",
2+
"description": "Extended Arabic-Indic digits mixed with ASCII digits",
33
"comment": "https://tools.ietf.org/html/rfc5891#section-4.2.3.3 https://tools.ietf.org/html/rfc5892#appendix-A.9",
44
"data": "۰0",
5-
"valid": false
5+
"valid": true
66
}

#/tests/0/66.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"description": "Bidi Rule 1 — L-branch fail: contains L but doesn't start with L",
3-
"data": "1g",
4-
"valid": false
2+
"description": "BiDi rules not affecting label that does not contain RTL chars",
3+
"data": "1host",
4+
"valid": true
55
}

#/tests/0/76.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"description": "Bidi Rule 4 violation: AN only",
2+
"description": "AN only in label not affected by BiDi rules (no R or AL char present)",
33
"data": "١٢",
4-
"valid": false
4+
"valid": true
55
}

#/tests/0/77.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"description": "Bidi Rule 4 — fail: mixed EN and AN",
2+
"description": "mixed EN and AN not affected by BiDi rules (no R or AL char present)",
33
"data": "1١",
4-
"valid": false
4+
"valid": true
55
}

#/tests/0/78.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"description": "Bidi Rule 4 — fail: mixed AN and EN",
2+
"description": "mixed AN and EN not affected by BiDi rules (no R or AL char present)",
33
"data": "١2",
4-
"valid": false
4+
"valid": true
55
}

#/tests/0/82.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"description": "Bidi Rule 5 — fail: starts L but contains AN (disallowed)",
2+
"description": "mixed L with AN not affected by BiDi rules (no R or AL char present)",
33
"data": "a١",
4-
"valid": false
4+
"valid": true
55
}

index.js

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,6 @@ function uts46map(label) {
6060
/**
 * Render a code point for use in error messages.
 *
 * @param {number} cp - Unicode code point.
 * @returns {string} Text of the form `char 'X' "(U+XXXX)"`: the character itself,
 *   followed by its upper-case hex value (zero-padded to at least 4 digits) as a
 *   JSON-quoted string.
 * @throws {RangeError} If `cp` is not a valid code point (raised by String.fromCodePoint).
 */
function cpHex(cp) {
  const glyph = String.fromCodePoint(cp);
  const hex = cp.toString(16).toUpperCase().padStart(4, '0');
  // JSON.stringify adds the surrounding double quotes to the U+ notation
  return `char '${glyph}' ${JSON.stringify(`(U+${hex})`)}`;
}
63-
// digits: Arabic-Indic sets distinction (1 and 2), ASCII→0
64-
function digitSet(cp) {
65-
if (cp >= 0x30 && cp <= 0x39) return 0; // ASCII → EN (0)
66-
if (cp >= 0x0660 && cp <= 0x0669) return 1; // Arabic-Indic
67-
if (cp >= 0x06f0 && cp <= 0x06f9) return 2; // Extended Arabic-Indic
68-
return 0;
69-
}
7063
// main validator
7164
function isIdnHostname(hostname) {
7265
// basic hostname checks
@@ -102,13 +95,14 @@ function isIdnHostname(hostname) {
10295
if (aceLabel.length > 63) throwIdnaLengthError('Final ASCII Compatible Encoding (ACE) label cannot exceed 63 bytes (RFC 5890 §2.3.2.1).');
10396
aceHostnameLength += aceLabel.length + 1;
10497
// hyphen rules: no leading/trailing hyphen-minus, and no "--" in the 3rd and 4th positions
98+
if (/^-|-$/.test(label)) throwIdnaSyntaxError('Label cannot begin or end with hyphen-minus (RFC 5891 §4.2.3.1).');
10599
if (label.indexOf('--') === 2) throwIdnaSyntaxError('Label cannot contain consecutive hyphen-minus in the 3rd and 4th positions (RFC 5891 §4.2.3.1).');
106100
// leading combining marks check (some are not covered by bidi)
107101
if (/^\p{M}$/u.test(String.fromCodePoint(label.codePointAt(0)))) throwIdnaSyntaxError(`Label cannot begin with combining/enclosing mark ${cpHex(label.codePointAt(0))} (RFC 5891 §4.2.3.2).`);
108102
// spread cps for context and bidi checks
109103
const cps = Array.from(label).map((char) => char.codePointAt(0));
110104
let joinTypes = '';
111-
let digitSeen = 0;
105+
let digits = '';
112106
let bidiClasses = [];
113107
// per-codepoint contextual checks
114108
for (let j = 0; j < cps.length; j++) {
@@ -143,14 +137,11 @@ function isIdnHostname(hostname) {
143137
}
144138
}
145139
// check mixed digit sets
146-
const ds = digitSet(cp);
147-
if (ds) {
148-
if (digitSeen && digitSeen !== ds) throwIdnaContextOError('Mixed digit sets in hostname not allowed (RFC 5892 Appendix A.8/A.9).');
149-
digitSeen = ds;
150-
}
140+
if ((cp >= 0x0660 && cp <= 0x0669) || (cp >= 0x06f0 && cp <= 0x06f9)) digits += (cp < 0x06f0 ? 'a' : 'e' );
141+
if (j === cps.length - 1 && /^(?=.*a)(?=.*e).*$/.test(digits)) throwIdnaContextOError('Arabic-Indic digits cannot be mixed with Extended Arabic-Indic digits (RFC 5892 Appendix A.8/A.9).');
151142
// validate bidi
152143
bidiClasses.push(getRange(bidi_ranges, cp));
153-
if (j === cps.length - 1) {
144+
if (j === cps.length - 1 && (bidiClasses.includes('R') || bidiClasses.includes('AL'))) {
154145
if (bidiClasses[0] === 'R' || bidiClasses[0] === 'AL') {
155146
for (let cls of bidiClasses) if (!['R', 'AL', 'AN', 'EN', 'ET', 'ES', 'CS', 'ON', 'BN', 'NSM'].includes(cls)) throwIdnaBidiError(`'${label}' breaks rule #2: Only R, AL, AN, EN, ET, ES, CS, ON, BN, NSM allowed in label (RFC 5893 §2.2)`);
156147
if (!/(R|AL|EN|AN)(NSM)*$/.test(bidiClasses.join(''))) throwIdnaBidiError(`'${label}' breaks rule #3: label must end with R, AL, EN, or AN, followed by zero or more NSM (RFC 5893 §2.3)`);

0 commit comments

Comments
 (0)