Skip to content

Commit cbba915

Browse files
authored
Allow double hyphens in 3rd&4th position in all domain operations (#2909)
This is a follow-up to PR #2908, which relaxed this restriction on bare TLDs only, but now we also allow it systemwide on domains and hostnames as well. The rules against hyphens in these positions are still enforced on all parts of the domain name except the last one. Correct handling of multi-part TLDs in this regard is out of scope in this PR; a multi-part TLD that looked something like ".zz--foobar.foobar" would still fail validation. (But of course you cannot a priori know just from looking at a 3-part string whether it might be a hostname on a normal TLD, or a domain name on a 2-part TLD.) This also has some annoying interactions with a trailing dot (indicating the root), which need to be preserved, but otherwise don't affect how TLD validation is handled. BUG= http://b/471013082
1 parent c24f09f commit cbba915

File tree

3 files changed

+77
-7
lines changed

3 files changed

+77
-7
lines changed

util/src/main/java/google/registry/util/DomainNameUtils.java

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919

2020
import com.google.common.base.Ascii;
2121
import com.google.common.base.Joiner;
22+
import com.google.common.base.Splitter;
2223
import com.google.common.base.Strings;
2324
import com.google.common.collect.ImmutableList;
2425
import com.google.common.net.InternetDomainName;
26+
import java.util.List;
2527

2628
/** Utility methods related to domain names. */
2729
public final class DomainNameUtils {
@@ -39,14 +41,34 @@ public static boolean isUnder(InternetDomainName name, InternetDomainName potent
3941
.equals(potentialParent.parts());
4042
}
4143

42-
/** Canonicalizes a hostname/domain name by lowercasing and converting unicode to punycode. */
44+
/**
45+
* Canonicalizes a hostname/domain name by lowercasing and converting Unicode to punycode.
46+
*
47+
* <p>This applies slightly stricter rules to all labels other than the TLD part (all other labels
48+
* are not allowed to have hyphens in the third and fourth position except when using
49+
* ACE-formatted Punycode). This restriction is not enforced on the last label (so multi-part TLDs
50+
* still cannot have said characters except on the last part).
51+
*/
4352
public static String canonicalizeHostname(String label) {
4453
String labelLowercased = Ascii.toLowerCase(label);
45-
try {
46-
return Idn.toASCII(labelLowercased);
47-
} catch (IllegalArgumentException e) {
48-
throw new IllegalArgumentException(String.format("Error ASCIIfying label '%s'", label), e);
54+
String finalChar = "";
55+
if (labelLowercased.endsWith(".")) {
56+
labelLowercased = labelLowercased.substring(0, labelLowercased.length() - 1);
57+
finalChar = ".";
58+
}
59+
List<String> parts = Splitter.on('.').splitToList(labelLowercased);
60+
// If the hostname only has one part, just canonicalize that.
61+
if (parts.size() == 1) {
62+
return Idn.toASCII(parts.getFirst()) + finalChar;
63+
}
64+
// If the hostname has multiple parts, apply stricter validation to all labels but the last
65+
// one (which relaxes the hyphens in third and fourth positions rule).
66+
StringBuilder sb = new StringBuilder();
67+
for (int i = 0; i < parts.size() - 1; i++) {
68+
sb.append(Idn.toASCII(parts.get(i))).append('.');
4969
}
70+
sb.append(Idn.tldToASCII(parts.getLast())).append(finalChar);
71+
return sb.toString();
5072
}
5173

5274
/**

util/src/main/java/google/registry/util/Idn.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,15 @@
1414

1515
package google.registry.util;
1616

17+
import static com.google.common.base.Preconditions.checkArgument;
18+
1719
import com.google.common.base.Joiner;
20+
import com.google.common.collect.ImmutableSet;
21+
import com.google.common.collect.Sets;
1822
import com.ibm.icu.text.IDNA;
23+
import com.ibm.icu.text.IDNA.Error;
1924
import com.ibm.icu.text.IDNA.Info;
25+
import java.util.Set;
2026

2127
/**
2228
* A partial API-compatible replacement for {@link java.net.IDN} that replaces <a
@@ -51,11 +57,29 @@ public static String toASCII(String name) {
5157
StringBuilder result = new StringBuilder();
5258
UTS46_INSTANCE.nameToASCII(name, result, info);
5359
if (info.hasErrors()) {
54-
throw new IllegalArgumentException("Errors: " + Joiner.on(',').join(info.getErrors()));
60+
throw new IllegalArgumentException(
61+
String.format(
62+
"Errors ASCIIfying label %s: %s", name, Joiner.on(',').join(info.getErrors())));
5563
}
5664
return result.toString();
5765
}
5866

67+
/**
68+
* Translates a TLD string from Unicode to Punycoded ASCII.
69+
*
70+
* <p>Unlike {@link #toASCII}, this method does NOT enforce the restriction that hyphens may only
71+
* be present on the third and fourth characters for "xn--" ACE-formatted domains.
72+
*/
73+
public static String tldToASCII(String name) {
74+
Info info = new Info();
75+
StringBuilder result = new StringBuilder();
76+
UTS46_INSTANCE.nameToASCII(name, result, info);
77+
Set<Error> errors = Sets.difference(info.getErrors(), ImmutableSet.of(Error.HYPHEN_3_4));
78+
checkArgument(
79+
errors.isEmpty(), "Errors ASCIIfying label %s: %s", name, Joiner.on(',').join(errors));
80+
return result.toString();
81+
}
82+
5983
/**
6084
* Translates a string from ASCII Compatible Encoding (ACE) to Unicode, as defined by the
6185
* ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.

util/src/test/java/google/registry/util/DomainNameUtilsTest.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,37 @@ void testCanonicalizeHostname_allowsRdnsNames() {
4545
.isEqualTo("119.63.227.45-ns1.jhz-tt.uk");
4646
}
4747

48+
@Test
49+
void testCanonicalizeHostname_retainsTrailingDot() {
50+
assertThat(canonicalizeHostname("みんな.みんな.")).isEqualTo("xn--q9jyb4c.xn--q9jyb4c.");
51+
assertThat(canonicalizeHostname("BAR.foo.みんな.")).isEqualTo("bar.foo.xn--q9jyb4c.");
52+
assertThat(canonicalizeHostname("cat.lol.")).isEqualTo("cat.lol.");
53+
}
54+
4855
@Test
4956
void testCanonicalizeHostname_throwsOn34HyphenRule() {
5057
IllegalArgumentException thrown =
5158
assertThrows(
5259
IllegalArgumentException.class,
5360
() -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk"));
54-
assertThat(thrown).hasCauseThat().hasMessageThat().contains("HYPHEN_3_4");
61+
assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4");
62+
}
63+
64+
@Test
65+
void testCanonicalizeHostname_throwsOn34HyphenRule_withTrailingDot() {
66+
IllegalArgumentException thrown =
67+
assertThrows(
68+
IllegalArgumentException.class,
69+
() -> canonicalizeHostname("119.63.227.45--ns1.jhz-tt.uk."));
70+
assertThat(thrown).hasMessageThat().contains("HYPHEN_3_4");
71+
}
72+
73+
@Test
74+
void testCanonicalizeHostname_allows34HyphenOnTld() {
75+
assertThat(canonicalizeHostname("foobar.zz--main-2262")).isEqualTo("foobar.zz--main-2262");
76+
assertThat(canonicalizeHostname("foobar.zz--main-2262.")).isEqualTo("foobar.zz--main-2262.");
77+
assertThat(canonicalizeHostname("みんな.45--foo")).isEqualTo("xn--q9jyb4c.45--foo");
78+
assertThat(canonicalizeHostname("みんな.45--foo.")).isEqualTo("xn--q9jyb4c.45--foo.");
5579
}
5680

5781
@Test

0 commit comments

Comments
 (0)