Skip to content

Commit df35840

Browse files
committed
Normalise URLs for autofill using only the domain, not the full path
1 parent 4a20aae commit df35840

File tree

2 files changed

+104
-15
lines changed

2 files changed

+104
-15
lines changed

autofill/autofill-impl/src/main/java/com/duckduckgo/autofill/impl/encoding/UrlUnicodeNormalizer.kt

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,32 +31,39 @@ interface UrlUnicodeNormalizer {
3131
class UrlUnicodeNormalizerImpl @Inject constructor() : UrlUnicodeNormalizer {
3232

3333
override fun normalizeAscii(url: String?): String? {
34-
if (url == null) return null
35-
36-
val originalScheme = url.scheme() ?: ""
37-
val noScheme = url.removePrefix(originalScheme)
38-
39-
val sb = StringBuilder()
40-
val info = IDNA.Info()
41-
IDNA.getUTS46Instance(IDNA.DEFAULT).nameToASCII(noScheme, sb, info)
42-
if (info.hasErrors()) {
43-
logcat { "Unable to convert to ASCII: $url" }
44-
return url
34+
return normalizeUrl(url) { hostname, sb, info ->
35+
IDNA.getUTS46Instance(IDNA.DEFAULT).nameToASCII(hostname, sb, info)
4536
}
46-
return "${originalScheme}$sb"
4737
}
4838

4939
override fun normalizeUnicode(url: String?): String? {
40+
return normalizeUrl(url) { hostname, sb, info ->
41+
IDNA.getUTS46Instance(IDNA.DEFAULT).nameToUnicode(hostname, sb, info)
42+
}
43+
}
44+
45+
private fun normalizeUrl(
46+
url: String?,
47+
idnaProcessor: (hostname: String, sb: StringBuilder, info: IDNA.Info) -> Unit,
48+
): String? {
5049
if (url == null) return null
5150

51+
val originalScheme = url.scheme() ?: ""
52+
val noScheme = url.removePrefix(originalScheme)
53+
54+
// Extract the part before the first '/', '?' or '#' for IDNA processing (a port, if present, stays attached to the hostname)
55+
val hostEndIndex = noScheme.indexOfFirst { it == '/' || it == '?' || it == '#' }
56+
val hostname = if (hostEndIndex == -1) noScheme else noScheme.substring(0, hostEndIndex)
57+
val pathAndQuery = if (hostEndIndex == -1) "" else noScheme.substring(hostEndIndex)
58+
5259
val sb = StringBuilder()
5360
val info = IDNA.Info()
54-
IDNA.getUTS46Instance(IDNA.DEFAULT).nameToUnicode(url, sb, info)
61+
idnaProcessor(hostname, sb, info)
5562
if (info.hasErrors()) {
56-
logcat { "Unable to convert to unicode: $url" }
63+
logcat { "Unable to convert hostname: $hostname" }
5764
return url
5865
}
59-
return sb.toString()
66+
return "${originalScheme}$sb$pathAndQuery"
6067
}
6168
}
6269

autofill/autofill-impl/src/test/java/com/duckduckgo/autofill/impl/encoding/UrlUnicodeNormalizerImplTest.kt

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,86 @@ class UrlUnicodeNormalizerImplTest {
4545
fun whenNormalizingToUnicodeAndOnlyContainsAsciiThenThenInputAndOutputIdentical() {
4646
assertEquals("c.com", testee.normalizeUnicode("c.com"))
4747
}
48+
49+
@Test
50+
fun whenNormalizingToAsciiWithSchemesThenSchemePreserved() {
51+
assertEquals("http://xn--7ca.com", testee.normalizeAscii("http://ç.com"))
52+
assertEquals("https://xn--7ca.com", testee.normalizeAscii("https://ç.com"))
53+
}
54+
55+
@Test
56+
fun whenNormalizingToAsciiWithUrlComponentsThenAllPreserved() {
57+
assertEquals("http://xn--7ca.com/path", testee.normalizeAscii("http://ç.com/path"))
58+
assertEquals("http://xn--7ca.com?query=value", testee.normalizeAscii("http://ç.com?query=value"))
59+
assertEquals("http://xn--7ca.com#fragment", testee.normalizeAscii("http://ç.com#fragment"))
60+
assertEquals("https://xn--7ca.com/deep/nested/path/file.html", testee.normalizeAscii("https://ç.com/deep/nested/path/file.html"))
61+
assertEquals("https://xn--7ca.com/search?q=test&lang=en&page=1", testee.normalizeAscii("https://ç.com/search?q=test&lang=en&page=1"))
62+
}
63+
64+
@Test
65+
fun whenNormalizingToAsciiWithComplexUrlThenAllComponentsPreserved() {
66+
assertEquals("http://xn--7ca.com/path?query=value#fragment", testee.normalizeAscii("http://ç.com/path?query=value#fragment"))
67+
}
68+
69+
@Test
70+
fun whenNormalizingToAsciiWithUrlContainingInvalidDomainCharactersThenProcessesCorrectly() {
71+
// This URL contains characters that are invalid in domain names (/,?,=,&) in the path/query
72+
// Old implementation: tries to pass entire path to IDNA, fails, returns original
73+
// New implementation: processes only hostname, succeeds, preserves path/query
74+
val input = "https://google.com/signin?continue=https%3A%2F%2Fpasswords.com&id=123"
75+
val expected = "https://google.com/signin?continue=https%3A%2F%2Fpasswords.com&id=123"
76+
assertEquals(expected, testee.normalizeAscii(input))
77+
}
78+
79+
@Test
80+
fun whenNormalizingToAsciiWithNoSchemeThenProcessedWithoutScheme() {
81+
assertEquals("xn--7ca.com/path", testee.normalizeAscii("ç.com/path"))
82+
}
83+
84+
@Test
85+
fun whenNormalizingToAsciiWithPortNumberThenPortPreserved() {
86+
assertEquals("https://xn--7ca.com:8080/path", testee.normalizeAscii("https://ç.com:8080/path"))
87+
}
88+
89+
@Test
90+
fun whenNormalizingToAsciiWithSubdomainThenSubdomainProcessed() {
91+
assertEquals("https://xn--sb-xka.xn--dmain-jua.com", testee.normalizeAscii("https://süb.dömain.com"))
92+
}
93+
94+
@Test
95+
fun whenNormalizingToAsciiWithNullInputThenReturnsNull() {
96+
assertNull(testee.normalizeAscii(null))
97+
}
98+
99+
@Test
100+
fun whenNormalizingToUnicodeWithNullInputThenReturnsNull() {
101+
assertNull(testee.normalizeUnicode(null))
102+
}
103+
104+
@Test
105+
fun whenNormalizingToAsciiWithEmptyStringThenReturnsEmptyString() {
106+
assertEquals("", testee.normalizeAscii(""))
107+
}
108+
109+
@Test
110+
fun whenNormalizingToUnicodeWithEmptyStringThenReturnsEmptyString() {
111+
assertEquals("", testee.normalizeUnicode(""))
112+
}
113+
114+
@Test
115+
fun whenNormalizingToUnicodeWithComplexUrlThenAllComponentsPreserved() {
116+
assertEquals("https://ç.com/path?query=value#fragment", testee.normalizeUnicode("https://xn--7ca.com/path?query=value#fragment"))
117+
}
118+
119+
@Test
120+
fun whenNormalizingToUnicodeWithSchemesThenSchemePreserved() {
121+
assertEquals("http://ç.com", testee.normalizeUnicode("http://xn--7ca.com"))
122+
assertEquals("https://ç.com", testee.normalizeUnicode("https://xn--7ca.com"))
123+
}
124+
125+
@Test
126+
fun whenNormalizingToAsciiWithPortThenPortIncludedInHostname() {
127+
// The port is not split off before IDNA processing; verify an all-ASCII host with a port is returned unchanged
128+
assertEquals("https://example.com:8080/path", testee.normalizeAscii("https://example.com:8080/path"))
129+
}
48130
}

0 commit comments

Comments
 (0)