Skip to content

Commit d48c769

Browse files
Fix: File pulling fails with uppercase URL characters (#4516)
* fix: remove unnecessary toLowerCase in URL validation * test: enhance URL validation tests to preserve case sensitivity and format * test: update URL validation tests to ensure domain normalization to lowercase while preserving path case * small formatting * fix filenames when downloading live URI --------- Co-authored-by: timothycarambat <[email protected]>
1 parent 4e6f0b3 commit d48c769

File tree

3 files changed

+32
-11
lines changed

3 files changed

+32
-11
lines changed

collector/__tests__/utils/url/index.test.js

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ describe("validURL", () => {
7474
});
7575

7676
describe("validateURL", () => {
77-
it("should return the exact same URL if it's already valid", () => {
78-
expect(validateURL("https://www.google.com")).toBe("https://www.google.com");
77+
it("should return the same URL if it's already valid", () => {
78+
expect(validateURL("https://www.google.com")).toBe(
79+
"https://www.google.com"
80+
);
7981
expect(validateURL("http://www.google.com")).toBe("http://www.google.com");
8082
expect(validateURL("https://random")).toBe("https://random");
8183

@@ -88,16 +90,22 @@ describe("validateURL", () => {
8890
it("should assume https:// if the URL doesn't have a protocol", () => {
8991
expect(validateURL("www.google.com")).toBe("https://www.google.com");
9092
expect(validateURL("google.com")).toBe("https://google.com");
93+
expect(validateURL("EXAMPLE.com/ABCDEF/q1=UPPER")).toBe("https://example.com/ABCDEF/q1=UPPER");
9194
expect(validateURL("ftp://www.google.com")).toBe("ftp://www.google.com");
92-
expect(validateURL("mailto://www.google.com")).toBe("mailto://www.google.com");
95+
expect(validateURL("mailto://www.google.com")).toBe(
96+
"mailto://www.google.com"
97+
);
9398
expect(validateURL("tel://www.google.com")).toBe("tel://www.google.com");
9499
expect(validateURL("data://www.google.com")).toBe("data://www.google.com");
95100
});
96101

97102
it("should remove trailing slashes post-validation", () => {
98-
expect(validateURL("https://www.google.com/")).toBe("https://www.google.com");
103+
expect(validateURL("https://www.google.com/")).toBe(
104+
"https://www.google.com"
105+
);
99106
expect(validateURL("http://www.google.com/")).toBe("http://www.google.com");
100107
expect(validateURL("https://random/")).toBe("https://random");
108+
expect(validateURL("https://example.com/ABCDEF/")).toBe("https://example.com/ABCDEF");
101109
});
102110

103111
it("should handle edge cases and bad data inputs", () => {
@@ -109,4 +117,13 @@ describe("validateURL", () => {
109117
expect(validateURL(" ")).toBe("");
110118
expect(validateURL(" look here! ")).toBe("look here!");
111119
});
120+
121+
it("should preserve case of characters in URL pathname", () => {
122+
expect(validateURL("https://example.com/To/ResOURce?q1=Value&qZ22=UPPE!R"))
123+
.toBe("https://example.com/To/ResOURce?q1=Value&qZ22=UPPE!R");
124+
expect(validateURL("https://sample.com/uPeRCaSe"))
125+
.toBe("https://sample.com/uPeRCaSe");
126+
expect(validateURL("Example.com/PATH/To/Resource?q2=Value&q1=UPPER"))
127+
.toBe("https://example.com/PATH/To/Resource?q2=Value&q1=UPPER");
128+
});
112129
});

collector/utils/downloadURIToFile/index.js

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ const fs = require("fs");
33
const path = require("path");
44
const { pipeline } = require("stream/promises");
55
const { validURL } = require("../url");
6+
const { default: slugify } = require("slugify");
67

78
/**
89
* Download a file to the hotdir
@@ -31,7 +32,12 @@ async function downloadURIToFile(url, maxTimeout = 10_000) {
3132
})
3233
.finally(() => clearTimeout(timeout));
3334

34-
const localFilePath = path.join(WATCH_DIRECTORY, path.basename(url));
35+
const urlObj = new URL(url);
36+
const filename = `${urlObj.hostname}-${slugify(
37+
urlObj.pathname.replace(/\//g, "-"),
38+
{ lower: true }
39+
)}`;
40+
const localFilePath = path.join(WATCH_DIRECTORY, filename);
3541
const writeStream = fs.createWriteStream(localFilePath);
3642
await pipeline(res.body, writeStream);
3743

collector/utils/url/index.js

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,12 @@ function validURL(url) {
8080
*/
8181
function validateURL(url) {
8282
try {
83-
let destination = url.trim().toLowerCase();
83+
let destination = url.trim();
8484
// If the URL has a protocol, just pass through
85-
if (destination.includes("://")) {
85+
// If the URL doesn't have a protocol, assume https://
86+
if (destination.includes("://"))
8687
destination = new URL(destination).toString();
87-
} else {
88-
// If the URL doesn't have a protocol, assume https://
89-
destination = new URL(`https://${destination.trim()}`).toString();
90-
}
88+
else destination = new URL(`https://${destination}`).toString();
9189

9290
// If the URL ends with a slash, remove it
9391
return destination.endsWith("/") ? destination.slice(0, -1) : destination;

0 commit comments

Comments
 (0)