Skip to content

Commit de17603

Browse files
committed
Ensure URLs without target params stay untouched
1 parent 11f9f3e commit de17603

File tree

2 files changed

+72
-15
lines changed

2 files changed

+72
-15
lines changed
Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,28 @@
11
export function removeQueryParams(webPageDOM, paramsToRemove = []) {
2-
if (typeof paramsToRemove === 'string') {
3-
paramsToRemove = [paramsToRemove];
4-
}
2+
const normalizedParams = Array.isArray(paramsToRemove) ? paramsToRemove : [paramsToRemove];
53

6-
if (!paramsToRemove.length) {
4+
if (!normalizedParams.length) {
75
return;
86
}
97

10-
const elements = [
11-
...webPageDOM.querySelectorAll('a[href]'),
12-
...webPageDOM.querySelectorAll('img[src]'),
13-
];
8+
const elements = webPageDOM.querySelectorAll('a[href], img[src]');
149

15-
elements.forEach(element => {
10+
for (const element of elements) {
1611
try {
17-
const url = new URL(element.href || element.src);
12+
const urlString = element.href || element.src;
13+
const url = new URL(urlString);
14+
15+
const hasTargetParams = normalizedParams.some(param => url.searchParams.has(param));
16+
17+
if (hasTargetParams) {
18+
normalizedParams.forEach(param => url.searchParams.delete(param));
1819

19-
paramsToRemove.forEach(param => url.searchParams.delete(param));
20-
element[element.tagName === 'A' ? 'href' : 'src'] = url.toString();
21-
} catch (error) {
22-
// ignore if the element has not a valid URL
20+
const attributeName = element.tagName === 'A' ? 'href' : 'src';
21+
22+
element[attributeName] = url.toString();
23+
}
24+
} catch {
25+
// Silently ignore invalid URLs
2326
}
24-
});
27+
}
2528
}

src/archivist/extract/exposedFilters.test.js

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,60 @@ describe('exposedFilters', () => {
157157
});
158158
});
159159

160+
describe('with URLs without target parameters', () => {
161+
let absoluteLink;
162+
let relativeLink;
163+
let anchorLink;
164+
let img;
165+
const originalAbsoluteHref = 'https://example.com/page?keep=value&preserve=me';
166+
const originalRelativeHref = './relative/path?existing=param';
167+
const originalAnchorHref = '#section1';
168+
const originalImgSrc = 'https://example.com/image.jpg?width=100&height=200';
169+
170+
before(() => {
171+
absoluteLink = webPageDOM.createElement('a');
172+
absoluteLink.setAttribute('href', originalAbsoluteHref);
173+
webPageDOM.body.appendChild(absoluteLink);
174+
175+
relativeLink = webPageDOM.createElement('a');
176+
relativeLink.setAttribute('href', originalRelativeHref);
177+
webPageDOM.body.appendChild(relativeLink);
178+
179+
anchorLink = webPageDOM.createElement('a');
180+
anchorLink.setAttribute('href', originalAnchorHref);
181+
webPageDOM.body.appendChild(anchorLink);
182+
183+
img = webPageDOM.createElement('img');
184+
img.setAttribute('src', originalImgSrc);
185+
webPageDOM.body.appendChild(img);
186+
187+
removeQueryParams(webPageDOM, [ 'utm_source', 'utm_medium', 'session_id' ]);
188+
});
189+
190+
after(() => {
191+
absoluteLink.remove();
192+
relativeLink.remove();
193+
anchorLink.remove();
194+
img.remove();
195+
});
196+
197+
it('leaves absolute link URLs untouched', () => {
198+
expect(absoluteLink.getAttribute('href')).to.equal(originalAbsoluteHref);
199+
});
200+
201+
it('leaves relative link URLs untouched', () => {
202+
expect(relativeLink.getAttribute('href')).to.equal(originalRelativeHref);
203+
});
204+
205+
it('leaves anchor link URLs untouched', () => {
206+
expect(anchorLink.getAttribute('href')).to.equal(originalAnchorHref);
207+
});
208+
209+
it('leaves image source URLs untouched', () => {
210+
expect(img.getAttribute('src')).to.equal(originalImgSrc);
211+
});
212+
});
213+
160214
describe('textual content preservation', () => {
161215
let codeElement;
162216
let paragraphElement;

0 commit comments

Comments
 (0)