Skip to content

Commit 4660462

Browse files
authored
Fix built-in removeQueryParams filter to not modify URLs without target parameters (#1195)
2 parents a1d3b00 + 754a467 commit 4660462

File tree

3 files changed

+104
-33
lines changed

3 files changed

+104
-33
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22

33
All changes that impact users of this module are documented in this file, in the [Common Changelog](https://common-changelog.org) format with some additional specifications defined in the CONTRIBUTING file. This codebase adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
44

5+
## Unreleased [patch]
6+
7+
> Development of this release was supported by the [French Ministry for Foreign Affairs](https://www.diplomatie.gouv.fr/fr/politique-etrangere-de-la-france/diplomatie-numerique/) through its ministerial [State Startups incubator](https://beta.gouv.fr/startups/open-terms-archive.html) under the aegis of the Ambassador for Digital Affairs.
8+
9+
### Fixed
10+
11+
- Fix built-in `removeQueryParams` filter to not modify URLs without target parameters
12+
513
## 7.2.3 - 2025-09-17
614

715
_Full changeset and discussions: [#1192](https://github.com/OpenTermsArchive/engine/pull/1192)._
src/archivist/extract/exposedFilters.js

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,28 @@
11
export function removeQueryParams(webPageDOM, paramsToRemove = []) {
2-
if (typeof paramsToRemove === 'string') {
3-
paramsToRemove = [paramsToRemove];
4-
}
2+
const normalizedParams = Array.isArray(paramsToRemove) ? paramsToRemove : [paramsToRemove];
53

6-
if (!paramsToRemove.length) {
4+
if (!normalizedParams.length) {
75
return;
86
}
97

10-
const elements = [
11-
...webPageDOM.querySelectorAll('a[href]'),
12-
...webPageDOM.querySelectorAll('img[src]'),
13-
];
8+
const elements = webPageDOM.querySelectorAll('a[href], img[src]');
149

15-
elements.forEach(element => {
10+
for (const element of elements) {
1611
try {
17-
const url = new URL(element.href || element.src);
12+
const urlString = element.href || element.src;
13+
const url = new URL(urlString);
14+
15+
const hasTargetParams = normalizedParams.some(param => url.searchParams.has(param));
16+
17+
if (hasTargetParams) {
18+
normalizedParams.forEach(param => url.searchParams.delete(param));
1819

19-
paramsToRemove.forEach(param => url.searchParams.delete(param));
20-
element[element.tagName === 'A' ? 'href' : 'src'] = url.toString();
21-
} catch (error) {
22-
// ignore if the element has not a valid URL
20+
const attributeName = element.tagName === 'A' ? 'href' : 'src';
21+
22+
element[attributeName] = url.toString();
23+
}
24+
} catch {
25+
// Silently ignore invalid URLs
2326
}
24-
});
27+
}
2528
}

src/archivist/extract/exposedFilters.test.js

Lines changed: 78 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ describe('exposedFilters', () => {
77
let webPageDOM;
88

99
before(() => {
10-
webPageDOM = createWebPageDOM('<!DOCTYPE html><html><body></body></html>');
10+
webPageDOM = createWebPageDOM('<!DOCTYPE html><html><body></body></html>', 'https://example.com');
1111
});
1212

1313
describe('#removeQueryParams', () => {
@@ -16,7 +16,7 @@ describe('exposedFilters', () => {
1616

1717
before(() => {
1818
link = webPageDOM.createElement('a');
19-
link.href = 'https://example.com/page?utm_source=test&keep=value';
19+
link.setAttribute('href', 'https://example.com/page?utm_source=test&keep=value');
2020
webPageDOM.body.appendChild(link);
2121
});
2222

@@ -27,7 +27,7 @@ describe('exposedFilters', () => {
2727
it('removes the specified query parameters', () => {
2828
removeQueryParams(webPageDOM, ['utm_source']);
2929

30-
expect(link.href).to.equal('https://example.com/page?keep=value');
30+
expect(link.getAttribute('href')).to.equal('https://example.com/page?keep=value');
3131
});
3232
});
3333

@@ -36,7 +36,7 @@ describe('exposedFilters', () => {
3636

3737
before(() => {
3838
img = webPageDOM.createElement('img');
39-
img.src = 'https://example.com/image.jpg?width=100&keep=value';
39+
img.setAttribute('src', 'https://example.com/image.jpg?width=100&keep=value');
4040
webPageDOM.body.appendChild(img);
4141
});
4242

@@ -47,7 +47,7 @@ describe('exposedFilters', () => {
4747
it('removes the specified query parameters', () => {
4848
removeQueryParams(webPageDOM, ['width']);
4949

50-
expect(img.src).to.equal('https://example.com/image.jpg?keep=value');
50+
expect(img.getAttribute('src')).to.equal('https://example.com/image.jpg?keep=value');
5151
});
5252
});
5353

@@ -56,7 +56,7 @@ describe('exposedFilters', () => {
5656

5757
before(() => {
5858
link = webPageDOM.createElement('a');
59-
link.href = 'https://example.com/page?utm_source=test&keep=value';
59+
link.setAttribute('href', 'https://example.com/page?utm_source=test&keep=value');
6060
webPageDOM.body.appendChild(link);
6161
});
6262

@@ -67,7 +67,7 @@ describe('exposedFilters', () => {
6767
it('removes a single query parameter passed as string', () => {
6868
removeQueryParams(webPageDOM, 'utm_source');
6969

70-
expect(link.href).to.equal('https://example.com/page?keep=value');
70+
expect(link.getAttribute('href')).to.equal('https://example.com/page?keep=value');
7171
});
7272
});
7373

@@ -76,7 +76,7 @@ describe('exposedFilters', () => {
7676

7777
before(() => {
7878
link = webPageDOM.createElement('a');
79-
link.href = 'https://example.com/page?utm_source=test&keep=value';
79+
link.setAttribute('href', 'https://example.com/page?utm_source=test&keep=value');
8080
webPageDOM.body.appendChild(link);
8181
});
8282

@@ -87,27 +87,33 @@ describe('exposedFilters', () => {
8787
it('leaves the URL unchanged', () => {
8888
removeQueryParams(webPageDOM, []);
8989

90-
expect(link.href).to.equal('https://example.com/page?utm_source=test&keep=value');
90+
expect(link.getAttribute('href')).to.equal('https://example.com/page?utm_source=test&keep=value');
9191
});
9292
});
9393

9494
describe('with invalid URLs', () => {
9595
let link;
96+
let webPageDOMWithBaseURL;
97+
const invalidURL = 'ht^THIS_IS_WRONG^tp://example.com?utm_source=test';
9698

9799
before(() => {
98-
link = webPageDOM.createElement('a');
99-
link.href = 'ht^THIS_IS_WRONG^tp://example.com?utm_source=test';
100-
webPageDOM.body.appendChild(link);
100+
webPageDOMWithBaseURL = createWebPageDOM('<!DOCTYPE html><html><body></body></html>');
101+
});
102+
103+
before(() => {
104+
link = webPageDOMWithBaseURL.createElement('a');
105+
link.setAttribute('href', invalidURL);
106+
webPageDOMWithBaseURL.body.appendChild(link);
101107
});
102108

103109
after(() => {
104110
link.remove();
105111
});
106112

107113
it('ignores elements with invalid URLs', () => {
108-
removeQueryParams(webPageDOM, ['utm_source']);
114+
removeQueryParams(webPageDOMWithBaseURL, ['utm_source']);
109115

110-
expect(link.href).to.equal('ht^THIS_IS_WRONG^tp://example.com?utm_source=test');
116+
expect(link.getAttribute('href')).to.equal(invalidURL);
111117
});
112118
});
113119

@@ -116,7 +122,7 @@ describe('exposedFilters', () => {
116122

117123
before(() => {
118124
link = webPageDOM.createElement('a');
119-
link.href = 'https://example.com/page?utm_source=test&utm_medium=email&keep=value&remove=me';
125+
link.setAttribute('href', 'https://example.com/page?utm_source=test&utm_medium=email&keep=value&remove=me');
120126
webPageDOM.body.appendChild(link);
121127
});
122128

@@ -127,7 +133,7 @@ describe('exposedFilters', () => {
127133
it('removes all specified query parameters', () => {
128134
removeQueryParams(webPageDOM, [ 'utm_source', 'utm_medium', 'remove' ]);
129135

130-
expect(link.href).to.equal('https://example.com/page?keep=value');
136+
expect(link.getAttribute('href')).to.equal('https://example.com/page?keep=value');
131137
});
132138
});
133139

@@ -136,7 +142,7 @@ describe('exposedFilters', () => {
136142

137143
before(() => {
138144
link = webPageDOM.createElement('a');
139-
link.href = 'https://example.com/test?utm_source=to_remove_1&keep=true&utm_source=to_remove_2';
145+
link.setAttribute('href', 'https://example.com/test?utm_source=to_remove_1&keep=true&utm_source=to_remove_2');
140146
webPageDOM.body.appendChild(link);
141147
});
142148

@@ -147,7 +153,61 @@ describe('exposedFilters', () => {
147153
it('removes all instances of duplicate query parameters', () => {
148154
removeQueryParams(webPageDOM, ['utm_source']);
149155

150-
expect(link.href).to.equal('https://example.com/test?keep=true');
156+
expect(link.getAttribute('href')).to.equal('https://example.com/test?keep=true');
157+
});
158+
});
159+
160+
describe('with URLs without target parameters', () => {
161+
let absoluteLink;
162+
let relativeLink;
163+
let anchorLink;
164+
let img;
165+
const originalAbsoluteHref = 'https://example.com/page?keep=value&preserve=me';
166+
const originalRelativeHref = './relative/path?existing=param';
167+
const originalAnchorHref = '#section1';
168+
const originalImgSrc = 'https://example.com/image.jpg?width=100&height=200';
169+
170+
before(() => {
171+
absoluteLink = webPageDOM.createElement('a');
172+
absoluteLink.setAttribute('href', originalAbsoluteHref);
173+
webPageDOM.body.appendChild(absoluteLink);
174+
175+
relativeLink = webPageDOM.createElement('a');
176+
relativeLink.setAttribute('href', originalRelativeHref);
177+
webPageDOM.body.appendChild(relativeLink);
178+
179+
anchorLink = webPageDOM.createElement('a');
180+
anchorLink.setAttribute('href', originalAnchorHref);
181+
webPageDOM.body.appendChild(anchorLink);
182+
183+
img = webPageDOM.createElement('img');
184+
img.setAttribute('src', originalImgSrc);
185+
webPageDOM.body.appendChild(img);
186+
187+
removeQueryParams(webPageDOM, [ 'utm_source', 'utm_medium', 'session_id' ]);
188+
});
189+
190+
after(() => {
191+
absoluteLink.remove();
192+
relativeLink.remove();
193+
anchorLink.remove();
194+
img.remove();
195+
});
196+
197+
it('leaves absolute link URLs untouched', () => {
198+
expect(absoluteLink.getAttribute('href')).to.equal(originalAbsoluteHref);
199+
});
200+
201+
it('leaves relative link URLs untouched', () => {
202+
expect(relativeLink.getAttribute('href')).to.equal(originalRelativeHref);
203+
});
204+
205+
it('leaves anchor link URLs untouched', () => {
206+
expect(anchorLink.getAttribute('href')).to.equal(originalAnchorHref);
207+
});
208+
209+
it('leaves image source URLs untouched', () => {
210+
expect(img.getAttribute('src')).to.equal(originalImgSrc);
151211
});
152212
});
153213

0 commit comments

Comments
 (0)