Skip to content

Commit bc0b730

Browse files
authored
Merge pull request #2538 from openzim/remove-iframe
Remove any iframe in article content
2 parents 9f4ae0a + 9171856 commit bc0b730

File tree

3 files changed

+134
-87
lines changed

3 files changed

+134
-87
lines changed

Changelog

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ Unreleased:
4141
* FIX: Better handling of audio resources in articles for full ZIMs (@benoit74 #2420)
4242
* CHANGED: Rework file downloads to reduce pressure on servers and better handle throttling (@benoit74 #2377)
4343
* FIX: Automatically delete old Redis lists (@benoit74 #1906)
44-
* FIX: Also retry 524 HTTP errors - usually coming from Cloudflare handling of timeouts (@benoit74 #)
44+
* FIX: Also retry 524 HTTP errors - usually coming from Cloudflare handling of timeouts (@benoit74 #2526)
45+
# FIX: Remove any iframe in article content (@benoit74 #2537)
4546

4647
1.16.0:
4748
* CHANGED: ActionParse renderer is now the preferred one when available (@benoit74 #2183)

src/renderers/abstract.renderer.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,14 @@ export abstract class Renderer {
680680
element.parentElement.innerHTML = `${slices[0]}<!--htdig_noindex-->${element.outerHTML}<!--/htdig_noindex-->${slices[1]}`
681681
}
682682

683+
private removeIframeTags(parsoidDoc: DominoElement) {
684+
// Remove all <iframe> tags
685+
const iframes: DominoElement[] = Array.from(parsoidDoc.getElementsByTagName('iframe'))
686+
for (const iframe of iframes) {
687+
DU.deleteNode(iframe)
688+
}
689+
}
690+
683691
private clearLinkAndInputTags(parsoidDoc: DominoElement, filtersConfig: any, dump: Dump) {
684692
/* Don't need <link> and <input> tags */
685693
const nodesToDelete: Array<{ class?: string; tag?: string; filter?: (n: any) => boolean }> = [{ tag: 'link' }, { tag: 'input' }]
@@ -782,6 +790,8 @@ export abstract class Renderer {
782790
}
783791

784792
private applyOtherTreatments(parsoidDoc: DominoElement, dump: Dump, articleId: string) {
793+
this.removeIframeTags(parsoidDoc)
794+
785795
const filtersConfig = config.filters
786796
this.clearLinkAndInputTags(parsoidDoc, filtersConfig, dump)
787797

test/unit/renderers/processHtml.test.ts

Lines changed: 122 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -37,102 +37,138 @@ describe('processHtml', () => {
3737
return await testRenderer.processHtml(opts)
3838
}
3939

40-
function checkAudioOnly(data) {
41-
const { finalHTML, videoDependencies } = data
42-
const finalDoc = domino.createDocument(finalHTML)
43-
const audioEls = finalDoc.getElementsByTagName('audio')
44-
expect(audioEls.length).toBe(1)
45-
const audioEl = audioEls[0]
46-
expect(audioEl.getAttribute('src')).toContain(`./${expectedAudioPath}`)
47-
expect(audioEl.getAttribute('resource')).toBeNull()
48-
expect(videoDependencies.length).toBe(1)
49-
const videoDependency = videoDependencies[0]
50-
expect(videoDependency.path).toContain(expectedAudioPath)
51-
expect(videoDependency.url).toContain(`https:${audioSrc.replace('.ogg', '.')}`)
52-
}
40+
describe('audio tags', () => {
41+
function checkAudioOnly(data) {
42+
const { finalHTML, videoDependencies } = data
43+
const finalDoc = domino.createDocument(finalHTML)
44+
const audioEls = finalDoc.getElementsByTagName('audio')
45+
expect(audioEls.length).toBe(1)
46+
const audioEl = audioEls[0]
47+
expect(audioEl.getAttribute('src')).toContain(`./${expectedAudioPath}`)
48+
expect(audioEl.getAttribute('resource')).toBeNull()
49+
expect(videoDependencies.length).toBe(1)
50+
const videoDependency = videoDependencies[0]
51+
expect(videoDependency.path).toContain(expectedAudioPath)
52+
expect(videoDependency.url).toContain(`https:${audioSrc.replace('.ogg', '.')}`)
53+
}
5354

54-
it('rewrite audio with src attribute', async () => {
55-
checkAudioOnly(await testProcessHtml(`<audio src="${audioSrc}"></audio>`))
56-
})
55+
it('rewrite audio with src attribute', async () => {
56+
checkAudioOnly(await testProcessHtml(`<audio src="${audioSrc}"></audio>`))
57+
})
5758

58-
it('rewrite audio with resource attribute', async () => {
59-
checkAudioOnly(await testProcessHtml(`<audio resource="${audioSrc}"></audio>`))
60-
})
59+
it('rewrite audio with resource attribute', async () => {
60+
checkAudioOnly(await testProcessHtml(`<audio resource="${audioSrc}"></audio>`))
61+
})
6162

62-
it('rewrite audio with both src and resource attribute - order 1', async () => {
63-
checkAudioOnly(await testProcessHtml(`<audio src="${audioSrc}" resource="${audioSrc.replace('Elevator', 'Foo')}"></audio>`))
64-
})
63+
it('rewrite audio with both src and resource attribute - order 1', async () => {
64+
checkAudioOnly(await testProcessHtml(`<audio src="${audioSrc}" resource="${audioSrc.replace('Elevator', 'Foo')}"></audio>`))
65+
})
6566

66-
it('rewrite audio with both src and resource attribute - order 2', async () => {
67-
checkAudioOnly(await testProcessHtml(`<audio resource="${audioSrc.replace('Elevator', 'Foo')}" src="${audioSrc}"></audio>`))
68-
})
67+
it('rewrite audio with both src and resource attribute - order 2', async () => {
68+
checkAudioOnly(await testProcessHtml(`<audio resource="${audioSrc.replace('Elevator', 'Foo')}" src="${audioSrc}"></audio>`))
69+
})
6970

70-
function checkAudioWithSource(data) {
71-
const { finalHTML, videoDependencies } = data
72-
const finalDoc = domino.createDocument(finalHTML)
73-
const audioEls = finalDoc.getElementsByTagName('audio')
74-
expect(audioEls.length).toBe(1)
75-
const audioEl = audioEls[0]
76-
expect(audioEl.getAttribute('src')).toBeNull()
77-
expect(audioEl.getAttribute('resource')).toBeNull()
78-
const sourceEls = finalDoc.getElementsByTagName('source')
79-
expect(sourceEls.length).toBe(1)
80-
const sourceEl = sourceEls[0]
81-
expect(sourceEl.getAttribute('src')).toContain(`./${expectedAudioPath}`)
82-
expect(sourceEl.getAttribute('resource')).toBeNull()
83-
expect(videoDependencies.length).toBe(1)
84-
const videoDependency = videoDependencies[0]
85-
expect(videoDependency.path).toContain(expectedAudioPath)
86-
expect(videoDependency.url).toContain(`https:${audioSrc.replace('.ogg', '.')}`)
87-
}
71+
function checkAudioWithSource(data) {
72+
const { finalHTML, videoDependencies } = data
73+
const finalDoc = domino.createDocument(finalHTML)
74+
const audioEls = finalDoc.getElementsByTagName('audio')
75+
expect(audioEls.length).toBe(1)
76+
const audioEl = audioEls[0]
77+
expect(audioEl.getAttribute('src')).toBeNull()
78+
expect(audioEl.getAttribute('resource')).toBeNull()
79+
const sourceEls = finalDoc.getElementsByTagName('source')
80+
expect(sourceEls.length).toBe(1)
81+
const sourceEl = sourceEls[0]
82+
expect(sourceEl.getAttribute('src')).toContain(`./${expectedAudioPath}`)
83+
expect(sourceEl.getAttribute('resource')).toBeNull()
84+
expect(videoDependencies.length).toBe(1)
85+
const videoDependency = videoDependencies[0]
86+
expect(videoDependency.path).toContain(expectedAudioPath)
87+
expect(videoDependency.url).toContain(`https:${audioSrc.replace('.ogg', '.')}`)
88+
}
8889

89-
it('rewrite audio with src, resource attributes and one source', async () => {
90-
checkAudioWithSource(
91-
await testProcessHtml(`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc}"></audio>`),
92-
)
93-
})
90+
it('rewrite audio with src, resource attributes and one source', async () => {
91+
checkAudioWithSource(
92+
await testProcessHtml(`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc}"></audio>`),
93+
)
94+
})
9495

95-
it('rewrite audio with src, resource attributes and two sources', async () => {
96-
checkAudioWithSource(
97-
await testProcessHtml(
98-
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc}"><source src="${audioSrc.replace(
99-
'Elevator',
100-
'Foo',
101-
)}"></audio>`,
102-
),
103-
)
104-
})
96+
it('rewrite audio with src, resource attributes and two sources', async () => {
97+
checkAudioWithSource(
98+
await testProcessHtml(
99+
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc}"><source src="${audioSrc.replace(
100+
'Elevator',
101+
'Foo',
102+
)}"></audio>`,
103+
),
104+
)
105+
})
105106

106-
it('rewrite audio with src, resource attributes and two sources, ogg first', async () => {
107-
checkAudioWithSource(
108-
await testProcessHtml(
109-
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace(
110-
'Elevator',
111-
'Bar',
112-
)}"><source src="${audioSrc}" type="audio/ogg"><source src="${audioSrc.replace('Elevator', 'Foo')}"></audio>`,
113-
),
114-
)
115-
})
107+
it('rewrite audio with src, resource attributes and two sources, ogg first', async () => {
108+
checkAudioWithSource(
109+
await testProcessHtml(
110+
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace(
111+
'Elevator',
112+
'Bar',
113+
)}"><source src="${audioSrc}" type="audio/ogg"><source src="${audioSrc.replace('Elevator', 'Foo')}"></audio>`,
114+
),
115+
)
116+
})
117+
118+
it('rewrite audio with src, resource attributes and two sources, ogg last with type', async () => {
119+
checkAudioWithSource(
120+
await testProcessHtml(
121+
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc.replace(
122+
'.ogg',
123+
'.mp3',
124+
)}"><source src="${audioSrc.replace('.ogg', '.foo')}" type="audio/ogg"></audio>`,
125+
),
126+
)
127+
})
116128

117-
it('rewrite audio with src, resource attributes and two sources, ogg last with type', async () => {
118-
checkAudioWithSource(
119-
await testProcessHtml(
120-
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc.replace(
121-
'.ogg',
122-
'.mp3',
123-
)}"><source src="${audioSrc.replace('.ogg', '.foo')}" type="audio/ogg"></audio>`,
124-
),
125-
)
129+
it('rewrite audio with src, resource attributes and two sources, ogg last without type', async () => {
130+
checkAudioWithSource(
131+
await testProcessHtml(
132+
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc.replace(
133+
'.ogg',
134+
'.mp3',
135+
)}"><source src="${audioSrc}"><source src="${audioSrc.replace('Elevator', 'Foo')}"></audio>`,
136+
),
137+
)
138+
})
126139
})
127140

128-
it('rewrite audio with src, resource attributes and two sources, ogg last without type', async () => {
129-
checkAudioWithSource(
130-
await testProcessHtml(
131-
`<audio src="${audioSrc.replace('Elevator', 'Foo')}" resource="${audioSrc.replace('Elevator', 'Bar')}"><source src="${audioSrc.replace(
132-
'.ogg',
133-
'.mp3',
134-
)}"><source src="${audioSrc}"><source src="${audioSrc.replace('Elevator', 'Foo')}"></audio>`,
135-
),
136-
)
141+
describe('iframe', () => {
142+
it('remove empty iframe', async () => {
143+
const { finalHTML } = await testProcessHtml(`<div id="foo"><iframe></iframe></div>`)
144+
expect(finalHTML).toContain('<div id="foo"></div>')
145+
})
146+
it('remove full iframe', async () => {
147+
const { finalHTML } = await testProcessHtml(
148+
`<div id="foo">` +
149+
`<iframe id="inlineFrameExample" title="Inline Frame Example" width="300" height="200" ` +
150+
`src="https://www.openstreetmap.org/export/embed.html"></iframe></div>`,
151+
)
152+
expect(finalHTML).toContain('<div id="foo"></div>')
153+
})
154+
it('remove iframe "with content"', async () => {
155+
const { finalHTML } = await testProcessHtml(
156+
`<div id="foo">` +
157+
`<iframe id="inlineFrameExample" title="Inline Frame Example" width="300" height="200" ` +
158+
`src="https://www.openstreetmap.org/export/embed.html"><span>bar</span></iframe></div>`,
159+
)
160+
expect(finalHTML).toContain('<div id="foo"></div>')
161+
})
162+
it('remove multiple iframes', async () => {
163+
const { finalHTML } = await testProcessHtml(
164+
`<div id="foo">` +
165+
`<iframe id="inlineFrameExample1" title="Inline Frame Example 1" width="300" height="200" ` +
166+
`src="https://www.openstreetmap.org/export/embed.html"></iframe></div>` +
167+
`<div id="bar">` +
168+
`<iframe id="inlineFrameExample2" title="Inline Frame Example 2" width="300" height="200" ` +
169+
`src="https://www.openstreetmap.org/export/embed.html"></iframe></div>`,
170+
)
171+
expect(finalHTML).toContain('<div id="foo"></div><div id="bar"></div>')
172+
})
137173
})
138174
})

0 commit comments

Comments
 (0)