Skip to content

Commit 7849864

Browse files
committed
fix #8541 - whitelist youtube (and vimeo) videos on the share server (and in timetravel)
- I did this by switching from xss to sanitize-html, which seems to be a lot cleaner and easier to use, just as popular, updated more recently, etc.
1 parent 6d3b172 commit 7849864

File tree

6 files changed

+262
-435
lines changed

6 files changed

+262
-435
lines changed

src/packages/frontend/components/html-ssr.tsx

Lines changed: 27 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -17,59 +17,18 @@ import React from "react";
1717
import htmlReactParser, {
1818
attributesToProps,
1919
domToReact,
20-
Element, Text
20+
Element,
21+
Text,
2122
} from "html-react-parser";
22-
import stripXSS, { safeAttrValue, whiteList } from "xss";
23-
import type { IFilterXSSOptions } from "xss";
23+
import sanitizeHtml from "sanitize-html";
2424
import { useFileContext } from "@cocalc/frontend/lib/file-context";
2525
import DefaultMath from "@cocalc/frontend/components/math/ssr";
2626
import { MathJaxConfig } from "@cocalc/util/mathjax-config";
2727
import { decodeHTML } from "entities";
2828

2929
const URL_ATTRIBS = ["src", "href", "data"];
30-
3130
const MATH_SKIP_TAGS = new Set<string>(MathJaxConfig.tex2jax.skipTags);
3231

33-
function getXSSOptions(urlTransform): IFilterXSSOptions | undefined {
34-
// - stripIgnoreTagBody - completely get rid of dangerous HTML
35-
// (otherwise user sees weird mangled style code, when seeing
36-
// nothing would be better).
37-
// - whiteList - we need iframes to support 3d graphics; unfortunately this
38-
// isn't safe without a lot more work, so we do NOT enable them.
39-
return {
40-
stripIgnoreTagBody: true,
41-
// SECURITY: whitelist note -- we had tried to explicitly allow mathjax script tags in sanitized html
42-
// by whitelisting and scanning. However, this didn't properly work (perhaps due to some update)
43-
// and resulted in a security vulnerability:
44-
// https://github.com/sagemathinc/cocalc/security/advisories/GHSA-8w44-hggw-p5rf
45-
// The fix is completley removing any whitelisting of any script tags. The feature of
46-
// mathjax in html is not important enough to support, and too dangerous -- even if it worked,
47-
// it would probably be an easy attack vector by just making up fake mathjax.
48-
// Due to https://github.com/sagemathinc/cocalc/security/advisories/GHSA-jpjc-pwjv-j9mg
49-
// we also remove all use of iframes, which
50-
whiteList: {
51-
...whiteList,
52-
// DISABLED due to https://github.com/sagemathinc/cocalc/security/advisories/GHSA-jpjc-pwjv-j9mg
53-
// iframe: ["src", "srcdoc", "width", "height"],
54-
iframe: [],
55-
html: [],
56-
},
57-
safeAttrValue: (tag, name, value) => {
58-
// disabled since not sufficiently secure.
59-
// if (tag == "iframe" && name == "srcdoc") {
60-
// // important not to mangle this or it won't work.
61-
// return value;
62-
// }
63-
if (urlTransform && URL_ATTRIBS.includes(name)) {
64-
// use the url transform
65-
return urlTransform(value, tag, name) ?? value;
66-
}
67-
// fallback to the builtin version
68-
return safeAttrValue(tag, name, value, false as any);
69-
},
70-
};
71-
}
72-
7332
export default function HTML({
7433
value,
7534
style,
@@ -82,7 +41,30 @@ export default function HTML({
8241
const { urlTransform, AnchorTagComponent, noSanitize, MathComponent } =
8342
useFileContext();
8443
if (!noSanitize) {
85-
value = stripXSS(value, getXSSOptions(urlTransform));
44+
value = sanitizeHtml(value, {
45+
allowedTags: sanitizeHtml.defaults.allowedTags.concat(["img", "iframe"]),
46+
allowedAttributes: {
47+
...sanitizeHtml.defaults.allowedAttributes,
48+
iframe: [
49+
"src",
50+
"width",
51+
"height",
52+
"title",
53+
"allow",
54+
"allowfullscreen",
55+
"referrerpolicy",
56+
"loading",
57+
"frameborder",
58+
],
59+
},
60+
allowedIframeHostnames: [
61+
"www.youtube.com",
62+
"youtube.com",
63+
"www.youtube-nocookie.com",
64+
"youtube-nocookie.com",
65+
"player.vimeo.com",
66+
],
67+
});
8668
}
8769
if (value.trimLeft().startsWith("<html>")) {
8870
// Sage output formulas are wrapped in "<html>" for some stupid reason, which
@@ -143,28 +125,6 @@ export default function HTML({
143125
</AnchorTagComponent>
144126
);
145127
}
146-
if (name == "iframe") {
147-
// We sandbox and minimize what we allow. Don't
148-
// use {...attribs} due to srcDoc vs srcdoc.
149-
// We don't allow setting the style, since that leads
150-
// to a lot of attacks (i.e., making the iframe move in a
151-
// sneaky way). We have to allow-same-origin or scripts
152-
// won't work at all, which is one of the main uses for
153-
// iframes. A good test is 3d graphics in Sage kernel
154-
// Jupyter notebooks.
155-
// TODO: Except this is a security issue, since
156-
// combining allow-scripts & allow-same-origin makes it
157-
// possible to remove a lot of sandboxing.
158-
return (
159-
<iframe
160-
src={attribs.src}
161-
srcDoc={attribs.srcdoc}
162-
width={attribs.width}
163-
height={attribs.height}
164-
sandbox="allow-forms allow-scripts allow-same-origin"
165-
/>
166-
);
167-
}
168128

169129
if (noSanitize && urlTransform != null && attribs != null) {
170130
// since we did not sanitize the HTML (which also does urlTransform),
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
export function parseYouTubeEmbedId(src: string): string | null {
2+
try {
3+
const u = new URL(src, "https://dummy.example"); // handle relative
4+
if (u.protocol !== "https:") return null;
5+
6+
const host = u.hostname.toLowerCase();
7+
const allowedHosts = new Set([
8+
"www.youtube.com",
9+
"youtube.com",
10+
"www.youtube-nocookie.com",
11+
"youtube-nocookie.com",
12+
]);
13+
14+
if (!allowedHosts.has(host)) return null;
15+
16+
// Only allow /embed/<VIDEO_ID> (11-char ID)
17+
const m = u.pathname.match(/^\/embed\/([a-zA-Z0-9_-]{11})$/);
18+
if (!m) return null;
19+
return m[1]; // the 11-char video id
20+
} catch {
21+
return null;
22+
}
23+
}
24+
25+
export function buildSafeYouTubeIframeHTML(
26+
videoId: string,
27+
opts?: { width?: string; height?: string; title?: string },
28+
): string {
29+
const width =
30+
opts?.width && /^\d{1,4}$/.test(opts.width) ? opts.width : "560";
31+
const height =
32+
opts?.height && /^\d{1,4}$/.test(opts.height) ? opts.height : "315";
33+
const title = (opts?.title ?? "YouTube video player").replace(/"/g, "&quot;");
34+
35+
// Force privacy-enhanced domain and a tight, fixed attribute set
36+
const src = `https://www.youtube-nocookie.com/embed/${videoId}`;
37+
38+
return `<iframe width="${width}" height="${height}" src="${src}" title="${title}" frameborder="0" loading="lazy" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>`;
39+
}

src/packages/frontend/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
"react-redux": "^9.2.0",
136136
"react-timeago": "^8.2.0",
137137
"react-virtuoso": "^4.9.0",
138+
"sanitize-html": "^2.17.0",
138139
"slate": "^0.103.0",
139140
"superb": "^3.0.0",
140141
"three-ancient": "npm:three@=0.78.0",
@@ -145,7 +146,6 @@
145146
"use-resize-observer": "^9.1.0",
146147
"utility-types": "^3.10.0",
147148
"video-extensions": "^1.2.0",
148-
"xss": "^1.0.11",
149149
"zlibjs": "^0.3.1"
150150
},
151151
"devDependencies": {

0 commit comments

Comments
 (0)