Skip to content

Commit fa7ea76

Browse files
committed
fix issue with url transforms
- The core problem was a check, 'domNode instanceof Element', which now clearly fails on nodes that are in fact elements. My guess is that some library upgrade (maybe React 19) caused this check to stop working. - I removed the check, which may make the code less efficient (?), but I wrapped the code in a try/catch just in case; if anything throws, the result is the same as what it did before. - This specifically happened with Scott's fluid mechanics book.
1 parent 5007001 commit fa7ea76

File tree

2 files changed

+68
-65
lines changed

2 files changed

+68
-65
lines changed

src/packages/frontend/components/html-ssr.tsx

Lines changed: 66 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import DefaultMath from "@cocalc/frontend/components/math/ssr";
2626
import { MathJaxConfig } from "@cocalc/util/mathjax-config";
2727
import { decodeHTML } from "entities";
2828

29-
const URL_TAGS = ["src", "href", "data"];
29+
const URL_ATTRIBS = ["src", "href", "data"];
3030

3131
const MATH_SKIP_TAGS = new Set<string>(MathJaxConfig.tex2jax.skipTags);
3232

@@ -60,7 +60,7 @@ function getXSSOptions(urlTransform): IFilterXSSOptions | undefined {
6060
// // important not to mangle this or it won't work.
6161
// return value;
6262
// }
63-
if (urlTransform && URL_TAGS.includes(name)) {
63+
if (urlTransform && URL_ATTRIBS.includes(name)) {
6464
// use the url transform
6565
return urlTransform(value, tag, name) ?? value;
6666
}
@@ -95,7 +95,6 @@ export default function HTML({
9595
}
9696
let options: any = {};
9797
options.replace = (domNode) => {
98-
// console.log("domNode = ", domNode);
9998
if (!/^[a-zA-Z]+[0-9]?$/.test(domNode.name)) {
10099
// Without this, if user gives html input that is a malformed tag then all of React
101100
// completely crashes, which is not desirable for us. On the other hand, I prefer not
@@ -116,78 +115,81 @@ export default function HTML({
116115
return <DefaultMath data={decodeHTML(data)} />;
117116
}
118117

119-
if (!(domNode instanceof Element)) return;
118+
try {
119+
const { name, children, attribs } = domNode;
120120

121-
const { name, children, attribs } = domNode;
122-
123-
if (name == "script") {
124-
const type = domNode.attribs?.type?.toLowerCase();
125-
if (type?.startsWith("math/tex")) {
126-
const child = domNode.children?.[0];
127-
if (child instanceof Text && child.data) {
128-
let data = "$" + decodeHTML(child.data) + "$";
129-
if (type.includes("display")) {
130-
data = "$" + data + "$";
131-
}
132-
if (MathComponent != null) {
133-
return <MathComponent data={data} />;
121+
if (name == "script") {
122+
const type = domNode.attribs?.type?.toLowerCase();
123+
if (type?.startsWith("math/tex")) {
124+
const child = domNode.children?.[0];
125+
if (child instanceof Text && child.data) {
126+
let data = "$" + decodeHTML(child.data) + "$";
127+
if (type.includes("display")) {
128+
data = "$" + data + "$";
129+
}
130+
if (MathComponent != null) {
131+
return <MathComponent data={data} />;
132+
}
133+
return <DefaultMath data={data} />;
134134
}
135-
return <DefaultMath data={data} />;
136135
}
137136
}
138-
}
139137

140-
if (AnchorTagComponent != null && name == "a") {
141-
return (
142-
<AnchorTagComponent {...attribs}>
143-
{domToReact(children as any, options)}
144-
</AnchorTagComponent>
145-
);
146-
}
147-
if (name == "iframe") {
148-
// We sandbox and minimize what we allow. Don't
149-
// use {...attribs} due to srcDoc vs srcdoc.
150-
// We don't allow setting the style, since that leads
151-
// to a lot of attacks (i.e., making the iframe move in a
152-
// sneaky way). We have to allow-same-origin or scripts
153-
// won't work at all, which is one of the main uses for
154-
// iframes. A good test is 3d graphics in Sage kernel
155-
// Jupyter notebooks.
156-
// TODO: Except this is a security issue, since
157-
// combining allow-scripts & allow-same-origin makes it
158-
// possible to remove a lot of sandboxing.
159-
return (
160-
<iframe
161-
src={attribs.src}
162-
srcDoc={attribs.srcdoc}
163-
width={attribs.width}
164-
height={attribs.height}
165-
sandbox="allow-forms allow-scripts allow-same-origin"
166-
/>
167-
);
168-
}
138+
if (AnchorTagComponent != null && name == "a") {
139+
return (
140+
<AnchorTagComponent {...attribs}>
141+
{domToReact(children as any, options)}
142+
</AnchorTagComponent>
143+
);
144+
}
145+
if (name == "iframe") {
146+
// We sandbox and minimize what we allow. Don't
147+
// use {...attribs} due to srcDoc vs srcdoc.
148+
// We don't allow setting the style, since that leads
149+
// to a lot of attacks (i.e., making the iframe move in a
150+
// sneaky way). We have to allow-same-origin or scripts
151+
// won't work at all, which is one of the main uses for
152+
// iframes. A good test is 3d graphics in Sage kernel
153+
// Jupyter notebooks.
154+
// TODO: Except this is a security issue, since
155+
// combining allow-scripts & allow-same-origin makes it
156+
// possible to remove a lot of sandboxing.
157+
return (
158+
<iframe
159+
src={attribs.src}
160+
srcDoc={attribs.srcdoc}
161+
width={attribs.width}
162+
height={attribs.height}
163+
sandbox="allow-forms allow-scripts allow-same-origin"
164+
/>
165+
);
166+
}
169167

170-
if (noSanitize && urlTransform != null && attribs != null) {
171-
// since we did not sanitize the HTML (which also does urlTransform),
172-
// we have to do the urlTransform here instead.
173-
for (const tag of URL_TAGS) {
174-
if (attribs[tag] != null) {
175-
const x = urlTransform(attribs[tag]);
176-
if (x != null) {
177-
const props = attributesToProps(attribs);
178-
props[tag] = x;
179-
return React.createElement(
180-
name,
181-
props,
182-
children && children?.length > 0
183-
? domToReact(children as any, options)
184-
: undefined,
185-
);
168+
if (noSanitize && urlTransform != null && attribs != null) {
169+
// since we did not sanitize the HTML (which also does urlTransform),
170+
// we have to do the urlTransform here instead.
171+
for (const attrib of URL_ATTRIBS) {
172+
if (attribs[attrib] != null) {
173+
const x = urlTransform(attribs[attrib]);
174+
if (x != null) {
175+
const props = attributesToProps(attribs);
176+
props[attrib] = x;
177+
return React.createElement(
178+
name,
179+
props,
180+
children && children?.length > 0
181+
? domToReact(children as any, options)
182+
: undefined,
183+
);
184+
}
186185
}
187186
}
188187
}
188+
} catch (err) {
189+
console.log("WARNING -- issue parsing HTML", err);
189190
}
190191
};
192+
191193
if (inline) {
192194
return <span style={style}>{htmlReactParser(value, options)}</span>;
193195
} else {

src/packages/util/latex-envs.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export default function latexEnvs(value: string): string {
99
}
1010

1111
/*
12-
transformFigures -- dumb parser to turn this:
12+
transformFigures -- simple parser to turn this:
1313
1414
---
1515
@@ -87,6 +87,7 @@ function transformFigures(content: string): string {
8787
}
8888

8989
const md = `\n\n<div style="text-align:center;margin:20px auto;max-width:750px"><img src="${url}" style="${style}"/><br/><br/><b>Figure${figlabel}:</b> ${caption}</div>\n\n`;
90+
//const md = `\n\n<img src="${url}" style="${style}"/>\n\n**Figure${figlabel}:** ${caption}\n\n`;
9091
content =
9192
content.slice(0, i) + md + content.slice(j + "\\end{figure}".length);
9293
}

0 commit comments

Comments
 (0)