Skip to content

Commit 6e7a6eb

Browse files
Merge pull request #20408 from calixteman/fix_mml_encoding
Don't set the MathML namespace for attributes in MathML tags (bug 1997343)
2 parents c696648 + 6db2313 commit 6e7a6eb

File tree

5 files changed

+50
-6
lines changed

5 files changed

+50
-6
lines changed

src/core/struct_tree.js

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,12 @@
1313
* limitations under the License.
1414
*/
1515

16-
import { AnnotationPrefix, stringToPDFString, warn } from "../shared/util.js";
16+
import {
17+
AnnotationPrefix,
18+
stringToPDFString,
19+
stringToUTF8String,
20+
warn,
21+
} from "../shared/util.js";
1722
import { Dict, isName, Name, Ref, RefSetCache } from "./primitives.js";
1823
import { lookupNormalRect, stringToAsciiOrUTF16BE } from "./core_utils.js";
1924
import { BaseStream } from "./base_stream.js";
@@ -610,7 +615,8 @@ class StructElementNode {
610615
if (!isName(fileStream.dict.get("Subtype"), "application/mathml+xml")) {
611616
continue;
612617
}
613-
return fileStream.getString();
618+
// The default encoding for xml files is UTF-8.
619+
return stringToUTF8String(fileStream.getString());
614620
}
615621
const A = this.dict.get("A");
616622
if (A instanceof Dict) {

test/integration/accessibility_spec.mjs

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,46 @@ describe("accessibility", () => {
346346
});
347347
});
348348

349+
describe("MathML with some attributes in AF entry from LaTeX", () => {
350+
let pages;
351+
352+
beforeEach(async () => {
353+
pages = await loadAndWait("bug1997343.pdf", ".textLayer");
354+
});
355+
356+
afterEach(async () => {
357+
await closePages(pages);
358+
});
359+
360+
it("must check that the MathML is correctly inserted", async () => {
361+
await Promise.all(
362+
pages.map(async ([browserName, page]) => {
363+
const isSanitizerSupported = await page.evaluate(() => {
364+
try {
365+
// eslint-disable-next-line no-undef
366+
return typeof Sanitizer !== "undefined";
367+
} catch {
368+
return false;
369+
}
370+
});
371+
if (isSanitizerSupported) {
372+
const mathML = await page.$eval(
373+
"span.structTree span[aria-owns='p21R_mc64']",
374+
el => el?.innerHTML ?? ""
375+
);
376+
expect(mathML)
377+
.withContext(`In ${browserName}`)
378+
.toEqual(
379+
'<math display="block"> <msup> <mi>𝑛</mi> <mi>𝑝</mi> </msup> <mo lspace="0.278em" rspace="0.278em">=</mo> <mi>𝑛</mi> <mspace width="1.000em"></mspace> <mi> mod </mi> <mspace width="0.167em"></mspace> <mspace width="0.167em"></mspace> <mi>𝑝</mi> </math>'
380+
);
381+
} else {
382+
pending(`Sanitizer API (in ${browserName}) is not supported`);
383+
}
384+
})
385+
);
386+
});
387+
});
388+
349389
describe("MathML tags in the struct tree", () => {
350390
let pages;
351391

test/pdfs/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -753,3 +753,4 @@
753753
!bug1937438_af_from_latex.pdf
754754
!bug1937438_from_word.pdf
755755
!bug1937438_mml_from_latex.pdf
756+
!bug1997343.pdf

test/pdfs/bug1997343.pdf

185 KB
Binary file not shown.

web/struct_tree_layer_builder.js

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -155,10 +155,7 @@ class MathMLSanitizer {
155155
"accentunder",
156156
"columnspan",
157157
"rowspan",
158-
].map(name => ({
159-
name,
160-
namespace: MathMLNamespace,
161-
})),
158+
],
162159
comments: false,
163160
})
164161
: null

0 commit comments

Comments
 (0)