Skip to content

Commit 372d8ea

Browse files
committed
Improved HTML exports
1 parent 3e670eb commit 372d8ea

File tree

1 file changed

+26
-13
lines changed

1 file changed

+26
-13
lines changed

js/export/writeHtml.js

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { assignParagraphs } from '../utils/reflowPars.js';
55
import { pageMetricsArr } from '../containers/dataContainer.js';
66
import ocr from '../objects/ocrObjects.js';
77

8+
/**
 * Format a number as a compact decimal string for use in generated CSS values.
 * The value is rounded to at most 5 decimal places, and trailing zeros (along with
 * a dangling decimal point) are removed, e.g. `1.50000` -> `1.5` and `10.00000` -> `10`.
 *
 * @param {number} num - Number to format.
 * @returns {string} Compact decimal representation of `num`.
 */
const formatNum = (num) => {
  // `toFixed(5)` always emits a decimal point, so the regex can only strip fractional zeros
  // (and the point itself), never significant zeros in the integer part.
  const trimmed = num.toFixed(5).replace(/\.?0+$/, '');
  // Tiny negative values round to "-0.00000", which trims to "-0"; emit a plain "0" instead.
  return trimmed === '-0' ? '0' : trimmed;
};
9+
810
/**
911
* Calculate the font metrics for a given font and font size.
1012
* This is used to get metrics that match `ctx.measureText`, but without requiring a canvas.
@@ -89,8 +91,9 @@ export function writeHtml({
8991
if (activeLine.bodyWordsStr !== '') {
9092
const topHTML = Math.round((activeLine.y1 - activeLine.maxFontBoundingBoxAscentLine) * 1000) / 1000;
9193
bodyStr += ` <div class="scribe-line" style="left:${activeLine.left}px;top:${topHTML}px;">\n`;
94+
bodyStr += ' ';
9295
bodyStr += activeLine.bodyWordsStr;
93-
bodyStr += ' <br>\n';
96+
bodyStr += '<br>\n';
9497
bodyStr += ' </div>\n';
9598
}
9699
activeLine.bodyWordsStr = '';
@@ -101,8 +104,6 @@ export function writeHtml({
101104

102105
let top = 0;
103106

104-
let firstPage = true;
105-
106107
for (let g = minpage; g <= maxpage; g++) {
107108
// TODO: change this when an image is included.
108109
if (!ocrPages[g] || ocrPages[g].lines.length === 0) continue;
@@ -123,8 +124,6 @@ export function writeHtml({
123124
}
124125
}
125126

126-
if (!firstPage) bodyStr += '\n</div>\n';
127-
firstPage = false;
128127
bodyStr += ` <div class="scribe-page" id="page${g}" style="position:absolute;top:${top}px;">\n`;
129128

130129
const imageObj = images ? images[g] : null;
@@ -145,6 +144,7 @@ export function writeHtml({
145144

146145
let parCurrent = pageObj.lines[0].par;
147146
let wordObjPrev = /** @type {?OcrWord} */ (null);
147+
let advanceDiffPrev = 0;
148148
let rightSideBearingPrev = 0;
149149
let charSpacingHTMLPrev = 0;
150150

@@ -181,8 +181,6 @@ export function writeHtml({
181181

182182
activeLine.left = wordObj.bbox.left - minLeft;
183183
if (wordObj.visualCoords) activeLine.left -= leftSideBearing * scale;
184-
} else if (h > 0 || g > 0 || i > 0) {
185-
bodyStr += ' ';
186184
}
187185

188186
newLine = false;
@@ -253,15 +251,19 @@ export function writeHtml({
253251

254252
let leftPad = 0;
255253
if (wordObjPrev) {
256-
let bearingAdj = 0;
254+
let spaceAdj = 0;
257255
if (wordObj.visualCoords) {
258-
bearingAdj = leftSideBearing + rightSideBearingPrev;
256+
spaceAdj = leftSideBearing + rightSideBearingPrev;
257+
} else {
258+
// This is usually 0, however can be non-zero when the PDF glyph advances
259+
// are different from the HTML glyph advances.
260+
spaceAdj = advanceDiffPrev;
259261
}
260262

261-
leftPad = (wordObj.bbox.left - wordObjPrev.bbox.right - bearingAdj - charSpacingHTMLPrev) / Math.cos(angle);
263+
leftPad = (wordObj.bbox.left - wordObjPrev.bbox.right - spaceAdj - charSpacingHTMLPrev) / Math.cos(angle);
262264
}
263265

264-
styleStr += `letter-spacing:${charSpacingHTML}px;`;
266+
styleStr += `letter-spacing:${formatNum(charSpacingHTML)}px;`;
265267

266268
styleStr += `font-weight:${fontI.fontFaceWeight};`;
267269
styleStr += `font-style:${fontI.fontFaceStyle};`;
@@ -294,10 +296,12 @@ export function writeHtml({
294296
} else {
295297
styleStrSpace += `font-size:${fontSizeHTML}px;`;
296298
const leftPadFinal = leftPad - spaceAdvancePx * fontSizeHTML;
297-
styleStrSpace += `word-spacing:${leftPadFinal}px;`;
299+
styleStrSpace += `word-spacing:${formatNum(leftPadFinal)}px;`;
298300
}
299301

300302
if (underlinePrev) {
303+
styleStrSpace += `color:${fill};`;
304+
styleStrSpace += `opacity:${opacity};`;
301305
styleStrSpace += 'text-decoration:underline;';
302306
styleStrSpace += `text-decoration-color:${fill};`;
303307
styleStrSpace += `text-decoration-thickness:${Math.ceil(fontSizeHTML / 12)}px;`;
@@ -311,14 +315,19 @@ export function writeHtml({
311315

312316
underlinePrev = wordObj.style.underline;
313317

318+
const advanceTotalHTML = advanceArr.reduce((a, b) => a + b, 0)
319+
+ kerningArr.reduce((a, b) => a + b, 0)
320+
+ charSpacingHTML * (charArr.length - 1);
321+
advanceDiffPrev = advanceTotalHTML - (wordObj.bbox.right - wordObj.bbox.left);
322+
314323
wordObjPrev = wordObj;
315324
rightSideBearingPrev = rightSideBearing;
316325
charSpacingHTMLPrev = charSpacingHTML;
317326
}
318327
}
319328

320329
addLine();
321-
bodyStr += '\n </div>\n';
330+
bodyStr += ' </div>\n';
322331

323332
opt.progressHandler({ n: g, type: 'export', info: { } });
324333
}
@@ -340,6 +349,10 @@ export function writeHtml({
340349
styleStr += ' white-space:nowrap;\n';
341350
styleStr += ' }\n';
342351

352+
styleStr += ' .scribe-page {\n';
353+
styleStr += ' text-decoration-skip-ink:none;\n';
354+
styleStr += ' }\n';
355+
343356
styleStr += ' .scribe-image {\n';
344357
styleStr += ' position:absolute;\n';
345358
styleStr += ' user-select:none;\n';

0 commit comments

Comments
 (0)