@@ -5,6 +5,8 @@ import { assignParagraphs } from '../utils/reflowPars.js';
5
5
import { pageMetricsArr } from '../containers/dataContainer.js' ;
6
6
import ocr from '../objects/ocrObjects.js' ;
7
7
8
/**
 * Format a number for CSS output using at most 5 decimal places.
 * Trailing zeros after the decimal point (and a dangling decimal point) are removed,
 * e.g. `1.50000` -> `1.5` and `2.00000` -> `2`.
 *
 * @param {number} num - Value to format.
 * @returns {string} Compact decimal representation of `num`.
 */
const formatNum = (num) => {
  const fixed = num.toFixed(5);

  // `toFixed` falls back to exponential notation for magnitudes >= 1e21 and yields
  // "NaN"/"Infinity" for non-finite input. Those strings have no fractional zeros to
  // trim, and trimming would corrupt the exponent (e.g. "1e+30" -> "1e+3").
  if (fixed.includes('e') || !fixed.includes('.')) return fixed;

  const trimmed = fixed.replace(/\.?0+$/, '');

  // Tiny negative values round to "-0.00000"; emit a plain "0" rather than "-0".
  return trimmed === '-0' ? '0' : trimmed;
};
9
+
8
10
/**
9
11
* Calculate the font metrics for a given font and font size.
10
12
* This is used to get metrics that match `ctx.measureText`, but without requiring a canvas.
@@ -89,8 +91,9 @@ export function writeHtml({
89
91
if ( activeLine . bodyWordsStr !== '' ) {
90
92
const topHTML = Math . round ( ( activeLine . y1 - activeLine . maxFontBoundingBoxAscentLine ) * 1000 ) / 1000 ;
91
93
bodyStr += ` <div class="scribe-line" style="left:${ activeLine . left } px;top:${ topHTML } px;">\n` ;
94
+ bodyStr += ' ' ;
92
95
bodyStr += activeLine . bodyWordsStr ;
93
- bodyStr += ' <br>\n' ;
96
+ bodyStr += '<br>\n' ;
94
97
bodyStr += ' </div>\n' ;
95
98
}
96
99
activeLine . bodyWordsStr = '' ;
@@ -101,8 +104,6 @@ export function writeHtml({
101
104
102
105
let top = 0 ;
103
106
104
- let firstPage = true ;
105
-
106
107
for ( let g = minpage ; g <= maxpage ; g ++ ) {
107
108
// TODO: change this when an image is included.
108
109
if ( ! ocrPages [ g ] || ocrPages [ g ] . lines . length === 0 ) continue ;
@@ -123,8 +124,6 @@ export function writeHtml({
123
124
}
124
125
}
125
126
126
- if ( ! firstPage ) bodyStr += '\n</div>\n' ;
127
- firstPage = false ;
128
127
bodyStr += ` <div class="scribe-page" id="page${ g } " style="position:absolute;top:${ top } px;">\n` ;
129
128
130
129
const imageObj = images ? images [ g ] : null ;
@@ -145,6 +144,7 @@ export function writeHtml({
145
144
146
145
let parCurrent = pageObj . lines [ 0 ] . par ;
147
146
let wordObjPrev = /** @type {?OcrWord } */ ( null ) ;
147
+ let advanceDiffPrev = 0 ;
148
148
let rightSideBearingPrev = 0 ;
149
149
let charSpacingHTMLPrev = 0 ;
150
150
@@ -181,8 +181,6 @@ export function writeHtml({
181
181
182
182
activeLine . left = wordObj . bbox . left - minLeft ;
183
183
if ( wordObj . visualCoords ) activeLine . left -= leftSideBearing * scale ;
184
- } else if ( h > 0 || g > 0 || i > 0 ) {
185
- bodyStr += ' ' ;
186
184
}
187
185
188
186
newLine = false ;
@@ -253,15 +251,19 @@ export function writeHtml({
253
251
254
252
let leftPad = 0 ;
255
253
if ( wordObjPrev ) {
256
- let bearingAdj = 0 ;
254
+ let spaceAdj = 0 ;
257
255
if ( wordObj . visualCoords ) {
258
- bearingAdj = leftSideBearing + rightSideBearingPrev ;
256
+ spaceAdj = leftSideBearing + rightSideBearingPrev ;
257
+ } else {
258
+ // This is usually 0, however can be non-zero when the PDF glyph advances
259
+ // are different from the HTML glyph advances.
260
+ spaceAdj = advanceDiffPrev ;
259
261
}
260
262
261
- leftPad = ( wordObj . bbox . left - wordObjPrev . bbox . right - bearingAdj - charSpacingHTMLPrev ) / Math . cos ( angle ) ;
263
+ leftPad = ( wordObj . bbox . left - wordObjPrev . bbox . right - spaceAdj - charSpacingHTMLPrev ) / Math . cos ( angle ) ;
262
264
}
263
265
264
- styleStr += `letter-spacing:${ charSpacingHTML } px;` ;
266
+ styleStr += `letter-spacing:${ formatNum ( charSpacingHTML ) } px;` ;
265
267
266
268
styleStr += `font-weight:${ fontI . fontFaceWeight } ;` ;
267
269
styleStr += `font-style:${ fontI . fontFaceStyle } ;` ;
@@ -294,10 +296,12 @@ export function writeHtml({
294
296
} else {
295
297
styleStrSpace += `font-size:${ fontSizeHTML } px;` ;
296
298
const leftPadFinal = leftPad - spaceAdvancePx * fontSizeHTML ;
297
- styleStrSpace += `word-spacing:${ leftPadFinal } px;` ;
299
+ styleStrSpace += `word-spacing:${ formatNum ( leftPadFinal ) } px;` ;
298
300
}
299
301
300
302
if ( underlinePrev ) {
303
+ styleStrSpace += `color:${ fill } ;` ;
304
+ styleStrSpace += `opacity:${ opacity } ;` ;
301
305
styleStrSpace += 'text-decoration:underline;' ;
302
306
styleStrSpace += `text-decoration-color:${ fill } ;` ;
303
307
styleStrSpace += `text-decoration-thickness:${ Math . ceil ( fontSizeHTML / 12 ) } px;` ;
@@ -311,14 +315,19 @@ export function writeHtml({
311
315
312
316
underlinePrev = wordObj . style . underline ;
313
317
318
+ const advanceTotalHTML = advanceArr . reduce ( ( a , b ) => a + b , 0 )
319
+ + kerningArr . reduce ( ( a , b ) => a + b , 0 )
320
+ + charSpacingHTML * ( charArr . length - 1 ) ;
321
+ advanceDiffPrev = advanceTotalHTML - ( wordObj . bbox . right - wordObj . bbox . left ) ;
322
+
314
323
wordObjPrev = wordObj ;
315
324
rightSideBearingPrev = rightSideBearing ;
316
325
charSpacingHTMLPrev = charSpacingHTML ;
317
326
}
318
327
}
319
328
320
329
addLine ( ) ;
321
- bodyStr += '\n </div>\n' ;
330
+ bodyStr += ' </div>\n' ;
322
331
323
332
opt . progressHandler ( { n : g , type : 'export' , info : { } } ) ;
324
333
}
@@ -340,6 +349,10 @@ export function writeHtml({
340
349
styleStr += ' white-space:nowrap;\n' ;
341
350
styleStr += ' }\n' ;
342
351
352
+ styleStr += ' .scribe-page {\n' ;
353
+ styleStr += ' text-decoration-skip-ink:none;\n' ;
354
+ styleStr += ' }\n' ;
355
+
343
356
styleStr += ' .scribe-image {\n' ;
344
357
styleStr += ' position:absolute;\n' ;
345
358
styleStr += ' user-select:none;\n' ;
0 commit comments