@@ -327,18 +327,64 @@ export default class PDFJSClass extends EventEmitter {
327327
328328 this . rawTextContents . forEach ( ( textContent , index ) => {
329329 let prevText = null ;
330+
// Merge this page's text fragments into lines. Fragments whose baselines are
// close together are joined into one string, with spaces estimated from the
// horizontal gap between them; a fragment on a different baseline flushes the
// finished line to retVal followed by "\r\n". The last line is left in
// prevText for the code after this loop to emit.
textContent.bidiTexts.forEach((textObj) => {
  // Same-line test: the baseline may differ by at most 15% of the font size of
  // the line's first fragment (typical line spacing is about 120% of the font
  // size). 12 is used when that fragment has no usable fontSize. Written as
  // `<=` on purpose: a NaN distance compares false and starts a new line.
  const sameLine =
    Boolean(prevText) &&
    Math.abs(textObj.y - prevText.y) <= (prevText.fontSize || 12) * 0.15;

  if (!sameLine) {
    // First fragment, or a new baseline: flush the finished line, if any.
    if (prevText) {
      retVal += `${prevText.str}\r\n`;
    }

    // Start a fresh accumulator rather than aliasing textObj, so appending to
    // str below never mutates the source fragment.
    prevText = {
      str: textObj.str,
      y: textObj.y,
      startX: textObj.x,
      width: textObj.width,
      spaceWidth: textObj.spaceWidth,
      textHScale: textObj.textHScale,
      fontSize: textObj.fontSize, // kept for the tolerance calculation
    };
    return;
  }

  // Metrics of the most recently appended fragment on this line.
  const { spaceWidth, startX, width, textHScale } = prevText;

  // width and spaceWidth are in unscaled coordinates while startX and
  // textObj.x are scaled, so both are multiplied by textHScale before being
  // compared with positions (mirrors canvas.js: current.x += x * textHScale).
  const prevTextEndX = startX + width * textHScale;
  const gap = textObj.x - prevTextEndX;
  const scaledSpaceWidth = spaceWidth * textHScale;

  // Insert spaces only when the gap exceeds 30% of a space width. If the
  // metrics are missing, the comparison against NaN is false and nothing is
  // inserted.
  if (gap > scaledSpaceWidth * 0.3) {
    const numSpaces = Math.round(gap / scaledSpaceWidth);

    // A zero space width makes the quotient Infinity, and
    // String.prototype.repeat throws a RangeError for a non-finite count.
    // Treat that case like missing metrics and insert nothing.
    if (Number.isFinite(numSpaces)) {
      prevText.str += ' '.repeat(Math.max(1, numSpaces));
    }
  }

  prevText.str += textObj.str;

  // Track the fragment just appended so the next gap is measured from its
  // end. y and fontSize intentionally stay those of the line's first fragment.
  prevText.startX = textObj.x;
  prevText.width = textObj.width;
  prevText.spaceWidth = textObj.spaceWidth;
  prevText.textHScale = textObj.textHScale;
});
387+
342388 if ( prevText ) {
343389 retVal += prevText . str ;
344390 }
0 commit comments