Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions base/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,38 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
bidiText.x += renderParams.vScale / 2;
bidiText.y -= renderParams.vScale;
}

// MQZ: Add font metrics for accurate spacing calculation
// Attaches fontName, fontSize, spaceWidth, charSpace, wordSpace, textHScale and
// width to bidiText so that later consumers can estimate gaps between text runs.
// NOTE(review): font, textState, charSpace, wordSpace and chunk come from the
// enclosing scope, which is not visible here — confirm they describe this chunk.
bidiText.fontName = font.loadedName || font.name;
bidiText.fontSize = textState.fontSize;

// Get fontMatrix once (used for both spaceWidth and textWidth calculations)
var fontMatrix = font.fontMatrix || FONT_IDENTITY_MATRIX;
// Falls back to 1 when textState.fontDirection is unset (or 0).
var fontDirection = textState.fontDirection || 1;

// Scale spaceWidth to PDF coordinates using fontMatrix (NO textHScale)
// Must match canvas.js canvasWidth calculation (line 1258 - no textHScale)
// NOTE(review): if font.spaceWidth is undefined this evaluates to NaN — confirm
// every font object reaching this point defines spaceWidth.
bidiText.spaceWidth = font.spaceWidth * textState.fontSize * fontMatrix[0];
bidiText.charSpace = charSpace;
bidiText.wordSpace = wordSpace;
bidiText.textHScale = textState.textHScale;

// Calculate actual text width using font glyph widths
// Match canvas.js calculation exactly (lines 1210-1211, 1258, canvasWidth does NOT include textHScale)
// NOTE(review): the `var` declarations below (i, ii, glyphs, glyph, glyphWidth,
// charWidth) are function-scoped — confirm the enclosing function does not
// already use these names, e.g. as an outer loop counter.
var textWidth = 0;
var glyphs = font.charsToGlyphs(chunk);
for (var i = 0, ii = glyphs.length; i < ii; i++) {
var glyph = glyphs[i];
// Use glyph.width if available, otherwise font.defaultWidth (like canvas.js does)
// A width of 0 is falsy, so zero-width glyphs also fall back to font.defaultWidth.
var glyphWidth = (glyph && glyph.width) || font.defaultWidth || 0;
// Match canvas.js line 1210-1211: width * fontSize * fontMatrix[0] + charSpacing * fontDirection
// charSpace is added once per glyph; wordSpace is recorded on bidiText above
// but is not part of this sum.
var charWidth = glyphWidth * textState.fontSize * fontMatrix[0] + charSpace * fontDirection;
textWidth += charWidth;
}
// DO NOT apply textHScale - canvasWidth is in unscaled coordinates
// (bidiText.x is scaled, but bidiText.width matches JSON w property which is unscaled)
bidiText.width = textWidth;

bidiTexts.push(bidiText);

chunk = '';
Expand Down
60 changes: 53 additions & 7 deletions lib/pdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -327,18 +327,64 @@ export default class PDFJSClass extends EventEmitter {

this.rawTextContents.forEach((textContent, index) => {
let prevText = null;

// Merge this page's text runs into lines: runs on the same baseline are appended to
// the current line (with spaces inferred from the horizontal gap); a run on a
// different baseline flushes the current line to retVal and starts a new one.
// Reads textContent and reads/writes prevText and retVal from the enclosing scope.
textContent.bidiTexts.forEach((textObj) => {
  // Same line when the y positions differ by at most 15% of the font size of the
  // run that started the line (12 is used when that font size is missing or 0).
  const sameLine =
    prevText !== null &&
    Math.abs(textObj.y - prevText.y) <= (prevText.fontSize || 12) * 0.15;

  if (!sameLine) {
    // Different line or first text: flush the finished line, if any.
    if (prevText) {
      retVal += `${prevText.str}\r\n`;
    }

    // Start a new line record carrying the metrics needed for gap detection.
    // y and fontSize stay those of the first run on the line.
    prevText = {
      str: textObj.str,
      y: textObj.y,
      startX: textObj.x,
      width: textObj.width,
      spaceWidth: textObj.spaceWidth,
      textHScale: textObj.textHScale,
      fontSize: textObj.fontSize,
    };
    return;
  }

  const { spaceWidth, startX, width, textHScale } = prevText;

  // width and spaceWidth are unscaled while startX and textObj.x are scaled, so
  // both are multiplied by textHScale before being compared with x positions.
  const prevTextEndX = startX + width * textHScale;
  const gap = textObj.x - prevTextEndX;
  const scaledSpaceWidth = spaceWidth * textHScale;

  // Insert spaces only when the gap exceeds 30% of a (scaled) space width.
  if (gap > scaledSpaceWidth * 0.3) {
    const numSpaces = Math.round(gap / scaledSpaceWidth);
    // A zero space width makes gap / scaledSpaceWidth Infinity, and
    // ' '.repeat(Infinity) throws RangeError; skip space inference in that case
    // instead of aborting the whole extraction.
    if (Number.isFinite(numSpaces)) {
      prevText.str += ' '.repeat(Math.max(1, numSpaces));
    }
  }

  prevText.str += textObj.str;

  // Track the run just appended so the next gap is measured from its end.
  prevText.startX = textObj.x;
  prevText.width = textObj.width;
  prevText.spaceWidth = textObj.spaceWidth;
  prevText.textHScale = textObj.textHScale;
});

if (prevText) {
retVal += prevText.str;
}
Expand Down
Loading