Skip to content

Commit 54e6600

Browse files
Fix precision rounding issues in LineWrapper (#1595)
* Fix further LineWrapper precision issues
* Add test of bounded text precision issue
* Add rowSpanning table example
* Add failure threshold
* Implement toContainText jest matcher
* Create a unit test for bounded text precision
* Remove round-up rounding code path

---------

Co-authored-by: Luiz Américo Pereira Câmara <[email protected]>
1 parent f3c1776 commit 54e6600

File tree

13 files changed

+306
-34
lines changed

13 files changed

+306
-34
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
### Unreleased
44

55
- Fix null values in table cells rendering as `[object Object]`
6+
- Fix further LineWrapper precision issues
67

78
### [v0.17.0] - 2025-04-12
89

lib/line_wrapper.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ class LineWrapper extends EventEmitter {
8585
}
8686

8787
wordWidth(word) {
88-
return (
88+
return PDFNumber(
8989
this.document.widthOfString(word, this) +
90-
this.characterSpacing +
91-
this.wordSpacing
90+
this.characterSpacing +
91+
this.wordSpacing,
9292
);
9393
}
9494

lib/utils.js

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,25 @@
1+
// Two typed-array views over the same 4 bytes: a value stored through
// `fArray` can be read back (and adjusted) as its raw 32-bit pattern
// through `uArray`.
const fArray = new Float32Array(1);
const uArray = new Uint32Array(fArray.buffer);

/**
 * Convert a number to a 32-bit float that is never greater than the input.
 *
 * `Math.fround` rounds to the nearest float32, which may land above `n`.
 * When that happens the result is moved to the adjacent float32 below it,
 * so the returned value always satisfies `PDFNumber(n) <= n`.
 *
 * @param {number} n
 * @returns {number} a float32-representable value `<= n` (NaN stays NaN)
 */
export function PDFNumber(n) {
  // PDF numbers are strictly 32bit
  // so convert this number to a 32bit number
  // @see ISO 32000-1 Annex C.2 (real numbers)
  const rounded = Math.fround(n);
  if (rounded <= n) return rounded;

  // NaN compares false with everything; there is no neighbour to step to.
  if (Number.isNaN(rounded)) return rounded;

  // Rounding went up, so step to the neighbouring float32 below.
  fArray[0] = n;

  // Float bit patterns are sign + magnitude: for negative values a larger
  // magnitude means a smaller number, so the integer pattern is incremented;
  // for positive values it is decremented.
  if (n <= 0) {
    uArray[0] += 1;
  } else {
    uArray[0] -= 1;
  }

  // Read the adjusted pattern back as a float
  return fArray[0];
}
724

825
/**

tests/unit/helpers.js

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
/**
2+
* @import PDFDocument from '../../lib/document';
3+
*/
4+
5+
/**
6+
* @typedef {object} TextStream
7+
* @property {string} text
8+
* @property {string} font
9+
* @property {number} fontSize
10+
*
11+
* @typedef {string | Buffer} PDFDataItem
12+
* @typedef {Array<PDFDataItem>} PDFData
13+
*
14+
* @typedef {object} PDFDataObject
15+
* @property {PDFDataItem[]} items
16+
*/
17+
18+
/**
19+
* @param {PDFDocument} doc
20+
* @return {PDFData}
21+
*/
122
function logData(doc) {
223
const loggedData = [];
324
const originalMethod = doc._write;
@@ -18,4 +39,83 @@ function joinTokens(...args) {
1839
return r;
1940
}
2041

21-
export { logData, joinTokens };
42+
/**
 * @description
 * Groups logged PDF data into objects. An object starts at a string item
 * matching /\d 0 obj/ and ends at the string item 'endobj'; every item in
 * between (string or Buffer) is collected into that object's `items`.
 * Items outside any object, and items that are neither strings nor Buffers,
 * are ignored.
 * @param {PDFData} data
 * @return {Array<PDFDataObject>}
 */
function getObjects(data) {
  const objects = [];
  let currentObject = null;

  for (const item of data) {
    const isString = typeof item === 'string';
    if (!isString && !(item instanceof Buffer)) {
      continue;
    }

    // Object header: open a new object (the header itself is not stored)
    if (isString && /^\d+\s0\sobj/.test(item)) {
      currentObject = { items: [] };
      objects.push(currentObject);
      continue;
    }

    // Object trailer: close the current object
    if (isString && item === 'endobj') {
      currentObject = null;
      continue;
    }

    if (currentObject) {
      currentObject.items.push(item);
    }
  }

  return objects;
}
69+
70+
/**
 * Extracts the font name, font size and text from a content stream.
 *
 * Looks for the first `/<font> <size> Tf` operator and the first
 * `[ ... ] TJ` array, then decodes every `<hex>` string inside that array
 * one byte per character.
 *
 * @param {Buffer} textStream
 * @return {TextStream | undefined} undefined when the stream has no `Tf`
 * operator or no `TJ` array
 */
function parseTextStream(textStream) {
  const decodedStream = textStream.toString('utf8');

  // Extract font and font size; the size may be fractional (e.g. 10.5)
  const fontMatch = decodedStream.match(
    /\/([A-Za-z0-9]+)\s+(\d+(?:\.\d+)?|\.\d+)\s+Tf/,
  );
  if (!fontMatch) {
    return undefined;
  }

  // Extract hex strings inside TJ array
  const tjMatch = decodedStream.match(/\[([^\]]+)\]\s+TJ/);
  if (!tjMatch) {
    return undefined;
  }

  const [, font, rawFontSize] = fontMatch;
  const fontSize = Number.parseFloat(rawFontSize);

  // this is a simplified version
  // the correct way is to retrieve the encoding from /Resources /Font dictionary and decode using it
  // https://stackoverflow.com/a/29468049/5724645
  let text = '';

  // Match all hex strings like <...>
  for (const [, rawHex] of tjMatch[1].matchAll(/<([0-9a-fA-F]+)>/g)) {
    // A hex string with an odd number of digits has an implied trailing 0
    const hex = rawHex.length % 2 === 0 ? rawHex : `${rawHex}0`;
    for (let i = 0; i < hex.length; i += 2) {
      const code = Number.parseInt(hex.slice(i, i + 2), 16);
      // 0x85 is rendered as three dots; every other byte maps 1:1
      text += code === 0x85 ? '...' : String.fromCharCode(code);
    }
  }

  return { text, font, fontSize };
}
120+
121+
export { logData, joinTokens, parseTextStream, getObjects };

tests/unit/setupTests.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
import matcher from './toContainChunk';
1+
import toContainChunk from './toContainChunk';
2+
import toContainText from './toContainText';
23
import { toMatchImageSnapshot } from 'jest-image-snapshot';
34

4-
expect.extend(matcher);
5+
expect.extend(toContainChunk);
6+
expect.extend(toContainText);
57
expect.extend({ toMatchImageSnapshot });

tests/unit/text.spec.js

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,33 +15,15 @@ describe('Text', () => {
1515
test('with simple content', () => {
1616
const docData = logData(document);
1717

18-
const textStream = Buffer.from(
19-
`1 0 0 -1 0 792 cm
20-
q
21-
1 0 0 -1 0 792 cm
22-
BT
23-
1 0 0 1 72 711.384 Tm
24-
/F1 12 Tf
25-
[<73696d706c65207465> 30 <7874> 0] TJ
26-
ET
27-
Q
28-
`,
29-
'binary',
30-
);
31-
3218
document.text('simple text');
3319
document.end();
3420

35-
expect(docData).toContainChunk([
36-
`5 0 obj`,
37-
`<<
38-
/Length 116
39-
>>`,
40-
`stream`,
41-
textStream,
42-
`\nendstream`,
43-
`endobj`,
44-
]);
21+
expect(docData).toContainText({ text: 'simple text' });
22+
});
23+
24+
test('with destination', () => {
25+
// just check that there is no exception
26+
document.text('simple text', { destination: 'anchor' });
4527
});
4628

4729
test('with content ending after page right margin', () => {
@@ -194,5 +176,21 @@ Q
194176
`endobj`,
195177
]);
196178
});
179+
180+
test('bounded text precision - issue #1611', () => {
181+
const docData = logData(document);
182+
const text = 'New york';
183+
const bounds = document.boundsOfString(text);
184+
// Draw text which is constrained to the bounds
185+
document.text(text, {
186+
ellipsis: true,
187+
width: bounds.width,
188+
height: bounds.height,
189+
});
190+
191+
document.end();
192+
193+
expect(docData).toContainText({ text });
194+
});
197195
});
198196
});

tests/unit/toContainText/index.js

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import { getObjects, parseTextStream } from '../helpers.js';
2+
3+
/**
4+
* @import { TextStream, PDFDataObject } from '../helpers.js';
5+
* @import JestMatchedUtils from 'jest-matcher-utils';
6+
*/
7+
8+
/**
 * Builds the lazy message used when the matcher passed, i.e. the text shown
 * if the assertion was negated with `.not`.
 *
 * @param {JestMatchedUtils} utils
 * @param {TextStream} argument
 * @return {() => string} thunk producing the message on demand
 */
const passMessage = (utils, argument) => () => {
  const hint = utils.matcherHint('.not.toContainText', 'data', 'textStream');
  return `${hint}\n\nExpected data not to contain text:\n\n${utils.printExpected(argument)}`;
};
20+
21+
/**
 * Builds the lazy message used when the matcher failed: the expected text
 * stream followed by every text stream that was found instead.
 *
 * @param {JestMatchedUtils} utils
 * @param {TextStream[]} received
 * @param {TextStream} argument
 * @return {() => string} thunk producing the message on demand
 */
const failMessage = (utils, received, argument) => () => {
  const hint = utils.matcherHint('.toContainText', 'data', 'textStream');
  return `${hint}\n\nExpected data to contain text:\n\n${utils.printExpected(argument)}\n\nFound:\n\n${utils.printReceived(received)}`;
};
34+
35+
/**
 * Compares an expected (possibly partial) text stream with a parsed one.
 *
 * `text` must always be equal. `font` and `fontSize` are only compared when
 * the expectation provides them (i.e. they are not undefined/null), so a
 * deliberately falsy expectation such as `fontSize: 0` is still checked.
 *
 * @param {Partial<TextStream>} expected
 * @param {TextStream} actual
 * @return {boolean}
 */
function textStreamMatches(expected, actual) {
  const isProvided = (value) => value !== undefined && value !== null;
  const { text, font, fontSize } = expected;

  if (text !== actual.text) {
    return false;
  }

  if (isProvided(font) && font !== actual.font) {
    return false;
  }

  if (isProvided(fontSize) && fontSize !== actual.fontSize) {
    return false;
  }

  return true;
}
50+
51+
/**
 * Parses a PDF object as a text stream, if it has the shape of one.
 *
 * A candidate object has exactly 4 items:
 *   1. a string (the dictionary containing the Length of the stream)
 *   2. the string 'stream'
 *   3. the stream content as a Buffer
 *   4. an item containing 'endstream'
 *
 * @param {PDFDataObject} object
 * @return {TextStream | undefined} undefined when the object does not have
 * that shape or its content cannot be parsed as text
 */
function getTextStream(object) {
  const { items } = object;
  if (items.length !== 4) {
    return undefined;
  }

  const [dictionary, streamKeyword, content, terminator] = items;
  const looksLikeStream =
    typeof dictionary === 'string' &&
    streamKeyword === 'stream' &&
    content instanceof Buffer &&
    /endstream/.test(terminator);

  return looksLikeStream ? parseTextStream(content) : undefined;
}
80+
81+
export default {
82+
/**
83+
*
84+
* @param {(string | Buffer)[]} data
85+
* @param {Partial<TextStream>} textStream
86+
* @returns
87+
*/
88+
toContainText(data, textStream) {
89+
const objects = getObjects(data);
90+
const foundTextStreams = [];
91+
let pass = false;
92+
93+
for (const object of objects) {
94+
const objectTextStream = getTextStream(object, textStream);
95+
if (!objectTextStream) {
96+
continue;
97+
}
98+
foundTextStreams.push(objectTextStream);
99+
if (textStreamMatches(textStream, objectTextStream)) {
100+
pass = true;
101+
break;
102+
}
103+
}
104+
105+
if (pass) {
106+
return {
107+
pass: true,
108+
message: passMessage(this.utils, textStream),
109+
};
110+
}
111+
112+
return {
113+
pass: false,
114+
message: failMessage(this.utils, foundTextStreams, textStream),
115+
};
116+
},
117+
};

tests/unit/utils.spec.js

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { normalizeSides } from '../../lib/utils';
1+
import { normalizeSides, PDFNumber } from '../../lib/utils';
22

33
describe('normalizeSides', () => {
44
test.each([
@@ -54,3 +54,19 @@ describe('normalizeSides', () => {
5454
});
5555
});
5656
});
57+
58+
// PDFNumber must never return a value greater than its input.
describe('PDFNumber', () => {
  test.each([
    [0],
    [0.04999999701976776], // float32 neighbour just below 0.05
    [0.05],
    [0.05000000074505806], // float32 neighbour just above 0.05
    [1],
    [-1],
    [-5.05],
    [5.05],
  ])('PDFNumber(%f) is not greater than its input', (n) => {
    expect(PDFNumber(n)).toBeLessThanOrEqual(n);
  });
});
67.1 KB
Loading
16.8 KB
Loading

0 commit comments

Comments
 (0)