@@ -20,100 +20,115 @@ module.exports = defineConfig({
2020 watchForFileChanges : true ,
2121 video : true ,
2222 setupNodeEvents ( on , config ) {
23+ //Task: checks if the PDF contains embedded images and verifies logo existence and text content
2324 on ( "task" , {
25+ // --- Verify a PDF: embedded-image presence, optional logo match, optional text extraction ---
2426 async verifyPdf ( { filePath, options = { } } ) {
25- if ( typeof global . DOMMatrix === "undefined" ) {
26- global . DOMMatrix = class DOMMatrix { } ;
27- }
28-
29- const data = new Uint8Array ( fs . readFileSync ( filePath ) ) ;
30- const pdfDoc = await pdfjsLib . getDocument ( { data } ) . promise ;
27+ // options: { referenceLogoPath: string, checkText: true/false }
28+ try {
29+ if ( typeof global . DOMMatrix === "undefined" ) {
30+ global . DOMMatrix = class DOMMatrix { } ;
31+ }
3132
32- let hasImage = false ;
33- let logoFound = false ;
34- let extractedText = "" ;
33+ // PDF loading
34+ const data = new Uint8Array ( fs . readFileSync ( filePath ) ) ;
35+ const pdfDoc = await pdfjsLib . getDocument ( { data } ) . promise ;
3536
36- // Text extraction Node-only
37- if ( options . checkText ) {
38- const dataBuffer = fs . readFileSync ( filePath ) ;
39- const pdfData = await pdfParseFn ( dataBuffer ) ;
40- extractedText = pdfData . text ;
41- }
42-
43- // Pixelmatch import
44- let pixelmatch ;
45- if ( options . referenceLogoPath ) {
46- const pm = await import ( "pixelmatch" ) ;
47- pixelmatch = pm . default ;
48- }
37+ // Dynamic import pixelmatch if logo check is needed
38+ let pixelmatch ;
39+ const doLogoCheck = ! ! options . referenceLogoPath ;
40+ if ( doLogoCheck ) {
41+ const pm = await import ( "pixelmatch" ) ;
42+ pixelmatch = pm . default ;
43+ }
4944
50- for ( let p = 1 ; p <= pdfDoc . numPages ; p ++ ) {
51- const page = await pdfDoc . getPage ( p ) ;
52- const ops = await page . getOperatorList ( ) ;
53-
54- for ( let i = 0 ; i < ops . fnArray . length ; i ++ ) {
55- const fn = ops . fnArray [ i ] ;
56- const args = ops . argsArray [ i ] ;
57-
58- if (
59- fn === pdfjsLib . OPS . paintImageXObject ||
60- fn === pdfjsLib . OPS . paintJpegXObject ||
61- fn === pdfjsLib . OPS . paintInlineImageXObject
62- ) {
63- hasImage = true ;
64-
65- if ( options . referenceLogoPath && args [ 0 ] ) {
66- const objName = args [ 0 ] ;
67- const imgData = await page . objs . get ( objName ) ;
68- if ( ! imgData ) {
69- continue ;
70- }
45+ let hasImage = false ;
46+ let logoFound = false ;
47+ let extractedText = "" ;
48+
49+ for ( let p = 1 ; p <= pdfDoc . numPages ; p ++ ) {
50+ const page = await pdfDoc . getPage ( p ) ;
51+
52+ // --- Text extraction ---
53+ if ( options . checkText ) {
54+ extractedText += await new Promise ( ( resolve , reject ) => {
55+ extract ( filePath , ( err , pages ) => {
56+ if ( err ) return reject ( err ) ;
57+ resolve ( pages . join ( "\n" ) ) ;
58+ } ) ;
59+ } ) ;
60+ }
7161
72- const pdfImg = new PNG ( { width : imgData . width , height : imgData . height } ) ;
73- pdfImg . data = imgData . data ;
74- const pdfBuffer = PNG . sync . write ( pdfImg ) ;
75-
76- const refLogo = PNG . sync . read ( fs . readFileSync ( options . referenceLogoPath ) ) ;
77- const resizedPdfBuffer = await sharp ( pdfBuffer )
78- . resize ( refLogo . width , refLogo . height )
79- . png ( )
80- . toBuffer ( ) ;
81-
82- const resizedPdfImg = PNG . sync . read ( resizedPdfBuffer ) ;
83- const diff = new PNG ( { width : refLogo . width , height : refLogo . height } ) ;
84- const mismatched = pixelmatch (
85- refLogo . data ,
86- resizedPdfImg . data ,
87- diff . data ,
88- refLogo . width ,
89- refLogo . height ,
90- { threshold : 0.1 }
91- ) ;
92-
93- if ( mismatched === 0 ) {
94- logoFound = true ;
95- break ;
62+ const ops = await page . getOperatorList ( ) ;
63+
64+ for ( let i = 0 ; i < ops . fnArray . length ; i ++ ) {
65+ const fn = ops . fnArray [ i ] ;
66+ const args = ops . argsArray [ i ] ;
67+
68+ // --- Image check ---
69+ if (
70+ fn === pdfjsLib . OPS . paintImageXObject ||
71+ fn === pdfjsLib . OPS . paintJpegXObject ||
72+ fn === pdfjsLib . OPS . paintInlineImageXObject
73+ ) {
74+ hasImage = true ;
75+
76+ if ( doLogoCheck && args [ 0 ] ) {
77+ const objName = args [ 0 ] ;
78+ const imgData = await page . objs . get ( objName ) ;
79+ if ( ! imgData ) continue ;
80+
81+ const pdfImg = new PNG ( { width : imgData . width , height : imgData . height } ) ;
82+ pdfImg . data = imgData . data ;
83+
84+ // resize PDF image to reference logo size
85+ const pdfBuffer = PNG . sync . write ( pdfImg ) ;
86+ const refLogo = PNG . sync . read ( fs . readFileSync ( options . referenceLogoPath ) ) ;
87+ const resizedPdfBuffer = await sharp ( pdfBuffer )
88+ . resize ( refLogo . width , refLogo . height )
89+ . png ( )
90+ . toBuffer ( ) ;
91+
92+ const resizedPdfImg = PNG . sync . read ( resizedPdfBuffer ) ;
93+
94+ // pixelmatch
95+ const diff = new PNG ( { width : refLogo . width , height : refLogo . height } ) ;
96+ const mismatched = pixelmatch (
97+ refLogo . data ,
98+ resizedPdfImg . data ,
99+ diff . data ,
100+ refLogo . width ,
101+ refLogo . height ,
102+ { threshold : 0.1 }
103+ ) ;
104+
105+ if ( mismatched === 0 ) {
106+ logoFound = true ;
107+ break ;
108+ }
96109 }
97110 }
98111 }
112+
113+ if ( ( doLogoCheck && logoFound ) || ( ! doLogoCheck && hasImage ) ) {
114+ break ;
115+ }
99116 }
100117
101- if ( ( options . referenceLogoPath && logoFound ) || ( ! options . referenceLogoPath && hasImage ) ) {
102- break ;
118+ if ( doLogoCheck && ! logoFound ) {
119+ throw new Error ( "Logo in PDF does not match reference image" ) ;
103120 }
104- }
105121
106- if ( options . referenceLogoPath && ! logoFound ) {
107- throw new Error ( "Logo in PDF does not match reference image" ) ;
122+ return {
123+ hasImage,
124+ logoFound,
125+ text : extractedText ,
126+ numPages : pdfDoc . numPages
127+ } ;
128+ } catch ( err ) {
129+ throw err ;
108130 }
109-
110- return {
111- hasImage,
112- logoFound,
113- text : extractedText ,
114- numPages : pdfDoc . numPages ,
115- } ;
116- } ,
131+ }
117132 } ) ;
118133
119134 on ( "after:spec" , ( spec , results ) => {
0 commit comments