@@ -163,6 +163,46 @@ function checkResult_pageContent(Pages, fileName) {
163163 } ) ;
164164}
165165
/**
 * Regression check for issue #408: text elements on a page must not all share
 * one coordinate pair, and every coordinate must be a finite number.
 *
 * For each page:
 *  - pages with no `Texts` (missing or empty) are skipped;
 *  - when the page has more than 5 text elements, at least two distinct
 *    (x, y) pairs must be present;
 *  - every text element's `x` and `y` must be a finite number.
 *
 * @param {Array<Object>} Pages - parsed pages; each page's optional `Texts`
 *   array holds elements with numeric `x` / `y` properties.
 * @param {string} fileName - name of the PDF under test, used only to prefix
 *   failure messages.
 * @throws when any of the assertions above fails (raised by `assert`).
 */
function checkResult_textCoordinates(Pages, fileName) {
  Pages.forEach((page, pageIndex) => {
    const texts = page.Texts || [];
    if (texts.length === 0) return; // Skip pages with no text

    // One "x,y" key per text element; the Set collapses duplicates.
    const uniqueCoords = new Set(texts.map(({ x, y }) => `${x},${y}`));

    // Only pages with more than 5 elements are checked: a handful of texts may
    // legitimately coincide, whereas the regression put every element at the
    // same point (-0.25, 48.75).
    if (texts.length > 5) {
      assert(
        uniqueCoords.size > 1,
        fileName + " page " + pageIndex +
          " : all " + texts.length + " text elements have identical coordinates. " +
          "This is a regression of issue #408. Found only " + uniqueCoords.size +
          " unique coordinate(s): " + Array.from(uniqueCoords).slice(0, 3).join(", ")
      );
    }

    // Number.isFinite rejects non-numbers, NaN and +/-Infinity in one test;
    // an infinite coordinate is no more usable than NaN.
    texts.forEach((text, textIndex) => {
      assert(
        Number.isFinite(text.x),
        fileName + " page " + pageIndex + " text " + textIndex +
          " : has invalid x coordinate: " + text.x
      );
      assert(
        Number.isFinite(text.y),
        fileName + " page " + pageIndex + " text " + textIndex +
          " : has invalid y coordinate: " + text.y
      );
    });
  });
}
205+
166206async function parseAndVerifyOnePDF ( fileName , fromBuffer , pageCount ) {
167207 let timeoutId ;
168208 let pdfParser = null ;
@@ -203,12 +243,13 @@ async function parseAndVerifyOnePDF(fileName, fromBuffer, pageCount) {
203243 } ) ;
204244
205245 const evtData = await pdfParserDataReady ;
206-
246+
207247 expect ( evtData ) . toBeDefined ( ) ;
208248 checkResult_parseStatus ( null , evtData , fileName ) ;
209249 checkResult_mainFields ( evtData , fileName ) ;
210250 checkResult_pageCount ( evtData . Pages , pageCount , fileName ) ;
211251 checkResult_pageContent ( evtData . Pages , fileName ) ;
252+ checkResult_textCoordinates ( evtData . Pages , fileName ) ;
212253 } catch ( error ) {
213254 console . error ( `Error parsing PDF ${ fileName } : ` , error ) ;
214255 throw error ; // Re-throw to ensure Jest knows the test failed
0 commit comments