SylphxAI
diff --git a/‎CHANGELOG.md‎
Lines changed: 27 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎dist/handlers/readPdf.js‎
Lines changed: 63 additions & 54 deletions b/‎dist/handlers/readPdf.js‎
Lines changed: 63 additions & 54 deletions
diff --git a/‎dist/index.js‎
Lines changed: 1 addition & 1 deletion b/‎dist/index.js‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dist/pdf/extractor.js‎
Lines changed: 112 additions & 0 deletions b/‎dist/pdf/extractor.js‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 1 addition & 1 deletion b/‎package.json‎
Lines changed: 1 addition & 1 deletion
@@ -2,6 +2,33 @@
 
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
+## [1.2.0](https://github.com/sylphxltd/pdf-reader-mcp/compare/v1.1.0...v1.2.0) (2025-10-31)
+
+### Features
+
+* **Content Ordering**: Preserve exact text and image order based on Y-coordinates
+  - Content items within each page are now sorted by their vertical position
+  - Enables AI to see content in the same order as it appears in the PDF
+  - Text and images are interleaved based on document layout
+  - Example: page 1 [text, image, text, image, image, text]
+  - Uses PDF.js transform matrices to extract Y-coordinates
+  - Automatically groups text items on the same line
+  - Returns ordered content parts for optimal AI consumption
+
+### Internal Changes
+
+* New `extractPageContent()` function combines text and image extraction with positioning
+* New `PageContentItem` interface tracks content type, position, and data
+* Handler updated to generate content parts in document-reading order
+* Improved error handling to return descriptive error messages as text content
+
+### Code Quality
+
+* All tests passing (91 tests)
+* Coverage maintained at 97.76% statements, 90.95% branches
+* TypeScript strict mode compliance
+* Zero linting errors
+
 ## [1.1.0](https://github.com/sylphxltd/pdf-reader-mcp/compare/v1.0.0...v1.1.0) (2025-10-31)
 
 ### Features
 
@@ -1,7 +1,7 @@
 // PDF reading handler - orchestrates PDF processing workflow
 import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
 import { z } from 'zod';
-import { buildWarnings, extractImages, extractMetadataAndPageCount, extractPageTexts, } from '../pdf/extractor.js';
+import { buildWarnings, extractMetadataAndPageCount, extractPageContent, } from '../pdf/extractor.js';
 import { loadPdfDocument } from '../pdf/loader.js';
 import { determinePagesToProcess, getTargetPages } from '../pdf/parser.js';
 import { readPdfArgsSchema } from '../schemas/readPdf.js';
@@ -28,9 +28,23 @@ const processSingleSource = async (source, options) => {
         if (warnings.length > 0) {
             output.warnings = warnings;
         }
-        // Extract text if needed
+        // Extract content with ordering preserved
         if (pagesToProcess.length > 0) {
-            const extractedPageTexts = await extractPageTexts(pdfDocument, pagesToProcess, sourceDescription);
+            // Use new extractPageContent to preserve Y-coordinate ordering
+            const pageContents = await Promise.all(pagesToProcess.map((pageNum) => extractPageContent(pdfDocument, pageNum, options.includeImages, sourceDescription)));
+            // Store page contents for ordered retrieval
+            output.page_contents = pageContents.map((items, idx) => ({
+                page: pagesToProcess[idx],
+                items,
+            }));
+            // For backward compatibility, also provide text-only outputs
+            const extractedPageTexts = pageContents.map((items, idx) => ({
+                page: pagesToProcess[idx],
+                text: items
+                    .filter((item) => item.type === 'text')
+                    .map((item) => item.textContent)
+                    .join(''),
+            }));
             if (targetPages) {
                 // Specific pages requested
                 output.page_texts = extractedPageTexts;
@@ -39,12 +53,15 @@ const processSingleSource = async (source, options) => {
                 // Full text requested
                 output.full_text = extractedPageTexts.map((p) => p.text).join('\n\n');
             }
-        }
-        // Extract images if needed
-        if (options.includeImages && pagesToProcess.length > 0) {
-            const extractedImages = await extractImages(pdfDocument, pagesToProcess);
-            if (extractedImages.length > 0) {
-                output.images = extractedImages;
+            // Extract image metadata for JSON response
+            if (options.includeImages) {
+                const extractedImages = pageContents
+                    .flatMap((items) => items.filter((item) => item.type === 'image' && item.imageData))
+                    .map((item) => item.imageData)
+                    .filter((img) => img !== undefined);
+                if (extractedImages.length > 0) {
+                    output.images = extractedImages;
+                }
             }
         }
         individualResult = { ...individualResult, data: output, success: true };
@@ -89,60 +106,52 @@ export const handleReadPdfFunc = async (args) => {
         includePageCount: include_page_count,
         includeImages: include_images,
     })));
-    // Build content parts preserving page order
+    // Build content parts - start with structured JSON for backward compatibility
     const content = [];
-    // Add metadata/summary as first text part
-    const summaryData = results.map((result) => ({
-        source: result.source,
-        success: result.success,
-        num_pages: result.data?.num_pages,
-        info: result.data?.info,
-        metadata: result.data?.metadata,
-        warnings: result.data?.warnings,
-        error: result.error,
-    }));
+    // Strip image data and page_contents from JSON to keep it manageable
+    const resultsForJson = results.map((result) => {
+        if (result.data) {
+            const { images, page_contents, ...dataWithoutBinaryContent } = result.data;
+            // Include image count and metadata in JSON, but not the base64 data
+            if (images) {
+                const imageInfo = images.map((img) => ({
+                    page: img.page,
+                    index: img.index,
+                    width: img.width,
+                    height: img.height,
+                    format: img.format,
+                }));
+                return { ...result, data: { ...dataWithoutBinaryContent, image_info: imageInfo } };
+            }
+            return { ...result, data: dataWithoutBinaryContent };
+        }
+        return result;
+    });
+    // First content part: Structured JSON results
     content.push({
         type: 'text',
-        text: JSON.stringify({ summary: summaryData }, null, 2),
+        text: JSON.stringify({ results: resultsForJson }, null, 2),
     });
-    // Add page content in order: text then images for each page
+    // Add page content in exact Y-coordinate order
     for (const result of results) {
-        if (!result.success || !result.data)
+        if (!result.success || !result.data?.page_contents)
             continue;
-        // Handle page_texts (specific pages requested)
-        if (result.data.page_texts) {
-            for (const pageText of result.data.page_texts) {
-                // Add text for this page
-                content.push({
-                    type: 'text',
-                    text: `[Page ${pageText.page} from ${result.source}]\n${pageText.text}`,
-                });
-                // Add images for this page (if any)
-                if (result.data.images) {
-                    const pageImages = result.data.images.filter((img) => img.page === pageText.page);
-                    for (const image of pageImages) {
-                        content.push({
-                            type: 'image',
-                            data: image.data,
-                            mimeType: image.format === 'rgba' ? 'image/png' : 'image/jpeg',
-                        });
-                    }
+        // Process each page's content items in order
+        for (const pageContent of result.data.page_contents) {
+            for (const item of pageContent.items) {
+                if (item.type === 'text' && item.textContent) {
+                    // Add text content part
+                    content.push({
+                        type: 'text',
+                        text: item.textContent,
+                    });
                 }
-            }
-        }
-        // Handle full_text (all pages)
-        if (result.data.full_text) {
-            content.push({
-                type: 'text',
-                text: `[Full text from ${result.source}]\n${result.data.full_text}`,
-            });
-            // Add all images at the end for full text mode
-            if (result.data.images) {
-                for (const image of result.data.images) {
+                else if (item.type === 'image' && item.imageData) {
+                    // Add image content part
                     content.push({
                         type: 'image',
-                        data: image.data,
-                        mimeType: image.format === 'rgba' ? 'image/png' : 'image/jpeg',
+                        data: item.imageData.data,
+                        mimeType: item.imageData.format === 'rgba' ? 'image/png' : 'image/jpeg',
                     });
                 }
             }
 
@@ -11,7 +11,7 @@ import { allToolDefinitions } from './handlers/index.js';
 // --- Server Setup ---
 const server = new Server({
     name: 'pdf-reader-mcp',
-    version: '1.1.0',
+    version: '1.2.0',
     description: 'MCP Server for reading PDF files and extracting text, metadata, images, and page information.',
 }, {
     capabilities: { tools: {} },
 
@@ -151,3 +151,115 @@ export const buildWarnings = (invalidPages, totalPages) => {
         `Requested page numbers ${invalidPages.join(', ')} exceed total pages (${String(totalPages)}).`,
     ];
 };
+/**
+ * Extract all content (text and images) from a single page with Y-coordinate ordering
+ */
+export const extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescription) => {
+    const contentItems = [];
+    try {
+        const page = await pdfDocument.getPage(pageNum);
+        // Extract text content with Y-coordinates
+        const textContent = await page.getTextContent();
+        // Group text items by Y-coordinate (items on same line have similar Y values)
+        const textByY = new Map();
+        for (const item of textContent.items) {
+            const textItem = item;
+            // transform[5] is the Y coordinate
+            const yCoord = textItem.transform[5];
+            if (yCoord === undefined)
+                continue;
+            const y = Math.round(yCoord);
+            if (!textByY.has(y)) {
+                textByY.set(y, []);
+            }
+            textByY.get(y)?.push(textItem.str);
+        }
+        // Convert grouped text to content items
+        for (const [y, textParts] of textByY.entries()) {
+            const textContent = textParts.join('');
+            if (textContent.trim()) {
+                contentItems.push({
+                    type: 'text',
+                    yPosition: y,
+                    textContent,
+                });
+            }
+        }
+        // Extract images with Y-coordinates if requested
+        if (includeImages) {
+            const operatorList = await page.getOperatorList();
+            // Find all image painting operations
+            const imageIndices = [];
+            for (let i = 0; i < operatorList.fnArray.length; i++) {
+                const op = operatorList.fnArray[i];
+                if (op === OPS.paintImageXObject || op === OPS.paintXObject) {
+                    imageIndices.push(i);
+                }
+            }
+            // Extract each image with its Y-coordinate
+            const imagePromises = imageIndices.map((imgIndex, arrayIndex) => new Promise((resolve) => {
+                const argsArray = operatorList.argsArray[imgIndex];
+                if (!argsArray || argsArray.length === 0) {
+                    resolve(null);
+                    return;
+                }
+                const imageName = argsArray[0];
+                // Get transform matrix from the args (if available)
+                // The transform is typically in argsArray[1] for some ops
+                let yPosition = 0;
+                if (argsArray.length > 1 && Array.isArray(argsArray[1])) {
+                    const transform = argsArray[1];
+                    // transform[5] is the Y coordinate
+                    const yCoord = transform[5];
+                    if (yCoord !== undefined) {
+                        yPosition = Math.round(yCoord);
+                    }
+                }
+                // Use callback-based get() as images may not be resolved yet
+                page.objs.get(imageName, (imageData) => {
+                    if (!imageData || typeof imageData !== 'object') {
+                        resolve(null);
+                        return;
+                    }
+                    const img = imageData;
+                    if (!img.data || !img.width || !img.height) {
+                        resolve(null);
+                        return;
+                    }
+                    // Determine image format based on kind
+                    const format = img.kind === 1 ? 'grayscale' : img.kind === 3 ? 'rgba' : 'rgb';
+                    // Convert Uint8Array to base64
+                    const base64 = Buffer.from(img.data).toString('base64');
+                    resolve({
+                        type: 'image',
+                        yPosition,
+                        imageData: {
+                            page: pageNum,
+                            index: arrayIndex,
+                            width: img.width,
+                            height: img.height,
+                            format,
+                            data: base64,
+                        },
+                    });
+                });
+            }));
+            const resolvedImages = await Promise.all(imagePromises);
+            contentItems.push(...resolvedImages.filter((item) => item !== null));
+        }
+    }
+    catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        console.warn(`[PDF Reader MCP] Error extracting page content for page ${String(pageNum)} in ${sourceDescription}: ${message}`);
+        // Return error message as text content
+        return [
+            {
+                type: 'text',
+                yPosition: 0,
+                textContent: `Error processing page: ${message}`,
+            },
+        ];
+    }
+    // Sort by Y-position (descending = top to bottom in PDF coordinates)
+    return contentItems.sort((a, b) => b.yPosition - a.yPosition);
+};
@@ -1,6 +1,6 @@
 {
   "name": "@sylphx/pdf-reader-mcp",
-  "version": "1.1.0",
+  "version": "1.2.0",
   "description": "An MCP server providing tools to read PDF files.",
   "type": "module",
   "bin": {
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@sylphx/pdf-reader-mcp",`
`3`		`- "version": "1.1.0",`
	`3`	`+ "version": "1.2.0",`
`4`	`4`	`"description": "An MCP server providing tools to read PDF files.",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"bin": {`