/**
 * PDF processing utilities built on PDF.js.
 * Handles PDF text extraction and page-to-image conversion in the browser.
 */
5+
16import { browser } from '$app/environment' ;
27
38// Types for PDF.js (imported conditionally)
@@ -12,7 +17,10 @@ type TextItem = {
1217// PDF.js instance (loaded dynamically)
1318let pdfjs : any = null ;
1419
15- // Initialize PDF.js only on the client side
20+ /**
21+ * Initialize PDF.js only on the client side
22+ * Sets up the PDF.js library and worker for processing
23+ */
1624async function initializePdfJs ( ) {
1725 if ( ! browser || pdfjs ) return ;
1826
@@ -31,6 +39,11 @@ async function initializePdfJs() {
3139 }
3240}
3341
42+ /**
43+ * Convert a File object to ArrayBuffer for PDF.js processing
44+ * @param file - The PDF file to convert
45+ * @returns Promise resolving to the file's ArrayBuffer
46+ */
3447async function getFileAsBuffer ( file : File ) : Promise < ArrayBuffer > {
3548 return new Promise ( ( resolve , reject ) => {
3649 const reader = new FileReader ( ) ;
@@ -49,6 +62,11 @@ async function getFileAsBuffer(file: File): Promise<ArrayBuffer> {
4962}
5063
5164
65+ /**
66+ * Extract text content from a PDF file
67+ * @param file - The PDF file to process
68+ * @returns Promise resolving to the extracted text content
69+ */
5270export async function convertPDFToText ( file : File ) : Promise < string > {
5371 if ( ! browser ) {
5472 throw new Error ( 'PDF processing is only available in the browser' ) ;
@@ -80,6 +98,12 @@ export async function convertPDFToText(file: File): Promise<string> {
8098 }
8199}
82100
101+ /**
102+ * Convert PDF pages to PNG images as data URLs
103+ * @param file - The PDF file to convert
104+ * @param scale - Rendering scale factor (default: 1.5)
105+ * @returns Promise resolving to array of PNG data URLs
106+ */
83107export async function convertPDFToImage ( file : File , scale : number = 1.5 ) : Promise < string [ ] > {
84108 if ( ! browser ) {
85109 throw new Error ( 'PDF processing is only available in the browser' ) ;
@@ -124,10 +148,20 @@ export async function convertPDFToImage(file: File, scale: number = 1.5): Promis
124148 }
125149}
126150
/**
 * Check if a file is a PDF based on its MIME type.
 *
 * Only the media-type essence of `file.type` is compared: any parameters
 * (e.g. `application/pdf; charset=binary`), surrounding whitespace, and
 * letter case are ignored. An empty or non-PDF type yields `false`.
 *
 * @param file - The file to check
 * @returns True if the file's MIME type is `application/pdf`
 */
export function isPdfFile(file: File): boolean {
	// Keep only the part before the first ';' so parameters do not defeat the match.
	const [essence = ''] = file.type.split(';');
	return essence.trim().toLowerCase() === 'application/pdf';
}
130159
/**
 * Check if a MIME type represents a PDF.
 *
 * MIME type names are case-insensitive and may be followed by parameters,
 * so only the essence (the part before the first `;`) is compared, after
 * trimming whitespace and lowercasing.
 *
 * @param mimeType - The MIME type to check, e.g. `application/pdf`
 * @returns True if the MIME type essence is `application/pdf`
 */
export function isPdfMimeType(mimeType: string): boolean {
	// Drop any ";param=value" suffix before comparing.
	const [essence = ''] = mimeType.split(';');
	return essence.trim().toLowerCase() === 'application/pdf';
}
0 commit comments