Skip to content

Commit 8ab70cf

Browse files
author
Shebin
committed
added feature to extract all images from the pdf #44
1 parent 6668063 commit 8ab70cf

File tree

3 files changed

+173
-1
lines changed

3 files changed

+173
-1
lines changed

.github/workflows/node.js.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,4 @@ jobs:
9797
9898
- name: Enforce coverage thresholds
9999
run: |
100-
npx nyc check-coverage --lines 75 --functions 75 --branches 75 --statements 75
100+
npx nyc check-coverage --lines 80 --functions 80 --branches 80 --statements 80

test/image_extraction.test.js

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,39 @@ describe('PDF to Images with images', () => {
4040
expect(extractedImagePaths).to.have.lengthOf(3);
4141
});
4242
});
43+
44+
describe('Default options', () => {
  it('should extract images with default options when options not provided', async () => {
    // Only the PDF path is passed; no options object is supplied.
    const imagePaths = await pdf2html.extractImages(pdfImageFilepath);

    should.exist(imagePaths);
    expect(imagePaths).to.be.an('array');
    expect(imagePaths).to.have.lengthOf(3);

    // Each returned path must point into the default image directory.
    for (const savedPath of imagePaths) {
      expect(savedPath).to.include('/files/image/');
    }
  });
});
56+
57+
describe('Error handling', () => {
  // Both tests capture the rejection inside the try block and assert on it
  // afterwards. Calling expect.fail() inside the try would be caught by the
  // test's own catch clause, and a catch that only checks "an error exists"
  // would then pass even though extractImages never rejected.

  it('should handle non-existent PDF file', async () => {
    let caught;
    try {
      await pdf2html.extractImages('/path/to/non-existent.pdf');
    } catch (error) {
      caught = error;
    }

    should.exist(caught, 'Should have thrown an error');
    expect(caught.message).to.include('not found');
  });

  it('should handle invalid PDF buffer', async () => {
    const invalidBuffer = Buffer.from('This is not a PDF');

    let caught;
    try {
      await pdf2html.extractImages(invalidBuffer, { outputDirectory: outputDir });
    } catch (error) {
      caught = error;
    }

    // Fails when extractImages resolves instead of rejecting.
    should.exist(caught, 'Should have thrown an error');
  });
});
4378
});

test/test.js

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,3 +348,140 @@ describe('Buffer vs File Path Consistency', () => {
348348
expect(metaFromFile).to.deep.equal(metaFromBuffer);
349349
});
350350
});
351+
352+
// Internal module tests
353+
const CommandExecutor = require('../lib/CommandExecutor');
354+
const { PDFProcessingError } = require('../lib/errors');
355+
356+
describe('CommandExecutor', () => {
  describe('execute', () => {
    /**
     * Runs CommandExecutor.execute and returns the error it rejects with.
     *
     * expect.fail() is called outside the try block, so a command that
     * unexpectedly succeeds surfaces as "Should have thrown an error" rather
     * than being routed through the catch clause.
     *
     * @param {string} command - Executable to spawn.
     * @param {string[]} args - Arguments passed to the executable.
     * @returns {Promise<Error>} The rejection raised by execute().
     */
    const captureRejection = async (command, args) => {
      try {
        await CommandExecutor.execute(command, args);
      } catch (error) {
        return error;
      }
      return expect.fail('Should have thrown an error');
    };

    it('should execute a simple command successfully', async () => {
      const result = await CommandExecutor.execute('echo', ['hello']);
      expect(result.trim()).to.equal('hello');
    });

    it('should handle commands with multiple arguments', async () => {
      const result = await CommandExecutor.execute('echo', ['hello', 'world']);
      expect(result.trim()).to.equal('hello world');
    });

    it('should reject when command exits with non-zero code', async () => {
      const error = await captureRejection('sh', ['-c', 'exit 1']);

      expect(error).to.be.instanceOf(PDFProcessingError);
      expect(error.command).to.include('sh -c exit 1');
      expect(error.exitCode).to.equal(1);
    });

    it('should include stderr in error message when command fails', async () => {
      const error = await captureRejection('sh', ['-c', 'echo "error message" >&2; exit 1']);

      expect(error).to.be.instanceOf(PDFProcessingError);
      expect(error.message).to.include('error message');
    });

    it('should handle spawn errors for non-existent commands', async () => {
      const error = await captureRejection('nonexistentcommand123456', ['arg']);

      expect(error).to.be.instanceOf(PDFProcessingError);
      expect(error.message).to.include('Failed to spawn process');
      expect(error.command).to.include('nonexistentcommand123456');
    });

    it('should handle process termination by signal', async () => {
      // The spawned shell sends SIGKILL to itself ($$ is its own PID), so the
      // test needs no start-up delay and never touches unrelated processes,
      // unlike a machine-wide `pkill -9 sleep`.
      const error = await captureRejection('sh', ['-c', 'kill -9 $$']);

      expect(error).to.be.instanceOf(PDFProcessingError);
      // The error should mention the signal or non-zero exit
      expect(error.message).to.match(/Process exited with code|signal/);
    });

    it('should capture stdout correctly', async () => {
      const result = await CommandExecutor.execute('sh', ['-c', 'echo "line1"; echo "line2"']);
      expect(result).to.include('line1');
      expect(result).to.include('line2');
    });

    it('should pass options to spawn', async () => {
      const result = await CommandExecutor.execute('pwd', [], { cwd: '/tmp' });
      // On macOS, /tmp is a symlink to /private/tmp
      expect(result.trim()).to.match(/\/tmp$|\/private\/tmp$/);
    });
  });
});
434+
435+
// Additional internal module tests for coverage
436+
const FileManager = require('../lib/FileManager');
437+
const HTMLParser = require('../lib/HTMLParser');
438+
const ImageProcessor = require('../lib/ImageProcessor');
439+
440+
describe('Internal Modules - Coverage Tests', () => {
  describe('FileManager', () => {
    it('should use default .pdf extension when not provided', async () => {
      const buffer = Buffer.from('test content');
      // No extension argument is passed, exercising the default.
      const filePath = await FileManager.createTempFileFromBuffer(buffer);

      try {
        expect(filePath).to.include('.pdf');
      } finally {
        // Clean up even when the assertion fails, so no temp file is left behind.
        if (fs.existsSync(filePath)) {
          fs.unlinkSync(filePath);
        }
      }
    });
  });

  describe('HTMLParser', () => {
    it('should use default empty options when not provided', () => {
      const html = '<html><body><div class="page">Page 1</div><div class="page">Page 2</div></body></html>';
      // extractPages is called without an options argument.
      const pages = HTMLParser.extractPages(html);

      expect(pages).to.be.an('array');
      expect(pages).to.have.length(2);
      expect(pages[0]).to.include('Page 1');
      expect(pages[1]).to.include('Page 2');
    });
  });

  describe('ImageProcessor', () => {
    it('should throw error when sharp fails to process image', async () => {
      // Create a file that's not a valid image
      const invalidImagePath = path.join(__dirname, 'test-invalid.txt');
      fs.writeFileSync(invalidImagePath, 'This is not an image');

      // Capture the rejection and assert after the try block, so a resize()
      // that unexpectedly succeeds is reported as a missing error rather
      // than as a message mismatch on the failure sentinel.
      let caught;
      try {
        await ImageProcessor.resize(invalidImagePath, 100, 100);
      } catch (error) {
        caught = error;
      } finally {
        // Clean up
        if (fs.existsSync(invalidImagePath)) {
          fs.unlinkSync(invalidImagePath);
        }
      }

      expect(caught, 'Should have thrown an error').to.exist;
      expect(caught.message).to.include('Missing output file');
    });
  });
});

0 commit comments

Comments
 (0)