Skip to content

Commit cb8d8d7

Browse files
committed
Replace client-side libraries with pandoc for document format conversions
1 parent 3636637 commit cb8d8d7

File tree

3 files changed

+159
-148
lines changed

3 files changed

+159
-148
lines changed

src/services/excel-service.js

Lines changed: 50 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,62 @@
1-
import * as XLSX from 'xlsx';
2-
import { JSDOM } from 'jsdom';
1+
import { exec } from 'child_process';
2+
import fs from 'fs';
3+
import path from 'path';
4+
import { promisify } from 'util';
5+
import { v4 as uuidv4 } from 'uuid';
6+
import os from 'os';
7+
8+
const execPromise = promisify(exec);
39

410
/**
511
* Service for generating Excel spreadsheets from HTML content
612
*/
713
export const excelService = {
  /**
   * Generate an Excel spreadsheet from HTML content using pandoc.
   *
   * The HTML is written to a temporary file, pandoc is invoked to convert it,
   * and the resulting file is read back into memory. Both temporary files are
   * removed afterwards, whether or not the conversion succeeded.
   *
   * NOTE(review): pandoc's documented output formats do not appear to include
   * `xlsx`; confirm that the deployed pandoc accepts `-t xlsx`, otherwise every
   * call to this method will reject.
   *
   * @param {string} html - The HTML content containing tables
   * @param {string} sheetName - The name for the worksheet (default: 'Sheet1').
   *   Not forwarded to pandoc; kept so existing callers keep working.
   * @returns {Promise<Buffer>} - A buffer containing the Excel file data
   * @throws {Error} - If writing the input, running pandoc, or reading the
   *   output fails; the underlying error is attached as `cause`
   */
  async generateExcel(html, sheetName = 'Sheet1') {
    // Paths are computed outside the try block so that `finally` can always
    // see them and clean up, even when the conversion fails half-way.
    const tempDir = os.tmpdir();
    const inputPath = path.join(tempDir, `${uuidv4()}.html`);
    const outputPath = path.join(tempDir, `${uuidv4()}.xlsx`);

    try {
      // Write HTML to temporary file
      await fs.promises.writeFile(inputPath, html, 'utf8');

      // The command only contains paths generated above, never caller input.
      const command = `pandoc -f html -t xlsx "${inputPath}" -o "${outputPath}"`;
      console.log(`Executing pandoc command: ${command}`);

      await execPromise(command);

      // Read the generated XLSX file
      return await fs.promises.readFile(outputPath);
    } catch (error) {
      console.error('Error generating Excel document with pandoc:', error);

      // If pandoc fails, surface what it printed to stderr
      if (error.stderr) {
        console.error('Pandoc error output:', error.stderr);
      }

      throw new Error(`Failed to generate Excel document: ${error.message}`, { cause: error });
    } finally {
      // Remove each temp file independently so one failure does not skip the
      // other. A missing file (ENOENT) is expected when pandoc never produced
      // any output, so it is not worth a warning.
      for (const tempPath of [inputPath, outputPath]) {
        try {
          await fs.promises.unlink(tempPath);
        } catch (cleanupError) {
          if (cleanupError.code !== 'ENOENT') {
            console.warn('Error cleaning up temporary files:', cleanupError);
          }
        }
      }
    }
  }
};

src/services/markdown-service.js

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
import { marked } from 'marked';
2-
import TurndownService from 'turndown';
2+
import { exec } from 'child_process';
3+
import fs from 'fs';
4+
import path from 'path';
5+
import { promisify } from 'util';
6+
import { v4 as uuidv4 } from 'uuid';
7+
import os from 'os';
8+
9+
const execPromise = promisify(exec);
310

411
/**
512
* Service for converting between Markdown and HTML
@@ -27,23 +34,50 @@ export const markdownService = {
2734
},
2835

2936
/**
30-
* Convert HTML to Markdown
37+
* Convert HTML to Markdown using pandoc
3138
* @param {string} html - The HTML content to convert
32-
* @param {Object} options - Options for the turndown library
33-
* @returns {string} - The Markdown content
39+
* @param {Object} options - Options for pandoc conversion
40+
* @returns {Promise<string>} - The Markdown content
3441
*/
35-
htmlToMarkdown(html, options = {}) {
36-
// Create a new TurndownService instance
37-
const turndownService = new TurndownService(options);
38-
39-
// Configure turndown options
40-
turndownService.use([
41-
// Add any plugins or rules here
42-
]);
43-
44-
// Convert HTML to Markdown
45-
const markdown = turndownService.turndown(html);
46-
47-
return markdown;
42+
async htmlToMarkdown(html, options = {}) {
43+
try {
44+
// Create temporary files for input and output
45+
const tempDir = os.tmpdir();
46+
const inputId = uuidv4();
47+
const outputId = uuidv4();
48+
const inputPath = path.join(tempDir, `${inputId}.html`);
49+
const outputPath = path.join(tempDir, `${outputId}.md`);
50+
51+
// Write HTML to temporary file
52+
await fs.promises.writeFile(inputPath, html, 'utf8');
53+
54+
// Use pandoc to convert HTML to Markdown
55+
const command = `pandoc -f html -t markdown_github "${inputPath}" -o "${outputPath}"`;
56+
console.log(`Executing pandoc command: ${command}`);
57+
58+
await execPromise(command);
59+
60+
// Read the generated Markdown file
61+
const markdown = await fs.promises.readFile(outputPath, 'utf8');
62+
63+
// Clean up temporary files
64+
try {
65+
await fs.promises.unlink(inputPath);
66+
await fs.promises.unlink(outputPath);
67+
} catch (cleanupError) {
68+
console.warn('Error cleaning up temporary files:', cleanupError);
69+
}
70+
71+
return markdown;
72+
} catch (error) {
73+
console.error('Error converting HTML to Markdown with pandoc:', error);
74+
75+
// If pandoc fails, provide a detailed error message
76+
if (error.stderr) {
77+
console.error('Pandoc error output:', error.stderr);
78+
}
79+
80+
throw new Error(`Failed to convert HTML to Markdown: ${error.message}`);
81+
}
4882
}
4983
};

src/services/ppt-service.js

Lines changed: 58 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,72 @@
1-
import PptxGenJS from 'pptxgenjs';
2-
import { JSDOM } from 'jsdom';
1+
import { exec } from 'child_process';
2+
import fs from 'fs';
3+
import path from 'path';
4+
import { promisify } from 'util';
5+
import { v4 as uuidv4 } from 'uuid';
6+
import os from 'os';
7+
8+
const execPromise = promisify(exec);
39

410
/**
511
* Service for generating PowerPoint presentations from HTML content
612
*/
713
export const pptService = {
  /**
   * Generate a PowerPoint presentation from HTML content using pandoc.
   *
   * The HTML is written to a temporary file, pandoc converts it to PPTX, and
   * the resulting file is read back into memory. Both temporary files are
   * removed afterwards, whether or not the conversion succeeded.
   *
   * pandoc is started with an argument array (no shell), so the
   * caller-supplied title is passed through verbatim and cannot be
   * interpreted as shell syntax.
   *
   * @param {string} html - The HTML content to convert to PowerPoint
   * @param {string} title - The title for the presentation
   * @returns {Promise<Buffer>} - A buffer containing the PowerPoint file data
   * @throws {Error} - If writing the input, running pandoc, or reading the
   *   output fails, or if pandoc writes non-warning output to stderr; the
   *   underlying error is attached as `cause`
   */
  async generatePpt(html, title = 'Presentation') {
    // Paths are computed outside the try block so that `finally` can always
    // see them and clean up, even when the conversion fails half-way.
    const tempDir = os.tmpdir();
    const inputPath = path.join(tempDir, `${uuidv4()}.html`);
    const outputPath = path.join(tempDir, `${uuidv4()}.pptx`);

    try {
      // Write HTML to temporary file
      await fs.promises.writeFile(inputPath, html, 'utf8');

      // Build the pandoc argument list; each element reaches pandoc as-is.
      const args = ['-f', 'html', '-t', 'pptx', inputPath, '-o', outputPath];

      // Add title if provided
      if (title) {
        args.push('--metadata', `title=${title}`);
      }

      console.log(`Executing pandoc with arguments: ${JSON.stringify(args)}`);

      // execFile is loaded locally so this block does not depend on changes
      // to the file-level imports; the module is already cached by Node.
      const { execFile } = await import('child_process');
      const { stderr } = await promisify(execFile)('pandoc', args);

      // pandoc can exit 0 yet still report problems on stderr; anything that
      // is not a warning is treated as a failure.
      if (stderr && !stderr.includes('WARNING')) {
        throw new Error(`Pandoc error: ${stderr}`);
      }

      // Read the generated PPTX file
      return await fs.promises.readFile(outputPath);
    } catch (error) {
      console.error('Error generating PowerPoint with pandoc:', error);

      // If pandoc fails, surface what it printed to stderr
      if (error.stderr) {
        console.error('Pandoc error output:', error.stderr);
      }

      throw new Error(`Failed to generate PowerPoint: ${error.message}`, { cause: error });
    } finally {
      // Remove each temp file independently so one failure does not skip the
      // other. A missing file (ENOENT) is expected when pandoc never produced
      // any output, so it is not worth a warning.
      for (const tempPath of [inputPath, outputPath]) {
        try {
          await fs.promises.unlink(tempPath);
        } catch (cleanupError) {
          if (cleanupError.code !== 'ENOENT') {
            console.warn('Error cleaning up temporary files:', cleanupError);
          }
        }
      }
    }
  }
};

0 commit comments

Comments
 (0)