@@ -4,65 +4,85 @@ import path from 'path';
44import { promisify } from 'util' ;
55import { v4 as uuidv4 } from 'uuid' ;
66import os from 'os' ;
7+ import * as XLSX from 'xlsx' ;
78
// Promise-returning form of `exec` (built with util.promisify) so the pandoc command can be awaited.
89const execPromise = promisify ( exec ) ;
910
1011/**
1112 * Service for converting XLSX files to Markdown content
12- * Note: Pandoc's support for XLSX is limited - it may not handle complex spreadsheets well
1313 */
1414export const xlsxToMarkdownService = {
1515 /**
16- * Convert an XLSX file to Markdown content using pandoc
16+ * Convert an XLSX file to Markdown content by first converting to CSV
17+ * and then using pandoc to convert CSV to Markdown
1718 * @param {Buffer } xlsxBuffer - The XLSX file buffer to convert
1819 * @returns {Promise<string> } - A string containing the Markdown content
1920 */
2021 async convertToMarkdown ( xlsxBuffer ) {
2122 try {
22- // Create temporary files for input and output
23+ // Create temporary directory for our files
2324 const tempDir = os . tmpdir ( ) ;
2425 const inputId = uuidv4 ( ) ;
25- const outputId = uuidv4 ( ) ;
2626 const inputPath = path . join ( tempDir , `${ inputId } .xlsx` ) ;
27- const outputPath = path . join ( tempDir , `${ outputId } .md` ) ;
2827
2928 // Write XLSX buffer to temporary file
3029 await fs . promises . writeFile ( inputPath , xlsxBuffer ) ;
3130
32- // Try to use pandoc to convert XLSX to Markdown
33- // Note: This may fail as pandoc has limited XLSX support
34- // We'll attempt it but provide a fallback error message
35- try {
36- const command = `pandoc -f xlsx -t markdown "${ inputPath } " -o "${ outputPath } "` ;
31+ // Read the workbook using xlsx package
32+ const workbook = XLSX . readFile ( inputPath ) ;
33+
34+ // Array to store markdown content from each sheet
35+ const markdownParts = [ ] ;
36+
37+ // Process each sheet in the workbook
38+ for ( const sheetName of workbook . SheetNames ) {
39+ console . log ( `Processing sheet: ${ sheetName } ` ) ;
40+
41+ // Get the worksheet
42+ const worksheet = workbook . Sheets [ sheetName ] ;
43+
44+ // Convert worksheet to CSV
45+ const csvContent = XLSX . utils . sheet_to_csv ( worksheet ) ;
46+
47+ // Create temporary files for CSV and Markdown
48+ const csvId = uuidv4 ( ) ;
49+ const mdId = uuidv4 ( ) ;
50+ const csvPath = path . join ( tempDir , `${ csvId } .csv` ) ;
51+ const mdPath = path . join ( tempDir , `${ mdId } .md` ) ;
52+
53+ // Write CSV content to file
54+ await fs . promises . writeFile ( csvPath , csvContent ) ;
55+
56+ // Use pandoc to convert CSV to Markdown
57+ const command = `pandoc -f csv -t markdown "${ csvPath } " -o "${ mdPath } "` ;
3758 console . log ( `Executing pandoc command: ${ command } ` ) ;
3859
3960 await execPromise ( command ) ;
4061
4162 // Read the generated Markdown file
42- const markdownContent = await fs . promises . readFile ( outputPath , 'utf8' ) ;
63+ const sheetMarkdown = await fs . promises . readFile ( mdPath , 'utf8' ) ;
4364
44- // Clean up temporary files
45- try {
46- await fs . promises . unlink ( inputPath ) ;
47- await fs . promises . unlink ( outputPath ) ;
48- } catch ( cleanupError ) {
49- console . warn ( 'Error cleaning up temporary files:' , cleanupError ) ;
50- }
51-
52- return markdownContent ;
53- } catch ( pandocError ) {
54- // If pandoc doesn't support XLSX directly, provide a helpful error
55- console . warn ( 'Pandoc XLSX conversion failed, this format may not be fully supported:' , pandocError . message ) ;
65+ // Add sheet name as header and append to markdown parts
66+ markdownParts . push ( `# ${ sheetName } \n\n${ sheetMarkdown } \n\n` ) ;
5667
57- // Clean up input file
68+ // Clean up temporary CSV and MD files
5869 try {
59- await fs . promises . unlink ( inputPath ) ;
70+ await fs . promises . unlink ( csvPath ) ;
71+ await fs . promises . unlink ( mdPath ) ;
6072 } catch ( cleanupError ) {
61- console . warn ( 'Error cleaning up input file :' , cleanupError ) ;
73+ console . warn ( 'Error cleaning up temporary files :' , cleanupError ) ;
6274 }
63-
64- throw new Error ( 'XLSX to Markdown conversion is not fully supported by pandoc. Consider converting the Excel file to CSV or another supported format first.' ) ;
6575 }
76+
77+ // Clean up the input XLSX file
78+ try {
79+ await fs . promises . unlink ( inputPath ) ;
80+ } catch ( cleanupError ) {
81+ console . warn ( 'Error cleaning up input file:' , cleanupError ) ;
82+ }
83+
84+ // Combine all markdown parts
85+ return markdownParts . join ( '---\n\n' ) ;
6686 } catch ( error ) {
6787 console . error ( 'Error converting XLSX to Markdown:' , error ) ;
6888
0 commit comments