Skip to content

Commit 1b4eeac

Browse files
committed
Add DOCX conversion support using Pandoc and update README.md with new requirements
1 parent 5c8f094 commit 1b4eeac

File tree

4 files changed

+151
-13
lines changed

4 files changed

+151
-13
lines changed

README.md

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ Terminal-based file converter built with Go. It provides a modern, user-friendly
6262

6363
| Input | Output |
6464
|-------------------------------------------|---------------------------------------------------------------------------|
65-
| `.pdf`, `.md`, `.html` | `.pdf`, `.md`, `.html`, `.epub`, `.mobi`, `.azw`, `.azw3`, `.fb2` |
65+
| `.pdf`, `.md`, `.html` | `.pdf`, `.md`, `.html`, `.docx`, `.epub`, `.mobi`, `.azw`, `.azw3`, `.fb2` |
66+
| `.docx` | `.md`, `.html`, `.txt` |
6667
| `.epub`, `.mobi`, `.azw`, `.azw3`, `.fb2` | `.epub`, `.mobi`, `.azw`, `.azw3`, `.fb2`, `.pdf`, `.html`, `.txt`, `.md` |
6768
| `.csv` | `.xlsx` |
6869
| `.xlsx`, `.xls` | `.csv` |
@@ -73,24 +74,31 @@ Terminal-based file converter built with Go. It provides a modern, user-friendly
7374
- Markdown to styled HTML with responsive design
7475
- Markdown/HTML to EPUB conversion
7576
- EPUB/MOBI/AZW/AZW3/FB2 conversions via Calibre (if installed)
77+
- DOCX conversions via Pandoc (if installed)
7678
- PDF compression/optimization
7779
- CSV to Excel conversion with styled headers and auto-fit columns
7880
- Excel to CSV export (exports first sheet)
7981

80-
> **Note:** Video and audio conversion requires `ffmpeg`. Ebook conversions beyond EPUB require Calibre's `ebook-convert`.
82+
> **Note:** Video and audio conversion requires `ffmpeg`. Ebook conversions beyond EPUB require Calibre's `ebook-convert`. DOCX conversions require Pandoc.
8183
8284
**PATH Note (ebook-convert):**
8385
- **Linux:** Typically available at `/usr/bin/ebook-convert` (or `/snap/bin/ebook-convert`). Ensure the directory is on `PATH`.
8486
- **macOS (Homebrew):** `/opt/homebrew/bin/ebook-convert` (Apple Silicon) or `/usr/local/bin/ebook-convert` (Intel).
8587
- **Windows:** `C:\Program Files\Calibre2\ebook-convert.exe` (or `C:\Program Files (x86)\Calibre2\ebook-convert.exe`). Add the folder to `PATH` if not detected.
8688

89+
**PATH Note (pandoc):**
90+
- **Linux:** Typically available at `/usr/bin/pandoc` or `/usr/local/bin/pandoc`.
91+
- **macOS (Homebrew):** `/opt/homebrew/bin/pandoc` (Apple Silicon) or `/usr/local/bin/pandoc` (Intel).
92+
- **Windows:** `C:\Program Files\Pandoc\pandoc.exe` (or `C:\Program Files (x86)\Pandoc\pandoc.exe`). Add the folder to `PATH` if not detected.
93+
8794
## Installation
8895

8996
### Prerequisites
9097

9198
- **Go 1.21+**
9299
- **ffmpeg** (required for video/audio conversion)
93100
- **Calibre (ebook-convert)** (required for ebook conversions beyond EPUB)
101+
- **Pandoc** (required for DOCX conversions)
94102

95103
### Quick Install
96104

@@ -129,13 +137,13 @@ task build
129137

130138
```bash
131139
# Ubuntu/Debian
132-
sudo apt update && sudo apt install ffmpeg calibre
140+
sudo apt update && sudo apt install ffmpeg calibre pandoc
133141

134142
# Fedora
135-
sudo dnf install ffmpeg calibre
143+
sudo dnf install ffmpeg calibre pandoc
136144

137145
# Arch Linux
138-
sudo pacman -S ffmpeg calibre
146+
sudo pacman -S ffmpeg calibre pandoc
139147

140148
# Build
141149
go build -o golter main.go
@@ -156,6 +164,9 @@ brew install ffmpeg
156164
# Install Calibre (ebook-convert)
157165
brew install --cask calibre
158166

167+
# Install Pandoc
168+
brew install pandoc
169+
159170
# Build
160171
go build -o golter main.go
161172

@@ -175,12 +186,18 @@ winget install ffmpeg
175186
# Calibre (ebook-convert)
176187
winget install calibre.calibre
177188
189+
# Pandoc
190+
winget install --id JohnMacFarlane.Pandoc
191+
178192
# Or using chocolatey
179193
choco install ffmpeg
180194
181195
# Calibre (ebook-convert)
182196
choco install calibre
183197
198+
# Pandoc
199+
choco install pandoc
200+
184201
# Build
185202
go build -o golter.exe main.go
186203

internal/converter/document.go

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,11 @@ func (c *DocumentConverter) CanConvert(srcExt, targetExt string) bool {
7171
case ".pdf":
7272
return targetExt == ".md" || targetExt == ".pdf"
7373
case ".md":
74-
return targetExt == ".pdf" || targetExt == ".html" || isEbookExt(targetExt)
74+
return targetExt == ".pdf" || targetExt == ".html" || targetExt == ".docx" || isEbookExt(targetExt)
7575
case ".html":
76-
return targetExt == ".md" || isEbookExt(targetExt)
76+
return targetExt == ".md" || targetExt == ".docx" || isEbookExt(targetExt)
77+
case ".docx":
78+
return targetExt == ".md" || targetExt == ".html" || targetExt == ".txt"
7779
case ".csv":
7880
return targetExt == ".xlsx" || targetExt == ".xls"
7981
case ".xlsx", ".xls":
@@ -88,7 +90,7 @@ func (c *DocumentConverter) CanConvert(srcExt, targetExt string) bool {
8890
}
8991

9092
func (c *DocumentConverter) SupportedSourceExtensions() []string {
91-
return []string{".pdf", ".md", ".html", ".epub", ".mobi", ".azw", ".azw3", ".fb2", ".csv", ".xlsx", ".xls"}
93+
return []string{".pdf", ".md", ".html", ".docx", ".epub", ".mobi", ".azw", ".azw3", ".fb2", ".csv", ".xlsx", ".xls"}
9294
}
9395

9496
func (c *DocumentConverter) SupportedTargetFormats(srcExt string) []string {
@@ -101,9 +103,11 @@ func (c *DocumentConverter) SupportedTargetFormats(srcExt string) []string {
101103
case ".pdf":
102104
return []string{".md", ".pdf"} // .pdf -> .pdf implies compression
103105
case ".md":
104-
return []string{".html", ".pdf", ".epub", ".mobi", ".azw", ".azw3", ".fb2"}
106+
return []string{".html", ".pdf", ".docx", ".epub", ".mobi", ".azw", ".azw3", ".fb2"}
105107
case ".html":
106-
return []string{".md", ".epub", ".mobi", ".azw", ".azw3", ".fb2"}
108+
return []string{".md", ".docx", ".epub", ".mobi", ".azw", ".azw3", ".fb2"}
109+
case ".docx":
110+
return []string{".md", ".html", ".txt"}
107111
case ".csv":
108112
return []string{".xlsx"}
109113
case ".xlsx", ".xls":
@@ -145,6 +149,8 @@ func (c *DocumentConverter) Convert(src, target string, opts Options) error {
145149
return c.convertMarkdownToHTML(src, target)
146150
} else if targetExt == ".pdf" {
147151
return c.convertMarkdownToPDF(src, target)
152+
} else if targetExt == ".docx" {
153+
return c.convertWithPandoc(src, target, opts)
148154
} else if targetExt == ".epub" {
149155
return c.convertMarkdownToEPUB(src, target)
150156
} else if isEbookExt(targetExt) {
@@ -153,11 +159,17 @@ func (c *DocumentConverter) Convert(src, target string, opts Options) error {
153159
case ".html":
154160
if targetExt == ".md" {
155161
return c.convertHTMLToMarkdown(src, target)
162+
} else if targetExt == ".docx" {
163+
return c.convertWithPandoc(src, target, opts)
156164
} else if targetExt == ".epub" {
157165
return c.convertHTMLToEPUB(src, target)
158166
} else if isEbookExt(targetExt) {
159167
return c.convertHTMLToEbook(src, target, opts)
160168
}
169+
case ".docx":
170+
if targetExt == ".md" || targetExt == ".html" || targetExt == ".txt" {
171+
return c.convertWithPandoc(src, target, opts)
172+
}
161173
case ".csv":
162174
if targetExt == ".xlsx" || targetExt == ".xls" {
163175
return c.convertCSVToExcel(src, target)
@@ -471,6 +483,28 @@ func (c *DocumentConverter) convertEbookWithCalibre(src, target string, opts Opt
471483
return nil
472484
}
473485

486+
func (c *DocumentConverter) convertWithPandoc(src, target string, opts Options) error {
487+
_, err := exec.LookPath("pandoc")
488+
if err != nil {
489+
return fmt.Errorf("pandoc not found: please install Pandoc to convert DOCX formats (https://pandoc.org)")
490+
}
491+
492+
args := []string{src, "-o", target}
493+
if extra, ok := opts["pandocArgs"].([]string); ok && len(extra) > 0 {
494+
args = append(args, extra...)
495+
} else if extraStr, ok := opts["pandocArgs"].(string); ok && strings.TrimSpace(extraStr) != "" {
496+
args = append(args, strings.Fields(extraStr)...)
497+
}
498+
499+
cmd := exec.Command("pandoc", args...)
500+
output, err := cmd.CombinedOutput()
501+
if err != nil {
502+
return fmt.Errorf("pandoc failed: %w\nOutput: %s", err, string(output))
503+
}
504+
505+
return nil
506+
}
507+
474508
// convertCSVToExcel converts a CSV file to Excel format
475509
func (c *DocumentConverter) convertCSVToExcel(src, target string) error {
476510
// Open CSV file

internal/converter/document_test.go

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func TestDocumentConverter_SupportedExtensions(t *testing.T) {
2020
if len(srcExts) == 0 {
2121
t.Error("SupportedSourceExtensions returned empty list")
2222
}
23-
expectedSrc := []string{".pdf", ".md", ".html", ".epub", ".mobi", ".azw", ".azw3", ".fb2", ".csv", ".xlsx", ".xls"}
23+
expectedSrc := []string{".pdf", ".md", ".html", ".docx", ".epub", ".mobi", ".azw", ".azw3", ".fb2", ".csv", ".xlsx", ".xls"}
2424
for _, exp := range expectedSrc {
2525
found := false
2626
for _, got := range srcExts {
@@ -295,6 +295,43 @@ func TestDocumentConverter_Convert_EbookFromMarkdownAndHTML(t *testing.T) {
295295
}
296296
}
297297

298+
func TestDocumentConverter_Convert_DocxPandoc(t *testing.T) {
299+
if !ensurePandocInPath(t) {
300+
t.Skip("pandoc not found, skipping DOCX conversion tests")
301+
}
302+
303+
tmpDir, err := os.MkdirTemp("", "golter_docx_test")
304+
if err != nil {
305+
t.Fatalf("failed to create temp dir: %v", err)
306+
}
307+
defer os.RemoveAll(tmpDir)
308+
309+
c := &DocumentConverter{}
310+
311+
mdPath := filepath.Join(tmpDir, "src.md")
312+
if err := os.WriteFile(mdPath, []byte("# Title\n\nHello docx"), 0644); err != nil {
313+
t.Fatalf("failed to write md: %v", err)
314+
}
315+
316+
// MD -> DOCX
317+
docxPath := filepath.Join(tmpDir, "out.docx")
318+
if err := c.Convert(mdPath, docxPath, Options{}); err != nil {
319+
t.Fatalf("Convert(MD->DOCX) failed: %v", err)
320+
}
321+
if _, err := os.Stat(docxPath); os.IsNotExist(err) {
322+
t.Fatalf("Target DOCX not created: %s", docxPath)
323+
}
324+
325+
// DOCX -> MD
326+
backToMD := filepath.Join(tmpDir, "back.md")
327+
if err := c.Convert(docxPath, backToMD, Options{}); err != nil {
328+
t.Fatalf("Convert(DOCX->MD) failed: %v", err)
329+
}
330+
if _, err := os.Stat(backToMD); os.IsNotExist(err) {
331+
t.Fatalf("Target MD not created: %s", backToMD)
332+
}
333+
}
334+
298335
func TestDocumentConverter_Name(t *testing.T) {
299336
c := &DocumentConverter{}
300337
if !strings.Contains(c.Name(), "Document Converter") {
@@ -317,14 +354,22 @@ func TestDocumentConverter_CanConvert(t *testing.T) {
317354
// Markdown
318355
{".md", ".pdf", true},
319356
{".md", ".html", true},
357+
{".md", ".docx", true},
320358
{".md", ".epub", true},
321359
{".md", ".txt", false},
322360

323361
// HTML
324362
{".html", ".md", true},
363+
{".html", ".docx", true},
325364
{".html", ".epub", true},
326365
{".html", ".pdf", false},
327366

367+
// DOCX
368+
{".docx", ".md", true},
369+
{".docx", ".html", true},
370+
{".docx", ".txt", true},
371+
{".docx", ".pdf", false},
372+
328373
// CSV/Excel
329374
{".csv", ".xlsx", true},
330375
{".csv", ".xls", true},
@@ -364,8 +409,9 @@ func TestDocumentConverter_SupportedTargetFormats(t *testing.T) {
364409
want []string
365410
}{
366411
{".pdf", []string{".md", ".pdf"}},
367-
{".md", []string{".html", ".pdf", ".epub", ".mobi", ".azw", ".azw3", ".fb2"}},
412+
{".md", []string{".html", ".pdf", ".docx", ".epub", ".mobi", ".azw", ".azw3", ".fb2"}},
368413
{".csv", []string{".xlsx"}},
414+
{".docx", []string{".md", ".html", ".txt"}},
369415
{".epub", []string{".pdf", ".md", ".html", ".mobi", ".azw", ".azw3", ".fb2", ".txt"}},
370416
{".mobi", []string{".epub", ".azw", ".azw3", ".fb2", ".pdf", ".html", ".txt", ".md"}},
371417
}
@@ -434,3 +480,44 @@ func ensureEbookConvertInPath(t *testing.T) bool {
434480

435481
return false
436482
}
483+
484+
// ensurePandocInPath reports whether the pandoc binary can be resolved
// through PATH.
//
// If pandoc is not already resolvable, a few well-known install locations are
// probed; when one exists, its directory is prepended to PATH for the duration
// of the calling test (via t.Setenv, which restores the previous value during
// test cleanup) and the lookup is retried. Because it may call t.Setenv, this
// helper must not be used from tests that run in parallel.
func ensurePandocInPath(t *testing.T) bool {
	t.Helper()

	binName := "pandoc"
	if runtime.GOOS == "windows" {
		binName = "pandoc.exe"
	}

	if _, err := exec.LookPath(binName); err == nil {
		return true
	}

	var candidates []string
	if runtime.GOOS == "windows" {
		// Raw string literals: backslashes are literal and must not be doubled.
		candidates = []string{
			`C:\Program Files\Pandoc\pandoc.exe`,
			`C:\Program Files (x86)\Pandoc\pandoc.exe`,
		}
	} else {
		candidates = []string{
			"/usr/bin/pandoc",
			"/usr/local/bin/pandoc",
			"/opt/homebrew/bin/pandoc",
		}
	}

	for _, p := range candidates {
		if _, err := os.Stat(p); err != nil {
			continue
		}

		dir := filepath.Dir(p)
		current := os.Getenv("PATH")

		// Compare against whole PATH entries; a substring match would treat
		// e.g. "/usr/bin-old" as if "/usr/bin" were already listed.
		listed := false
		for _, entry := range filepath.SplitList(current) {
			if entry == dir {
				listed = true
				break
			}
		}
		if !listed {
			updated := dir
			if current != "" {
				updated = dir + string(os.PathListSeparator) + current
			}
			t.Setenv("PATH", updated)
		}

		// Only report success when the binary actually resolves; a file that
		// exists but is not executable must not count as "pandoc available".
		if _, err := exec.LookPath(binName); err == nil {
			return true
		}
	}

	return false
}

internal/version/version.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111

1212
// Current represents the current version of the application
1313
const (
14-
Current = "0.1.1"
14+
Current = "0.1.2"
1515
Repo = "sametcn99/golter"
1616
)
1717

0 commit comments

Comments
 (0)