Skip to content

Commit a9ef820

Browse files
committed
organize the codebase into modules for better maintainability.
1 parent 1b4eeac commit a9ef820

22 files changed

+1898
-1783
lines changed

internal/converter/doc_ebook.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package converter
2+
3+
import (
4+
"fmt"
5+
"io"
6+
"os"
7+
"os/exec"
8+
"path/filepath"
9+
"strings"
10+
11+
md "github.com/JohannesKaufmann/html-to-markdown"
12+
"github.com/go-pdf/fpdf"
13+
"github.com/taylorskalyo/goreader/epub"
14+
)
15+
16+
func (c *DocumentConverter) convertEbookToMarkdown(src, target string, opts Options) error {
17+
tempHTML, cleanup, err := tempPathWithExt("golter_ebook_html", ".html")
18+
if err != nil {
19+
return err
20+
}
21+
defer cleanup()
22+
23+
if err := c.convertEbookWithCalibre(src, tempHTML, opts); err != nil {
24+
return err
25+
}
26+
27+
return c.convertHTMLToMarkdown(tempHTML, target)
28+
}
29+
30+
func (c *DocumentConverter) convertEbookWithCalibre(src, target string, opts Options) error {
31+
_, err := exec.LookPath("ebook-convert")
32+
if err != nil {
33+
return fmt.Errorf("ebook-convert not found: please install Calibre to convert ebook formats (https://calibre-ebook.com)")
34+
}
35+
36+
args := []string{src, target}
37+
if extra, ok := opts["ebookArgs"].([]string); ok && len(extra) > 0 {
38+
args = append(args, extra...)
39+
} else if extraStr, ok := opts["ebookArgs"].(string); ok && strings.TrimSpace(extraStr) != "" {
40+
args = append(args, strings.Fields(extraStr)...)
41+
}
42+
43+
cmd := exec.Command("ebook-convert", args...)
44+
output, err := cmd.CombinedOutput()
45+
if err != nil {
46+
return fmt.Errorf("ebook-convert failed: %w\nOutput: %s", err, string(output))
47+
}
48+
49+
return nil
50+
}
51+
52+
func (c *DocumentConverter) convertEPUBToMarkdown(src, target string) error {
53+
rc, err := epub.OpenReader(src)
54+
if err != nil {
55+
return fmt.Errorf("failed to open EPUB: %w", err)
56+
}
57+
defer rc.Close()
58+
59+
if len(rc.Rootfiles) == 0 {
60+
return fmt.Errorf("no rootfiles found in EPUB")
61+
}
62+
63+
book := rc.Rootfiles[0]
64+
var contentBuilder strings.Builder
65+
converter := md.NewConverter("", true, nil)
66+
67+
// Iterate through spine items
68+
for _, item := range book.Spine.Itemrefs {
69+
if item.Item == nil {
70+
continue
71+
}
72+
73+
// Open the file from the EPUB
74+
f, err := item.Item.Open()
75+
if err != nil {
76+
continue
77+
}
78+
79+
b, err := io.ReadAll(f)
80+
f.Close()
81+
if err != nil {
82+
continue
83+
}
84+
85+
// Convert HTML content to Markdown
86+
markdown, err := converter.ConvertString(string(b))
87+
if err != nil {
88+
continue
89+
}
90+
91+
contentBuilder.WriteString(markdown)
92+
contentBuilder.WriteString("\n\n---\n\n")
93+
}
94+
95+
if err := os.WriteFile(target, []byte(contentBuilder.String()), 0644); err != nil {
96+
return fmt.Errorf("failed to write markdown file: %w", err)
97+
}
98+
99+
return nil
100+
}
101+
102+
func (c *DocumentConverter) convertEPUBToHTML(src, target string) error {
103+
rc, err := epub.OpenReader(src)
104+
if err != nil {
105+
return fmt.Errorf("failed to open EPUB: %w", err)
106+
}
107+
defer rc.Close()
108+
109+
if len(rc.Rootfiles) == 0 {
110+
return fmt.Errorf("no rootfiles found in EPUB")
111+
}
112+
113+
book := rc.Rootfiles[0]
114+
var contentBuilder strings.Builder
115+
116+
contentBuilder.WriteString("<!DOCTYPE html><html><body>")
117+
118+
// Iterate through spine items
119+
for _, item := range book.Spine.Itemrefs {
120+
if item.Item == nil {
121+
continue
122+
}
123+
124+
f, err := item.Item.Open()
125+
if err != nil {
126+
continue
127+
}
128+
129+
b, err := io.ReadAll(f)
130+
f.Close()
131+
if err != nil {
132+
continue
133+
}
134+
135+
// Simple concatenation of body content would be better, but full HTML concatenation is easier for now
136+
// Ideally we should strip <html>, <head>, <body> tags and just take the inner content
137+
// For simplicity, we just append the whole thing, browsers handle nested html tags somewhat okay-ish
138+
// or better: just append the raw content.
139+
contentBuilder.Write(b)
140+
contentBuilder.WriteString("<hr>")
141+
}
142+
143+
contentBuilder.WriteString("</body></html>")
144+
145+
if err := os.WriteFile(target, []byte(contentBuilder.String()), 0644); err != nil {
146+
return fmt.Errorf("failed to write HTML file: %w", err)
147+
}
148+
149+
return nil
150+
}
151+
152+
func (c *DocumentConverter) convertEPUBToPDF(src, target string) error {
153+
// First convert to HTML
154+
tempHTML := strings.TrimSuffix(target, filepath.Ext(target)) + "_temp.html"
155+
if err := c.convertEPUBToHTML(src, tempHTML); err != nil {
156+
return err
157+
}
158+
defer os.Remove(tempHTML)
159+
160+
// Then convert HTML to PDF (using existing logic logic, but we need to read the temp file)
161+
// We can reuse convertMarkdownToPDF logic but starting from HTML
162+
163+
// Read HTML source
164+
source, err := os.ReadFile(tempHTML)
165+
if err != nil {
166+
return fmt.Errorf("failed to read temp HTML file: %w", err)
167+
}
168+
169+
// Create PDF
170+
pdfDoc := fpdf.New("P", "mm", "A4", "")
171+
pdfDoc.SetMargins(20, 20, 20)
172+
pdfDoc.AddPage()
173+
pdfDoc.SetFont("Arial", "", 12)
174+
175+
_, lineHt := pdfDoc.GetFontSize()
176+
html := pdfDoc.HTMLBasicNew()
177+
html.Write(lineHt, string(source))
178+
179+
if err := pdfDoc.OutputFileAndClose(target); err != nil {
180+
return fmt.Errorf("failed to create PDF: %w", err)
181+
}
182+
183+
return nil
184+
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
package converter
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
)
8+
9+
func TestDocumentConverter_EPUB(t *testing.T) {
10+
tmpDir, err := os.MkdirTemp("", "golter_epub_test")
11+
if err != nil {
12+
t.Fatalf("failed to create temp dir: %v", err)
13+
}
14+
defer os.RemoveAll(tmpDir)
15+
16+
c := &DocumentConverter{}
17+
epubPath := filepath.Join(tmpDir, "test.epub")
18+
createTestEPUB(t, epubPath)
19+
20+
t.Run("EPUB->MD", func(t *testing.T) {
21+
target := filepath.Join(tmpDir, "epub_out.md")
22+
if err := c.Convert(epubPath, target, Options{}); err != nil {
23+
t.Errorf("Convert(EPUB->MD) failed: %v", err)
24+
}
25+
})
26+
27+
t.Run("EPUB->HTML", func(t *testing.T) {
28+
target := filepath.Join(tmpDir, "epub_out.html")
29+
if err := c.Convert(epubPath, target, Options{}); err != nil {
30+
t.Errorf("Convert(EPUB->HTML) failed: %v", err)
31+
}
32+
})
33+
34+
t.Run("EPUB->PDF", func(t *testing.T) {
35+
target := filepath.Join(tmpDir, "epub_out.pdf")
36+
if err := c.Convert(epubPath, target, Options{}); err != nil {
37+
t.Errorf("Convert(EPUB->PDF) failed: %v", err)
38+
}
39+
})
40+
}
41+
42+
func TestDocumentConverter_Convert_Integration_EbookCalibre(t *testing.T) {
43+
if !ensureEbookConvertInPath(t) {
44+
t.Skip("ebook-convert not found, skipping ebook conversion tests")
45+
}
46+
47+
tmpDir, err := os.MkdirTemp("", "golter_ebook_test")
48+
if err != nil {
49+
t.Fatalf("failed to create temp dir: %v", err)
50+
}
51+
defer os.RemoveAll(tmpDir)
52+
53+
c := &DocumentConverter{}
54+
55+
// Create a source EPUB
56+
epubPath := filepath.Join(tmpDir, "test.epub")
57+
createTestEPUB(t, epubPath)
58+
59+
t.Run("EPUB->MOBI", func(t *testing.T) {
60+
mobiPath := filepath.Join(tmpDir, "test.mobi")
61+
if err := c.Convert(epubPath, mobiPath, Options{}); err != nil {
62+
t.Fatalf("Convert(EPUB->MOBI) failed: %v", err)
63+
}
64+
if _, err := os.Stat(mobiPath); os.IsNotExist(err) {
65+
t.Fatalf("Target MOBI not created: %s", mobiPath)
66+
}
67+
})
68+
69+
t.Run("MOBI->EPUB", func(t *testing.T) {
70+
mobiPath := filepath.Join(tmpDir, "test.mobi")
71+
backToEPUB := filepath.Join(tmpDir, "back.epub")
72+
if err := c.Convert(mobiPath, backToEPUB, Options{}); err != nil {
73+
t.Fatalf("Convert(MOBI->EPUB) failed: %v", err)
74+
}
75+
if _, err := os.Stat(backToEPUB); os.IsNotExist(err) {
76+
t.Fatalf("Target EPUB not created: %s", backToEPUB)
77+
}
78+
})
79+
80+
t.Run("EPUB->AZW3", func(t *testing.T) {
81+
azw3Path := filepath.Join(tmpDir, "test.azw3")
82+
if err := c.Convert(epubPath, azw3Path, Options{}); err != nil {
83+
t.Fatalf("Convert(EPUB->AZW3) failed: %v", err)
84+
}
85+
if _, err := os.Stat(azw3Path); os.IsNotExist(err) {
86+
t.Fatalf("Target AZW3 not created: %s", azw3Path)
87+
}
88+
})
89+
}
90+
91+
func TestDocumentConverter_Convert_EbookFromMarkdownAndHTML(t *testing.T) {
92+
if !ensureEbookConvertInPath(t) {
93+
t.Skip("ebook-convert not found, skipping ebook conversion tests")
94+
}
95+
96+
tmpDir, err := os.MkdirTemp("", "golter_ebook_src_test")
97+
if err != nil {
98+
t.Fatalf("failed to create temp dir: %v", err)
99+
}
100+
defer os.RemoveAll(tmpDir)
101+
102+
c := &DocumentConverter{}
103+
104+
t.Run("MD->MOBI", func(t *testing.T) {
105+
mdPath := filepath.Join(tmpDir, "src.md")
106+
if err := os.WriteFile(mdPath, []byte("# Title\n\nHello ebook"), 0644); err != nil {
107+
t.Fatalf("failed to write md: %v", err)
108+
}
109+
mobiPath := filepath.Join(tmpDir, "md_out.mobi")
110+
if err := c.Convert(mdPath, mobiPath, Options{}); err != nil {
111+
t.Fatalf("Convert(MD->MOBI) failed: %v", err)
112+
}
113+
if _, err := os.Stat(mobiPath); os.IsNotExist(err) {
114+
t.Fatalf("Target MOBI not created: %s", mobiPath)
115+
}
116+
})
117+
118+
t.Run("HTML->AZW3", func(t *testing.T) {
119+
htmlPath := filepath.Join(tmpDir, "src.html")
120+
if err := os.WriteFile(htmlPath, []byte("<html><body><h1>Title</h1><p>Hello ebook</p></body></html>"), 0644); err != nil {
121+
t.Fatalf("failed to write html: %v", err)
122+
}
123+
azw3Path := filepath.Join(tmpDir, "html_out.azw3")
124+
if err := c.Convert(htmlPath, azw3Path, Options{}); err != nil {
125+
t.Fatalf("Convert(HTML->AZW3) failed: %v", err)
126+
}
127+
if _, err := os.Stat(azw3Path); os.IsNotExist(err) {
128+
t.Fatalf("Target AZW3 not created: %s", azw3Path)
129+
}
130+
})
131+
}

internal/converter/doc_html.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package converter
2+
3+
import (
4+
"fmt"
5+
"io"
6+
"os"
7+
"path/filepath"
8+
"strings"
9+
10+
md "github.com/JohannesKaufmann/html-to-markdown"
11+
goepub "github.com/bmaupin/go-epub"
12+
)
13+
14+
func (c *DocumentConverter) convertHTMLToMarkdown(src, target string) error {
15+
f, err := os.Open(src)
16+
if err != nil {
17+
return fmt.Errorf("failed to open HTML file: %w", err)
18+
}
19+
defer f.Close()
20+
21+
b, err := io.ReadAll(f)
22+
if err != nil {
23+
return fmt.Errorf("failed to read HTML file: %w", err)
24+
}
25+
26+
converter := md.NewConverter("", true, nil)
27+
markdown, err := converter.ConvertString(string(b))
28+
if err != nil {
29+
return fmt.Errorf("failed to convert HTML to markdown: %w", err)
30+
}
31+
32+
if err := os.WriteFile(target, []byte(markdown), 0644); err != nil {
33+
return fmt.Errorf("failed to write markdown file: %w", err)
34+
}
35+
36+
return nil
37+
}
38+
39+
func (c *DocumentConverter) convertHTMLToEPUB(src, target string) error {
40+
source, err := os.ReadFile(src)
41+
if err != nil {
42+
return fmt.Errorf("failed to read HTML file: %w", err)
43+
}
44+
45+
title := strings.TrimSuffix(filepath.Base(src), ".html")
46+
e := goepub.NewEpub(title)
47+
e.SetAuthor("Golter Converter")
48+
49+
_, err = e.AddSection(string(source), "Chapter 1", "", "")
50+
if err != nil {
51+
return fmt.Errorf("failed to add content to EPUB: %w", err)
52+
}
53+
54+
if err := e.Write(target); err != nil {
55+
return fmt.Errorf("failed to write EPUB file: %w", err)
56+
}
57+
58+
return nil
59+
}
60+
61+
func (c *DocumentConverter) convertHTMLToEbook(src, target string, opts Options) error {
62+
if strings.EqualFold(filepath.Ext(target), ".epub") {
63+
return c.convertHTMLToEPUB(src, target)
64+
}
65+
66+
tempEPUB, cleanup, err := tempPathWithExt("golter_ebook_epub", ".epub")
67+
if err != nil {
68+
return err
69+
}
70+
defer cleanup()
71+
72+
if err := c.convertHTMLToEPUB(src, tempEPUB); err != nil {
73+
return err
74+
}
75+
76+
return c.convertEbookWithCalibre(tempEPUB, target, opts)
77+
}

0 commit comments

Comments
 (0)