feat: Add support for MOBI and AZW file formats

mashu3 · mashu3 · commit bb68a0c317c3 · 2023-08-15T01:35:13.000+09:00
diff --git a/README.md b/README.md
@@ -12,19 +12,17 @@ The resulting PDF files are optimized to resemble Japanese manga in terms of pag
 
 ## Requirement
 The script uses the Python libraries **[img2pdf](https://pypi.org/project/img2pdf/)** and **[pikepdf](https://pypi.org/project/pikepdf/)** to do the conversion.
-Moreover, it uses **[lxml](https://pypi.org/project/lxml/)** to read the EPUB files and **[rarfile](https://pypi.org/project/rarfile/)** to read the RAR archive files.
+Moreover, it uses **[lxml](https://pypi.org/project/lxml/)** to read the EPUB files, **[rarfile](https://pypi.org/project/rarfile/)** to read the RAR archive files, and **[mobi](https://pypi.org/project/mobi/)** to handle both MOBI and AZW files.
 
 It requires the installation of these packages in order to work properly.
 
 **Note**
-- This script can only handle DRM-free fixed-layout EPUB files.
+- This script can only handle DRM-free fixed-layout EPUB, MOBI, and AZW3 files.
 - Please ensure that the image files you input are named in numerical order according to their page sequence. For example, `page_01.jpg`, `page_02.jpg`, `page_03.jpg`, and so on, or `001.jpg`, `002.jpg`, `003.jpg`, and so on. This will ensure that the pages are converted and compiled in the correct order.
 
 ## Usage
-This script can take input in the form of `zip`, `cbz`, `rar`, `cbr`, `epub` files or directories containing images (`jpg`, `jpeg`, `png`, `gif`, `bmp`) of manga or comic pages.
-
 The program can be executed from the command line with the following options:
-- The `input_path` argument represents the path to the input file. To execute the Python script correctly, specify the `input_path` argument as the path to the input file containing manga or comic images in any of the supported formats, such as `zip`, `cbz`, `rar`, `cbr`, `epub`, or a directory containing images in formats such as `jpg`, `jpeg`, `png`, `gif`, or `bmp`.
+- The `input_path` argument is the path to the input file. To execute the Python script correctly, specify the `input_path` argument as the path to the input file containing manga or comic images in one of the supported formats, such as `zip`, `cbz`, `rar`, `cbr`, `epub`, `mobi`, `azw3`, `azw`, or a directory containing images in formats such as `jpg`, `jpeg`, `png`, `gif`, or `bmp`.
 - The `output_path` argument is the path to the output PDF file. To use the script, simply run the Python script with the path to the input file or directory as the argument. If the `--output` option is not specified, the output file name will be automatically generated based on the name of the input file or directory.
 - The `pagelayout` parameter can take in the following values:
     - `SinglePage` -> Single page display
diff --git a/setup.py b/setup.py
@@ -1,9 +1,10 @@
 from setuptools import setup, find_packages
 
-VERSION = "0.2.0"
+VERSION = "0.2.1"
 
 INSTALL_REQUIRES = (
     "lxml",
+    "mobi",
     "numpy",
     "img2pdf",
     "Pillow",
diff --git a/src/manga2pdf.py b/src/manga2pdf.py
@@ -6,12 +6,14 @@
 import os
 import re
 import sys
+import mobi
+import shutil
 import img2pdf
 import pikepdf
-import tempfile
 import rarfile
 import zipfile
 import argparse
+import tempfile
 import warnings
 import numpy as np
 from PIL import Image
@@ -38,6 +40,11 @@ def set_convert_to_grayscale(self, flag):
     def is_image_file(self, filename):
         return any(filename.lower().endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp'])
     
+    # Function to determine whether the given path is a MOBI or AZW/AZW3 file or not
+    def is_mobi_file(self, path):
+        ext = os.path.splitext(path)[1].lower()
+        return ext in ['.mobi', '.azw3', '.azw']
+    
     # Function to determine whether the given path is an epub file or not
     def is_epub_file(self, path):
         ext = os.path.splitext(path)[1].lower()
@@ -241,10 +248,27 @@ def extract_epub_metadata(self, epub, opf_name: str):
                 else:
                     epub_metadata[key] = None
         return epub_metadata
-
+    
     # Function to convert input files to a PDF file
     def convert(self):
-        if self.is_epub_file(self.input_path):
+        if self.is_mobi_file(self.input_path):
+            try:
+                sys.stdout = open(os.devnull, 'w')
+                tmpfile, self.epub_path = mobi.extract(self.input_path)
+                sys.stdout.close()
+                sys.stdout = sys.__stdout__
+                if self.is_epub_file(self.epub_path):
+                    with zipfile.ZipFile(self.epub_path) as epub:
+                        page_names = self.extract_epub_contents(epub)[0]
+                        page_items = self.extract_epub_contents(epub)[1]
+                        ncx_name = self.extract_epub_contents(epub)[2]
+                        opf_name = self.extract_epub_contents(epub)[3]
+                        page_index = self.extract_epub_index(epub, page_names, ncx_name)
+                        epub_metadata = self.extract_epub_metadata(epub, opf_name)
+                shutil.rmtree(tmpfile)
+            except Exception as e:
+                print(f"Error extracting the MOBI file: {e}")
+        elif self.is_epub_file(self.input_path):
             with zipfile.ZipFile(self.input_path) as epub:
                 page_names = self.extract_epub_contents(epub)[0]
                 page_items = self.extract_epub_contents(epub)[1]
@@ -283,7 +307,7 @@ def convert(self):
         pdf_obj = io.BytesIO(img2pdf.convert(page_items))
         
         with pikepdf.Pdf.open(pdf_obj) as pdf:
-            if self.is_epub_file(self.input_path):
+            if self.is_epub_file(self.input_path) or self.is_epub_file(self.epub_path):
                 with pdf.open_metadata(set_pikepdf_as_editor=False) as pdf_metadata:
                     pdf_metadata['dc:title'] = epub_metadata['title'] if epub_metadata['title'] else ''
                     pdf_metadata['dc:creator'] = epub_metadata['creator'] if epub_metadata['creator'] else ''
@@ -350,7 +374,7 @@ def main():
     parser.add_argument('-m', '--pagemode', type=str, default='UseNone', 
                         choices=['UseOutlines', 'UseThumbs', 'FullScreen', 'UseOC', 'UseAttachments'],
                         help='''\
-(default)UseNone -> Neither document outline nor thumbnail images visible
+(default) UseNone -> Neither document outline nor thumbnail images visible
 UseOutlines -> Document outline visible
 UseThumbs -> Thumbnail images visible
 FullScreen -> Full-screen mode
@@ -359,7 +383,7 @@ def main():
     parser.add_argument('-d', '--direction', type=str, default='R2L', choices=['L2R', 'R2L'],
                         help='''\
 L2R -> Left Binding
-(default)R2L -> Right Binding''')
+(default) R2L -> Right Binding''')
     parser.add_argument('-j', '--jpeg', action='store_true', help='Convert images to JPEG')
     parser.add_argument('-g', '--grayscale', action='store_true', help='Convert images to grayscale')
     parser.add_argument('-gui', action='store_true', help='Launch GUI')
@@ -375,8 +399,8 @@ def main():
             sys.exit(1)
         if not os.path.isdir(args.input_path):
             ext = os.path.splitext(args.input_path)[1].lower()
-            if not ext in ['.zip', '.cbz', '.rar', '.cbr', '.epub']:
-                print('Error: The input file format is not supported. The currently supported formats are: .zip, .cbz, .rar, .cbr, and .epub.')
+            if not ext in ['.zip', '.cbz', '.rar', '.cbr', '.epub', '.mobi', '.azw3', '.azw']:
+                print('Error: The input file format is not supported. The currently supported formats are: .zip, .cbz, .rar, .cbr, .mobi, .azw3, .azw, and .epub.')
                 sys.exit(1)
         if args.output_path is not None:
             if not args.output_path.endswith('.pdf'):
diff --git a/src/manga2pdf_gui.py b/src/manga2pdf_gui.py
@@ -220,7 +220,7 @@ def browse_output_path(self):
             self.set_output_path(path)
 
     def browse_input_file(self):
-        filetypes = (("zip files", "*.zip"), ("cbz files", "*.cbz"), ("rar files", "*.rar"), ("cbr files", "*.cbr"), ("epub files", "*.epub"), ("all files", "*.*"))
+        filetypes = (("zip files", "*.zip"), ("cbz files", "*.cbz"), ("rar files", "*.rar"), ("cbr files", "*.cbr"), ("epub files", "*.epub"), ("mobi files", "*.mobi"), ("azw files", "*.azw3"), ("azw files", "*.azw"), ("all files", "*.*"))
         path = filedialog.askopenfilename(filetypes=filetypes)
         if path:
             self.input_path = path.replace('/', os.sep)
@@ -368,6 +368,9 @@ def run_convert(self):
                 messagebox.showerror("Error", complete_error_text[self.language], parent=self.master)
 
         except Exception as e:
+            # Close the processing window before showing the error dialog
+            processing_window.grab_release()
+            processing_window.destroy()
             complete_error_text = {"en": "Conversion failed", "ja": "エラーで変換処理に失敗しました"}
             messagebox.showerror(title="Error", message=f"{complete_error_text[self.language]}\n{str(e)}", parent=self.master)