Merge pull request #293 from TeamMsgExtractor/next-release

TheElementalOfDestruction · web-flow · commit 7ed422353242 · 2022-09-30T20:12:03.000-07:00
v0.36.4
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
@@ -1 +1,3 @@
-custom: ['https://www.buymeacoffee.com/DestructionE', 'https://www.patreon.com/DestructionE']
+ko_fi: DestructionE
+patreon: DestructionE
+custom: ['https://www.buymeacoffee.com/DestructionE']
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,10 @@
+**v0.36.4**
+* [[TeamMsgExtractor #291](https://github.com/TeamMsgExtractor/msg-extractor/issues/291)] Fixed typo in `MSGFile.saveRaw` that may have existed for a significant amount of time. It was using the wrong function (same name, but with different capitalization) but was hidden until `MSGFile` stopped being derived from `OleFileIO`.
+* Added logging code to `MessageBase.getSavePdfBody` to log the list that is going to be used to run `wkhtmltopdf`. This is mainly for debugging purposes, to allow users to potentially see why their arguments may be failing.
+* Updated funding information on GitHub and the `README` with more ways to support the module's development.
+* Fixed one of the exceptions in `MessageBase.getSavePdfBody` not using an f-string, which caused it to omit information.
+* Changed the way `wkhtmltopdf` is called to patch a possible security vulnerability. This also seems to have fixed [[TeamMsgExtractor #291](https://github.com/TeamMsgExtractor/msg-extractor/issues/291)].
+
 **v0.36.3**
 * Added an option to skip the body if it could not be found, rather than throwing an error. This will cause no file to be made for it in the event no valid body exists. For the save functions, this option is `skipBodyNotFound` and from the command line the option is `--skip-body-not-found`.
 * Fixed a bug that caused contacts to save the business phone with two colons instead of 1.
diff --git a/README.rst b/README.rst
@@ -147,7 +147,10 @@ Supporting The Module
 ---------------------
 
 If you'd like to donate to help support the development of the module, you can
-donate to Destiny at her Buy Me a Coffee page here: `Buy Me a Coffee`_
+donate to Destiny using one of the following services:
+* `Buy Me a Coffee`_
+* `Ko-fi`_
+* `Patreon`_
 
 Installation
 ------------
@@ -227,8 +230,8 @@ your access to the newest major version of extract-msg.
 .. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
    :target: LICENSE.txt
 
-.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.36.3-blue.svg
-   :target: https://pypi.org/project/extract-msg/0.36.3/
+.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.36.4-blue.svg
+   :target: https://pypi.org/project/extract-msg/0.36.4/
 
 .. |PyPI2| image:: https://img.shields.io/badge/python-3.6+-brightgreen.svg
    :target: https://www.python.org/downloads/release/python-367/
@@ -242,4 +245,6 @@ your access to the newest major version of extract-msg.
 .. _Seamus Tuohy: https://github.com/seamustuohy
 .. _Discord: https://discord.com/invite/B77McRmzdc
 .. _Buy Me a Coffee: https://www.buymeacoffee.com/DestructionE
+.. _Ko-fi: https://ko-fi.com/destructione
+.. _Patreon: https://www.patreon.com/DestructionE
 .. _msg-explorer: https://pypi.org/project/msg-explorer/
diff --git a/extract_msg/__init__.py b/extract_msg/__init__.py
@@ -27,8 +27,8 @@
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 __author__ = 'Destiny Peterson & Matthew Walker'
-__date__ = '2022-08-29'
-__version__ = '0.36.3'
+__date__ = '2022-09-29'
+__version__ = '0.36.4'
 
 import logging
 
diff --git a/extract_msg/message_base.py b/extract_msg/message_base.py
@@ -372,36 +372,44 @@ def getSavePdfBody(self, **kwargs) -> bytes:
         # Immediately try to find the executable.
         wkPath = findWk(kwargs.get('wkPath'))
 
-        # First thing is first, we need to parse our wkOptions if
-        # they exist.
+        # First things first: we need to parse our wkOptions if they exist.
         wkOptions = kwargs.get('wkOptions')
         if wkOptions:
             try:
-                # Try to convert to a list, whatever it is, and
-                # fail if it is not possible.
+                # Try to convert to a list, whatever it is, and fail if it is
+                # not possible.
                 parsedWkOptions = [*wkOptions]
             except TypeError:
-                raise TypeError(':param wkOptions: must be an iterable, not {type(wkOptions)}.')
+                raise TypeError(f':param wkOptions: must be an iterable, not {type(wkOptions)}.')
         else:
             parsedWkOptions = []
 
-        # Confirm that all of our options we now have are either
-        # strings or bytes.
+        # Confirm that all of our options we now have are either strings or
+        # bytes.
         if not all(isinstance(option, (str, bytes)) for option in parsedWkOptions):
             raise TypeError(':param wkOptions: must be an iterable of strings and bytes.')
 
-        # We call the program to convert the html, but give tell it
-        # the data will go in and come out through stdin and stdout,
-        # respectively. This way we don't have to write temporary
-        # files to the disk. We also ask that it be quiet about it.
-        process = subprocess.Popen([wkPath, *parsedWkOptions, '-', '-'], shell = True, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
-        # Give the program the data and wait for the program to
-        # finish.
-        output = process.communicate(self.getSaveHtmlBody(**kwargs))
+        processArgs = [wkPath, *parsedWkOptions, '-', '-']
+        # Log the arguments.
+        logger.info(f'Converting to PDF with the following arguments: {processArgs}')
+
+
+        # Get the html body *before* calling Popen.
+        htmlBody = self.getSaveHtmlBody(**kwargs)
+
+        # We call the program to convert the html, but tell it the data
+        # will go in and come out through stdin and stdout, respectively. This
+        # way we don't have to write temporary files to the disk. We also ask
+        # that it be quiet about it.
+        process = subprocess.run(processArgs, input = htmlBody, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
+        # Give the program the data and wait for the program to finish.
+        #output = process.communicate(htmlBody)
+
+        # If it errored, throw it as an exception.
         if process.returncode != 0:
-            raise WKError(output[1].decode('utf-8'))
+            raise WKError(process.stderr.decode('utf-8'))
 
-        return output[0]
+        return process.stdout
 
     def getSaveRtfBody(self, **kwargs) -> bytes:
         """
diff --git a/extract_msg/msg.py b/extract_msg/msg.py
@@ -523,7 +523,7 @@ def saveRaw(self, path):
             raise FileExistsError(f'File "{path}" already exists.')
         with zipfile.ZipFile(path, 'w', zipfile.ZIP_DEFLATED) as zfile:
             # Loop through all the directories
-            for dir_ in self.listdir():
+            for dir_ in self.listDir():
                 sysdir = '/'.join(dir_)
                 code = dir_[-1][-8:]
                 if constants.PROPERTIES.get(code):