Strip per-line whitespace in parser output, end code blocks with a newline, bump version to 0.12

hamelsmu · hamelsmu · commit 6822a02057aa · 2019-06-18T17:54:25.000-07:00
diff --git a/mdparse/parser.py b/mdparse/parser.py
@@ -187,7 +187,12 @@ def sym(text:str) -> str:
         text = regex.sub(json_regex, ' xxxjson ', text)
         
         return text
-            
+
+    @staticmethod
+    def strip(text:str) -> str:
+        """Strip leading/trailing whitespace from each '\\n'-separated line and drop lines left empty."""
+        return '\n'.join([s.strip() for s in text.split('\n') if s.strip()])
+
     ### transformations that are the same from factory functions
     # large headers: h1
     hL =   partial(prepend.__func__, 'xxxhl', 'h1')
@@ -196,7 +201,7 @@ def sym(text:str) -> str:
     # small headers: h4, h5, h6
     hS =   partial(prepend.__func__, 'xxxhs', ['h4', 'h5', 'h6'])
     # code blocks
-    code = partial(enclose.__func__, ' xxxcdb ', ' xxxcde ', 'code', 2)
+    code = partial(enclose.__func__, ' xxxcdb ', ' xxxcde\n ', 'code', 2)
     # paragraph blocks (plain text)
     txt =  partial(prepend.__func__, '', 'xxxp')
     # block quotes
@@ -209,4 +214,4 @@ def sym(text:str) -> str:
 
 transform_pre_rules = [md.parse, md.hL, md.hM, md.hS, md.lst, md.bqt, 
                        md.code, md.tbl, md.st, md.txt, md.lnk, md.img, 
-                       md.hr, md.get_text, md.sym]
+                       md.hr, md.get_text, md.sym, md.strip]
diff --git a/notebooks/Demo.ipynb b/notebooks/Demo.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -24,7 +24,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -128,7 +128,7 @@
        "<IPython.core.display.Markdown object>"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -149,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -159,18 +159,19 @@
       "xxxhl This is a test markdown that has different types of formatting\n",
       "xxxhm Header 2\n",
       "Hello world this is going to be a long sentence that. also has a newline:\n",
-      " lala lala\n",
+      "lala lala\n",
       "continuing the sentence here.\n",
       "more text.\n",
       "xxxlistB bullet 1\n",
       "bullet 2\n",
       "xxxlistE\n",
       "\"quoted text\"\n",
       "xxxhs a small header\n",
-      " xxxcdb lang-python def something(x):\n",
-      " \\\"\"\"docstring\\\"\"\"\n",
-      " return False\n",
-      " xxxcde xxxatmention  somebody\n",
+      "xxxcdb lang-python def something(x):\n",
+      "\\\"\"\"docstring\\\"\"\"\n",
+      "return False\n",
+      "xxxcde\n",
+      "xxxatmention  somebody\n",
       "xxxqb blockquote text is here!\n",
       "xxxqe\n",
       "hobbit-hole xxxlnkhb xxxhtml xxxlnkhe xxxlnktb Hobbit lifestyles xxxlnkte xxxlistB first\n",
@@ -180,13 +181,14 @@
       "xxxqe\n",
       "xxximg Tux, the Linux mascot xxximgf png\n",
       "xxtbl First Header|Second Header\n",
-      "xxxhr Hello xxxcdb something xxxcde here and stuff is there. google xxxlnkhb www.google.com xxxlnkhe. random text\n",
+      "xxxhr Hello xxxcdb something xxxcde\n",
+      "here and stuff is there. google xxxlnkhb www.google.com xxxlnkhe. random text\n",
       "xxxdelb The world is flat. xxxdele We now know that the world is round.\n",
       "xxxlistB [x] Write the press release\n",
       "[ ] Update the website\n",
       "[ ] Contact the media\n",
       "xxxlistE\n",
-      " xxxcdb http://www.example.com xxxcde\n"
+      "xxxcdb http://www.example.com xxxcde\n"
      ]
     }
    ],
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 setup(
     name='mdparse',
     packages=find_packages(),
-    version='0.10',
+    version='0.12',
     description='Parsing markdown files for deep learning.',
     author='Hamel Husain',
     author_email='hamel.husain@gmail.com',
@@ -65,7 +65,7 @@
                   'pytest-xdist',
                   'pytest-cov'],
     },
-    download_url='https://github.com/machine-learning-apps/mdparse/archive/v.10.zip',
+    download_url='https://github.com/machine-learning-apps/mdparse/archive/v.12.zip',
     classifiers=[
         'Development Status :: 1 - Planning',
         'Programming Language :: Python :: 3.7',