Skip to content
This repository was archived by the owner on Dec 9, 2022. It is now read-only.

Commit 6822a02

Browse files
committed
minor fix
1 parent 8c2cfb1 commit 6822a02

File tree

3 files changed

+25 -18 lines changed

3 files changed

+25 -18 lines changed

mdparse/parser.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,12 @@ def sym(text:str) -> str:
187187
text = regex.sub(json_regex, ' xxxjson ', text)
188188

189189
return text
190-
190+
191+
@staticmethod
192+
def strip(text:str) -> str:
193+
"""get rid of whitespace in beginning and end of lines"""
194+
return '\n'.join([s.strip() for s in text.split('\n') if s.strip()])
195+
191196
### transformations that are the same from factory functions
192197
# large headers: h1
193198
hL = partial(prepend.__func__, 'xxxhl', 'h1')
@@ -196,7 +201,7 @@ def sym(text:str) -> str:
196201
# small headers: h4, h5, h6
197202
hS = partial(prepend.__func__, 'xxxhs', ['h4', 'h5', 'h6'])
198203
# code blocks
199-
code = partial(enclose.__func__, ' xxxcdb ', ' xxxcde ', 'code', 2)
204+
code = partial(enclose.__func__, ' xxxcdb ', ' xxxcde\n ', 'code', 2)
200205
# paragraph blocks (plain text)
201206
txt = partial(prepend.__func__, '', 'xxxp')
202207
# block quotes
@@ -209,4 +214,4 @@ def sym(text:str) -> str:
209214

210215
transform_pre_rules = [md.parse, md.hL, md.hM, md.hS, md.lst, md.bqt,
211216
md.code, md.tbl, md.st, md.txt, md.lnk, md.img,
212-
md.hr, md.get_text, md.sym]
217+
md.hr, md.get_text, md.sym, md.strip]

notebooks/Demo.ipynb

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -14,7 +14,7 @@
1414
},
1515
{
1616
"cell_type": "code",
17-
"execution_count": 17,
17+
"execution_count": 2,
1818
"metadata": {},
1919
"outputs": [],
2020
"source": [
@@ -24,7 +24,7 @@
2424
},
2525
{
2626
"cell_type": "code",
27-
"execution_count": 18,
27+
"execution_count": 3,
2828
"metadata": {},
2929
"outputs": [
3030
{
@@ -50,7 +50,7 @@
5050
},
5151
{
5252
"cell_type": "code",
53-
"execution_count": 19,
53+
"execution_count": 4,
5454
"metadata": {},
5555
"outputs": [
5656
{
@@ -128,7 +128,7 @@
128128
"<IPython.core.display.Markdown object>"
129129
]
130130
},
131-
"execution_count": 19,
131+
"execution_count": 4,
132132
"metadata": {},
133133
"output_type": "execute_result"
134134
}
@@ -149,7 +149,7 @@
149149
},
150150
{
151151
"cell_type": "code",
152-
"execution_count": 20,
152+
"execution_count": 5,
153153
"metadata": {},
154154
"outputs": [
155155
{
@@ -159,18 +159,19 @@
159159
"xxxhl This is a test markdown that has different types of formatting\n",
160160
"xxxhm Header 2\n",
161161
"Hello world this is going to be a long sentence that. also has a newline:\n",
162-
" lala lala\n",
162+
"lala lala\n",
163163
"continuing the sentence here.\n",
164164
"more text.\n",
165165
"xxxlistB bullet 1\n",
166166
"bullet 2\n",
167167
"xxxlistE\n",
168168
"\"quoted text\"\n",
169169
"xxxhs a small header\n",
170-
" xxxcdb lang-python def something(x):\n",
171-
" \\\"\"\"docstring\\\"\"\"\n",
172-
" return False\n",
173-
" xxxcde xxxatmention somebody\n",
170+
"xxxcdb lang-python def something(x):\n",
171+
"\\\"\"\"docstring\\\"\"\"\n",
172+
"return False\n",
173+
"xxxcde\n",
174+
"xxxatmention somebody\n",
174175
"xxxqb blockquote text is here!\n",
175176
"xxxqe\n",
176177
"hobbit-hole xxxlnkhb xxxhtml xxxlnkhe xxxlnktb Hobbit lifestyles xxxlnkte xxxlistB first\n",
@@ -180,13 +181,14 @@
180181
"xxxqe\n",
181182
"xxximg Tux, the Linux mascot xxximgf png\n",
182183
"xxtbl First Header|Second Header\n",
183-
"xxxhr Hello xxxcdb something xxxcde here and stuff is there. google xxxlnkhb www.google.com xxxlnkhe. random text\n",
184+
"xxxhr Hello xxxcdb something xxxcde\n",
185+
"here and stuff is there. google xxxlnkhb www.google.com xxxlnkhe. random text\n",
184186
"xxxdelb The world is flat. xxxdele We now know that the world is round.\n",
185187
"xxxlistB [x] Write the press release\n",
186188
"[ ] Update the website\n",
187189
"[ ] Contact the media\n",
188190
"xxxlistE\n",
189-
" xxxcdb http://www.example.com xxxcde\n"
191+
"xxxcdb http://www.example.com xxxcde\n"
190192
]
191193
}
192194
],

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
setup(
55
name='mdparse',
66
packages=find_packages(),
7-
version='0.10',
7+
version='0.12',
88
description='Parsing markdown files for deep learning.',
99
author='Hamel Husain',
1010
author_email='[email protected]',
@@ -65,7 +65,7 @@
6565
'pytest-xdist',
6666
'pytest-cov'],
6767
},
68-
download_url='https://github.com/machine-learning-apps/mdparse/archive/v.10.zip',
68+
download_url='https://github.com/machine-learning-apps/mdparse/archive/v.12.zip',
6969
classifiers=[
7070
'Development Status :: 1 - Planning',
7171
'Programming Language :: Python :: 3.7',

0 commit comments

Comments (0)