Skip to content

Commit ac46ca0

Browse files
author
s2010515
committed
update parameter in html parsed txt
1 parent 6a0749f commit ac46ca0

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

cadmus/parsing/clean_html.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ def clean_html(p_text):
77
if p_text != None:
88

99
# we want to remove the [, , , ] remnants that were left behind when we stripped out references etc.
10-
p_text = re.sub('\[[, ]*\]', ' ', text)
11-
p_text = re.sub('\[(, )*\]', ' ', text)
10+
p_text = re.sub('\[[, ]*\]', ' ', p_text)
11+
p_text = re.sub('\[(, )*\]', ' ', p_text)
1212

1313
# remove any of the unicode characters and \n chars
14-
p_text = unicodedata.normalize("NFKD", text)
14+
p_text = unicodedata.normalize("NFKD", p_text)
1515

1616
p_text = remove_link(p_text)
1717
p_text = p_text.replace('https://','')

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setuptools.setup(
44
name="cadmus",
5-
version="0.2.3",
5+
version="0.2.4",
66
author="Jamie Campbell, Ian Simpson, Antoine Lain",
77
author_email="Jamie.campbell@igmm.ed.ac.uk, Ian.Simpson@ed.ac.uk, Antoine.Lain@ed.ac.uk",
88
description="This projects is to build full text retrieval system setup for generation of large biomedical corpora from published literature.",

0 commit comments

Comments
 (0)