-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpoetry_to_prose.py
More file actions
executable file
·76 lines (54 loc) · 2.45 KB
/
poetry_to_prose.py
File metadata and controls
executable file
·76 lines (54 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
"""Removes line endings from lines that don't end with sentence-terminating
punctuation. Additionally, it removes leading and trailing spaces from each
line.
Usage:
./poetry_to_prose.py FILENAME
This script is copyright 2016 by Patrick Mooney. It is licensed under the GPL,
either version 3 or (at your option) any later version. See the file LICENSE.md
for a copy of this license.
"""
import sys
import nltk # See http://www.nltk.org/install.html
# When True, main() prints a "Processing <filename> ..." progress message.
debugging = True
def quick_tokenize(what):
    """Split WHAT into words with NLTK and tag each with its part of speech.

    Returns whatever nltk.pos_tag() produces for the tokenized text: a list of
    (word, part-of-speech tag) tuples.
    """
    words = nltk.word_tokenize(what)
    return nltk.pos_tag(words)
def starts_proper(what):
    """Return True if NLTK thinks the first word in WHAT is a proper noun.

    "Proper noun" here means that the first token is tagged exactly "NNP".
    Returns False when WHAT produces no tokens at all (for example, an empty
    or whitespace-only string) instead of raising IndexError.
    """
    tagged = quick_tokenize(what)
    if not tagged:
        # Nothing to inspect, so there is no leading proper noun.
        return False
    return tagged[0][1] == "NNP"
def lower_first(what):
    """Return WHAT with its first character lowercased and the rest untouched.

    An empty string comes back as an empty string.
    """
    # Slicing (rather than indexing) is safe for strings of any length,
    # including the empty string, so no special cases are needed.
    head, tail = what[:1], what[1:]
    return head.lower() + tail
def _merge_lines(lines):
    """Join verse-style LINES into prose; return newline-terminated strings."""
    merged = []     # Output lines so far: stripped, without line terminators.
    for raw_line in lines:
        line = raw_line.strip()
        starts_new_line = (
            not merged                      # Very first line of the file.
            or not line                     # Blank line: keep as a paragraph break.
            or not merged[-1]               # Previous line was blank: new paragraph.
            or merged[-1][-1] in '.!?'      # Previous line finished a sentence.
        )
        if starts_new_line:
            merged.append(line)
        elif not line[0].isupper() or starts_proper(line):
            # Lines starting with a lowercase letter (or anything that is not
            # an uppercase letter) and lines starting with a proper noun are
            # copied onto the end of the previous line unchanged.
            merged[-1] += ' ' + line
        else:
            # Otherwise the capital is only there because of the line break,
            # so lowercase it before joining.
            merged[-1] += ' ' + lower_first(line)
    # Terminate every line uniformly, so that no line carries trailing spaces
    # and a merged line can never swallow the blank line that follows it.
    return [line + '\n' for line in merged]


def main(the_filename):
    """Rewrite the file THE_FILENAME in place, turning line-broken verse into
    prose.

    Every line is stripped of leading and trailing whitespace. A line is joined
    onto the previous output line (separated by one space) unless it is blank,
    follows a blank line, or follows a line ending in '.', '!' or '?'. When a
    joined line begins with an uppercase letter that NLTK does not consider
    part of a proper noun, that letter is lowercased first.

    The original file is overwritten with the result.
    """
    if debugging: print('\n\nProcessing %s ...' % the_filename)
    with open(the_filename) as the_file:
        the_text = the_file.readlines()
    output_file = _merge_lines(the_text)
    with open(the_filename, 'w') as the_file:
        the_file.writelines(output_file)
if __name__ == "__main__":
    # Exit with a usage message rather than an IndexError traceback when no
    # filename was supplied. Any extra arguments are ignored, as before.
    if len(sys.argv) < 2:
        sys.exit("Usage: ./poetry_to_prose.py FILENAME")
    main(sys.argv[1])