Skip to content

Commit 28dbce9

Browse files
committed
read.Text
1 parent be3068b commit 28dbce9

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

udapi/block/read/text.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""Text class is a reader for word-wrapped plain-text files."""
2+
from udapi.core.basereader import BaseReader
3+
from udapi.core.root import Root
4+
5+
6+
class Text(BaseReader):
    r"""Reader block for word-wrapped plain text.

    One sentence may span several consecutive lines; a run of one or more
    empty lines ends the sentence. The lines belonging to one sentence are
    glued together with a single space in place of each newline.

    Args:
    rstrip: characters removed from the right end of every non-empty line
        before the lines are joined. Default='\r\n '. Pass rstrip='\n'
        to keep trailing spaces and '\r' (Carriage Return), so that
        `udpipe.Base` keeps these characters in `SpacesAfter`.
        Most blocks do not expect whitespace other than a plain space
        in the processed text, so use this option at your own risk.
    """

    def __init__(self, rstrip='\r\n ', **kwargs):
        # Stored before delegating to the base class, as in the original order.
        self.rstrip = rstrip
        super().__init__(**kwargs)

    @staticmethod
    def is_multizone_reader():
        """Tell whether this reader can read bundles containing more zones.

        This reader always answers False.
        """
        return False

    def read_tree(self, document=None):
        """Read the next sentence from the current file handle.

        Leading empty lines are skipped. Following lines are collected
        (each right-stripped of the characters in `self.rstrip`) until an
        empty line or the end of the file is reached.

        Args:
        document: not used by this implementation.

        Returns:
        A new `Root` whose `text` is the collected lines joined by a single
        space, or None when there is no file handle or when the end of the
        file is reached before any non-empty line was read.
        """
        if self.filehandle is None:
            return None

        chunks = []
        while True:
            raw = self.filehandle.readline()

            # readline() yields '' only at the end of the file; an empty line
            # comes back as '\n' (or '\r\n' for a Windows file read on Unix).
            if raw == '':
                if chunks:
                    break
                return None

            if raw in ('\n', '\r\n'):
                if chunks:
                    # An empty line after some content closes the sentence.
                    break
                # Nothing collected yet: ignore empty lines before a sentence.
                continue

            chunks.append(raw.rstrip(self.rstrip))

        tree = Root()
        tree.text = " ".join(chunks)
        return tree

0 commit comments

Comments
 (0)