Skip to content

Commit cd9b962

Browse files
committed
Write sentences in an HTML list.
1 parent ab86f1b commit cd9b962

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

udapi/block/write/sentenceshtml.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""SentencesHtml class is a writer for sentences in HTML list (could be Google-translated, remembering sentence correspondence)."""
2+
import html

from udapi.core.basewriter import BaseWriter
3+
4+
5+
class SentencesHtml(BaseWriter):
    """A writer of sentences in an HTML list (one `<li>` item per sentence).

    The document is wrapped in a minimal HTML5 page; every tree is printed as
    a list item whose `id` attribute is the tree's `sent_id` and whose content
    is the sentence text. All inserted values are HTML-escaped, so sentences
    containing `&`, `<` or `>` still produce well-formed markup.

    Usage:
    udapy write.SentencesHtml if_missing=empty < my.conllu > my.html
    """

    def __init__(self, title='Sentences from CoNLL-U', if_missing='detokenize', **kwargs):
        """Create the SentencesHtml writer block.

        Parameters:
        title: Text placed in the `<title>` element of the generated page
            (default='Sentences from CoNLL-U'). It is HTML-escaped on output.
        if_missing: What to do if `root.text` is `None`? (default=detokenize)
          * `detokenize`: use `root.compute_text()` to compute the sentence.
          * `empty`: print an empty line
          * `warn_detokenize`, `warn_empty`: in addition emit a warning via `logging.warning()`
          * `fatal`: raise an exception
        **kwargs: Remaining keyword arguments, forwarded unchanged to `BaseWriter`.
        """
        super().__init__(**kwargs)
        self.title = title
        self.if_missing = if_missing

    def before_process_document(self, document):
        """Print the HTML page header and open the `<ul>` list.

        The parent hook runs first, so whatever output setup it performs is
        in place before anything is printed.
        """
        super().before_process_document(document)
        print('<!DOCTYPE html>\n<html>\n<head>\n<meta charset="utf-8">')
        # Escape the title: it is caller-supplied and may contain `&` or `<`.
        print('<title>' + html.escape(str(self.title), quote=False) + '</title>')
        print('</head>\n<body>\n<ul>\n')

    def after_process_document(self, document):
        """Close the list and the page, then run the parent hook."""
        print("</ul>\n</body>\n</html>")
        super().after_process_document(document)

    def process_tree(self, tree):
        """Print one `<li>` item holding the sentence of `tree`.

        `self.if_missing` is passed to `tree.get_sentence()` and decides what
        happens when the tree has no stored text.
        """
        # str() keeps the original '%s' behaviour for non-string values
        # (e.g. a missing sent_id is still rendered as "None").
        sent_id = html.escape(str(tree.sent_id), quote=True)  # attribute value: quotes matter
        sentence = html.escape(str(tree.get_sentence(self.if_missing)), quote=False)
        print('  <li id="%s">%s</li>' % (sent_id, sentence))

0 commit comments

Comments
 (0)