|
| 1 | +"""SentencesHtml class is a writer for sentences in an HTML list (the output could be Google-translated while preserving sentence correspondence).""" |
| 2 | +from udapi.core.basewriter import BaseWriter |
| 3 | + |
| 4 | + |
class SentencesHtml(BaseWriter):
    """A writer of sentences in an HTML list (one sentence per ``<li>`` item).

    Each item carries the tree's ``sent_id`` as its ``id`` attribute and the
    sentence text as its content. The title, the ids and the sentence text
    are HTML-escaped, so characters such as ``&``, ``<`` and ``"`` occurring
    in the data cannot break the generated markup.

    Usage:
    udapy write.SentencesHtml if_missing=empty < my.conllu > my.html
    """

    def __init__(self, title='Sentences from CoNLL-U', if_missing='detokenize', **kwargs):
        """Create the SentencesHtml writer block.

        Parameters:
        title: Text printed inside the `<title>` element of the HTML page
            (default='Sentences from CoNLL-U').
        if_missing: What to do if `root.text` is `None`? (default=detokenize)
            * `detokenize`: use `root.compute_text()` to compute the sentence.
            * `empty`: print an empty line
            * `warn_detokenize`, `warn_empty`: in addition emit a warning via `logging.warning()`
            * `fatal`: raise an exception
        **kwargs: Passed unchanged to the `BaseWriter` constructor.
        """
        super().__init__(**kwargs)
        self.title = title
        self.if_missing = if_missing

    @staticmethod
    def _escape(value):
        """Return `value` converted to `str` with HTML special characters escaped.

        Quotes are escaped too, so the result is safe both as element content
        and inside a double-quoted attribute value.
        """
        # Local import keeps this class self-contained (no new file-level import).
        import html
        # str() mirrors the original '%s' formatting (e.g. None -> 'None').
        return html.escape(str(value), quote=True)

    def before_process_document(self, document):
        """Print the HTML header and open the `<ul>` list before any sentence."""
        super().before_process_document(document)
        print('<!DOCTYPE html>\n<html>\n<head>\n<meta charset="utf-8">')
        print('<title>' + self._escape(self.title) + '</title>')
        print('</head>\n<body>\n<ul>\n')

    def after_process_document(self, document):
        """Close the list and the HTML page after the last sentence."""
        print("</ul>\n</body>\n</html>")
        super().after_process_document(document)

    def process_tree(self, tree):
        """Print one `<li>` item holding the sentence of `tree`.

        The sentence is obtained via `tree.get_sentence(self.if_missing)`;
        the item's `id` attribute is `tree.sent_id`.
        """
        sent_id = self._escape(tree.sent_id)
        sentence = self._escape(tree.get_sentence(self.if_missing))
        print(' <li id="%s">%s</li>' % (sent_id, sentence))
0 commit comments