Skip to content

Commit 64f5bc7

Browse files
committed
Also print the number of documents and paragraphs,
if any, based on newdoc and newpar annotations
1 parent 13088ed commit 64f5bc7

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

udapi/block/util/wc.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def __init__(self, tsv=False, **kwargs):
1313
"""
1414
super().__init__(**kwargs)
1515
self.trees, self.words, self.mwts, self.tokens, self.empty = 0, 0, 0, 0, 0
16+
self.docs, self.paragraphs = 0, 0
1617
self.tsv = tsv
1718

1819
def process_tree(self, tree):
@@ -22,13 +23,21 @@ def process_tree(self, tree):
2223
self.mwts += mwtoks
2324
self.tokens += len(tree.token_descendants) if mwtoks else len(tree.descendants)
2425
self.empty += len(tree.empty_nodes)
26+
if tree.newdoc:
27+
self.docs += 1
28+
if tree.newpar:
29+
self.paragraphs += 1
2530

2631
def process_end(self):
2732
if self.tsv:
28-
print('\t'.join(map(str, (self.trees, self.words, self.tokens, self.mwts, self.empty))))
33+
print('\t'.join(map(str, (self.trees, self.words, self.tokens, self.mwts, self.empty, self.docs, self.paragraphs))))
2934
else:
3035
print('%8d trees\n%8d words' % (self.trees, self.words))
3136
if self.mwts:
3237
print('%8d multi-word tokens\n%8d tokens' % (self.mwts, self.tokens))
3338
if self.empty:
3439
print('%8d empty nodes' % self.empty)
40+
if self.docs:
41+
print('%8d documents' % self.docs)
42+
if self.paragraphs:
43+
print('%8d paragraphs' % self.paragraphs)

0 commit comments

Comments
 (0)