Skip to content

Commit c651b04

Browse files
committed
util.Normalize sent_id_prefix
1 parent e495f6f commit c651b04

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

udapi/block/util/normalize.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,28 @@ class Normalize(Block):
2020
util.Eval node='node.misc["NonExistentAttribute"] = None'
2121
"""
2222

23-
def __init__(self, feats=True, misc=True, sent_id=False, start_sent_id=1, **kwargs):
23+
def __init__(self, feats=True, misc=True, sent_id=False, start_sent_id=1, sent_id_prefix="", **kwargs):
2424
"""
2525
Args:
2626
`feats`: normalize the ordering of FEATS. Default=True.
2727
`misc`: normalize the ordering of MISC. Default=True.
2828
`sent_id`: normalize sent_id so it forms a sequence of integers. Default=False.
2929
`start_sent_id`: the first sent_id number. Default=1.
30+
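`sent_id_prefix`: a string to be prepended before the integer sent_id. Default=empty string. Setting a non-empty `sent_id_prefix` (or a `start_sent_id` other than 1) implicitly turns on `sent_id` normalization.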
3031
"""
3132
super().__init__(**kwargs)
3233
self.feats = feats
3334
self.misc = misc
3435
self.sent_id = sent_id
3536
self.next_sent_id = start_sent_id
37+
self.sent_id_prefix = sent_id_prefix
38+
if sent_id_prefix or start_sent_id != 1:
39+
self.sent_id = True
3640
# TODO: normalize also the order of standardized comments like text, sent_id,...
3741

3842
def process_bundle(self, bundle):
3943
if self.sent_id:
40-
bundle.bundle_id = str(self.next_sent_id)
44+
bundle.bundle_id = self.sent_id_prefix + str(self.next_sent_id)
4145
self.next_sent_id += 1
4246

4347
for tree in bundle:

0 commit comments

Comments
 (0)