Skip to content

Commit fd0b3a2

Browse files
committed
When converting tokens to JSON, include the morphemes in the output if present. They could perhaps also be added to the CoNLL-U output.
1 parent 284e9b4 commit fd0b3a2

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

stanza/models/common/doc.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,13 +47,14 @@ class MWTProcessingType(Enum):
4747
CONSTITUENCY = 'constituency'
4848
COREF_CHAINS = 'coref_chains'
4949
LINE_NUMBER = 'line_number'
50+
MORPHEMES = 'morphemes'
5051

5152
# field indices when converting the document to conll
5253
FIELD_TO_IDX = {ID: 0, TEXT: 1, LEMMA: 2, UPOS: 3, XPOS: 4, FEATS: 5, HEAD: 6, DEPREL: 7, DEPS: 8, MISC: 9}
5354
FIELD_NUM = len(FIELD_TO_IDX)
5455

55-
DEFAULT_OUTPUT_FIELDS = [ID, TEXT, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC, START_CHAR, END_CHAR, NER, MULTI_NER, MEXP, COREF_CHAINS]
56-
NO_OFFSETS_OUTPUT_FIELDS = [ID, TEXT, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC, NER, MULTI_NER, MEXP, COREF_CHAINS]
56+
DEFAULT_OUTPUT_FIELDS = [ID, TEXT, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC, START_CHAR, END_CHAR, NER, MULTI_NER, MEXP, COREF_CHAINS, MORPHEMES]
57+
NO_OFFSETS_OUTPUT_FIELDS = [ID, TEXT, LEMMA, UPOS, XPOS, FEATS, HEAD, DEPREL, DEPS, MISC, NER, MULTI_NER, MEXP, COREF_CHAINS, MORPHEMES]
5758

5859
class DocJSONEncoder(json.JSONEncoder):
5960
def default(self, obj):

0 commit comments

Comments
 (0)