Skip to content

Commit b37a6a7

Browse files
committed
Map from various tagsets to Wordnet Pos
1 parent 1642942 commit b37a6a7

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

nltk/corpus/reader/wordnet.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2108,6 +2108,23 @@ def filter_forms(forms):
21082108
# 2. Return all that are in the database (and check the original too)
21092109
return filter_forms([form] + forms)
21102110

2111+
def tag2pos(self, tag: str, tagset: str = "en-ptb") -> "str | None":
    """
    Convert a tag from one of the tagsets in nltk_data/taggers/universal_tagset to a
    WordNet Part-of-Speech, using Universal Tags (Petrov et al., 2012) as intermediary.
    Return None when WordNet does not cover that POS.

    :param tag: the part-of-speech tag to convert, expressed in ``tagset``
    :param tagset: name of the tagset that ``tag`` belongs to (default "en-ptb")
    :return: the WordNet POS matching ``tag``, or None when there is no match

    >>> import nltk
    >>> tagged = nltk.tag.pos_tag(nltk.tokenize.word_tokenize("Banks check books."))
    >>> print([(word, tag, nltk.corpus.wordnet.tag2pos(tag)) for word,tag in tagged])
    [('Banks', 'NNS', 'n'), ('check', 'VBP', 'v'), ('books', 'NNS', 'n'), ('.', '.', None)]
    """

    # Imported at call time rather than at module level.
    # NOTE(review): presumably this avoids a circular import with nltk.tag -- confirm.
    from nltk.tag import map_tag

    # Invert self._FILEMAP, upper-casing its values so the table can be keyed by
    # Universal tag name.
    # NOTE(review): assumes the upper-cased _FILEMAP values coincide with Universal
    # tag names (the doctest above shows this for nouns and verbs) -- confirm.
    utag2wnpos = {self._FILEMAP[pos].upper(): pos for pos in self._FILEMAP}

    # dict.get already yields None for a Universal tag with no WordNet counterpart.
    return utag2wnpos.get(map_tag(tagset, "universal", tag))
2127+
21112128
#############################################################
21122129
# Create information content from corpus
21132130
#############################################################

0 commit comments

Comments
 (0)