Skip to content

Commit 96ff36c

Browse files
committed
auto-semtagging for advp
1 parent ed03ae5 commit 96ff36c

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

src/main/groovy/ua/net/nlp/tools/tag/SemTags.groovy

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import groovy.transform.CompileStatic;
2121
@CompileStatic
2222
public class SemTags {
2323
static final String baseDir = "/ua/net/nlp/dict_uk/semtags"
24+
static Map<String, String> ADVP_DERIVATS
2425

2526
def categories = ["noun", "adj", "adv", "verb", "numr"]
2627

@@ -62,8 +63,11 @@ public class SemTags {
6263
if( parts.length >= 3 && parts[2].trim().startsWith(':') ) {
6364
add = parts[2].trim()
6465
}
66+
67+
def word = parts[0]
6568
def semtags = parts[1]
66-
def key = parts[0] + " " + cat
69+
def pos = cat
70+
def key = word + " " + pos
6771

6872
if( ! (key in semanticTags) ) {
6973
semanticTags[key] = [:]
@@ -79,6 +83,11 @@ public class SemTags {
7983
}
8084
}
8185
}
86+
87+
ADVP_DERIVATS = getClass().getResource("/org/languagetool/resource/uk/derivats.txt").readLines().collectEntries {
88+
def parts = it.split()
89+
[(parts[0]) : parts[1]]
90+
}
8291

8392
if( ! options.quiet ) {
8493
long tm2 = System.currentTimeMillis()
@@ -91,6 +100,14 @@ public class SemTags {
91100
if( options.semanticTags && tkn.getLemma() != null && posTag != null ) {
92101
def lemma = tkn.getLemma()
93102
String posTagKey = posTag.replaceFirst(/:.*/, '')
103+
104+
if( posTagKey.startsWith("advp") ) {
105+
lemma = ADVP_DERIVATS[lemma]
106+
posTagKey = "verb"
107+
if( ! lemma )
108+
return null
109+
}
110+
94111
String key = "$lemma $posTagKey"
95112

96113
Map<String, List<String>> potentialSemTags = semanticTags.get(key)

src/test/groovy/ua/net/nlp/tools/tag/TagTextSemTest.groovy

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ class TagTextSemTest {
9595
}
9696

9797

98-
9998
@Test
10099
public void testSemanticAlt() {
101100
def expected=
@@ -114,4 +113,20 @@ class TagTextSemTest {
114113
TagResult tagged = tagText.tagText("колеґа по\u2013турецьки")
115114
assertEquals expected, tagged.tagged
116115
}
116+
117+
118+
// Tags the single word "стверджуючи" with semantic tagging enabled and checks
// that the output is exactly one advp:imperf token carrying
// semtags="1:speech:2:effect".
// NOTE(review): presumably the semtags come from the advp -> verb lemma mapping
// (ADVP_DERIVATS) added to SemTags in this same commit — confirm.
@Test
119+
public void testSemanticDerivat() {
120+
def expected=
121+
"""<sentence>
122+
<token value="стверджуючи" lemma="стверджуючи" tags="advp:imperf" semtags="1:speech:2:effect" />
123+
</sentence>
124+
<paragraph/>
125+
"""
126+
127+
// Enable semantic tags and token-format output on the shared tagText before tagging.
tagText.setOptions(new TagOptions(semanticTags: true, tokenFormat: true))
128+
TagResult tagged = tagText.tagText("стверджуючи")
129+
// Compare the full tagged output string, so whitespace and attribute order matter.
assertEquals expected, tagged.tagged
130+
}
131+
117132
}

0 commit comments

Comments
 (0)