@@ -26,7 +26,7 @@ public class DisambigStats {
2626 private static final String statsFile = " /ua/net/nlp/tools/stats/lemma_freqs_hom.txt"
2727 static final String statsVersion = " 3.2.1"
2828
29- boolean disambigBySuffix = true // DisambigModule.wordEnding in options.disambiguate
29+ boolean disambigBySuffix = true
3030 boolean disambigByContext = true
3131 boolean writeDerivedStats = false
3232
@@ -145,9 +145,9 @@ public class DisambigStats {
145145 double wordRate = getRateByWord(anToken, statsForWord, ti, ctxQ_)
146146 double rate = wordRate
147147
148- boolean prevPrep = ti . idx > 0 && hasPosTag(ti . tokens[ti . idx -1 ], " prep " )
149- boolean unforceTag = ! prevPrep && ti . tokens[ti . idx] . getCleanToken() . endsWith( " ів " )
150-
148+ boolean unforceTag = statsForWord != null && ! statsForWord . any { wr , stat -> wr . lemma == anToken . lemma }
149+ debugStats( " unforce: %s " , unforceTag )
150+
151151 if ( ti. idx > 0
152152 && anToken. getPOSTag(). contains(" :prop" )
153153 && anToken. getLemma() ==~ / [А-ЯІЇЄҐ][а-яіїєґ'-]{3,}(-[А-ЯІЇЄҐ][а-яіїєґ'-]{3,})?/ ) {
@@ -169,8 +169,8 @@ public class DisambigStats {
169169 sfxRate = getRateBySuffix(anToken, ti, sfx2RateSum, ctxQ, 2 )
170170 }
171171 if ( sfxRate ) {
172- sfxRate / = 6.1e3
173- // sfxRate /= unforceTag ? 6.1e4 : 6.1e3
172+ // sfxRate /= 6.1e3
173+ sfxRate / = unforceTag ? 6.1e5 : 6.1e3
174174 debugStats(" sfx3 rate: -> %f" , round(sfxRate))
175175 rate + = sfxRate
176176 wordEndingUsed = true
@@ -183,7 +183,7 @@ public class DisambigStats {
183183 double ctxQ = 6.0e7 // 4.5e7
184184 double postagRate = getRateByTag(anToken, ti, withXp, tagRateSum, ctxQ)
185185 if ( postagRate ) {
186- postagRate / = unforceTag ? 6.1e4 : 6.2e3
186+ postagRate / = unforceTag ? 6.1e5 : 6.2e3
187187 debugStats(" tag rate: -> %f" , round(postagRate))
188188 rate + = postagRate
189189 }
0 commit comments