Skip to content

Commit 9cd26cd

Browse files
committed
adjust word counts
1 parent 05cfafb commit 9cd26cd

File tree

4 files changed: 12 additions (+12) and 7 deletions (-7).

build.gradle

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,8 @@ def get_diff_cmd(outputDir, file) {
267267

268268
task diff(type: Exec) {
269269
def dictFile="dict_corp_lt.txt"
270+
inputs.file "$outputDir/words.txt"
271+
outputs.file "$outputDir/words.txt.diff"
270272

271273
// def cmd = get_diff_cmd(outputDir, dictFile)
272274
// cmd += "; " + get_diff_cmd(outputDir, 'words.txt')

out/stats/dict_stats.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1-
Всього лем: 440897
2-
словникових лем (без advp/bad/slang/alt, без омонімів imperf/perf) 353224
1+
Всього лем: 440898
2+
словникових лем (без advp/bad/alt/vulg/obsc/abbr, двовидові дієслова, як одна лема) 333055
33

44
Частоти за тегами:
55
adj 108553
@@ -16,11 +16,11 @@ conj 157
1616
intj 476
1717
noninfl 1050
1818
prop 5
19-
noun 190325
19+
noun 190326
2020
anim 68079
2121
fname 5723
2222
geo 20721
23-
inanim 122153
23+
inanim 122154
2424
lname 33438
2525
nv 17474
2626
pname 1041

src/main/groovy/org/dict_uk/expand/Expand.groovy

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1575,9 +1575,12 @@ class Expand {
15751575
if( line.contains("/v") && line.contains(":imperf:perf") ) {
15761576
double_form_cnt += 1
15771577
}
1578-
if( line =~ / \/[a-z].*:bad|:slang|:alt/ ) {
1578+
else if( line =~ /( \/[a-z]|v_naz|:inf|).*:bad|:alt|:obsc|:vulg|:abbr/ ) {
15791579
double_form_cnt += 1
15801580
}
1581+
else if( line =~ / (\/v.*\.advp|advp)/ ) {
1582+
double_form_cnt += 1
1583+
}
15811584

15821585
prepared_lines << lineGroup
15831586
}

src/main/groovy/org/dict_uk/expand/Util.groovy

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,7 @@ class Util {
260260
int cnt = 0
261261
int cnt_std = 0
262262

263-
for(String line in lines ) {
263+
for(String line in lines) {
264264
if( line[0] == " ")
265265
continue
266266

@@ -306,7 +306,7 @@ class Util {
306306

307307
new File("stats", "dict_stats.txt").withWriter("utf-8") { stat_f ->
308308
stat_f.printf("Всього лем: %d\n", cnt)
309-
stat_f.printf(" словникових лем (без advp/bad/slang/alt, без омонімів imperf/perf) %s\n", (cnt_std - double_form_cnt))
309+
stat_f.printf(" словникових лем (без advp/bad/alt/vulg/obsc/abbr, двовидові дієслова, як одна лема) %s\n", (cnt_std - double_form_cnt))
310310
stat_f.print("\nЧастоти за тегами:\n")
311311

312312
def ordered_pos_freq = pos_stat.keySet().toList().sort()

0 commit comments

Comments
 (0)