File tree Expand file tree Collapse file tree 4 files changed +63
-32
lines changed
pipeline/clean/opuscleaner/configs Expand file tree Collapse file tree 4 files changed +63
-32
lines changed Original file line number Diff line number Diff line change 3030 },
3131 "language" : null
3232 },
33+ {
34+ "filter" : " regexp" ,
35+ "parameters" : {
36+ "PATTERN" : " s#([\\ x{3040}-\\ x{309F}\\ x{30A0}-\\ x{30FF}\\ x{FF00}-\\ x{FFEF}\\ x{4E00}-\\ x{9FAF}\\ x{3000}-\\ x{303F}\\ x{3400}-\\ x{4DBF}\\ ?])\\ !#\\ 1\\ x{ff01}#g"
37+ },
38+ "language" : " ja"
39+ },
40+ {
41+ "filter" : " regexp" ,
42+ "parameters" : {
43+ "PATTERN" : " s#([\\ x{3040}-\\ x{309F}\\ x{30A0}-\\ x{30FF}\\ x{FF00}-\\ x{FFEF}\\ x{4E00}-\\ x{9FAF}\\ x{3000}-\\ x{303F}\\ x{3400}-\\ x{4DBF}\\ ?])\\ ?#\\ 1\\ x{ff1f}#g"
44+ },
45+ "language" : " ja"
46+ },
47+ {
48+ "filter" : " regexp" ,
49+ "parameters" : {
50+ "PATTERN" : " s#([\\ x{3040}-\\ x{309F}\\ x{30A0}-\\ x{30FF}\\ x{FF00}-\\ x{FFEF}\\ x{4E00}-\\ x{9FAF}\\ x{3000}-\\ x{303F}\\ x{3400}-\\ x{4DBF}\\ ?])\\ :#\\ 1\\ x{ff1a}#g"
51+ },
52+ "language" : " ja"
53+ },
54+ {
55+ "filter" : " regexp" ,
56+ "parameters" : {
57+ "PATTERN" : " s#\\ .\\ .\\ .\\ x{3002}#...#g"
58+ },
59+ "language" : " ja"
60+ },
61+ {
62+ "filter" : " regexp" ,
63+ "parameters" : {
64+ "PATTERN" : " s#\\ x{30fb}\\ x{30fb}\\ x{30fb}#\\ x{2026}#g"
65+ },
66+ "language" : " ja"
67+ },
68+ {
69+ "filter" : " regexp" ,
70+ "parameters" : {
71+ "PATTERN" : " s#\\ . ?\\ . ?\\ . ?#\\ x{2026}#g"
72+ },
73+ "language" : " ja"
74+ },
75+ {
76+ "filter" : " regexp" ,
77+ "parameters" : {
78+ "PATTERN" : " s#\\ x{ff0c}#\\ x{3001}#g"
79+ },
80+ "language" : " ja"
81+ },
82+ {
83+ "filter" : " regexp" ,
84+ "parameters" : {
85+ "PATTERN" : " s#([\\ x{3040}-\\ x{309F}\\ x{30A0}-\\ x{30FF}\\ x{FF00}-\\ x{FFEF}\\ x{4E00}-\\ x{9FAF}\\ x{3000}-\\ x{303F}\\ x{3400}-\\ x{4DBF}]),#\\ 1\\ x{3001}#g"
86+ },
87+ "language" : " ja"
88+ },
89+ {
90+ "filter" : " regexp" ,
91+ "parameters" : {
92+ "PATTERN" : " s#([\\ x{3040}-\\ x{309F}\\ x{30A0}-\\ x{30FF}\\ x{FF00}-\\ x{FFEF}\\ x{4E00}-\\ x{9FAF}\\ x{3000}-\\ x{303F}\\ x{3400}-\\ x{4DBF}])\\ .\\ b#\\ 1\\ x{3002}#g"
93+ },
94+ "language" : " ja"
95+ },
3396 {
3497 "filter" : " fix_wiki" ,
3598 "parameters" : {
56119 },
57120 "language" : null
58121 },
59- {
60- "filter" : " num_mismatch" ,
61- "parameters" : {
62- "RATIO" : 1 ,
63- "DEBUG" : false
64- },
65- "language" : null
66- },
67122 {
68123 "filter" : " fasttext_filter" ,
69124 "parameters" : {
Original file line number Diff line number Diff line change 5656 },
5757 "language" : null
5858 },
59- {
60- "filter" : " num_mismatch" ,
61- "parameters" : {
62- "RATIO" : 1 ,
63- "DEBUG" : false
64- },
65- "language" : null
66- },
6759 {
6860 "filter" : " fasttext_filter" ,
6961 "parameters" : {
Original file line number Diff line number Diff line change 5656 },
5757 "language" : null
5858 },
59- {
60- "filter" : " num_mismatch" ,
61- "parameters" : {
62- "RATIO" : 1 ,
63- "DEBUG" : false
64- },
65- "language" : null
66- },
6759 {
6860 "filter" : " fasttext_filter" ,
6961 "parameters" : {
Original file line number Diff line number Diff line change 5656 },
5757 "language" : null
5858 },
59- {
60- "filter" : " num_mismatch" ,
61- "parameters" : {
62- "RATIO" : 1 ,
63- "DEBUG" : false
64- },
65- "language" : null
66- },
6759 {
6860 "filter" : " fasttext_filter" ,
6961 "parameters" : {
You can’t perform that action at this time.
0 commit comments