File tree Expand file tree Collapse file tree 1 file changed +2
-1
lines changed
src/main/groovy/ua/net/nlp/tools/tag Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Original file line number Diff line number Diff line change @@ -37,8 +37,9 @@ class TagTextCore {
3737
3838 public static final Pattern PUNCT_PATTERN = Pattern . compile(/ [,.:;!?\/ ()\[\] {}«»„“"'…\u 2013\u 2014\u 201D\u 201C•■♦-]+/ ) // "
3939 public static final Pattern SYMBOL_PATTERN = Pattern . compile(/ [%&@$*+=<>\u 00A0-\u 00BF\u 2000-\u 20CF\u 2100-\u 218F\u 2200-\u 22FF]+/ )
40+ // |[а-яіїєґА-ЯІЇЄҐ][а-яіїєґА-ЯІЇЄҐ'\u02BC\u2019]*[а-яіїєґА-ЯІЇЄҐ]-
4041 static final Pattern UNKNOWN_PATTERN = Pattern . compile(/ (.*-)?[а-яіїєґА-ЯІЇЄҐ][а-яіїєґА-ЯІЇЄҐ'\u 02BC\u 2019]+(-.*)?/ )
41- static final Pattern NON_UK_PATTERN = Pattern . compile(/ ^[\# №u2013-]|[\u 2013-]$|[ ыэъё]|[а-яіїєґ][a-z]|[a-z][а-яіїєґ]/ , Pattern . CASE_INSENSITIVE |Pattern . UNICODE_CASE )
42+ static final Pattern NON_UK_PATTERN = Pattern . compile(/ ^[\# №u2013-]|[ыэъё]|[а-яіїєґ][a-z]|[a-z][а-яіїєґ]/ , Pattern . CASE_INSENSITIVE |Pattern . UNICODE_CASE )
4243 static final Pattern UNCLASS_PATTERN = Pattern . compile(/ \p {IsLatin}[\p {IsLatin}\p {IsDigit}-]*|[0-9]+-?[а-яіїєґА-ЯІЇЄҐ]+|[а-яіїєґА-ЯІЇЄҐ]+-?[0-9]+/ )
4344 static final Pattern NONINFL_PATTERN = Pattern . compile(/ [а-зй-яіїєґ]/ , Pattern . CASE_INSENSITIVE |Pattern . UNICODE_CASE ) // exclude Russian и
4445 public static final Pattern XML_TAG_PATTERN = Pattern . compile(/ <\/ ?[a-zA-Z_0-9]+>/ )
You can’t perform that action at this time.
0 commit comments