2424package org .eolang .lints .misc ;
2525
2626import com .jcabi .xml .XML ;
27- import edu .stanford .nlp .ling .CoreAnnotations ;
28- import edu .stanford .nlp .pipeline .CoreDocument ;
29- import edu .stanford .nlp .pipeline .StanfordCoreNLP ;
3027import java .io .IOException ;
28+ import java .net .URI ;
29+ import java .net .URISyntaxException ;
3130import java .util .Arrays ;
3231import java .util .Collection ;
3332import java .util .LinkedList ;
3433import java .util .Locale ;
35- import java .util .Properties ;
3634import java .util .regex .Pattern ;
37- import java .util .stream .Collectors ;
3835import java .util .stream .Stream ;
36+ import opennlp .tools .postag .POSModel ;
37+ import opennlp .tools .postag .POSTaggerME ;
3938import org .cactoos .io .ResourceOf ;
39+ import org .cactoos .list .ListOf ;
4040import org .cactoos .text .TextOf ;
4141import org .cactoos .text .UncheckedText ;
4242import org .eolang .lints .Defect ;
4545
4646/**
4747 * Lint that checks test object name is a verb in singular.
48- * This lint uses <a href="https://stanfordnlp.github.io/CoreNLP/">Stanford CoreNLP model </a>
48+ * This lint uses <a href="https://opennlp.apache.org/">OpenNLP models</a>
4949 * with POS tagging capabilities in order to determine the part of speech and
50- * tense for test object name. Originally, we used <a href="https://opennlp.apache.org/">OpenNLP</a>
51- * library to do that, but switched to the Stanford CoreNLP, due to merging all
52- * verb tags into single `VERB` POS tag, that sacrifices important information
53- * for us about verb tenses, and appeared in OpenNLP 2.4.0+. You can read more
54- * about the reason of this <a href="https://github.com/objectionary/lints/issues/129">here</a>
55- * and <a href="https://github.com/objectionary/lints/pull/126#issuecomment-2531121073">here</a>.
50+ * tense for test object name.
5651 * @since 0.0.22
57- * @todo #129 :60min Library stanford-corenlp-4.5.7-models.jar takes too much in size .
58- * Currently, JAR takes ~452mb, which may cause some troubles to the users of
59- * the lints library. Let's think what we can do about this. We should check is
60- * it possible to get rid of this dependency and download models from the other
61- * source .
52+ * @todo #257:60min Configure model download only during the build and place it into the JAR.
53+ * Currently, we download the model file each time the lint is created, which may
54+ * slow down usage of this lint. Instead, let's configure Maven to download the
55+ * model file during the build and place it into the JAR, so the lint can locate
56+ * the file from resources faster.
6257 */
6358public final class LtTestNotVerb implements Lint <XML > {
6459
@@ -68,46 +63,50 @@ public final class LtTestNotVerb implements Lint<XML> {
6863 private static final Pattern KEBAB = Pattern .compile ("-" );
6964
7065 /**
71- * Properties of NLP pipeline .
66+ * Part-Of-Speech tagger .
7267 */
73- private final Properties properties ;
68+ private final POSTaggerME model ;
7469
7570 /**
7671 * Ctor.
77- * @param props Pipeline properties
7872 */
79- public LtTestNotVerb (final Properties props ) {
80- this . properties = new Properties ( props );
73+ public LtTestNotVerb () {
74+ this ( LtTestNotVerb . defaultPosModel () );
8175 }
8276
8377 /**
8478 * Ctor.
79+ * @param mdl Part-Of-Speech model
8580 */
86- public LtTestNotVerb () {
87- this (LtTestNotVerb .defaults ());
81+ public LtTestNotVerb (final POSModel mdl ) {
82+ this (new POSTaggerME (mdl ));
83+ }
84+
85+ /**
86+ * Ctor.
87+ * @param pos Part-Of-Speech tagger
88+ */
89+ public LtTestNotVerb (final POSTaggerME pos ) {
90+ this .model = pos ;
8891 }
8992
9093 @ Override
9194 public Collection <Defect > defects (final XML xmir ) throws IOException {
9295 final Collection <Defect > defects = new LinkedList <>();
93- final StanfordCoreNLP pipeline = new StanfordCoreNLP (this .properties );
9496 for (final XML object : xmir .nodes ("/program[metas/meta[head='tests']]/objects/o[@name]" )) {
9597 final String name = object .xpath ("@name" ).get (0 );
96- final CoreDocument doc = new CoreDocument (
97- Stream
98- .concat (
99- Stream .of ("It" ),
100- Arrays .stream (LtTestNotVerb .KEBAB .split (name ))
101- )
102- .map (s -> s .toLowerCase (Locale .ROOT ))
103- .collect (Collectors .joining (" " ))
104- );
105- pipeline .annotate (doc );
106- if (
107- !"VBZ" .equals (
108- doc .tokens ().get (1 ).get (CoreAnnotations .PartOfSpeechAnnotation .class )
98+ final String first = new ListOf <>(
99+ this .model .tag (
100+ Stream
101+ .concat (
102+ Stream .of ("It" ),
103+ Arrays .stream (LtTestNotVerb .KEBAB .split (name ))
104+ )
105+ .map (s -> s .toLowerCase (Locale .ROOT ))
106+ .toArray (String []::new )
109107 )
110- ) {
108+ ).get (1 );
109+ if (!"VBZ" .equals (first )) {
111110 defects .add (
112111 new Defect .Default (
113112 "unit-test-is-not-verb" ,
@@ -141,13 +140,20 @@ public String name() {
141140 return "unit-test-is-not-verb" ;
142141 }
143142
144- /**
145- * Prestructor for default properties.
146- * @return Properties.
147- */
148- private static Properties defaults () {
149- final Properties props = new Properties ();
150- props .setProperty ("annotators" , "tokenize,pos" );
151- return props ;
143+ private static POSModel defaultPosModel () {
144+ try {
145+ return new POSModel (
146+ new URI ("https://opennlp.sourceforge.net/models-1.5/en-pos-perceptron.bin" )
147+ .toURL ()
148+ );
149+ } catch (final IOException exception ) {
150+ throw new IllegalStateException (
151+ "Failed to read from I/O" , exception
152+ );
153+ } catch (final URISyntaxException exception ) {
154+ throw new IllegalStateException (
155+ "URI syntax is broken" , exception
156+ );
157+ }
152158 }
153159}
0 commit comments