2424import java .util .ArrayList ;
2525import java .util .List ;
2626import java .util .Objects ;
27+ import java .util .stream .Collectors ;
2728
2829/**
2930 * @author Michele Rastelli
@@ -38,26 +39,89 @@ private static String stringToHex(String str) {
3839 return hex .toString ();
3940 }
4041
42+ private static String hexToString (String hex ) {
43+ final StringBuilder result = new StringBuilder ();
44+ for (int i = 0 ; i < hex .length () - 1 ; i += 2 ) {
45+ String tempInHex = hex .substring (i , (i + 2 ));
46+ int decimal = Integer .parseInt (tempInHex , 16 );
47+ result .append ((char ) decimal );
48+ }
49+ return result .toString ();
50+ }
51+
/**
 * Creates properties with an empty stopword list whose entries are kept hex-encoded.
 */
public StopwordsAnalyzerProperties() {
    this.hex = true;
    this.stopwords = new ArrayList<>();
}
4456
45- private List <String > stopwords ;
// Stopword entries; held hex-encoded when {@code hex} is true, verbatim otherwise.
57+ private final List <String > stopwords ;
// Tells how the entries of {@code stopwords} are encoded; the no-arg constructor sets it to true.
58+ private final boolean hex ;
4659
4760 /**
48- * @return array of hex-encoded strings that describe the tokens to be discarded.
61+ * @return list of hex-encoded strings that describe the tokens to be discarded.
62+ * @deprecated use {@link #getStopwordsAsHexList()} instead
4963 */
64+ @ Deprecated
5065 public List <String > getStopwords () {
51- return stopwords ;
66+ return getStopwordsAsHexList ();
67+ }
68+
69+ /**
70+ * @return list of verbatim strings that describe the tokens to be discarded.
71+ */
72+ public List <String > getStopwordsAsStringList () {
73+ if (hex ) {
74+ return stopwords .stream ()
75+ .map (StopwordsAnalyzerProperties ::hexToString )
76+ .collect (Collectors .toList ());
77+ } else {
78+ return stopwords ;
79+ }
5280 }
5381
82+ /**
83+ * @return list of hex-encoded strings that describe the tokens to be discarded.
84+ */
85+ public List <String > getStopwordsAsHexList () {
86+ if (hex ) {
87+ return stopwords ;
88+ } else {
89+ return stopwords .stream ()
90+ .map (StopwordsAnalyzerProperties ::stringToHex )
91+ .collect (Collectors .toList ());
92+ }
93+ }
94+
95+ /**
96+ * @return if false each string in {@link #stopwords} is used as verbatim, if true as hex-encoded.
97+ */
98+ public boolean getHex () {
99+ return hex ;
100+ }
101+
102+ /**
103+ * @param value stopword as verbatim string
104+ * @return this
105+ */
54106 public StopwordsAnalyzerProperties addStopwordAsString (final String value ) {
55- stopwords .add (stringToHex (value ));
107+ if (hex ) {
108+ stopwords .add (stringToHex (value ));
109+ } else {
110+ stopwords .add (value );
111+ }
56112 return this ;
57113 }
58114
115+ /**
116+ * @param value stopword as hex string
117+ * @return this
118+ */
59119 public StopwordsAnalyzerProperties addStopwordAsHex (final String value ) {
60- stopwords .add (value );
120+ if (hex ) {
121+ stopwords .add (value );
122+ } else {
123+ stopwords .add (hexToString (value ));
124+ }
61125 return this ;
62126 }
63127
@@ -66,11 +130,11 @@ public boolean equals(Object o) {
66130 if (this == o ) return true ;
67131 if (o == null || getClass () != o .getClass ()) return false ;
68132 StopwordsAnalyzerProperties that = (StopwordsAnalyzerProperties ) o ;
69- return Objects .equals (stopwords , that .stopwords );
133+ return hex == that . hex && Objects .equals (stopwords , that .stopwords );
70134 }
71135
72136 @ Override
73137 public int hashCode () {
74- return Objects .hash (stopwords );
138+ return Objects .hash (stopwords , hex );
75139 }
76140}
0 commit comments