Skip to content

Commit 93107d6

Browse files
committed
LUCENE-9575: fix style violations so precommit passes again
1 parent c087f6f commit 93107d6

File tree

4 files changed

+109
-82
lines changed

4 files changed

+109
-82
lines changed

lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilter.java

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,22 @@
1717

1818
package org.apache.lucene.analysis.pattern;
1919

20+
import java.io.IOException;
21+
import java.util.regex.Matcher;
22+
import java.util.regex.Pattern;
2023
import org.apache.lucene.analysis.TokenFilter;
2124
import org.apache.lucene.analysis.TokenStream;
2225
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
2326
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
2427
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
2528

26-
import java.io.IOException;
27-
import java.util.regex.Matcher;
28-
import java.util.regex.Pattern;
29-
3029
/**
31-
* Set a type attribute to a parameterized value when tokens are matched by any of a several regex patterns. The
32-
* value set in the type attribute is parameterized with the match groups of the regex used for matching.
33-
* In combination with TypeAsSynonymFilter and DropIfFlagged filter this can supply complex synonym patterns
34-
* that are protected from subsequent analysis, and optionally drop the original term based on the flag
35-
* set in this filter. See {@link PatternTypingFilterFactory} for full documentation.
30+
* Set a type attribute to a parameterized value when tokens are matched by any of several regex
31+
* patterns. The value set in the type attribute is parameterized with the match groups of the regex
32+
* used for matching. In combination with TypeAsSynonymFilter and DropIfFlagged filter this can
33+
* supply complex synonym patterns that are protected from subsequent analysis, and optionally drop
34+
* the original term based on the flag set in this filter. See {@link PatternTypingFilterFactory}
35+
* for full documentation.
3636
*
3737
* @see PatternTypingFilterFactory
3838
* @since 8.8.0
@@ -44,7 +44,7 @@ public class PatternTypingFilter extends TokenFilter {
4444
private final FlagsAttribute flagAtt = addAttribute(FlagsAttribute.class);
4545
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
4646

47-
public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
47+
public PatternTypingFilter(TokenStream input, PatternTypingRule... replacementAndFlagByPattern) {
4848
super(input);
4949
this.replacementAndFlagByPattern = replacementAndFlagByPattern;
5050
}
@@ -55,7 +55,8 @@ public final boolean incrementToken() throws IOException {
5555
for (PatternTypingRule rule : replacementAndFlagByPattern) {
5656
Matcher matcher = rule.getPattern().matcher(termAtt);
5757
if (matcher.find()) {
58-
// allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string creation
58+
// allow 2nd reset() and find() that occurs inside replaceFirst to avoid excess string
59+
// creation
5960
typeAtt.setType(matcher.replaceFirst(rule.getTypeTemplate()));
6061
flagAtt.setFlags(rule.getFlags());
6162
return true;
@@ -66,9 +67,7 @@ public final boolean incrementToken() throws IOException {
6667
return false;
6768
}
6869

69-
/**
70-
* Value holding class for pattern typing rules.
71-
*/
70+
/** Value holding class for pattern typing rules. */
7271
public static class PatternTypingRule {
7372
private final Pattern pattern;
7473
private final int flags;

lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTypingFilterFactory.java

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,21 @@
1717

1818
package org.apache.lucene.analysis.pattern;
1919

20-
import org.apache.lucene.analysis.TokenFilterFactory;
21-
import org.apache.lucene.analysis.TokenStream;
22-
import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;
23-
import org.apache.lucene.util.ResourceLoader;
24-
import org.apache.lucene.util.ResourceLoaderAware;
25-
2620
import java.io.IOException;
2721
import java.util.ArrayList;
2822
import java.util.List;
2923
import java.util.Map;
3024
import java.util.regex.Pattern;
31-
25+
import org.apache.lucene.analysis.TokenFilterFactory;
26+
import org.apache.lucene.analysis.TokenStream;
27+
import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;
28+
import org.apache.lucene.util.ResourceLoader;
29+
import org.apache.lucene.util.ResourceLoaderAware;
3230

3331
/**
34-
* Provides a filter that will analyze tokens with the analyzer from an arbitrary field type. By itself this
35-
* filter is not very useful. Normally it is combined with a filter that reacts to types or flags.
32+
* Provides a filter that will analyze tokens with the analyzer from an arbitrary field type. By
33+
* itself this filter is not very useful. Normally it is combined with a filter that reacts to types
34+
* or flags.
3635
*
3736
* <pre class="prettyprint" >
3837
* &lt;fieldType name="text_taf" class="solr.TextField" positionIncrementGap="100"&gt;
@@ -44,37 +43,38 @@
4443
* ignore="word,&amp;lt;ALPHANUM&amp;gt;,&amp;lt;NUM&amp;gt;,&amp;lt;SOUTHEAST_ASIAN&amp;gt;,&amp;lt;IDEOGRAPHIC&amp;gt;,&amp;lt;HIRAGANA&amp;gt;,&amp;lt;KATAKANA&amp;gt;,&amp;lt;HANGUL&amp;gt;,&amp;lt;EMOJI&amp;gt;"/&gt;
4544
* &lt;/analyzer&gt;
4645
* &lt;/fieldType&gt;</pre>
47-
* <p>
48-
* Note that a configuration such as above may interfere with multi-word synonyms. The patterns file has the format:
46+
*
47+
* <p>Note that a configuration such as above may interfere with multi-word synonyms. The patterns
48+
* file has the format:
49+
*
4950
* <pre>
5051
* (flags) (pattern) ::: (replacement)
5152
* </pre>
52-
* Therefore to set the first 2 flag bits on the original token matching 401k or 401(k) and adding a type of
53-
* 'legal2_401_k' whenever either one is encountered one would use:
53+
*
54+
* Therefore, to set the first 2 flag bits on the original token matching 401k or 401(k) and add a
55+
* type of 'legal2_401_k' whenever either one is encountered one would use:
56+
*
5457
* <pre>
5558
* 3 (\d+)\(?([a-z])\)? ::: legal2_$1_$2
5659
* </pre>
57-
* Note that the number indicating the flag bits to set must not have leading spaces and be followed by a single
58-
* space, and must be 0 if no flags should be set. The flags number should not contain commas or a decimal point.
59-
* Lines for which the first character is <code>#</code> will be ignored as comments. Does not support producing
60-
* a synonym textually identical to the original term.
60+
*
61+
* Note that the number indicating the flag bits to set must not have leading spaces, must be followed
62+
* by a single space, and must be 0 if no flags should be set. The flags number should not contain
63+
* commas or a decimal point. Lines for which the first character is <code>#</code> will be ignored
64+
* as comments. Does not support producing a synonym textually identical to the original term.
6165
*
6266
* @lucene.spi {@value #NAME}
6367
* @since 8.8
6468
*/
6569
public class PatternTypingFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
6670

67-
/**
68-
* SPI name
69-
*/
71+
/** SPI name */
7072
public static final String NAME = "patternTyping";
7173

7274
private final String patternFile;
7375
private PatternTypingRule[] rules;
7476

75-
/**
76-
* Creates a new PatternTypingFilterFactory
77-
*/
77+
/** Creates a new PatternTypingFilterFactory */
7878
public PatternTypingFilterFactory(Map<String, String> args) {
7979
super(args);
8080
patternFile = require(args, "patternFile");
@@ -83,9 +83,7 @@ public PatternTypingFilterFactory(Map<String, String> args) {
8383
}
8484
}
8585

86-
/**
87-
* Default ctor for compatibility with SPI
88-
*/
86+
/** Default ctor for compatibility with SPI */
8987
public PatternTypingFilterFactory() {
9088
throw defaultCtorException();
9189
}
@@ -94,16 +92,19 @@ public PatternTypingFilterFactory() {
9492
public void inform(ResourceLoader loader) throws IOException {
9593
List<PatternTypingRule> ruleList = new ArrayList<>();
9694
List<String> lines = getLines(loader, patternFile);
97-
// format: # regex ::: typename[_$1[_$2 ...]] (technically _$1 does not need the '_' but it usually makes sense)
95+
// format: # regex ::: typename[_$1[_$2 ...]] (technically _$1 does not need the '_' but it
96+
// usually makes sense)
9897
// eg: 2 (\d+)\(?([a-z])\)?\(?(\d+)\)? ::: legal3_$1_$2_$3
9998
// which yields legal3_501_c_3 for 501(c)(3) or 501c3 and sets the second lowest bit in flags
10099
for (String line : lines) {
101100
int firstSpace = line.indexOf(" "); // no leading spaces allowed
102101
int flagsVal = Integer.parseInt(line.substring(0, firstSpace));
103102
line = line.substring(firstSpace + 1);
104-
String[] split = line.split(" ::: "); // arbitrary, unlikely to occur in a useful regex easy to read
103+
String[] split =
104+
line.split(" ::: "); // arbitrary, unlikely to occur in a useful regex easy to read
105105
if (split.length != 2) {
106-
throw new RuntimeException("The PatternTypingFilter: Always two there are, no more, no less, a pattern and a replacement (separated by ' ::: ' )");
106+
throw new RuntimeException(
107+
"The PatternTypingFilter: Always two there are, no more, no less, a pattern and a replacement (separated by ' ::: ' )");
107108
}
108109
Pattern compiled = Pattern.compile(split[0]);
109110
ruleList.add(new PatternTypingRule(compiled, flagsVal, split[1]));

lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilter.java

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,18 @@
1616
*/
1717
package org.apache.lucene.analysis.pattern;
1818

19+
import java.io.IOException;
20+
import java.util.regex.Pattern;
1921
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
2022
import org.apache.lucene.analysis.CannedTokenStream;
2123
import org.apache.lucene.analysis.Token;
2224
import org.apache.lucene.analysis.TokenStream;
2325
import org.apache.lucene.analysis.pattern.PatternTypingFilter.PatternTypingRule;
2426

25-
import java.io.IOException;
26-
import java.util.regex.Pattern;
27-
28-
/**
29-
* Test that this filter sets a type for tokens matching patterns defined in a patterns.txt file
30-
*/
27+
/** Test that this filter sets a type for tokens matching patterns defined in a patterns.txt file */
3128
public class TestPatternTypingFilter extends BaseTokenStreamTestCase {
3229

33-
/**
34-
* Test the straight forward cases. When all flags match the token should be dropped
35-
*/
30+
/** Test the straightforward cases. When all flags match the token should be dropped */
3631
public void testPatterns() throws Exception {
3732

3833
Token tokenA1 = new Token("One", 0, 2);
@@ -43,15 +38,26 @@ public void testPatterns() throws Exception {
4338

4439
TokenStream ts = new CannedTokenStream(tokenA1, tokenA2, tokenA3, tokenB1, tokenB2);
4540

46-
//2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
47-
ts = new PatternTypingFilter(ts,
48-
new PatternTypingRule(Pattern.compile("^(\\d+)\\(?([a-z])\\)?$"),2,"legal2_$1_$2"));
41+
// 2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
42+
ts =
43+
new PatternTypingFilter(
44+
ts,
45+
new PatternTypingRule(Pattern.compile("^(\\d+)\\(?([a-z])\\)?$"), 2, "legal2_$1_$2"));
4946

50-
assertTokenStreamContents(ts, new String[]{
51-
"One", "401(k)", "two", "three", "401k"}, null, null,
52-
new String[]{"word", "legal2_401_k", "word", "word", "legal2_401_k"},
53-
null, null, null, null, null, false, null,
54-
new int[]{0, 2, 0, 0, 2});
47+
assertTokenStreamContents(
48+
ts,
49+
new String[] {"One", "401(k)", "two", "three", "401k"},
50+
null,
51+
null,
52+
new String[] {"word", "legal2_401_k", "word", "word", "legal2_401_k"},
53+
null,
54+
null,
55+
null,
56+
null,
57+
null,
58+
false,
59+
null,
60+
new int[] {0, 2, 0, 0, 2});
5561
}
5662

5763
public void testFirstPatternWins() throws IOException {
@@ -61,17 +67,27 @@ public void testFirstPatternWins() throws IOException {
6167

6268
TokenStream ts = new CannedTokenStream(tokenA1, tokenA3, tokenB1);
6369

64-
//2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
65-
PatternTypingRule p1 = new PatternTypingRule(Pattern.compile("^(\\d+)-(\\d+)$"), 6, "$1_hnum_$2");
66-
PatternTypingRule p2 = new PatternTypingRule(Pattern.compile("^(\\w+)-(\\w+)$"), 2, "$1_hword_$2");
70+
// 2 ^(\d+)\(?([a-z])\)?$ ::: legal2_$1_$2
71+
PatternTypingRule p1 =
72+
new PatternTypingRule(Pattern.compile("^(\\d+)-(\\d+)$"), 6, "$1_hnum_$2");
73+
PatternTypingRule p2 =
74+
new PatternTypingRule(Pattern.compile("^(\\w+)-(\\w+)$"), 2, "$1_hword_$2");
6775

68-
ts = new PatternTypingFilter(ts, p1,p2); // 101
76+
ts = new PatternTypingFilter(ts, p1, p2); // 101
6977

70-
assertTokenStreamContents(ts, new String[]{
71-
"One", "forty-two", "4-2"}, null, null,
72-
new String[]{"word", "forty_hword_two", "4_hnum_2"},
73-
null, null, null, null, null, false, null,
74-
new int[]{0, 2, 6});
78+
assertTokenStreamContents(
79+
ts,
80+
new String[] {"One", "forty-two", "4-2"},
81+
null,
82+
null,
83+
new String[] {"word", "forty_hword_two", "4_hnum_2"},
84+
null,
85+
null,
86+
null,
87+
null,
88+
null,
89+
false,
90+
null,
91+
new int[] {0, 2, 6});
7592
}
76-
7793
}

lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTypingFilterFactory.java

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@
2525
import org.apache.lucene.analysis.util.StringMockResourceLoader;
2626
import org.apache.lucene.util.Version;
2727

28-
/**
29-
* This test just ensures the factory works
30-
*/
28+
/** This test just ensures the factory works */
3129
public class TestPatternTypingFilterFactory extends BaseTokenStreamFactoryTestCase {
3230

3331
public void testFactory() throws Exception {
@@ -37,16 +35,29 @@ public void testFactory() throws Exception {
3735

3836
TokenStream ts = new CannedTokenStream(tokenA1, tokenA3, tokenB1);
3937

40-
TokenFilterFactory tokenFilterFactory = tokenFilterFactory("patternTyping", Version.LATEST, new StringMockResourceLoader(
41-
"6 \\b(\\d+)-(\\d+) ::: $1_hnum_$2\n" +
42-
"2 \\b(\\w+)-(\\w+) ::: $1_hword_$2"
43-
), "patternFile", "patterns.txt");
38+
TokenFilterFactory tokenFilterFactory =
39+
tokenFilterFactory(
40+
"patternTyping",
41+
Version.LATEST,
42+
new StringMockResourceLoader(
43+
"6 \\b(\\d+)-(\\d+) ::: $1_hnum_$2\n" + "2 \\b(\\w+)-(\\w+) ::: $1_hword_$2"),
44+
"patternFile",
45+
"patterns.txt");
4446

4547
ts = tokenFilterFactory.create(ts);
46-
assertTokenStreamContents(ts, new String[]{
47-
"One", "forty-two", "4-2"}, null, null,
48-
new String[]{"word", "forty_hword_two", "4_hnum_2"},
49-
null, null, null, null, null, false, null,
50-
new int[]{0, 2, 6});
48+
assertTokenStreamContents(
49+
ts,
50+
new String[] {"One", "forty-two", "4-2"},
51+
null,
52+
null,
53+
new String[] {"word", "forty_hword_two", "4_hnum_2"},
54+
null,
55+
null,
56+
null,
57+
null,
58+
null,
59+
false,
60+
null,
61+
new int[] {0, 2, 6});
5162
}
5263
}

0 commit comments

Comments
 (0)