Skip to content

Commit b5b9c44

Browse files
committed
Move TextMatcher from Platform UI to Platform Core Resources
The TextMatcher class currently lives in the UI component even though it does not depend on any UI classes. Moving it to the platform makes it usable in both mixed and pure E4 applications.
1 parent 5d8ee71 commit b5b9c44

File tree

3 files changed

+279
-1
lines changed

3 files changed

+279
-1
lines changed
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2020 Thomas Wolf<[email protected]> and others.
3+
*
4+
* This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Public License 2.0
6+
* which accompanies this distribution, and is available at
7+
* https://www.eclipse.org/legal/epl-2.0/
8+
*
9+
* SPDX-License-Identifier: EPL-2.0
10+
*******************************************************************************/
11+
package org.eclipse.core.internal.utils;
12+
13+
import java.util.ArrayList;
14+
import java.util.Arrays;
15+
import java.util.Collections;
16+
import java.util.List;
17+
import java.util.Objects;
18+
import java.util.regex.Pattern;
19+
import org.eclipse.core.text.StringMatcher;
20+
21+
/**
22+
* Similar to {@link StringMatcher}, this {@code TextMatcher} matches a pattern
23+
* that may contain the wildcards '?' or '*' against a text. However, the
24+
* matching is not only done on the full text, but also on individual words from
25+
* the text, and if the pattern contains whitespace, the pattern is split into
26+
* sub-patterns and those are matched, too.
27+
* <p>
28+
* The precise rules are:
29+
* </p>
30+
* <ul>
31+
* <li>Leading and trailing whitespace in the pattern is ignored.</li>
32+
* <li>If the full pattern matches the full text, the match succeeds.</li>
33+
* <li>If the full pattern matches a single word of the text, the match
34+
* succeeds.</li>
35+
* <li>If all sub-patterns match a prefix of the whole text or any prefix of any
36+
* word, the match succeeds.</li>
37+
* <li>Otherwise, the match fails.</li>
38+
* </ul>
39+
* <p>
40+
* An empty pattern matches only the empty text.
41+
* </p>
42+
*/
43+
public final class TextMatcher {
44+
45+
private static final Pattern NON_WORD = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS); //$NON-NLS-1$
46+
47+
private final StringMatcher full;
48+
49+
private final List<StringMatcher> parts;
50+
51+
/**
52+
* Creates a new {@link TextMatcher}.
53+
*
54+
* @param pattern to match
55+
* @param ignoreCase whether to do case-insensitive matching
56+
* @param ignoreWildCards whether to treat '?' and '*' as normal characters, not
57+
* as wildcards
58+
* @throws IllegalArgumentException if {@code pattern == null}
59+
*/
60+
public TextMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
61+
full = new StringMatcher(pattern.trim(), ignoreCase, ignoreWildCards);
62+
parts = splitPattern(pattern, ignoreCase, ignoreWildCards);
63+
}
64+
65+
private List<StringMatcher> splitPattern(String pattern,
66+
boolean ignoreCase, boolean ignoreWildCards) {
67+
String pat = pattern.trim();
68+
if (pat.isEmpty()) {
69+
return Collections.emptyList();
70+
}
71+
String[] subPatterns = pat.split("\\s+"); //$NON-NLS-1$
72+
if (subPatterns.length <= 1) {
73+
return Collections.emptyList();
74+
}
75+
List<StringMatcher> matchers = new ArrayList<>();
76+
for (String s : subPatterns) {
77+
if (s == null || s.isEmpty()) {
78+
continue;
79+
}
80+
StringMatcher m = new StringMatcher(s, ignoreCase, ignoreWildCards);
81+
m.usePrefixMatch();
82+
matchers.add(m);
83+
}
84+
return matchers;
85+
}
86+
87+
/**
88+
* Determines whether the given {@code text} matches the pattern.
89+
*
90+
* @param text String to match; must not be {@code null}
91+
* @return {@code true} if the whole {@code text} matches the pattern;
92+
* {@code false} otherwise
93+
* @throws IllegalArgumentException if {@code text == null}
94+
*/
95+
public boolean match(String text) {
96+
if (text == null) {
97+
throw new IllegalArgumentException();
98+
}
99+
return match(text, 0, text.length());
100+
}
101+
102+
/**
103+
* Determines whether the given sub-string of {@code text} from {@code start}
104+
* (inclusive) to {@code end} (exclusive) matches the pattern.
105+
*
106+
* @param text String to match in; must not be {@code null}
107+
* @param start start index (inclusive) within {@code text} of the sub-string to
108+
* match
109+
* @param end end index (exclusive) within {@code text} of the sub-string to
110+
* match
111+
* @return {@code true} if the given slice of {@code text} matches the pattern;
112+
* {@code false} otherwise
113+
* @throws IllegalArgumentException if {@code text == null}
114+
*/
115+
public boolean match(String text, int start, int end) {
116+
if (text == null) {
117+
throw new IllegalArgumentException();
118+
}
119+
if (start > end) {
120+
return false;
121+
}
122+
int tlen = text.length();
123+
start = Math.max(0, start);
124+
end = Math.min(end, tlen);
125+
if (full.match(text, start, end)) {
126+
return true;
127+
}
128+
String[] words = getWords(text.substring(start, end));
129+
if (match(full, words)) {
130+
return true;
131+
}
132+
if (parts.isEmpty()) {
133+
return false;
134+
}
135+
for (StringMatcher subMatcher : parts) {
136+
if (!subMatcher.match(text, start, end) && !match(subMatcher, words)) {
137+
return false;
138+
}
139+
}
140+
return true;
141+
}
142+
143+
private boolean match(StringMatcher matcher, String[] words) {
144+
return Arrays.stream(words).filter(Objects::nonNull).anyMatch(matcher::match);
145+
}
146+
147+
/**
148+
* Splits a given text into words.
149+
*
150+
* @param text to split
151+
* @return the words of the text
152+
*/
153+
public static String[] getWords(String text) {
154+
// Previous implementations (in the removed StringMatcher) used the ICU
155+
// BreakIterator to split the text. That worked well, but in 2020 it was decided
156+
// to drop the dependency to the ICU library due to its size. The JDK
157+
// BreakIterator splits differently, causing e.g.
158+
// https://bugs.eclipse.org/bugs/show_bug.cgi?id=563121 . The NON_WORD regexp
159+
// appears to work well for programming language text, but may give sub-optimal
160+
// results for natural languages. See also
161+
// https://bugs.eclipse.org/bugs/show_bug.cgi?id=90579 .
162+
return NON_WORD.split(text);
163+
}
164+
165+
@Override
166+
public String toString() {
167+
return '[' + full.toString() + ',' + parts + ']';
168+
}
169+
}

resources/tests/org.eclipse.core.tests.resources/src/org/eclipse/core/tests/internal/utils/AllUtilsTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
@Suite
2121
@SelectClasses({ //
2222
ObjectMapTest.class, //
23-
FileUtilTest.class, })
23+
FileUtilTest.class, //
24+
TextMatcherTest.class })
2425
public class AllUtilsTests {
2526
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2020 Thomas Wolf<[email protected]> and others.
3+
*
4+
* This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Public License 2.0
6+
* which accompanies this distribution, and is available at
7+
* https://www.eclipse.org/legal/epl-2.0/
8+
*
9+
* SPDX-License-Identifier: EPL-2.0
10+
*******************************************************************************/
11+
package org.eclipse.core.tests.internal.utils;
12+
13+
import static org.junit.Assert.assertFalse;
14+
import static org.junit.Assert.assertTrue;
15+
16+
import org.eclipse.core.internal.utils.TextMatcher;
17+
import org.junit.Test;
18+
19+
/**
20+
* Tests for {@link TextMatcher}.
21+
*/
22+
public class TextMatcherTest {
23+
24+
@Test
25+
public void testEmpty() {
26+
assertTrue(new TextMatcher("", false, false).match(""));
27+
assertFalse(new TextMatcher("", false, false).match("foo"));
28+
assertFalse(new TextMatcher("", false, false).match("foo bar baz"));
29+
assertTrue(new TextMatcher("", false, true).match(""));
30+
assertFalse(new TextMatcher("", false, true).match("foo"));
31+
assertFalse(new TextMatcher("", false, true).match("foo bar baz"));
32+
}
33+
34+
@Test
35+
public void testSuffixes() {
36+
assertFalse(new TextMatcher("fo*ar", false, false).match("foobar_123"));
37+
assertFalse(new TextMatcher("fo*ar", false, false).match("foobar_baz"));
38+
}
39+
40+
@Test
41+
public void testChinese() {
42+
assertTrue(new TextMatcher("喜欢", false, false).match("我 喜欢 吃 苹果。"));
43+
// This test would work only if word-splitting used the ICU BreakIterator.
44+
// "Words" are as shown above.
45+
// assertTrue(new TextMatcher("喜欢", false, false).match("我喜欢吃苹果。"));
46+
}
47+
48+
@Test
49+
public void testSingleWords() {
50+
assertTrue(new TextMatcher("huhn", false, false).match("hahn henne hühner küken huhn"));
51+
assertTrue(new TextMatcher("h?hner", false, false).match("hahn henne hühner küken huhn"));
52+
assertTrue(new TextMatcher("h*hner", false, false).match("hahn henne hühner küken huhn"));
53+
assertTrue(new TextMatcher("hühner", false, false).match("hahn henne hühner küken huhn"));
54+
// Full pattern must match word fully
55+
assertFalse(new TextMatcher("h?hner", false, false).match("hahn henne hühnerhof küken huhn"));
56+
assertFalse(new TextMatcher("h*hner", false, false).match("hahn henne hühnerhof küken huhn"));
57+
assertFalse(new TextMatcher("hühner", false, false).match("hahn henne hühnerhof küken huhn"));
58+
59+
assertTrue(new TextMatcher("huhn", false, true).match("hahn henne hühner küken huhn"));
60+
assertFalse(new TextMatcher("h?hner", false, true).match("hahn henne hühner küken huhn"));
61+
assertFalse(new TextMatcher("h*hner", false, true).match("hahn henne hühner küken huhn"));
62+
assertTrue(new TextMatcher("hühner", false, true).match("hahn henne hühner küken huhn"));
63+
// Full pattern must match word fully
64+
assertFalse(new TextMatcher("h?hner", false, true).match("hahn henne hühnerhof küken huhn"));
65+
assertFalse(new TextMatcher("h*hner", false, true).match("hahn henne hühnerhof küken huhn"));
66+
assertFalse(new TextMatcher("hühner", false, true).match("hahn henne hühnerhof küken huhn"));
67+
68+
// Bug 570390: Pattern starting/ending with whitespace should still match
69+
assertTrue(new TextMatcher("hahn ", false, false).match("hahn henne hühnerhof küken huhn"));
70+
assertTrue(new TextMatcher("huhn ", false, false).match("hahn henne hühnerhof küken huhn"));
71+
assertTrue(new TextMatcher(" hahn", false, false).match("hahn henne hühnerhof küken huhn"));
72+
assertTrue(new TextMatcher(" huhn", false, false).match("hahn henne hühnerhof küken huhn"));
73+
}
74+
75+
@Test
76+
public void testMultipleWords() {
77+
assertTrue(new TextMatcher("huhn h?hner", false, false).match("hahn henne hühner küken huhn"));
78+
assertTrue(new TextMatcher("huhn h?hner", false, false).match("hahn henne hühnerhof küken huhn"));
79+
assertFalse(new TextMatcher("huhn h?hner", false, true).match("hahn henne hühner küken huhn"));
80+
assertFalse(new TextMatcher("huhn h?hner", false, true).match("hahn henne hühnerhof küken huhn"));
81+
assertTrue(new TextMatcher("huhn h*hner", false, false).match("hahn henne hühner küken huhn"));
82+
assertTrue(new TextMatcher("huhn h*hner", false, false).match("hahn henne hühnerhof küken huhn"));
83+
assertFalse(new TextMatcher("huhn h*hner", false, true).match("hahn henne hühner küken huhn"));
84+
assertFalse(new TextMatcher("huhn h*hner", false, true).match("hahn henne hühnerhof küken huhn"));
85+
assertTrue(new TextMatcher("huhn hühner", false, false).match("hahn henne hühner küken huhn"));
86+
assertTrue(new TextMatcher("huhn hühner", false, false).match("hahn henne hühnerhof küken huhn"));
87+
assertTrue(new TextMatcher("huhn hühner", false, true).match("hahn henne hühner küken huhn"));
88+
assertTrue(new TextMatcher("huhn hühner", false, true).match("hahn henne hühnerhof küken huhn"));
89+
90+
// Bug 570390: Pattern starting/ending with whitespace should still match
91+
assertTrue(new TextMatcher("huhn hahn ", false, false).match("hahn henne hühnerhof küken huhn"));
92+
assertTrue(new TextMatcher("hahn huhn ", false, false).match("hahn henne hühnerhof küken huhn"));
93+
assertTrue(new TextMatcher(" huhn hahn", false, false).match("hahn henne hühnerhof küken huhn"));
94+
assertTrue(new TextMatcher(" hahn huhn", false, false).match("hahn henne hühnerhof küken huhn"));
95+
}
96+
97+
@Test
98+
public void testCaseInsensitivity() {
99+
assertTrue(new TextMatcher("Huhn HÜHNER", true, false).match("hahn henne hühner küken huhn"));
100+
assertTrue(new TextMatcher("Huhn HÜHNER", true, false).match("hahn henne hühnerhof küken huhn"));
101+
assertTrue(new TextMatcher("Huhn HÜHNER", true, true).match("hahn henne hühner küken huhn"));
102+
assertTrue(new TextMatcher("Huhn HÜHNER", true, true).match("hahn henne hühnerhof küken huhn"));
103+
assertTrue(new TextMatcher("HüHnEr", true, false).match("hahn henne hühner küken huhn"));
104+
assertFalse(new TextMatcher("HüHnEr", true, false).match("hahn henne hühnerhof küken huhn"));
105+
assertTrue(new TextMatcher("HüHnEr", true, true).match("hahn henne hühner küken huhn"));
106+
assertFalse(new TextMatcher("HüHnEr", true, true).match("hahn henne hühnerhof küken huhn"));
107+
}
108+
}

0 commit comments

Comments
 (0)