Skip to content

Commit b6215cc

Browse files
author
Markus M. Geipel
committed
Merge pull request #135 from dr0i/simple-character-class
Simple character class
2 parents 6d2c9c9 + 83344e9 commit b6215cc

File tree

3 files changed

+112
-2
lines changed

3 files changed

+112
-2
lines changed

src/main/java/org/culturegraph/mf/morph/WildcardDataRegistry.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import java.util.List;
1919

20+
import org.culturegraph.mf.util.tries.SimpleRegexTrie;
2021
import org.culturegraph.mf.util.tries.WildcardTrie;
2122

2223

@@ -28,7 +29,7 @@
2829
*/
2930
final class WildcardRegistry<T> implements Registry<T> {
3031

31-
private final WildcardTrie<T> trie = new WildcardTrie<T>();
32+
private final SimpleRegexTrie<T> trie = new SimpleRegexTrie<T>();
3233

3334
@Override
3435
public void register(final String path, final T value) {
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
* Copyright 2013 Pascal Christoph, hbz
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.culturegraph.mf.util.tries;
17+
18+
import java.util.List;
19+
20+
/**
21+
* A wrapper for the {@link WildcardTrie} enabling the use of simple character
22+
* classes.
23+
*
24+
* @author Pascal Christoph
25+
*
26+
* @param <P>
27+
* type of value stored
28+
*/
29+
public class SimpleRegexTrie<P> {
30+
31+
private final WildcardTrie<P> trie;
32+
public static final String SIMPLE_CHARACTER_CLASS = "\\[.*\\]";
33+
34+
public SimpleRegexTrie() {
35+
trie = new WildcardTrie<P>();
36+
}
37+
38+
/**
39+
* Enables the use of simple character classes like 'a[agt][ac]'. Calls the
40+
* method of {@link WildcardTrie} for further treatment.
41+
*
42+
* @param keys
43+
* @param value
44+
*/
45+
public void put(final String keys, final P value) {
46+
if (keys.matches(".*" + SIMPLE_CHARACTER_CLASS + ".*")) {
47+
int charClassStart = keys.indexOf('[', 0);
48+
final int charClassEnd = keys.indexOf(']', 1);
49+
String begin = keys.substring(0, charClassStart);
50+
for (; charClassStart < charClassEnd - 1; charClassStart++) {
51+
char middle = keys.charAt(charClassStart + 1);
52+
String end = keys.substring(charClassEnd + 1, keys.length());
53+
put(begin + middle + end, value);
54+
}
55+
} else
56+
trie.put(keys, value);
57+
}
58+
59+
public List<P> get(final String key) {
60+
return trie.get(key);
61+
}
62+
63+
}

src/test/java/org/culturegraph/mf/morph/MetamorphTest.xml

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@
133133
</mm:rules>
134134
</mm:metamorph>
135135
</transformation>
136-
136+
137137
<result type="text/x-cg+xml" strict-key-order="false">
138138
<cgxml:cgxml version="1.0">
139139
<cgxml:records>
@@ -146,6 +146,52 @@
146146
</cgxml:cgxml>
147147
</result>
148148
</test-case>
149+
150+
<test-case name="[abc] simple character class match">
151+
<input type="text/x-cg+xml">
152+
<cgxml:cgxml version="1.0">
153+
<cgxml:records>
154+
<cgxml:record id="1">
155+
<cgxml:entity name="ABA">
156+
<cgxml:literal name="d" value="Aloha1" />
157+
</cgxml:entity>
158+
<cgxml:entity name="ABA">
159+
<cgxml:literal name="e" value="Aloha2" />
160+
</cgxml:entity>
161+
<cgxml:entity name="ABC">
162+
<cgxml:literal name="d" value="Aloha3" />
163+
</cgxml:entity>
164+
<cgxml:entity name="ABC">
165+
<cgxml:literal name="e" value="Aloha4" />
166+
</cgxml:entity>
167+
</cgxml:record>
168+
</cgxml:records>
169+
</cgxml:cgxml>
170+
</input>
171+
172+
<transformation type="text/x-metamorph+xml">
173+
<mm:metamorph version="1">
174+
<mm:rules>
175+
<mm:data source="AB[AC].[de]" name="Hawaii" />
176+
<mm:data source="ABC.d" name="Oahu" />
177+
</mm:rules>
178+
</mm:metamorph>
179+
</transformation>
180+
181+
<result type="text/x-cg+xml" strict-key-order="false">
182+
<cgxml:cgxml version="1.0">
183+
<cgxml:records>
184+
<cgxml:record id="1">
185+
<cgxml:literal name="Oahu" value="Aloha3" />
186+
<cgxml:literal name="Hawaii" value="Aloha1" />
187+
<cgxml:literal name="Hawaii" value="Aloha2" />
188+
<cgxml:literal name="Hawaii" value="Aloha3" />
189+
<cgxml:literal name="Hawaii" value="Aloha4" />
190+
</cgxml:record>
191+
</cgxml:records>
192+
</cgxml:cgxml>
193+
</result>
194+
</test-case>
149195

150196
<test-case name="xinclude">
151197
<input type="text/x-cg+xml">

0 commit comments

Comments
 (0)