Skip to content

Commit 7df9a55

Browse files
committed
Optimize path wildcard matching for wildcard field names. (#97, f64cbde)
Still a hotspot, so we employ a bunch of additional strategies:
- Avoid `Stream` API in hot path.
- Sidestep `WildcardTrie` w.r.t. alternations.
- Guard `SimpleRegexTrie` matching with prefix match.
1 parent 46fb034 commit 7df9a55

File tree

1 file changed

+77
-26
lines changed
  • metafix/src/main/java/org/metafacture/metafix

1 file changed

+77
-26
lines changed

metafix/src/main/java/org/metafacture/metafix/Value.java

Lines changed: 77 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,21 @@
1717
package org.metafacture.metafix;
1818

1919
import org.metafacture.commons.tries.SimpleRegexTrie;
20+
import org.metafacture.commons.tries.WildcardTrie;
2021

2122
import java.util.ArrayList;
2223
import java.util.Collection;
2324
import java.util.ConcurrentModificationException;
2425
import java.util.HashMap;
2526
import java.util.HashSet;
2627
import java.util.LinkedHashMap;
28+
import java.util.LinkedHashSet;
2729
import java.util.List;
2830
import java.util.Map;
2931
import java.util.Objects;
3032
import java.util.Set;
3133
import java.util.concurrent.atomic.AtomicReference;
3234
import java.util.function.BiConsumer;
33-
import java.util.function.BiFunction;
3435
import java.util.function.Consumer;
3536
import java.util.function.Predicate;
3637
import java.util.regex.Matcher;
@@ -210,6 +211,7 @@ public Value asList(final Consumer<Array> consumer) {
210211
if (path != null) {
211212
path = FixPath.RESERVED_FIELD_PATTERN.matcher(newName).replaceAll(Matcher.quoteReplacement(split(path)[0]));
212213
}
214+
213215
return this;
214216
}
215217

@@ -219,6 +221,7 @@ public Value asList(final Consumer<Array> consumer) {
219221
final String lastSegment = pathSegments[pathSegments.length - 1];
220222
this.path = container.path + "." + lastSegment;
221223
}
224+
222225
return this;
223226
}
224227

@@ -229,6 +232,7 @@ public Value asList(final Consumer<Array> consumer) {
229232
else {
230233
path = fallback + suffix;
231234
}
235+
232236
return this;
233237
}
234238

@@ -471,7 +475,10 @@ public void remove(final int index) {
471475
public static class Hash extends AbstractValueType {
472476

473477
// NOTE: Keep in sync with `WildcardTrie`/`SimpleRegexTrie` implementation in metafacture-core.
474-
private static final Matcher PATTERN_MATCHER = Pattern.compile("[*?|]|\\[[^\\]]").matcher("");
478+
private static final Pattern ALTERNATION_PATTERN = Pattern.compile(WildcardTrie.OR_STRING, Pattern.LITERAL);
479+
private static final Matcher PATTERN_MATCHER = Pattern.compile("[*?]|\\[[^\\]]").matcher("");
480+
481+
private static final Map<String, String> PREFIX_CACHE = new HashMap<>();
475482

476483
private static final Map<String, Map<String, Boolean>> TRIE_CACHE = new HashMap<>();
477484
private static final SimpleRegexTrie<String> TRIE = new SimpleRegexTrie<>();
@@ -491,7 +498,7 @@ protected Hash() {
491498
* @return true if this hash contains the metadata field, false otherwise
492499
*/
493500
public boolean containsField(final String field) {
494-
return matchFields(field, Stream::anyMatch);
501+
return !findFields(field).isEmpty();
495502
}
496503

497504
public boolean containsPath(final String fieldPath) {
@@ -575,22 +582,29 @@ public Value get(final String field) {
575582

576583
/*package-private*/ Value get(final String field, final boolean enforceStringValue) { // TODO use Type.String etc.?
577584
// TODO: special treatment (only) for exact matches?
578-
final List<Value> list = findFields(field).map(actualField -> {
579-
final Value value = getField(actualField);
580-
if (enforceStringValue) {
581-
value.asString();
582-
}
583-
return value;
584-
}).collect(Collectors.toList());
585-
return list.isEmpty() ? null : list.size() == 1 ? list.get(0) : newArray(a -> list.forEach(v -> v.matchType()
586-
.ifArray(b -> b.forEach(a::add))
587-
.orElse(a::add)));
585+
final Set<String> set = findFields(field);
586+
587+
return set.isEmpty() ? null : set.size() == 1 ? getField(set.iterator().next(), enforceStringValue) :
588+
newArray(a -> set.forEach(f -> getField(f, enforceStringValue).matchType()
589+
.ifArray(b -> b.forEach(a::add))
590+
.orElse(a::add)
591+
));
588592
}
589593

590594
public Value getField(final String field) {
591595
return map.get(field);
592596
}
593597

598+
private Value getField(final String field, final boolean enforceStringValue) {
599+
final Value value = getField(field);
600+
601+
if (enforceStringValue) {
602+
value.asString();
603+
}
604+
605+
return value;
606+
}
607+
594608
public Value getList(final String field, final Consumer<Array> consumer) {
595609
return asList(get(field), consumer);
596610
}
@@ -612,13 +626,15 @@ public void addAll(final Hash hash) {
612626
*/
613627
public void add(final String field, final Value newValue) {
614628
final Value oldValue = new FixPath(field).findIn(this);
629+
615630
if (oldValue == null) {
616631
put(field, newValue);
617632
}
618633
else {
619634
if (!oldValue.isArray()) { // repeated field: convert single val to first in array
620635
oldValue.updatePathAppend(".1", field);
621636
}
637+
622638
put(field, oldValue.asList(oldVals -> newValue.asList(newVals -> {
623639
for (int i = 0; i < newVals.size(); ++i) {
624640
oldVals.add(newVals.get(i).updatePathAppend("." + (i + 1 + oldVals.size()), field));
@@ -634,6 +650,7 @@ public void add(final String field, final Value newValue) {
634650
*/
635651
public void remove(final String field) {
636652
final FixPath fixPath = new FixPath(field);
653+
637654
if (fixPath.size() > 1) {
638655
fixPath.removeNestedFrom(this);
639656
}
@@ -652,7 +669,10 @@ public void removeField(final String field) {
652669
* @param fields the field names
653670
*/
654671
public void retainFields(final Collection<String> fields) {
655-
map.keySet().retainAll(fields.stream().flatMap(this::findFields).collect(Collectors.toSet()));
672+
final Set<String> retainFields = new HashSet<>();
673+
fields.forEach(f -> retainFields.addAll(findFields(f)));
674+
675+
map.keySet().retainAll(retainFields);
656676
}
657677

658678
/**
@@ -702,24 +722,55 @@ public String toString() {
702722
* @param consumer the action to be performed for each value
703723
*/
704724
/*package-private*/ void modifyFields(final String pattern, final Consumer<String> consumer) {
705-
findFields(pattern).collect(Collectors.toSet()).forEach(consumer);
725+
findFields(pattern).forEach(consumer);
706726
}
707727

708-
private Stream<String> findFields(final String pattern) {
709-
return matchFields(pattern, Stream::filter);
728+
private Set<String> findFields(final String pattern) {
729+
final Set<String> fieldSet = new LinkedHashSet<>();
730+
731+
for (final String term : ALTERNATION_PATTERN.split(pattern)) {
732+
findFields(term, fieldSet);
733+
}
734+
735+
return fieldSet;
710736
}
711737

712-
private <T> T matchFields(final String pattern, final BiFunction<Stream<String>, Predicate<String>, T> function) {
713-
if (PATTERN_MATCHER.reset(pattern).find()) {
714-
final Map<String, Boolean> matcher = TRIE_CACHE.computeIfAbsent(pattern, k -> {
715-
TRIE.put(k, k);
716-
return new HashMap<>();
717-
});
738+
private void findFields(final String pattern, final Set<String> fieldSet) {
739+
if (!PREFIX_CACHE.containsKey(pattern)) {
740+
final Matcher patternMatcher = PATTERN_MATCHER.reset(pattern);
741+
742+
if (patternMatcher.find()) {
743+
TRIE.put(pattern, pattern);
744+
TRIE_CACHE.put(pattern, new HashMap<>());
718745

719-
return function.apply(map.keySet().stream(), f -> matcher.computeIfAbsent(f, k -> TRIE.get(k).contains(pattern)));
746+
PREFIX_CACHE.put(pattern, pattern.substring(0, patternMatcher.start()));
747+
}
748+
else {
749+
PREFIX_CACHE.put(pattern, null);
750+
}
720751
}
721-
else {
722-
return function.apply(Stream.of(pattern), map::containsKey);
752+
753+
final String prefix = PREFIX_CACHE.get(pattern);
754+
755+
if (prefix != null) {
756+
final Map<String, Boolean> fieldCache = TRIE_CACHE.get(pattern);
757+
758+
for (final String field : map.keySet()) {
759+
if (!fieldCache.containsKey(field)) {
760+
final boolean matches = field.startsWith(prefix) && TRIE.get(field).contains(pattern);
761+
fieldCache.put(field, matches);
762+
763+
if (matches) {
764+
fieldSet.add(field);
765+
}
766+
}
767+
else if (fieldCache.get(field)) {
768+
fieldSet.add(field);
769+
}
770+
}
771+
}
772+
else if (map.containsKey(pattern)) {
773+
fieldSet.add(pattern);
723774
}
724775
}
725776

0 commit comments

Comments
 (0)