17
17
package org .metafacture .metafix ;
18
18
19
19
import org .metafacture .commons .tries .SimpleRegexTrie ;
20
+ import org .metafacture .commons .tries .WildcardTrie ;
20
21
21
22
import java .util .ArrayList ;
22
23
import java .util .Collection ;
23
24
import java .util .ConcurrentModificationException ;
24
25
import java .util .HashMap ;
25
26
import java .util .HashSet ;
26
27
import java .util .LinkedHashMap ;
28
+ import java .util .LinkedHashSet ;
27
29
import java .util .List ;
28
30
import java .util .Map ;
29
31
import java .util .Objects ;
30
32
import java .util .Set ;
31
33
import java .util .concurrent .atomic .AtomicReference ;
32
34
import java .util .function .BiConsumer ;
33
- import java .util .function .BiFunction ;
34
35
import java .util .function .Consumer ;
35
36
import java .util .function .Predicate ;
36
37
import java .util .regex .Matcher ;
@@ -210,6 +211,7 @@ public Value asList(final Consumer<Array> consumer) {
210
211
if (path != null ) {
211
212
path = FixPath .RESERVED_FIELD_PATTERN .matcher (newName ).replaceAll (Matcher .quoteReplacement (split (path )[0 ]));
212
213
}
214
+
213
215
return this ;
214
216
}
215
217
@@ -219,6 +221,7 @@ public Value asList(final Consumer<Array> consumer) {
219
221
final String lastSegment = pathSegments [pathSegments .length - 1 ];
220
222
this .path = container .path + "." + lastSegment ;
221
223
}
224
+
222
225
return this ;
223
226
}
224
227
@@ -229,6 +232,7 @@ public Value asList(final Consumer<Array> consumer) {
229
232
else {
230
233
path = fallback + suffix ;
231
234
}
235
+
232
236
return this ;
233
237
}
234
238
@@ -471,7 +475,10 @@ public void remove(final int index) {
471
475
public static class Hash extends AbstractValueType {
472
476
473
477
// NOTE: Keep in sync with `WildcardTrie`/`SimpleRegexTrie` implementation in metafacture-core.
474
- private static final Matcher PATTERN_MATCHER = Pattern .compile ("[*?|]|\\ [[^\\ ]]" ).matcher ("" );
478
+ private static final Pattern ALTERNATION_PATTERN = Pattern .compile (WildcardTrie .OR_STRING , Pattern .LITERAL );
479
+ private static final Matcher PATTERN_MATCHER = Pattern .compile ("[*?]|\\ [[^\\ ]]" ).matcher ("" );
480
+
481
+ private static final Map <String , String > PREFIX_CACHE = new HashMap <>();
475
482
476
483
private static final Map <String , Map <String , Boolean >> TRIE_CACHE = new HashMap <>();
477
484
private static final SimpleRegexTrie <String > TRIE = new SimpleRegexTrie <>();
@@ -491,7 +498,7 @@ protected Hash() {
491
498
* @return true if this hash contains the metadata field, false otherwise
492
499
*/
493
500
public boolean containsField (final String field ) {
// Diff view: the "-" line is the removed implementation, the "+" line its replacement.
494
// Removed: asked matchFields to evaluate the field with Stream::anyMatch.
- return matchFields (field , Stream :: anyMatch );
501
// Added: the field counts as contained when findFields(field) returns a non-empty set.
+ return ! findFields (field ). isEmpty ( );
495
502
}
496
503
497
504
public boolean containsPath (final String fieldPath ) {
@@ -575,22 +582,29 @@ public Value get(final String field) {
575
582
576
583
// Resolves `field` to the matching field names and returns their value(s):
//  - null when no field name matches,
//  - the single field's value when exactly one matches,
//  - otherwise the result of newArray(...), filled from all matched values.
// `enforceStringValue` makes the lookup call asString() on each value it returns.
// Diff view: "-" lines are the removed stream-based implementation, "+" lines the new one.
/*package-private*/ Value get (final String field , final boolean enforceStringValue ) { // TODO use Type.String etc.?
577
584
// TODO: special treatment (only) for exact matches?
578
// Removed: mapped the Stream of matched names to values and collected them into a List first.
- final List <Value > list = findFields (field ).map (actualField -> {
579
- final Value value = getField (actualField );
580
- if (enforceStringValue ) {
581
- value .asString ();
582
- }
583
- return value ;
584
- }).collect (Collectors .toList ());
585
- return list .isEmpty () ? null : list .size () == 1 ? list .get (0 ) : newArray (a -> list .forEach (v -> v .matchType ()
586
- .ifArray (b -> b .forEach (a ::add ))
587
- .orElse (a ::add )));
585
// Added: works on the Set of matched names; values are fetched lazily through
// getField(name, enforceStringValue) only in the branch that needs them.
+ final Set <String > set = findFields (field );
586
+
587
// With several matches, a value that is itself an array contributes its elements one by one
// (ifArray -> b.forEach(a::add)); any other value is added as a single element (orElse).
+ return set .isEmpty () ? null : set .size () == 1 ? getField (set .iterator ().next (), enforceStringValue ) :
588
+ newArray (a -> set .forEach (f -> getField (f , enforceStringValue ).matchType ()
589
+ .ifArray (b -> b .forEach (a ::add ))
590
+ .orElse (a ::add )
591
+ ));
588
592
}
589
593
590
594
/**
 * Returns the value stored under exactly the given field name.
 * The name is used as a literal key for {@code map.get}; no wildcard or
 * alternation matching is applied here.
 *
 * @param field the literal field name
 * @return the result of {@code map.get(field)}
 */
public Value getField (final String field ) {
591
595
return map .get (field );
592
596
}
593
597
598
// Looks up the value for the literal field name via getField(field) and, when
// `enforceStringValue` is set, calls asString() on it before returning it.
// The result of asString() is discarded, so the call is made only for its effect
// (presumably it rejects non-string values -- TODO confirm against Value.asString()).
// NOTE(review): if getField(field) returns null while enforceStringValue is true,
// value.asString() would throw a NullPointerException; the caller shown in this diff
// only passes names returned by findFields -- confirm no null values are stored.
+ private Value getField (final String field , final boolean enforceStringValue ) {
599
+ final Value value = getField (field );
600
+
601
+ if (enforceStringValue ) {
602
+ value .asString ();
603
+ }
604
+
605
+ return value ;
606
+ }
607
+
594
608
/**
 * Resolves the given field with {@code get(field)} and hands the result,
 * together with the consumer, to {@code asList}.
 *
 * @param field the field, as accepted by {@code get(field)}
 * @param consumer passed through unchanged to {@code asList}
 * @return whatever {@code asList(get(field), consumer)} returns
 */
public Value getList (final String field , final Consumer <Array > consumer ) {
595
609
return asList (get (field ), consumer );
596
610
}
@@ -612,13 +626,15 @@ public void addAll(final Hash hash) {
612
626
*/
613
627
public void add (final String field , final Value newValue ) {
614
628
final Value oldValue = new FixPath (field ).findIn (this );
629
+
615
630
if (oldValue == null ) {
616
631
put (field , newValue );
617
632
}
618
633
else {
619
634
if (!oldValue .isArray ()) { // repeated field: convert single val to first in array
620
635
oldValue .updatePathAppend (".1" , field );
621
636
}
637
+
622
638
put (field , oldValue .asList (oldVals -> newValue .asList (newVals -> {
623
639
for (int i = 0 ; i < newVals .size (); ++i ) {
624
640
oldVals .add (newVals .get (i ).updatePathAppend ("." + (i + 1 + oldVals .size ()), field ));
@@ -634,6 +650,7 @@ public void add(final String field, final Value newValue) {
634
650
*/
635
651
public void remove (final String field ) {
636
652
final FixPath fixPath = new FixPath (field );
653
+
637
654
if (fixPath .size () > 1 ) {
638
655
fixPath .removeNestedFrom (this );
639
656
}
@@ -652,7 +669,10 @@ public void removeField(final String field) {
652
669
* @param fields the field names
653
670
*/
654
671
public void retainFields (final Collection <String > fields ) {
// Diff view: the "-" line is the removed stream pipeline, the "+" lines replace it.
655
// Removed: flat-mapped every entry through findFields and collected the names into a Set.
- map .keySet ().retainAll (fields .stream ().flatMap (this ::findFields ).collect (Collectors .toSet ()));
672
// Added: each entry of `fields` is resolved with findFields and the resulting names
// are accumulated in one HashSet (order is irrelevant for retainAll).
+ final Set <String > retainFields = new HashSet <>();
673
+ fields .forEach (f -> retainFields .addAll (findFields (f )));
674
+
675
// Drops every key from the map's key set that is not among the accumulated names.
+ map .keySet ().retainAll (retainFields );
656
676
}
657
677
658
678
/**
@@ -702,24 +722,55 @@ public String toString() {
702
722
* @param consumer the action to be performed for each value
703
723
*/
704
724
/*package-private*/ void modifyFields (final String pattern , final Consumer <String > consumer ) {
// Diff view: the "-" line is the removed implementation, the "+" line its replacement.
705
// Removed: findFields returned a Stream, which was collected into a Set before iterating.
- findFields (pattern ).collect ( Collectors . toSet ()). forEach (consumer );
725
// Added: findFields now returns a Set itself (built as a new LinkedHashSet in
// findFields(String)), so the consumer is invoked once per matched field name
// without iterating over map.keySet() directly.
+ findFields (pattern ).forEach (consumer );
706
726
}
707
727
708
- private Stream <String > findFields (final String pattern ) {
709
- return matchFields (pattern , Stream ::filter );
728
// Resolves a field pattern to the set of matching field names.
// The pattern is split at every literal occurrence of WildcardTrie.OR_STRING
// (ALTERNATION_PATTERN is compiled with Pattern.LITERAL), and each resulting term is
// resolved separately by findFields(term, fieldSet).
// Returns a new LinkedHashSet: names appear in the order they were added and
// a name matched by several terms is contained only once.
+ private Set <String > findFields (final String pattern ) {
729
+ final Set <String > fieldSet = new LinkedHashSet <>();
730
+
731
// Pattern.split drops trailing empty strings, so a trailing separator adds no empty term.
+ for (final String term : ALTERNATION_PATTERN .split (pattern )) {
732
+ findFields (term , fieldSet );
733
+ }
734
+
735
+ return fieldSet ;
710
736
}
711
737
712
- private <T > T matchFields (final String pattern , final BiFunction <Stream <String >, Predicate <String >, T > function ) {
713
- if (PATTERN_MATCHER .reset (pattern ).find ()) {
714
- final Map <String , Boolean > matcher = TRIE_CACHE .computeIfAbsent (pattern , k -> {
715
- TRIE .put (k , k );
716
- return new HashMap <>();
717
- });
738
// Adds to `fieldSet` every key of `map` that matches `pattern`.
// Diff view: "-" lines belong to the removed matchFields implementation, "+" lines are new.
// NOTE(review): PATTERN_MATCHER is one shared static Matcher, and PREFIX_CACHE/TRIE_CACHE
// are plain static HashMaps; java.util.regex.Matcher is not safe for concurrent use --
// confirm this code is only reached from a single thread.
+ private void findFields (final String pattern , final Set <String > fieldSet ) {
739
// First time this pattern is seen: classify it once and remember the outcome.
// containsKey (not get) is used because null is stored as a regular value below.
+ if (!PREFIX_CACHE .containsKey (pattern )) {
740
+ final Matcher patternMatcher = PATTERN_MATCHER .reset (pattern );
741
+
742
// A hit means the pattern contains '*', '?' or a '[' followed by a character other than ']'.
+ if (patternMatcher .find ()) {
743
+ TRIE .put (pattern , pattern );
744
+ TRIE_CACHE .put (pattern , new HashMap <>());
718
745
719
- return function .apply (map .keySet ().stream (), f -> matcher .computeIfAbsent (f , k -> TRIE .get (k ).contains (pattern )));
746
// Literal text in front of the first special token; used below as a cheap pre-filter.
+ PREFIX_CACHE .put (pattern , pattern .substring (0 , patternMatcher .start ()));
747
+ }
748
+ else {
749
// null prefix marks a pattern without special tokens, i.e. an exact field name.
+ PREFIX_CACHE .put (pattern , null );
750
+ }
720
751
}
721
- else {
722
- return function .apply (Stream .of (pattern ), map ::containsKey );
752
+
753
+ final String prefix = PREFIX_CACHE .get (pattern );
754
+
755
+ if (prefix != null ) {
756
// Per-pattern memo: field name -> whether it matched this pattern.
// NOTE(review): an entry is stored for every field name ever tested and nothing in the
// code shown removes entries -- confirm the growth is acceptable for large inputs.
+ final Map <String , Boolean > fieldCache = TRIE_CACHE .get (pattern );
757
+
758
+ for (final String field : map .keySet ()) {
759
+ if (!fieldCache .containsKey (field )) {
760
// startsWith short-circuits, so TRIE is only consulted for names sharing the literal prefix.
+ final boolean matches = field .startsWith (prefix ) && TRIE .get (field ).contains (pattern );
761
+ fieldCache .put (field , matches );
762
+
763
+ if (matches ) {
764
+ fieldSet .add (field );
765
+ }
766
+ }
767
// Already tested earlier: reuse the memoized result.
+ else if (fieldCache .get (field )) {
768
+ fieldSet .add (field );
769
+ }
770
+ }
771
+ }
772
// Exact field name: a single key lookup, no scan over the key set.
+ else if (map .containsKey (pattern )) {
773
+ fieldSet .add (pattern );
723
774
}
724
775
}
725
776
0 commit comments