Skip to content

Commit aec5cca

Browse files
alanpaxtonline-o
authored andcommitted
[enhancement] map:merge duplicates: combine, reject
Reject by counting expected size of merge. Combine uses a merge-with-combine fn from the underlying collection code. Passes an initial set of tests. There’s some code we have written for completeness (MapType merge with combiner operator) which doesn’t seem to get called. Need to investigate/test further.
1 parent 801797e commit aec5cca

File tree

5 files changed

+173
-18
lines changed

5 files changed

+173
-18
lines changed

exist-core/src/main/java/org/exist/xquery/functions/map/AbstractMapType.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
import javax.annotation.Nullable;
3333
import java.util.List;
34+
import java.util.function.BinaryOperator;
3435

3536
/**
3637
* Abstract base class for map types. A map item is also a function item. This class thus extends
@@ -70,11 +71,11 @@ public abstract class AbstractMapType extends FunctionReference
7071

7172
protected XQueryContext context;
7273

73-
protected AbstractMapType(XQueryContext context) {
74+
protected AbstractMapType(final XQueryContext context) {
7475
this(null, context);
7576
}
7677

77-
protected AbstractMapType(final Expression expression, XQueryContext context) {
78+
protected AbstractMapType(final Expression expression, final XQueryContext context) {
7879
super(expression, null);
7980
this.context = context;
8081
}
@@ -85,12 +86,15 @@ protected AbstractMapType(final Expression expression, XQueryContext context) {
8586

8687
public abstract AbstractMapType merge(Iterable<AbstractMapType> others);
8788

89+
public abstract AbstractMapType merge(Iterable<AbstractMapType> others, BinaryOperator<Sequence> mergeFn);
90+
8891
public abstract boolean contains(AtomicValue key);
8992

9093
public abstract Sequence keys();
9194

9295
public abstract AbstractMapType remove(AtomicValue[] keysAtomicValues) throws XPathException;
9396

97+
@SuppressWarnings("unused")
9498
public abstract int getKeyType();
9599

96100
public abstract int size();
@@ -106,12 +110,12 @@ public int getType() {
106110
}
107111

108112
@Override
109-
public void analyze(AnalyzeContextInfo contextInfo) throws XPathException {
113+
public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException {
110114
getAccessorFunc().analyze(contextInfo);
111115
}
112116

113117
@Override
114-
public Sequence eval(Sequence contextSequence) throws XPathException {
118+
public Sequence eval(final Sequence contextSequence) throws XPathException {
115119
return getAccessorFunc().eval(contextSequence);
116120
}
117121

@@ -127,12 +131,12 @@ public Sequence evalFunction(final Sequence contextSequence, final Item contextI
127131
}
128132

129133
@Override
130-
public void setArguments(List<Expression> arguments) throws XPathException {
134+
public void setArguments(final List<Expression> arguments) throws XPathException {
131135
getAccessorFunc().setArguments(arguments);
132136
}
133137

134138
@Override
135-
public void resetState(boolean postOptimization) {
139+
public void resetState(final boolean postOptimization) {
136140
getAccessorFunc().resetState(postOptimization);
137141
}
138142

exist-core/src/main/java/org/exist/xquery/functions/map/MapFunction.java

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -265,19 +265,15 @@ private Sequence merge(final Sequence[] args) throws XPathException {
265265
mergeDuplicates = MergeDuplicates.USE_LAST;
266266
}
267267

268-
if (mergeDuplicates == MergeDuplicates.REJECT) {
269-
throw new XPathException(this, ErrorCodes.FOJS0005, "map { \"duplicates\": \"reject\" } is not yet implemented by eXist-db");
270-
} else if (mergeDuplicates == MergeDuplicates.COMBINE) {
271-
throw new XPathException(this, ErrorCodes.FOJS0005, "map { \"combine\": \"reject\" } is not yet implemented by eXist-db");
272-
}
273-
274268
final Sequence maps = args[0];
275269
final int totalMaps = maps.getItemCount();
276270
final AbstractMapType head;
277271
final List<AbstractMapType> tail = new ArrayList<>(totalMaps - 1);
278272

279-
if (mergeDuplicates == MergeDuplicates.USE_LAST) {
273+
if (mergeDuplicates == MergeDuplicates.USE_LAST || mergeDuplicates == MergeDuplicates.COMBINE) {
280274
// head is the first map
275+
// USE_LAST will pick the item from the last map containing a duplicate item
276+
// COMBINE will combine duplicate items in head-first order
281277
head = (AbstractMapType) maps.itemAt(0);
282278
for (int i = 1; i < totalMaps; i++) {
283279
final AbstractMapType other = (AbstractMapType) maps.itemAt(i);
@@ -286,14 +282,51 @@ private Sequence merge(final Sequence[] args) throws XPathException {
286282

287283
} else {
288284
// head is the last map
285+
// USE_FIRST will pick the item from the first map containing a duplicate item
289286
head = (AbstractMapType) maps.itemAt(totalMaps - 1);
290287
for (int i = totalMaps - 2; i >= 0; i--) {
291288
final AbstractMapType other = (AbstractMapType) maps.itemAt(i);
292289
tail.add(other);
293290
}
294291
}
295292

296-
return head.merge(tail);
293+
if (mergeDuplicates == MergeDuplicates.COMBINE) {
294+
// Provide a callback function for merging items which share a key
295+
// Call merge variant
296+
final List<XPathException> mergeExceptions = new ArrayList<>();
297+
final AbstractMapType merged
298+
= head.merge(tail, (first, second) -> {
299+
try {
300+
final ValueSequence sequence = new ValueSequence(first);
301+
sequence.addAll(second);
302+
return sequence;
303+
} catch (final XPathException e) {
304+
//We cannot throw out of the MapType - pass exceptions here.
305+
mergeExceptions.add(e);
306+
}
307+
return Sequence.EMPTY_SEQUENCE;
308+
});
309+
if (!mergeExceptions.isEmpty()) {
310+
throw mergeExceptions.get(0);
311+
}
312+
return merged;
313+
}
314+
315+
final AbstractMapType result = head.merge(tail);
316+
317+
if (mergeDuplicates == MergeDuplicates.REJECT) {
318+
319+
int inputItemsSize = head.size();
320+
for (final AbstractMapType other : tail) {
321+
inputItemsSize += other.size();
322+
}
323+
if (inputItemsSize > result.size()) {
324+
// no duplicates, so we don't need to consider the duplicates
325+
throw new XPathException(this, ErrorCodes.FOJS0003, "map { \"duplicates\": \"reject\" } maps had duplicate entry");
326+
}
327+
}
328+
329+
return result;
297330
}
298331

299332
private Sequence forEach(final Sequence[] args) throws XPathException {

exist-core/src/main/java/org/exist/xquery/functions/map/MapType.java

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,15 @@
4343

4444
import javax.annotation.Nullable;
4545
import java.util.Iterator;
46+
import java.util.Optional;
47+
import java.util.function.BinaryOperator;
4648
import java.util.function.ToLongFunction;
4749

4850
/**
4951
* Full implementation of the XDM map() type based on an
5052
* immutable hash-map.
5153
*
52-
* @author <a href="mailto:[email protected]">Adam Rettter</a>
54+
* @author <a href="mailto:[email protected]">Adam Retter</a>
5355
*/
5456
public class MapType extends AbstractMapType {
5557

@@ -208,6 +210,43 @@ public AbstractMapType merge(final Iterable<AbstractMapType> others) {
208210
return new MapType(getExpression(), context, newMap.forked(), prevType);
209211
}
210212

213+
@Override
214+
public AbstractMapType merge(final Iterable<AbstractMapType> others, final BinaryOperator<Sequence> mergeFn) {
215+
216+
// create a transient map
217+
IMap<AtomicValue, Sequence> newMap = map.linear();
218+
219+
int prevType = keyType;
220+
for (final AbstractMapType other: others) {
221+
if (other instanceof MapType) {
222+
// MapType - optimise merge
223+
final MapType otherMap = (MapType) other;
224+
newMap = newMap.merge(otherMap.map, mergeFn);
225+
226+
if (prevType != otherMap.keyType) {
227+
prevType = MIXED_KEY_TYPES;
228+
}
229+
} else {
230+
// non MapType
231+
for (final IEntry<AtomicValue, Sequence> entry : other) {
232+
final AtomicValue key = entry.key();
233+
final Optional<Sequence> headEntry = newMap.get(key);
234+
if (headEntry.isPresent()) {
235+
newMap = newMap.put(key, mergeFn.apply(headEntry.get(), entry.value()));
236+
} else {
237+
newMap = newMap.put(key, entry.value());
238+
}
239+
if (prevType != key.getType()) {
240+
prevType = MIXED_KEY_TYPES;
241+
}
242+
}
243+
}
244+
}
245+
246+
// return an immutable map
247+
return new MapType(context, newMap.forked(), prevType);
248+
}
249+
211250
public void add(final AtomicValue key, final Sequence value) {
212251
setKeyType(key.getType());
213252
map = map.put(key, value);

exist-core/src/main/java/org/exist/xquery/functions/map/SingleKeyMapType.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
import javax.annotation.Nullable;
3535
import java.util.Iterator;
36+
import java.util.function.BinaryOperator;
3637

3738
import static org.exist.xquery.functions.map.MapType.newLinearMap;
3839

@@ -44,9 +45,9 @@
4445
*/
4546
public class SingleKeyMapType extends AbstractMapType {
4647

47-
private AtomicValue key;
48-
private Sequence value;
49-
private @Nullable Collator collator;
48+
private final AtomicValue key;
49+
private final Sequence value;
50+
private final @Nullable Collator collator;
5051

5152
public SingleKeyMapType(final XQueryContext context, final @Nullable Collator collator, final AtomicValue key, final Sequence value) {
5253
this(null, context, collator, key, value);
@@ -79,6 +80,12 @@ public AbstractMapType merge(final Iterable<AbstractMapType> others) {
7980
}
8081
}
8182

83+
@Override
84+
public AbstractMapType merge(final Iterable<AbstractMapType> others, final BinaryOperator<Sequence> mergeFn) {
85+
final MapType map = new MapType(context, collator, key, value);
86+
return map.merge(others, mergeFn);
87+
}
88+
8289
@Override
8390
public AbstractMapType put(final AtomicValue key, final Sequence value) {
8491
final IMap<AtomicValue, Sequence> map = newLinearMap(collator);

exist-core/src/test/xquery/maps/maps.xqm

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,78 @@ function mt:merge-duplicate-keys-use-last-explicit-2() {
250250
($mt:integerKeys(7), $specialWeek(7))
251251
};
252252

253+
declare
254+
%test:assertError("err:FOJS0003")
255+
function mt:merge-duplicate-keys-reject-has-duplicates() {
256+
let $specialWeek := map:merge((map { 7 : "Caturday" }, $mt:integerKeys), map { "duplicates": "reject" })
257+
return
258+
($specialWeek(7))
259+
};
260+
261+
declare
262+
%test:assertEquals("Saturday", "Saturday", "Caturday")
263+
function mt:merge-duplicate-keys-reject-no-duplicates() {
264+
let $specialWeek := map:merge((map { 8 : "Caturday" }, $mt:integerKeys), map { "duplicates": "reject" })
265+
return
266+
($mt:integerKeys(7), $specialWeek(7), $specialWeek(8))
267+
};
268+
269+
declare
270+
%test:assertEquals("Caturday","Maturday","Saturday")
271+
function mt:merge-duplicate-keys-combine-has-duplicates-sequence() {
272+
let $specialWeek := map:merge((map { 7 : ("Caturday","Maturday") }, $mt:integerKeys), map { "duplicates": "combine" })
273+
return
274+
($specialWeek(7))
275+
};
276+
277+
declare
278+
%test:assertEquals("Saturday","Caturday","Maturday")
279+
function mt:merge-duplicate-keys-combine-has-duplicates-sequence-order() {
280+
let $specialWeek := map:merge(($mt:integerKeys, map { 7 : ("Caturday","Maturday") }), map { "duplicates": "combine" })
281+
return
282+
($specialWeek(7))
283+
};
284+
285+
declare
286+
%test:assertEquals("Saturday","Caturday","Maturday", "Caturday", "Zaturday")
287+
function mt:merge-duplicate-keys-combine-3-has-duplicates-sequence-order() {
288+
let $specialWeek := map:merge(($mt:integerKeys, map { 7 : ("Caturday","Maturday") }, map { 7 : ("Caturday","Zaturday") }), map { "duplicates": "combine" })
289+
return
290+
($specialWeek(7))
291+
};
292+
293+
declare
294+
%test:assertEquals("Caturday","Saturday")
295+
function mt:merge-duplicate-keys-combine-has-duplicates-atomic() {
296+
let $specialWeek := map:merge((map { 7 : ("Caturday") }, $mt:integerKeys), map { "duplicates": "combine" })
297+
return
298+
($specialWeek(7))
299+
};
300+
301+
declare
302+
%test:assertEquals("Caturday","Saturday", "Maturday")
303+
function mt:merge-duplicate-keys-combine-has-duplicates-three() {
304+
let $specialWeek := map:merge((map { 7 : ("Caturday") }, $mt:integerKeys, map { 7: ("Maturday")}), map { "duplicates": "combine" })
305+
return
306+
($specialWeek(7))
307+
};
308+
309+
declare
310+
%test:assertEquals("Saturday", "Caturday", "Maturday", "Waturday")
311+
function mt:merge-duplicate-keys-combine-has-duplicates-four() {
312+
let $specialWeek := map:merge((map { 7 : () }, $mt:integerKeys, map { 7: ("Caturday", "Maturday", "Waturday")}), map { "duplicates": "combine" })
313+
return
314+
($specialWeek(7))
315+
};
316+
317+
declare
318+
%test:assertEquals("Caturday","Saturday")
319+
function mt:merge-duplicate-keys-combine-has-duplicates-empty-third() {
320+
let $specialWeek := map:merge((map { 7 : ("Caturday") }, $mt:integerKeys, map { 7: ()}), map { "duplicates": "combine" })
321+
return
322+
($specialWeek(7))
323+
};
324+
253325
declare
254326
%test:assertEmpty
255327
function mt:mapEmptyValue() {

0 commit comments

Comments
 (0)