Skip to content

Commit fc9c4fb

Browse files
authored
Merge pull request #4864 from evolvedbinary/hotfix/replace-tokenize-analyze-string
Fixes to fn:replace, fn:tokenize, and fn:analyze-string
2 parents: 650609f + 92d2b6e; commit: fc9c4fb

File tree

6 files changed, with 88 additions and 144 deletions

exist-core/src/main/java/org/exist/xquery/functions/fn/FnModule.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -181,8 +181,8 @@ public class FnModule extends AbstractInternalModule {
181181
new FunctionDef(FunPosition.signature, FunPosition.class),
182182
new FunctionDef(FunQName.signature, FunQName.class),
183183
new FunctionDef(FunRemove.signature, FunRemove.class),
184-
new FunctionDef(FunReplace.signatures[0], FunReplace.class),
185-
new FunctionDef(FunReplace.signatures[1], FunReplace.class),
184+
new FunctionDef(FunReplace.FS_REPLACE[0], FunReplace.class),
185+
new FunctionDef(FunReplace.FS_REPLACE[1], FunReplace.class),
186186
new FunctionDef(FunReverse.signature, FunReverse.class),
187187
new FunctionDef(FunResolveURI.signatures[0], FunResolveURI.class),
188188
new FunctionDef(FunResolveURI.signatures[1], FunResolveURI.class),

exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,9 @@ private void analyzeString(final MemTreeBuilder builder, final String input, Str
130130

131131
try {
132132
final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings);
133+
if (regularExpression.matches("")) {
134+
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
135+
}
133136

134137
//TODO(AR) cache the regular expression... might be possible through Saxon config
135138

exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
*
6060
* @author <a href="mailto:[email protected]">Wolfgang Meier</a>
6161
*/
62-
public class FunMatches extends Function implements Optimizable, IndexUseReporter {
62+
public final class FunMatches extends Function implements Optimizable, IndexUseReporter {
6363

6464
private static final FunctionParameterSequenceType FS_PARAM_INPUT = optParam("input", Type.STRING, "The input string");
6565
private static final FunctionParameterSequenceType FS_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern");

exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java

Lines changed: 40 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -28,44 +28,29 @@
2828
import net.sf.saxon.functions.Replace;
2929
import net.sf.saxon.regex.RegularExpression;
3030
import org.exist.dom.QName;
31-
import org.exist.xquery.Atomize;
32-
import org.exist.xquery.Cardinality;
33-
import org.exist.xquery.Dependency;
34-
import org.exist.xquery.DynamicCardinalityCheck;
35-
import org.exist.xquery.ErrorCodes;
36-
import org.exist.xquery.Expression;
37-
import org.exist.xquery.Function;
38-
import org.exist.xquery.FunctionSignature;
39-
import org.exist.xquery.Profiler;
40-
import org.exist.xquery.XPathException;
41-
import org.exist.xquery.XQueryContext;
42-
import org.exist.xquery.util.Error;
31+
import org.exist.xquery.*;
4332
import org.exist.xquery.value.FunctionParameterSequenceType;
44-
import org.exist.xquery.value.FunctionReturnSequenceType;
45-
import org.exist.xquery.value.Item;
4633
import org.exist.xquery.value.Sequence;
47-
import org.exist.xquery.value.SequenceType;
4834
import org.exist.xquery.value.StringValue;
4935
import org.exist.xquery.value.Type;
5036

37+
import static org.exist.xquery.FunctionDSL.*;
5138
import static org.exist.xquery.regex.RegexUtil.*;
5239

5340
/**
5441
* @author <a href="mailto:[email protected]">Adam Retter</a>
5542
* @author <a href="mailto:[email protected]">Wolfgang Meier</a>
5643
*/
57-
public class FunReplace extends FunMatches {
58-
59-
private static final String FUNCTION_DESCRIPTION_3_PARAM =
60-
"The function returns the xs:string that is obtained by replacing each non-overlapping substring " +
61-
"of $input that matches the given $pattern with an occurrence of the $replacement string.\n\n";
62-
private static final String FUNCTION_DESCRIPTION_4_PARAM =
44+
public class FunReplace extends BasicFunction {
45+
46+
private static final QName FS_REPLACE_NAME = new QName("replace", Function.BUILTIN_FUNCTION_NS);
47+
48+
private static final String FS_REPLACE_DESCRIPTION =
6349
"The function returns the xs:string that is obtained by replacing each non-overlapping substring " +
6450
"of $input that matches the given $pattern with an occurrence of the $replacement string.\n\n" +
6551
"The $flags argument is interpreted in the same manner as for the fn:matches() function.\n\n" +
6652
"Calling the four argument version with the $flags argument set to a " +
67-
"zero-length string gives the same effect as using the three argument version.\n\n";
68-
private static final String FUNCTION_DESCRIPTION_COMMON =
53+
"zero-length string gives the same effect as using the three argument version.\n\n" +
6954
"If $input is the empty sequence, it is interpreted as the zero-length string.\n\nIf two overlapping " +
7055
"substrings of $input both match the $pattern, then only the first one (that is, the one whose first " +
7156
"character comes first in the $input string) is replaced.\n\nWithin the $replacement string, a variable " +
@@ -85,103 +70,59 @@ public class FunReplace extends FunMatches {
8570
"included \"as is\" in the replacement string, and the rules are reapplied using the number N " +
8671
"formed by stripping off this last digit.";
8772

88-
protected static final FunctionParameterSequenceType INPUT_ARG = new FunctionParameterSequenceType("input", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string");
89-
protected static final FunctionParameterSequenceType PATTERN_ARG = new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.EXACTLY_ONE, "The pattern to match");
90-
protected static final FunctionParameterSequenceType REPLACEMENT_ARG = new FunctionParameterSequenceType("replacement", Type.STRING, Cardinality.EXACTLY_ONE, "The string to replace the pattern with");
91-
protected static final FunctionParameterSequenceType FLAGS_ARG = new FunctionParameterSequenceType("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags");
92-
protected static final FunctionReturnSequenceType RETURN_TYPE = new FunctionReturnSequenceType(Type.STRING, Cardinality.EXACTLY_ONE, "the altered string");
93-
94-
public final static FunctionSignature[] signatures = {
95-
new FunctionSignature(
96-
new QName("replace", Function.BUILTIN_FUNCTION_NS),
97-
FUNCTION_DESCRIPTION_3_PARAM + FUNCTION_DESCRIPTION_COMMON,
98-
new SequenceType[] { INPUT_ARG, PATTERN_ARG, REPLACEMENT_ARG },
99-
RETURN_TYPE
100-
),
101-
new FunctionSignature(
102-
new QName("replace", Function.BUILTIN_FUNCTION_NS),
103-
FUNCTION_DESCRIPTION_4_PARAM + FUNCTION_DESCRIPTION_COMMON,
104-
new SequenceType[] { INPUT_ARG, PATTERN_ARG, REPLACEMENT_ARG, FLAGS_ARG },
105-
RETURN_TYPE
106-
)
107-
};
73+
private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_INPUT = optParam("input", Type.STRING, "The input string");
74+
private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_PATTERN = param("pattern", Type.STRING, "The pattern to match");
75+
private static final FunctionParameterSequenceType FS_TOKENIZE_PARAM_REPLACEMENT = param("replacement", Type.STRING, "The string to replace the pattern with");
76+
77+
static final FunctionSignature [] FS_REPLACE = functionSignatures(
78+
FS_REPLACE_NAME,
79+
FS_REPLACE_DESCRIPTION,
80+
returns(Type.STRING, "the altered string"),
81+
arities(
82+
arity(
83+
FS_TOKENIZE_PARAM_INPUT,
84+
FS_TOKENIZE_PARAM_PATTERN,
85+
FS_TOKENIZE_PARAM_REPLACEMENT
86+
),
87+
arity(
88+
FS_TOKENIZE_PARAM_INPUT,
89+
FS_TOKENIZE_PARAM_PATTERN,
90+
FS_TOKENIZE_PARAM_REPLACEMENT,
91+
param("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags")
92+
)
93+
)
94+
);
10895

10996
public FunReplace(final XQueryContext context, final FunctionSignature signature) {
11097
super(context, signature);
11198
}
112-
113-
@Override
114-
public void setArguments(List<Expression> arguments) {
115-
steps.clear();
116-
Expression arg = arguments.get(0);
117-
arg = new DynamicCardinalityCheck(context, Cardinality.ZERO_OR_ONE, arg,
118-
new Error(Error.FUNC_PARAM_CARDINALITY, "1", getSignature()));
119-
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
120-
{arg = new Atomize(context, arg);}
121-
steps.add(arg);
122-
123-
arg = arguments.get(1);
124-
arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg,
125-
new Error(Error.FUNC_PARAM_CARDINALITY, "2", getSignature()));
126-
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
127-
{arg = new Atomize(context, arg);}
128-
steps.add(arg);
129-
130-
arg = arguments.get(2);
131-
arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg,
132-
new Error(Error.FUNC_PARAM_CARDINALITY, "3", getSignature()));
133-
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
134-
{arg = new Atomize(context, arg);}
135-
steps.add(arg);
136-
137-
if (arguments.size() == 4) {
138-
arg = arguments.get(3);
139-
arg = new DynamicCardinalityCheck(context, Cardinality.EXACTLY_ONE, arg,
140-
new Error(Error.FUNC_PARAM_CARDINALITY, "4", getSignature()));
141-
if(!Type.subTypeOf(arg.returnsType(), Type.ATOMIC))
142-
{arg = new Atomize(context, arg);}
143-
steps.add(arg);
144-
}
145-
}
14699

147100
@Override
148-
public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException {
149-
if (context.getProfiler().isEnabled()) {
150-
context.getProfiler().start(this);
151-
context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
152-
if (contextSequence != null) {
153-
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);
154-
}
155-
if (contextItem != null) {
156-
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());
157-
}
158-
}
159-
101+
public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
160102
final Sequence result;
161-
final Sequence stringArg = getArgument(0).eval(contextSequence, contextItem);
103+
final Sequence stringArg = args[0];
162104
if (stringArg.isEmpty()) {
163105
result = StringValue.EMPTY_STRING;
164106
} else {
165107
final String flags;
166-
if (getSignature().getArgumentCount() == 4) {
167-
flags = getArgument(3).eval(contextSequence, contextItem).getStringValue();
108+
if (args.length == 4) {
109+
flags = args[3].itemAt(0).getStringValue();
168110
} else {
169111
flags = "";
170112
}
171-
172113
final String string = stringArg.getStringValue();
173-
final Sequence patternSeq = getArgument(1).eval(contextSequence, contextItem);
174-
final String pattern = patternSeq.getStringValue();
175-
176-
final Sequence replaceSeq = getArgument(2).eval(contextSequence, contextItem);
177-
final String replace = replaceSeq.getStringValue();
114+
final String pattern = args[1].itemAt(0).getStringValue();
115+
final String replace = args[2].itemAt(0).getStringValue();
178116

179117
final Configuration config = context.getBroker().getBrokerPool().getSaxonConfiguration();
180118

181119
final List<String> warnings = new ArrayList<>(1);
182120

183121
try {
184122
final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings);
123+
if (regularExpression.matches("")) {
124+
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
125+
}
185126

186127
//TODO(AR) cache the regular expression... might be possible through Saxon config
187128

@@ -210,11 +151,6 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
210151
}
211152
}
212153

213-
if (context.getProfiler().isEnabled()) {
214-
context.getProfiler().end(this, "", result);
215-
}
216-
217-
return result;
218-
154+
return result;
219155
}
220156
}

exist-core/src/main/java/org/exist/xquery/functions/fn/FunTokenize.java

Lines changed: 12 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,13 @@
2121
*/
2222
package org.exist.xquery.functions.fn;
2323

24+
import java.util.regex.Pattern;
2425
import java.util.regex.PatternSyntaxException;
2526

2627
import org.exist.dom.QName;
2728
import org.exist.util.PatternFactory;
28-
import org.exist.xquery.Dependency;
29-
import org.exist.xquery.ErrorCodes;
30-
import org.exist.xquery.Function;
31-
import org.exist.xquery.FunctionSignature;
32-
import org.exist.xquery.Profiler;
33-
import org.exist.xquery.XPathException;
34-
import org.exist.xquery.XQueryContext;
29+
import org.exist.xquery.*;
3530
import org.exist.xquery.value.FunctionParameterSequenceType;
36-
import org.exist.xquery.value.Item;
3731
import org.exist.xquery.value.Sequence;
3832
import org.exist.xquery.value.StringValue;
3933
import org.exist.xquery.value.Type;
@@ -46,7 +40,7 @@
4640
* @author <a href="mailto:[email protected]">Wolfgang Meier</a>
4741
* @see <a href="https://www.w3.org/TR/xpath-functions-31/#func-tokenize">https://www.w3.org/TR/xpath-functions-31/#func-tokenize</a>
4842
*/
49-
public class FunTokenize extends FunMatches {
43+
public class FunTokenize extends BasicFunction {
5044

5145
private static final QName FS_TOKENIZE_NAME = new QName("tokenize", Function.BUILTIN_FUNCTION_NS);
5246

@@ -78,20 +72,9 @@ public FunTokenize(final XQueryContext context, final FunctionSignature signatur
7872
}
7973

8074
@Override
81-
public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException {
82-
if (context.getProfiler().isEnabled()) {
83-
context.getProfiler().start(this);
84-
context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
85-
if (contextSequence != null) {
86-
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);
87-
}
88-
if (contextItem != null) {
89-
context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());
90-
}
91-
}
92-
75+
public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
9376
final Sequence result;
94-
final Sequence stringArg = getArgument(0).eval(contextSequence, contextItem);
77+
final Sequence stringArg = args[0];
9578
if (stringArg.isEmpty()) {
9679
result = Sequence.EMPTY_SEQUENCE;
9780
} else {
@@ -100,34 +83,30 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
10083
result = Sequence.EMPTY_SEQUENCE;
10184
} else {
10285
final int flags;
103-
if (getSignature().getArgumentCount() == 3) {
104-
flags = parseFlags(this, getArgument(2).eval(contextSequence, contextItem)
105-
.getStringValue());
86+
if (args.length == 3) {
87+
flags = parseFlags(this, args[2].itemAt(0).getStringValue());
10688
} else {
10789
flags = 0;
10890
}
10991

11092
final String pattern;
111-
if(getArgumentCount() == 1) {
93+
if (args.length == 1) {
11294
pattern = " ";
11395
string = FunNormalizeSpace.normalize(string);
11496
} else {
11597
if(hasLiteral(flags)) {
11698
// no need to change anything
117-
pattern = getArgument(1).eval(contextSequence, contextItem).getStringValue();
99+
pattern = args[1].itemAt(0).getStringValue();
118100
} else {
119101
final boolean ignoreWhitespace = hasIgnoreWhitespace(flags);
120102
final boolean caseBlind = hasCaseInsensitive(flags);
121-
pattern = translateRegexp(this, getArgument(1).eval(contextSequence, contextItem).getStringValue(), ignoreWhitespace, caseBlind);
103+
pattern = translateRegexp(this, args[1].itemAt(0).getStringValue(), ignoreWhitespace, caseBlind);
122104
}
123105
}
124106

125107
try {
126-
if (pat == null || (!pattern.equals(pat.pattern())) || flags != pat.flags()) {
127-
pat = PatternFactory.getInstance().getPattern(pattern, flags);
128-
}
129-
130-
if(pat.matcher("").matches()) {
108+
final Pattern pat = PatternFactory.getInstance().getPattern(pattern, flags);
109+
if (pat.matcher("").matches()) {
131110
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
132111
}
133112

@@ -144,10 +123,6 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr
144123
}
145124
}
146125

147-
if (context.getProfiler().isEnabled()) {
148-
context.getProfiler().end(this, "", result);
149-
}
150-
151126
return result;
152127
}
153128

exist-core/src/test/xquery/regex.xml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,40 @@
137137
<expected>lo</expected>
138138
</test>
139139

140+
<test output="text">
141+
<task>fn:replace-regex-match-empty-1</task>
142+
<code>fn:replace("12.34" , "^\D*", "")</code>
143+
<error>FORX0003</error>
144+
</test>
145+
140146
<test output="text">
141147
<task>fn:tokenize-qflag-1</task>
142148
<code>fn:tokenize("12.3.5.6", ".", "q")</code>
143149
<expected>12 3 5 6</expected>
144150
</test>
145151

152+
<test output="text">
153+
<task>fn:tokenize-single-input-1</task>
154+
<code>fn:tokenize("x,y", ",")</code>
155+
<expected>x y</expected>
156+
</test>
157+
158+
<test output="text">
159+
<task>fn:tokenize-single-input-2</task>
160+
<code>fn:tokenize(("a", "b", "x,y"), ",")</code>
161+
<error>XPTY0004</error>
162+
</test>
163+
164+
<test output="text">
165+
<task>fn:tokenize-regex-match-empty-1</task>
166+
<code>fn:tokenize("12.34" , "^\D*")</code>
167+
<error>FORX0003</error>
168+
</test>
169+
170+
<test output="text">
171+
<task>fn:analyze-string-regex-match-empty-1</task>
172+
<code>fn:analyze-string("12.34" , "^\D*")</code>
173+
<error>FORX0003</error>
174+
</test>
175+
146176
</TestSet>

0 commit comments