Skip to content

Commit 71ed5c9

Browse files
committed
Add patindexOf() and appendLink() overload with collateralCapture param
Allows specifying additional syntax to identify characters which might have been captured by {BrowseableURI} but which in the context of the capture (e.g., inside a single-quoted string) should be pushed back.
1 parent 919e363 commit 71ed5c9

File tree

3 files changed

+138
-27
lines changed

3 files changed

+138
-27
lines changed

src/org/opensolaris/opengrok/analysis/JFlexXref.java

Lines changed: 54 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import java.util.Set;
3636
import java.util.SortedSet;
3737
import java.util.TreeSet;
38+
import java.util.regex.Pattern;
3839
import org.opensolaris.opengrok.analysis.Definitions.Tag;
3940
import org.opensolaris.opengrok.analysis.Scopes.Scope;
4041
import org.opensolaris.opengrok.configuration.Project;
@@ -217,8 +218,8 @@ protected void appendProject() throws IOException {
217218
}
218219

219220
/**
220-
* Calls {@link #appendLink(java.lang.String, boolean)} with false to
221-
* disable {@code doPushback} handling.
221+
* Calls {@link #appendLink(java.lang.String, boolean)} with {@code url}
222+
* and false.
222223
* @param url the URL to append
223224
* @throws IOException if an error occurs while appending
224225
*/
@@ -227,30 +228,64 @@ protected void appendLink(String url) throws IOException {
227228
}
228229

229230
/**
230-
* Appends the {@code url} to the active {@link Writer}. If
231-
* {@code doPushback} is true, then any characters counted by
232-
* {@link StringUtils#countURIEndingPushback(java.lang.String)} are
233-
* handled by {@link #yypushback(int)} with {@code url} only partially
234-
* written.
235-
* <p>If the count is equal to the length of {@code url}, then it is
236-
* simply written, and nothing is pushed back.
231+
* Calls
232+
* {@link #appendLink(java.lang.String, boolean, java.util.regex.Pattern)}
233+
* with {@code url}, {@code doEndingPushback}, and null.
237234
* @param url the URL to append
238-
* @param doPushback a value indicating whether to test the {@code url}
239-
* with {@link StringUtils#countURIEndingPushback(java.lang.String)}.
235+
* @param doEndingPushback a value indicating whether to test the
236+
* {@code url} with
237+
* {@link StringUtils#countURIEndingPushback(java.lang.String)}
240238
* @throws IOException if an error occurs while appending
241239
*/
242-
protected void appendLink(String url, boolean doPushback)
240+
protected void appendLink(String url, boolean doEndingPushback)
243241
throws IOException {
244242

245-
if (doPushback) {
246-
int n = StringUtils.countURIEndingPushback(url);
247-
// Push back if positive, but not if equal to the current length,
248-
// or else the pushback might cause a neverending loop.
249-
if (n > 0 && n < url.length()) {
250-
yypushback(n);
251-
url = url.substring(0, url.length() - n);
243+
appendLink(url, doEndingPushback, null);
244+
}
245+
246+
/**
247+
* Appends the {@code url} to the active {@link Writer}.
248+
* <p>If {@code doEndingPushback} is true, then
249+
* {@link StringUtils#countURIEndingPushback(java.lang.String)} is enlisted
250+
* for use with {@link #yypushback(int)} -- i.e., {@code url} is only
251+
* partially written.
252+
* <p>If {@code collateralCapture} is not null, then its first match in
253+
* {@code url} (past index 0) alternatively marks the start of a pushback
254+
* count, and the larger of the two counts is used -- i.e., everything at
255+
* and beyond that match will be considered not to belong to the URI.
256+
* <p>If the pushback count is equal to the length of {@code url}, then it
257+
* is simply written -- and nothing is pushed back -- in order to avoid a
258+
* never-ending {@code yylex()} loop.
259+
* @param url the URL to append
260+
* @param doEndingPushback a value indicating whether to test the
261+
* {@code url} with
262+
* {@link StringUtils#countURIEndingPushback(java.lang.String)}
263+
* @param collateralCapture optional pattern to indicate characters which
264+
* may have been captured as valid URI characters but in a particular
265+
* context should mark the start of a pushback
266+
* @throws IOException if an error occurs while appending
267+
*/
268+
protected void appendLink(String url, boolean doEndingPushback,
269+
Pattern collateralCapture)
270+
throws IOException {
271+
272+
int n = 0;
273+
if (doEndingPushback) {
274+
n = StringUtils.countURIEndingPushback(url);
275+
}
276+
if (collateralCapture != null) {
277+
int o = StringUtils.patindexOf(url, collateralCapture);
278+
if (o > 0) {
279+
int ccn = url.length() - o;
280+
if (ccn > n) n = ccn;
252281
}
253282
}
283+
// Push back if positive, but not if equal to the current length.
284+
if (n > 0 && n < url.length()) {
285+
yypushback(n);
286+
url = url.substring(0, url.length() - n);
287+
}
288+
254289
out.write("<a href=\"");
255290
out.write(Util.formQuoteEscape(url));
256291
out.write("\">");

src/org/opensolaris/opengrok/util/StringUtils.java

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
package org.opensolaris.opengrok.util;
2626

27+
import java.util.regex.Matcher;
2728
import java.util.regex.Pattern;
2829

2930
/**
@@ -34,8 +35,30 @@
3435
public final class StringUtils {
3536

3637
/**
37-
* Edit and paste (in NetBeans) for easy escaping:
38-
* {@code [a-zA-Z0-9_\-\.] }.
38+
* Matches an apostrophe not following a backslash escape or following an
39+
* even number¹ of backslash escapes:
40+
* <pre>
41+
* {@code
42+
* \'((?<=^.)|(?<=[^\\].)|(?<=^(\\\\){1,3}.)|(?<=[^\\](\\\\){1,3}.))
43+
* }
44+
* </pre>
45+
* (Edit above and paste below [in NetBeans] for easy String escaping.)
46+
* <p>
47+
* ¹"even number" is limited to 2,4,6 because Java look-behind is not
48+
* variable length but instead must have a definite upper bound in the
49+
* regex definition.
50+
*/
51+
public static final Pattern APOS_NO_BSESC =
52+
Pattern.compile("\\'((?<=^.)|(?<=[^\\\\].)|(?<=^(\\\\\\\\){1,3}.)|(?<=[^\\\\](\\\\\\\\){1,3}.))");
53+
54+
/**
55+
* Matches the same possible character as CommonPath.lexh's {FNameChar}:
56+
* <pre>
57+
* {@code
58+
* [a-zA-Z0-9_\-\.]
59+
* }
60+
* </pre>
61+
* (Edit above and paste below [in NetBeans] for easy String escaping.)
3962
*/
4063
private static final String FNAME_CHARS_PAT =
4164
"[a-zA-Z0-9_\\-\\.]";
@@ -47,10 +70,15 @@ public final class StringUtils {
4770
Pattern.compile("^" + FNAME_CHARS_PAT);
4871

4972
/**
50-
* Edit and paste (in NetBeans) for easy escaping:
51-
* {@code [a-zA-Z0-9\-\._~%:/\?\#\[\]@!\$&\'\(\)\*\+,;=] }
73+
* Matches one of the same¹ possible characters as Common.lexh's {URIChar}:
74+
* <pre>
75+
* {@code
76+
* [a-zA-Z0-9\-\._~%:/\?\#\[\]@!\$&\'\(\)\*\+,;=]
77+
* }
78+
* </pre>
79+
* (Edit above and paste below [in NetBeans] for easy String escaping.)
5280
* <p>
53-
* Backslash, '\', was in {URIChar} in many .lex files, but that is not
81+
* ¹Backslash, '\', was in {URIChar} in many .lex files, but that is not
5482
* a valid URI character per RFC-3986.
5583
*/
5684
private static final String URI_CHARS_PAT =
@@ -225,7 +253,7 @@ public static int countURIEndingPushback(String value) {
225253
}
226254
return n;
227255
}
228-
256+
229257
/**
230258
* Find out if string contains only alphanumeric characters.
231259
* @param str string to check
@@ -241,4 +269,18 @@ public static boolean isAlphanumeric(String str) {
241269

242270
return true;
243271
}
272+
273+
/**
274+
* Finds the index of the first match of the specified {@code pattern} in
275+
* the specified {@code value}.
276+
* @param value the string to inspect
277+
* @param pattern the pattern to match
278+
* @return the index of the first occurrence of the specified pattern, or
279+
* -1 if there is no such occurrence
280+
*/
281+
public static int patindexOf(String value, Pattern pattern) {
282+
Matcher m = pattern.matcher(value);
283+
if (!m.find()) return -1;
284+
return m.start();
285+
}
244286
}

test/org/opensolaris/opengrok/util/StringUtilsTest.java

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@
1919

2020
/*
2121
* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
22+
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
2223
*/
2324
package org.opensolaris.opengrok.util;
2425

2526
import org.junit.Assert;
2627
import org.junit.Test;
27-
2828
import static org.junit.Assert.assertEquals;
2929

3030
/**
@@ -130,10 +130,44 @@ public void uriEmptyShouldNotCountAnyPushback() {
130130
int n = StringUtils.countURIEndingPushback(uri);
131131
assertEquals("empty pushback", 0, n);
132132
}
133-
133+
134134
@Test
135135
public void testIsAlphanumeric() {
136136
Assert.assertTrue(StringUtils.isAlphanumeric("foo123"));
137137
Assert.assertFalse(StringUtils.isAlphanumeric("foo_123"));
138138
}
139+
140+
@Test
141+
public void shouldMatchNonescapedApostrophe() {
142+
// Copy-and-paste the following so NetBeans does the escaping:
143+
// value: \'1-2-3\''
144+
final String value = "\\'1-2-3\\''";
145+
int i = StringUtils.patindexOf(value, StringUtils.APOS_NO_BSESC);
146+
assertEquals("unquoted apostrophe", 9, i);
147+
}
148+
149+
@Test
150+
public void shouldMatchApostropheAfterEvenEscapes() {
151+
// Copy-and-paste the following so NetBeans does the escaping:
152+
// value: \\'
153+
final String value = "\\\\'";
154+
int i = StringUtils.patindexOf(value, StringUtils.APOS_NO_BSESC);
155+
assertEquals("unquoted apostrophe after backslashes", 2, i);
156+
}
157+
158+
@Test
159+
public void shouldNotMatchApostropheAfterOddEscapes() {
160+
// Copy-and-paste the following so NetBeans does the escaping:
161+
// value: \\\'
162+
final String value = "\\\\\\'";
163+
int i = StringUtils.patindexOf(value, StringUtils.APOS_NO_BSESC);
164+
assertEquals("quoted apostrophe after backslashes", -1, i);
165+
}
166+
167+
@Test
168+
public void shouldMatchInitialApostrophe() {
169+
final String value = "'";
170+
int i = StringUtils.patindexOf(value, StringUtils.APOS_NO_BSESC);
171+
assertEquals("initial apostrophe", 0, i);
172+
}
139173
}

0 commit comments

Comments
 (0)