Skip to content

Commit 6c799fd

Browse files
committed
incorporating inspection comments
1 parent 0da6fcf commit 6c799fd

File tree

8 files changed

+175
-183
lines changed

8 files changed

+175
-183
lines changed

src/main/java/org/nibor/autolink/LinkExtractor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public Builder emailDomainMustHaveDot(boolean emailDomainMustHaveDot) {
9393
*/
9494
public LinkExtractor build() {
9595
UrlScanner urlScanner = linkTypes.contains(LinkType.URL) ? new UrlScanner() : null;
96-
WwwUrlScanner wwwScanner = linkTypes.contains(LinkType.URL) ? new WwwUrlScanner() : null;
96+
WwwUrlScanner wwwScanner = linkTypes.contains(LinkType.WWW) ? new WwwUrlScanner() : null;
9797
EmailScanner emailScanner = linkTypes.contains(LinkType.EMAIL) ? new EmailScanner(emailDomainMustHaveDot) : null;
9898
return new LinkExtractor(urlScanner, wwwScanner, emailScanner);
9999
}

src/main/java/org/nibor/autolink/LinkSpan.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,4 @@ public interface LinkSpan {
2121
*/
2222
int getEndIndex();
2323

24-
/**
25-
* @return the found sequence
26-
*/
27-
CharSequence sequence();
2824
}

src/main/java/org/nibor/autolink/LinkType.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,9 @@ public enum LinkType {
1111
/**
1212
* Email address such as {@code foo@example.com}
1313
*/
14-
EMAIL
14+
EMAIL,
15+
/**
16+
* URL such as {@code www.example.com}
17+
*/
18+
WWW
1519
}

src/main/java/org/nibor/autolink/internal/EmailScanner.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ public LinkSpan scan(CharSequence input, int triggerIndex, int rewindIndex) {
2525
}
2626

2727
int afterAt = triggerIndex + 1;
28-
int last = findLast(input, afterAt) + 1;
29-
if (last == 0) {
28+
int last = findLast(input, afterAt);
29+
if (last == -1) {
3030
return null;
3131
}
3232

33-
return new LinkSpanImpl(LinkType.EMAIL, first, last, input.subSequence(first, last));
33+
return new LinkSpanImpl(LinkType.EMAIL, first, last + 1);
3434
}
3535

3636
// See "Local-part" in RFC 5321, plus extensions in RFC 6531

src/main/java/org/nibor/autolink/internal/LinkSpanImpl.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,11 @@ public class LinkSpanImpl implements LinkSpan {
88
private final LinkType linkType;
99
private final int beginIndex;
1010
private final int endIndex;
11-
private final CharSequence sequence;
1211

13-
public LinkSpanImpl(LinkType linkType, int beginIndex, int endIndex, CharSequence sequence) {
12+
public LinkSpanImpl(LinkType linkType, int beginIndex, int endIndex) {
1413
this.linkType = linkType;
1514
this.beginIndex = beginIndex;
1615
this.endIndex = endIndex;
17-
this.sequence = sequence;
1816
}
1917

2018
@Override
@@ -32,11 +30,6 @@ public int getEndIndex() {
3230
return endIndex;
3331
}
3432

35-
@Override
36-
public CharSequence sequence() {
37-
return sequence;
38-
}
39-
4033
@Override
4134
public String toString() {
4235
return "Link{type=" + getType() + ", beginIndex=" + beginIndex + ", endIndex=" + endIndex + "}";

src/main/java/org/nibor/autolink/internal/Scanners.java

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,160 @@ public static boolean isAlnum(char c) {
1717
public static boolean isNonAscii(char c) {
1818
return c >= 0x80;
1919
}
20+
21+
public static int findLast(CharSequence input, int beginIndex) {
22+
int round = 0;
23+
int square = 0;
24+
int curly = 0;
25+
boolean doubleQuote = false;
26+
boolean singleQuote = false;
27+
int last = beginIndex;
28+
loop:
29+
for (int i = beginIndex; i < input.length(); i++) {
30+
char c = input.charAt(i);
31+
switch (c) {
32+
case '\u0000':
33+
case '\u0001':
34+
case '\u0002':
35+
case '\u0003':
36+
case '\u0004':
37+
case '\u0005':
38+
case '\u0006':
39+
case '\u0007':
40+
case '\u0008':
41+
case '\t':
42+
case '\n':
43+
case '\u000B':
44+
case '\f':
45+
case '\r':
46+
case '\u000E':
47+
case '\u000F':
48+
case '\u0010':
49+
case '\u0011':
50+
case '\u0012':
51+
case '\u0013':
52+
case '\u0014':
53+
case '\u0015':
54+
case '\u0016':
55+
case '\u0017':
56+
case '\u0018':
57+
case '\u0019':
58+
case '\u001A':
59+
case '\u001B':
60+
case '\u001C':
61+
case '\u001D':
62+
case '\u001E':
63+
case '\u001F':
64+
case ' ':
65+
case '<':
66+
case '>':
67+
case '\u007F':
68+
case '\u0080':
69+
case '\u0081':
70+
case '\u0082':
71+
case '\u0083':
72+
case '\u0084':
73+
case '\u0085':
74+
case '\u0086':
75+
case '\u0087':
76+
case '\u0088':
77+
case '\u0089':
78+
case '\u008A':
79+
case '\u008B':
80+
case '\u008C':
81+
case '\u008D':
82+
case '\u008E':
83+
case '\u008F':
84+
case '\u0090':
85+
case '\u0091':
86+
case '\u0092':
87+
case '\u0093':
88+
case '\u0094':
89+
case '\u0095':
90+
case '\u0096':
91+
case '\u0097':
92+
case '\u0098':
93+
case '\u0099':
94+
case '\u009A':
95+
case '\u009B':
96+
case '\u009C':
97+
case '\u009D':
98+
case '\u009E':
99+
case '\u009F':
100+
// These can never be part of a URL, so stop now. See RFC 3986 and RFC 3987.
101+
// Some characters are not in the above list, even though they are not in "unreserved" or "reserved":
102+
// '"', '\\', '^', '`', '{', '|', '}'
103+
// The reason for this is that other link detectors also allow them. Also see below, we require
104+
// the quote and the braces to be balanced.
105+
break loop;
106+
case '?':
107+
case '!':
108+
case '.':
109+
case ',':
110+
case ':':
111+
case ';':
112+
// These may be part of a URL, but not at the end
113+
break;
114+
case '/':
115+
// This may be part of a URL and at the end, but not if the previous character can't be the end of a URL
116+
if (last == i - 1) {
117+
last = i;
118+
}
119+
break;
120+
case '(':
121+
round++;
122+
break;
123+
case ')':
124+
round--;
125+
if (round >= 0) {
126+
last = i;
127+
} else {
128+
// More closing than opening brackets, stop now
129+
break loop;
130+
}
131+
break;
132+
case '[':
133+
// Allowed in IPv6 address host
134+
square++;
135+
break;
136+
case ']':
137+
// Allowed in IPv6 address host
138+
square--;
139+
if (square >= 0) {
140+
last = i;
141+
} else {
142+
// More closing than opening brackets, stop now
143+
break loop;
144+
}
145+
break;
146+
case '{':
147+
curly++;
148+
break;
149+
case '}':
150+
curly--;
151+
if (curly >= 0) {
152+
last = i;
153+
} else {
154+
// More closing than opening brackets, stop now
155+
break loop;
156+
}
157+
break;
158+
case '"':
159+
doubleQuote = !doubleQuote;
160+
if (!doubleQuote) {
161+
last = i;
162+
}
163+
break;
164+
case '\'':
165+
singleQuote = !singleQuote;
166+
if (!singleQuote) {
167+
last = i;
168+
}
169+
break;
170+
default:
171+
last = i;
172+
}
173+
}
174+
return last;
175+
}
20176
}

0 commit comments

Comments
 (0)