Skip to content

Commit 9a8a32b

Browse files
committed
Update
1 parent 528fb1f commit 9a8a32b

File tree

2 files changed

+22
-7
lines changed

2 files changed

+22
-7
lines changed

unicodetools/src/main/java/org/unicode/utilities/UrlUtilities.java

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -176,18 +176,23 @@ public String unescape(String substring) {
176176
static final UnicodeSet validHost =
177177
new UnicodeSet(IUP.getSet("Idn_Status=" + Idn_Status_Values.valid))
178178
.addAll(IUP.getSet("Idn_Status=" + Idn_Status_Values.mapped))
179-
.add('.')
179+
.removeAll(new UnicodeSet("[:ascii:]"))
180+
.addAll(new UnicodeSet("[a-zA-Z0-9.]"))
180181
.freeze();
181182

182183
public static final class StringRange {
183-
final int start;
184-
final int limit;
184+
public final int start;
185+
public final int limit;
185186

186187
public StringRange(int start, int limit) {
187188
super();
188189
this.start = start;
189190
this.limit = limit;
190191
}
192+
193+
public String substring(String source) {
194+
return source.substring(start, limit);
195+
}
191196
}
192197

193198
/**
@@ -207,12 +212,12 @@ public static StringRange parseURL(String source, int startCodePointOffset) {
207212
int start = findStartMatcher.start();
208213
// Naive search for the end of the host; doesn't handle ".." (consecutive dots) or other edge cases, but this does not have to
209214
// be production-quality
210-
int current = findStartMatcher.end();
211-
int current2 = validHost.span(source, start, SpanCondition.CONTAINED);
212-
if (current == current2) {
215+
int protocolLimit = findStartMatcher.end();
216+
int hostLimit = validHost.span(source, protocolLimit, SpanCondition.CONTAINED);
217+
if (protocolLimit == hostLimit) {
213218
return null;
214219
}
215-
int limit = parsePathQueryFragment(source, current2);
220+
int limit = parsePathQueryFragment(source, hostLimit);
216221
return new StringRange(start, limit);
217222
}
218223

unicodetools/src/test/java/org/unicode/unittest/TestUrl.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.unicode.utilities.UrlUtilities;
3434
import org.unicode.utilities.UrlUtilities.LinkTermination;
3535
import org.unicode.utilities.UrlUtilities.Part;
36+
import org.unicode.utilities.UrlUtilities.StringRange;
3637

3738
/** The following is very temporary, just during the spec development. */
3839
public class TestUrl extends TestFmwk {
@@ -439,6 +440,15 @@ public void testOverlap() {
439440
}
440441
}
441442

443+
public void testParseUrl() {
444+
String source = "See http://example.com/foobar and http://a.us/foobar!";
445+
StringRange position = UrlUtilities.parseURL(source, 0);
446+
assertEquals(source, "http://example.com/foobar", position.substring(source));
447+
448+
position = UrlUtilities.parseURL(source, position.limit);
449+
assertEquals(source, "http://a.us/foobar", position.substring(source));
450+
451+
}
442452
// private static final String SPLIT1 = "\t"; // for debugging, "\n";
443453
//
444454
// private static final boolean VERBOSE_ASSERT = false;

0 commit comments

Comments
 (0)