Skip to content

Commit 3ab944b

Browse files
committed
Update dependencies and fix issue with jsoup
An issue appeared when upgrading jsoup. The jsoup maintainers appear to have changed the implementation of the "remove" method of the iterator returned by "Elements.iterator()": calling "remove" now removes the HTML element from the document itself, instead of just removing it from the Elements list.
1 parent 91841a9 commit 3ab944b

File tree

4 files changed

+56
-32
lines changed

4 files changed

+56
-32
lines changed

pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,22 +68,22 @@
6868
<dependency>
6969
<groupId>org.jsoup</groupId>
7070
<artifactId>jsoup</artifactId>
71-
<version>1.16.1</version>
71+
<version>1.22.1</version>
7272
</dependency>
7373
<dependency>
7474
<groupId>org.freemarker</groupId>
7575
<artifactId>freemarker</artifactId>
76-
<version>2.3.32</version>
76+
<version>2.3.34</version>
7777
</dependency>
7878
<dependency>
7979
<groupId>com.fasterxml.jackson.core</groupId>
8080
<artifactId>jackson-core</artifactId>
81-
<version>2.15.2</version>
81+
<version>2.21.0</version>
8282
</dependency>
8383
<dependency>
8484
<groupId>com.fasterxml.jackson.core</groupId>
8585
<artifactId>jackson-databind</artifactId>
86-
<version>2.15.2</version>
86+
<version>2.21.0</version>
8787

8888
<!--
8989
The only classes that refer to objects from jackson-databind are JCardModule, JCardSerializer, and JCardDeserializer, all of which

src/main/java/ezvcard/io/html/HCardParser.java

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import java.io.InputStream;
77
import java.io.Reader;
88
import java.io.UncheckedIOException;
9-
import java.net.MalformedURLException;
109
import java.net.URL;
1110
import java.nio.file.Path;
11+
import java.time.Duration;
1212
import java.util.ArrayList;
1313
import java.util.Iterator;
1414
import java.util.List;
@@ -39,6 +39,7 @@
3939
import ezvcard.property.Url;
4040
import ezvcard.property.VCardProperty;
4141
import ezvcard.util.Gobble;
42+
import ezvcard.util.HtmlUtils;
4243
import ezvcard.util.IOUtils;
4344

4445
/*
@@ -88,8 +89,9 @@
8889
* wiki/hcard</a>
8990
*/
9091
public class HCardParser extends StreamReader {
92+
private static final Duration urlTimeout = Duration.ofSeconds(30);
93+
9194
private final String pageUrl;
92-
private final Elements vcardElements;
9395
private final Iterator<Element> vcardElementsIt;
9496
private final List<Label> labels = new ArrayList<>();
9597

@@ -109,7 +111,7 @@ public class HCardParser extends StreamReader {
109111
* @throws IOException if there's a problem loading the webpage
110112
*/
111113
public HCardParser(URL url) throws IOException {
112-
this(Jsoup.parse(url, 30000), url.toString());
114+
this(Jsoup.parse(url, (int) urlTimeout.toMillis()), url.toString());
113115
}
114116

115117
/**
@@ -207,36 +209,30 @@ public HCardParser(Document document) {
207209
public HCardParser(Document document, String pageUrl) {
208210
this.pageUrl = pageUrl;
209211

210-
String anchor = null;
211-
if (pageUrl != null) {
212-
try {
213-
URL url = new URL(pageUrl);
214-
anchor = url.getRef();
215-
} catch (MalformedURLException e) {
216-
anchor = null;
217-
}
218-
}
212+
String anchor = (pageUrl == null) ? null : HtmlUtils.getAnchorFromUrl(pageUrl);
219213

220-
Element searchUnder = null;
221-
if (anchor != null) {
222-
searchUnder = document.getElementById(anchor);
223-
}
214+
Element searchUnder = (anchor == null) ? null : document.getElementById(anchor);
224215
if (searchUnder == null) {
225216
searchUnder = document;
226217
}
227218

228-
vcardElements = searchUnder.getElementsByClass("vcard");
219+
/*
220+
* Nested vCards also show up in this list as separate list items. For
221+
* example, if the HTML document has one vCard and that vCard has one
222+
* nested vCard (i.e. AGENT property), this list will have two elements.
223+
*
224+
* Exclude the nested vCards from being processed as their own,
225+
* independent vCards.
226+
*/
227+
Elements vcardElementsIncludingNested = searchUnder.getElementsByClass("vcard");
229228

230-
//remove nested vcard elements
231-
Iterator<Element> it = vcardElements.iterator();
232-
while (it.hasNext()) {
233-
Element element = it.next();
234-
if (isChildOf(element, vcardElements)) {
235-
it.remove();
236-
}
237-
}
229+
//@formatter:off
230+
Elements vcardElementsWithoutNested = new Elements(vcardElementsIncludingNested.stream()
231+
.filter(element -> !isChildOf(element, vcardElementsIncludingNested))
232+
.collect(Collectors.toList()));
233+
//@formatter:on
238234

239-
vcardElementsIt = vcardElements.iterator();
235+
vcardElementsIt = vcardElementsWithoutNested.iterator();
240236
}
241237

242238
/**
@@ -246,8 +242,7 @@ public HCardParser(Document document, String pageUrl) {
246242
*/
247243
private HCardParser(Element embeddedVCard, String pageUrl) {
248244
this.pageUrl = pageUrl;
249-
vcardElements = new Elements(embeddedVCard);
250-
vcardElementsIt = vcardElements.iterator();
245+
vcardElementsIt = new Elements(embeddedVCard).iterator();
251246
}
252247

253248
@Override

src/main/java/ezvcard/util/HtmlUtils.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package ezvcard.util;
22

3+
import java.net.MalformedURLException;
4+
import java.net.URL;
5+
36
import org.jsoup.Jsoup;
47
import org.jsoup.nodes.Document;
58
import org.jsoup.nodes.Element;
@@ -70,6 +73,23 @@ public static Element toElement(String html, String baseUrl) {
7073
return d.getElementsByTag("body").first().children().first();
7174
}
7275

76+
/**
77+
* Extracts the anchor portion from a URL.
78+
* @param url the full URL
79+
* @return the anchor, or null if there is no anchor, or null if the given
80+
* string is not a valid URL
81+
*/
82+
public static String getAnchorFromUrl(String url) {
83+
URL urlObj;
84+
try {
85+
urlObj = new URL(url);
86+
} catch (MalformedURLException e) {
87+
return null;
88+
}
89+
90+
return urlObj.getRef();
91+
}
92+
7393
private HtmlUtils() {
7494
//hide
7595
}

src/test/java/ezvcard/util/HtmlUtilsTest.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import static org.junit.Assert.assertEquals;
44
import static org.junit.Assert.assertFalse;
5+
import static org.junit.Assert.assertNull;
56
import static org.junit.Assert.assertTrue;
67

78
import org.jsoup.Jsoup;
@@ -80,4 +81,12 @@ public void toElement_with_base_url() {
8081
assertEquals("img", element.tagName());
8182
assertEquals("http://example.com/image.png", element.absUrl("src"));
8283
}
84+
85+
@Test
86+
public void getAnchorFromUrl() {
87+
assertEquals("foo", HtmlUtils.getAnchorFromUrl("https://domain.com/page#foo"));
88+
assertEquals("", HtmlUtils.getAnchorFromUrl("https://domain.com/page#"));
89+
assertNull(HtmlUtils.getAnchorFromUrl("https://domain.com/page"));
90+
assertNull(HtmlUtils.getAnchorFromUrl("not a URL"));
91+
}
8392
}

0 commit comments

Comments
 (0)