Skip to content

Commit 49e58c2

Browse files
committed
Catch spurious available() throws
Fixes #2474. This was causing CI flakes on the JDK 8 runner, and quite possibly in other environments too.
1 parent b77e51b commit 49e58c2

File tree

4 files changed

+37
-4
lines changed

4 files changed

+37
-4
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
### Bug Fixes
66
* Android (R8/ProGuard): added a rule to ignore the optional `re2j` dependency when not present. [#2459](https://github.com/jhy/jsoup/issues/2459)
77
* In `NodeTraversor`, removing or replacing the current node during `head()` no longer re-visits the replacement node, preventing loops. Traversal now continues correctly from nodes that occupy the original position after mutation, and will not advance past the original root subtree. Also, clarified in the documentation which inserted nodes are visited during the current traversal. [#2472](https://github.com/jhy/jsoup/issues/2472)
8+
* Parsing during charset sniffing no longer fails if an advisory `available()` call throws `IOException`, as seen on JDK 8 `HttpURLConnection`. [#2474](https://github.com/jhy/jsoup/issues/2474)
89

910
## 1.22.1 (2026-Jan-01)
1011

src/main/java/org/jsoup/internal/SimpleBufferedInput.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@ private void fill() throws IOException {
8080
bufLength = read + bufPos;
8181
capRemaining -= read;
8282
while (byteBuf.length - bufLength > 0 && capRemaining > 0) { // read in more if we have space, without blocking
83-
if (in.available() < 1) break;
83+
try {
84+
if (in.available() < 1) break;
85+
} catch (IOException e) {
86+
break; // available() is advisory; keep the bytes we've already buffered
87+
}
8488
toRead = Math.min(byteBuf.length - bufLength, capRemaining);
8589
if (toRead <= 0) break;
8690
read = in.read(byteBuf, bufLength, toRead);
@@ -116,8 +120,7 @@ public int available() throws IOException {
116120
if (buffered > 0) {
117121
return buffered; // doesn't include those in.available(), but mostly used as a block test
118122
}
119-
int avail = inReadFully ? 0 : in.available();
120-
return avail;
123+
return inReadFully ? 0 : in.available();
121124
}
122125

123126
void capRemaining(int newRemaining) {

src/main/java/org/jsoup/internal/SimpleStreamReader.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public int read(char[] charArray, int off, int len) throws IOException {
4343
while (true) {
4444
CoderResult result = decoder.decode(byteBuf, charBuf, readFully);
4545
if (result.isUnderflow()) {
46-
if (readFully || !charBuf.hasRemaining() || (charBuf.position() > 0) && !(in.available() > 0))
46+
if (readFully || !charBuf.hasRemaining() || (charBuf.position() > 0) && !hasAvailableBytes())
4747
break;
4848
int read = bufferUp();
4949
if (read < 0) {
@@ -64,6 +64,14 @@ public int read(char[] charArray, int off, int len) throws IOException {
6464
return charBuf.position();
6565
}
6666

67+
private boolean hasAvailableBytes() {
68+
try {
69+
return in.available() > 0;
70+
} catch (IOException e) {
71+
return false; // available() is advisory; a real read can still consume buffered bytes or reach EOF
72+
}
73+
}
74+
6775
private int bufferUp() throws IOException {
6876
assert byteBuf != null; // already validated ^
6977
byteBuf.compact();

src/test/java/org/jsoup/helper/DataUtilTest.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,4 +418,25 @@ public int available() {
418418
}
419419
}
420420

421+
@Test
422+
void charsetSniffingIgnoresAdvisoryAvailableIOException() throws IOException {
423+
// https://github.com/jhy/jsoup/issues/2474
424+
// JDK 8's HttpURLConnection stream may throw from available() once the peer has closed the socket;
425+
// that advisory failure does not mean we can't still consume bytes already buffered or read to clean EOF.
426+
String html = "<!doctype html><html><head><title>One</title></head><body>Two</body></html>";
427+
byte[] bytes = html.getBytes(StandardCharsets.UTF_8);
428+
InputStream stream = new FilterInputStream(new ByteArrayInputStream(bytes)) {
429+
@Override
430+
public int available() throws IOException {
431+
throw new IOException("Stream closed.");
432+
}
433+
};
434+
ControllableInputStream in = ControllableInputStream.wrap(stream, 0);
435+
436+
Document doc = DataUtil.parseInputStream(in, null, "http://example.com/", Parser.htmlParser());
437+
438+
assertEquals("One", doc.title());
439+
assertEquals("Two", doc.body().text());
440+
}
441+
421442
}

0 commit comments

Comments
 (0)