Skip to content

Commit 2f6a1f7

Browse files
authored
Fix wrong url returned for redirects (#14)
* Use redirected url * Added redirection test * Clean up * Removed todo * Clean up * Clean up * Add new test * bump version * Clean up * Clean up * Clean up
1 parent 6cb6ade commit 2f6a1f7

20 files changed

+180
-105
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
<groupId>ai.preferred</groupId>
1212
<artifactId>venom</artifactId>
13-
<version>4.2.7-SNAPSHOT</version>
13+
<version>4.2.7</version>
1414
<packaging>jar</packaging>
1515

1616
<name>${project.groupId}:${project.artifactId}</name>

src/main/java/ai/preferred/venom/fetcher/AsyncResponseConsumer.java

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@
2121
import ai.preferred.venom.response.BaseResponse;
2222
import ai.preferred.venom.response.Response;
2323
import ai.preferred.venom.utils.ResponseDecompressor;
24-
import ai.preferred.venom.utils.UrlUtil;
2524
import ai.preferred.venom.validator.Validator;
2625
import com.ibm.icu.text.CharsetDetector;
2726
import com.ibm.icu.text.CharsetMatch;
2827
import org.apache.commons.io.IOUtils;
2928
import org.apache.http.*;
29+
import org.apache.http.client.protocol.HttpClientContext;
3030
import org.apache.http.entity.ContentType;
3131
import org.apache.http.nio.ContentDecoder;
3232
import org.apache.http.nio.IOControl;
@@ -43,8 +43,9 @@
4343
import org.slf4j.LoggerFactory;
4444

4545
import java.io.IOException;
46-
import java.net.URISyntaxException;
46+
import java.net.URI;
4747
import java.nio.charset.UnsupportedCharsetException;
48+
import java.util.List;
4849
import java.util.Set;
4950

5051
/**
@@ -146,29 +147,29 @@ private byte[] getContent(final HttpEntity entity) throws IOException {
146147
* @return An instance of base response
147148
* @throws IOException Reading http response
148149
*/
149-
private BaseResponse createVenomResponse(final boolean compressed) throws IOException {
150+
private BaseResponse createVenomResponse(final boolean compressed, final HttpContext context) throws IOException {
150151
if (compressed) {
151152
RESPONSE_DECOMPRESSOR.decompress(httpResponse);
152153
}
153154

155+
final HttpClientContext clientContext = HttpClientContext.adapt(context);
156+
final List<URI> redirectedLocations = clientContext.getRedirectLocations();
157+
final String url;
158+
if (redirectedLocations == null) {
159+
url = request.getUrl();
160+
} else {
161+
url = redirectedLocations.get(redirectedLocations.size() - 1).toString();
162+
}
163+
154164
final HttpEntity entity = httpResponse.getEntity();
155165
final byte[] content = getContent(entity);
156166
request.getDiagnostics().setSize(content.length);
157167
final ContentType contentType = getContentType(entity);
158168
final Header[] headers = httpResponse.getAllHeaders();
159169

160-
String tryBaseUrl;
161-
try {
162-
tryBaseUrl = UrlUtil.getBaseUrl(request);
163-
} catch (URISyntaxException e) {
164-
LOGGER.warn("Could not parse base URL: " + request.getUrl());
165-
tryBaseUrl = request.getUrl();
166-
}
167-
final String baseUrl = tryBaseUrl;
168-
169170
return new BaseResponse(
170171
httpResponse.getStatusLine().getStatusCode(),
171-
baseUrl,
172+
url,
172173
content,
173174
contentType,
174175
headers,
@@ -253,7 +254,7 @@ protected final BaseResponse buildResult(final HttpContext context) throws Excep
253254
throw new StopCodeException(statusCode, "Stop code received.");
254255
}
255256

256-
final BaseResponse response = createVenomResponse(compressed);
257+
final BaseResponse response = createVenomResponse(compressed, context);
257258
releaseResources();
258259

259260
final Validator.Status status;

src/main/java/ai/preferred/venom/fetcher/StorageFetcher.java

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import ai.preferred.venom.storage.FileManager;
2525
import ai.preferred.venom.storage.Record;
2626
import ai.preferred.venom.storage.StorageException;
27-
import ai.preferred.venom.utils.UrlUtil;
2827
import ai.preferred.venom.validator.EmptyContentValidator;
2928
import ai.preferred.venom.validator.PipelineValidator;
3029
import ai.preferred.venom.validator.StatusOkValidator;
@@ -35,7 +34,6 @@
3534
import org.slf4j.LoggerFactory;
3635

3736
import javax.validation.constraints.NotNull;
38-
import java.net.URISyntaxException;
3937
import java.util.Collections;
4038
import java.util.Map;
4139
import java.util.concurrent.Future;
@@ -145,16 +143,7 @@ public void cancelled() {
145143

146144
LOGGER.debug("Record found with id: {}", record.getId());
147145

148-
String tryBaseUrl;
149-
try {
150-
tryBaseUrl = UrlUtil.getBaseUrl(request);
151-
} catch (URISyntaxException e) {
152-
LOGGER.warn("Could not parse base URL: " + request.getUrl());
153-
tryBaseUrl = request.getUrl();
154-
}
155-
final String baseUrl = tryBaseUrl;
156-
157-
final StorageResponse response = new StorageResponse(record, baseUrl);
146+
final StorageResponse response = new StorageResponse(record, request.getUrl());
158147
final Validator.Status status = validator.isValid(Unwrappable.unwrapRequest(request), response);
159148
if (status != Validator.Status.VALID) {
160149
future.failed(new ValidationException(status, response, "Invalid response."));

src/main/java/ai/preferred/venom/response/BaseResponse.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import org.apache.http.HttpHost;
2121
import org.apache.http.entity.ContentType;
2222

23+
import javax.validation.constraints.NotNull;
24+
2325
/**
2426
* @author Maksim Tkachenko
2527
* @author Truong Quoc Tuan
@@ -50,7 +52,7 @@ public class BaseResponse implements Response {
5052
/**
5153
* The base url of this response.
5254
*/
53-
private final String baseUrl;
55+
private final String url;
5456

5557
/**
5658
* The proxy used to obtain response.
@@ -61,16 +63,16 @@ public class BaseResponse implements Response {
6163
* Constructs a base response.
6264
*
6365
* @param statusCode Status code of the response
64-
* @param baseUrl Base url of the response
66+
* @param url         Url of the response, the final url if the request was redirected
6567
* @param content Content from the response
6668
* @param contentType Content type of the response
6769
* @param headers Headers from the response
6870
* @param proxy Proxy used to obtain the response
6971
*/
70-
public BaseResponse(final int statusCode, final String baseUrl, final byte[] content, final ContentType contentType,
72+
public BaseResponse(final int statusCode, final String url, final byte[] content, final ContentType contentType,
7173
final Header[] headers, final HttpHost proxy) {
7274
this.statusCode = statusCode;
73-
this.baseUrl = baseUrl;
75+
this.url = url;
7476
this.content = content;
7577
this.contentType = contentType;
7678
this.headers = headers;
@@ -88,18 +90,23 @@ public final byte[] getContent() {
8890
}
8991

9092
@Override
91-
public final ContentType getContentType() {
93+
public final @NotNull ContentType getContentType() {
9294
return contentType;
9395
}
9496

9597
@Override
96-
public final Header[] getHeaders() {
98+
public final @NotNull Header[] getHeaders() {
9799
return headers;
98100
}
99101

100102
@Override
101-
public final String getBaseUrl() {
102-
return baseUrl;
103+
public final @NotNull String getUrl() {
104+
return url;
105+
}
106+
107+
@Override
108+
public final @NotNull String getBaseUrl() {
109+
return getUrl();
103110
}
104111

105112
@Override

src/main/java/ai/preferred/venom/response/Response.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,21 @@ public interface Response {
6464
@NotNull
6565
Header[] getHeaders();
6666

67+
/**
68+
* Returns the url used to fetch the response. If the request
69+
* was redirected, this will be the final requested url.
70+
*
71+
* @return url of the response, or the final url if the request was redirected
72+
*/
73+
@NotNull
74+
String getUrl();
75+
6776
/**
6877
* Returns the base form of the url used in this request.
6978
*
7079
* @return stripped down version of requested url
7180
*/
81+
@Deprecated
7282
@NotNull
7383
String getBaseUrl();
7484

src/main/java/ai/preferred/venom/response/StorageResponse.java

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import org.apache.http.HttpHost;
2222
import org.apache.http.entity.ContentType;
2323

24+
import javax.validation.constraints.NotNull;
25+
2426

2527
/**
2628
* @author Ween Jiann Lee
@@ -33,19 +35,19 @@ public class StorageResponse implements Response, Retrievable {
3335
private final Record<?> record;
3436

3537
/**
36-
* The base url of this response.
38+
* The url of this response.
3739
*/
38-
private final String baseUrl;
40+
private final String url;
3941

4042
/**
4143
* Constructs a base response.
4244
*
43-
* @param record record holding this response
44-
* @param baseUrl base URL of the response
45+
* @param record record holding this response
46+
* @param url    URL of the response
4547
*/
46-
public StorageResponse(final Record<?> record, final String baseUrl) {
48+
public StorageResponse(final Record<?> record, final String url) {
4749
this.record = record;
48-
this.baseUrl = baseUrl;
50+
this.url = url;
4951
}
5052

5153
@Override
@@ -59,18 +61,23 @@ public final byte[] getContent() {
5961
}
6062

6163
@Override
62-
public final ContentType getContentType() {
64+
public final @NotNull ContentType getContentType() {
6365
return record.getContentType();
6466
}
6567

6668
@Override
67-
public final Header[] getHeaders() {
69+
public final @NotNull Header[] getHeaders() {
6870
return record.getResponseHeaders();
6971
}
7072

7173
@Override
72-
public final String getBaseUrl() {
73-
return baseUrl;
74+
public final @NotNull String getUrl() {
75+
return url;
76+
}
77+
78+
@Override
79+
public final @NotNull String getBaseUrl() {
80+
return getUrl();
7481
}
7582

7683
@Override

src/main/java/ai/preferred/venom/response/VResponse.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.jsoup.Jsoup;
2323
import org.jsoup.nodes.Document;
2424

25+
import javax.validation.constraints.NotNull;
2526
import java.nio.charset.Charset;
2627
import java.nio.charset.StandardCharsets;
2728

@@ -62,18 +63,23 @@ public final byte[] getContent() {
6263
}
6364

6465
@Override
65-
public final ContentType getContentType() {
66+
public final @NotNull ContentType getContentType() {
6667
return getInner().getContentType();
6768
}
6869

6970
@Override
70-
public final Header[] getHeaders() {
71+
public final @NotNull Header[] getHeaders() {
7172
return getInner().getHeaders();
7273
}
7374

7475
@Override
75-
public final String getBaseUrl() {
76-
return getInner().getBaseUrl();
76+
public final @NotNull String getUrl() {
77+
return getInner().getUrl();
78+
}
79+
80+
@Override
81+
public final @NotNull String getBaseUrl() {
82+
return getInner().getUrl();
7783
}
7884

7985
@Override
@@ -110,7 +116,7 @@ public final String getHtml(final Charset charset) {
110116
* @return jsoup document of response
111117
*/
112118
public final Document getJsoup() {
113-
return Jsoup.parse(getHtml(), getBaseUrl());
119+
return Jsoup.parse(getHtml(), getUrl());
114120
}
115121

116122
/**
@@ -120,7 +126,7 @@ public final Document getJsoup() {
120126
* @return jsoup document of response
121127
*/
122128
public final Document getJsoup(final Charset charset) {
123-
return Jsoup.parse(getHtml(charset), getBaseUrl());
129+
return Jsoup.parse(getHtml(charset), getUrl());
124130
}
125131

126132
@Override

src/main/java/ai/preferred/venom/utils/UrlUtil.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ private UrlUtil() {
4242
* @return base URL string
4343
* @throws URISyntaxException if not a proper URL
4444
*/
45+
@Deprecated
4546
public static String getBaseUrl(final Request request) throws URISyntaxException {
4647
final URI uri = new URI(request.getUrl());
4748
final URI baseUri = new URI(uri.getScheme(), null, uri.getHost(), uri.getPort(), uri.getPath(), null, null);

0 commit comments

Comments
 (0)