Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,24 @@
import com.linkedin.urls.detection.UrlDetector;
import com.linkedin.urls.detection.UrlDetectorOptions;

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;

/**
* Utility class to detect links.
*/
public class LinkDetection {
private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient();

private static final Set<LinkFilter> DEFAULT_FILTERS =
Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);

/**
* Possible ways to filter a link.
Expand Down Expand Up @@ -58,6 +68,106 @@ public static boolean containsLink(String content) {
return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty());
}

/**
* Checks whether the given URL is considered broken.
*
* <p>
* A link is considered broken if:
* <ul>
* <li>The URL is invalid or malformed</li>
* <li>An HTTP request fails</li>
* <li>The HTTP response status code is outside the 200–399 range</li>
* </ul>
*
* <p>
* Notes:
* <ul>
* <li>Status code {@code 200} is considered valid, even if the response body is empty</li>
* <li>The response body content is not inspected</li>
* </ul>
*
* @param url the URL to check
* @return a future completing with {@code true} if the link is broken
*/

public static CompletableFuture<Boolean> isLinkBroken(String url) {
HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url))
.method("HEAD", HttpRequest.BodyPublishers.noBody())
.build();

return HTTP_CLIENT.sendAsync(headRequest, HttpResponse.BodyHandlers.discarding())
.thenApply(response -> {
int status = response.statusCode();
return status < 200 || status >= 400;
})
.exceptionally(ignored -> true)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the idiomatic name for something you ignore is _

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed ignored lambda parameters to _ where applicable (e.g. exceptionally(_ -> ...), thenApply(_ -> ...)) to clearly indicate intentional non-usage.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you forgot one:

.exceptionally(ignored -> true); // still never null

.thenCompose(result -> {
if (!Boolean.TRUE.equals(result)) {
return CompletableFuture.completedFuture(false);
}
HttpRequest getRequest = HttpRequest.newBuilder(URI.create(url)).GET().build();
return HTTP_CLIENT.sendAsync(getRequest, HttpResponse.BodyHandlers.discarding())
.thenApply(resp -> resp.statusCode() >= 400)
.exceptionally(ignored -> true); // still never null
});
}

/**
* Replaces all broken links in the given text with the provided replacement string.
*
* <p>
* Example:
*
* <pre>{@code
* replaceDeadLinks("""
* Test
* http://deadlink/1
* http://workinglink/1
* """, "broken")
* }</pre>
*
* <p>
* Results in:
*
* <pre>{@code
* Test
* broken
* http://workinglink/1
* }</pre>
*
* @param text the input text containing URLs
* @param replacement the string to replace broken links with
* @return a future containing the modified text
*/

public static CompletableFuture<String> replaceDeadLinks(String text, String replacement) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

improve naming. avoid multiple terms for the same think. you called it isLinkBroken and now you call it replaceDeadLinks. either dead or broken, not both. pick one and align the other

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not done yet

List<String> links = extractLinks(text, DEFAULT_FILTERS);

if (links.isEmpty()) {
return CompletableFuture.completedFuture(text);
}

List<CompletableFuture<String>> deadLinkFutures = links.stream()
.distinct()
.map(link -> isLinkBroken(link)
.thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? link : null))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better: instead of map(foo -> ...thenApply(...)), use .map(foo -> ...).filter(...) then you also dont have all these null items in ur list, polluting it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not done yet


.toList();

return CompletableFuture.allOf(deadLinkFutures.toArray(new CompletableFuture[0]))
.thenApply(ignored -> deadLinkFutures.stream()
.map(CompletableFuture::join)
.filter(Objects::nonNull)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.filter(Objects::nonNull) that one isnt needed anymore with the above fix

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not done yet

.toList())
.thenApply(deadLinks -> {
String result = text;
for (String deadLink : deadLinks) {
result = result.replace(deadLink, replacement);
}
return result;
});
}

private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
String raw = url.getOriginalUrl();
if (filter.contains(LinkFilter.SUPPRESSED) && raw.contains(">")) {
Expand All @@ -76,7 +186,6 @@ private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
// Remove trailing punctuation
link = link.substring(0, link.length() - 1);
}

return Optional.of(link);
}

Expand Down
Loading