Skip to content

Commit 856f4f0

Browse files
committed
LinkChecker: Make a "tool" which is useful for checking links in a directory for accessibility
1 parent 76d175f commit 856f4f0

File tree

3 files changed

+344
-0
lines changed

3 files changed

+344
-0
lines changed
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package name.valery1707.problem
2+
3+
import java.net.URI
4+
import java.net.http.HttpClient
5+
import java.net.http.HttpRequest
6+
import java.net.http.HttpResponse
7+
import java.nio.file.Path
8+
import java.time.Duration
9+
import java.time.Instant
10+
import java.time.temporal.ChronoField.NANO_OF_SECOND
11+
import kotlin.io.path.ExperimentalPathApi
12+
import kotlin.io.path.PathWalkOption
13+
import kotlin.io.path.readText
14+
import kotlin.io.path.walk
15+
16+
/**
 * Checks the links found in the files of a directory for accessibility.
 *
 * todo Make async (probably with coroutines)
 */
20+
class LinkChecker(private val root: Path) {
21+
/**
 * Scans all files of the [root] directory, looks for links in their text and checks the links for accessibility.
 *
 * A link is reported when the status produced for it is anything other than `200`.
 *
 * @param client HTTP client used to request every discovered link.
 * @param limit maximum number of discovered links that get checked (must not be negative);
 * the default keeps the cap of `20` this method has always applied.
 * @return map whose keys are `<relative path>:<line>:<column>` and whose values describe the problem:
 * `<uri> -> <status> -> <target>` for redirects, `<uri> -> -1 -> <message>` when the request failed
 * and `<uri> -> <status>` for everything else.
 */
@OptIn(ExperimentalPathApi::class)
fun findInvalid(client: HttpClient, limit: Int = 20): Map<String, String> = root
    .walk(PathWalkOption.FOLLOW_LINKS)
    .flatMap { file ->
        val relative = root.relativize(file)
        loadFile(root.resolve(relative))
            .findUri()
            .map { (position, uri) -> "$relative:$position" to uri }
    }
    .take(limit)// todo Remove limit
    .map { (location, uri) -> Triple(location, uri, uri.check(client)) }
    .filter { (_, _, result) -> result.first != 200 }
    .associate { (location, uri, result) ->
        val (status, target) = result
        location to when (status) {
            in HTTP_REDIRECT -> "$uri -> $status -> $target"
            // Failures carry their message in the query of a synthetic URI
            -1 -> "$uri -> $status -> ${target.query}"
            else -> "$uri -> $status"
        }
    }
56+
57+
/** Returns the complete text content of the file located at [path]. */
private fun loadFile(path: Path): String = path.readText()
60+
61+
companion object {
62+
/**
 * Case-insensitive URI pattern with the named groups
 * `scheme`, `authority`, `user`, `host`, `port`, `path`, `query` and `fragment`.
 *
 * https://stackoverflow.com/a/45690571
 */
private val URI_PATTERN_FULL = Regex(
    listOf(
        "(?<scheme>[a-z][a-z0-9+.-]+):",
        "(?<authority>\\/\\/(?<user>[^@]+@)?(?<host>[a-z0-9.\\-_~]+)(?<port>:\\d+)?)?",
        "(?<path>(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@])+(?:\\/(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@])*)*|(?:\\/(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@])+)*)?",
        "(?<query>\\?(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@]|[/?])+)?",
        "(?<fragment>\\#(?:[a-z0-9-._~]|%[a-f0-9]|[!\$&'()*+,;=:@]|[/?])+)?",
    ).joinToString(separator = ""),
    RegexOption.IGNORE_CASE,
)

/**
 * Variant of [URI_PATTERN_FULL] derived by textual substitutions on its pattern:
 * - the `()` pair is dropped from the character classes, so parentheses are no longer accepted;
 * - every `?:` marker is removed, turning the non-capturing groups into capturing ones;
 * - the `+` quantifier that closes the last `path` alternative becomes `*`;
 * - the `user` group is narrowed from `[^@]+@` to `[\w]+@`.
 */
private val URI_PATTERN_SIMPLE = Regex(
    URI_PATTERN_FULL.pattern
        .replace("()", "")
        .replace("?:", "")
        .replace("+)*)?(?<query>", "*)*)?(?<query>")
        .replace("(?<user>[^@]+@)", "(?<user>[\\w]+@)"),
    RegexOption.IGNORE_CASE,
)
79+
80+
/**
 * Computes the 1-based `line:column` position at which this match starts inside [text].
 *
 * Only the text in front of the match is inspected, so the result stays correct for matches that
 * span several lines and for empty matches.
 *
 * @param text the text this match was found in.
 * @return the position formatted as `"<line>:<column>"`.
 */
private fun MatchResult.position(text: String): String {
    val start = range.first
    val prefix = text.subSequence(0, start)
    // -1 when the match is on the first line, which makes the column `start + 1`
    val lastLineBreak = prefix.lastIndexOf('\n')
    val col = start - lastLineBreak
    val line = 1 + prefix.count { it == '\n' }
    return "$line:$col"
}
86+
87+
/**
 * Finds all `http`/`https` links in this text.
 *
 * Candidates come from [URI_PATTERN_SIMPLE]; trailing dots are stripped before parsing and candidates
 * that cannot be parsed into a [URI] are skipped. The scheme is compared case-insensitively,
 * in line with the case-insensitive pattern.
 *
 * @return lazy sequence of `position to uri` pairs, where the position is formatted as `"<line>:<column>"`.
 */
private fun String.findUri(): Sequence<Pair<String, URI>> = URI_PATTERN_SIMPLE
    .findAll(this)
    .filter { it.value.startsWith("http", ignoreCase = true) }
    .mapNotNull { match ->
        match.value.trimEnd('.').toURI()?.let { uri -> match.position(this) to uri }
    }
    .filter { (_, uri) ->
        uri.scheme.equals("http", ignoreCase = true) || uri.scheme.equals("https", ignoreCase = true)
    }
96+
97+
/**
 * Parses this string into a [URI].
 *
 * @return the parsed URI, or `null` when [URI.create] rejects the string with an [IllegalArgumentException].
 */
internal fun String.toURI(): URI? {
    return try {
        URI.create(this)
    } catch (invalid: IllegalArgumentException) {
        null
    }
}
102+
103+
/**
 * Requests this URI with `GET` and reports the outcome as a `status to uri` pair:
 * - redirect statuses ([HTTP_REDIRECT]): the status and the target from the `Location` header,
 *   resolved against the response URI so that relative targets are handled too;
 * - rate-limit statuses ([HTTP_RATE_LIMIT]): waits and retries, at most [retriesLeft] more times;
 *   once the retries are used up the rate-limit status is returned like any other status;
 * - any other status: the status and the response URI;
 * - any failure (including a URI the HTTP client refuses to request): status `-1` and a synthetic
 *   `http://host` URI whose query carries the exception message.
 *
 * @param client HTTP client used to send the request.
 * @param retriesLeft how many more times a rate-limited request may be repeated.
 */
private fun URI.check(client: HttpClient, retriesLeft: Int = 5): Pair<Int, URI> {
    // todo Cache
    return try {
        // Built inside `try`: the builder rejects URIs without a host (e.g. `http:foo`)
        val request = HttpRequest.newBuilder(this).GET().build()
        // todo Logging
        println("Check: $this")
        val response = client.send(request, HttpResponse.BodyHandlers.discarding())
        val status = response.statusCode()
        when {
            //Redirects: extract new location
            status in HTTP_REDIRECT -> {
                val location = response.headers()
                    .firstValue("Location")
                    .orElseThrow { NoSuchElementException("Redirect without Location header") }
                status to response.uri().resolve(location)
            }

            //Rate limiting: wait and retry
            status in HTTP_RATE_LIMIT && retriesLeft > 0 -> {
                val await = response.rateLimitAwaitMillis()
                // todo Logging
                println("Await: $await ms")
                Thread.sleep(await)
                check(client, retriesLeft - 1)
            }

            else -> status to response.uri()
        }
    } catch (e: Exception) {
        // Keep the interruption visible to the caller instead of silently swallowing it
        if (e is InterruptedException) Thread.currentThread().interrupt()
        // todo Logging
        // The multi-argument constructor quotes illegal characters, so any message is accepted
        -1 to URI("http", "host", null, "message=${e.message}", null)
    }
}

/**
 * Calculates how long to wait before repeating a rate-limited request.
 *
 * Reads the `x-ratelimit-reset` header (epoch seconds) and returns the first non-negative distance
 * from the current second to that moment; falls back to 500 ms when no usable value is present.
 *
 * https://docs.github.com/en/rest/overview/resources-in-the-rest-api?apiVersion=2022-11-28#checking-your-rate-limit-status
 */
private fun HttpResponse<*>.rateLimitAwaitMillis(): Long {
    val now = Instant.now().with(NANO_OF_SECOND, 0)
    return headers()
        .allValues("x-ratelimit-reset")
        .asSequence()
        .mapNotNull(String::toLongOrNull)
        .map { Duration.between(now, Instant.ofEpochSecond(it)).toMillis() }
        .firstOrNull { it >= 0 }
        ?: 500L
}
144+
145+
// Redirect statuses for which the target is read from the `Location` header (303 See Other included)
private val HTTP_REDIRECT = setOf(301, 302, 303, 307, 308)

// Statuses handled as rate limiting: the request is repeated after a pause
private val HTTP_RATE_LIMIT = setOf(403)
147+
}
148+
}
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
package name.valery1707.problem
2+
3+
import name.valery1707.problem.LinkChecker.Companion.toURI
4+
import org.assertj.core.api.Assertions.assertThat
5+
import org.assertj.core.api.Assertions.fail
6+
import org.assertj.core.api.Assumptions.assumeThat
7+
import org.junit.jupiter.api.Test
8+
import org.junit.jupiter.params.ParameterizedTest
9+
import org.junit.jupiter.params.provider.ValueSource
10+
import java.net.Authenticator
11+
import java.net.CookieHandler
12+
import java.net.InetSocketAddress
13+
import java.net.ProxySelector
14+
import java.net.URI
15+
import java.net.http.HttpClient
16+
import java.net.http.HttpHeaders
17+
import java.net.http.HttpRequest
18+
import java.net.http.HttpResponse
19+
import java.nio.file.Path
20+
import java.time.Duration
21+
import java.time.Instant
22+
import java.util.*
23+
import java.util.concurrent.CompletableFuture
24+
import java.util.concurrent.Executor
25+
import javax.net.ssl.SSLContext
26+
import javax.net.ssl.SSLParameters
27+
import javax.net.ssl.SSLSession
28+
import kotlin.io.path.toPath
29+
30+
/** Function that produces the [HttpResponse] for a given [HttpRequest]. */
typealias ResponseBuilder<T> = (HttpRequest) -> HttpResponse<T>
31+
/** Description of a canned response: the status code paired with its headers (one value per header name). */
typealias ResponseMeta = Pair<Int, Map<String, String>>
32+
33+
internal class LinkCheckerTest {
34+
35+
/**
 * Runs the checker against real directories using a real HTTP client.
 * Directories that are missing or unreadable are skipped through an assumption.
 */
@ParameterizedTest
@ValueSource(strings = ["./path/to/real/project"])
internal fun checkReal(path: Path) {
    assumeThat(path).isDirectory.isReadable

    // Redirects must stay visible to the checker, so the client never follows them
    val realClient = HttpClient.newBuilder()
        .followRedirects(HttpClient.Redirect.NEVER)
        .proxy(proxy)
        .build()

    val invalidLinks = LinkChecker(path).findInvalid(realClient)
    assertThat(invalidLinks).isEmpty()
}
51+
52+
/**
 * Checks the bundled `Demo.md` against canned responses and verifies which links are reported.
 */
@Test
@Suppress("HttpUrlsUsage")
internal fun testDemo() {
    val demoDir = javaClass.getResource("/linkChecker/Demo.md")?.toURI()?.toPath()?.parent
    assertThat(demoDir).isNotNull.isDirectory.isReadable

    // Factories for the canned response descriptions
    fun ok(): ResponseMeta = Pair(200, mapOf())
    fun notFound(): ResponseMeta = Pair(404, mapOf())
    fun redirect(code: Int, target: String): ResponseMeta = Pair(code, mapOf("Location" to target))
    fun rateLimitGH(awaitMillis: Long): ResponseMeta {
        val resetAt = Instant.now().plusMillis(awaitMillis).epochSecond
        return Pair(403, mapOf("x-ratelimit-reset" to resetAt.toString()))
    }

    //Check links via: curl --silent -X GET --head 'URL'
    val cannedResponses = mapOf(
        "https://ya.ru" to listOf(
            redirect(302, "https://ya.ru/"),
        ),
        "https://ya.ru/" to listOf(
            ok(),
        ),
        "http://schema.org" to listOf(
            redirect(301, "https://schema.org/"),
        ),
        "https://github.com/androidx/androidx/blob/androidx-main/build.gradle" to listOf(
            //todo Calculate header value on building response
            //Will wait some time
            rateLimitGH(2111),
            //Will wait zero time
            rateLimitGH(10),
            //Will wait default time
            rateLimitGH(-1500),
            ok(),
        ),
        "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt" to listOf(
            notFound(),
        ),
    )
    val mockedClient = MockedHttpClient.fromMeta(cannedResponses)

    val expected = mapOf(
        "Demo.md:1:25" to "https://ya.ru -> 302 -> https://ya.ru/",
        "Demo.md:3:14" to "http://schema.org -> 301 -> https://schema.org/",
        "Demo.md:5:14" to "https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt -> 404",
    )

    val actual = LinkChecker(demoDir!!).findInvalid(mockedClient)

    assertThat(actual).containsExactlyInAnyOrderEntriesOf(expected)
}
101+
102+
/** Strings that are not valid URIs must be converted to `null` rather than fail. */
@ParameterizedTest
@ValueSource(strings = ["some invalid uri"])
internal fun testInvalidUriString(uriString: String) {
    val parsed = uriString.toURI()

    assertThat(parsed).isNull()
}
111+
112+
/**
 * Proxy used by the real-network test: the first candidate whose address resolves,
 * otherwise the default proxy selector.
 */
private val proxy: ProxySelector by lazy {
    val candidates = sequenceOf("genproxy" to 8080)
    val resolved = candidates
        .map { (host, port) -> InetSocketAddress(host, port) }
        .firstOrNull { address -> !address.isUnresolved }
    resolved?.let { ProxySelector.of(it) } ?: ProxySelector.getDefault()
}
122+
123+
/**
 * [HttpClient] stub that answers every request through [worker] instead of using the network.
 */
private class MockedHttpClient(
    private val worker: ResponseBuilder<Any?>,
) : HttpClient() {

    // Request handling: both asynchronous variants end up in the synchronous one

    @Suppress("UNCHECKED_CAST")
    override fun <T : Any?> send(request: HttpRequest, responseBodyHandler: HttpResponse.BodyHandler<T>): HttpResponse<T> {
        return worker(request) as HttpResponse<T>
    }

    override fun <T : Any?> sendAsync(
        request: HttpRequest,
        responseBodyHandler: HttpResponse.BodyHandler<T>,
    ): CompletableFuture<HttpResponse<T>> {
        return CompletableFuture.supplyAsync { send(request, responseBodyHandler) }
    }

    override fun <T : Any?> sendAsync(
        request: HttpRequest,
        responseBodyHandler: HttpResponse.BodyHandler<T>,
        pushPromiseHandler: HttpResponse.PushPromiseHandler<T>?,
    ): CompletableFuture<HttpResponse<T>> {
        return sendAsync(request, responseBodyHandler)
    }

    // Client configuration: fixed values, nothing is configurable

    override fun version(): Version = Version.HTTP_1_1
    override fun followRedirects(): Redirect = Redirect.NEVER
    override fun sslContext(): SSLContext = SSLContext.getDefault()
    override fun sslParameters(): SSLParameters = sslContext().defaultSSLParameters
    override fun cookieHandler(): Optional<CookieHandler> = Optional.empty()
    override fun connectTimeout(): Optional<Duration> = Optional.empty()
    override fun proxy(): Optional<ProxySelector> = Optional.empty()
    override fun authenticator(): Optional<Authenticator> = Optional.empty()
    override fun executor(): Optional<Executor> = Optional.empty()

    companion object {
        /**
         * Creates a client from response descriptions: for every URI string the listed
         * responses are returned one after another, in order.
         */
        fun fromMeta(responses: Map<String, List<ResponseMeta>>): HttpClient {
            val builders = responses.mapValues { (_, metas) ->
                metas
                    .map<ResponseMeta, ResponseBuilder<Any?>> { (status, headers) ->
                        { request ->
                            MockedHttpResponse.fromRequest(request, status, headers.mapValues { header -> listOf(header.value) })
                        }
                    }
                    .toMutableList()
            }
            return fromBuilders(builders)
        }

        /**
         * Creates a client that answers a request with the next builder queued for its URI string;
         * the test fails when the URI is unknown.
         */
        fun fromBuilders(responses: Map<String, MutableList<ResponseBuilder<Any?>>>): HttpClient {
            return MockedHttpClient { request ->
                val queue = responses[request.uri().toString()]
                queue?.removeFirst()?.invoke(request) ?: fail("Unknown response builders for ${request.uri()}")
            }
        }
    }
}
169+
170+
/**
 * [HttpResponse] stub without a body: it only exposes the given status code and headers
 * and takes the URI and the protocol version from the originating [request].
 */
private class MockedHttpResponse<T : Any?>(
    private val request: HttpRequest,
    private val statusCode: Int,
    private val headers: HttpHeaders,
) : HttpResponse<T> {

    // Values supplied by the test
    override fun request(): HttpRequest = request
    override fun statusCode(): Int = statusCode
    override fun headers(): HttpHeaders = headers

    // Values derived from the request
    override fun uri(): URI = request().uri()
    override fun version(): HttpClient.Version = request().version().orElse(HttpClient.Version.HTTP_1_1)

    // Always absent
    override fun body(): T? = null
    override fun previousResponse(): Optional<HttpResponse<T>> = Optional.empty()
    override fun sslSession(): Optional<SSLSession> = Optional.empty()

    companion object {
        /** Builds a response for [request] with the given status code and headers; every header is accepted. */
        fun <T : Any?> fromRequest(request: HttpRequest, statusCode: Int, headers: Map<String, List<String>>): HttpResponse<T> {
            val httpHeaders = HttpHeaders.of(headers) { _, _ -> true }
            return MockedHttpResponse(request, statusCode, httpHeaders)
        }
    }
}
190+
191+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Link with name: [named](https://ya.ru).
2+
Link with name: [named](https://ya.ru/).
3+
Link inlined http://schema.org.
4+
Link with rate limiting: https://github.com/androidx/androidx/blob/androidx-main/build.gradle
5+
Link absent: https://github.com/androidx/androidx/blob/androidx-main/buildSrc/public/src/main/kotlin/androidx/build/LibraryGroups.kt

0 commit comments

Comments
 (0)