Skip to content

Commit ba4dc7e

Browse files
committed
Merge remote-tracking branch 'otel/main' into health-metrics
2 parents 5b71238 + 5bda810 commit ba4dc7e

File tree

26 files changed

+708
-631
lines changed

26 files changed

+708
-631
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Visits the javadoc.io pages of the published artifacts once a day by running
# the :javadoc-crawler:crawl Gradle task.
name: Javadoc.io site crawler (daily)

on:
  schedule:
    - cron: "30 1 * * *" # daily at 1:30 UTC
  # Also allow the crawl to be started manually.
  workflow_dispatch:

# The job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  crawl:
    runs-on: ubuntu-latest
    steps:
      # Third-party actions are pinned to a commit SHA; the trailing comment records the tag.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4.7.1
        with:
          distribution: temurin
          java-version: 17

      - name: Set up gradle
        uses: gradle/actions/setup-gradle@06832c7b30a0129d7fb559bcc6e43d26f6374244 # v4.3.1

      - name: Run crawler
        run: ./gradlew :javadoc-crawler:crawl

buildSrc/build.gradle.kts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ dependencies {
5454
implementation("com.google.auto.value:auto-value-annotations:1.11.0")
5555
// When updating, update above in plugins too
5656
implementation("com.diffplug.spotless:spotless-plugin-gradle:7.0.3")
57-
implementation("com.gradle.develocity:com.gradle.develocity.gradle.plugin:4.0")
57+
implementation("com.gradle.develocity:com.gradle.develocity.gradle.plugin:4.0.1")
5858
implementation("com.squareup:javapoet:1.13.0")
5959
implementation("com.squareup.wire:wire-compiler")
6060
implementation("com.squareup.wire:wire-gradle-plugin")
@@ -66,7 +66,7 @@ dependencies {
6666
implementation("net.ltgt.gradle:gradle-nullaway-plugin:2.2.0")
6767
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:2.1.20")
6868
implementation("org.owasp:dependency-check-gradle:12.1.1")
69-
implementation("ru.vyarus:gradle-animalsniffer-plugin:2.0.0")
69+
implementation("ru.vyarus:gradle-animalsniffer-plugin:2.0.1")
7070
}
7171

7272
// We can't apply conventions to this build so include important ones such as the Java compilation

buildSrc/src/main/kotlin/otel.java-conventions.gradle.kts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ java {
4242

4343
checkstyle {
4444
configDirectory.set(file("$rootDir/buildscripts/"))
45-
toolVersion = "10.23.0"
45+
toolVersion = "10.23.1"
4646
isIgnoreFailures = false
4747
configProperties["rootDir"] = rootDir
4848
}

dependencyManagement/build.gradle.kts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ val DEPENDENCY_BOMS = listOf(
1313
// (which is EPL licensed) or armeria bom (which is Apache licensed but is getting flagged
1414
// by FOSSA for containing EPL-licensed)
1515

16-
"com.fasterxml.jackson:jackson-bom:2.18.3",
16+
"com.fasterxml.jackson:jackson-bom:2.19.0",
1717
"com.google.guava:guava-bom:33.4.8-jre",
1818
"com.google.protobuf:protobuf-bom:4.30.2",
1919
"com.squareup.okhttp3:okhttp-bom:4.12.0",
2020
"com.squareup.okio:okio-bom:3.11.0", // applies to transitive dependencies of okhttp
2121
"io.grpc:grpc-bom:1.72.0",
2222
"io.netty:netty-bom:4.2.0.Final",
23-
"io.zipkin.brave:brave-bom:6.1.0",
23+
"io.zipkin.brave:brave-bom:6.2.0",
2424
"io.zipkin.reporter2:zipkin-reporter-bom:3.5.0",
2525
"org.assertj:assertj-bom:3.27.3",
2626
"org.testcontainers:testcontainers-bom:1.21.0",
@@ -73,12 +73,12 @@ val DEPENDENCIES = listOf(
7373
"io.prometheus:simpleclient_httpserver:${prometheusClientVersion}",
7474
"javax.annotation:javax.annotation-api:1.3.2",
7575
"com.github.stefanbirkner:system-rules:1.19.0",
76-
"com.google.api.grpc:proto-google-common-protos:2.55.2",
76+
"com.google.api.grpc:proto-google-common-protos:2.56.0",
7777
"com.google.code.findbugs:jsr305:3.0.2",
7878
"com.google.guava:guava-beta-checker:1.0",
7979
"com.sun.net.httpserver:http:20070405",
8080
"com.tngtech.archunit:archunit-junit5:1.4.0",
81-
"com.uber.nullaway:nullaway:0.12.6",
81+
"com.uber.nullaway:nullaway:0.12.7",
8282
"edu.berkeley.cs.jqf:jqf-fuzz:1.7", // jqf-fuzz version 1.8+ requires Java 11+
8383
"eu.rekawek.toxiproxy:toxiproxy-java:2.1.7",
8484
"io.github.netmikey.logunit:logunit-jul:2.0.0",
@@ -89,7 +89,7 @@ val DEPENDENCIES = listOf(
8989
"io.opentracing:opentracing-api:0.33.0",
9090
"io.opentracing:opentracing-noop:0.33.0",
9191
"junit:junit:4.13.2",
92-
"nl.jqno.equalsverifier:equalsverifier:3.19.3",
92+
"nl.jqno.equalsverifier:equalsverifier:3.19.4",
9393
"org.awaitility:awaitility:4.3.0",
9494
"org.bouncycastle:bcpkix-jdk15on:1.70",
9595
"org.codehaus.mojo:animal-sniffer-annotations:1.24",

gradle/wrapper/gradle-wrapper.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
distributionBase=GRADLE_USER_HOME
22
distributionPath=wrapper/dists
3-
distributionSha256Sum=20f1b1176237254a6fc204d8434196fa11a4cfb387567519c61556e8710aed78
4-
distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip
3+
distributionSha256Sum=61ad310d3c7d3e5da131b76bbf22b5a4c0786e9d892dae8c1658d4b484de3caa
4+
distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip
55
networkTimeout=10000
66
validateDistributionUrl=true
77
zipStoreBase=GRADLE_USER_HOME

javadoc-crawler/README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Javadoc Crawler
2+
3+
## Context
4+
5+
The javadoc.io website lazy-loads content only when the artifacts have been accessed, which can
6+
lead to inaccuracies and confusion when someone loads the
7+
https://www.javadoc.io/doc/io.opentelemetry page, since the published `Latest version` will only be
8+
accurate if someone has accessed the page for the actual latest version.
9+
10+
This module provides a simple scraper that pulls the list of all `io.opentelemetry` artifacts from
11+
Maven Central and then visits each corresponding page on the javadoc.io website in order to trigger
12+
loading them into the site's system.
13+
14+
See https://github.com/open-telemetry/opentelemetry-java/issues/7294 for more information.
15+
16+
## How to run
17+
18+
```bash
19+
./gradlew :javadoc-crawler:crawl
20+
```

javadoc-crawler/build.gradle.kts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Build script for the javadoc-crawler module: a small standalone tool that is compiled for
// Java 17 and exposed through the `crawl` task.
plugins {
  id("otel.java-conventions")
}

dependencies {
  // No versions here: jackson-bom and assertj-bom are both listed in
  // dependencyManagement/build.gradle.kts, so pinning a version in this module would only
  // duplicate (and eventually drift from) the managed one.
  // NOTE(review): assumes the dependency-management platform reaches the test configurations the
  // same way it reaches `implementation` - confirm against otel.java-conventions.
  implementation("com.fasterxml.jackson.core:jackson-databind")
  testImplementation("org.assertj:assertj-core")
}

description = "OpenTelemetry Javadoc Crawler"
otelJava.moduleName.set("io.opentelemetry.javadocs")

tasks {
  // The crawler uses java.net.http.HttpClient, so it is compiled for Java 17.
  withType<JavaCompile>().configureEach {
    sourceCompatibility = "17"
    targetCompatibility = "17"
    options.release.set(17)
  }

  // only test on java 17+
  val testJavaVersion: String? by project
  if (testJavaVersion != null && Integer.valueOf(testJavaVersion) < 17) {
    test {
      enabled = false
    }
  }

  // `./gradlew :javadoc-crawler:crawl` runs the crawler's main class on the module's runtime
  // classpath.
  val crawl by registering(JavaExec::class) {
    dependsOn(classes)

    mainClass.set("io.opentelemetry.javadocs.JavaDocsCrawler")
    classpath(sourceSets["main"].runtimeClasspath)
  }
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.javadocs;
7+
8+
public class Artifact {
9+
private final String name;
10+
private final String version;
11+
12+
public Artifact(String name, String version) {
13+
this.name = name;
14+
this.version = version;
15+
}
16+
17+
public String getName() {
18+
return name;
19+
}
20+
21+
public String getVersion() {
22+
return version;
23+
}
24+
}
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.javadocs;
7+
8+
import com.fasterxml.jackson.databind.ObjectMapper;
9+
import java.io.IOException;
10+
import java.net.URI;
11+
import java.net.http.HttpClient;
12+
import java.net.http.HttpRequest;
13+
import java.net.http.HttpResponse;
14+
import java.util.ArrayList;
15+
import java.util.List;
16+
import java.util.Locale;
17+
import java.util.Map;
18+
import java.util.Optional;
19+
import java.util.logging.Level;
20+
import java.util.logging.Logger;
21+
22+
/**
23+
* The javadoc.io site relies on someone accessing the page for an artifact version in order to
24+
* update the contents of the site. This will query Maven Central for all artifacts under
25+
* io.opentelemetry in order to identify the latest versions. Then it will crawl the associated
26+
* pages on the javadoc.io site to trigger updates.
27+
*/
28+
public final class JavaDocsCrawler {
29+
private static final String GROUP = "io.opentelemetry";
30+
private static final String MAVEN_CENTRAL_BASE_URL =
31+
"https://search.maven.org/solrsearch/select?q=g:";
32+
private static final String JAVA_DOCS_BASE_URL = "https://javadoc.io/doc/";
33+
private static final int PAGE_SIZE = 20;
34+
private static final int THROTTLE_MS = 500;
35+
36+
// visible for testing
37+
static final String JAVA_DOC_DOWNLOADED_TEXT = "Javadoc is being downloaded";
38+
39+
private static final Logger logger = Logger.getLogger(JavaDocsCrawler.class.getName());
40+
private static final ObjectMapper objectMapper = new ObjectMapper();
41+
42+
public static void main(String[] args) throws Exception {
43+
HttpClient client = HttpClient.newHttpClient();
44+
List<Artifact> artifacts = getArtifacts(client);
45+
if (artifacts.isEmpty()) {
46+
logger.log(Level.SEVERE, "No artifacts found");
47+
return;
48+
}
49+
logger.info(String.format(Locale.ROOT, "Found %d artifacts", artifacts.size()));
50+
51+
List<String> updated = crawlJavaDocs(client, artifacts);
52+
if (updated.isEmpty()) {
53+
logger.info("No updates were needed");
54+
return;
55+
}
56+
57+
logger.info("Artifacts that triggered updates:\n" + String.join("\n", updated));
58+
}
59+
60+
static List<Artifact> getArtifacts(HttpClient client) throws IOException, InterruptedException {
61+
int start = 0;
62+
Integer numFound;
63+
List<Artifact> result = new ArrayList<>();
64+
65+
do {
66+
if (start != 0) {
67+
Thread.sleep(THROTTLE_MS); // try not to DDoS the site, it gets knocked over easily
68+
}
69+
70+
Map<?, ?> map = queryMavenCentral(client, start);
71+
72+
numFound =
73+
Optional.ofNullable(map)
74+
.map(mavenResult -> (Map<?, ?>) mavenResult.get("response"))
75+
.map(response -> (Integer) response.get("numFound"))
76+
.orElse(null);
77+
78+
List<Artifact> artifacts = convertToArtifacts(map);
79+
result.addAll(artifacts);
80+
81+
start += PAGE_SIZE;
82+
} while (numFound != null && start < numFound);
83+
84+
return result;
85+
}
86+
87+
private static List<Artifact> convertToArtifacts(Map<?, ?> map) {
88+
return Optional.ofNullable(map)
89+
.map(mavenResults -> (Map<?, ?>) mavenResults.get("response"))
90+
.map(response -> (List<?>) response.get("docs"))
91+
.map(
92+
docs -> {
93+
List<Artifact> artifacts = new ArrayList<>();
94+
for (Object doc : docs) {
95+
Map<?, ?> docMap = (Map<?, ?>) doc;
96+
String artifact = (String) docMap.get("a");
97+
String version = (String) docMap.get("latestVersion");
98+
if (artifact != null && version != null) {
99+
artifacts.add(new Artifact(artifact, version));
100+
}
101+
}
102+
return artifacts;
103+
})
104+
.orElseGet(ArrayList::new);
105+
}
106+
107+
private static Map<?, ?> queryMavenCentral(HttpClient client, int start)
108+
throws IOException, InterruptedException {
109+
URI uri =
110+
URI.create(
111+
String.format(
112+
Locale.ROOT,
113+
"%s%s&rows=%d&start=%d&wt=json",
114+
MAVEN_CENTRAL_BASE_URL,
115+
GROUP,
116+
PAGE_SIZE,
117+
start));
118+
119+
HttpRequest request = HttpRequest.newBuilder(uri).GET().build();
120+
121+
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
122+
if (response.statusCode() != 200) {
123+
logger.log(
124+
Level.SEVERE,
125+
"Unexpected response code: " + response.statusCode() + ": " + response.body());
126+
throw new IOException("Unable to pull Maven central artifacts list");
127+
}
128+
return objectMapper.readValue(response.body(), Map.class);
129+
}
130+
131+
static List<String> crawlJavaDocs(HttpClient client, List<Artifact> artifacts)
132+
throws IOException, InterruptedException {
133+
List<String> updatedArtifacts = new ArrayList<>();
134+
135+
for (Artifact artifact : artifacts) {
136+
String[] parts = artifact.getName().split("-");
137+
StringBuilder path = new StringBuilder();
138+
path.append(JAVA_DOCS_BASE_URL)
139+
.append(GROUP)
140+
.append("/")
141+
.append(artifact.getName())
142+
.append("/")
143+
.append(artifact.getVersion())
144+
.append("/")
145+
.append(String.join("/", parts))
146+
.append("/package-summary.html");
147+
148+
HttpRequest crawlRequest = HttpRequest.newBuilder(URI.create(path.toString())).GET().build();
149+
HttpResponse<String> crawlResponse =
150+
client.send(crawlRequest, HttpResponse.BodyHandlers.ofString());
151+
152+
// gets a status code 303 when version exists and the site redirects it to use /latest/
153+
if (crawlResponse.statusCode() != 200 && crawlResponse.statusCode() != 303) {
154+
logger.log(
155+
Level.WARNING,
156+
String.format(
157+
Locale.ROOT,
158+
"Crawl failed for %s with status code %d at URL %s\nResponse: %s",
159+
artifact.getName(),
160+
crawlResponse.statusCode(),
161+
path,
162+
crawlResponse.body()));
163+
continue;
164+
}
165+
166+
if (crawlResponse.body().contains(JAVA_DOC_DOWNLOADED_TEXT)) {
167+
updatedArtifacts.add(artifact.getName());
168+
}
169+
170+
Thread.sleep(THROTTLE_MS); // some light throttling
171+
}
172+
return updatedArtifacts;
173+
}
174+
175+
private JavaDocsCrawler() {}
176+
}

0 commit comments

Comments
 (0)