1515import java .util .List ;
1616import java .util .Locale ;
1717import java .util .Map ;
18+ import java .util .Objects ;
1819import java .util .Optional ;
1920import java .util .logging .Level ;
2021import java .util .logging .Logger ;
22+ import java .util .stream .Collectors ;
2123
2224/**
2325 * The javadoc.io site relies on someone accessing the page for an artifact version in order to
2628 * pages on the javadoc.io site to trigger updates.
2729 */
2830public final class JavaDocsCrawler {
29- private static final String GROUP = "io.opentelemetry" ;
31+ // Track list of groups and the minimum artifact versions that should be crawled. Update to the
32+ // latest periodically to avoid crawling artifacts that stopped being published.
33+ private static final Map <String , String > GROUPS_AND_MIN_VERSION =
34+ Map .of (
35+ "io.opentelemetry" , "1.49.0" ,
36+ "io.opentelemetry.instrumentation" , "2.15.0" ,
37+ "io.opentelemetry.contrib" , "1.46.0" ,
38+ "io.opentelemetry.semconv" , "1.32.0" ,
39+ "io.opentelemetry.proto" , "1.3.2" );
40+
3041 private static final String MAVEN_CENTRAL_BASE_URL =
3142 "https://search.maven.org/solrsearch/select?q=g:" ;
3243 private static final String JAVA_DOCS_BASE_URL = "https://javadoc.io/doc/" ;
@@ -41,23 +52,34 @@ public final class JavaDocsCrawler {
4152
4253 public static void main (String [] args ) throws Exception {
4354 HttpClient client = HttpClient .newHttpClient ();
44- List <Artifact > artifacts = getArtifacts (client );
45- if (artifacts .isEmpty ()) {
46- logger .log (Level .SEVERE , "No artifacts found" );
47- return ;
48- }
49- logger .info (String .format (Locale .ROOT , "Found %d artifacts" , artifacts .size ()));
5055
51- List <String > updated = crawlJavaDocs (client , artifacts );
52- if (updated .isEmpty ()) {
53- logger .info ("No updates were needed" );
54- return ;
55- }
56+ for (Map .Entry <String , String > groupAndMinVersion : GROUPS_AND_MIN_VERSION .entrySet ()) {
57+ String group = groupAndMinVersion .getKey ();
58+
59+ List <Artifact > artifacts = getArtifacts (client , group );
60+ if (artifacts .isEmpty ()) {
61+ logger .log (Level .SEVERE , "No artifacts found for group " + group );
62+ continue ;
63+ }
64+ logger .info (
65+ String .format (Locale .ROOT , "Found %d artifacts for group " + group , artifacts .size ()));
5666
57- logger .info ("Artifacts that triggered updates:\n " + String .join ("\n " , updated ));
67+ List <Artifact > updated = crawlJavaDocs (client , groupAndMinVersion .getValue (), artifacts );
68+ if (updated .isEmpty ()) {
69+ logger .info ("No updates were needed for group " + group );
70+ continue ;
71+ }
72+
73+ logger .info (
74+ "Artifacts that triggered updates for group "
75+ + group
76+ + ":\n "
77+ + updated .stream ().map (Artifact ::toString ).collect (Collectors .joining ("\n " )));
78+ }
5879 }
5980
60- static List <Artifact > getArtifacts (HttpClient client ) throws IOException , InterruptedException {
81+ static List <Artifact > getArtifacts (HttpClient client , String group )
82+ throws IOException , InterruptedException {
6183 int start = 0 ;
6284 Integer numFound ;
6385 List <Artifact > result = new ArrayList <>();
@@ -67,7 +89,7 @@ static List<Artifact> getArtifacts(HttpClient client) throws IOException, Interr
6789 Thread .sleep (THROTTLE_MS ); // try not to DDoS the site, it gets knocked over easily
6890 }
6991
70- Map <?, ?> map = queryMavenCentral (client , start );
92+ Map <?, ?> map = queryMavenCentral (client , group , start );
7193
7294 numFound =
7395 Optional .ofNullable (map )
@@ -93,26 +115,26 @@ private static List<Artifact> convertToArtifacts(Map<?, ?> map) {
93115 List <Artifact > artifacts = new ArrayList <>();
94116 for (Object doc : docs ) {
95117 Map <?, ?> docMap = (Map <?, ?>) doc ;
96- String artifact = ( String ) docMap .get ("a " );
97- String version = ( String ) docMap .get ("latestVersion " );
98- if ( artifact != null && version != null ) {
99- artifacts . add ( new Artifact ( artifact , version ) );
100- }
118+ String group = Objects . requireNonNull (( String ) docMap .get ("g" ), "g " );
119+ String artifact = Objects . requireNonNull (( String ) docMap .get ("a" ), "a " );
120+ String version =
121+ Objects . requireNonNull (( String ) docMap . get ( "latestVersion" ), "latestVersion" );
122+ artifacts . add ( new Artifact ( Objects . requireNonNull ( group ), artifact , version ));
101123 }
102124 return artifacts ;
103125 })
104126 .orElseGet (ArrayList ::new );
105127 }
106128
107- private static Map <?, ?> queryMavenCentral (HttpClient client , int start )
129+ private static Map <?, ?> queryMavenCentral (HttpClient client , String group , int start )
108130 throws IOException , InterruptedException {
109131 URI uri =
110132 URI .create (
111133 String .format (
112134 Locale .ROOT ,
113135 "%s%s&rows=%d&start=%d&wt=json" ,
114136 MAVEN_CENTRAL_BASE_URL ,
115- GROUP ,
137+ group ,
116138 PAGE_SIZE ,
117139 start ));
118140
@@ -122,21 +144,35 @@ private static List<Artifact> convertToArtifacts(Map<?, ?> map) {
122144 if (response .statusCode () != 200 ) {
123145 logger .log (
124146 Level .SEVERE ,
125- "Unexpected response code: " + response .statusCode () + ": " + response .body ());
147+ "Unexpected response code "
148+ + response .statusCode ()
149+ + " for uri: "
150+ + uri .toASCIIString ()
151+ + "\n "
152+ + response .body ());
126153 throw new IOException ("Unable to pull Maven central artifacts list" );
127154 }
128155 return objectMapper .readValue (response .body (), Map .class );
129156 }
130157
131- static List <String > crawlJavaDocs (HttpClient client , List <Artifact > artifacts )
158+ static List <Artifact > crawlJavaDocs (
159+ HttpClient client , String minVersion , List <Artifact > artifacts )
132160 throws IOException , InterruptedException {
133- List <String > updatedArtifacts = new ArrayList <>();
161+ List <Artifact > updatedArtifacts = new ArrayList <>();
134162
135163 for (Artifact artifact : artifacts ) {
164+ if (artifact .getVersion ().compareTo (minVersion ) < 0 ) {
165+ logger .info (
166+ String .format (
167+ "Skipping crawling %s due to version %s being less than minVersion %s" ,
168+ artifact , artifact .getVersion (), minVersion ));
169+ continue ;
170+ }
171+
136172 String [] parts = artifact .getName ().split ("-" );
137173 StringBuilder path = new StringBuilder ();
138174 path .append (JAVA_DOCS_BASE_URL )
139- .append (GROUP )
175+ .append (artifact . getGroup () )
140176 .append ("/" )
141177 .append (artifact .getName ())
142178 .append ("/" )
@@ -146,6 +182,7 @@ static List<String> crawlJavaDocs(HttpClient client, List<Artifact> artifacts)
146182 .append ("/package-summary.html" );
147183
148184 HttpRequest crawlRequest = HttpRequest .newBuilder (URI .create (path .toString ())).GET ().build ();
185+ logger .info (String .format ("Crawling %s at: %s" , artifact , path ));
149186 HttpResponse <String > crawlResponse =
150187 client .send (crawlRequest , HttpResponse .BodyHandlers .ofString ());
151188
@@ -156,15 +193,15 @@ static List<String> crawlJavaDocs(HttpClient client, List<Artifact> artifacts)
156193 String .format (
157194 Locale .ROOT ,
158195 "Crawl failed for %s with status code %d at URL %s\n Response: %s" ,
159- artifact . getName () ,
196+ artifact ,
160197 crawlResponse .statusCode (),
161198 path ,
162199 crawlResponse .body ()));
163200 continue ;
164201 }
165202
166203 if (crawlResponse .body ().contains (JAVA_DOC_DOWNLOADED_TEXT )) {
167- updatedArtifacts .add (artifact . getName () );
204+ updatedArtifacts .add (artifact );
168205 }
169206
170207 Thread .sleep (THROTTLE_MS ); // some light throttling
0 commit comments