5
5
import com .fasterxml .jackson .databind .node .ArrayNode ;
6
6
import org .apache .hc .client5 .http .HttpResponseException ;
7
7
import org .apache .hc .client5 .http .fluent .Request ;
8
+ import org .hydev .mcpm .client .models .PluginYml ;
8
9
import org .hydev .mcpm .server .crawlers .spiget .SpigetResource ;
10
+ import org .hydev .mcpm .utils .PluginJarFile ;
11
+ import org .hydev .mcpm .utils .StoredHashMap ;
12
+ import org .hydev .mcpm .utils .TemporaryDir ;
9
13
10
14
import java .io .File ;
11
15
import java .io .IOException ;
17
21
import static com .fasterxml .jackson .databind .DeserializationFeature .FAIL_ON_UNKNOWN_PROPERTIES ;
18
22
import static com .fasterxml .jackson .databind .SerializationFeature .INDENT_OUTPUT ;
19
23
import static java .lang .String .format ;
24
+ import static org .hydev .mcpm .Constants .JACKSON ;
20
25
import static org .hydev .mcpm .utils .GeneralUtils .makeUrl ;
21
26
import static org .hydev .mcpm .utils .GeneralUtils .safeSleep ;
22
27
28
33
*/
29
34
public class SpigetCrawler
30
35
{
31
- public static final ObjectMapper JACKSON = new ObjectMapper ()
32
- .configure (FAIL_ON_UNKNOWN_PROPERTIES , false ).enable (INDENT_OUTPUT );;
33
-
34
36
// Base URL of the Spiget v2 API; request URLs are built by appending a path to it
private final String spiget = "https://api.spiget.org/v2" ;
35
37
// Value sent as the User-Agent header on download requests (a desktop Chrome UA string)
private final String userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" ;
36
38
// Delay passed to safeSleep between downloads in main
// NOTE(review): presumably milliseconds - confirm against GeneralUtils.safeSleep
private final long mtDelay = 1000 ;
37
39
// Root directory under which downloaded packages and crawler state files are placed
private final File dataDir ;
40
// Resource id -> reason string for resources that should be skipped by checkUpdate
+ private final StoredHashMap <Long , String > blacklist ;
38
41
39
42
/**
 * Create a crawler that keeps its data under the given directory.
 *
 * @param dataDir Root data directory; the blacklist map is constructed from the file
 *                crawler/spiget/blacklist.json beneath it
 */
public SpigetCrawler(File dataDir)
{
    // Location of the blacklist file, relative to the data directory
    var blacklistFile = new File(dataDir, "crawler/spiget/blacklist.json");

    this.dataDir = dataDir;
    this.blacklist = new StoredHashMap<>(blacklistFile);
}
43
47
44
48
/**
@@ -152,43 +156,82 @@ private File getTemporaryDownloadPath(SpigetResource res)
152
156
*/
153
157
private File getLatestPath (SpigetResource res )
154
158
{
155
- return new File (dataDir , format ("pkgs/spiget/%s/%s/release.jar" , res .name (), res .id ()));
159
+ return new File (dataDir , format ("pkgs/spiget/%s/%s/release.jar" , res .id (), res . version () .id ()));
156
160
}
157
161
158
162
/**
159
- * Download the latest version of a plugin if not present
163
+ * Check whether the latest version of a plugin is already stored locally, and try to download it if not
160
164
*
161
165
* @param res Resource
162
166
*/
163
- private void downloadLatest (SpigetResource res )
167
+ private void checkUpdate (SpigetResource res )
164
168
{
165
- var fp = getTemporaryDownloadPath (res );
166
- if (fp .isFile () || res .external ()) return ;
169
+ // Plugin is in the blacklist, skip
170
+ if (blacklist .containsKey (res .id ())) return ;
171
+
172
+ // Latest version already exists in local fs, skip
173
+ var fp = getLatestPath (res );
174
+ if (fp .isFile ()) return ;
175
+
176
+ // Resource is marked as external, we can't download it
177
+ if (res .external ()) return ;
167
178
168
- // Make request
179
+ // Try downloading the file
180
+ System .out .printf ("Trying to download %s: %s\n " , res .id (), res .name ());
169
181
var url = makeUrl (format (spiget + "/resources/%s/download" , res .id ()));
170
182
171
- try
183
+ try ( var tmp = new TemporaryDir ())
172
184
{
185
+ var tmpFile = new File (tmp .path , "tmp.jar" );
186
+
173
187
// Write bytes
188
+ // TODO: Maybe we can do this without tmp files, like read zip file content from byte array
189
+ var jarBytes = Request .get (url ).addHeader ("User-Agent" , userAgent ).execute ().returnContent ().asBytes ();
190
+ Files .write (tmpFile .toPath (), jarBytes );
191
+
192
+ // Try to read plugin.yml from it
193
+ String metaStr ;
194
+ try (var plugin = new PluginJarFile (tmpFile ))
195
+ {
196
+ metaStr = plugin .readString ("plugin.yml" );
197
+ }
198
+ catch (Exception e )
199
+ {
200
+ // Cannot read plugin.yml, that means it's not a standard plugin, add to blacklist
201
+ System .out .printf ("Cannot read plugin.yml (%s: %s)\n " , res .id (), res .name ());
202
+ blacklist .put (res .id (), "Cannot read plugin.yml" );
203
+ return ;
204
+ }
205
+
206
+ // Success, write to file
174
207
fp .getParentFile ().mkdirs ();
175
- Files .write (fp .toPath (), Request .get (url ).addHeader ("User-Agent" , userAgent ).execute ()
176
- .returnContent ().asBytes ());
208
+
209
+ // Write meta to plugin.yml
210
+ Files .writeString (new File (fp .getParentFile (), "plugin.yml" ).toPath (), metaStr );
211
+
212
+ // Write jar to release.jar
213
+ Files .write (fp .toPath (), jarBytes );
177
214
178
215
System .out .printf ("Downloaded (%s) %s latest version jar\n " , res .id (), res .name ());
179
216
}
180
217
catch (HttpResponseException e )
181
218
{
182
219
// Not found
183
- if (e .getMessage ().contains ("404" )) return ;
220
+ if (e .getMessage ().contains ("404" ))
221
+ blacklist .put (res .id (), "HTTP 404: Not found" );
222
+
184
223
// "External resource cannot be downloaded"
185
- if (e .getMessage ().contains ("400" )) return ;
224
+ else if (e .getMessage ().contains ("400" ))
225
+ blacklist .put (res .id (), "HTTP 400: Probably external resource" );
226
+
186
227
// Blocked by cloudflare
187
- if (e .getMessage ().contains ("520" )) return ;
228
+ else if (e .getMessage ().contains ("520" ))
229
+ blacklist .put (res .id (), "HTTP 520: External site, blocked by CloudFlare" );
230
+
188
231
// This happens when the server has an error (e.g. when a plugin doesn't have files to download)
189
- if (e .getMessage ().contains ("502" )) return ;
232
+ //else if (e.getMessage().contains("502")) return;
233
+
190
234
System .out .println ("Error when downloading " + url + " " + e .getMessage ());
191
- //e.printStackTrace();
192
235
}
193
236
catch (IOException e )
194
237
{
@@ -201,15 +244,16 @@ public static void main(String[] args)
201
244
{
202
245
var crawler = new SpigetCrawler (new File (".mcpm" ));
203
246
var res = crawler .crawlAllResources (false ).stream ()
204
- .filter (it -> it .downloads () > 1000 && !it .external ()).toList ();
247
+ .filter (it -> it .downloads () > 100 && !it .external ()).toList ();
205
248
206
249
System .out .println (res .size ());
207
250
208
- res .stream ().filter (it -> !crawler .getTemporaryDownloadPath (it ).isFile ()).parallel ().forEach (it ->
251
+ // TODO: Parallelize this. Currently causes ConcurrentModificationException with StoredHashMap
252
+ res .stream ().filter (it -> !crawler .getLatestPath (it ).isFile ()).forEach (it ->
209
253
{
210
- crawler .downloadLatest (it );
254
+ crawler .checkUpdate (it );
211
255
212
- safeSleep (crawler .mtDelay );
256
+ // safeSleep(crawler.mtDelay);
213
257
});
214
258
}
215
259
}
0 commit comments