|
42 | 42 |
|
43 | 43 | use constant EMPTY => q{};
|
44 | 44 |
|
| 45 | +# Number of pages taken into account at once in the get_mw_page_list subroutine |
| 46 | +use constant SLICE_SIZE => 50; |
| 47 | + |
| 48 | +# Number of linked mediafiles to get at once in get_linked_mediafiles. |
| 49 | +# The query is split into small batches because the MW API limits |
| 50 | +# the number of links returned (500 links max). |
| 51 | +use constant BATCH_SIZE => 10; |
| 52 | + |
| 53 | +use constant HTTP_CODE_OK => 200; |
| 54 | + |
45 | 55 | my $remotename = $ARGV[0];
|
46 | 56 | my $url = $ARGV[1];
|
47 | 57 |
|
@@ -229,13 +239,13 @@ sub get_mw_page_list {
|
229 | 239 | my $pages = shift;
|
230 | 240 | my @some_pages = @$page_list;
|
231 | 241 | while (@some_pages) {
|
232 |
| - my $last_page = 50; |
| 242 | + my $last_page = SLICE_SIZE; |
233 | 243 | if ($#some_pages < $last_page) {
|
234 | 244 | $last_page = $#some_pages;
|
235 | 245 | }
|
236 | 246 | my @slice = @some_pages[0..$last_page];
|
237 | 247 | get_mw_first_pages(\@slice, $pages);
|
238 |
| - @some_pages = @some_pages[51..$#some_pages]; |
| 248 | + @some_pages = @some_pages[(SLICE_SIZE + 1)..$#some_pages]; |
239 | 249 | }
|
240 | 250 | return;
|
241 | 251 | }
|
@@ -385,9 +395,7 @@ sub get_linked_mediafiles {
|
385 | 395 | my $pages = shift;
|
386 | 396 | my @titles = map { $_->{title} } values(%{$pages});
|
387 | 397 |
|
388 |
| - # The query is split in small batches because of the MW API limit of |
389 |
| - # the number of links to be returned (500 links max). |
390 |
| - my $batch = 10; |
| 398 | + my $batch = BATCH_SIZE; |
391 | 399 | while (@titles) {
|
392 | 400 | if ($#titles < $batch) {
|
393 | 401 | $batch = $#titles;
|
@@ -469,7 +477,7 @@ sub download_mw_mediafile {
|
469 | 477 | my $download_url = shift;
|
470 | 478 |
|
471 | 479 | my $response = $mediawiki->{ua}->get($download_url);
|
472 |
| - if ($response->code == 200) { |
| 480 | + if ($response->code == HTTP_CODE_OK) { |
473 | 481 | return $response->decoded_content;
|
474 | 482 | } else {
|
475 | 483 | print {*STDERR} "Error downloading mediafile from :\n";
|
|
0 commit comments