Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions classes/robot/crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class crawler {
/** @var array|false Courses cached by get_recentcourses(); false until a result has been stored. */
private static $recentcourses = false;

/**
* Returns configuration object if it has been initialised.
* If it is not initialised then it creates and returns it.
Expand Down Expand Up @@ -1306,6 +1309,13 @@ public function scrape($url) {
$method = 'GET';
}

// Abort the transfer once more than the configured bigfilesize has been downloaded.
// The byte limit is computed once here rather than on every progress callback.
$bigfilebytes = 1024 * 1000 * self::get_config()->bigfilesize;
curl_setopt($s, CURLOPT_BUFFERSIZE, 128);
curl_setopt($s, CURLOPT_NOPROGRESS, false);
curl_setopt(
    $s,
    CURLOPT_PROGRESSFUNCTION,
    // PHP invokes this callback with the cURL handle first, followed by the four
    // byte counters. Without the leading $resource parameter every counter is
    // shifted by one and $downloaded would receive the expected total size instead
    // of the number of bytes received so far.
    function ($resource, $downloadsize, $downloaded, $uploadsize, $uploaded) use ($bigfilebytes) {
        // Returning non-zero makes cURL break the connection.
        return ($downloaded > $bigfilebytes) ? 1 : 0;
    }
);

$result = (object) [];
$result->url = $url;

Expand Down Expand Up @@ -1528,6 +1538,11 @@ public function should_be_authenticated($url) {
* @return array
*/
public function get_recentcourses() {

// Serve the cached list once it has been built. The comparison is strict because
// an empty array is a valid cached result: [] != false evaluates to false, so the
// loose check treated an empty cache as "not loaded" and rebuilt it on every call.
if (self::$recentcourses !== false) {
    return self::$recentcourses;
}

global $DB;
$config = self::get_config();

Expand Down Expand Up @@ -1558,6 +1573,7 @@ public function get_recentcourses() {
}
$rs->close();

self::$recentcourses = $recentcourses;
return $recentcourses;
}

Expand Down
2 changes: 2 additions & 0 deletions lang/en/tool_crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@
$string['retentionperiod'] = 'Retention period for bad URLs';
$string['retentionperioddesc'] = 'How many days to keep bad URLs in database.';
$string['retry'] = 'Retry';
$string['retryall'] = 'Retry all';
$string['retryallmessage'] = 'All links readded to queue to be retried';
$string['robotcleanup'] = 'Robot cleanup';
$string['robotstatus'] = 'Status';
$string['seedurl'] = 'Seed URL';
Expand Down
29 changes: 28 additions & 1 deletion report.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
$perpage = optional_param('perpage', 50, PARAM_INT);
$courseid = optional_param('course', 0, PARAM_INT);
$retryid = optional_param('retryid', 0, PARAM_INT);
$retryall = optional_param('retryall', 0, PARAM_BOOL);
$start = $page * $perpage;

$sqlfilter = '';
Expand Down Expand Up @@ -69,7 +70,11 @@
require_capability('moodle/site:config', context_system::instance());
admin_externalpage_setup('tool_crawler_' . $report);
}
echo $OUTPUT->header();

// A "retry all" request on the broken/reference reports ends in a redirect back to
// this page, so nothing may be sent to the browser beforehand. Any other request
// (including retryall on a report that does not handle it) renders normally.
$retryallredirect = ($retryall == 1) && ($report == 'broken' || $report == 'reference');

if (!$retryallredirect) {
    echo $OUTPUT->header();
}

// tabs.php is still included unconditionally; only the echo of $tabs is suppressed
// when redirecting (presumably tabs.php is what populates $tabs - confirm).
require('tabs.php');
if (!$retryallredirect) {
    echo $tabs;
}
Expand All @@ -80,6 +85,7 @@
}

$datetimeformat = get_string('strftimerecentsecondshtml', 'tool_crawler');
$retryallbutton = '';

if ($report == 'broken' || $report == 'reference') {
$reference = $report == 'reference';
Expand Down Expand Up @@ -138,6 +144,13 @@
}
$table->data = [];
foreach ($data as $row) {
// "Retry all" mode: instead of rendering this row, pass $row->toid to
// reset_for_recrawl() and skip straight to the next row.
// NOTE(review): this changes state in response to a plain request parameter and
// only covers the rows present in $data - confirm whether a sesskey check is
// performed elsewhere and whether rows on other pages should be included.
if ($retryall == 1) {
$retryid = $row->toid;
$persistent = new \tool_crawler\local\url();
$persistent->reset_for_recrawl($retryid);
continue;
}

$text = trim($row->text);
if ($text == "") {
$text = get_string('missing', 'tool_crawler');
Expand Down Expand Up @@ -165,6 +178,19 @@
}
$table->data[] = $data;
}
// In "retry all" mode there is nothing to render: go back to this page and show
// the confirmation message.
if ($retryall == 1) {
    $url = $PAGE->url;
    $target = $url->raw_out(true);
    redirect($target, get_string('retryallmessage', 'tool_crawler'));
}
// Offer the "Retry all" link only when the report has at least one row.
if (!empty($table->data)) {
    $retryallurl = new moodle_url($navurl->out(), ['retryall' => 1]);
    $retryalllabel = get_string('retryall', 'tool_crawler');
    $retryallbutton = html_writer::link($retryallurl, $retryalllabel);
}
} else if ($report == 'queued') {
$sql = " FROM {tool_crawler_url} a
LEFT JOIN {course} c ON c.id = a.courseid
Expand Down Expand Up @@ -371,6 +397,7 @@
]
));
echo get_string($report . '_header', 'tool_crawler');
echo $retryallbutton;
echo html_writer::table($table);
echo $OUTPUT->paging_bar($count, $page, $perpage, $baseurl);
echo $OUTPUT->footer();
Loading