Skip to content

Commit fe36c4e

Browse files
author
Nathan Nguyen
committed
Add link checker at course level
1 parent 152723c commit fe36c4e

22 files changed

+1060
-39
lines changed

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,24 @@ The same effect could be achieved even without role "Robot nofollow" by
142142
assigning role "Robot" on the contexts you want to be crawled. But
143143
using the combination of two roles gives more flexibility.
144144

145+
## Disable site-wide crawling and allow non-admin users to run the tool in course context
146+
### New settings at '/admin/settings.php?section=tool_crawler'
147+
- coursemode: enable the link checker at course level
148+
- emailto: Notifications will be sent to all users with the 'tool/crawler:courseconfig' permission and to this email address.
149+
150+
### Once enabled, the following items will be displayed under the course admin setting 'Link checker robot'
151+
- Click on 'run link checking' to add the course to crawling queue
152+
- Click on 'rerun link checking' if you want to re-run crawling on the course
153+
- Click on 'stop link checking' to remove the course from the queue, along with all crawled links that belong to the course
154+
155+
### The bot account should have the following permissions:
156+
- View subjects without participation: moodle/course:view
157+
- View hidden subjects: moodle/course:viewhiddencourses
158+
- View hidden sections: moodle/course:viewhiddensections
159+
- View hidden book chapters: mod/book:viewhiddenchapters
160+
- View hidden activities: moodle/course:viewhiddenactivities
161+
- See hidden categories: moodle/category:viewhiddencategories
162+
145163
# Testing
146164

147165
## Test basic authentication with curl

classes/form/courselinkchecker.php

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
<?php
2+
// This file is part of Moodle - http://moodle.org/
3+
//
4+
// Moodle is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// Moodle is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU General Public License
15+
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16+
17+
namespace tool_crawler\form;
18+
19+
defined('MOODLE_INTERNAL') || die();
20+
21+
require_once("$CFG->libdir/formslib.php");
22+
use html_writer;
23+
use moodleform;
24+
25+
/**
26+
* Form to run/stop/reset crawling
27+
*
28+
* @package tool_crawler
29+
* @author Nathan Nguyen <[email protected]>
30+
* @copyright Catalyst IT
31+
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
32+
*/
33+
class courselinkchecker extends moodleform {
    /**
     * Build the form: a hidden course id plus the action buttons that apply
     * to the current queue state of the course.
     *
     * Reads two entries from the custom data passed to the form:
     *  - 'course': object whose id is stored in the hidden 'courseid' field;
     *  - 'queuecourse': the queue entry for the course, or an empty value when
     *    the course is not queued.
     */
    protected function definition() {
        $form = $this->_form;
        $currentcourse = $this->_customdata['course'];
        $queued = $this->_customdata['queuecourse'];

        $form->addElement('hidden', 'courseid', $currentcourse->id);
        $form->setType('courseid', PARAM_INT);

        // Work out which submit actions are offered, in display order.
        $actions = [];
        if (empty($queued)) {
            // Not queued yet: the only option is to add the course.
            $actions[] = 'addcourse';
        } else {
            // A finished crawl can additionally be reset; stopping is always offered.
            if (!empty($queued->timefinish)) {
                $actions[] = 'resetcourse';
            }
            $actions[] = 'stopcourse';
        }

        // The action name doubles as the element name and the language string key.
        $buttons = [];
        foreach ($actions as $action) {
            $buttons[] = $form->createElement('submit', $action, get_string($action, 'tool_crawler'));
        }
        $buttons[] = $form->createElement('cancel');

        $form->addGroup($buttons, 'buttonar', '', ' ', false);
    }
}
61+

classes/helper.php

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
<?php
2+
// This file is part of Moodle - http://moodle.org/
3+
//
4+
// Moodle is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// Moodle is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU General Public License
15+
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16+
17+
namespace tool_crawler;
18+
defined('MOODLE_INTERNAL') || die();
19+
20+
/**
21+
* Provide helper functions for crawling on a course
22+
*
23+
* @package tool_crawler
24+
* @author Nathan Nguyen <[email protected]>
25+
* @copyright Catalyst IT
26+
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
27+
*/
28+
class helper {
    /**
     * Queue a course for link checking.
     *
     * Resets the start/finish times of an existing queue entry, or inserts a
     * new entry when the course is not queued yet, then removes the URLs and
     * edges previously stored for the course.
     *
     * @param int $courseid course ID
     */
    public static function queue_course($courseid) {
        global $DB;
        $record = self::get_queue_course($courseid);

        if (!empty($record)) {
            $record->timestart = null;
            $record->timefinish = null;
            $DB->update_record('tool_crawler_course', $record);
        } else {
            $record = new \stdClass();
            $record->courseid = $courseid;
            $record->timestart = null;
            $record->timefinish = null;
            $DB->insert_record('tool_crawler_course', $record);
        }

        // Reset: drop whatever was crawled for this course before.
        self::clear_course_link($courseid);
    }

    /**
     * Get the queue entry for a course.
     *
     * @param int $courseid
     * @return false|mixed|\stdClass the 'tool_crawler_course' record, as returned by $DB->get_record()
     */
    public static function get_queue_course($courseid) {
        global $DB;
        return $DB->get_record('tool_crawler_course', ['courseid' => $courseid]);
    }

    /**
     * Get the ids of the courses whose link checking has not finished
     * (queue entries with no finish time).
     *
     * @return array course ids
     */
    public static function get_onqueue_course_ids() {
        global $DB;
        return $DB->get_fieldset_select('tool_crawler_course', 'courseid', 'timefinish is null');
    }

    /**
     * Remove a course from the queue.
     *
     * Only the queue entry is deleted here; stored URLs and edges are not touched.
     *
     * @param int $courseid
     */
    public static function dequeue_course($courseid) {
        global $DB;
        $DB->delete_records('tool_crawler_course', ['courseid' => $courseid]);
    }

    /**
     * Delete the URLs and edges stored for a course.
     *
     * @param int $courseid
     */
    public static function clear_course_link($courseid) {
        global $DB;
        $DB->delete_records('tool_crawler_url', ['courseid' => $courseid]);
        $DB->delete_records('tool_crawler_edge', ['courseid' => $courseid]);
    }

    /**
     * Mark crawling of a course as started by storing the current time.
     *
     * @param int $courseid
     */
    public static function start_course_crawling($courseid) {
        global $DB;
        $DB->set_field('tool_crawler_course', 'timestart', time(), ['courseid' => $courseid]);
    }

    /**
     * Mark crawling of a course as finished by storing the current time, then
     * send the notification emails.
     *
     * @param int $courseid
     */
    public static function finish_course_crawling($courseid) {
        global $DB;
        $DB->set_field('tool_crawler_course', 'timefinish', time(), ['courseid' => $courseid]);
        self::send_email($courseid);
    }

    /**
     * Calculate progress of crawling on a course.
     *
     * Returns nothing when the course is not queued or crawling has not
     * started. Otherwise returns a list of three strings:
     * [estimated finish date, elapsed time as HH:MM:SS, progress as a percentage].
     *
     * @param int $courseid
     * @return array|void
     */
    public static function calculate_progress($courseid) {
        $queuecourse = self::get_queue_course($courseid);
        if (empty($queuecourse) || empty($queuecourse->timestart)) {
            return;
        }

        $url = new \tool_crawler\local\url();
        $queuesize = $url->get_queue_size($courseid);
        $processed = $url->get_processed($queuecourse->timestart, $courseid);

        // An empty queue counts as complete; this also avoids dividing by zero.
        if ($queuesize == 0) {
            $progress = 1;
        } else {
            $progress = $processed / ($processed + $queuesize);
        }

        // Extrapolate the finish time from the elapsed time and the fraction done.
        $elapsed = time() - $queuecourse->timestart;
        $eta = $progress > 0 ? userdate((int) floor($elapsed / $progress + $queuecourse->timestart)) : '';

        // Once finished, the duration is frozen at the recorded finish time.
        if (!empty($queuecourse->timefinish)) {
            $delta = $queuecourse->timefinish - $queuecourse->timestart;
        } else {
            $delta = time() - $queuecourse->timestart;
        }

        // Integer arithmetic only: applying "%" to a fractional float is deprecated since PHP 8.1.
        $delta = (int) $delta;
        $duration = sprintf('%02d:%02d:%02d', intdiv($delta, 3600), intdiv($delta, 60) % 60, $delta % 60);
        $progress = sprintf('%.2f%%', $progress * 100);

        return [$eta, $duration, $progress];
    }

    /**
     * Translate an http code into its description.
     *
     * Only the language string of the requested code is resolved. Codes that
     * are not in the list below yield an empty string.
     *
     * @param string $code
     * @return string
     */
    public static function translate_httpcode($code) {
        // Http codes that have a 'httpcode_<code>' language string.
        static $known = [
            100 => true, 101 => true,
            200 => true, 201 => true, 202 => true, 203 => true, 204 => true, 205 => true,
            300 => true, 301 => true, 302 => true, 303 => true,
            400 => true, 401 => true, 403 => true, 404 => true, 405 => true, 406 => true,
            408 => true, 409 => true, 410 => true, 411 => true, 413 => true, 414 => true,
            415 => true, 417 => true,
            500 => true, 501 => true, 502 => true, 503 => true, 504 => true, 505 => true,
        ];

        if (!isset($known[$code])) {
            return '';
        }

        // Every known key is an integer, so the cast gives the canonical string identifier.
        return get_string('httpcode_' . (int) $code, 'tool_crawler');
    }

    /**
     * Send the notification email for a course.
     *
     * Recipients are the users holding 'tool/crawler:courseconfig' in the
     * course context, plus the address in the 'emailto' plugin setting when
     * it is set.
     *
     * @param int $courseid
     */
    public static function send_email($courseid) {
        $notifyemail = get_config('tool_crawler', 'emailto');

        $context = \context_course::instance($courseid);
        $users = get_users_by_capability($context, 'tool/crawler:courseconfig');

        if (!empty($notifyemail)) {
            // Minimal stand-in recipient for the configured address (not a real account).
            $user = new \stdClass();
            $user->id = -1;
            $user->email = $notifyemail;
            $user->mailformat = 1;
            $users[] = $user;
        }

        if (empty($users)) {
            return;
        }

        $url = new \moodle_url('/admin/tool/crawler/course.php', ['id' => $courseid]);
        $noticehtml = get_string('emailcontent', 'tool_crawler', $url->out());
        $subject = get_string('emailsubject', 'tool_crawler');

        // The sender is the same for every recipient, so look it up once.
        $from = get_admin();

        foreach ($users as $user) {
            email_to_user(
                $user,
                $from,
                $subject,
                $noticehtml,
                $noticehtml
            );
        }
    }

    /**
     * Count broken links.
     *
     * Counts rows where a URL whose http code is not '200' is linked, through
     * an edge, from a URL that belongs to the given course.
     *
     * @param int $courseid the course id
     * @return int number of broken links
     */
    public static function count_broken_links($courseid) {
        global $DB;
        // The course id is bound as a named parameter rather than interpolated into the SQL.
        $sql = "SELECT count(1) AS count
                  FROM {tool_crawler_url} b
             LEFT JOIN {tool_crawler_edge} l ON l.b = b.id
             LEFT JOIN {tool_crawler_url} a ON l.a = a.id
             LEFT JOIN {course} c ON c.id = a.courseid
                 WHERE b.httpcode != '200' AND c.id = :courseid";
        return $DB->count_records_sql($sql, ['courseid' => $courseid]);
    }
}

0 commit comments

Comments
 (0)