Skip to content

Commit 15e020a

Browse files
committed
add support for zenki
1 parent f1b9ce5 commit 15e020a

File tree

4 files changed: +29 additions, -18 deletions

.cspell.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,8 @@
2626
"pkgs",
2727
"nixpkgs",
2828
"libquery",
29-
"replacen"
29+
"replacen",
30+
"zenki"
3031
],
3132
"dictionaries": [
3233
"softwareTerms",

scraper/src/main.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,8 @@ async fn get_courses_of(base_url: &str) -> Vec<Course> {
6262
futures::future::join_all(courses)
6363
.await
6464
.into_iter()
65-
.collect::<Vec<_>>()
65+
.flatten()
66+
.collect()
6667
}
6768

6869
lazy_static! {

scraper/src/parser.rs

Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
use anyhow::anyhow;
22
use lazy_static::lazy_static;
3-
use scraper::{Html, Selector};
3+
use scraper::{ElementRef, Html, Selector};
44

55
use crate::types::*;
66

@@ -17,19 +17,24 @@ lazy_static! {
1717
Selector::parse(".catalog-page-detail-table-cell.code-cell").unwrap();
1818
}
1919

20-
pub fn parse_course_info(html: Html) -> anyhow::Result<Course> {
21-
Ok(Course {
22-
name: select(&html, &NAME_SELECTOR, 1)?,
23-
teacher: select(&html, &TEACHER_SELECTOR, 1)?,
24-
semester: select_all(&html, &SEMESTER_SELECTOR, 1)?.join(","),
25-
period: select(&html, &PERIOD_SELECTOR, 1)?,
26-
code: select_all(&html, &CODE_SELECTOR, 1)?.join(" "),
27-
})
20+
pub fn parse_course_info(html: Html) -> anyhow::Result<Vec<Course>> {
21+
html.select(&Selector::parse(".catalog-page-detail-table-row").unwrap())
22+
.skip(1)
23+
.map(|el| {
24+
Ok(Course {
25+
name: select(&el, &NAME_SELECTOR)?,
26+
teacher: select(&el, &TEACHER_SELECTOR)?,
27+
semester: select_all(&el, &SEMESTER_SELECTOR)?.join(","),
28+
period: select(&el, &PERIOD_SELECTOR)?,
29+
code: select_all(&el, &CODE_SELECTOR)?.join(" "),
30+
})
31+
})
32+
.collect()
2833
}
2934

30-
fn select(html: &Html, selector: &Selector, nth: usize) -> anyhow::Result<String> {
31-
html.select(selector)
32-
.nth(nth)
35+
fn select(el: &ElementRef, selector: &Selector) -> anyhow::Result<String> {
36+
el.select(selector)
37+
.next()
3338
.ok_or(anyhow!(
3439
"Couldn't find matching element for selector {:?}",
3540
selector,
@@ -38,12 +43,12 @@ fn select(html: &Html, selector: &Selector, nth: usize) -> anyhow::Result<String
3843
}
3944

4045
fn select_all<'a>(
41-
html: &'a Html,
46+
html: &'a ElementRef,
4247
selector: &'static Selector,
43-
nth: usize,
48+
// nth: usize,
4449
) -> anyhow::Result<Vec<&'a str>> {
4550
html.select(selector)
46-
.nth(nth)
51+
.next()
4752
.ok_or(anyhow!(
4853
"Couldn't find matching element for selector {:?}",
4954
selector,

scraper/src/urls.rs

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,8 @@
1-
pub static URLS: [(&str, &str); 10] = [
1+
pub static URLS: [(&str, &str); 11] = [
2+
(
3+
"zenki",
4+
"https://catalog.he.u-tokyo.ac.jp/result?q=&type=all&faculty_id=&facet=%7B%22faculty_type%22%3A%5B%22jd%22%5D%7D&page=",
5+
),
26
(
37
"law",
48
"https://catalog.he.u-tokyo.ac.jp/result?type=ug&faculty_id=1&page=",

0 commit comments

Comments
 (0)