Skip to content

Commit cade90f

Browse files
committed
finished parsing, fetch functions
1 parent bcaf542 commit cade90f

File tree

15 files changed

+263
-198
lines changed

15 files changed

+263
-198
lines changed

src/fetch/mod.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use reqwest::{Client, Error as RequestError};
2+
3+
use crate::parse::LocationMeta;
4+
5+
/// Fetches the page served at the root of the UCSC nutrition site and returns
/// its body as text.
///
/// The request is a plain GET issued through `client`; the HTTP status code is
/// not inspected, so a non-success response still yields `Ok` with its body.
///
/// # Errors
///
/// Returns the underlying [`reqwest::Error`] if sending the request or
/// reading/decoding the response body fails.
pub async fn fetch_locations_page(client: &reqwest::Client) -> Result<String, RequestError> {
    const LOCATIONS_URL: &str = "https://nutrition.sa.ucsc.edu/";
    client.get(LOCATIONS_URL).send().await?.text().await
}
10+
11+
pub fn make_client() -> reqwest::Client {
12+
Client::builder()
13+
.danger_accept_invalid_certs(true)
14+
.build()
15+
.expect("error building client")
16+
}
17+
18+
pub async fn fetch_location_page(
19+
client: &reqwest::Client,
20+
location_meta: LocationMeta,
21+
) -> Result<String, RequestError> {
22+
static COOKIES: &str = "WebInaCartDates=; WebInaCartMeals=; WebInaCartQtys=; WebInaCartRecipes=; WebInaCartLocation=";
23+
let id = location_meta.id();
24+
let cookies = format!("{COOKIES}{id}");
25+
let url = location_meta.url();
26+
client
27+
.get(url.clone())
28+
.header("Cookie", cookies)
29+
.send()
30+
.await?
31+
.text()
32+
.await
33+
}
34+
35+
#[cfg(test)]
mod tests {
    use super::*;
    use url::Url;

    /// Smoke test: the locations page can be fetched without a transport error.
    /// Requires network access to the live site.
    #[tokio::test]
    async fn test_fetch_locations_page() {
        let client = make_client();
        let _page = fetch_locations_page(&client).await.unwrap();
    }

    /// Smoke test: a single location's page can be fetched without a transport
    /// error. Requires network access to the live site.
    // Without `#[tokio::test]` an `async fn` is never picked up by the test
    // harness, so this test previously did not run at all.
    #[tokio::test]
    async fn test_fetch_location_page() {
        let client = make_client();
        let url: Url = "https://nutrition.sa.ucsc.edu/shortmenu.aspx?\
            sName=UC+Santa+Cruz+Dining&\
            locationNum=40&\
            locationName=College+Nine/John+R.+Lewis+Dining+Hall&naFlag=1"
            .parse()
            .expect("url should be valid");
        let location_meta = LocationMeta::from_url(url).expect("location meta should be valid");
        fetch_location_page(&client, location_meta).await.unwrap();
    }
}

src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#![allow(dead_code)] // TODO: remove this line once done with crate
22

3+
mod fetch;
34
mod parse;
4-
mod scrape_menus;
55

66
use std::{convert::Infallible, env, error::Error, net::SocketAddr, sync::Arc};
77

src/parse/daily_menu/allergens.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ mod tests {
9797

9898
use scraper::Selector;
9999

100-
use crate::get_or_init_selector;
100+
use crate::static_selector;
101101

102102
use super::*;
103103
const HTML: &str = r#"
@@ -171,9 +171,9 @@ mod tests {
171171

172172
let doc = scraper::Html::parse_document(HTML);
173173
static SELECTOR: OnceLock<Selector> = OnceLock::new();
174-
let selector = get_or_init_selector!(SELECTOR, "img");
174+
static_selector!(DATE_SELECTOR <- "img");
175175
let mut all_allergen_flags = AllergenFlags::empty();
176-
for element in doc.select(&selector) {
176+
for element in doc.select(&DATE_SELECTOR) {
177177
let img_url = element.value().attr("src").unwrap(); // all img elements should have a src attribute
178178
let allergen_flags = AllergenInfo::img_url_to_allergen(img_url)
179179
.expect("All img urls in this example should be valid");

src/parse/daily_menu/daily_menu.rs

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,54 @@ use chrono::NaiveDate;
22

33
use super::meal::Meal;
44
use crate::parse::Error;
5+
use crate::static_selector;
6+
57
#[derive(Debug)]
68
pub struct DailyMenu<'a> {
79
date: NaiveDate,
810
meals: Vec<Meal<'a>>,
911
}
1012

13+
impl PartialEq for DailyMenu<'_> {
14+
fn eq(&self, other: &Self) -> bool {
15+
self.date == other.date
16+
}
17+
}
18+
19+
impl PartialOrd for DailyMenu<'_> {
20+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
21+
self.date.partial_cmp(&other.date)
22+
}
23+
}
24+
25+
impl Eq for DailyMenu<'_> {}
26+
27+
impl Ord for DailyMenu<'_> {
28+
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
29+
self.date.cmp(&other.date)
30+
}
31+
}
32+
1133
impl<'a> DailyMenu<'a> {
1234
pub fn from_html_element(element: scraper::ElementRef<'a>) -> Result<Self, Error> {
13-
// TODO
14-
todo!("Implement the MealsOnDate::from_html_element function")
35+
static_selector!(DATE_SELECTOR <- "input[name=strCurSearchDays]");
36+
static_selector!(MEAL_SELECTOR <- r##"table[bordercolors="#FFFF00"]"##);
37+
let date_str = element
38+
.select(&DATE_SELECTOR)
39+
.next()
40+
.ok_or_else(|| Error::html_parse_error("Date field not found"))?
41+
.attr("value")
42+
.ok_or_else(|| Error::html_parse_error("No value on date field"))?;
43+
44+
let date = NaiveDate::parse_from_str(date_str, "%m/%d/%Y")
45+
.map_err(|_x| Error::html_parse_error("Date is not in valid format."))?;
46+
47+
let meals = element
48+
.select(&MEAL_SELECTOR)
49+
.map(Meal::from_html_element)
50+
.collect::<Result<_, Error>>()?;
51+
52+
Ok(Self { date, meals })
1553
}
1654
}
1755

@@ -25,7 +63,7 @@ mod tests {
2563
let html =
2664
fs::read_to_string("./src/parse/html_examples/daily_menu/meals_on_date.html").unwrap();
2765
let document = scraper::Html::parse_document(&html);
28-
let meals = DailyMenu::from_html_element(&document.root_element())
66+
let meals = DailyMenu::from_html_element(document.root_element())
2967
.expect("The example html should be valid");
3068
assert_eq!(
3169
meals.date,

src/parse/daily_menu/food_item.rs

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
use super::allergens::AllergenInfo;
2-
use crate::get_or_init_selector;
3-
use crate::parse::Error;
42
use crate::parse::text_from_selection::{get_inner_text, text_from_selection};
3+
use crate::parse::Error;
4+
use crate::static_selector;
55
use rusty_money::{iso, Money};
6-
use scraper::Selector;
7-
use std::sync::OnceLock;
86

97
#[derive(Debug)]
108
pub struct FoodItem<'a> {
@@ -27,19 +25,16 @@ impl<'a> FoodItem<'a> {
2725
// example html tr element at ./html_examples/food_item.html
2826

2927
// get name with css selector .shortmenurecipes > span
30-
static NAME_SELECTOR: OnceLock<Selector> = OnceLock::new();
31-
let name_selector = get_or_init_selector!(NAME_SELECTOR, ".shortmenurecipes > span");
32-
let name = text_from_selection(name_selector, element, "foodItem", "name")?.trim_end();
28+
static_selector!(NAME_SELECTOR <- ".shortmenurecipes > span");
29+
let name = text_from_selection(&NAME_SELECTOR, element, "foodItem", "name")?.trim_end();
3330
// get allergen info with css selector td > img
34-
static ALLERGEN_INFO_SELECTOR: OnceLock<Selector> = OnceLock::new();
35-
let allergen_info_selector = get_or_init_selector!(ALLERGEN_INFO_SELECTOR, "td > img");
31+
static_selector!(ALLERGEN_INFO_SELECTOR <- "td > img");
3632
let allergen_info =
37-
AllergenInfo::from_html_elements(element.select(&allergen_info_selector))?;
33+
AllergenInfo::from_html_elements(element.select(&ALLERGEN_INFO_SELECTOR))?;
3834

3935
// try to get price with css selector .shortmenuprices > span
40-
static PRICE_SELECTOR: OnceLock<Selector> = OnceLock::new();
41-
let price_selector = get_or_init_selector!(PRICE_SELECTOR, ".shortmenuprices > span");
42-
let price_element = element.select(&price_selector).next();
36+
static_selector!(PRICE_SELECTOR <- ".shortmenuprices > span");
37+
let price_element = element.select(&PRICE_SELECTOR).next();
4338
let price = if let Some(price_element) = price_element {
4439
let price: &str = get_inner_text(price_element, "price")?; // will look like "$5.00"
4540
// if price is equal to &nbsp; then return None
@@ -70,7 +65,8 @@ mod tests {
7065
fn test_food_item_from_html_element() {
7166
// source: https://nutrition.sa.ucsc.edu/menuSamp.asp?locationNum=40&locationName=Colleges+Nine+%26+Ten&sName=&naFlag=
7267
// load the html file
73-
let html = std::fs::read_to_string("./src/parse/html_examples/daily_menu/food_item.html").unwrap(); // file system should be reliable
68+
let html =
69+
std::fs::read_to_string("./src/parse/html_examples/daily_menu/food_item.html").unwrap(); // file system should be reliable
7470
let doc = scraper::Html::parse_document(&html);
7571
let food_item = FoodItem::from_html_element(doc.root_element())
7672
.expect("The example html should be valid");

src/parse/daily_menu/get_or_init_selector.rs

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/parse/daily_menu/meal.rs

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
use std::{iter::Peekable, vec};
22

3-
use scraper::{element_ref::Select, selectable::Selectable, Selector};
3+
use scraper::{element_ref::Select, selectable::Selectable};
44

5-
use crate::{get_or_init_selector, parse::text_from_selection::text_from_selection};
5+
use crate::{parse::text_from_selection::text_from_selection, static_selector};
66

77
use super::food_item::FoodItem;
88
use crate::parse::Error;
99

10-
use std::sync::OnceLock;
11-
1210
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
1311
pub enum MealType {
1412
Breakfast,
@@ -28,22 +26,17 @@ pub struct Meal<'a> {
2826
impl<'a> Meal<'a> {
2927
pub fn from_html_element(element: scraper::ElementRef<'a>) -> Result<Self, Error> {
3028
// example html div element at ./html_examples/meal.html
31-
static ROW_SELECTOR: OnceLock<Selector> = OnceLock::new();
32-
let row_selector = get_or_init_selector!(
33-
ROW_SELECTOR,
34-
r##"table[bordercolor="#FFFF00"] > tbody > tr"##
35-
);
36-
let mut top_level_row_iter = element.select(row_selector);
29+
static_selector!(ROW_SELECTOR <- r##"table[bordercolor="#FFFF00"] > tbody > tr"##);
30+
let mut top_level_row_iter = element.select(&ROW_SELECTOR);
3731
let meal_name_row = top_level_row_iter.next().ok_or_else(|| {
3832
Error::html_parse_error("The meal should have a row for the meal type.")
3933
})?;
4034
let meal_item_row = top_level_row_iter.next().ok_or_else(|| {
4135
Error::html_parse_error("The meal should have a row for the meal items.")
4236
})?;
43-
static MEAL_TYPE_SELECTOR: OnceLock<Selector> = OnceLock::new();
44-
let meal_type_selector = get_or_init_selector!(MEAL_TYPE_SELECTOR, ".shortmenumeals");
37+
static_selector!(MEAL_TYPE_SELECTOR <- ".shortmenumeals");
4538
let meal_type =
46-
text_from_selection(&meal_type_selector, meal_name_row, "meal", "meal type")?;
39+
text_from_selection(&MEAL_TYPE_SELECTOR, meal_name_row, "meal", "meal type")?;
4740
// print out meal type
4841
let meal_type = match meal_type {
4942
"Breakfast" => MealType::Breakfast,
@@ -54,9 +47,8 @@ impl<'a> Meal<'a> {
5447
_ => MealType::AllDay,
5548
};
5649

57-
static SECTION_SELECTOR: OnceLock<Selector> = OnceLock::new();
58-
let section_selector = get_or_init_selector!(SECTION_SELECTOR, "table > tbody > tr");
59-
let section_elements = meal_item_row.select(&section_selector);
50+
static_selector!(SECTION_NAME_SELECTOR <- "table > tbody > tr");
51+
let section_elements = meal_item_row.select(&SECTION_NAME_SELECTOR);
6052
let sections = SectionIterator::new(section_elements.peekable(), meal_type);
6153
let mut sections_vec: Vec<MealSection> = vec![];
6254
for section in sections {
@@ -115,23 +107,21 @@ pub struct MealSection<'a> {
115107
impl<'a> MealSection<'a> {
116108
// takes in an iterator of tr elements of a specific meal and consumes the elements to create a MealSection
117109
pub fn from_html_elements(elements: &mut Peekable<Select<'a, 'a>>) -> Result<Self, Error> {
118-
static MEAL_NAME_SELECTOR: OnceLock<Selector> = OnceLock::new();
119-
let section_name_selector =
120-
get_or_init_selector!(MEAL_NAME_SELECTOR, ".shortmenucats > span");
110+
static_selector!(SECTION_NAME_SELECTOR <- ".shortmenucats > span");
121111

122112
// if the first element does not match the section name selector, then return an error
123113
let first_element = elements.next().ok_or_else(|| {
124114
Error::html_parse_error("Every section should have a name as the first element.")
125115
})?;
126-
let name = text_from_selection(section_name_selector, first_element, "section", "name")?;
116+
let name = text_from_selection(&SECTION_NAME_SELECTOR, first_element, "section", "name")?;
127117

128118
// trim off first and last three characters since the name looks like -- name --
129119
let name = &name[3..name.len() - 3];
130120

131121
// iterate through by peeking and calling handle_element
132122
let mut food_items = vec![];
133123
while let Some(element) = elements.peek() {
134-
if element.select(&section_name_selector).next().is_some() {
124+
if element.select(&SECTION_NAME_SELECTOR).next().is_some() {
135125
break;
136126
}
137127
if let Some(food_item) = Self::handle_element(*element)? {
@@ -143,11 +133,9 @@ impl<'a> MealSection<'a> {
143133
}
144134

145135
fn handle_element(element: scraper::ElementRef<'a>) -> Result<Option<FoodItem<'a>>, Error> {
146-
static SECTION_NAME_SELECTOR: OnceLock<Selector> = OnceLock::new();
147-
let section_name_selector =
148-
get_or_init_selector!(SECTION_NAME_SELECTOR, ".shortmenucats > span");
149-
if element.select(&section_name_selector).next().is_some() {
150-
return Ok(None);
136+
static_selector!(SECTION_NAME_SELECTOR <- ".shortmenucats > span");
137+
if element.select(&SECTION_NAME_SELECTOR).next().is_some() {
138+
Ok(None)
151139
} else {
152140
let out = FoodItem::from_html_element(element)?;
153141
Ok(Some(out))

src/parse/daily_menu/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
mod allergens;
22
mod daily_menu;
33
mod food_item;
4-
mod get_or_init_selector;
54
mod meal;
65
pub use daily_menu::DailyMenu;

0 commit comments

Comments
 (0)