/*
 * Uses the wikipedia crate to fetch pages
 * Processes page content
 * Collects timing metrics
 * Processes pages concurrently with rayon
 * Demonstrates crate usage and concurrency in Rust
 */

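// Dependency note: this example assumes rayon and wikipedia are declared in
// Cargo.toml. A minimal sketch (the version numbers are assumptions, not pinned
// by this commit):
//
// [dependencies]
// rayon = "1"
// wikipedia = "0.3"
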
use rayon::prelude::*;
use wikipedia::http::default::Client;
use wikipedia::Page;
use wikipedia::Wikipedia;

struct ProcessedPage {
    title: String,
    data: String,
}

const PAGES: [&str; 9] = [
    "Giannis Antetokounmpo",
    "James Harden",
    "Russell Westbrook",
    "Stephen Curry",
    "Kevin Durant",
    "LeBron James",
    "Kobe Bryant",
    "Michael Jordan",
    "Shaquille O'Neal",
];

//fetches a page's title and full content and bundles them for later processing;
//the unwraps panic if the Wikipedia request fails
fn process_page(page: &Page<Client>) -> ProcessedPage {
    let title = page.get_title().unwrap();
    let content = page.get_content().unwrap();
    ProcessedPage {
        title,
        data: content,
    }
}

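// If panicking on a failed fetch is undesirable, a fallible variant is possible.
// A sketch only (try_process_page is not in the original code, and it assumes the
// crate's error type can be boxed as a standard error):
//
//     fn try_process_page(page: &Page<Client>) -> Result<ProcessedPage, Box<dyn std::error::Error>> {
//         Ok(ProcessedPage {
//             title: page.get_title()?,
//             data: page.get_content()?,
//         })
//     }
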
//times how long it takes to process each page and the total run
fn main() {
    //start the overall timer
    let start = std::time::Instant::now();
    let wikipedia = Wikipedia::<Client>::default();
    //build a Page handle for each title
    let pages: Vec<_> = PAGES
        .par_iter() //parallel iterator
        .map(|&p| wikipedia.page_from_title(p.to_string()))
        .collect();

    //fetch and process the pages in parallel
    let processed_pages: Vec<ProcessedPage> = pages.par_iter().map(process_page).collect();
    for page in processed_pages {
        //time the per-page summary work (the fetch itself happened in process_page above)
        let start_page = std::time::Instant::now();

        println!("Title: {}", page.title);
        //grab the first sentence of the page
        let first_sentence = page.data.split('.').next().unwrap();
        println!("First sentence: {}", first_sentence);
        //count the number of words in the page
        let word_count = page.data.split_whitespace().count();
        println!("Word count: {}", word_count);
        //print how long this page's summary took
        println!("Page time: {:?}", start_page.elapsed());
    }
    //summary statistics: total time, average time per page, number of pages, and number of rayon worker threads
    println!("Total time: {:?}", start.elapsed());
    println!(
        "Average time per page: {:?}",
        start.elapsed() / PAGES.len() as u32
    );
    println!("Total number of pages: {}", PAGES.len());
    println!("Number of threads: {}", rayon::current_num_threads());
}
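
// Thread-count note (a sketch, not part of the original program): rayon sizes its
// global pool from the number of available CPUs by default. To pin it explicitly,
// one option is to configure the global pool at the top of main, before any
// parallel iterator runs:
//
//     rayon::ThreadPoolBuilder::new()
//         .num_threads(4) // hypothetical thread count
//         .build_global()
//         .expect("the global thread pool can only be initialized once");
//
// Setting the RAYON_NUM_THREADS environment variable achieves the same without
// code changes.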