Skip to content

Commit c8c8bc9

Browse files
committed
feat: add vega.meta.toml file + remove locale subdir from downloaded data
1 parent 21b6822 commit c8c8bc9

File tree

9 files changed

+155
-142
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "vegapull"
3-
version = "1.0.0"
3+
version = "1.1.0"
44
description = "Command line tool for scraping data for the One Piece Trading Card Game"
55
repository = "https://github.com/Coko7/vegapull"
66
authors = ["Coko <91132775+Coko7@users.noreply.github.com>"]

src/cli.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use anyhow::Result;
22
use clap::{command, Parser, Subcommand, ValueEnum};
33
use inquire_derive::Selectable;
4+
use serde::{Deserialize, Serialize};
45
use std::{
56
ffi::OsString,
67
fmt::{self},
@@ -79,7 +80,7 @@ pub enum Commands {
7980
Config,
8081
}
8182

82-
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq, Selectable)]
83+
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq, Selectable, Serialize, Deserialize)]
8384
pub enum LanguageCode {
8485
#[value(name = "english", alias = "en")]
8586
English,

src/commands/pull_all.rs

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,21 @@ use anyhow::{bail, Result};
22
use inquire::{Confirm, Text};
33
use log::{debug, info};
44
use rayon::prelude::*;
5-
use std::{collections::HashMap, fs, path::PathBuf, time::Instant};
5+
use std::{
6+
collections::{HashMap, HashSet},
7+
fs,
8+
path::PathBuf,
9+
time::SystemTime,
10+
};
611
use yansi::Paint;
712

813
use crate::{
9-
card::Card, cli::LanguageCode, localizer::Localizer, scraper::OpTcgScraper, storage::DataStore,
14+
card::Card,
15+
cli::LanguageCode,
16+
localizer::Localizer,
17+
pack::PackId,
18+
scraper::OpTcgScraper,
19+
storage::{DataStore, PullMode, VegaMetaStats},
1020
utils,
1121
};
1222

@@ -110,14 +120,14 @@ fn pull_all_interactive(config_path: Option<PathBuf>, user_agent: Option<String>
110120

111121
eprintln!("Fetching list of packs...");
112122

113-
let start = Instant::now();
123+
let start = SystemTime::now();
114124

115125
let packs = scraper.fetch_packs()?;
116126
store.write_packs(&packs)?;
117127

118128
eprintln!("Found {} packs!\n", packs.len());
119129

120-
let pack_ids = packs.iter().map(|p| p.id.as_str()).collect::<Vec<_>>();
130+
let pack_ids: HashSet<PackId> = packs.keys().cloned().collect();
121131

122132
eprintln!("Now fetching all the cards for each pack...");
123133
let all_cards = scraper.fetch_all_cards(&pack_ids, true)?;
@@ -153,30 +163,22 @@ fn pull_all_interactive(config_path: Option<PathBuf>, user_agent: Option<String>
153163
});
154164
}
155165

156-
let duration = start.elapsed();
166+
let duration = start.elapsed()?;
157167

158168
eprintln!(
159169
"\nFinal data is available in: {}",
160170
inputs.data_dir.display()
161171
);
162-
eprintln!("Full download completed after: {:?}", duration);
172+
eprintln!("Full download took: {:?}", duration);
173+
174+
store.write_vega_stats(VegaMetaStats::new(
175+
inputs.language,
176+
start.into(),
177+
duration.as_millis().try_into()?,
178+
inputs.download_images,
179+
PullMode::All,
180+
pack_ids,
181+
))?;
182+
163183
Ok(())
164184
}
165-
166-
// fn download_images(scraper: &OpTcgScraper, cards: &HashMap<String, Card>) -> Result<()> {
167-
// let card_values = cards.values().collect::<Vec<_>>();
168-
// let images = scraper.fetch_all_card_images(&cards)?;
169-
//
170-
// images.par_iter().for_each(|(card_id, image_data)| {
171-
// let card = cards_by_id
172-
// .get(card_id)
173-
// .unwrap_or_else(|| panic!("card should exist: {card_id}"));
174-
//
175-
// store
176-
// .write_image(card, image_data.to_vec())
177-
// .unwrap_or_else(|_| panic!("write_image failed for: {card_id}"));
178-
// debug!("wrote image_data for: {}", card_id);
179-
// });
180-
//
181-
// Ok(())
182-
// }

src/commands/pull_cards.rs

Lines changed: 28 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
11
use anyhow::{bail, Result};
22
use log::{debug, error, info};
33
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
4-
use std::{collections::HashMap, path::Path, time::Instant};
4+
use std::{
5+
collections::{HashMap, HashSet},
6+
path::Path,
7+
time::SystemTime,
8+
};
59

610
use crate::{
7-
card::Card, cli::LanguageCode, localizer::Localizer, scraper::OpTcgScraper, storage::DataStore,
11+
card::Card,
12+
cli::LanguageCode,
13+
localizer::Localizer,
14+
scraper::OpTcgScraper,
15+
storage::{DataStore, PullMode, VegaMetaStats},
816
utils,
917
};
1018

@@ -23,7 +31,7 @@ pub fn pull_cards(
2331
let store = DataStore::new(output_dir, language);
2432

2533
eprintln!("fetching all cards for pack {pack_id}...");
26-
let start = Instant::now();
34+
let start = SystemTime::now();
2735

2836
let cards = scraper.fetch_cards(pack_id)?;
2937
if cards.is_empty() {
@@ -58,79 +66,24 @@ pub fn pull_cards(
5866
});
5967
}
6068

61-
let duration = start.elapsed();
69+
println!(
70+
"downloaded cards for pack {} to: {}",
71+
pack_id,
72+
output_dir.display()
73+
);
74+
75+
let duration = start.elapsed()?;
6276

6377
info!("list_cards took: {:?}", duration);
78+
79+
store.write_vega_stats(VegaMetaStats::new(
80+
language,
81+
start.into(),
82+
duration.as_millis().try_into()?,
83+
with_images,
84+
PullMode::SinglePack,
85+
HashSet::from([pack_id.to_owned()]),
86+
))?;
87+
6488
Ok(())
6589
}
66-
67-
// pub fn download_images_fast(
68-
// language: LanguageCode,
69-
// cards: Vec<Card>,
70-
// output_dir: &Path,
71-
// user_agent: Option<String>,
72-
// ) -> Result<()> {
73-
// let localizer = Localizer::load(language)?;
74-
// let scraper = OpTcgScraper::new(localizer, user_agent);
75-
//
76-
// if output_dir.exists() {
77-
// error!("output directory already `{}` exists", output_dir.display());
78-
// bail!(
79-
// "cannot create directory `{}` to store images because it already exists",
80-
// output_dir.display()
81-
// );
82-
// }
83-
//
84-
// match fs::create_dir_all(output_dir) {
85-
// Ok(_) => info!("successfully created `{}`", output_dir.display()),
86-
// Err(e) => bail!("failed to create `{}`: {}", output_dir.display(), e),
87-
// }
88-
//
89-
// info!("downloading images...");
90-
// let start = Instant::now();
91-
//
92-
// let mut handles = vec![];
93-
//
94-
// let completed_count = Arc::new(AtomicUsize::new(0));
95-
// let all_cards = cards.len();
96-
//
97-
// let scraper = Arc::new(scraper);
98-
// let output_dir = output_dir.to_path_buf();
99-
//
100-
// for card in cards.into_iter() {
101-
// let scraper = Arc::clone(&scraper);
102-
// let output_dir = output_dir.clone();
103-
// let completed_count = Arc::clone(&completed_count);
104-
//
105-
// let handle = thread::spawn(move || {
106-
// let img_url = card.img_url.clone();
107-
// let img_path = download_card_image(&output_dir, &scraper, card).unwrap();
108-
// let current = completed_count.fetch_add(1, Ordering::SeqCst) + 1;
109-
//
110-
// eprintln!(
111-
// "[{}/{}] succesfully saved image `{}` to `{}`",
112-
// current,
113-
// all_cards,
114-
// img_url,
115-
// img_path.display()
116-
// );
117-
//
118-
// debug!(
119-
// "[{}/{}] saved image `{}` to `{}`",
120-
// current,
121-
// all_cards,
122-
// img_url,
123-
// img_path.display()
124-
// );
125-
// });
126-
// handles.push(handle);
127-
// }
128-
//
129-
// for handle in handles {
130-
// handle.join().unwrap();
131-
// }
132-
//
133-
// let duration = start.elapsed();
134-
// info!("downloading images took: {:?}", duration);
135-
// Ok(())
136-
// }

src/commands/pull_packs.rs

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
use anyhow::Result;
22
use log::debug;
3-
use std::{path::Path, time::Instant};
3+
use std::{collections::HashSet, path::Path, time::SystemTime};
44

55
use crate::{
6-
cli::LanguageCode, localizer::Localizer, scraper::OpTcgScraper, storage::DataStore, utils,
6+
cli::LanguageCode,
7+
localizer::Localizer,
8+
pack::PackId,
9+
scraper::OpTcgScraper,
10+
storage::{DataStore, PullMode, VegaMetaStats},
11+
utils,
712
};
813

914
pub fn pull_packs(
@@ -19,9 +24,10 @@ pub fn pull_packs(
1924
let store = DataStore::new(output_dir, language);
2025

2126
eprintln!("fetching list of packs...");
22-
let start = Instant::now();
27+
let start = SystemTime::now();
2328

2429
let packs = scraper.fetch_packs()?;
30+
let pack_ids: HashSet<PackId> = packs.keys().cloned().collect();
2531
store.write_packs(&packs)?;
2632

2733
println!(
@@ -30,8 +36,18 @@ pub fn pull_packs(
3036
output_dir.display()
3137
);
3238

33-
let duration = start.elapsed();
39+
let duration = start.elapsed()?;
3440

3541
debug!("pull_packs took: {:?}", duration);
42+
43+
store.write_vega_stats(VegaMetaStats::new(
44+
language,
45+
start.into(),
46+
duration.as_millis().try_into()?,
47+
false,
48+
PullMode::PackListOnly,
49+
pack_ids,
50+
))?;
51+
3652
Ok(())
3753
}

src/pack.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ use std::{
77
hash::{Hash, Hasher},
88
};
99

10+
/// Identifier of a pack: a plain `String` alias, used as the key/element type of
/// the `HashMap<PackId, Pack>` and `HashSet<PackId>` collections that hold packs.
pub type PackId = String;
11+
1012
#[derive(Debug, Deserialize, Serialize)]
1113
pub struct Pack {
1214
pub id: String,

src/scraper.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ use log::{debug, info};
33
use rayon::prelude::*;
44
use scraper::Html;
55
use std::{
6-
collections::HashMap,
6+
collections::{HashMap, HashSet},
77
thread,
88
time::{Duration, Instant},
99
};
1010

1111
use crate::{
1212
card::{Card, CardScraper},
1313
localizer::Localizer,
14-
pack::Pack,
14+
pack::{Pack, PackId},
1515
};
1616

1717
pub struct OpTcgScraper {
@@ -53,7 +53,7 @@ impl OpTcgScraper {
5353
full_url
5454
}
5555

56-
pub fn fetch_packs(&self) -> Result<Vec<Pack>> {
56+
pub fn fetch_packs(&self) -> Result<HashMap<PackId, Pack>> {
5757
let url = self.cardlist_endpoint();
5858
debug!("GET `{}`", url);
5959

@@ -74,12 +74,12 @@ impl OpTcgScraper {
7474

7575
let series_selector = scraper::Selector::parse(sel).unwrap();
7676

77-
let mut packs = Vec::new();
77+
let mut packs = HashMap::new();
7878
for element in document.select(&series_selector) {
7979
match Pack::new(element) {
8080
Ok(pack) => {
8181
if !pack.id.is_empty() {
82-
packs.push(pack);
82+
packs.insert(pack.id.clone(), pack);
8383
}
8484
}
8585
Err(e) => bail!("failed to scrape data about packs: {}", e),
@@ -94,12 +94,12 @@ impl OpTcgScraper {
9494

9595
pub fn fetch_all_cards(
9696
&self,
97-
pack_ids: &[&str],
97+
pack_ids: &HashSet<PackId>,
9898
report_progress: bool,
9999
) -> Result<HashMap<String, Vec<Card>>> {
100100
pack_ids
101101
.par_iter()
102-
.map(|&pid| {
102+
.map(|pid| {
103103
info!("fetching all cards for pack {} via rayon", pid);
104104
let pack_id = pid.to_string();
105105
self.fetch_cards(&pack_id).map(|cards| {

0 commit comments

Comments
 (0)