Skip to content

Commit 3c7f783

Browse files
committed
[arenabuddy] improve scraping of basic land arts
1 parent 966c324 commit 3c7f783

File tree

1 file changed

+52
-39
lines changed

1 file changed

+52
-39
lines changed

arenabuddy/cli/src/commands/scrape_mtga.rs

Lines changed: 52 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ async fn enrich_with_scryfall(mtga_cards: Vec<MtgaCard>, scryfall_host: &str) ->
186186
let mut failed_cards = Vec::new();
187187
let mut processed_sets = 0;
188188

189-
// Cache basic land data to avoid repeated fetches
190-
let mut basic_land_cache: HashMap<i64, serde_json::Value> = HashMap::new();
189+
// Cache arena_id lookups to avoid repeated Scryfall fetches
190+
let mut arena_id_cache: HashMap<i64, serde_json::Value> = HashMap::new();
191191

192192
// Process each set
193193
for (set_code, mtga_set_cards) in cards_by_set {
@@ -244,56 +244,47 @@ async fn enrich_with_scryfall(mtga_cards: Vec<MtgaCard>, scryfall_host: &str) ->
244244

245245
cards.push(card);
246246
cards_by_id.insert(mtga_card.grp_id);
247-
} else if let Some(fallback_id) = get_basic_land_fallback_id(&mtga_card.name) {
248-
// For basic lands, fetch the canonical card from Scryfall for metadata/images
249-
// Use cache to avoid repeated fetches
250-
let basic_land_json = if let Some(cached) = basic_land_cache.get(&fallback_id) {
251-
debug!(
252-
"Using cached basic land data for '{}' (fallback ID: {})",
253-
mtga_card.name, fallback_id
254-
);
255-
Some(cached.clone())
256-
} else {
257-
debug!(
258-
"Fetching canonical basic land data for '{}' (fallback ID: {}, actual ID: {})",
259-
mtga_card.name, fallback_id, mtga_card.grp_id
260-
);
261-
262-
// Rate limit before the request
263-
tokio::time::sleep(Duration::from_millis(SCRYFALL_RATE_LIMIT_MS)).await;
264-
265-
// Fetch and cache
266-
if let Some(json) = fetch_scryfall_card_by_arena_id(&client, scryfall_host, fallback_id).await? {
267-
basic_land_cache.insert(fallback_id, json.clone());
268-
Some(json)
269-
} else {
270-
None
247+
} else {
248+
// Collector number miss — try fetching by the card's actual arena ID
249+
let card_json =
250+
fetch_or_cache_by_arena_id(&client, scryfall_host, &mut arena_id_cache, mtga_card.grp_id).await?;
251+
252+
// If that failed and it's a basic land, try the canonical fallback ID
253+
let card_json = match (card_json, get_basic_land_fallback_id(&mtga_card.name)) {
254+
(Some(json), _) => Some(json),
255+
(None, Some(fallback_id)) => {
256+
debug!(
257+
"Actual arena ID {} not found for '{}', trying fallback ID {}",
258+
mtga_card.grp_id, mtga_card.name, fallback_id
259+
);
260+
fetch_or_cache_by_arena_id(&client, scryfall_host, &mut arena_id_cache, fallback_id).await?
271261
}
262+
(None, None) => None,
272263
};
273264

274-
if let Some(json) = basic_land_json {
265+
if let Some(json) = card_json {
275266
let mut card = Card::from_json(&json);
276-
// Override with MTGA's actual GrpId so each variant has unique ID
277267
card.id = mtga_card.grp_id;
278-
// Update the set to match MTGA's set
279268
card.set = mtga_card.expansion_code.clone();
280-
281269
cards.push(card);
282270
cards_by_id.insert(mtga_card.grp_id);
283-
} else {
284-
// If even the fallback fetch fails, create minimal entry
285-
debug!("Fallback fetch failed for {}, using minimal card", mtga_card.name);
271+
} else if get_basic_land_fallback_id(&mtga_card.name).is_some() {
272+
// Last resort for basic lands: create minimal entry
273+
debug!(
274+
"All fetches failed for basic land '{}', using minimal card",
275+
mtga_card.name
276+
);
286277
let mut card = Card::new(mtga_card.grp_id, &mtga_card.expansion_code, &mtga_card.name);
287278
card.type_line = format!("Basic Land — {}", mtga_card.name.replace("Snow-Covered ", ""));
288279
cards.push(card);
289280
cards_by_id.insert(mtga_card.grp_id);
281+
} else {
282+
warn!(
283+
"Card not found in Scryfall set '{}': '{}' (number={})",
284+
set_code, mtga_card.name, mtga_card.collector_number
285+
);
286+
failed_cards.push(mtga_card);
290287
}
291-
} else {
292-
warn!(
293-
"Card not found in Scryfall set '{}': '{}' (number={})",
294-
set_code, mtga_card.name, mtga_card.collector_number
295-
);
296-
failed_cards.push(mtga_card);
297288
}
298289
}
299290

@@ -317,6 +308,28 @@ async fn enrich_with_scryfall(mtga_cards: Vec<MtgaCard>, scryfall_host: &str) ->
317308
Ok(cards)
318309
}
319310

311+
/// Fetch a card by arena ID, using a cache to avoid redundant Scryfall requests.
///
/// On a cache hit the stored JSON is cloned and returned immediately, with no
/// delay and no request. On a cache miss this sleeps for
/// `SCRYFALL_RATE_LIMIT_MS` milliseconds and then calls
/// `fetch_scryfall_card_by_arena_id` for `arena_id`.
///
/// # Returns
///
/// * `Ok(Some(json))` - the cached value, or the freshly fetched value (which
///   is also inserted into `cache` under `arena_id`).
/// * `Ok(None)` - the fetch returned `None`; nothing is added to `cache`.
///
/// # Errors
///
/// Propagates any error returned by `fetch_scryfall_card_by_arena_id`.
312+
async fn fetch_or_cache_by_arena_id(
313+
client: &reqwest::Client,
314+
scryfall_host: &str,
315+
cache: &mut HashMap<i64, serde_json::Value>,
316+
arena_id: i64,
317+
) -> Result<Option<serde_json::Value>> {
318+
// Cache hit: return a clone without sleeping or contacting Scryfall.
if let Some(cached) = cache.get(&arena_id) {
319+
debug!("Using cached Scryfall data for arena ID {}", arena_id);
320+
return Ok(Some(cached.clone()));
321+
}
322+
323+
// Rate limit before the request; only reached on a cache miss.
tokio::time::sleep(Duration::from_millis(SCRYFALL_RATE_LIMIT_MS)).await;
324+
325+
if let Some(json) = fetch_scryfall_card_by_arena_id(client, scryfall_host, arena_id).await? {
326+
// Store a copy so later lookups for the same arena ID skip the sleep and the fetch.
cache.insert(arena_id, json.clone());
327+
Ok(Some(json))
328+
} else {
329+
// A `None` result is not cached, so a later call with the same ID sleeps and fetches again.
Ok(None)
330+
}
331+
}
332+
320333
/// Fetch all cards from a set via Scryfall, indexed by collector number
321334
async fn fetch_scryfall_set(
322335
client: &reqwest::Client,

0 commit comments

Comments
 (0)