-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
45 lines (38 loc) · 1.56 KB
/
config.py
File metadata and controls
45 lines (38 loc) · 1.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Base URL of the PCS site (procyclingstats.com).
PCS_BASE: str = "https://www.procyclingstats.com"

# --- Scraping behaviour ---
# Pause between requests, in seconds.
REQUEST_DELAY: float = 1.5
# Request timeout, in seconds.
REQUEST_TIMEOUT: int = 30
MAX_RETRIES: int = 3
# Raw HTML is cached here so pages are not re-fetched.
CACHE_DIR: str = "cache/html"
DB_PATH: str = "data/cycling.db"
# Data range: the years of results to collect.
SCRAPE_YEARS = [
    2023,
    2024,
]
# PCS classification codes of the race tiers to include.
#   2.UWT = UCI WorldTour
#   2.HC  = Hors Categorie
#   2.1   = first division
RACE_CLASSES = [
    "2.UWT",
    "2.HC",
    "2.1",
    "1.UWT",
    "1.HC",
    "1.Pro",
    "2.Pro",
]

# Men's races: PCS circuit number -> class codes allowed for that circuit.
MEN_CIRCUITS = {
    # Circuit 1: UCI WorldTour (1.UWT, 2.UWT). The value is the RACE_CLASSES
    # list itself (same object, not a copy), so every listed tier is allowed.
    "1": RACE_CLASSES,
    # Circuit 13: lower-tier European calendar (1.1, 2.1) — Étoile de
    # Bessèges, GP Monseré, Samyn, etc.
    "13": ["1.1", "2.1"],
    # Circuit 16: miscellaneous lower-tier (2.1, some 2.Pro). It also contains
    # women's races; the class filter handles them.
    "16": ["2.1", "2.Pro"],
    # Circuit 26: UCI ProSeries (1.Pro, 2.Pro) — Kuurne, Clasica de Almeria,
    # Ruta del Sol, etc.
    "26": ["1.Pro", "2.Pro"],
}
# Women's WorldTour on PCS: circuit number, then its class codes.
WOMEN_CIRCUIT = "24"
WOMEN_RACE_CLASSES = [
    "2.WWT",
    "1.WWT",
]
# Slugs of races whose stages are predominantly cobbled.
COBBLED_RACE_SLUGS = [
    "paris-roubaix",
    "ronde-van-vlaanderen",
    "e3-saxo",
    "e3-harelbeke",
    "dwars-door-vlaanderen",
    "omloop-het-nieuwsblad",
    "classic-brugge-de-panne",
    "bredene-koksijde",
    "nokere-koerse",
    "gp-de-denain",
]
# Slugs of races that are predominantly gravel.
GRAVEL_RACE_SLUGS = ["strade-bianche"]