Skip to content

Commit 27b745c

Browse files
committed
Fix scraping maybe
1 parent 0af5e99 commit 27b745c

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

data/scrape.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ def scrape_espn():
308308
# click the next page's button
309309
try:
310310
if page % 5 == 0:
311-
logging.info("scraping ESPN page %d...", page)
311+
logging.info("scraping ESPN: page=%d, players=%d", page, len(players))
312312
page += 1
313313
next_button.send_keys(Keys.ENTER)
314314
except Exception as err:
@@ -392,7 +392,7 @@ def scrape_cbs():
392392
)
393393
else:
394394
# very rare, seen for Alfred Morris in 2019
395-
logging.warn("skipping player, no position: %s", name_cell)
395+
logging.warning("skipping player, no position: %s", name_cell)
396396
continue
397397

398398
pos = pos.replace("FB", "RB")
@@ -450,7 +450,7 @@ def scrape_nfl():
450450
Last page:
451451
https://fantasy.nfl.com/research/projections?offset=1&position=O&sort=projectedPts&statCategory=projectedStats&statSeason=2019&statType=seasonProjectedStats&statWeek=1#researchProjections=researchProjections%2C%2Fresearch%2Fprojections%253Foffset%253D926%2526position%253DO%2526sort%253DprojectedPts%2526statCategory%253DprojectedStats%2526statSeason%253D2019%2526statType%253DseasonProjectedStats%2526statWeek%253D1%2Creplace
452452
453-
Just going to simluate clicking the next button until there's no next button
453+
Just going to simulate clicking the next button until there's no next button
454454
"""
455455

456456
out = RAW_PROJECTIONS
@@ -509,9 +509,13 @@ def scrape_nfl():
509509
headers = ["name", "pos", "team"] + headers
510510

511511
while True:
512-
soup = BeautifulSoup(
513-
DRIVER.execute_script("return document.body.innerHTML"), "html.parser"
514-
)
512+
try:
513+
soup = BeautifulSoup(
514+
DRIVER.execute_script("return document.body.innerHTML"), "html.parser"
515+
)
516+
except Exception as e:
517+
logging.warning("bailing on nfl pagination on error", exc_info=e)
518+
break
515519
table = soup.find("tbody")
516520

517521
# parse each player in the table
@@ -561,9 +565,8 @@ def scrape_nfl():
561565
page += 1
562566

563567
if page % 5 == 0:
564-
logging.info("scraping NFL page %d...", page)
568+
logging.info("scraping NFL: page=%d, players=%d", page, len(players))
565569

566-
time.sleep(1)
567570
scroll()
568571
time.sleep(1)
569572
except:
@@ -607,6 +610,7 @@ def scrape_fantasy_pros():
607610
df_set = False
608611

609612
for ppr_type, url in urls.items():
613+
logging.info("Scraping Fantasy Pros: ppr_type=%s, url=%s", ppr_type, url)
610614
DRIVER.get(url)
611615
time.sleep(1.5)
612616
scroll()
@@ -641,7 +645,7 @@ def scrape_fantasy_pros():
641645
name = name.split(" ")[-2]
642646
team = TEAM_TO_ABRV_MAP[name]
643647

644-
adp = tds[-1].get_text()
648+
adp = tds[-2].get_text()
645649

646650
player_data = {
647651
"name": name,
@@ -664,6 +668,7 @@ def scrape_fantasy_pros():
664668
df = df[["key", "name", "pos", "team", "bye"] + list(urls.keys())]
665669
df.to_csv(os.path.join(out, f"FantasyPros-{YEAR}.csv"), index=False)
666670

671+
logging.info("Validating Fantasy Pros players")
667672
validate(df, strict=False, skip_fantasy_pros_check=True)
668673

669674

0 commit comments

Comments
 (0)