Skip to content

Commit 4afb478

Browse files
committed
Try class name
1 parent d6776db commit 4afb478

File tree

1 file changed

+13
-18
lines changed

1 file changed

+13
-18
lines changed

data/scrape.py

Lines changed: 13 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -201,14 +201,14 @@ def scrape():
201201
"""Scrape from all the sources and save to ./data/raw"""
202202

203203
try:
204-
scrape_espn()
205-
scrape_cbs()
204+
# scrape_espn()
205+
# scrape_cbs()
206206
scrape_nfl()
207-
scrape_fantasy_pros()
207+
# scrape_fantasy_pros()
208208
DRIVER.quit()
209209
except:
210210
DRIVER.quit()
211-
logging.exception("failed to scrape")
211+
logging.exception("Failed to scrape")
212212
raise
213213

214214

@@ -222,7 +222,7 @@ def scrape_espn():
222222
url = "http://fantasy.espn.com/football/players/projections"
223223
out = RAW_PROJECTIONS
224224

225-
logging.info("scraping ESPN")
225+
logging.info("Scraping ESPN")
226226
DRIVER.get(url)
227227
time.sleep(5) # wait for JS app to render
228228

@@ -308,7 +308,7 @@ def scrape_espn():
308308
# click the next page's button
309309
try:
310310
if page % 5 == 0:
311-
logging.info("scraping ESPN: page=%d, players=%d", page, len(players))
311+
logging.info("Scraping ESPN: page=%d, players=%d", page, len(players))
312312
page += 1
313313
next_button.send_keys(Keys.ENTER)
314314
except Exception as err:
@@ -346,7 +346,7 @@ def scrape_cbs():
346346

347347
url = "https://www.cbssports.com/fantasy/football/stats"
348348
out = RAW_PROJECTIONS
349-
logging.info("scraping CBS")
349+
logging.info("Scraping CBS")
350350

351351
players = []
352352
for pos in ["QB", "RB", "WR", "TE", "DST", "K"]:
@@ -519,7 +519,6 @@ def scrape_nfl():
519519
table = soup.find("tbody")
520520

521521
# parse each player in the table
522-
logged = False
523522
for row in table.find_all("tr"):
524523
if isinstance(row, NavigableString):
525524
continue
@@ -548,10 +547,6 @@ def scrape_nfl():
548547
team = "JAX"
549548
data = [name, pos_team[0], team]
550549

551-
if not logged:
552-
logging.info("Fetched first page of results: name=%s, position=%s", name, pos_team)
553-
logged = True
554-
555550
data += [
556551
td.get_text().strip() if "-" not in td.get_text() else np.nan
557552
for td in row.find_all("td")[3:]
@@ -564,22 +559,22 @@ def scrape_nfl():
564559

565560
# find and click the next button
566561
try:
567-
next_button = DRIVER.find_element(By.XPATH, '//a[text()=">"]')
562+
next_button = DRIVER.find_element(By.CLASS_NAME, 'next')
568563
actions = ActionChains(DRIVER)
569-
actions.move_to_element(next_button).click().perform()
564+
actions.click(next_button).perform()
570565
page += 1
571566

572567
if page % 5 == 0:
573-
logging.info("scraping NFL: page=%d, players=%d", page, len(players))
568+
logging.info("Scraping NFL: page=%d, players=%d, first_on_page=%s", page, len(players), name)
574569

575570
scroll()
576571
time.sleep(0.5)
577572
except:
578573
if page == 0:
579-
logging.exception("failed to click next button")
574+
logging.exception("Failed to click next button")
580575
break
581576

582-
logging.info("skipped %d free-agents", free_agents)
577+
logging.info("Skipped %d free-agents", free_agents)
583578

584579
df = pd.DataFrame(players)
585580
df["two_pts"] = df["2pt"]
@@ -604,7 +599,7 @@ def scrape_fantasy_pros():
604599
"""
605600

606601
out = RAW_ADP
607-
logging.info("scraping Fantasy Pros")
602+
logging.info("Scraping Fantasy Pros")
608603

609604
urls = {
610605
"std": "https://www.fantasypros.com/nfl/adp/overall.php",

0 commit comments

Comments
 (0)