@@ -201,14 +201,14 @@ def scrape():
201201 """Scrape from all the sources and save to ./data/raw"""
202202
203203 try :
204- scrape_espn ()
205- scrape_cbs ()
204+ # scrape_espn()
205+ # scrape_cbs()
206206 scrape_nfl ()
207- scrape_fantasy_pros ()
207+ # scrape_fantasy_pros()
208208 DRIVER .quit ()
209209 except :
210210 DRIVER .quit ()
211- logging .exception ("failed to scrape" )
211+ logging .exception ("Failed to scrape" )
212212 raise
213213
214214
@@ -222,7 +222,7 @@ def scrape_espn():
222222 url = "http://fantasy.espn.com/football/players/projections"
223223 out = RAW_PROJECTIONS
224224
225- logging .info ("scraping ESPN" )
225+ logging .info ("Scraping ESPN" )
226226 DRIVER .get (url )
227227 time .sleep (5 ) # wait for JS app to render
228228
@@ -308,7 +308,7 @@ def scrape_espn():
308308 # click the next page's button
309309 try :
310310 if page % 5 == 0 :
311- logging .info ("scraping ESPN: page=%d, players=%d" , page , len (players ))
311+ logging .info ("Scraping ESPN: page=%d, players=%d" , page , len (players ))
312312 page += 1
313313 next_button .send_keys (Keys .ENTER )
314314 except Exception as err :
@@ -346,7 +346,7 @@ def scrape_cbs():
346346
347347 url = "https://www.cbssports.com/fantasy/football/stats"
348348 out = RAW_PROJECTIONS
349- logging .info ("scraping CBS" )
349+ logging .info ("Scraping CBS" )
350350
351351 players = []
352352 for pos in ["QB" , "RB" , "WR" , "TE" , "DST" , "K" ]:
@@ -519,7 +519,6 @@ def scrape_nfl():
519519 table = soup .find ("tbody" )
520520
521521 # parse each player in the table
522- logged = False
523522 for row in table .find_all ("tr" ):
524523 if isinstance (row , NavigableString ):
525524 continue
@@ -548,10 +547,6 @@ def scrape_nfl():
548547 team = "JAX"
549548 data = [name , pos_team [0 ], team ]
550549
551- if not logged :
552- logging .info ("Fetched first page of results: name=%s, position=%s" , name , pos_team )
553- logged = True
554-
555550 data += [
556551 td .get_text ().strip () if "-" not in td .get_text () else np .nan
557552 for td in row .find_all ("td" )[3 :]
@@ -564,22 +559,22 @@ def scrape_nfl():
564559
565560 # find and click the next button
566561 try :
567- next_button = DRIVER .find_element (By .XPATH , '//a[text()=">"] ' )
562+ next_button = DRIVER .find_element (By .CLASS_NAME , 'next ' )
568563 actions = ActionChains (DRIVER )
569- actions .move_to_element ( next_button ). click ().perform ()
564+ actions .click (next_button ).perform ()
570565 page += 1
571566
572567 if page % 5 == 0 :
573- logging .info ("scraping NFL: page=%d, players=%d" , page , len (players ))
568+ logging .info ("Scraping NFL: page=%d, players=%d, first_on_page=%s " , page , len (players ), name )
574569
575570 scroll ()
576571 time .sleep (0.5 )
577572 except :
578573 if page == 0 :
579- logging .exception ("failed to click next button" )
574+ logging .exception ("Failed to click next button" )
580575 break
581576
582- logging .info ("skipped %d free-agents" , free_agents )
577+ logging .info ("Skipped %d free-agents" , free_agents )
583578
584579 df = pd .DataFrame (players )
585580 df ["two_pts" ] = df ["2pt" ]
@@ -604,7 +599,7 @@ def scrape_fantasy_pros():
604599 """
605600
606601 out = RAW_ADP
607- logging .info ("scraping Fantasy Pros" )
602+ logging .info ("Scraping Fantasy Pros" )
608603
609604 urls = {
610605 "std" : "https://www.fantasypros.com/nfl/adp/overall.php" ,