diff --git a/basketball_reference_web_scraper/html.py b/basketball_reference_web_scraper/html.py index ec109a28..6609f142 100644 --- a/basketball_reference_web_scraper/html.py +++ b/basketball_reference_web_scraper/html.py @@ -770,8 +770,10 @@ def team_names_query(self): return \ '//*[@id="content"]' \ '//div[@class="scorebox"]' \ - '//div[@itemprop="performer"]' \ - '//a[@itemprop="name"]' + '//strong' \ + '//a' + # '//div[@itemprop="performer"]' \ + # '//a[@itemprop="name"]' @property def play_by_play_table(self): @@ -850,9 +852,11 @@ def has_play_by_play_data(self): # Need to avoid rows that indicate start of period # Or denote tipoff / end of period (colspan = 5) # Or are one of the table headers for each period group (aria-label = Time) + # And remove events that happen during dead time at start / end of period (e.g. substitutions) return not self.is_start_of_period \ and self.html[1].get('colspan') != '5' \ - and self.timestamp_cell.get('aria-label') != 'Time' + and not self.timestamp_cell.get('aria-label') in ['Time', ''] \ + and not self.timestamp.endswith('00.0') class DailyBoxScoresPage: diff --git a/basketball_reference_web_scraper/parsers.py b/basketball_reference_web_scraper/parsers.py index b8a997a7..7181c28a 100644 --- a/basketball_reference_web_scraper/parsers.py +++ b/basketball_reference_web_scraper/parsers.py @@ -536,7 +536,7 @@ def parse(self, play_by_plays, away_team, home_team): result = [] for play_by_play in play_by_plays: if play_by_play.is_start_of_period: - current_period += 1 + current_period = int(play_by_play.html.get('id').strip('q')) elif play_by_play.has_play_by_play_data: result.append(self.format_data( current_period=current_period,