Review notes (free text ahead of the first "diff --git" header; patch tools conventionally skip it).

* The patch is restored to one diff line per source line. Blank context lines could not be
  recovered from the collapsed text and were inferred from the original hunk line counts, so
  verify the context against the target tree (or regenerate the patch) before applying.
* The "index" blob ids are carried over from the submitted patch and no longer describe the
  post-image of the files changed below (client.py, html.py, http_service.py, parsers.py).
* client.shooting_diet_totals: the CSV header is now taken from the parsed rows instead of
  PLAYER_SEASON_TOTALS_COLUMN_NAMES (the old line was marked "#CHANGE THIS").
* html.ShootingDietRow: the repeated cell lookup is folded into one private helper; property
  names, data-stat values and return values are unchanged.
* NOTE(review): parsers.py also adds four percentage keys to the existing season-totals rows.
  If PLAYER_SEASON_TOTALS_COLUMN_NAMES is not extended to match, CSV output of
  players_season_totals may break - that constant is not part of this patch, please confirm.

diff --git a/basketball_reference_web_scraper/client.py b/basketball_reference_web_scraper/client.py
index 2694c5f3..edea34f0 100644
--- a/basketball_reference_web_scraper/client.py
+++ b/basketball_reference_web_scraper/client.py
@@ -163,6 +163,44 @@ def players_season_totals(season_end_year, output_type=None, output_file_path=No
     )
     return output_service.output(data=values, options=options)
 
+
+def shooting_diet_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
+                         json_options=None):
+    """Fetch one season's league-wide shooting table and write it out.
+
+    The rows come from ``HTTPService.shooting_diet_totals`` and are handed to
+    ``OutputService.output`` together with the options built from the arguments.
+
+    :param season_end_year: season identifier used to build the page URL
+    :param output_type: passed through to ``OutputOptions.of``
+    :param output_file_path: passed to ``FileOptions.of`` as ``path``
+    :param output_write_option: passed to ``FileOptions.of`` as ``mode``
+    :param json_options: passed through to ``OutputOptions.of``
+    :return: the result of ``OutputService.output`` for the parsed rows
+    :raises InvalidSeason: if the request fails with HTTP 404
+    """
+    try:
+        http_service = HTTPService(parser=ParserService())
+        values = http_service.shooting_diet_totals(season_end_year=season_end_year)
+    except requests.exceptions.HTTPError as http_error:
+        if http_error.response.status_code == requests.codes.not_found:
+            raise InvalidSeason(season_end_year=season_end_year)
+        else:
+            raise http_error
+    options = OutputOptions.of(
+        file_options=FileOptions.of(path=output_file_path, mode=output_write_option),
+        output_type=output_type,
+        json_options=json_options,
+        # The shooting rows have their own keys, so build the CSV header from the
+        # parsed rows rather than reusing PLAYER_SEASON_TOTALS_COLUMN_NAMES.
+        csv_options={"column_names": list(values[0]) if values else []},
+    )
+    output_service = OutputService(
+        json_writer=JSONWriter(value_formatter=BasketballReferenceJSONEncoder),
+        csv_writer=CSVWriter(value_formatter=format_value),
+    )
+    return output_service.output(data=values, options=options)
+
 
 def players_advanced_season_totals(season_end_year, include_combined_values=False, output_type=None,
                                    output_file_path=None, output_write_option=None, json_options=None):
diff --git a/basketball_reference_web_scraper/html.py b/basketball_reference_web_scraper/html.py
index 94871e65..6816797e 100644
--- a/basketball_reference_web_scraper/html.py
+++ b/basketball_reference_web_scraper/html.py
@@ -147,6 +147,42 @@ def points(self):
 
         return ''
 
+    @property
+    def field_goal_percentage(self):
+        cells = self.html.xpath('td[@data-stat="fg_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
+    @property
+    def three_point_field_goal_percentage(self):
+        cells = self.html.xpath('td[@data-stat="fg3_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
+    @property
+    def free_throw_percentage(self):
+        cells = self.html.xpath('td[@data-stat="ft_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
+    @property
+    def efficiency_field_goal_percentage(self):
+        cells = self.html.xpath('td[@data-stat="efg_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
 
 class PlayerBoxScoreRow(BasicBoxScoreRow):
     def __init__(self, html):
@@ -304,6 +340,33 @@ def rows(self):
 
         return player_season_totals_rows
 
+
+class ShootingDietTable:
+    """Exposes the rows of the ``shooting`` table found in the given document."""
+
+    def __init__(self, html):
+        self.html = html
+
+    @property
+    def rows_query(self):
+        # Basketball Reference includes individual rows for players that played for multiple teams in a season.
+        # It also includes a "League Average" row that has a class value of 'norank'.
+        return """
+            //table[@id='shooting']
+                /tbody
+                    /tr[
+                        not(contains(@class, 'thead')) and
+                        not(contains(@class, 'norank'))
+                    ]
+        """
+
+    @property
+    def rows(self):
+        # Traded players also get a combined row (team cell "2TM", "3TM", ...) on top of
+        # their per-team rows; only the per-team rows are returned.
+        candidate_rows = (ShootingDietRow(html=row_html) for row_html in self.html.xpath(self.rows_query))
+        return [row for row in candidate_rows if not row.is_combined_totals]
+
 
 class PlayerAdvancedSeasonTotalsRow(PlayerIdentificationRow):
     def __init__(self, html):
@@ -787,7 +850,190 @@ def points(self):
 
         return ''
 
+    @property
+    def field_goal_percentage(self):
+        cells = self.html.xpath('td[@data-stat="fg_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
+    @property
+    def three_point_field_goal_percentage(self):
+        cells = self.html.xpath('td[@data-stat="fg3_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
+    @property
+    def free_throw_percentage(self):
+        cells = self.html.xpath('td[@data-stat="ft_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
+    @property
+    def efficiency_field_goal_percentage(self):
+        cells = self.html.xpath('td[@data-stat="efg_pct"]')
+
+        if len(cells) > 0:
+            return cells[0].text_content()
+
+        return ''
+
+
+class ShootingDietRow:
+    """One ``tr`` of the league shooting table.
+
+    Unless noted otherwise, each property returns the text of the ``td`` whose
+    ``data-stat`` attribute matches, or ``''`` when the row has no such cell.
+    """
+
+    def __init__(self, html):
+        self.html = html
+
+    def _cell_text(self, data_stat):
+        # Single lookup shared by the text-valued properties below.
+        cells = self.html.xpath('td[@data-stat="{}"]'.format(data_stat))
+        return cells[0].text_content() if cells else ''
+
+    @property
+    def position_abbreviations(self):
+        return self._cell_text('pos')
+
+    @property
+    def age(self):
+        return self._cell_text('age')
+
+    @property
+    def games_played(self):
+        return self._cell_text('games')
+
+    @property
+    def games_started(self):
+        return self._cell_text('games_started')
+
+    @property
+    def is_combined_totals(self):
+        # No longer says 'TOT' - now says 2TM, 3TM, etc.
+        # Can safely use the 'TM' suffix as an identifier as no team abbreviations
+        # end in 'TM'
+        return self.team_abbreviation.endswith("TM")
+
+    @property
+    def team_abbreviation(self):
+        return self._cell_text('team_name_abbr')
+
+    @property
+    def player_cell(self):
+        # Returns the element itself (not its text), or None when the cell is absent.
+        cells = self.html.xpath('td[@data-stat="name_display"]')
+        return cells[0] if cells else None
+
+    @property
+    def name(self):
+        cell = self.player_cell
+        return '' if cell is None else cell.text_content()
+
+    @property
+    def playing_time(self):
+        return self._cell_text('mp')
+
+    @property
+    def minutes_played(self):
+        return self.playing_time
+
+    @property
+    def field_goal_percentage(self):
+        return self._cell_text('fg_pct')
+
+    @property
+    def average_distance(self):
+        return self._cell_text('avg_dist')
+
+    @property
+    def two_point_take_percentage(self):
+        return self._cell_text('pct_fga_fg2a')
+
+    # NOTE(review): the distance buckets below are named "meters"; Basketball
+    # Reference appears to report them in feet - confirm before renaming, since
+    # the parser keys mirror these names.
+    @property
+    def zero_to_three_meters_take_percentage(self):
+        return self._cell_text('pct_fga_00_03')
+
+    @property
+    def three_to_ten_meters_take_percentage(self):
+        return self._cell_text('pct_fga_03_10')
+
+    @property
+    def ten_to_sixteen_meters_take_percentage(self):
+        return self._cell_text('pct_fga_10_16')
+
+    @property
+    def sixteen_to_rest_meters_take_percentage(self):
+        return self._cell_text('pct_fga_16_xx')
+
+    @property
+    def three_point_take_percentage(self):
+        return self._cell_text('pct_fga_fg3a')
+
+    @property
+    def two_point_percentage(self):
+        return self._cell_text('fg_pct_fg2a')
+
+    @property
+    def zero_to_three_meters_percentage(self):
+        return self._cell_text('fg_pct_00_03')
+
+    @property
+    def three_to_ten_meters_percentage(self):
+        return self._cell_text('fg_pct_03_10')
+
+    @property
+    def ten_to_sixteen_meters_percentage(self):
+        return self._cell_text('fg_pct_10_16')
+
+    @property
+    def sixteen_to_rest_meters_percentage(self):
+        return self._cell_text('fg_pct_16_xx')
+
+    @property
+    def three_point_percentage(self):
+        return self._cell_text('fg_pct_fg3a')
+
+    @property
+    def assisted_two_point_percentage(self):
+        return self._cell_text('pct_ast_fg2')
+
+    @property
+    def assisted_three_point_percentage(self):
+        return self._cell_text('pct_ast_fg3')
+
+    @property
+    def dunk_take_percentage(self):
+        return self._cell_text('pct_fga_dunk')
+
+    @property
+    def dunk_number(self):
+        return self._cell_text('fg_dunk')
+
+    @property
+    def corner_three_take_percentage(self):
+        return self._cell_text('pct_fg3a_corner3')
+
+    @property
+    def corner_three_percentage(self):
+        return self._cell_text('fg_pct_corner3')
+
+    # TODO: maybe add the heave columns later.
+
 
 class BoxScoresPage:
     def __init__(self, html):
         self.html = html
diff --git a/basketball_reference_web_scraper/http_service.py b/basketball_reference_web_scraper/http_service.py
index 466b566a..6cf80ee8 100644
--- a/basketball_reference_web_scraper/http_service.py
+++ b/basketball_reference_web_scraper/http_service.py
@@ -5,7 +5,7 @@
 from basketball_reference_web_scraper.errors import InvalidDate, InvalidPlayerAndSeason
 from basketball_reference_web_scraper.html import DailyLeadersPage, PlayerSeasonBoxScoresPage, PlayerSeasonTotalTable, \
     PlayerAdvancedSeasonTotalsTable, PlayByPlayPage, SchedulePage, BoxScoresPage, DailyBoxScoresPage, SearchPage, \
-    PlayerPage, StandingsPage
+    PlayerPage, StandingsPage, ShootingDietTable
 
 
 class HTTPService:
@@ -133,6 +133,21 @@ def players_season_totals(self, season_end_year):
         table = PlayerSeasonTotalTable(html=html.fromstring(response.content))
         return self.parser.parse_player_season_totals(totals=table.rows)
 
+    def shooting_diet_totals(self, season_end_year):
+        """Download the league shooting page for a season and parse its rows.
+
+        Raises ``requests.exceptions.HTTPError`` for 4xx/5xx responses.
+        """
+        url = '{BASE_URL}/leagues/NBA_{season_end_year}_shooting.html'.format(
+            BASE_URL=HTTPService.BASE_URL,
+            season_end_year=season_end_year,
+        )
+        response = requests.get(url=url)
+
+        response.raise_for_status()
+        table = ShootingDietTable(html=html.fromstring(response.content))
+        return self.parser.parse_shooting_diet(totals=table.rows)
+
     def schedule_for_month(self, url):
         response = requests.get(url=url)
 
diff --git a/basketball_reference_web_scraper/parser_service.py b/basketball_reference_web_scraper/parser_service.py
index c58ca114..0c80536a 100644
--- a/basketball_reference_web_scraper/parser_service.py
+++ b/basketball_reference_web_scraper/parser_service.py
@@ -6,7 +6,7 @@
     PeriodTimestampParser, ScoresParser, PlayByPlaysParser, TeamNameParser, ScheduledStartTimeParser, \
     ScheduledGamesParser, PlayerBoxScoreOutcomeParser, PlayerSeasonBoxScoresParser, SearchResultNameParser, \
     ResourceLocationParser, SearchResultsParser, LeagueAbbreviationParser, PlayerDataParser, DivisionNameParser, \
-    TeamStandingsParser, ConferenceDivisionStandingsParser
+    TeamStandingsParser, ConferenceDivisionStandingsParser, ShootingDietParser
 
 
 class ParserService:
@@ -66,6 +66,10 @@ def __init__(self):
             team_abbreviation_parser=self.team_abbreviation_parser,
             position_abbreviation_parser=self.position_abbreviation_parser,
         )
+        self.shooting_diet_parser = ShootingDietParser(
+            position_abbreviation_parser=self.position_abbreviation_parser,
+            team_abbreviation_parser=self.team_abbreviation_parser,
+        )
         self.scheduled_start_time_parser = ScheduledStartTimeParser()
         self.scheduled_games_parser = ScheduledGamesParser(
             start_time_parser=self.scheduled_start_time_parser,
@@ -107,6 +111,9 @@ def parse_player_advanced_season_totals_parser(self, totals):
     def parse_player_season_totals(self, totals):
         return self.player_season_totals_parser.parse(totals=totals)
 
+    def parse_shooting_diet(self, totals):
+        return self.shooting_diet_parser.parse(totals=totals)
+
     def parse_scheduled_games(self, games):
         return self.scheduled_games_parser.parse_games(games)
 
diff --git a/basketball_reference_web_scraper/parsers.py b/basketball_reference_web_scraper/parsers.py
index 3a2c867f..60411fca 100644
--- a/basketball_reference_web_scraper/parsers.py
+++ b/basketball_reference_web_scraper/parsers.py
@@ -373,6 +373,51 @@ def parse(self, totals):
                 "turnovers": str_to_int(total.turnovers),
                 "personal_fouls": str_to_int(total.personal_fouls),
                 "points": str_to_int(total.points),
+                "efficiency_field_goal_percentage": str_to_float(total.efficiency_field_goal_percentage),
+                "free_throw_percentage": str_to_float(total.free_throw_percentage),
+                "three_point_field_goal_percentage": str_to_float(total.three_point_field_goal_percentage),
+                "field_goal_percentage": str_to_float(total.field_goal_percentage)
+            } for total in totals
+        ]
+
+
+class ShootingDietParser:
+    """Turns ``ShootingDietRow`` objects into plain dictionaries, one per row."""
+
+    def __init__(self, position_abbreviation_parser, team_abbreviation_parser):
+        self.position_abbreviation_parser = position_abbreviation_parser
+        self.team_abbreviation_parser = team_abbreviation_parser
+
+    def parse(self, totals):
+        return [
+            {
+                "name": str(total.name).rstrip("*"),
+                "positions": self.position_abbreviation_parser.from_abbreviations(total.position_abbreviations),
+                "age": str_to_int(total.age, default=None),
+                "team": self.team_abbreviation_parser.from_abbreviation(total.team_abbreviation),
+                "games_played": str_to_int(total.games_played),
+                "games_started": str_to_int(total.games_started),
+                "minutes_played": str_to_int(total.minutes_played),
+                "field_goal_percentage": str_to_float(total.field_goal_percentage),
+                "average_distance": str_to_float(total.average_distance),
+                "two_point_take_percentage": str_to_float(total.two_point_take_percentage),
+                "zero_to_three_meters_take_percentage": str_to_float(total.zero_to_three_meters_take_percentage),
+                "three_to_ten_meters_take_percentage": str_to_float(total.three_to_ten_meters_take_percentage),
+                "ten_to_sixteen_meters_take_percentage": str_to_float(total.ten_to_sixteen_meters_take_percentage),
+                "sixteen_to_rest_meters_take_percentage": str_to_float(total.sixteen_to_rest_meters_take_percentage),
+                "three_point_take_percentage": str_to_float(total.three_point_take_percentage),
+                "two_point_percentage": str_to_float(total.two_point_percentage),
+                "zero_to_three_meters_percentage": str_to_float(total.zero_to_three_meters_percentage),
+                "three_to_ten_meters_percentage": str_to_float(total.three_to_ten_meters_percentage),
+                "ten_to_sixteen_meters_percentage": str_to_float(total.ten_to_sixteen_meters_percentage),
+                "sixteen_to_rest_meters_percentage": str_to_float(total.sixteen_to_rest_meters_percentage),
+                "three_point_percentage": str_to_float(total.three_point_percentage),
+                "assisted_two_point_percentage": str_to_float(total.assisted_two_point_percentage),
+                "assisted_three_point_percentage": str_to_float(total.assisted_three_point_percentage),
+                "dunk_take_percentage": str_to_float(total.dunk_take_percentage),
+                "dunk_number": str_to_int(total.dunk_number),
+                "corner_three_take_percentage": str_to_float(total.corner_three_take_percentage),
+                "corner_three_percentage": str_to_float(total.corner_three_percentage)
             } for total in totals
         ]
 