scraper.py: 32 changes (28 additions, 4 deletions)
@@ -23,6 +23,7 @@ def setup_arg_parser():
     arg_parser = argparse.ArgumentParser(description='A Pokémon web scraper')
     arg_parser.add_argument('-s', '--save', action='store_true', help='save the output to JSON')
     arg_parser.add_argument('-f', '--first', default=1, type=int, help='the ID of the first Pokémon to retrieve')
+    arg_parser.add_argument('-n', '--name', default='charmander', type=str, help='name of the first Pokémon to retrieve')
     arg_parser.add_argument('-l', '--last', default=1, type=int, help='the ID of the last Pokémon to retrieve')
     arg_parser.add_argument('-v', '--verbose', action='store_true', help='print the Pokémon\'s statistics to console')
     return arg_parser.parse_args()
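
A note on the new default: because -n/--name is pre-filled with 'charmander', args.name stays truthy unless the caller clears it, so the ID-based branch added further down is only reached in a case like the one below (a minimal stand-alone sketch, not part of the diff):

import argparse

# Reproduces only the parsing behaviour assumed above; flag and default copied from the hunk.
parser = argparse.ArgumentParser()
parser.add_argument('-n', '--name', default='charmander', type=str)

args = parser.parse_args([])                  # no flags: name defaults to 'charmander' (truthy)
assert args.name == 'charmander'

args = parser.parse_args(['--name', ''])      # explicitly cleared: the ID-based URL would be used
assert not args.name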
@@ -54,7 +55,13 @@ def extract_statistics(poke_id: int) -> object:
"""
Scrapes the Serebii.net with a given Pokémon ID.
"""
url = 'https://serebii.net/pokedex-swsh/{}.shtml'.format(str(poke_id).zfill(3))

if args.name:
url = 'https://serebii.net/pokedex-sv/{}'.format(args.name.lower())
print(url)
else:
url = 'https://serebii.net/pokedex-sv/{}.shtml'.format(str(poke_id).zfill(3))

data = requests.get(url)
soup = bs4.BeautifulSoup(data.text, 'html.parser')
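
For what it's worth, the same branch can be expressed as a small pure function, which keeps the URL choice testable without the module-level args; a sketch under that assumption (build_url is a hypothetical name, URL patterns copied from the lines above):

def build_url(poke_id: int, name: str = '') -> str:
    # Prefer the name-based Scarlet/Violet page when a name is given,
    # otherwise fall back to the zero-padded national-dex ID.
    if name:
        return 'https://serebii.net/pokedex-sv/{}'.format(name.lower())
    return 'https://serebii.net/pokedex-sv/{}.shtml'.format(str(poke_id).zfill(3))

# build_url(4, 'Charmander')  ->  'https://serebii.net/pokedex-sv/charmander'
# build_url(4)                ->  'https://serebii.net/pokedex-sv/004.shtml'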

@@ -70,6 +77,19 @@ def extract_statistics(poke_id: int) -> object:
         weight = center_panel_info[7].find('td', string='Standard').findNext('td').text.replace('lbs', 'lbs ').split(" ")
 
         base_stats_td = all_divs[1].find('td', string=re.compile("Base Stats - Total.*")).find_next_siblings('td')
+        # Find the 'Effort Values Earned' data
+        effort_values = None
+        rows = soup.find_all('tr')
+        for row in rows:
+            cells = row.find_all('td')
+            for i, cell in enumerate(cells):
+                if cell.get_text(strip=True).lower() == 'effort values earned':
+                    # The effort values are in the next row, same column
+                    effort_values_row = rows[rows.index(row) + 1]
+                    effort_values = effort_values_row.find_all('td')[i].get_text(strip=True)
+                    break
+            if effort_values:
+                break
     except Exception:
         logging.error('There was an error trying to identify HTML elements on the webpage. URL: %s', url)
         raise
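
The nested loop scans every table row for the 'Effort Values Earned' label and reads the matching column of the following row. An equivalent lookup can lean on BeautifulSoup's string matching; a sketch under the same layout assumption (plus the extra assumption that the label cell holds bare text, since the string filter checks tag.string):

label = soup.find('td', string=lambda s: s and s.strip().lower() == 'effort values earned')
if label is not None:
    row = label.find_parent('tr')
    col = row.find_all('td').index(label)        # column of the label cell
    next_row = row.find_next_sibling('tr')       # value is assumed to sit one row below
    if next_row is not None:
        effort_values = next_row.find_all('td')[col].get_text(strip=True)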
@@ -83,8 +103,10 @@ def extract_statistics(poke_id: int) -> object:
"hit_points": int(base_stats_td[0].text),
"attack": int(base_stats_td[1].text),
"defense": int(base_stats_td[2].text),
"special": int(base_stats_td[3].text),
"speed": int(base_stats_td[4].text)
"sp_att": int(base_stats_td[3].text),
"sp_def": int(base_stats_td[4].text),
"speed": int(base_stats_td[5].text),
"effort_values": effort_values
}

return extracted_pokemon
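
After this change the function returns six integer stat fields plus the effort values as a raw string (it is not parsed into numbers). Roughly, with illustrative values and any keys built earlier in the function omitted:

{
    "hit_points": 39,
    "attack": 52,
    "defense": 43,
    "sp_att": 60,
    "sp_def": 50,
    "speed": 65,
    "effort_values": "1 Speed Point(s)"   # kept exactly as scraped
}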
@@ -102,8 +124,10 @@ def display_formatted(poke_object):
     print('HP\t\t', poke_object['hit_points'])
     print('Attack\t\t', poke_object['attack'])
     print('Defense\t\t', poke_object['defense'])
-    print('Special\t\t', poke_object['special'])
+    print('Sp.Att\t\t', poke_object['sp_att'])
+    print('Sp.Def\t\t', poke_object['sp_def'])
     print('Speed\t\t', poke_object['speed'])
+    print('Effort Values\t', poke_object['effort_values'])
     print('-' * 20)
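
With the verbose flag, the extended printout would then read roughly as follows (values illustrative; the single tab after 'Effort Values' lands on the same tab stop as the double-tabbed stat labels):

# HP               39
# Attack           52
# Defense          43
# Sp.Att           60
# Sp.Def           50
# Speed            65
# Effort Values    1 Speed Point(s)
# --------------------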

