diff --git a/Web Scraping Best Animes/README.md b/Web Scraping Best Animes/README.md new file mode 100644 index 0000000..7c52416 --- /dev/null +++ b/Web Scraping Best Animes/README.md @@ -0,0 +1,23 @@ +# Web Scraping Best Animes +The following script writes the best animes according to the International Community to the [best_animes.txt](best_animes.txt) file by web scraping the [List challenges](https://www.listchallenges.com/) site. + +### Prerequisites +Modules required to be able to use the script successfully: +Check [requirements.txt](requirements.txt). + +### How to run the script +Make sure you have installed the required modules. +Open the project folder using your favorite IDE. +Either use your IDE's built-in "Run" button or type "python3 main.py" in the terminal. + +P.S.: As you can see, part of the code is commented out. This is because https://www.listchallenges.com/top-100-anime-of-all-time-according-to-the distributes the items (animes in our case) across multiple HTML pages, so the code that is not commented out can access only the first page, which contains 40 animes. If you want to get hold of all 100 animes, uncomment the rest of the code. Have fun! + +### Screenshot/GIF showing the sample use of the script +How to run the script: +![Run the code](screenshots/web_scraping_before.png) + +How to see the results: +![See the results](screenshots/web_scraping_after.png) + +## *Author Name* +[Alexei Luchian](https://github.com/AlexeiLuchian) \ No newline at end of file diff --git a/Web Scraping Best Animes/best_animes.txt b/Web Scraping Best Animes/best_animes.txt new file mode 100644 index 0000000..7174855 --- /dev/null +++ b/Web Scraping Best Animes/best_animes.txt @@ -0,0 +1,40 @@ +Fullmetal Alchemist Brotherhood +Boku No Hero Academia +Kimi No Na Wa. 
"""Scrape anime titles from List Challenges and save them to a text file."""

import requests
from bs4 import BeautifulSoup

LIST_CHALLENGES_URL = (
    "https://www.listchallenges.com/top-100-anime-of-all-time-according-to-the"
)

# The list is split across several HTML pages. Only the first page is
# scraped by default; uncomment the remaining entries to collect the titles
# from the other pages as well.
PAGE_URLS = [
    LIST_CHALLENGES_URL,
    # LIST_CHALLENGES_URL + "/list/2",
    # LIST_CHALLENGES_URL + "/list/3",
]

# Must match the file name the README links to.
OUTPUT_FILE = "best_animes.txt"

# requests applies no timeout unless one is given, so without this a stalled
# server would hang the script forever.
REQUEST_TIMEOUT_SECONDS = 10


def fetch_titles(url: str) -> list:
    """Return the titles found on a single list page.

    Args:
        url: Address of the list page to download.

    Returns:
        The text of every ``<div class="item-name">`` element on the page,
        in document order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer in time.
    """
    response = requests.get(url=url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    title_divs = soup.find_all(name="div", class_="item-name")

    # Trim surrounding whitespace so that every title occupies exactly one
    # line in the output file.
    return [div.get_text().strip() for div in title_divs]


def main() -> None:
    """Scrape every configured page and write one title per line."""
    titles = []
    for url in PAGE_URLS:
        titles.extend(fetch_titles(url))

    with open(OUTPUT_FILE, "w", encoding="utf-8") as file:
        file.writelines(f"{title}\n" for title in titles)


if __name__ == "__main__":
    main()