Skip to content

Commit 89abbd3

Browse files
committed
added a book scraper script
1 parent 7d76916 commit 89abbd3

File tree

5 files changed

+181
-0
lines changed

5 files changed

+181
-0
lines changed

Book_Scraper/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Book Scraper
2+
It is a Book Scraper Python script which allows the user to search for and download books from the console.
3+
Ex: when a user provides the name of a book
4+
The script returns the book name, size of the book, author, extension type, language of the book, book cover image, and direct download link.
5+
6+
# Installation & Run
7+
`pip install -r requirements.txt`
8+
9+
`python book.py`
10+
11+
# Screenshots
12+
![Alt text](assests/image.png)
13+
![Alt text](assests/image-1.png)

Book_Scraper/assests/image-1.png

12.5 KB
Loading

Book_Scraper/assests/image.png

68.4 KB
Loading

Book_Scraper/book.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# for scraping books
2+
from bs4 import BeautifulSoup as bs
3+
import requests
4+
# to identify emoji unicode characters
5+
import emoji
6+
import pyfiglet
7+
import itertools
8+
import threading
9+
import time
10+
import sys
11+
12+
13+
14+
def is_emoji(text):
    """Return True if *text* contains at least one emoji, otherwise False.

    Uses ``emoji.emoji_count`` rather than ``emoji.get_emoji_regexp``: the
    latter was deprecated and later removed from the ``emoji`` package, so
    relying on it breaks as soon as the dependency is upgraded.
    """
    return emoji.emoji_count(text) > 0
17+
18+
def link_to_get(link):
    """Fetch a book's detail page and extract its download and cover URLs.

    Args:
        link: URL of the book's detail page, as found in the search
            results table.

    Returns:
        A two-item list ``[download_url, cover_image_url]``. The download
        URL is the ``href`` of the second anchor inside the ``td#info``
        cell; the cover URL is the ``src`` of the ``img[alt="cover"]``
        element resolved against ``http://library.lol``.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the request times out).
        IndexError, AttributeError: if the page does not contain the
            expected ``td#info`` cell, anchors or cover image.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import urljoin

    # A timeout prevents one unresponsive mirror from hanging the search.
    response = requests.get(link, timeout=30)
    page = bs(response.text, "html.parser")

    info_cell = page.find_all("td", id="info")[0]
    download_href = info_cell.find_all("a")[1].get("href")
    cover_src = info_cell.find("img", alt="cover").get("src")

    # urljoin yields the same result as plain concatenation for the usual
    # "/covers/..." paths, but also copes with absolute URLs and paths that
    # lack a leading slash.
    img_link = urljoin("http://library.lol", cover_src)
    return [download_href, img_link]
30+
31+
def book_get(name, mainres=100, results=5):
    """Search libgen.is for *name* and return details of the matching books.

    Only English-language books in ``pdf`` or ``epub`` format are kept.

    Args:
        name: Search text. It can be any of:
            1. title of book
            2. isbn of book
            3. author of book
            4. publisher of book
        mainres: Number of rows requested from the site per page
            (the site offers 25, 50 or 100).
        results: Maximum number of books to return.

    Returns:
        On success, a list with one inner list per book, laid out as::

            [0. Book Name,
             1. Author,
             2. Size,
             3. Book Type,
             4. Book Link,
             5. Book Image Link,
             6. Language]

        On failure, one of the strings ``"Error: enter name"``,
        ``"Error: emoji"``, ``"Error: Title Too Short"`` or
        ``"Error: no results found"``.

    Raises:
        requests.RequestException: if the search page or a book detail page
            cannot be fetched (including timeouts).
    """
    # Cheap validation first, so an empty query never touches the network.
    if name == "":
        return "Error: enter name"
    if is_emoji(name):
        return "Error: emoji"

    # Let requests build the query string: spaces still become "+", and
    # characters such as "&", "#" or "+" are escaped instead of corrupting
    # the URL.
    response = requests.get(
        "http://libgen.is/search.php",
        params={
            "req": name,
            "lg_topic": "libgen",
            "open": 0,
            "view": "simple",
            "res": mainres,
            "phrase": 1,
            "column": "def",
        },
        timeout=30,
    )
    bs_html = bs(response.text, "html.parser")

    # `in` on a Tag checks its direct children, not its text, so the message
    # has to be looked up in the extracted page text.
    if "Search string must contain minimum 3 characters.." in bs_html.get_text():
        return "Error: Title Too Short"

    # The code relies on the third table of the page holding the results;
    # a page without it is treated as "no results".
    tables = bs_html.find_all("table")
    if len(tables) < 3:
        return "Error: no results found"

    # The first row is dropped, as in the original (presumably the header).
    result_rows = tables[2].find_all("tr")[1:]

    books = []
    for row in result_rows:
        # Stop as soon as enough books have been collected.
        if len(books) >= results:
            break

        cells = row.find_all("td")
        # Column 9 (the link column) is the highest index read below.
        if len(cells) < 10:
            continue

        language = cells[6].get_text()
        type_ofit = cells[8].get_text()
        # Keep only English pdf/epub entries. Filtering before the detail
        # page lookup avoids an extra HTTP request for every rejected row.
        if type_ofit not in ("pdf", "epub") or language != "English":
            continue

        anchor = cells[9].find("a", href=True)
        if anchor is None:
            continue

        # One extra request per accepted book: direct download + cover URLs.
        link_all = link_to_get(anchor.get("href"))

        books.append([
            cells[2].get_text(),  # book name
            cells[1].get_text(),  # author
            cells[7].get_text(),  # size
            type_ofit,
            link_all[0],          # direct download link
            link_all[1],          # cover image link
            language,
        ])

    if books:
        return books
    return "Error: no results found"
121+
122+
# a = book_get("Harry Potter",25,5)
123+
# print(a)
124+
# for i in a :
125+
# print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n\n")
126+
127+
def animate():
    """Draw a console spinner until the module-level ``done`` flag is truthy.

    Meant to be run in a background thread while a search is in progress.
    Every 0.1 seconds the current line is overwritten (via a carriage
    return) with ``...Searching Book`` followed by the next spinner frame.
    """
    frames = itertools.cycle(['|', '/', '-', '\\'])
    # `done` is a global set by the main script once the search finishes.
    while not done:
        frame = next(frames)
        sys.stdout.write('\r...Searching Book ' + frame)
        sys.stdout.flush()
        time.sleep(0.1)
134+
135+
if __name__ == "__main__":
    # Interactive console front end: show a banner, then loop over a
    # two-entry menu (search / exit) until the user chooses to exit.
    separator = "---------------------------------------------------------------"

    print(pyfiglet.figlet_format("Book Scraper"))
    print(separator)
    print(separator)
    while True:
        print("\nEnter your Choice: \n1 - Search Book\n2 - Exit")
        try:
            entry = int(input())
        except ValueError:
            # Non-numeric input used to crash the program.
            print("Invalid choice, please enter 1 or 2")
            continue

        if entry == 1:
            print("Enter name of book : ")
            book_name = input()

            # `done` is the module-level flag polled by animate().
            done = False
            t = threading.Thread(target=animate)
            t.start()
            try:
                books = book_get(book_name, 25, 5)
            except requests.RequestException as err:
                books = f"Error: network problem ({err})"
            finally:
                # Always stop and wait for the spinner, even on failure, so
                # the thread cannot keep running or write over the results.
                done = True
                t.join()

            # book_get reports failures as "Error: ..." strings.
            if isinstance(books, str):
                if books == "Error: no results found":
                    print("\nBook not Found\n")
                elif books == "Error: Title Too Short":
                    print("\nTitle too short\n")
                else:
                    print(f"\n{books}\n")
                continue

            for i in books:
                print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n")
        elif entry == 2:
            print(pyfiglet.figlet_format("Thank You for Using"))
            print(separator)
            print(separator)
            break
        else:
            print("Invalid choice, please enter 1 or 2")

Book_Scraper/requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
beautifulsoup4==4.11.1
2+
emoji==1.6.3
3+
pyfiglet==0.8.post1
4+
Requests==2.31.0

0 commit comments

Comments
 (0)