1
+ # for scraping books
2
+ from bs4 import BeautifulSoup as bs
3
+ import requests
4
+ # to identify emoji unicode characters
5
+ import demoji
6
+
7
+
8
def link_to_get(link, timeout=30):
    """Return the direct download link and cover image URL for a book page.

    Downloads ``link``, locates the ``<td id="info">`` cell and reads the
    ``href`` of its second anchor plus the ``src`` of its
    ``<img alt="cover">`` element.

    Args:
        link: URL of the page describing a single book.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled mirror.

    Returns:
        A two-item list ``[download_href, cover_url]``. ``cover_url`` is
        ``None`` when the page carries no usable cover image.

    Raises:
        requests.RequestException: The page could not be fetched (connection
            failure, timeout, ...).
        IndexError: The page has no ``<td id="info">`` cell, or that cell
            holds fewer than two anchors.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    response = requests.get(link, timeout=timeout)
    page = bs(response.text, "html.parser")

    info_cell = page.find("td", id="info")
    if info_cell is None:
        raise IndexError(f"no <td id='info'> cell found at {link}")

    anchors = info_cell.find_all("a")
    if len(anchors) < 2:
        raise IndexError(f"expected at least two links in the info cell at {link}")
    # The second anchor of the info cell is the one this scraper uses as the
    # download link.
    download_href = anchors[1].get("href")

    cover = info_cell.find("img", alt="cover")
    cover_src = cover.get("src") if cover is not None else None
    # urljoin resolves site-relative paths against the host and leaves
    # already-absolute URLs untouched, unlike plain string concatenation.
    cover_url = urljoin("http://library.lol", cover_src.strip()) if cover_src else None

    return [download_href, cover_url]
20
+
21
+
22
+
23
def book_get(name, mainres=25, results=5, timeout=30):
    """Search libgen.is for ``name`` and return matching English pdf/epub books.

    ``name`` may be any of:
        1. title of the book
        2. ISBN of the book
        3. author of the book
        4. publisher of the book

    Only rows whose type is ``pdf`` or ``epub`` and whose language is
    ``English`` are kept. For every kept row the book's page is fetched with
    ``link_to_get`` to obtain the download link and the cover image link.

    Args:
        name: Search text. Emoji are removed before the search is sent.
        mainres: Value sent as the ``res`` query parameter (presumably the
            number of rows the site lists per page).
        results: Maximum number of books to return.
        timeout: Seconds to wait for the search page before giving up.

    Returns:
        On success, a list with one entry per book, each entry being
            [0. book name,
             1. author,
             2. publisher ("unknown" when the site gives none),
             3. size,
             4. book type,
             5. book link,
             6. book image link,
             7. language]
        On failure, a string starting with ``"Error:"``.
    """
    name = demoji.replace(name, "").strip()
    if not name:
        return "Error: enter name"

    # Passing the query through ``params`` lets requests URL-encode it, so
    # characters such as "&" or "#" in the search text cannot corrupt the URL
    # (spaces are still sent as "+").
    query = {
        "req": name,
        "lg_topic": "libgen",
        "open": 0,
        "view": "simple",
        "res": mainres,
        "phrase": 1,
        "column": "def",
    }
    response = requests.get(
        "http://libgen.is/search.php", params=query, timeout=timeout
    )
    bs_html = bs(response.text, "html.parser")

    # Search the page text: ``in`` on a Tag only compares against its direct
    # children, and the page may have no <body> at all.
    if "Search string must contain minimum 3 characters.." in bs_html.get_text():
        return "Error: Title Too Short"

    # The third table on the page holds the results; its first row is the
    # header.
    tables = bs_html.find_all("table")
    if len(tables) < 3:
        return "Error: no results found"
    rows = tables[2].find_all("tr")[1:]

    books = []
    for row in rows:
        # Stop as soon as the requested number of books has been collected.
        if len(books) >= results:
            break

        cells = row.find_all("td")
        if len(cells) < 10:
            # Not a regular result row; the columns read below are missing.
            continue

        book_type = cells[8].get_text()
        language = cells[6].get_text()
        # Keep only English pdf/epub entries. Filtering before the detail
        # page is fetched avoids a network round-trip for rejected rows.
        if book_type not in ("pdf", "epub") or language != "English":
            continue

        anchor = cells[9].find("a", href=True)
        if anchor is None:
            continue

        # Extra request per book: direct download link and cover image link.
        details = link_to_get(anchor.get("href"))

        publisher = cells[3].get_text() or "unknown"
        books.append(
            [
                cells[2].get_text(),  # book name
                cells[1].get_text(),  # author
                publisher,
                cells[7].get_text(),  # size
                book_type,
                details[0],
                details[1],
                language,
            ]
        )

    return books if books else "Error: no results found"
111
+
112
+
113
+
114
if __name__ == "__main__":
    # Manual smoke test: run one search and print whatever came back.
    outcome = book_get("Python", 25, 5)
    if "Error" in outcome:
        # Failures are reported by book_get as a string containing "Error".
        print(outcome)
    else:
        for entry in outcome:
            print(f"\n \n Name : {entry[0]} \n Author : {entry[1]} \n Publisher : {entry[2]} \n Size : {entry[3]} \n Format : {entry[4]} \n Link : {entry[5]} \n Image : {entry[6]} \n \n ")
0 commit comments