# for scraping books
from bs4 import BeautifulSoup as bs
import requests
# to identify emoji unicode characters
import emoji
import pyfiglet
import itertools
import threading
import time
import sys


def is_emoji(text):
    """Return True if the given string contains an emoji, else False."""
    # emoji.get_emoji_regexp() was removed in emoji 2.0, so use emoji_count()
    return emoji.emoji_count(text) > 0

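# quick sanity check (commented out; exact behavior depends on the installed
# version of the emoji package):
# print(is_emoji("hello 📚"))   # True
# print(is_emoji("hello"))      # False
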
def link_to_get(link):
    """Get the direct download link and the cover image url for a book,
    using the given link from the search results."""
    response = requests.get(link, timeout=30)
    th_html = bs(response.text, "html.parser")
    # the download page keeps its metadata in a single <td id="info">
    td_all = th_html.find_all("td", id="info")
    td_all = td_all[0]
    td_a = td_all.find_all("a")
    link_href = td_a[1].get("href")
    img_link_td = td_all.find("img", alt="cover")
    img_link_src = img_link_td.get("src")
    # the cover src is relative, so prepend the mirror's host
    img_link = f"http://library.lol{img_link_src}"
    return [link_href, img_link]

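# example usage (commented out); "<md5>" is a placeholder, real links come
# from the search results handled in book_get() below
# download_link, cover_link = link_to_get("http://library.lol/main/<md5>")
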
def book_get(name, mainres=100, results=5):
    """Return a list of books matching the given name.

    name may be:
    1. the title of the book
    2. the ISBN of the book
    3. the author of the book
    4. the publisher of the book

    mainres (results per page requested from libgen) may be:
    1. 25
    2. 50
    3. 100

    Each result is:
    [ 0. Book Name,
      1. Author,
      2. Size,
      3. Book Type,
      4. Book Link,
      5. Book Image Link,
      6. Language ]

    On failure a string starting with "Error:" is returned instead."""
    Books = []
    if is_emoji(name):
        return "Error: emoji"
    if name == "":
        return "Error: enter name"
    name = name.replace(" ", "+")
    # getting request and response
    url = f"http://libgen.is/search.php?req={name}&lg_topic=libgen&open=0&view=simple&res={mainres}&phrase=1&column=def"
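    # urllib.parse.quote_plus(name) would be a more robust way to encode the
    # query than the bare space replacement above (a suggestion, not used here)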
    response = requests.get(url, timeout=30)
    bs_html = bs(response.text, "html.parser")

    # `in` on a Tag only checks its direct children, so search the raw text
    if "Search string must contain minimum 3 characters.." in response.text:
        return "Error: Title Too Short"

    # scraping the site for response: the third table holds the results
    table = bs_html.find_all("table")[2]
    table_rows = table.find_all("tr")
    row_count = len(table_rows)
    # drop the header row
    table_rows.pop(0)
    if row_count > 1:
        for row in table_rows:
            # stop once enough books have been collected
            if len(Books) >= results:
                break
            # getting all table datas
            table_datas = row.find_all("td")
            # book name
            book_name = table_datas[2].get_text()
            # author name
            author = table_datas[1].get_text()
            # getting language
            language = table_datas[6].get_text()
            # getting size of book
            size = table_datas[7].get_text()
            # getting type of book
            type_ofit = table_datas[8].get_text()
            # only keep pdf/epub books in English; filtering before fetching
            # avoids a needless request to the download page for rejected rows
            if (type_ofit != "pdf" and type_ofit != "epub") or language != "English":
                continue
            # getting link to book
            a = table_datas[9].find("a", href=True)
            link = a.get("href")
            # getting image url & direct book download link
            link_all = link_to_get(link)
            Books.append([book_name, author, size, type_ofit,
                          link_all[0], link_all[1], language])
        if Books:
            return Books
    return "Error: no results found"

# a = book_get("Harry Potter", 25, 5)
# print(a)
# for i in a:
#     print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n\n")

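# book_get signals failure with an "Error: ..." string, so callers can check
# the return type first (a sketch; the ISBN shown is only illustrative):
# b = book_get("9780131103627")
# if isinstance(b, str):
#     print(b)
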
def animate():
    """Spin a simple text loader until the global `done` flag is set."""
    for c in itertools.cycle(['|', '/', '-', '\\']):
        if done:
            break
        sys.stdout.write('\r...Searching Book ' + c)
        sys.stdout.flush()
        time.sleep(0.1)

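# a threading.Event would be a more idiomatic stop signal than the module-level
# `done` flag (a sketch, not wired into the code below):
# stop = threading.Event()
# def animate_with_event():
#     for c in itertools.cycle(['|', '/', '-', '\\']):
#         if stop.is_set():
#             break
#         sys.stdout.write('\r...Searching Book ' + c)
#         sys.stdout.flush()
#         time.sleep(0.1)
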
if __name__ == "__main__":
    print(pyfiglet.figlet_format("Book Scraper"))
    print("---------------------------------------------------------------")
    print("---------------------------------------------------------------")
    while True:
        print("\nEnter your Choice: \n1 - Search Book\n2 - Exit")
        # read the choice as a string so non-numeric input can't crash int()
        entry = input().strip()
        if entry == "1":
            print("Enter name of book : ")
            book_name = input()
            # loading flag read by the spinner thread
            done = False
            # here is the animation
            t = threading.Thread(target=animate)
            t.start()
            books = book_get(book_name, 25, 5)
            done = True
            t.join()
            # book_get returns an "Error: ..." string on failure
            if isinstance(books, str):
                if books == "Error: no results found":
                    print("\nBook not Found\n")
                elif books == "Error: Title Too Short":
                    print("\nTitle too short\n")
                else:
                    print("\n" + books + "\n")
            else:
                for i in books:
                    print(f"\n\nName : {i[0]}\nAuthor : {i[1]}\nSize : {i[2]}\nFormat : {i[3]}\nLink : {i[4]}\nImage : {i[5]}\n")
        elif entry == "2":
            print(pyfiglet.figlet_format("Thank You for Using"))
            print("---------------------------------------------------------------")
            print("---------------------------------------------------------------")
            break