Commit c4dbd1b

Merge pull request #1940 from SyedImtiyaz-1/imgScrape
Added `Image Scrapper`
2 parents 820d8f3 + cc6dc44 commit c4dbd1b

4 files changed: +78 −0 lines changed


Auto-Linkedin /AutoLinkedIn.py

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
from selenium import webdriver  # connects Python with the Chrome web browser
from selenium.webdriver.common.keys import Keys
import pyautogui as pag

driver = None  # shared WebDriver instance, created in main() and used by the helpers below


def main():
    global driver
    url = "http://linkedin.com/"  # URL of LinkedIn
    network_url = "http://linkedin.com/mynetwork/"  # URL of the LinkedIn network page
    driver = webdriver.Chrome(r'F:\Argha\WebDriver\chromedriver.exe')  # path to the browser web driver
    driver.get(url)


def login():
    username = driver.find_element_by_id("login-email")  # Getting the login element
    username.send_keys("username")  # Sending the keys for the username
    password = driver.find_element_by_id("login-password")  # Getting the password element
    password.send_keys("password")  # Sending the keys for the password
    driver.find_element_by_id("login-submit").click()  # Clicking the submit button


def goto_network():
    driver.find_element_by_id("mynetwork-tab-icon").click()  # open the "My Network" tab


def send_requests():
    n = int(input("Number of requests: "))  # Number of requests you want to send
    for i in range(n):
        pag.click(880, 770)  # position (in px) of the connect button
    print("Done!")


if __name__ == "__main__":
    # Workflow described in the README: open LinkedIn, log in,
    # open the network tab, then send the connection requests.
    main()
    login()
    goto_network()
    send_requests()

Auto-Linkedin /README.md

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
## Auto Linkedin

- It imports the necessary modules: webdriver from Selenium to control the web browser, Keys from Selenium to handle keyboard keys, and pyautogui as pag to simulate mouse clicks.
- The main() function sets up the Selenium WebDriver with the Chrome browser and opens the LinkedIn website.
- The login() function finds the login elements on the LinkedIn page, enters the provided username and password, and clicks the submit button to log in.
- The goto_network() function clicks the "My Network" tab on the LinkedIn page.
- The send_requests() function prompts the user for the number of connection requests to send, then uses the PyAutoGUI library to simulate mouse clicks on the connect button (at the specified screen position) that many times.

- Install these before running:
1. pip install selenium
2. pip install pyautogui

Once you have installed the necessary libraries and downloaded the Chrome WebDriver, you should be able to run the code successfully. A Selenium 4 variant of the login flow is sketched below.
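Note that Selenium 4 removed the `find_element_by_*` helpers used in `AutoLinkedIn.py`. The following is a minimal sketch of the same login flow with the `By` locator API; the element IDs and placeholder credentials are copied from the script above and may not match the current LinkedIn markup, so treat it as a starting point rather than a drop-in replacement.

```python
# Sketch only: Selenium 4 version of the login flow from AutoLinkedIn.py.
# The element IDs ("login-email", "login-password", "login-submit") are the
# ones used in the original script and may need updating for today's LinkedIn.
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()  # Selenium 4.6+ locates chromedriver automatically


def login(email, password):
    driver.get("https://www.linkedin.com/")
    driver.find_element(By.ID, "login-email").send_keys(email)        # username field
    driver.find_element(By.ID, "login-password").send_keys(password)  # password field
    driver.find_element(By.ID, "login-submit").click()                # submit button


login("username", "password")  # placeholder credentials, as in the original script
```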

Image-Scraper/README.md

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
## Image Scraper

The aim of the provided script is to scrape all HTML <img> tags from a given URL.

It imports the necessary modules: BeautifulSoup from the bs4 (Beautiful Soup) library for parsing HTML, and requests for making HTTP requests.
The code checks the length of the command-line arguments. If the length is not equal to 2 (indicating that a URL was not provided), it exits with an error message.
It uses the requests.get() function to make an HTTP GET request to the provided URL. The User-Agent header is set to mimic a web browser to avoid any potential blocking or filtering.
The response from the request is then passed to BeautifulSoup to parse the HTML content of the page.
The find_all() method is used on the parsed HTML data to find all <img> tags with a valid src attribute. The src=True parameter filters out <img> tags without the src attribute.
A loop iterates over the list of found images, and each image is printed.

In summary, the script allows you to scrape and print all HTML <img> tags (along with their attributes) from a given URL.
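For reference, a small variation on the same approach prints only each image's `src` attribute, resolved to an absolute URL with `urllib.parse.urljoin`. This is a sketch, not part of the committed script; the `base_url` value stands in for the URL normally passed on the command line.

```python
# Sketch: print the src of each <img> tag as an absolute URL instead of the whole tag.
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

base_url = "https://example.com/"  # stand-in for the URL argument (sys.argv[1] in the script)
response = requests.get(base_url, headers={"User-Agent": "Mozilla/5.0"})

html_data = BeautifulSoup(response.text, "html.parser")
for image in html_data.find_all("img", src=True):
    print(urljoin(base_url, image["src"]))  # resolve relative src values against the page URL
```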
### Installation Requirements

1. pip install beautifulsoup4
2. pip install requests

Image-Scraper/scrape_images.py

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
# Scrape all HTML <img> tags from a provided URL.

from bs4 import BeautifulSoup
import requests
import sys

if len(sys.argv) != 2:
    sys.exit("Usage: python scrape_images.py {url}")

response = requests.get(
    sys.argv[1],
    headers={
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
    }
)

html_data = BeautifulSoup(response.text, 'html.parser')
images = html_data.find_all('img', src=True)

for image in images:
    print(image)
