Python-libraries-assignment

import requests
import os
import hashlib
from urllib.parse import urlparse


def download_image(url, output_dir="images"):
    """
    Downloads an image from a URL, handling potential issues and duplicates.

    Args:
        url (str): The URL of the image to download.
        output_dir (str): The directory to save the downloaded image.
    """
    try:
        # Check if the output directory exists, create it if not
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # 1. Basic URL validation
        if not url.startswith(('http://', 'https://')):
            print(f"Invalid URL: {url}. Skipping.")
            return

        # 2. Get a response from the URL, using stream=True for large files
        response = requests.get(url, stream=True)
        response.raise_for_status()  # raise HTTPError for bad responses (4xx or 5xx)

        # 3. Check the Content-Type header
        content_type = response.headers.get('Content-Type')
        if content_type and 'image' not in content_type:
            print(f"URL {url} does not point to an image (Content-Type: {content_type}). Skipping.")
            return

        # 4. Check the Content-Length header (reject files larger than 10 MB)
        content_length = response.headers.get('Content-Length')
        if content_length:
            content_length = int(content_length)
            if content_length > 10 * 1024 * 1024:
                print(f"File size is too large ({content_length} bytes). Skipping.")
                return

        # 5. Extract a filename from the URL
        parsed_url = urlparse(url)
        filename = os.path.basename(parsed_url.path)
        if not filename:
            # Generate a name from a hash of the URL
            filename = "image_" + hashlib.md5(url.encode()).hexdigest()[:8] + ".jpg"
        filepath = os.path.join(output_dir, filename)

        # 6. Check for duplicates by hashing the downloaded content
        image_hash = hashlib.md5()
        chunks = []
        for chunk in response.iter_content(chunk_size=8192):
            image_hash.update(chunk)
            chunks.append(chunk)
        hex_digest = image_hash.hexdigest()

        # The original snippet stops after computing the hash; the lines below are
        # an assumed completion: skip the download if an identical image is already
        # in output_dir, otherwise write the buffered chunks to disk.
        for existing in os.listdir(output_dir):
            existing_path = os.path.join(output_dir, existing)
            if not os.path.isfile(existing_path):
                continue
            with open(existing_path, 'rb') as f:
                if hashlib.md5(f.read()).hexdigest() == hex_digest:
                    print(f"Duplicate image (hash {hex_digest}) already saved as {existing}. Skipping.")
                    return

        with open(filepath, 'wb') as f:
            for chunk in chunks:
                f.write(chunk)
        print(f"Saved {url} to {filepath}")

    except requests.exceptions.RequestException as e:
        print(f"Failed to download {url}: {e}")
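A minimal usage sketch, assuming download_image is defined in the same script; the URLs below are placeholders, not part of the assignment:

    # Example driver (hypothetical URLs; replace with real image links)
    if __name__ == "__main__":
        urls = [
            "https://example.com/images/photo1.jpg",
            "https://example.com/images/photo2.png",
        ]
        for image_url in urls:
            download_image(image_url, output_dir="images")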
