File tree Expand file tree Collapse file tree 1 file changed +28
-0
lines changed
Expand file tree Collapse file tree 1 file changed +28
-0
lines changed Original file line number Diff line number Diff line change 11# coding: utf-8
22from __future__ import print_function , unicode_literals
33
4+ import os
45import re
6+ import shutil
57
8+ import requests
69from lxml import html
710
811
@@ -35,3 +38,28 @@ def extract_text(html_content):
3538 link .text = '[%s](%s)' % (text , url )
3639
3740 return content .text_content ().replace ('\xa0 ' , '' ).strip ()
41+
42+
def download_image(url, path, filename, proxies=None, timeout=None):
    """Download the image at *url* into the directory *path*.

    The file extension is derived from the response's ``Content-Type``
    header (``image/svg+xml`` is stored as ``.svg``).

    Args:
        url: Address of the image to fetch.
        path: Existing directory the image is written into.
        filename: Base name of the target file, without extension.
        proxies: Optional proxies mapping passed through to ``requests.get``.
        timeout: Optional timeout passed through to ``requests.get``;
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        The name of the written file (``"<filename>.<suffix>"``), or
        ``None`` when the response status is not 200/201, the content type
        is not an image, the ``Content-Length`` header is missing, invalid
        or not positive, or the derived suffix is unusable.
    """
    response = requests.get(
        url, stream=True, proxies=proxies, timeout=timeout)
    try:
        if response.status_code not in (200, 201):
            return None

        # Strip media-type parameters (e.g. "; charset=utf-8") so they do
        # not leak into the file extension; media types are
        # case-insensitive, so normalise to lower case.
        content_type = response.headers.get('Content-Type') or ''
        media_type = content_type.split(';', 1)[0].strip().lower()
        if not media_type.startswith('image/'):
            return None

        try:
            content_length = int(
                response.headers.get('Content-Length') or '0')
        except ValueError:
            # A malformed header is treated like a missing one.
            return None
        if content_length <= 0:
            return None

        suffix = media_type[len('image/'):]
        if suffix == 'svg+xml':
            suffix = 'svg'
        # The suffix comes from a remote header: refuse anything that is
        # empty or could escape the target directory.
        if not suffix or '/' in suffix or '\\' in suffix:
            return None

        # %-formatting rather than an f-string: the module's __future__
        # imports suggest it still targets interpreters without f-strings.
        image_filename = '%s.%s' % (filename, suffix)
        with open(os.path.join(path, image_filename), 'wb') as f:
            # Have urllib3 undo any gzip/deflate transfer encoding so the
            # bytes written to disk are the actual image data.
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, f)

        return image_filename
    finally:
        # With stream=True the connection stays checked out until the
        # response is consumed or closed; always release it.
        response.close()
You can’t perform that action at this time.
0 commit comments