1616
1717from ftplib import FTP
1818from mimetypes import MimeTypes
19- import os
20- import tempfile
2119from urllib .parse import urlparse
20+ from kiss_headers import parse_it
21+ from pathlib import Path
2222
2323import requests
24+ import tempfile
2425
2526
2627class Response :
@@ -41,14 +42,35 @@ def __init__(self, location, content_type, size, url):
4142def fetch_http (url , location ):
4243 """
4344 Return a `Response` object built from fetching the content at a HTTP/HTTPS based `url` URL string
44- saving the content in a file at `location`
45+ Saving the content in a file at `location`
46+ If `location` is an existing directory - try to deduce the filename
47+ If deduction failed, save the content in a temporary file created at a `location`
4548 """
4649 r = requests .get (url )
47- with open (location , 'wb' ) as f :
50+
51+ if Path .is_dir (location ):
52+ content_disposition = parse_it (r .headers ).get ("content-disposition" ) or {}
53+ filename_priority = [
54+ content_disposition .get ("filename*" ),
55+ content_disposition .get ("filename" ),
56+ Path (urlparse (url ).path ).name ,
57+ ]
58+ filename_found = False
59+ for filename in filename_priority :
60+ if filename is not None and len (filename ):
61+ filename_found = True
62+ location = location / filename
63+ break
64+ if not filename_found :
65+ location = Path (
66+ tempfile .NamedTemporaryFile (dir = location , delete = False ).name
67+ )
68+
69+ with open (location , "wb" ) as f :
4870 f .write (r .content )
4971
50- content_type = r .headers .get (' content-type' )
51- size = r .headers .get (' content-length' )
72+ content_type = r .headers .get (" content-type" )
73+ size = r .headers .get (" content-length" )
5274 size = int (size ) if size else None
5375
5476 resp = Response (location = location , content_type = content_type , size = size , url = url )
@@ -59,49 +81,57 @@ def fetch_http(url, location):
def fetch_ftp(url, location):
    """
    Return a `Response` object built from fetching the content at a FTP based `url` URL string
    Saving the content in a file at `location`
    If `location` is an existing directory - deduce the filename from the URL

    `location` may be a `str` or a `pathlib.Path`. When it names an existing
    directory, the returned `Response.location` is a `Path` inside that
    directory; otherwise `location` is used (and reported) unchanged.
    The login is anonymous (`FTP.login()` is called without credentials).
    """
    # Local import: remote FTP paths always use "/" separators, so they must not
    # be parsed with the platform-specific `Path` (backslashes on Windows).
    from pathlib import PurePosixPath

    url_parts = urlparse(url)
    netloc = url_parts.netloc

    remote_path = PurePosixPath(url_parts.path)
    directory = remote_path.parent
    filename = remote_path.name

    # Wrap in Path so a plain string `location` supports the directory check
    # and the "/" join below.
    target = Path(location)
    if target.is_dir():
        location = target / filename

    # guess_type() always returns a (type, encoding) tuple; the type is None
    # when the extension is unknown.
    content_type = MimeTypes().guess_type(filename)[0]

    # The context manager closes the connection even if a command fails.
    with FTP(netloc) as ftp:
        ftp.login()
        size = ftp.size(str(remote_path))
        ftp.cwd(str(directory))
        with open(location, "wb") as f:
            ftp.retrbinary("RETR {}".format(filename), f.write)

    resp = Response(location=location, content_type=content_type, size=size, url=url)
    return resp
89116
90117
def fetch(url, location=None):
    """
    Return a `Response` object built from fetching the content at the `url` URL string and store content at a provided `location`
    If `location` is None, save the content in a newly created temporary file
    If `location` is an existing directory - try to deduce the filename

    The `location` is handed to the scheme-specific fetcher as a `pathlib.Path`.
    Raise an Exception if the URL scheme is not one of ftp, http or https.
    """
    scheme = urlparse(url).scheme

    # Validate the scheme before touching the filesystem so that an unsupported
    # URL does not leave an orphan temporary file behind.
    if scheme not in ("ftp", "http", "https"):
        raise Exception("Not a supported/known scheme.")

    if location is None:
        # Only the name is needed: close the handle right away and keep the
        # (persistent, delete=False) file for the fetcher to write into.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            location = temp.name

    fetcher = fetch_ftp if scheme == "ftp" else fetch_http

    # The fetchers use Path operations (`is_dir`, `/`) on the location, so hand
    # them a Path even when a string (or a temporary file name) was supplied.
    return fetcher(url, Path(location))
0 commit comments