1616
1717from ftplib import FTP
1818from mimetypes import MimeTypes
19- import os
20- import tempfile
2119from urllib .parse import urlparse
20+ from kiss_headers import parse_it
21+ from pathlib import Path
2222
2323import requests
24+ import tempfile
2425
2526
2627class Response :
@@ -41,14 +42,33 @@ def __init__(self, location, content_type, size, url):
def fetch_http(url, location):
    """
    Return a `Response` object built from fetching the content at a HTTP/HTTPS
    based `url` URL string, saving the content in a file at `location`.

    `location` may be a string or a `pathlib.Path`.
    If `location` is an existing directory, try to deduce the filename, in
    order, from the Content-Disposition `filename*` parameter, its `filename`
    parameter, then the last segment of the URL path. Only the final path
    component of a candidate is used so that a server-provided name cannot
    place the file outside of `location`.
    If no filename can be deduced, save the content in a new temporary file
    created inside `location`.
    """
    r = requests.get(url)

    # Wrap in Path() so that plain string locations (e.g. the temporary file
    # name that fetch() passes by default) are handled as well as Path objects.
    target_dir = Path(location)
    if target_dir.is_dir():
        content_disposition = parse_it(r.headers).get("content-disposition") or {}
        candidates = (
            content_disposition.get("filename*"),
            content_disposition.get("filename"),
            Path(urlparse(url).path).name,
        )

        filename = None
        for candidate in candidates:
            if not candidate:
                continue
            # The header value is controlled by the remote server: keep only
            # the base name and reject names that still point at a directory.
            base_name = Path(candidate).name
            if base_name not in ("", ".", ".."):
                filename = base_name
                break

        if filename is not None:
            location = target_dir / filename
        else:
            # delete=False keeps the file on disk; the `with` block only closes
            # the handle instead of leaking it.
            with tempfile.NamedTemporaryFile(dir=target_dir, delete=False) as temp:
                location = Path(temp.name)

    with open(location, "wb") as f:
        f.write(r.content)

    content_type = r.headers.get("content-type")
    size = r.headers.get("content-length")
    size = int(size) if size else None

    # NOTE(review): the visible hunk ends on this statement; the tail of the
    # function (presumably `return resp`) is outside the visible range and is
    # left untouched.
    resp = Response(location=location, content_type=content_type, size=size, url=url)
@@ -59,49 +79,57 @@ def fetch_http(url, location):
def fetch_ftp(url, location):
    """
    Return a `Response` object built from fetching the content at a FTP based
    `url` URL string, saving the content in a file at `location`.

    `location` may be a string or a `pathlib.Path`.
    If `location` is an existing directory, the filename is deduced from the
    last segment of the URL path and the content is saved under that name
    inside the directory.

    The content type is guessed from the filename and the size is the value
    reported by the FTP server.
    """
    # URL paths always use "/" separators, whatever the local platform is, so
    # they are parsed with PurePosixPath rather than the platform-specific Path.
    from pathlib import PurePosixPath

    url_parts = urlparse(url)

    netloc = url_parts.netloc
    remote_path = PurePosixPath(url_parts.path)
    directory = remote_path.parent
    filename = remote_path.name

    # Wrap in Path() so that plain string locations are supported too.
    destination = Path(location)
    if destination.is_dir():
        location = destination / filename

    # guess_type() always returns a (type, encoding) pair; the type is None
    # when nothing could be guessed.
    content_type, _encoding = MimeTypes().guess_type(filename)

    # The context manager closes the connection even if a command fails.
    with FTP(netloc) as ftp:
        ftp.login()
        size = ftp.size(str(remote_path))
        ftp.cwd(str(directory))
        with open(location, "wb") as f:
            ftp.retrbinary("RETR {}".format(filename), f.write)

    resp = Response(location=location, content_type=content_type, size=size, url=url)
    return resp
89114
90115
def fetch(url, location=None):
    """
    Return a `Response` object built from fetching the content at the `url`
    URL string and storing the content at the provided `location`.

    If `location` is None, save the content in a newly created temporary file.
    If `location` is an existing directory, the scheme-specific fetcher tries
    to deduce the filename.

    Raise an Exception if the URL scheme is not one of ftp, http or https.
    """
    scheme = urlparse(url).scheme

    # Pick the fetcher before touching the filesystem so that an unsupported
    # URL does not leave an orphaned temporary file behind.
    if scheme == "ftp":
        fetcher = fetch_ftp
    elif scheme in ("http", "https"):
        fetcher = fetch_http
    else:
        raise Exception("Not a supported/known scheme.")

    if location is None:
        # delete=False keeps the file on disk; the `with` block only closes
        # the handle instead of leaking it, the fetcher reopens the path.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            location = temp.name

    return fetcher(url, location)
0 commit comments