|
7 | 7 | import posixpath |
8 | 8 | import warnings |
9 | 9 | import six |
10 | | -from six.moves.urllib.parse import (urlsplit, urlunsplit, |
| 10 | +from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit, |
11 | 11 | urldefrag, urlencode, urlparse, |
12 | 12 | quote, parse_qs, parse_qsl) |
13 | 13 | from six.moves.urllib.request import pathname2url, url2pathname |
|
18 | 18 | _ALWAYS_SAFE_BYTES = (b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' |
19 | 19 | b'abcdefghijklmnopqrstuvwxyz' |
20 | 20 | b'0123456789' b'_.-') |
| 21 | + |
| 22 | + |
def urljoin_rfc(base, ref, encoding='utf-8'):
    r"""
    .. warning::

        This function is deprecated and will be removed in future.
        Please use ``urlparse.urljoin`` instead.

    Same as urlparse.urljoin but supports unicode values in base and ref
    parameters (in which case they will be converted to str using the given
    encoding).

    Always returns a str.

    :param base: the base URL; passed through ``unicode_to_str`` with
        *encoding* before joining.
    :param ref: the reference (possibly relative) URL to resolve against
        *base*; passed through ``unicode_to_str`` with *encoding* as well.
    :param encoding: encoding handed to ``unicode_to_str`` for both
        arguments (default ``'utf-8'``).
    :return: the result of ``urljoin`` applied to the two converted values.

    Emits a ``DeprecationWarning`` on every call.

    >>> import w3lib.url
    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
    'http://www.example.com/otherpath/index2.html'
    >>>

    >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm')
    'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
    >>>

    """

    # stacklevel=2 attributes the warning to the code that called
    # urljoin_rfc rather than to this line, so users can find the call
    # site they need to migrate.
    warnings.warn("w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
                  DeprecationWarning, stacklevel=2)

    str_base = unicode_to_str(base, encoding)
    str_ref = unicode_to_str(ref, encoding)
    return urljoin(str_base, str_ref)
| 54 | + |
# Two punctuation byte sets; the RFC citations are the ones given by the
# original author.
# NOTE(review): '|' does not appear in RFC 3986's reserved set -- presumably
# kept on purpose; confirm before relying on the citation.
_unreserved_marks = b"-_.!~*'()"  # RFC 3986 sec 2.3
_reserved = b';/?:@&=+$|,#'  # RFC 3986 (Generic Syntax)

# Concatenation, in order, of: the always-safe bytes, the percent sign, the
# reserved bytes and the unreserved marks.
_safe_chars = b''.join((_ALWAYS_SAFE_BYTES, b'%', _reserved, _unreserved_marks))
|
0 commit comments