diff --git a/build.py b/build.py index 9cb95ffc2..7965585f9 100644 --- a/build.py +++ b/build.py @@ -1,5 +1,3 @@ -from __future__ import division, print_function, unicode_literals, with_statement - import fnmatch import os import shlex diff --git a/docs/conf.py b/docs/conf.py index 49b58df1a..2efada61a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,7 +30,7 @@ # General information about the project. project = u'Pando' -copyright = u'2016, Chad Whitacre et al.' +copyright = u'2016, Chad Whitacre et al' # RtD theme doubles the period # The full version, including alpha/beta/rc tags. release = open('../version.txt').read().strip() @@ -50,7 +50,6 @@ # -- Autodoc options -autodoc_default_flags = ['members', 'undoc-members', 'special-members'] autodoc_member_order = 'bysource' _autodoc_exclusions = { @@ -107,10 +106,7 @@ def setup(app): # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'pando', u'Pando Documentation', - [u'Chad Whitacre et al.'], 1) -] +man_pages = [] # -- Options for Texinfo output ------------------------------------------------ diff --git a/docs/reference.rst b/docs/reference.rst index 004928b67..93c50b822 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -4,4 +4,137 @@ This is the API reference for the Pando library. -.. automodule:: pando +pando.body_parsers +================== + +.. automodule:: pando.body_parsers + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.exceptions +================ + +.. automodule:: pando.exceptions + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.http +========== + +.. automodule:: pando.http + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.http.baseheaders +---------------------- + +.. 
automodule:: pando.http.baseheaders + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.http.mapping +------------------ + +.. automodule:: pando.http.mapping + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.http.request +------------------ + +.. automodule:: pando.http.request + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.http.response +------------------- + +.. automodule:: pando.http.response + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.logging +============= + +.. automodule:: pando.logging + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.state_chain +================= + +.. automodule:: pando.state_chain + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.testing +============= + +.. automodule:: pando.testing + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.testing.client +-------------------- + +.. automodule:: pando.testing.client + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.testing.harness +--------------------- + +.. automodule:: pando.testing.harness + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.utils +=========== + +.. automodule:: pando.utils + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.website +============= + +.. automodule:: pando.website + :members: + :undoc-members: + :special-members: + :show-inheritance: + +pando.wsgi +========== + +.. 
automodule:: pando.wsgi + :members: + :undoc-members: + :special-members: + :show-inheritance: diff --git a/docs/tutorial.rst b/docs/tutorial.rst index ab73f58e5..4c0c0a7a4 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -27,7 +27,7 @@ Step 3: Create a website root:: (foo)$ mkdir www (foo)$ cd www -Step 4: Create a web page, and start pando inside it:: +Step 4: Create a web page and start pando:: (foo)$ echo Greetings, program! > index.html.spt (foo)$ python -m pando diff --git a/pando/__init__.py b/pando/__init__.py index 6d079274a..e8be15923 100644 --- a/pando/__init__.py +++ b/pando/__init__.py @@ -1,16 +1,4 @@ -""" -.. automodule:: pando.body_parsers -.. automodule:: pando.exceptions -.. automodule:: pando.http -.. automodule:: pando.logging -.. automodule:: pando.state_chain -.. automodule:: pando.testing -.. automodule:: pando.utils -.. automodule:: pando.website -.. automodule:: pando.wsgi - -""" - +from http.cookies import Morsel from os.path import dirname, join import sys import pkg_resources @@ -32,3 +20,7 @@ __version__ = f.read() WINDOWS = sys.platform[:3] == 'win' + +if sys.version_info < (3, 8, 0): + # https://stackoverflow.com/q/50813091/2729778 + Morsel._reserved['samesite'] = 'SameSite' diff --git a/pando/body_parsers.py b/pando/body_parsers.py index 73f6670bb..f0cfaab68 100644 --- a/pando/body_parsers.py +++ b/pando/body_parsers.py @@ -1,8 +1,4 @@ -""" -:mod:`body_parsers` -=================== - -This module contains Pando's built-in body parsers. +"""This module contains Pando's built-in body parsers. Body parsers are optional ways to enable Pando to uniformly parse POST body content according to its supplied ``Content-Type``. diff --git a/pando/exceptions.py b/pando/exceptions.py index a617d6b74..cdc394159 100644 --- a/pando/exceptions.py +++ b/pando/exceptions.py @@ -1,9 +1,4 @@ -""" -:mod:`exceptions` -================= - -Custom exceptions raised by Pando -""" +"""Custom exceptions raised by Pando""" from . 
import Response diff --git a/pando/http/__init__.py b/pando/http/__init__.py index 2d1c24e81..aeb22209f 100644 --- a/pando/http/__init__.py +++ b/pando/http/__init__.py @@ -1,21 +1,4 @@ -""" -:mod:`http` -=========== - -.. automodule:: pando.http.baseheaders - :inherited-members: - :show-inheritance: -.. automodule:: pando.http.mapping - :inherited-members: - :show-inheritance: -.. automodule:: pando.http.request - :inherited-members: - :show-inheritance: -.. automodule:: pando.http.response - -""" - - +#: Dict of HTTP status codes to strings. status_strings = { 100: "Continue", 101: "Switching Protocols", diff --git a/pando/http/baseheaders.py b/pando/http/baseheaders.py index 25d40de47..1e818a103 100644 --- a/pando/http/baseheaders.py +++ b/pando/http/baseheaders.py @@ -1,8 +1,3 @@ -""" -:mod:`baseheaders` ------------------- -""" - from http.cookies import CookieError, SimpleCookie from .mapping import BytesMapping, CaseInsensitiveMapping diff --git a/pando/http/mapping.py b/pando/http/mapping.py index 27fdecf8d..90ffc0610 100644 --- a/pando/http/mapping.py +++ b/pando/http/mapping.py @@ -1,10 +1,18 @@ -""" -:mod:`mapping` --------------- -""" +from datetime import date +import re from aspen.http.mapping import Mapping as _Mapping, NO_DEFAULT +from .response import Response + + +FALSEISH = {'0', 'f', 'false', 'n', 'no'} +"The set of strings that should be converted to :obj:`False`." +TRUEISH = {'1', 't', 'true', 'y', 'yes'} +"The set of strings that should be converted to :obj:`True`." +NULLISH = {'', 'null', 'none'} +"The set of strings that should be converted to :obj:`None`." + class Mapping(_Mapping): @@ -27,9 +35,282 @@ def __init__(self, *a, **kw): def keyerror(self, name): """Raises a 400 :class:`~pando.http.response.Response`. """ - from .response import Response raise Response(400, "Missing key: %s" % repr(name)) + def bool(self, k, default=NO_DEFAULT): + """Get the last value with key `k`, as a boolean. 
+ + Raises a 400 :class:`.Response` if: + + - the key isn't found and no `default` value was provided; or + - the value isn't in either the :obj:`.FALSEISH` or :obj:`.TRUEISH` set + + Examples: + + >>> Mapping({'x': 'yes'}).bool('x') + True + >>> Mapping({'x': 'False'}).bool('x') + False + >>> Mapping({'x': ''}).bool('x') + Traceback (most recent call last): + ... + pando.http.response.Response: 400 Bad Request: `x` value '' is invalid + + """ + try: + r = self[k].lower() + except (KeyError, Response): + if default is NO_DEFAULT: + raise + return default + if r in TRUEISH: + return True + if r in FALSEISH: + return False + raise Response().error(400, "`%s` value %r is invalid" % (k, r)) + + def choice(self, k, choices, default=NO_DEFAULT): + """ + Get the last value with key `k`, and check that it matches one of the + elements of the `choices` set. + + Raises a 400 :class:`.Response` if: + + - the key isn't found and no `default` value was provided; or + - the value isn't contained in `choices` + + Examples: + + >>> choices = {'foo'} + >>> Mapping({'x': 'foo'}).choice('x', choices) + 'foo' + >>> Mapping({'x': 'Foo'}).choice('x', choices) + Traceback (most recent call last): + ... + pando.http.response.Response: 400 Bad Request: `x` value 'Foo' is invalid. Choices: {'foo'} + + """ + try: + r = self[k] + except (KeyError, Response): + if default is NO_DEFAULT: + raise + return default + if r not in choices: + raise Response().error(400, "`%s` value %r is invalid. Choices: %r" % (k, r, choices)) + return r + + def date(self, k, default=NO_DEFAULT, sep='-'): + """Get the last value with key `k`, as a :class:`~datetime.date`. + + Raises a 400 :class:`.Response` if: + + - the key isn't found and no `default` value was provided; or + - parsing the value as a date fails + + Examples: + + >>> Mapping({'x': '2021-06-14'}).date('x') + datetime.date(2021, 6, 14) + >>> Mapping({'x': '0'}).date('x') + Traceback (most recent call last): + ... 
    def int(self, k, default=NO_DEFAULT, minimum=None, maximum=None):
        """Get the last value with key `k`, as an integer.

        The `minimum` and `maximum` bounds are inclusive, and each one is only
        checked when it isn't :obj:`None`. When the key is missing and a
        `default` was given, the `default` is returned as-is: it is neither
        converted to an integer nor checked against the bounds.

        Raises a 400 :class:`.Response` if:

        - the key isn't found and no `default` value was provided; or
        - the value can't be converted to an integer; or
        - the value is less than `minimum` or greater than `maximum`

        Examples:

        >>> Mapping({'x': '1'}).int('x')
        1
        >>> Mapping({'x': 'a'}).int('x')
        Traceback (most recent call last):
          ...
        pando.http.response.Response: 400 Bad Request: `x` value 'a' is not a valid integer
        >>> Mapping({'x': '3'}).int('x', maximum=2)
        Traceback (most recent call last):
          ...
        pando.http.response.Response: 400 Bad Request: `x` value 3 is greater than 2
        >>> Mapping({'x': '-1'}).int('x', minimum=0)
        Traceback (most recent call last):
          ...
        pando.http.response.Response: 400 Bad Request: `x` value -1 is less than 0

        """
        try:
            r = self[k]
        except (KeyError, Response):
            if default is NO_DEFAULT:
                raise
            return default
        try:
            # `int` here is the builtin: the method name lives in the class
            # namespace, which isn't visible from inside the function body.
            r = int(r)
        except (ValueError, TypeError):
            raise Response().error(400, "`%s` value %r is not a valid integer" % (k, r))
        if minimum is not None and r < minimum:
            raise Response().error(400, "`%s` value %r is less than %i" % (k, r, minimum))
        if maximum is not None and r > maximum:
            raise Response().error(400, "`%s` value %r is greater than %i" % (k, r, maximum))
        return r
doesn't match the expected pattern + + """ + try: + v = self[k] + except (KeyError, Response): + if default is NO_DEFAULT: + raise + return default + if re.match(pattern, v): + return v + raise Response().error(400, "`%s` value %r doesn't match the expected pattern" % (k, v)) + + def ternary(self, k, default=NO_DEFAULT): + """Get the last value with key `k`, as a boolean or :obj:`None`. + + Raises a 400 :class:`.Response` if: + + - the key isn't found and no `default` value was provided; or + - the value isn't in any of :obj:`.FALSEISH`, :obj:`.TRUEISH` or :obj:`.NULLISH` + + Examples: + + >>> Mapping({'x': 'TRUE'}).ternary('x') + True + >>> Mapping({'x': 'f'}).ternary('x') + False + >>> print(Mapping({'x': ''}).ternary('x')) + None + >>> Mapping({'x': 'oui'}).ternary('x') + Traceback (most recent call last): + ... + pando.http.response.Response: 400 Bad Request: `x` value 'oui' is invalid + + """ + try: + r = self[k].lower() + except (KeyError, Response): + if default is NO_DEFAULT: + raise + return default + if r in TRUEISH: + return True + if r in FALSEISH: + return False + if r in NULLISH: + return None + raise Response().error(400, "`%s` value %r is invalid" % (k, r)) + + def word(self, k, default=NO_DEFAULT, pattern=r'^\w+$', ascii_only=True): + """Get the last value with key `k`, and check that it matches `pattern`. + + The `ascii_only` argument determines whether the :obj:`re.ASCII` flag is + passed to :func:`re.match()`. + + Raises a 400 :class:`.Response` if: + + - the key isn't found and no `default` value was provided; or + - the value doesn't match the pattern (i.e. ``re.match(pattern, value, flag)`` + returns :obj:`None`) + + Examples: + + >>> Mapping({'x': 'foo'}).word('x') + 'foo' + >>> Mapping({'x': ''}).word('x') + Traceback (most recent call last): + ... + pando.http.response.Response: 400 Bad Request: `x` value '' is empty + >>> Mapping({'x': 'blé'}).word('x') + Traceback (most recent call last): + ... 
+ pando.http.response.Response: 400 Bad Request: `x` value 'blé' contains forbidden characters + >>> Mapping({'x': 'blé'}).word('x', ascii_only=False) + 'blé' + + """ + try: + r = self[k] + except (KeyError, Response): + if default is NO_DEFAULT: + raise + return default + if not r: + raise Response().error(400, "`%s` value %r is empty" % (k, r)) + if not re.match(pattern, r, re.ASCII if ascii_only else 0): + raise Response().error(400, "`%s` value %r contains forbidden characters" % (k, r)) + return r + class CaseInsensitiveMapping(Mapping): @@ -61,10 +342,6 @@ def popall(self, name): class BytesMapping(Mapping): """This mapping automatically transcodes keys and values. - Attributes: - encoding (str): UTF-8 by default - encoding_errors (str): 'backslashreplace' by default - >>> m = BytesMapping() >>> m[b'foo'] = b'bar' >>> m[b'foo'] diff --git a/pando/http/request.py b/pando/http/request.py index a89ebee4e..6abb93283 100644 --- a/pando/http/request.py +++ b/pando/http/request.py @@ -1,8 +1,4 @@ -""" -:mod:`request` --------------- - -Define a Request class and child classes. +"""Define a Request class and child classes. Here is how we analyze the structure of an HTTP message, along with the objects we use to model each:: @@ -26,7 +22,7 @@ import string import sys import traceback -from urllib.parse import quote, quote_plus +from urllib.parse import quote, quote_plus, urlencode, urlsplit, urlunsplit import warnings from aspen.http.request import Path as _Path, Querystring as _Querystring @@ -121,7 +117,7 @@ class Request: .. attribute:: headers - A mapping of HTTP headers. See :class:`.Headers`. + A mapping of HTTP headers. See :class:`~pando.http.baseheaders.BaseHeaders`. 
""" @@ -306,14 +302,15 @@ def scheme(self): https://developer.mozilla.org/docs/Web/HTTP/Headers/X-Forwarded-Proto """ scheme = None - if self.website.trusted_proxies or not self.environ.get(b'REMOTE_ADDR'): + environ = getattr(self, 'environ', {}) + if self.website.trusted_proxies or not environ.get(b'REMOTE_ADDR'): source = '`X-Forwarded-Proto` header' scheme = self.headers.get(b'X-Forwarded-Proto') if scheme: scheme = scheme.decode('ascii', 'backslashreplace') else: source = '`wsgi.url_scheme` variable' - scheme = self.environ.get(b'wsgi.url_scheme') + scheme = environ.get(b'wsgi.url_scheme') if scheme: scheme = scheme.decode('ascii', 'backslashreplace') if scheme in self.website.known_schemes: @@ -334,7 +331,7 @@ def source(self): :attr:`~pando.website.DefaultConfiguration.trusted_proxies`. .. warning:: - If the :attr:`~pando.website.DefaultConfiguration.trusted_proxies` + If the :attr:`~pando.website.DefaultConfiguration.trusted_proxies` list is incorrect or incomplete, then this property can mistakenly return the IP address of a reverse proxy instead of the client's IP address. @@ -446,11 +443,69 @@ def allow(self, *methods): }) def is_xhr(self): - """Check the value of X-Requested-With. + """Check the value of the ``X-Requested-With`` header. """ val = self.headers.get(b'X-Requested-With', b'') return val.lower() == b'xmlhttprequest' + def sanitize_untrusted_url(self, url): + """Sanitize a URL provided by the client. + + This method can be used to prevent “open redirect” vulnerabilities. + + Raises a 400 :class:`.Response` if the url is invalid or unacceptable (e.g. + if it includes a domain name different than :obj:`self.headers['Host']`). 
+ """ + host = self.headers['Host'] + if isinstance(url, bytes): + url = url.decode('utf8', 'replace') + try: + scheme, netloc, path, query, fragment = urlsplit(url) + except ValueError: + raise Response(400, f"{url!r} isn't a valid URL.") + if scheme or netloc: + if scheme and scheme not in self.website.known_schemes: + raise Response( + 400, + f"URL {url!r} starts with unknown scheme {scheme!r}.", + ) + if netloc: + if netloc == host: + return url + raise Response( + 400, + f"The host in URL {url!r} doesn't match the `Host` header value {host!r}.", + ) + elif path: + if not path.startswith('/'): + # relative path + segments = path.split('/') + path = self.path.raw.split('/') + for seg in segments: + if seg == '..': + if path and path[-1] == '': + path.pop() + if path: + path.pop() + elif seg == '.': + if path: + if path[-1] == '': + continue + else: + path.pop() + elif seg == '' and path[-1] == '': + continue + else: + path.append(seg) + if segments[-1] in ('.', '..'): + path.append('') + path = '/'.join(path) + else: + path = self.path.raw + if fragment and not query: + query = self.qs.raw + return urlunsplit((self.scheme, host, path, query, fragment)) + # Request -> Line # --------------- @@ -619,8 +674,21 @@ def __new__(cls, raw): class _QuerystringMapping(Mapping, _Querystring): + __init__ = _Querystring.__init__ + def derive(self, **kw): + new_qs = dict(self) + for k, v in kw.items(): + if v is None: + new_qs.pop(k, None) + else: + new_qs[k] = v + return ('?' + urlencode(new_qs, doseq=True)) if new_qs else '' + + def serialize(self, **kw): + return ('?' + urlencode(self, doseq=True)) if self else '' + # Request -> Line -> Version # .......................... 
diff --git a/pando/http/response.py b/pando/http/response.py index 7b5c23236..c1ac73354 100644 --- a/pando/http/response.py +++ b/pando/http/response.py @@ -1,13 +1,15 @@ -""" -:mod:`response` ---------------- -""" - import os import sys +from aspen.request_processor.dispatcher import DispatchResult, DispatchStatus +import aspen.simplates.json_ as json +from aspen.utils import Constant + +from ..utils import encode_url from . import status_strings -from .baseheaders import BaseHeaders as Headers + + +MISSING = Constant('MISSING') class CloseWrapper: @@ -38,7 +40,7 @@ def __init__(self, code=200, body='', headers=None): - code an HTTP response code, e.g., 404 - body the message body as a string - - headers a dict, list, or bytestring of HTTP headers + - headers a dict or list of HTTP headers Code is first because when you're raising your own Responses, they're usually error conditions. Body is second because one more often wants @@ -56,6 +58,7 @@ def __init__(self, code=200, body='', headers=None): Exception.__init__(self) self.code = code self.body = body + from .baseheaders import BaseHeaders as Headers self.headers = Headers(headers) def to_wsgi(self, environ, start_response, charset): @@ -88,7 +91,7 @@ def to_wsgi(self, environ, start_response, charset): start_response(wsgi_status, wsgi_headers) body = self.body if not isinstance(body, (list, tuple)): - body = [body] + body = (body,) body = (x.encode(charset) if not isinstance(x, bytes) else x for x in body) return CloseWrapper(self.request, body) @@ -96,15 +99,13 @@ def __repr__(self): return "" % self._status_text() def __str__(self): + r = self._status_text() + if self.code // 100 == 3: + r += f" <{self.headers.get('Location')}>" body = self.body - if len(body) < 500: - if not isinstance(body, str): - if isinstance(body, bytes): - body = body.decode('ascii', 'backslashreplace') - else: - body = str(body) - return ': '.join((self._status_text(), body)) - return self._status_text() + if isinstance(body, (str, 
bytes)) and len(body) < 500: + r += f": {body}" + return r def _status_text(self): return "%d %s" % (self.code, self._status()) @@ -123,6 +124,105 @@ def _to_http(self, version): body = body.replace(b'\r\r', b'\r') return b'\r\n'.join([status_line, headers, b'', body]) + def erase_cookie(self, *a, **kw): + """Calls :meth:`pando.website.Website.erase_cookie`. + """ + return self.website.erase_cookie(self.headers.cookie, *a, **kw) + + def error(self, code, msg=''): + """Set :attr:`self.code` and :attr:`self.body`, then return :obj:`self`. + + Example: + + >>> raise Response().error(403, "You're not allowed to do this.") + Traceback (most recent call last): + ... + pando.http.response.Response: 403 Forbidden: You're not allowed to do this. + + """ + self.code = code + self.body = msg + return self + + def invalid_input( + self, input_value, input_name, input_location, code=400, + msg="`%s` value %s in request %s is invalid or unsupported", + ): + """Set :attr:`self.code` and :attr:`self.body`, then return :obj:`self`. + + Examples: + + >>> raise Response().invalid_input('XX', 'country', 'body') + Traceback (most recent call last): + ... + pando.http.response.Response: 400 Bad Request: `country` value 'XX' in request body is invalid or unsupported + >>> Response().invalid_input('X' * 500, 'currency', 'querystring').body + "`currency` value 'XXXXXXXXXXXXXXXXXXXXXXX[…]XXXXXXXXXXXXXXXXXXXXXXX' in request querystring is invalid or unsupported" + + """ + self.code = code + input_value = repr(input_value) + if len(input_value) > 50: + input_value = input_value[:24] + '[…]' + input_value[-24:] + self.body = msg % (input_name, input_value, input_location) + return self + + def json(self, obj=MISSING, code=200): + """Load or dump an object from or into a response body. 
+ + >>> r = Response() + >>> print(r.json({'foo': 'bar'}).body) + { + "foo": "bar" + } + >>> r.json() + {'foo': 'bar'} + + """ + if obj is MISSING: + return json.loads(self.body) + else: + self.code = code + self.body = json.dumps(obj) + self.headers[b'Content-Type'] = b'application/json' + return self + + def redirect(self, url, code=302, trusted_url=False): + """ + Returns the response after modifying its code, setting its ``Location`` header, + and sanitizing the URL (unless :obj:`trusted_url` is set to :obj:`True`). + """ + if not trusted_url: + url = self.request.sanitize_untrusted_url(url) + self.code = code + self.headers[b'Location'] = encode_url(url) + return self + + def render(self, fspath, state, **extra): + """Render the resource file `fspath` with `state` plus `extra` as context. + + This method is an “internal redirect”, it uses a different file to generate + the response without changing the URL on the client side. It should be + used sparingly. + + """ + from ..state_chain import render_response + state.update(extra) + if 'dispatch_result' not in state: + # `render_response` needs `state['dispatch_result']` + state['dispatch_result'] = DispatchResult( + DispatchStatus.okay, fspath, None, None, None + ) + website = state['website'] + resource = website.request_processor.resources.get(fspath) + render_response(state, resource, self, website) + return self + + def set_cookie(self, *a, **kw): + """Calls :meth:`pando.website.Website.set_cookie`. + """ + return self.website.set_cookie(self.headers.cookie, *a, **kw) + def set_whence_raised(self): """Sets and returns the value of `self.whence_raised`. @@ -145,3 +245,38 @@ def set_whence_raised(self): filepath = os.sep.join(filepath.split(os.sep)[-2:]) self.whence_raised = (filepath, frame.f_lineno) return self.whence_raised + + def success(self, code=200, msg=''): + """Set :attr:`self.code` and :attr:`self.body`, then return :obj:`self`. 
+ + Example: + + >>> raise Response().success(202, "Your request is being processed.") + Traceback (most recent call last): + ... + pando.http.response.Response: 202 Accepted: Your request is being processed. + + """ + self.code = code + self.body = msg + return self + + @property + def text(self): + """Return the response's body as a string. + + This is meant to be used in tests. + """ + body = self.body + if isinstance(body, str): + return body + if getattr(self, 'website', None): + codec = self.website.request_processor.encode_output_as + else: + codec = 'utf8' + if isinstance(body, bytes): + return body.decode(codec) + return ''.join( + chunk.decode(codec) if isinstance(chunk, bytes) else chunk + for chunk in body + ) diff --git a/pando/logging.py b/pando/logging.py index 6670a3d05..773a45b51 100644 --- a/pando/logging.py +++ b/pando/logging.py @@ -1,10 +1,4 @@ -""" -:mod:`logging` -============== - -Pando logging convenience wrappers - -""" +"""Pando logging convenience wrappers""" import sys import logging diff --git a/pando/state_chain.py b/pando/state_chain.py index 6ead31fe0..6595d6433 100644 --- a/pando/state_chain.py +++ b/pando/state_chain.py @@ -1,7 +1,4 @@ """ -:mod:`state_chain` ------------------- - These functions comprise the request processing functionality of Pando. The order of functions in this module defines Pando's state chain for request diff --git a/pando/testing/__init__.py b/pando/testing/__init__.py index 26ca7b495..e69de29bb 100644 --- a/pando/testing/__init__.py +++ b/pando/testing/__init__.py @@ -1,8 +0,0 @@ -""" -:mod:`testing` -============== - -.. automodule:: pando.testing.client -.. 
def encode_url(url):
    """Percent-encode `url` and return it ASCII-encoded.

    Every ASCII punctuation character is declared safe, so delimiters and
    existing ``%XX`` escapes are left untouched. Other characters outside
    the unreserved set (spaces, control characters, non-ASCII text) are
    percent-encoded.
    """
    escaped = quote(url, safe=string.punctuation)
    return maybe_encode(escaped)
+ """ + return self.set_cookie(cookies, key, '', THE_PAST, **kw) + + def set_cookie( + self, cookies, key, value, expires=None, httponly=True, path='/', samesite='lax', + ): + """Modify a standard :class:`~http.cookies.SimpleCookie` object. + + The value of the `expires` argument can be a string, :class:`.datetime`, + or :class:`.timedelta` object. + + For details on the `samesite` argument, see + https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite + + Returns the modified :class:`~http.cookies.Morsel` object. + """ + key = key + cookies[key] = value + cookie = cookies[key] + if expires: + if isinstance(expires, timedelta): + expires += utcnow() + if isinstance(expires, datetime): + expires = to_rfc822(expires) + cookie['expires'] = expires + if httponly: + cookie['httponly'] = True + if path: + cookie['path'] = path + if samesite: + cookie['samesite'] = samesite + if self.cookie_domain: + cookie['domain'] = self.cookie_domain + if self.cookie_secure: + cookie['secure'] = True + return cookie + # Backward compatibility # ====================== @@ -222,10 +259,16 @@ class DefaultConfiguration: colorize_tracebacks = True "Use the Pygments package to prettify tracebacks with syntax highlighting." + cookie_domain = None + "The default `domain` attribute value of cookies set by `.Website.set_cookie`." + + cookie_secure = False + "If :obj:`True`, `.Website.set_cookie` restricts cookies to secure connections." + known_schemes = {'http', 'https', 'ws', 'wss'} """ The set of known and acceptable request URL schemes. Used by - :attr:`.Request.scheme`. + :attr:`.Request.scheme` and :meth:`.Request.sanitize_untrusted_url()`. """ list_directories = False diff --git a/pando/wsgi.py b/pando/wsgi.py index 142bdb86c..b18db77c4 100644 --- a/pando/wsgi.py +++ b/pando/wsgi.py @@ -1,7 +1,4 @@ """ -:mod:`wsgi` -=========== - Provide a WSGI callable. 
(It could be nice if this was at ``pando:wsgi`` instead of ``pando.wsgi:website``, diff --git a/pando/www/autoindex.html.spt b/pando/www/autoindex.html.spt index 648b72804..b5dfca303 100644 --- a/pando/www/autoindex.html.spt +++ b/pando/www/autoindex.html.spt @@ -3,10 +3,6 @@ Color scheme: http://colorschemedesigner.com/#0.21Tw0w0w0w0 """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals import os import stat diff --git a/pando/www/error.spt b/pando/www/error.spt index 7e418911f..0af69b4d2 100644 --- a/pando/www/error.spt +++ b/pando/www/error.spt @@ -1,20 +1,8 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from html import escape as html_escape from pando import json from pando.http import status_strings -try: # 3 - from html import escape as html_escape -except ImportError: # 2 - from cgi import escape as cgi_escape - def html_escape(*args,**kwargs): - # make the defaults match the py3 defaults - kwargs['quote'] = kwargs.get('quote', True) - return cgi_escape(*args,**kwargs) - try: from pygments.lexers import PythonTracebackLexer from pygments.formatters import HtmlFormatter diff --git a/setup.cfg b/setup.cfg index d28074317..9948e8b89 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,5 @@ [flake8] -max-line-length = 100 -ignore = E226,E302,E305,W504 +ignore = E226,E302,E305,E501,W504 [tool:pytest] doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE diff --git a/tests/test_request.py b/tests/test_request.py index bbf306c66..19a124ca9 100644 --- a/tests/test_request.py +++ b/tests/test_request.py @@ -100,7 +100,7 @@ def test_headers_dont_unicodify_cookie(): def test_baseheaders_loads_cookies_as_str(): headers = BaseHeaders({b"Cookie": b"key=value"}) - assert headers.cookie[str('key')].value == str('value') + assert headers.cookie['key'].value == 'value' # 
aliases @@ -243,6 +243,95 @@ def test_from_wsgi_tolerates_unicode_environ(harness): assert headers['À'.encode('latin1')] == 'µ'.encode('utf8') +# sanitize_untrusted_url + +def make_request(harness, uri=b'/', host=b'localhost', scheme=b'https'): + return Request( + harness.client.website, + uri=uri, + headers={b'Host': host, b'X-Forwarded-Proto': scheme} + ) + +def test_sanitize_untrusted_url_invalid(harness): + with raises(Response) as x: + make_request(harness).sanitize_untrusted_url('//[foo/') + response = x.value + assert response.code == 400 + assert response.text == "'//[foo/' isn't a valid URL." + +def test_sanitize_untrusted_url_absolute(harness): + url = 'https://localhost:8000/path?key=value#fragment' + request = make_request(harness, host=b'localhost:8000') + assert request.sanitize_untrusted_url(url) == url + +def test_sanitize_untrusted_url_absolute_different_port(harness): + url = 'https://localhost:8888/path?key=value#fragment' + with raises(Response) as x: + make_request(harness).sanitize_untrusted_url(url) + response = x.value + assert response.code == 400 + assert response.text == f"The host in URL {url!r} doesn't match the `Host` header value 'localhost'." + +def test_sanitize_untrusted_url_absolute_different_host(harness): + url = 'https://example.com/path?key=value#fragment' + with raises(Response) as x: + make_request(harness).sanitize_untrusted_url(url) + response = x.value + assert response.code == 400 + assert response.text == f"The host in URL {url!r} doesn't match the `Host` header value 'localhost'." + +def test_sanitize_untrusted_url_unknown_scheme(harness): + with raises(Response) as x: + make_request(harness).sanitize_untrusted_url('ftp:/foo') + response = x.value + assert response.code == 400 + assert response.text == "URL 'ftp:/foo' starts with unknown scheme 'ftp'." 
+ +def test_sanitize_untrusted_url_scheme_relative(harness): + url = '//localhost/path?key=value#fragment' + actual = make_request(harness).sanitize_untrusted_url(url) + assert actual == url + +def test_sanitize_untrusted_url_scheme_relative_with_different_host(harness): + url = '//example.org/path' + with raises(Response) as x: + make_request(harness).sanitize_untrusted_url(url) + response = x.value + assert response.code == 400 + assert response.text == f"The host in URL {url!r} doesn't match the `Host` header value 'localhost'." + +def test_sanitize_untrusted_url_absolute_path(harness): + url = '/path?key=value#fragment' + expected = 'https://localhost' + url + actual = make_request(harness).sanitize_untrusted_url(url) + assert actual == expected + +def test_sanitize_untrusted_url_relative_path(harness): + url = '../foo/.././path' + expected = 'https://localhost/path' + actual = make_request(harness).sanitize_untrusted_url(url) + assert actual == expected + url = '.' + expected = 'https://localhost/foo/' + request = make_request(harness, uri=b'/foo/bar?key=value') + actual = request.sanitize_untrusted_url(url) + assert actual == expected + +def test_sanitize_untrusted_url_querystring_only(harness): + url = '?foo=bar' + expected = 'https://localhost?foo=bar' + request = make_request(harness, uri=b'?key=value') + actual = request.sanitize_untrusted_url(url) + assert actual == expected + +def test_sanitize_untrusted_url_fragment_only(harness): + url = '#fragment' + expected = 'https://localhost/?key=value#fragment' + request = make_request(harness, uri=b'/?key=value') + actual = request.sanitize_untrusted_url(url) + assert actual == expected + + # source def request(harness, forwarded_for, source, **kw): @@ -297,3 +386,29 @@ def test_request_source_is_cached(harness): src1 = r.source src2 = r.source assert src1 is src2 + + +# querystring + +def test_querystring_derive(harness): + request = Request(harness.client.website) + actual = request.qs.derive(key='value') + 
expected = '?key=value' + assert actual == expected + request = Request(harness.client.website, uri=b'/?key=value') + actual = request.qs.derive(key='different_value') + expected = '?key=different_value' + assert actual == expected + request = Request(harness.client.website, uri=b'/?key=value') + actual = request.qs.derive(key=None) + expected = '' + assert actual == expected + +def test_querystring_serialize(harness): + request = Request(harness.client.website, uri=b'/?a=0&b=0&d=1') + request.qs['b'] = '1' + request.qs['c'] = '2' + del request.qs['d'] + actual = request.qs.serialize() + expected = '?a=0&b=1&c=2' + assert actual == expected diff --git a/tests/test_response.py b/tests/test_response.py index 4095b2d6c..0cbe2d79f 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -3,6 +3,7 @@ from pando import Response from pando.exceptions import CRLFInjection +from pando.website import THE_PAST def test_response_to_wsgi(): @@ -29,25 +30,13 @@ def start_response(status, headers): def test_response_body_can_be_bytestring(): response = Response(body=b"Greetings, program!") - expected = b"Greetings, program!" - actual = response.body - assert actual == expected - -def test_response_body_as_bytestring_results_in_an_iterable(): - response = Response(body=b"Greetings, program!") - - def start_response(status, headers): - pass - - expected = [b"Greetings, program!"] - actual = list(response.to_wsgi({}, start_response, 'utf8').body) - assert actual == expected + assert response.body == b"Greetings, program!" + assert response.text == "Greetings, program!" def test_response_body_can_be_iterable(): response = Response(body=["Greetings, ", "program!"]) - expected = ["Greetings, ", "program!"] - actual = response.body - assert actual == expected + assert response.body == ["Greetings, ", "program!"] + assert response.text == "Greetings, program!" 
def test_response_body_as_iterable_comes_through_untouched(): response = Response(body=[b"Greetings, ", b"program!"]) @@ -60,12 +49,11 @@ def start_response(status, headers): assert actual == expected def test_response_body_can_be_unicode(): - try: - Response(body='Greetings, program!') - except Exception: - assert False, 'expecting no error' + response = Response(body="Greetings, program!") + assert response.body == "Greetings, program!" + assert response.text == "Greetings, program!" -def test_response_headers_are_str(): +def test_wsgi_response_headers_are_str(): response = Response() response.headers[b'Location'] = b'somewhere' @@ -85,11 +73,33 @@ def inject(): def test_response_cookie(): response = Response() - response.headers.cookie[str('foo')] = str('bar') + response.headers.cookie['foo'] = 'bar' + + def start_response(status, headers): + assert headers[0][0] == 'Set-Cookie' + assert headers[0][1].startswith('foo=bar') + + response.to_wsgi({}, start_response, 'utf8') + +def test_response_set_cookie(harness): + response = Response() + response.website = harness.client.website + response.set_cookie('foo', 'bar') def start_response(status, headers): - assert headers[0][0] == str('Set-Cookie') - assert headers[0][1].startswith(str('foo=bar')) + assert headers[0][0] == 'Set-Cookie' + assert headers[0][1].startswith('foo=bar;') + + response.to_wsgi({}, start_response, 'utf8') + +def test_response_erase_cookie(harness): + response = Response() + response.website = harness.client.website + response.erase_cookie('foo') + + def start_response(status, headers): + assert headers[0][0] == 'Set-Cookie' + assert headers[0][1] == f'foo=""; expires={THE_PAST}; HttpOnly; Path=/; SameSite=lax' response.to_wsgi({}, start_response, 'utf8') @@ -101,3 +111,23 @@ def test_set_whence_raised_works(): r.set_whence_raised() assert r.whence_raised[0] == 'tests' + os.sep + 'test_response.py' assert isinstance(r.whence_raised[1], int) + +def test_response_render(harness): + 
harness.fs.project.mk(('refresh.spt', """ + [---] + if url: + refresh_header = b'%i;url=%s' % (state.get('interval', 0), response.encode_url(url)) + else: + refresh_header = b'%i' % interval + response.headers[b'Refresh'] = refresh_header + [---] text/plain + Processing… + """)) + response = Response() + state = { + 'website': harness.client.website, + 'response': response, + 'accept_header': '*/*', + } + response.render(harness.fs.project.root + '/refresh.spt', state, interval=0, url='') + assert response.text == "Processing…\n" diff --git a/tests/test_test_client.py b/tests/test_test_client.py index 5342bda91..9c4e3ab58 100644 --- a/tests/test_test_client.py +++ b/tests/test_test_client.py @@ -22,7 +22,7 @@ def test_test_client_handles_body(harness): def test_test_client_sends_cookies(harness): harness.fs.www.mk(('foo.spt', ''' [---] - miam = request.headers.cookie[str('miam')].value + miam = request.headers.cookie['miam'].value [---] text/plain via stdlib_format {miam}''')) response = harness.client.POST('/foo', cookies={'miam': 'a_cookie'}) @@ -58,8 +58,8 @@ def test_test_client_can_have_file_upload_content_type_overriden(harness): def test_stateful_test_client_passes_cookies(harness): harness.fs.www.mk(('foo.spt', ''' [---] - csrf_token = request.headers.cookie[str('csrf_token')].value - session = request.headers.cookie[str('session')].value + csrf_token = request.headers.cookie['csrf_token'].value + session = request.headers.cookie['session'].value [---] text/plain via stdlib_format {csrf_token} and {session}''')) with harness.client.get_session() as sess: