|
import asyncio
import re
from urllib.parse import urlsplit

from django.conf import settings
from django.core.exceptions import PermissionDenied
from django.core.mail import mail_managers
from django.http import HttpResponsePermanentRedirect
from django.urls import is_valid_path
from django.utils.http import escape_leading_slashes

from django_async_extensions.middleware.base import AsyncMiddlewareMixin
| 12 | + |
| 13 | + |
class AsyncCommonMiddleware(AsyncMiddlewareMixin):
    """
    Async "common" middleware covering a handful of routine chores:

        - Refuse requests whose User-Agent matches one of the patterns in
          settings.DISALLOWED_USER_AGENTS.

        - Rewrite URLs according to settings.PREPEND_WWW and
          settings.APPEND_SLASH:

            - With PREPEND_WWW, a host that does not start with "www." is
              redirected to the "www." host.

            - With APPEND_SLASH, a path that has no trailing slash and does
              not resolve, but does resolve once a slash is appended, is
              redirected to the slash-terminated path. Otherwise the
              request is handled as usual.

        - Set Content-Length on non-streaming responses that lack it.

    Subclasses may override ``response_redirect_class`` to change the kind
    of redirect that is issued.
    """

    response_redirect_class = HttpResponsePermanentRedirect

    async def process_request(self, request):
        """
        Reject disallowed User-Agents, then issue the PREPEND_WWW redirect
        (combined with the APPEND_SLASH rewrite when applicable).

        Return a redirect response, or None to let the request continue.
        Raise PermissionDenied for a forbidden User-Agent.
        """
        # Deny the request outright if its User-Agent is on the block list.
        agent = request.META.get("HTTP_USER_AGENT")
        if agent is not None and any(
            pattern.search(agent) for pattern in settings.DISALLOWED_USER_AGENTS
        ):
            raise PermissionDenied("Forbidden user agent")

        # Everything below only concerns the settings.PREPEND_WWW redirect.
        host = request.get_host()
        wants_www = settings.PREPEND_WWW and host and not host.startswith("www.")
        if not wants_www:
            return None

        # A redirect is going out anyway, so fold the (potentially costly)
        # append-slash check in here to avoid a second redirect. The other
        # place this check happens is process_response, on a 404.
        path = (
            self.get_full_path_with_slash(request)
            if self.should_redirect_with_slash(request)
            else request.get_full_path()
        )
        return self.response_redirect_class(f"{request.scheme}://www.{host}{path}")

    def should_redirect_with_slash(self, request):
        """
        Tell whether appending a slash to the request path would turn an
        unresolvable path into a resolvable one while settings.APPEND_SLASH
        is enabled.

        A matched view can opt out by carrying a false
        ``should_append_slash`` attribute.
        """
        if not settings.APPEND_SLASH or request.path_info.endswith("/"):
            return False

        urlconf = getattr(request, "urlconf", None)
        if is_valid_path(request.path_info, urlconf):
            # The path already resolves as-is; nothing to rewrite.
            return False

        slashed_match = is_valid_path("%s/" % request.path_info, urlconf)
        if not slashed_match:
            return False
        return getattr(slashed_match.func, "should_append_slash", True)

    def get_full_path_with_slash(self, request):
        """
        Build the request's full path with a trailing slash appended.

        Raise RuntimeError when settings.DEBUG is on and the request method
        is DELETE, POST, PUT, or PATCH.
        """
        # escape_leading_slashes() keeps the result from being read as a
        # scheme-relative URL.
        slashed_path = escape_leading_slashes(
            request.get_full_path(force_append_slash=True)
        )
        if settings.DEBUG and request.method in ("DELETE", "POST", "PUT", "PATCH"):
            details = {
                "method": request.method,
                "url": request.get_host() + slashed_path,
            }
            raise RuntimeError(
                "You called this URL via %(method)s, but the URL doesn't end "
                "in a slash and you have APPEND_SLASH set. Django can't "
                "redirect to the slash URL while maintaining %(method)s data. "
                "Change your form to point to %(url)s (note the trailing "
                "slash), or set APPEND_SLASH=False in your Django settings."
                % details
            )
        return slashed_path

    async def process_response(self, request, response):
        """
        Redirect a 404 to the slash-appended path when
        should_redirect_with_slash() approves; otherwise return the response,
        filling in Content-Length for non-streaming responses that lack it.
        """
        # "Not Found" may simply mean the trailing slash is missing.
        is_not_found = response.status_code == 404
        if is_not_found and self.should_redirect_with_slash(request):
            target = self.get_full_path_with_slash(request)
            return self.response_redirect_class(target)

        # Only non-streaming responses without the header get one computed.
        if not (response.streaming or response.has_header("Content-Length")):
            response.headers["Content-Length"] = str(len(response.content))

        return response
| 117 | + |
| 118 | + |
class AsyncBrokenLinkEmailsMiddleware(AsyncMiddlewareMixin):
    """
    Async middleware that reports 404 responses to the site managers via
    ``mail_managers()`` unless the request is judged ignorable.
    """

    async def process_response(self, request, response):
        """
        Send broken link emails for relevant 404 NOT FOUND responses.

        Nothing is sent when settings.DEBUG is on or when
        is_ignorable_request() returns True. The response is always returned
        unchanged.
        """
        if response.status_code == 404 and not settings.DEBUG:
            domain = request.get_host()
            path = request.get_full_path()
            referer = request.META.get("HTTP_REFERER", "")

            if not self.is_ignorable_request(request, path, domain, referer):
                ua = request.META.get("HTTP_USER_AGENT", "<none>")
                ip = request.META.get("REMOTE_ADDR", "<none>")
                internal_prefix = (
                    "INTERNAL " if self.is_internal_request(domain, referer) else ""
                )
                # mail_managers() is synchronous and may block on the mail
                # backend; run it in a worker thread so this coroutine does
                # not stall the event loop while the message is delivered.
                # asyncio.to_thread() requires Python 3.9+.
                # NOTE(review): confirm the configured EMAIL_BACKEND is safe
                # to call from a worker thread.
                await asyncio.to_thread(
                    mail_managers,
                    "Broken %slink on %s" % (internal_prefix, domain),
                    "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
                    "IP address: %s\n" % (referer, path, ua, ip),
                    fail_silently=True,
                )
        return response

    def is_internal_request(self, domain, referer):
        """
        Return True if the referring URL is the same domain as the current
        request.
        """
        # Different subdomains are treated as different domains.
        return bool(re.match("^https?://%s/" % re.escape(domain), referer))

    def is_ignorable_request(self, request, uri, domain, referer):
        """
        Return True if the given request *shouldn't* notify the site managers
        according to project settings or in situations outlined by the inline
        comments.
        """
        # The referer is empty.
        if not referer:
            return True

        # APPEND_SLASH is enabled and the referer is equal to the current URL
        # without a trailing slash indicating an internal redirect.
        if settings.APPEND_SLASH and uri.endswith("/") and referer == uri[:-1]:
            return True

        # A '?' in referer is identified as a search engine source.
        if not self.is_internal_request(domain, referer) and "?" in referer:
            return True

        # The referer is equal to the current URL, ignoring the scheme (assumed
        # to be a poorly implemented bot).
        parsed_referer = urlsplit(referer)
        if parsed_referer.netloc in ["", domain] and parsed_referer.path == uri:
            return True

        # Finally, honor the project's configured ignore patterns.
        return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)
0 commit comments