diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 063344e0284258..ba8d36a74148e9 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -246,7 +246,7 @@ instantiation, of which this module provides three different variants: .. versionadded:: 3.2 - .. method:: send_error(code, message=None, explain=None) + .. method:: send_error(code, message=None, explain=None, *, extra_headers=()) Sends and logs a complete error reply to the client. The numeric *code* specifies the HTTP error code, with *message* as an optional, short, human @@ -260,10 +260,17 @@ instantiation, of which this module provides three different variants: HEAD or the response code is one of the following: :samp:`1{xx}`, ``204 No Content``, ``205 Reset Content``, ``304 Not Modified``. + The *extra_headers* argument is an iterable of ``(name, value)`` tuples + specifying additional headers to be sent in the response (for + instance, ``[("Content-Range", "bytes 3-14/42")]``). + .. versionchanged:: 3.4 The error response includes a Content-Length header. Added the *explain* argument. + .. versionchanged:: next + Added the *extra_headers* argument. + .. method:: send_response(code, message=None) Adds a response header to the headers buffer and logs the accepted @@ -374,6 +381,10 @@ instantiation, of which this module provides three different variants: .. versionchanged:: 3.9 The *directory* parameter accepts a :term:`path-like object`. + .. versionchanged:: next + Added support for HTTP single-part range requests on files, as specified + in :rfc:`9110#section-14`. + A lot of the work, such as parsing the request, is done by the base class :class:`BaseHTTPRequestHandler`. This class implements the :func:`do_GET` and :func:`do_HEAD` functions. 
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index d5d387d9a0aaa7..b3d8574af013ee 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -359,6 +359,15 @@ http.client (Contributed by Alexander Enrique Urieles Nieto in :gh:`131724`.) +http.server +----------- + +* Added support for HTTP single-part range requests on files to + :class:`~http.server.SimpleHTTPRequestHandler`, as specified in + :rfc:`9110#section-14`. + (Contributed by Andy Ling in :gh:`86809`.) + + http.cookies ------------ @@ -736,6 +745,13 @@ Porting to Python 3.15 This section lists previously described changes and other bugfixes that may require changes to your code. +Changes in the Python API +------------------------- + +* The :meth:`BaseHTTPRequestHandler.send_error <http.server.BaseHTTPRequestHandler.send_error>` + method now has a new optional keyword-only parameter *extra_headers*. + Subclasses should update their implementations to accept this new parameter. + (Contributed by Andy Ling in :gh:`86809`.) Build changes ============= diff --git a/Lib/http/server.py b/Lib/http/server.py index a2ffbe2e44df64..596727f05f5771 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -78,6 +78,7 @@ import mimetypes import os import posixpath +import re import shutil import socket import socketserver @@ -111,6 +112,7 @@ """ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" +RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.ASCII | re.IGNORECASE) class HTTPServer(socketserver.TCPServer): @@ -464,7 +466,7 @@ def handle(self): while not self.close_connection: self.handle_one_request() - def send_error(self, code, message=None, explain=None): + def send_error(self, code, message=None, explain=None, *, extra_headers=()): """Send and log an error reply. Arguments are @@ -475,6 +477,7 @@ def send_error(self, code, message=None, explain=None): defaults to short entry matching the response code * explain: a detailed message defaults to the long entry matching the response code. 
+ * extra_headers: extra headers to be included in the response This sends an error response (so it must be called before any output has been generated), logs the error, and finally sends @@ -512,6 +515,8 @@ def send_error(self, code, message=None, explain=None): body = content.encode('UTF-8', 'replace') self.send_header("Content-Type", self.error_content_type) self.send_header('Content-Length', str(len(body))) + for name, value in extra_headers: + self.send_header(name, value) self.end_headers() if self.command != 'HEAD' and body: @@ -703,7 +708,7 @@ def do_GET(self): f = self.send_head() if f: try: - self.copyfile(f, self.wfile) + self.copyfile(f, self.wfile, range=self._range) finally: f.close() @@ -726,6 +731,7 @@ def send_head(self): """ path = self.translate_path(self.path) f = None + self._range = self.parse_range() if os.path.isdir(path): parts = urllib.parse.urlsplit(self.path) if not parts.path.endswith(('/', '%2f', '%2F')): @@ -790,9 +796,44 @@ def send_head(self): f.close() return None - self.send_response(HTTPStatus.OK) + if self._range: + start, end = self._range + if start is None: + # parse_range() collapses (None, None) to None as it's invalid + # https://github.com/python/cpython/pull/118949#discussion_r1912397525 + assert end is not None + # `end` here means suffix length + start = max(0, fs.st_size - end) + end = fs.st_size - 1 + elif end is None or end >= fs.st_size: + end = fs.st_size - 1 + + if start == 0 and end >= fs.st_size - 1: + # Send entire file + self._range = None + elif start >= fs.st_size: + # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that + # none of the range values overlap the extent of + # the resource + f.close() + headers = [('Content-Range', f'bytes */{fs.st_size}')] + self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, + extra_headers=headers) + return None + + if self._range: + self.send_response(HTTPStatus.PARTIAL_CONTENT) + self.send_header("Content-Range", + f"bytes {start}-{end}/{fs.st_size}") + 
self.send_header("Content-Length", str(end - start + 1)) + + # Update range to be sent to be used later in copyfile + self._range = (start, end) + else: + self.send_response(HTTPStatus.OK) + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Length", str(fs.st_size)) self.send_header("Content-type", ctype) - self.send_header("Content-Length", str(fs[6])) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) self.end_headers() @@ -892,13 +933,15 @@ def translate_path(self, path): path += '/' return path - def copyfile(self, source, outputfile): - """Copy all data between two file objects. + def copyfile(self, source, outputfile, *, range=None): + """Copy all data between two file objects if range is None. + Otherwise, copy data between two file objects based on the + inclusive range (start, end). The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). + (or anything with read() and seek() method) and the + DESTINATION argument is a file object open for writing + (or anything with a write() method). The only reason for overriding this would be to change the block size or perhaps to replace newlines by CRLF @@ -906,7 +949,18 @@ def copyfile(self, source, outputfile): to copy binary data as well. """ - shutil.copyfileobj(source, outputfile) + if range is None: + shutil.copyfileobj(source, outputfile) + else: + start, end = range + length = end - start + 1 + source.seek(start) + while length > 0: + buf = source.read(min(length, shutil.COPY_BUFSIZE)) + if not buf: + raise EOFError('File shrank after size was checked') + length -= len(buf) + outputfile.write(buf) def guess_type(self, path): """Guess the type of a file. 
@@ -933,6 +987,35 @@ def guess_type(self, path): return guess return 'application/octet-stream' + def parse_range(self): + """Return a tuple of (start, end) representing the range header in + the HTTP request. If the range header is missing or not resolvable, + this returns None. + + This currently only supports single part ranges. + + """ + range_header = self.headers.get('range') + if range_header is None: + return None + m = RANGE_REGEX_PATTERN.match(range_header) + # Ignore invalid Range header and return None + # https://datatracker.ietf.org/doc/html/rfc9110#name-range + if m is None: + return None + + start = int(m.group(1)) if m.group(1) else None + end = int(m.group(2)) if m.group(2) else None + + if start is None and end is None: + return None + + if start is not None and end is not None and start > end: + return None + + return start, end + + nobody = None diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 2548a7c5f292f0..8e13faad0f9833 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -740,6 +740,83 @@ def test_get(self): finally: os.chmod(self.tempdir, 0o755) + def test_single_range_get(self): + route = self.base_url + '/test' + response = self.request(route) + self.assertEqual(response.getheader('accept-ranges'), 'bytes') + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + # valid ranges + response = self.request(route, headers={'Range': 'bYtEs=3-12'}) # case insensitive + self.assertEqual(response.getheader('content-range'), 'bytes 3-12/30') + self.assertEqual(response.getheader('content-length'), '10') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:13]) + + response = self.request(route, headers={'Range': 'bytes=3-'}) + self.assertEqual(response.getheader('content-range'), 'bytes 3-29/30') + self.assertEqual(response.getheader('content-length'), '27') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, 
data=self.data[3:]) + + response = self.request(route, headers={'Range': 'bytes=-5'}) + self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') + self.assertEqual(response.getheader('content-length'), '5') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) + + response = self.request(route, headers={'Range': 'bytes=29-29'}) + self.assertEqual(response.getheader('content-range'), 'bytes 29-29/30') + self.assertEqual(response.getheader('content-length'), '1') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[29:]) + + # end > file size + response = self.request(route, headers={'Range': 'bytes=25-100'}) + self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') + self.assertEqual(response.getheader('content-length'), '5') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) + + # invalid ranges + response = self.request(route, headers={'Range': 'bytes=100-200'}) + self.assertEqual(response.getheader('content-range'), 'bytes */30') + self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + + response = self.request(route, headers={'Range': 'bytes=4-3'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes=wrong format'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes=-'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes=--'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes='}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + def test_single_range_get_empty(self): + # range requests to an empty file + 
os_helper.create_empty_file(os.path.join(self.tempdir_name, 'empty')) + empty_path = self.base_url + '/empty' + + response = self.request(empty_path, headers={'Range': 'bytes=0-512'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=b'') + + response = self.request(empty_path, headers={'Range': 'bytes=-512'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=b'') + + response = self.request(empty_path, headers={'Range': 'bytes=1-2'}) + self.assertEqual(response.getheader('content-range'), 'bytes */0') + self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + + # invalid Range header is always ignored + response = self.request(empty_path, headers={'Range': 'bytes=5-4'}) + self.check_status_and_reason(response, HTTPStatus.OK) + + def test_multi_range_get(self): + # multipart ranges (not supported currently) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=1-2, 4-7'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst new file mode 100644 index 00000000000000..deda1ee00b78aa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -0,0 +1 @@ +Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`9110#section-14`.