From e81d46ea26b7c7e5302db3609cc39c394117ae34 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Sun, 12 May 2024 00:24:55 +0800 Subject: [PATCH 01/25] Add support for HTTP Range header in SimpleHTTPServer --- Lib/http/server.py | 71 ++++++++++++++++--- Lib/test/test_httpservers.py | 21 ++++++ ...4-05-12-00-15-44.gh-issue-86809._5vdGa.rst | 1 + 3 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst diff --git a/Lib/http/server.py b/Lib/http/server.py index a6f7aecc78763f..bbe2eeb4be9b7b 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -97,6 +97,7 @@ import mimetypes import os import posixpath +import re import select import shutil import socket # For gethostbyaddr() @@ -682,7 +683,7 @@ def do_GET(self): f = self.send_head() if f: try: - self.copyfile(f, self.wfile) + self.copyfile(f, self.wfile, range=self.range) finally: f.close() @@ -705,6 +706,7 @@ def send_head(self): """ path = self.translate_path(self.path) f = None + self.range = self.get_range() if os.path.isdir(path): parts = urllib.parse.urlsplit(self.path) if not parts.path.endswith('/'): @@ -769,9 +771,26 @@ def send_head(self): f.close() return None - self.send_response(HTTPStatus.OK) + if self.range: + start, end = self.range + if start >= fs.st_size: + # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that none of the range values overlap the extent of the resource + f.close() + self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + return None + if end is None or end >= fs.st_size: + end = fs.st_size - 1 + self.send_response(HTTPStatus.PARTIAL_CONTENT) + self.send_header("Content-Range", "bytes %s-%s/%s" % (start, end, fs.st_size)) + self.send_header("Content-Length", str(end-start+1)) + + # Update range to be sent to be used later in copyfile + self.range = (start, end) + else: + self.send_response(HTTPStatus.OK) + self.send_header("Accept-Ranges", "bytes") + self.send_header("Content-Length", str(fs[6])) 
self.send_header("Content-type", ctype) - self.send_header("Content-Length", str(fs[6])) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) self.end_headers() @@ -868,13 +887,15 @@ def translate_path(self, path): path += '/' return path - def copyfile(self, source, outputfile): - """Copy all data between two file objects. + def copyfile(self, source, outputfile, range=None): + """Copy all data between two file objects if range is None. + Otherwise, copy data between two file objects based on the + inclusive range (start, end). The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). + (or anything with read() and seek() method) and the + DESTINATION argument is a file object open for writing + (or anything with a write() method). The only reason for overriding this would be to change the block size or perhaps to replace newlines by CRLF @@ -882,7 +903,21 @@ def copyfile(self, source, outputfile): to copy binary data as well. """ - shutil.copyfileobj(source, outputfile) + if range is None: + shutil.copyfileobj(source, outputfile) + else: + start, end = range + length = end - start + 1 + source.seek(start) + while True: + if length <= 0: + break + buf = source.read(min(length, shutil.COPY_BUFSIZE)) + if not buf: + break + length -= len(buf) + outputfile.write(buf) + def guess_type(self, path): """Guess the type of a file. @@ -909,6 +944,24 @@ def guess_type(self, path): return guess return 'application/octet-stream' + def get_range(self): + """Return a tuple of (start, end) representing the range header in + the HTTP request. If the range header is missing or not resolvable, + None is returned. This only supports single part ranges. 
+ + """ + range_header = self.headers.get('range') + if not range_header: + return None + m = re.match(r'bytes=(\d+)-(\d*)$', range_header) + if not m: + return None + start = m.group(1) + if not m.group(2): + return int(start), None + end = m.group(2) + return int(start), int(end) + # Utilities for CGIHTTPRequestHandler diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 1c370dcafa9fea..c133e7109ce772 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -539,6 +539,27 @@ def test_get(self): finally: os.chmod(self.tempdir, 0o755) + def test_range_get(self): + response = self.request(self.base_url + '/test') + self.assertEqual(response.getheader('accept-ranges'), 'bytes') + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=3-12'}) + self.assertEqual(response.getheader('content-range'), 'bytes 3-12/30') + self.assertEqual(response.getheader('content-length'), '10') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:13]) + + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=3-'}) + self.assertEqual(response.getheader('content-range'), 'bytes 3-29/30') + self.assertEqual(response.getheader('content-length'), '27') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:]) + + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=100-200'}) + self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=wrong format'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst 
b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst new file mode 100644 index 00000000000000..ff48aca094e57b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -0,0 +1 @@ +Add support for HTTP Range header in ``SimpleHTTPServer`` From 9bc811f49bff36c2774a1eef13e6c9eb7711fd03 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 23 May 2024 22:27:14 +0800 Subject: [PATCH 02/25] Add handling for cases where start is greater than end in a Range and change function name --- Lib/http/server.py | 14 ++++++++------ Lib/test/test_httpservers.py | 3 +++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index bbe2eeb4be9b7b..831c20e481aadc 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -706,7 +706,7 @@ def send_head(self): """ path = self.translate_path(self.path) f = None - self.range = self.get_range() + self.range = self.parse_range() if os.path.isdir(path): parts = urllib.parse.urlsplit(self.path) if not parts.path.endswith('/'): @@ -944,7 +944,7 @@ def guess_type(self, path): return guess return 'application/octet-stream' - def get_range(self): + def parse_range(self): """Return a tuple of (start, end) representing the range header in the HTTP request. If the range header is missing or not resolvable, None is returned. This only supports single part ranges. 
@@ -956,11 +956,13 @@ def get_range(self): m = re.match(r'bytes=(\d+)-(\d*)$', range_header) if not m: return None - start = m.group(1) + start = int(m.group(1)) if not m.group(2): - return int(start), None - end = m.group(2) - return int(start), int(end) + return start, None + end = int(m.group(2)) + if start > end: + return None + return start, end # Utilities for CGIHTTPRequestHandler diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index c133e7109ce772..872c336e930595 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -557,6 +557,9 @@ def test_range_get(self): response = self.request(self.base_url + '/test', headers={'Range': 'bytes=100-200'}) self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=4-3'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=wrong format'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) From ac7b0fa852fdbf542c0d6772e44f0984cd0a29f5 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 23 May 2024 22:59:38 +0800 Subject: [PATCH 03/25] Add support for suffix in Range header --- Lib/http/server.py | 22 ++++++++++++++++------ Lib/test/test_httpservers.py | 5 +++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 831c20e481aadc..e1bd6ecea8d4e3 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -773,6 +773,12 @@ def send_head(self): if self.range: start, end = self.range + if start is None: + # `end` here means suffix length + start = fs.st_size - end + end = fs.st_size - 1 + if start < 0: + start = 0 if start >= fs.st_size: # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that none of the range values overlap the extent of the resource f.close() @@ -953,15 +959,19 @@ def parse_range(self): range_header = 
self.headers.get('range') if not range_header: return None - m = re.match(r'bytes=(\d+)-(\d*)$', range_header) + m = re.match(r'bytes=(\d*)-(\d*)$', range_header) if not m: return None - start = int(m.group(1)) - if not m.group(2): - return start, None - end = int(m.group(2)) - if start > end: + + start = int(m.group(1)) if m.group(1) else None + end = int(m.group(2)) if m.group(2) else None + + if start is None and end is None: + return None + + if start is not None and end is not None and start > end: return None + return start, end diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 872c336e930595..3f9691850b3b13 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -554,6 +554,11 @@ def test_range_get(self): self.assertEqual(response.getheader('content-length'), '27') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:]) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=-5'}) + self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') + self.assertEqual(response.getheader('content-length'), '5') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=100-200'}) self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) From d23a60db52319ce3798c949998fce186ba3321e9 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Fri, 24 May 2024 00:10:18 +0800 Subject: [PATCH 04/25] Use precompiled pattern and some small improvements --- Lib/http/server.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index e1bd6ecea8d4e3..1fdcb334502b2b 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -132,6 +132,7 @@ """ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" +RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$') class 
HTTPServer(socketserver.TCPServer): @@ -775,10 +776,8 @@ def send_head(self): start, end = self.range if start is None: # `end` here means suffix length - start = fs.st_size - end + start = max(0, fs.st_size - end) end = fs.st_size - 1 - if start < 0: - start = 0 if start >= fs.st_size: # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that none of the range values overlap the extent of the resource f.close() @@ -788,7 +787,7 @@ def send_head(self): end = fs.st_size - 1 self.send_response(HTTPStatus.PARTIAL_CONTENT) self.send_header("Content-Range", "bytes %s-%s/%s" % (start, end, fs.st_size)) - self.send_header("Content-Length", str(end-start+1)) + self.send_header("Content-Length", str(end - start + 1)) # Update range to be sent to be used later in copyfile self.range = (start, end) @@ -959,7 +958,7 @@ def parse_range(self): range_header = self.headers.get('range') if not range_header: return None - m = re.match(r'bytes=(\d*)-(\d*)$', range_header) + m = re.match(RANGE_REGEX_PATTERN, range_header) if not m: return None From e985c6797f43aa67cf5bd6b6d499a710be23bccd Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Fri, 24 May 2024 15:34:41 +0800 Subject: [PATCH 05/25] Add some more testcases --- Lib/http/server.py | 2 +- Lib/test/test_httpservers.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 1fdcb334502b2b..2ee245a653305d 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -958,7 +958,7 @@ def parse_range(self): range_header = self.headers.get('range') if not range_header: return None - m = re.match(RANGE_REGEX_PATTERN, range_header) + m = RANGE_REGEX_PATTERN.match(range_header) if not m: return None diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 3f9691850b3b13..9837c57c3185b7 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -544,6 +544,7 @@ def test_range_get(self): self.assertEqual(response.getheader('accept-ranges'), 
'bytes') self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + # valid ranges response = self.request(self.base_url + '/test', headers={'Range': 'bytes=3-12'}) self.assertEqual(response.getheader('content-range'), 'bytes 3-12/30') self.assertEqual(response.getheader('content-length'), '10') @@ -559,6 +560,18 @@ def test_range_get(self): self.assertEqual(response.getheader('content-length'), '5') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=29-29'}) + self.assertEqual(response.getheader('content-range'), 'bytes 29-29/30') + self.assertEqual(response.getheader('content-length'), '1') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[29:]) + + # end > file size + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=25-100'}) + self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') + self.assertEqual(response.getheader('content-length'), '5') + self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) + + # invalid ranges response = self.request(self.base_url + '/test', headers={'Range': 'bytes=100-200'}) self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) @@ -568,6 +581,9 @@ def test_range_get(self): response = self.request(self.base_url + '/test', headers={'Range': 'bytes=wrong format'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=-'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') From 1ca0ff7a875854277a8bae134f1437d64153801d Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Sun, 15 Dec 2024 17:07:58 +0800 Subject: [PATCH 06/25] Coding style improvements --- Lib/http/server.py | 20 
++++++++++-------- Lib/test/test_httpservers.py | 21 ++++++++++--------- ...4-05-12-00-15-44.gh-issue-86809._5vdGa.rst | 3 ++- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 2ee245a653305d..ca855823512929 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -684,7 +684,7 @@ def do_GET(self): f = self.send_head() if f: try: - self.copyfile(f, self.wfile, range=self.range) + self.copyfile(f, self.wfile, range=self._range) finally: f.close() @@ -707,7 +707,7 @@ def send_head(self): """ path = self.translate_path(self.path) f = None - self.range = self.parse_range() + self._range = self.parse_range() if os.path.isdir(path): parts = urllib.parse.urlsplit(self.path) if not parts.path.endswith('/'): @@ -772,14 +772,16 @@ def send_head(self): f.close() return None - if self.range: - start, end = self.range + if self._range: + start, end = self._range if start is None: # `end` here means suffix length start = max(0, fs.st_size - end) end = fs.st_size - 1 if start >= fs.st_size: - # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that none of the range values overlap the extent of the resource + # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that + # none of the range values overlap the extent of + # the resource f.close() self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) return None @@ -790,11 +792,11 @@ def send_head(self): self.send_header("Content-Length", str(end - start + 1)) # Update range to be sent to be used later in copyfile - self.range = (start, end) + self._range = (start, end) else: self.send_response(HTTPStatus.OK) self.send_header("Accept-Ranges", "bytes") - self.send_header("Content-Length", str(fs[6])) + self.send_header("Content-Length", str(fs.st_size)) self.send_header("Content-type", ctype) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) @@ -956,10 +958,10 @@ def parse_range(self): """ range_header = self.headers.get('range') - if not range_header: + if 
range_header is None: return None m = RANGE_REGEX_PATTERN.match(range_header) - if not m: + if m is None: return None start = int(m.group(1)) if m.group(1) else None diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 9837c57c3185b7..2ca1dd34e50f9a 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -540,48 +540,49 @@ def test_get(self): os.chmod(self.tempdir, 0o755) def test_range_get(self): - response = self.request(self.base_url + '/test') + route = self.base_url + '/test' + response = self.request(route) self.assertEqual(response.getheader('accept-ranges'), 'bytes') self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) # valid ranges - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=3-12'}) + response = self.request(route, headers={'Range': 'bytes=3-12'}) self.assertEqual(response.getheader('content-range'), 'bytes 3-12/30') self.assertEqual(response.getheader('content-length'), '10') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:13]) - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=3-'}) + response = self.request(route, headers={'Range': 'bytes=3-'}) self.assertEqual(response.getheader('content-range'), 'bytes 3-29/30') self.assertEqual(response.getheader('content-length'), '27') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:]) - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=-5'}) + response = self.request(route, headers={'Range': 'bytes=-5'}) self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') self.assertEqual(response.getheader('content-length'), '5') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=29-29'}) + response = self.request(route, headers={'Range': 'bytes=29-29'}) 
self.assertEqual(response.getheader('content-range'), 'bytes 29-29/30') self.assertEqual(response.getheader('content-length'), '1') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[29:]) # end > file size - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=25-100'}) + response = self.request(route, headers={'Range': 'bytes=25-100'}) self.assertEqual(response.getheader('content-range'), 'bytes 25-29/30') self.assertEqual(response.getheader('content-length'), '5') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[25:]) # invalid ranges - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=100-200'}) + response = self.request(route, headers={'Range': 'bytes=100-200'}) self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=4-3'}) + response = self.request(route, headers={'Range': 'bytes=4-3'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=wrong format'}) + response = self.request(route, headers={'Range': 'bytes=wrong format'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) - response = self.request(self.base_url + '/test', headers={'Range': 'bytes=-'}) + response = self.request(route, headers={'Range': 'bytes=-'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) def test_head(self): diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst index ff48aca094e57b..463170e3b88860 100644 --- a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst +++ b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -1 +1,2 @@ -Add support for HTTP Range header in ``SimpleHTTPServer`` +Add support 
for HTTP Range header in :class:`SimpleHTTPServer`. Add an optional +``range`` paramater to :func:`SimpleHTTPRequestHandler.copyfile`. From 8b9ae26e209b54e7461de3f0eaab9cc4d91cc6ee Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Tue, 31 Dec 2024 14:22:32 +0800 Subject: [PATCH 07/25] Make range a keyword only arg --- Lib/http/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index ca855823512929..54a5641acab995 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -894,7 +894,7 @@ def translate_path(self, path): path += '/' return path - def copyfile(self, source, outputfile, range=None): + def copyfile(self, source, outputfile, *, range=None): """Copy all data between two file objects if range is None. Otherwise, copy data between two file objects based on the inclusive range (start, end). From ca84b1c3cc6d47ebe178c8d2acf9ece409b65588 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Tue, 31 Dec 2024 15:38:44 +0800 Subject: [PATCH 08/25] Add docs --- Doc/library/http.server.rst | 3 +++ Doc/whatsnew/3.14.rst | 2 ++ .../next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst | 3 +-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 1197b575c00455..96e8a98d27b622 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -41,6 +41,9 @@ handler. Code to create and run the server looks like this:: :attr:`server_port`. The server is accessible by the handler, typically through the handler's :attr:`server` instance variable. + .. versionchanged:: next + Added support for HTTP Range header. + .. 
class:: ThreadingHTTPServer(server_address, RequestHandlerClass) This class is identical to HTTPServer but uses threads to handle diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 095949242c09d9..e4d85788604e4a 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -408,6 +408,8 @@ http module allow the browser to apply its default dark mode. (Contributed by Yorik Hansen in :gh:`123430`.) +* Added support for HTTP Range header to :class:`~http.server.HTTPServer`. + (Contributed by Andy Ling in :gh:`86809`.) inspect ------- diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst index 463170e3b88860..79136f06cc5144 100644 --- a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst +++ b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -1,2 +1 @@ -Add support for HTTP Range header in :class:`SimpleHTTPServer`. Add an optional -``range`` paramater to :func:`SimpleHTTPRequestHandler.copyfile`. +Added support for HTTP Range header to :class:`~http.server.HTTPServer`. 
From b266ff45f537394aa94df557ff58e59e7206dd73 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Wed, 1 Jan 2025 22:26:21 +0800 Subject: [PATCH 09/25] Error handling and testcase improve --- Lib/http/server.py | 9 +++------ Lib/test/test_httpservers.py | 6 +++++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 54a5641acab995..d43a0a2d05bda2 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -132,7 +132,7 @@ """ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" -RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$') +RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.IGNORECASE) class HTTPServer(socketserver.TCPServer): @@ -916,16 +916,13 @@ def copyfile(self, source, outputfile, *, range=None): start, end = range length = end - start + 1 source.seek(start) - while True: - if length <= 0: - break + while length > 0: buf = source.read(min(length, shutil.COPY_BUFSIZE)) if not buf: - break + raise EOFError('File shrank after size was checked') length -= len(buf) outputfile.write(buf) - def guess_type(self, path): """Guess the type of a file. 
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 2ca1dd34e50f9a..cabe0cbf41cbb8 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -546,7 +546,7 @@ def test_range_get(self): self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) # valid ranges - response = self.request(route, headers={'Range': 'bytes=3-12'}) + response = self.request(route, headers={'Range': 'bYtEs=3-12'}) # case insensitive self.assertEqual(response.getheader('content-range'), 'bytes 3-12/30') self.assertEqual(response.getheader('content-length'), '10') self.check_status_and_reason(response, HTTPStatus.PARTIAL_CONTENT, data=self.data[3:13]) @@ -585,6 +585,10 @@ def test_range_get(self): response = self.request(route, headers={'Range': 'bytes=-'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + # multipart ranges (not supported currently) + response = self.request(route, headers={'Range': 'bytes=1-2, 4-7'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') From 2824b9cc7f2f669deafdefec55ecdf7a17d0b717 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Wed, 1 Jan 2025 22:51:22 +0800 Subject: [PATCH 10/25] Send content-range when not satisfiable --- Lib/http/server.py | 7 +++++-- Lib/test/test_httpservers.py | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index d43a0a2d05bda2..57ffcdf2d3f14a 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -445,7 +445,7 @@ def handle(self): while not self.close_connection: self.handle_one_request() - def send_error(self, code, message=None, explain=None): + def send_error(self, code, message=None, explain=None, *, range_size=None): """Send and log an error reply. 
Arguments are @@ -456,6 +456,7 @@ def send_error(self, code, message=None, explain=None): defaults to short entry matching the response code * explain: a detailed message defaults to the long entry matching the response code. + * range_size: file size for use in content-range header This sends an error response (so it must be called before any output has been generated), logs the error, and finally sends @@ -493,6 +494,8 @@ def send_error(self, code, message=None, explain=None): body = content.encode('UTF-8', 'replace') self.send_header("Content-Type", self.error_content_type) self.send_header('Content-Length', str(len(body))) + if code == HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE and range_size: + self.send_header('Content-Range', f'bytes */{range_size}') self.end_headers() if self.command != 'HEAD' and body: @@ -783,7 +786,7 @@ def send_head(self): # none of the range values overlap the extent of # the resource f.close() - self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, range_size=fs.st_size) return None if end is None or end >= fs.st_size: end = fs.st_size - 1 diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index cabe0cbf41cbb8..484df915d8233f 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -574,6 +574,7 @@ def test_range_get(self): # invalid ranges response = self.request(route, headers={'Range': 'bytes=100-200'}) + self.assertEqual(response.getheader('content-range'), 'bytes */30') self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) response = self.request(route, headers={'Range': 'bytes=4-3'}) From b5d9ffcaa6bab8576378f9a362e118b7ea132ec8 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Wed, 1 Jan 2025 23:14:32 +0800 Subject: [PATCH 11/25] Improve docs --- Doc/library/http.server.rst | 7 ++++--- Doc/whatsnew/3.14.rst | 3 ++- .../Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst | 2 +- 3 files 
changed, 7 insertions(+), 5 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 96e8a98d27b622..d65735f32702b4 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -41,9 +41,6 @@ handler. Code to create and run the server looks like this:: :attr:`server_port`. The server is accessible by the handler, typically through the handler's :attr:`server` instance variable. - .. versionchanged:: next - Added support for HTTP Range header. - .. class:: ThreadingHTTPServer(server_address, RequestHandlerClass) This class is identical to HTTPServer but uses threads to handle @@ -337,6 +334,10 @@ provides three different variants: .. versionchanged:: 3.9 The *directory* parameter accepts a :term:`path-like object`. + .. versionchanged:: next + Added support for HTTP single-part Range header, as specified in + :rfc:`7233`. + A lot of the work, such as parsing the request, is done by the base class :class:`BaseHTTPRequestHandler`. This class implements the :func:`do_GET` and :func:`do_HEAD` functions. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index e4d85788604e4a..8e63428e1da7b4 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -408,7 +408,8 @@ http module allow the browser to apply its default dark mode. (Contributed by Yorik Hansen in :gh:`123430`.) -* Added support for HTTP Range header to :class:`~http.server.HTTPServer`. +* Added support for HTTP single-part Range header to + :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`7233`. (Contributed by Andy Ling in :gh:`86809`.) 
inspect diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst index 79136f06cc5144..3b3889ec0775cd 100644 --- a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst +++ b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -1 +1 @@ -Added support for HTTP Range header to :class:`~http.server.HTTPServer`. +Added support for HTTP single-part Range header to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`7233`. From 48a8523c242796701ce2a492f9390983f43b9c34 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Wed, 1 Jan 2025 23:20:24 +0800 Subject: [PATCH 12/25] Improve docs --- Doc/library/http.server.rst | 4 ++-- Doc/whatsnew/3.14.rst | 2 +- .../Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index d65735f32702b4..6b817b1c6b20f7 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -335,8 +335,8 @@ provides three different variants: The *directory* parameter accepts a :term:`path-like object`. .. versionchanged:: next - Added support for HTTP single-part Range header, as specified in - :rfc:`7233`. + Added support for HTTP single-part range requests on files, as specified + in :rfc:`7233`. A lot of the work, such as parsing the request, is done by the base class :class:`BaseHTTPRequestHandler`. This class implements the :func:`do_GET` diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 8e63428e1da7b4..b4d308d1775217 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -408,7 +408,7 @@ http module allow the browser to apply its default dark mode. (Contributed by Yorik Hansen in :gh:`123430`.) 
-* Added support for HTTP single-part Range header to +* Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`7233`. (Contributed by Andy Ling in :gh:`86809`.) diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst index 3b3889ec0775cd..0cf949a1256060 100644 --- a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst +++ b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -1 +1 @@ -Added support for HTTP single-part Range header to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`7233`. +Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`7233`. From 77436b38269ad975a026ab7926f46a4aacc8d88c Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 2 Jan 2025 13:39:02 +0800 Subject: [PATCH 13/25] Update RFC reference and some minor code fixes --- Doc/library/http.server.rst | 2 +- Doc/whatsnew/3.14.rst | 4 +++- Lib/http/server.py | 6 ++++-- .../Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 6b817b1c6b20f7..6e3e78ccc437e9 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -336,7 +336,7 @@ provides three different variants: .. versionchanged:: next Added support for HTTP single-part range requests on files, as specified - in :rfc:`7233`. + in :rfc:`9110#section-14`. A lot of the work, such as parsing the request, is done by the base class :class:`BaseHTTPRequestHandler`. 
This class implements the :func:`do_GET` diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b4d308d1775217..b5c90f4be0257d 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -409,9 +409,11 @@ http (Contributed by Yorik Hansen in :gh:`123430`.) * Added support for HTTP single-part range requests on files to - :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`7233`. + :class:`~http.server.SimpleHTTPRequestHandler`, as specified in + :rfc:`9110#section-14`. (Contributed by Andy Ling in :gh:`86809`.) + inspect ------- diff --git a/Lib/http/server.py b/Lib/http/server.py index 57ffcdf2d3f14a..9f3d3a123efdba 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -132,7 +132,7 @@ """ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" -RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.IGNORECASE) +RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.IGNORECASE | re.ASCII) class HTTPServer(socketserver.TCPServer): @@ -779,6 +779,8 @@ def send_head(self): start, end = self._range if start is None: # `end` here means suffix length + # parse_range() collapses (None, None) to None + # and thus `end` can not be None here start = max(0, fs.st_size - end) end = fs.st_size - 1 if start >= fs.st_size: @@ -791,7 +793,7 @@ def send_head(self): if end is None or end >= fs.st_size: end = fs.st_size - 1 self.send_response(HTTPStatus.PARTIAL_CONTENT) - self.send_header("Content-Range", "bytes %s-%s/%s" % (start, end, fs.st_size)) + self.send_header("Content-Range", f"bytes {start}-{end}/{fs.st_size}") self.send_header("Content-Length", str(end - start + 1)) # Update range to be sent to be used later in copyfile diff --git a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst index 0cf949a1256060..deda1ee00b78aa 100644 --- a/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst +++ 
b/Misc/NEWS.d/next/Library/2024-05-12-00-15-44.gh-issue-86809._5vdGa.rst @@ -1 +1 @@ -Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`7233`. +Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`9110#section-14`. From c2cc2d4ed59defac34bb491416b109c4f61e38ba Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 2 Jan 2025 15:54:43 +0800 Subject: [PATCH 14/25] Add extra headers param and docs --- Doc/library/http.server.rst | 19 ++++++++++++------- Doc/whatsnew/3.14.rst | 4 ++++ Lib/http/server.py | 13 ++++++++----- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 6e3e78ccc437e9..c810002ea62f75 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -206,24 +206,29 @@ provides three different variants: .. versionadded:: 3.2 - .. method:: send_error(code, message=None, explain=None) + .. method:: send_error(code, message=None, explain=None, extra_headers=None) Sends and logs a complete error reply to the client. The numeric *code* specifies the HTTP error code, with *message* as an optional, short, human readable description of the error. The *explain* argument can be used to provide more detailed information about the error; it will be formatted using the :attr:`error_message_format` attribute and emitted, after - a complete set of headers, as the response body. The :attr:`responses` - attribute holds the default values for *message* and *explain* that - will be used if no value is provided; for unknown codes the default value - for both is the string ``???``. The body will be empty if the method is - HEAD or the response code is one of the following: :samp:`1{xx}`, - ``204 No Content``, ``205 Reset Content``, ``304 Not Modified``. + a complete set of headers, as the response body. 
The *extra_headers* + argument can be a key-value tuple list which specifies extra headers to + be sent in the response. The :attr:`responses` attribute holds the + default values for *message* and *explain* that will be used if no value + is provided; for unknown codes the default value for both is the string + ``???``. The body will be empty if the method is HEAD or the response + code is one of the following: :samp:`1{xx}`, ``204 No Content``, + ``205 Reset Content``, ``304 Not Modified``. .. versionchanged:: 3.4 The error response includes a Content-Length header. Added the *explain* argument. + .. versionchanged:: next + Added the *extra_headers* argument. + .. method:: send_response(code, message=None) Adds a response header to the headers buffer and logs the accepted diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b5c90f4be0257d..6f98a4b47e7997 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -912,6 +912,10 @@ Changes in the Python API This temporary change affects other threads. (Contributed by Serhiy Storchaka in :gh:`69998`.) +* The :meth:`~http.server.BaseHTTPRequestHandler.send_error` now has a new + optional parameter *extra_headers*. + Subclasses should update their implementations to accept this new parameter. + (Contributed by Andy Ling in :gh:`86809`.) Build changes ============= diff --git a/Lib/http/server.py b/Lib/http/server.py index 9f3d3a123efdba..4104017970cbec 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -445,7 +445,7 @@ def handle(self): while not self.close_connection: self.handle_one_request() - def send_error(self, code, message=None, explain=None, *, range_size=None): + def send_error(self, code, message=None, explain=None, extra_headers=None): """Send and log an error reply. 
Arguments are @@ -456,7 +456,7 @@ def send_error(self, code, message=None, explain=None, *, range_size=None): defaults to short entry matching the response code * explain: a detailed message defaults to the long entry matching the response code. - * range_size: file size for use in content-range header + * extra_headers: extra headers to be included in the response This sends an error response (so it must be called before any output has been generated), logs the error, and finally sends @@ -494,8 +494,9 @@ def send_error(self, code, message=None, explain=None, *, range_size=None): body = content.encode('UTF-8', 'replace') self.send_header("Content-Type", self.error_content_type) self.send_header('Content-Length', str(len(body))) - if code == HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE and range_size: - self.send_header('Content-Range', f'bytes */{range_size}') + if extra_headers is not None: + for (keyword, value) in extra_headers: + self.send_header(keyword, value) self.end_headers() if self.command != 'HEAD' and body: @@ -788,7 +789,9 @@ def send_head(self): # none of the range values overlap the extent of # the resource f.close() - self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, range_size=fs.st_size) + headers = [('Content-Range', f'bytes */{fs.st_size}')] + self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, + extra_headers=headers) return None if end is None or end >= fs.st_size: end = fs.st_size - 1 From aa9ec6b7ef25b281b3b2a961180c2ed1bfabba85 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 2 Jan 2025 20:07:05 +0800 Subject: [PATCH 15/25] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/http.server.rst | 19 +++++++++++-------- Lib/http/server.py | 16 +++++++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/Doc/library/http.server.rst 
b/Doc/library/http.server.rst index c810002ea62f75..d84393be47783a 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -213,14 +213,17 @@ provides three different variants: readable description of the error. The *explain* argument can be used to provide more detailed information about the error; it will be formatted using the :attr:`error_message_format` attribute and emitted, after - a complete set of headers, as the response body. The *extra_headers* - argument can be a key-value tuple list which specifies extra headers to - be sent in the response. The :attr:`responses` attribute holds the - default values for *message* and *explain* that will be used if no value - is provided; for unknown codes the default value for both is the string - ``???``. The body will be empty if the method is HEAD or the response - code is one of the following: :samp:`1{xx}`, ``204 No Content``, - ``205 Reset Content``, ``304 Not Modified``. + a complete set of headers, as the response body. + + The *extra_headers* argument can be a key-value tuple list which + specifies additional headers to be sent in the response (for + instance, ``[("Content-Range", "bytes 3-14/42")]``). + + The :attr:`responses` attribute holds the default values for *message* + and *explain* that will be used if no value is provided; for unknown codes + the default value for both is the string ``???``. The body will be empty if + the method is HEAD or the response code is one of the following: :samp:`1{xx}`, + ``204 No Content``, ``205 Reset Content``, or ``304 Not Modified``. .. versionchanged:: 3.4 The error response includes a Content-Length header. 
diff --git a/Lib/http/server.py b/Lib/http/server.py index 4104017970cbec..fba63e8df5524b 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -132,7 +132,7 @@ """ DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" -RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.IGNORECASE | re.ASCII) +RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.ASCII | re.IGNORECASE) class HTTPServer(socketserver.TCPServer): @@ -495,8 +495,8 @@ def send_error(self, code, message=None, explain=None, extra_headers=None): self.send_header("Content-Type", self.error_content_type) self.send_header('Content-Length', str(len(body))) if extra_headers is not None: - for (keyword, value) in extra_headers: - self.send_header(keyword, value) + for name, value in extra_headers: + self.send_header(name, value) self.end_headers() if self.command != 'HEAD' and body: @@ -779,9 +779,9 @@ def send_head(self): if self._range: start, end = self._range if start is None: - # `end` here means suffix length # parse_range() collapses (None, None) to None - # and thus `end` can not be None here + assert end is not None + # `end` here means suffix length start = max(0, fs.st_size - end) end = fs.st_size - 1 if start >= fs.st_size: @@ -958,8 +958,10 @@ def guess_type(self, path): def parse_range(self): """Return a tuple of (start, end) representing the range header in - the HTTP request. If the range header is missing or not resolvable, - None is returned. This only supports single part ranges. + the HTTP request. If the range header is missing, not resolvable, + or trivial (namely "byte=-"), this returns None. + + This currently only supports single part ranges. 
""" range_header = self.headers.get('range') From a0bca0aaea69c66407ac4244ebb06f9d8a214f61 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Fri, 3 Jan 2025 18:00:27 +0800 Subject: [PATCH 16/25] Keyword-only argument --- Doc/library/http.server.rst | 2 +- Lib/http/server.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index d84393be47783a..70d02a84636913 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -206,7 +206,7 @@ provides three different variants: .. versionadded:: 3.2 - .. method:: send_error(code, message=None, explain=None, extra_headers=None) + .. method:: send_error(code, message=None, explain=None, *, extra_headers=None) Sends and logs a complete error reply to the client. The numeric *code* specifies the HTTP error code, with *message* as an optional, short, human diff --git a/Lib/http/server.py b/Lib/http/server.py index 94f86cdbb0afd3..6ab6a31ee7f601 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -445,7 +445,7 @@ def handle(self): while not self.close_connection: self.handle_one_request() - def send_error(self, code, message=None, explain=None, extra_headers=None): + def send_error(self, code, message=None, explain=None, *, extra_headers=None): """Send and log an error reply. Arguments are @@ -958,8 +958,8 @@ def guess_type(self, path): def parse_range(self): """Return a tuple of (start, end) representing the range header in - the HTTP request. If the range header is missing, not resolvable, - or trivial (namely "byte=-"), this returns None. + the HTTP request. If the range header is missing or not resolvable, + this returns None. This currently only supports single part ranges. 
From 42c69ab2ba39043622d60fa0db509e1ba25224b7 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Tue, 7 Jan 2025 20:42:41 +0800 Subject: [PATCH 17/25] Improve test cases and code --- Doc/library/http.server.rst | 2 +- Lib/http/server.py | 7 +++---- Lib/test/test_httpservers.py | 6 ++++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 70d02a84636913..540479f606fa07 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -206,7 +206,7 @@ provides three different variants: .. versionadded:: 3.2 - .. method:: send_error(code, message=None, explain=None, *, extra_headers=None) + .. method:: send_error(code, message=None, explain=None, *, extra_headers=()) Sends and logs a complete error reply to the client. The numeric *code* specifies the HTTP error code, with *message* as an optional, short, human diff --git a/Lib/http/server.py b/Lib/http/server.py index 6ab6a31ee7f601..65878e639168ac 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -445,7 +445,7 @@ def handle(self): while not self.close_connection: self.handle_one_request() - def send_error(self, code, message=None, explain=None, *, extra_headers=None): + def send_error(self, code, message=None, explain=None, *, extra_headers=()): """Send and log an error reply. 
Arguments are @@ -494,9 +494,8 @@ def send_error(self, code, message=None, explain=None, *, extra_headers=None): body = content.encode('UTF-8', 'replace') self.send_header("Content-Type", self.error_content_type) self.send_header('Content-Length', str(len(body))) - if extra_headers is not None: - for name, value in extra_headers: - self.send_header(name, value) + for name, value in extra_headers: + self.send_header(name, value) self.end_headers() if self.command != 'HEAD' and body: diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 484df915d8233f..627e4c341da4bb 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -586,6 +586,12 @@ def test_range_get(self): response = self.request(route, headers={'Range': 'bytes=-'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + response = self.request(route, headers={'Range': 'bytes=--'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + + response = self.request(route, headers={'Range': 'bytes='}) + self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + # multipart ranges (not supported currently) response = self.request(route, headers={'Range': 'bytes=1-2, 4-7'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) From 48508f652527904441a1be2f938fc8269eea837c Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 9 Jan 2025 13:14:23 +0800 Subject: [PATCH 18/25] Fix corner case on empty file and add testcase --- Lib/http/server.py | 15 +++++++++++---- Lib/test/test_httpservers.py | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 65878e639168ac..7b4454c76de0de 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -783,7 +783,13 @@ def send_head(self): # `end` here means suffix length start = max(0, fs.st_size - end) end = fs.st_size - 1 - if start >= fs.st_size: + elif end is None or end >= fs.st_size: + end = 
fs.st_size - 1 + + if start == 0 and end >= fs.st_size - 1: + # Send entire file + self._range = None + elif start >= fs.st_size: # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that # none of the range values overlap the extent of # the resource @@ -792,10 +798,11 @@ def send_head(self): self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, extra_headers=headers) return None - if end is None or end >= fs.st_size: - end = fs.st_size - 1 + + if self._range: self.send_response(HTTPStatus.PARTIAL_CONTENT) - self.send_header("Content-Range", f"bytes {start}-{end}/{fs.st_size}") + self.send_header("Content-Range", + f"bytes {start}-{end}/{fs.st_size}") self.send_header("Content-Length", str(end - start + 1)) # Update range to be sent to be used later in copyfile diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 627e4c341da4bb..5c4b7d4744d886 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -588,7 +588,7 @@ def test_range_get(self): response = self.request(route, headers={'Range': 'bytes=--'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) - + response = self.request(route, headers={'Range': 'bytes='}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) @@ -596,6 +596,21 @@ def test_range_get(self): response = self.request(route, headers={'Range': 'bytes=1-2, 4-7'}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + # empty file + with open(os.path.join(self.tempdir_name, 'empty'), 'wb'): + pass + empty_path = self.base_url + '/empty' + + response = self.request(empty_path, headers={'Range': 'bytes=0-512'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=b'') + + response = self.request(empty_path, headers={'Range': 'bytes=-512'}) + self.check_status_and_reason(response, HTTPStatus.OK, data=b'') + + response = self.request(empty_path, headers={'Range': 'bytes=1-2'}) + self.assertEqual(response.getheader('content-range'), 'bytes */0') + 
self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') From 7c94aae7e20f466874658eafa89c7d53f9d8e907 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Sun, 12 Jan 2025 13:38:20 +0800 Subject: [PATCH 19/25] Split testcases --- Lib/test/test_httpservers.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 5c4b7d4744d886..681abc10ef7926 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -539,7 +539,7 @@ def test_get(self): finally: os.chmod(self.tempdir, 0o755) - def test_range_get(self): + def test_single_range_get(self): route = self.base_url + '/test' response = self.request(route) self.assertEqual(response.getheader('accept-ranges'), 'bytes') @@ -592,13 +592,9 @@ def test_range_get(self): response = self.request(route, headers={'Range': 'bytes='}) self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) - # multipart ranges (not supported currently) - response = self.request(route, headers={'Range': 'bytes=1-2, 4-7'}) - self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) - - # empty file - with open(os.path.join(self.tempdir_name, 'empty'), 'wb'): - pass + def test_single_range_get_empty(self): + # range requests to an empty file + os_helper.create_empty_file(os.path.join(self.tempdir_name, 'empty')) empty_path = self.base_url + '/empty' response = self.request(empty_path, headers={'Range': 'bytes=0-512'}) @@ -611,6 +607,11 @@ def test_range_get(self): self.assertEqual(response.getheader('content-range'), 'bytes */0') self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + def test_multi_range_get(self): + # multipart ranges (not supported currently) + response = self.request(self.base_url + '/test', headers={'Range': 'bytes=1-2, 4-7'}) + 
self.check_status_and_reason(response, HTTPStatus.OK, data=self.data) + def test_head(self): response = self.request( self.base_url + '/test', method='HEAD') From 5c64648b3ef5a918169afb227b4f718214b37623 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Mon, 13 Jan 2025 14:56:38 +0800 Subject: [PATCH 20/25] Suggestions from review --- Lib/http/server.py | 4 +++- Lib/test/test_httpservers.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/http/server.py b/Lib/http/server.py index 7b4454c76de0de..42634b33637325 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -778,7 +778,7 @@ def send_head(self): if self._range: start, end = self._range if start is None: - # parse_range() collapses (None, None) to None + # parse_range() collapses (None, None) to None as it's invalid assert end is not None # `end` here means suffix length start = max(0, fs.st_size - end) @@ -974,6 +974,8 @@ def parse_range(self): if range_header is None: return None m = RANGE_REGEX_PATTERN.match(range_header) + # Ignore invalid Range header and return None + # https://datatracker.ietf.org/doc/html/rfc9110#name-range if m is None: return None diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 681abc10ef7926..d841da12b7effe 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -607,6 +607,10 @@ def test_single_range_get_empty(self): self.assertEqual(response.getheader('content-range'), 'bytes */0') self.check_status_and_reason(response, HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE) + # invalid Range header is always ignored + response = self.request(empty_path, headers={'Range': 'bytes=5-4'}) + self.check_status_and_reason(response, HTTPStatus.OK) + def test_multi_range_get(self): # multipart ranges (not supported currently) response = self.request(self.base_url + '/test', headers={'Range': 'bytes=1-2, 4-7'}) From 06b8c80823102060697c17be5c9687326a6f6ae4 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Mon, 13 Jan 2025 
22:51:49 +0800 Subject: [PATCH 21/25] Add link to github discussion --- Lib/http/server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/http/server.py b/Lib/http/server.py index 42634b33637325..432713f05f2258 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -779,6 +779,7 @@ def send_head(self): start, end = self._range if start is None: # parse_range() collapses (None, None) to None as it's invalid + # https://github.com/python/cpython/pull/118949#discussion_r1912397525 assert end is not None # `end` here means suffix length start = max(0, fs.st_size - end) From 3033c27fabf7f51095aa71a6ec39281e80c2a528 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Mon, 13 Jan 2025 23:04:29 +0800 Subject: [PATCH 22/25] Update Doc/whatsnew/3.14.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/whatsnew/3.14.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b7b638647e2236..a63d74e3e9e91c 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1070,8 +1070,8 @@ Changes in the Python API This temporary change affects other threads. (Contributed by Serhiy Storchaka in :gh:`69998`.) -* The :meth:`~http.server.BaseHTTPRequestHandler.send_error` now has a new - optional parameter *extra_headers*. +* The :meth:`BaseHTTPRequestHandler.send_error <http.server.BaseHTTPRequestHandler.send_error>` + method has a new optional parameter *extra_headers*. Subclasses should update their implementations to accept this new parameter. (Contributed by Andy Ling in :gh:`86809`.) From e4b213cb99ea0ac41f2c60efe8282eaf04bafa46 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 7 Aug 2025 10:32:12 +0800 Subject: [PATCH 23/25] oops...
missed some docs from main --- Doc/library/http.server.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 5e02a6360b734c..a90cf8f0978b28 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -253,7 +253,12 @@ instantiation, of which this module provides three different variants: readable description of the error. The *explain* argument can be used to provide more detailed information about the error; it will be formatted using the :attr:`error_message_format` attribute and emitted, after - a complete set of headers, as the response body. + a complete set of headers, as the response body. The :attr:`responses` + attribute holds the default values for *message* and *explain* that + will be used if no value is provided; for unknown codes the default value + for both is the string ``???``. The body will be empty if the method is + HEAD or the response code is one of the following: :samp:`1{xx}`, + ``204 No Content``, ``205 Reset Content``, ``304 Not Modified``. The *extra_headers* argument can be a key-value tuple list which specifies additional headers to be sent in the response (for From 115606fef9efe4f52e636e7af7672f3bf19afaba Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 7 Aug 2025 10:45:07 +0800 Subject: [PATCH 24/25] migrate docs to 3.15 --- Doc/whatsnew/3.15.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 89644a509a0bb4..ffff08376998c6 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -255,6 +255,15 @@ http.client (Contributed by Alexander Enrique Urieles Nieto in :gh:`131724`.) +http.server +----------- + +* Added support for HTTP single-part range requests on files to + :class:`~http.server.SimpleHTTPRequestHandler`, as specified in + :rfc:`9110#section-14`. + (Contributed by Andy Ling in :gh:`86809`.) 
+ + math ---- @@ -515,6 +524,13 @@ Porting to Python 3.15 This section lists previously described changes and other bugfixes that may require changes to your code. +Changes in the Python API +------------------------- + +* The :meth:`BaseHTTPRequestHandler.send_error <http.server.BaseHTTPRequestHandler.send_error>` + method now has a new optional parameter *extra_headers*. + Subclasses should update their implementations to accept this new parameter. + (Contributed by Andy Ling in :gh:`86809`.) Build changes ============= From 2925322562b8e3b6f9cdbefeba2cf7d1c524c8e2 Mon Sep 17 00:00:00 2001 From: lyc8503 Date: Thu, 7 Aug 2025 13:45:14 +0800 Subject: [PATCH 25/25] fix wrong merge result --- Doc/library/http.server.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index a90cf8f0978b28..ba8d36a74148e9 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -264,12 +264,6 @@ instantiation, of which this module provides three different variants: specifies additional headers to be sent in the response (for instance, ``[("Content-Range", "bytes 3-14/42")]``). - The :attr:`responses` attribute holds the default values for *message* - and *explain* that will be used if no value is provided; for unknown codes - the default value for both is the string ``???``. The body will be empty if - the method is HEAD or the response code is one of the following: :samp:`1{xx}`, - ``204 No Content``, ``205 Reset Content``, or ``304 Not Modified``. - .. versionchanged:: 3.4 The error response includes a Content-Length header. Added the *explain* argument.