-
-
Notifications
You must be signed in to change notification settings - Fork 33.2k
gh-86809: Add support for HTTP Range header in HTTPServer #118949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 22 commits
e81d46e
9bc811f
ac7b0fa
d23a60d
e985c67
1ca0ff7
8b9ae26
ca84b1c
b266ff4
2824b9c
b5d9ffc
48a8523
77436b3
c2cc2d4
aa9ec6b
abd519f
a0bca0a
42c69ab
48508f6
7c94aae
5c64648
06b8c80
3033c27
e4b3607
292f194
e4b213c
115606f
2925322
580aa14
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,6 +97,7 @@ | |
| import mimetypes | ||
| import os | ||
| import posixpath | ||
| import re | ||
| import select | ||
| import shutil | ||
| import socket | ||
|
|
@@ -131,6 +132,7 @@ | |
| """ | ||
|
|
||
| DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" | ||
| RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.ASCII | re.IGNORECASE) | ||
|
|
||
| class HTTPServer(socketserver.TCPServer): | ||
|
|
||
|
|
@@ -443,7 +445,7 @@ def handle(self): | |
| while not self.close_connection: | ||
| self.handle_one_request() | ||
|
|
||
| def send_error(self, code, message=None, explain=None): | ||
| def send_error(self, code, message=None, explain=None, *, extra_headers=()): | ||
| """Send and log an error reply. | ||
|
|
||
| Arguments are | ||
|
|
@@ -454,6 +456,7 @@ def send_error(self, code, message=None, explain=None): | |
| defaults to short entry matching the response code | ||
| * explain: a detailed message defaults to the long entry | ||
| matching the response code. | ||
| * extra_headers: extra headers to be included in the response | ||
|
|
||
| This sends an error response (so it must be called before any | ||
| output has been generated), logs the error, and finally sends | ||
|
|
@@ -491,6 +494,8 @@ def send_error(self, code, message=None, explain=None): | |
| body = content.encode('UTF-8', 'replace') | ||
| self.send_header("Content-Type", self.error_content_type) | ||
| self.send_header('Content-Length', str(len(body))) | ||
| for name, value in extra_headers: | ||
| self.send_header(name, value) | ||
| self.end_headers() | ||
|
|
||
| if self.command != 'HEAD' and body: | ||
|
|
@@ -682,7 +687,7 @@ def do_GET(self): | |
| f = self.send_head() | ||
| if f: | ||
| try: | ||
| self.copyfile(f, self.wfile) | ||
| self.copyfile(f, self.wfile, range=self._range) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
If we have a multi-range, we would do:

    for r in self._ranges:
        self.copyfile(f, self.wfile, range=r)
|
||
| finally: | ||
| f.close() | ||
|
|
||
|
|
@@ -705,6 +710,7 @@ def send_head(self): | |
| """ | ||
| path = self.translate_path(self.path) | ||
| f = None | ||
| self._range = self.parse_range() | ||
| if os.path.isdir(path): | ||
| parts = urllib.parse.urlsplit(self.path) | ||
| if not parts.path.endswith('/'): | ||
|
|
@@ -769,9 +775,44 @@ def send_head(self): | |
| f.close() | ||
| return None | ||
|
|
||
| self.send_response(HTTPStatus.OK) | ||
| if self._range: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When we have multi-range, we'll need to do "multiple" passes, so I think we can have a method that takes the range we're trying to parse first and then iterates over the parsed ranges: [code snippet not captured] |
||
| start, end = self._range | ||
| if start is None: | ||
| # parse_range() collapses (None, None) to None as it's invalid | ||
| # https://github.com/python/cpython/pull/118949#discussion_r1912397525 | ||
| assert end is not None | ||
| # `end` here means suffix length | ||
| start = max(0, fs.st_size - end) | ||
lyc8503 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| end = fs.st_size - 1 | ||
picnixz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| elif end is None or end >= fs.st_size: | ||
| end = fs.st_size - 1 | ||
|
|
||
| if start == 0 and end >= fs.st_size - 1: | ||
| # Send entire file | ||
| self._range = None | ||
| elif start >= fs.st_size: | ||
| # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that | ||
| # none of the range values overlap the extent of | ||
| # the resource | ||
| f.close() | ||
| headers = [('Content-Range', f'bytes */{fs.st_size}')] | ||
| self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, | ||
| extra_headers=headers) | ||
| return None | ||
|
|
||
| if self._range: | ||
| self.send_response(HTTPStatus.PARTIAL_CONTENT) | ||
| self.send_header("Content-Range", | ||
| f"bytes {start}-{end}/{fs.st_size}") | ||
| self.send_header("Content-Length", str(end - start + 1)) | ||
|
|
||
| # Store the resolved (start, end) range so copyfile can use it later | ||
| self._range = (start, end) | ||
| else: | ||
| self.send_response(HTTPStatus.OK) | ||
| self.send_header("Accept-Ranges", "bytes") | ||
picnixz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| self.send_header("Content-Length", str(fs.st_size)) | ||
| self.send_header("Content-type", ctype) | ||
| self.send_header("Content-Length", str(fs[6])) | ||
| self.send_header("Last-Modified", | ||
| self.date_time_string(fs.st_mtime)) | ||
| self.end_headers() | ||
|
|
@@ -868,21 +909,34 @@ def translate_path(self, path): | |
| path += '/' | ||
| return path | ||
|
|
||
| def copyfile(self, source, outputfile): | ||
| """Copy all data between two file objects. | ||
| def copyfile(self, source, outputfile, *, range=None): | ||
| """Copy all data between two file objects if range is None. | ||
| Otherwise, copy data between two file objects based on the | ||
| inclusive range (start, end). | ||
|
|
||
| The SOURCE argument is a file object open for reading | ||
| (or anything with a read() method) and the DESTINATION | ||
| argument is a file object open for writing (or | ||
| anything with a write() method). | ||
| (or anything with read() and seek() methods) and the | ||
| DESTINATION argument is a file object open for writing | ||
| (or anything with a write() method). | ||
|
|
||
| The only reason for overriding this would be to change | ||
| the block size or perhaps to replace newlines by CRLF | ||
| -- note however that the default server uses this | ||
| to copy binary data as well. | ||
|
|
||
| """ | ||
| shutil.copyfileobj(source, outputfile) | ||
| if range is None: | ||
| shutil.copyfileobj(source, outputfile) | ||
| else: | ||
| start, end = range | ||
| length = end - start + 1 | ||
| source.seek(start) | ||
lyc8503 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| while length > 0: | ||
| buf = source.read(min(length, shutil.COPY_BUFSIZE)) | ||
| if not buf: | ||
| raise EOFError('File shrank after size was checked') | ||
| length -= len(buf) | ||
| outputfile.write(buf) | ||
|
Comment on lines
+955
to
+963
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Once we have a multi-range, this will be called for each of the ranges we constructed. So I think this part will need to be a private method: [code snippet not captured] and it will be used as [code snippet not captured]. However, for multi-ranges, we'll likely need to rename [identifier not captured]. Alternatively, and this could perhaps be better, we can implement [code snippet not captured] |
||
|
|
||
picnixz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| def guess_type(self, path): | ||
| """Guess the type of a file. | ||
|
|
@@ -909,6 +963,34 @@ def guess_type(self, path): | |
| return guess | ||
| return 'application/octet-stream' | ||
|
|
||
| def parse_range(self): | ||
| """Return a tuple of (start, end) representing the range header in | ||
| the HTTP request. If the range header is missing or not resolvable, | ||
| this returns None. | ||
|
|
||
| This currently only supports single part ranges. | ||
|
|
||
| """ | ||
| range_header = self.headers.get('range') | ||
| if range_header is None: | ||
| return None | ||
| m = RANGE_REGEX_PATTERN.match(range_header) | ||
picnixz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # Ignore invalid Range header and return None | ||
| # https://datatracker.ietf.org/doc/html/rfc9110#name-range | ||
| if m is None: | ||
| return None | ||
|
|
||
| start = int(m.group(1)) if m.group(1) else None | ||
| end = int(m.group(2)) if m.group(2) else None | ||
|
|
||
| if start is None and end is None: | ||
| return None | ||
|
|
||
| if start is not None and end is not None and start > end: | ||
| return None | ||
|
|
||
| return start, end | ||
|
|
||
|
|
||
| # Utilities for CGIHTTPRequestHandler | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`9110#section-14`. |
Uh oh!
There was an error while loading. Please reload this page.