- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 33.2k
gh-86809: Add support for HTTP Range header in HTTPServer #118949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 28 commits
e81d46e
              9bc811f
              ac7b0fa
              d23a60d
              e985c67
              1ca0ff7
              8b9ae26
              ca84b1c
              b266ff4
              2824b9c
              b5d9ffc
              48a8523
              77436b3
              c2cc2d4
              aa9ec6b
              abd519f
              a0bca0a
              42c69ab
              48508f6
              7c94aae
              5c64648
              06b8c80
              3033c27
              e4b3607
              292f194
              e4b213c
              115606f
              2925322
              580aa14
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -78,6 +78,7 @@ | |
| import mimetypes | ||
| import os | ||
| import posixpath | ||
| import re | ||
| import shutil | ||
| import socket | ||
| import socketserver | ||
|  | @@ -111,6 +112,7 @@ | |
| """ | ||
|  | ||
| DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" | ||
| RANGE_REGEX_PATTERN = re.compile(r'bytes=(\d*)-(\d*)$', re.ASCII | re.IGNORECASE) | ||
|  | ||
| class HTTPServer(socketserver.TCPServer): | ||
|  | ||
|  | @@ -464,7 +466,7 @@ def handle(self): | |
| while not self.close_connection: | ||
| self.handle_one_request() | ||
|  | ||
| def send_error(self, code, message=None, explain=None): | ||
| def send_error(self, code, message=None, explain=None, *, extra_headers=()): | ||
| """Send and log an error reply. | ||
|  | ||
| Arguments are | ||
|  | @@ -475,6 +477,7 @@ def send_error(self, code, message=None, explain=None): | |
| defaults to short entry matching the response code | ||
| * explain: a detailed message defaults to the long entry | ||
| matching the response code. | ||
| * extra_headers: extra headers to be included in the response | ||
|  | ||
| This sends an error response (so it must be called before any | ||
| output has been generated), logs the error, and finally sends | ||
|  | @@ -512,6 +515,8 @@ def send_error(self, code, message=None, explain=None): | |
| body = content.encode('UTF-8', 'replace') | ||
| self.send_header("Content-Type", self.error_content_type) | ||
| self.send_header('Content-Length', str(len(body))) | ||
| for name, value in extra_headers: | ||
| self.send_header(name, value) | ||
| self.end_headers() | ||
|  | ||
| if self.command != 'HEAD' and body: | ||
|  | @@ -703,7 +708,7 @@ def do_GET(self): | |
| f = self.send_head() | ||
| if f: | ||
| try: | ||
| self.copyfile(f, self.wfile) | ||
| self.copyfile(f, self.wfile, range=self._range) | ||
| finally: | ||
| f.close() | ||
|  | ||
|  | @@ -726,6 +731,7 @@ def send_head(self): | |
| """ | ||
| path = self.translate_path(self.path) | ||
| f = None | ||
| self._range = self.parse_range() | ||
| if os.path.isdir(path): | ||
| parts = urllib.parse.urlsplit(self.path) | ||
| if not parts.path.endswith(('/', '%2f', '%2F')): | ||
|  | @@ -790,9 +796,44 @@ def send_head(self): | |
| f.close() | ||
| return None | ||
|  | ||
| self.send_response(HTTPStatus.OK) | ||
| if self._range: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When we have multi-range, we'll need to do "multiple" passes so I think we can have a method that takes the range we're trying to parse first and then iterate over the parsed ranges:  | ||
| start, end = self._range | ||
| if start is None: | ||
| # parse_range() collapses (None, None) to None as it's invalid | ||
| # https://github.com/python/cpython/pull/118949#discussion_r1912397525 | ||
| assert end is not None | ||
| # `end` here means suffix length | ||
| start = max(0, fs.st_size - end) | ||
|         
                  lyc8503 marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| end = fs.st_size - 1 | ||
|         
                  picnixz marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| elif end is None or end >= fs.st_size: | ||
| end = fs.st_size - 1 | ||
|  | ||
| if start == 0 and end >= fs.st_size - 1: | ||
| # Send entire file | ||
| self._range = None | ||
| elif start >= fs.st_size: | ||
| # 416 REQUESTED_RANGE_NOT_SATISFIABLE means that | ||
| # none of the range values overlap the extent of | ||
| # the resource | ||
| f.close() | ||
| headers = [('Content-Range', f'bytes */{fs.st_size}')] | ||
| self.send_error(HTTPStatus.REQUESTED_RANGE_NOT_SATISFIABLE, | ||
| extra_headers=headers) | ||
| return None | ||
|  | ||
| if self._range: | ||
| self.send_response(HTTPStatus.PARTIAL_CONTENT) | ||
| self.send_header("Content-Range", | ||
| f"bytes {start}-{end}/{fs.st_size}") | ||
| self.send_header("Content-Length", str(end - start + 1)) | ||
|  | ||
| # Update range to be sent to be used later in copyfile | ||
| self._range = (start, end) | ||
| else: | ||
| self.send_response(HTTPStatus.OK) | ||
| self.send_header("Accept-Ranges", "bytes") | ||
|         
                  picnixz marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| self.send_header("Content-Length", str(fs.st_size)) | ||
| self.send_header("Content-type", ctype) | ||
| self.send_header("Content-Length", str(fs[6])) | ||
| self.send_header("Last-Modified", | ||
| self.date_time_string(fs.st_mtime)) | ||
| self.end_headers() | ||
|  | @@ -892,21 +933,34 @@ def translate_path(self, path): | |
| path += '/' | ||
| return path | ||
|  | ||
| def copyfile(self, source, outputfile): | ||
| """Copy all data between two file objects. | ||
| def copyfile(self, source, outputfile, *, range=None): | ||
| """Copy all data between two file objects if range is None. | ||
| Otherwise, copy data between two file objects based on the | ||
| inclusive range (start, end). | ||
|  | ||
| The SOURCE argument is a file object open for reading | ||
| (or anything with a read() method) and the DESTINATION | ||
| argument is a file object open for writing (or | ||
| anything with a write() method). | ||
| (or anything with read() and seek() methods) and the | ||
| DESTINATION argument is a file object open for writing | ||
| (or anything with a write() method). | ||
|  | ||
| The only reason for overriding this would be to change | ||
| the block size or perhaps to replace newlines by CRLF | ||
| -- note however that the default server uses this | ||
| to copy binary data as well. | ||
|  | ||
| """ | ||
| shutil.copyfileobj(source, outputfile) | ||
| if range is None: | ||
| shutil.copyfileobj(source, outputfile) | ||
| else: | ||
| start, end = range | ||
| length = end - start + 1 | ||
| source.seek(start) | ||
|         
                  lyc8503 marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| while length > 0: | ||
| buf = source.read(min(length, shutil.COPY_BUFSIZE)) | ||
| if not buf: | ||
| raise EOFError('File shrank after size was checked') | ||
| length -= len(buf) | ||
| outputfile.write(buf) | ||
| 
      Comment on lines
    
      +955
     to 
      +963
    
   There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Once we have a multi-range, this will be called for each of the ranges we constructed. So I think this part will need to be a private method: and it will be used as However, for multi-ranges, we'll likely need to rename  Alternatively, and this could be perhaps better, we can implement  | ||
|  | ||
|         
                  picnixz marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| def guess_type(self, path): | ||
| """Guess the type of a file. | ||
|  | @@ -933,6 +987,35 @@ def guess_type(self, path): | |
| return guess | ||
| return 'application/octet-stream' | ||
|  | ||
| def parse_range(self): | ||
| """Return a tuple of (start, end) representing the range header in | ||
| the HTTP request. If the range header is missing or not resolvable, | ||
| this returns None. | ||
|  | ||
| This currently only supports single part ranges. | ||
|  | ||
| """ | ||
| range_header = self.headers.get('range') | ||
| if range_header is None: | ||
| return None | ||
| m = RANGE_REGEX_PATTERN.match(range_header) | ||
|         
                  picnixz marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| # Ignore invalid Range header and return None | ||
| # https://datatracker.ietf.org/doc/html/rfc9110#name-range | ||
| if m is None: | ||
| return None | ||
|  | ||
| start = int(m.group(1)) if m.group(1) else None | ||
| end = int(m.group(2)) if m.group(2) else None | ||
|  | ||
| if start is None and end is None: | ||
| return None | ||
|  | ||
| if start is not None and end is not None and start > end: | ||
| return None | ||
|  | ||
| return start, end | ||
|  | ||
|  | ||
|  | ||
| nobody = None | ||
|  | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Added support for HTTP single-part range requests on files to :class:`~http.server.SimpleHTTPRequestHandler`, as specified in :rfc:`9110#section-14`. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we have a multi-range, we would do: