Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ jobs:
- '3.10'
- '3.9'
- '3.8'
- '3.7'
pyopenssl: [0, 1]
runs-on: ${{ matrix.os }}
steps:
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
This document records all notable changes to [HTTPie](https://httpie.io).
This project adheres to [Semantic Versioning](https://semver.org/).

## Unreleased

### Fixed

- Respect `Content-Length` with `--download` when `Content-Encoding` is present to avoid false "Incomplete download" errors. ([#423](https://github.com/httpie/cli/issues/423))

## [3.2.4](https://github.com/httpie/cli/compare/3.2.3...3.2.4) (2024-11-01)

- Fix default certs loading and unpin `requests`. ([#1596](https://github.com/httpie/cli/issues/1596))
Expand Down
8 changes: 8 additions & 0 deletions docs/download.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Download mode

HTTPie's `--download` option saves response bodies to files. When a server
returns a `Content-Encoding` (for example `gzip`), the `Content-Length` header
is treated as the size of the encoded payload, as defined in RFC 9110 § 8.6.
HTTPie writes the body exactly as received — so the saved file stays encoded
(for example, gzip-compressed) — and no longer compares the header to the
post-decompression size.

142 changes: 71 additions & 71 deletions httpie/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Download mode implementation.

"""

import mimetypes
import os
import re
Expand All @@ -12,10 +13,9 @@

import requests

from .context import Environment
from .models import HTTPResponse, OutputOptions
from .output.streams import RawStream
from .context import Environment


PARTIAL_CONTENT = 206

Expand All @@ -37,24 +37,23 @@ def parse_content_range(content_range: str, resumed_from: int) -> int:

"""
if content_range is None:
raise ContentRangeError('Missing Content-Range')
raise ContentRangeError("Missing Content-Range")

pattern = (
r'^bytes (?P<first_byte_pos>\d+)-(?P<last_byte_pos>\d+)'
r'/(\*|(?P<instance_length>\d+))$'
r"^bytes (?P<first_byte_pos>\d+)-(?P<last_byte_pos>\d+)"
r"/(\*|(?P<instance_length>\d+))$"
)
match = re.match(pattern, content_range)

if not match:
raise ContentRangeError(
f'Invalid Content-Range format {content_range!r}')
raise ContentRangeError(f"Invalid Content-Range format {content_range!r}")

content_range_dict = match.groupdict()
first_byte_pos = int(content_range_dict['first_byte_pos'])
last_byte_pos = int(content_range_dict['last_byte_pos'])
first_byte_pos = int(content_range_dict["first_byte_pos"])
last_byte_pos = int(content_range_dict["last_byte_pos"])
instance_length = (
int(content_range_dict['instance_length'])
if content_range_dict['instance_length']
int(content_range_dict["instance_length"])
if content_range_dict["instance_length"]
else None
)

Expand All @@ -64,27 +63,24 @@ def parse_content_range(content_range: str, resumed_from: int) -> int:
# last-byte-pos value, is invalid. The recipient of an invalid
# byte-content-range- spec MUST ignore it and any content
# transferred along with it."
if (first_byte_pos > last_byte_pos
or (instance_length is not None
and instance_length <= last_byte_pos)):
raise ContentRangeError(
f'Invalid Content-Range returned: {content_range!r}')
if first_byte_pos > last_byte_pos or (
instance_length is not None and instance_length <= last_byte_pos
):
raise ContentRangeError(f"Invalid Content-Range returned: {content_range!r}")

if (first_byte_pos != resumed_from
or (instance_length is not None
and last_byte_pos + 1 != instance_length)):
if first_byte_pos != resumed_from or (
instance_length is not None and last_byte_pos + 1 != instance_length
):
# Not what we asked for.
raise ContentRangeError(
f'Unexpected Content-Range returned ({content_range!r})'
f"Unexpected Content-Range returned ({content_range!r})"
f' for the requested Range ("bytes={resumed_from}-")'
)

return last_byte_pos + 1


def filename_from_content_disposition(
content_disposition: str
) -> Optional[str]:
def filename_from_content_disposition(content_disposition: str) -> Optional[str]:
"""
Extract and validate filename from a Content-Disposition header.

Expand All @@ -94,28 +90,28 @@ def filename_from_content_disposition(
"""
# attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz

msg = Message(f'Content-Disposition: {content_disposition}')
msg = Message(f"Content-Disposition: {content_disposition}")
filename = msg.get_filename()
if filename:
# Basic sanitization.
filename = os.path.basename(filename).lstrip('.').strip()
filename = os.path.basename(filename).lstrip(".").strip()
if filename:
return filename


def filename_from_url(url: str, content_type: Optional[str]) -> str:
fn = urlsplit(url).path.rstrip('/')
fn = os.path.basename(fn) if fn else 'index'
if '.' not in fn and content_type:
content_type = content_type.split(';')[0]
if content_type == 'text/plain':
fn = urlsplit(url).path.rstrip("/")
fn = os.path.basename(fn) if fn else "index"
if "." not in fn and content_type:
content_type = content_type.split(";")[0]
if content_type == "text/plain":
# mimetypes returns '.ksh'
ext = '.txt'
ext = ".txt"
else:
ext = mimetypes.guess_extension(content_type)

if ext == '.htm':
ext = '.html'
if ext == ".htm":
ext = ".html"

if ext:
fn += ext
Expand All @@ -136,12 +132,12 @@ def trim_filename(filename: str, max_len: int) -> str:

def get_filename_max_length(directory: str) -> int:
    """
    Return the maximum filename length allowed in `directory`.

    Falls back to 255 when `os.pathconf` or its `PC_NAME_MAX` name
    is not available on the current platform.

    """
    max_len = 255
    if hasattr(os, "pathconf") and "PC_NAME_MAX" in os.pathconf_names:
        max_len = os.pathconf(directory, "PC_NAME_MAX")
    return max_len


def trim_filename_if_needed(filename: str, directory='.', extra=0) -> str:
def trim_filename_if_needed(filename: str, directory=".", extra=0) -> str:
max_len = get_filename_max_length(directory) - extra
if len(filename) > max_len:
filename = trim_filename(filename, max_len)
Expand All @@ -151,7 +147,7 @@ def trim_filename_if_needed(filename: str, directory='.', extra=0) -> str:
def get_unique_filename(filename: str, exists=os.path.exists) -> str:
attempt = 0
while True:
suffix = f'-{attempt}' if attempt > 0 else ''
suffix = f"-{attempt}" if attempt > 0 else ""
try_filename = trim_filename_if_needed(filename, extra=len(suffix))
try_filename += suffix
if not exists(try_filename):
Expand All @@ -161,12 +157,7 @@ def get_unique_filename(filename: str, exists=os.path.exists) -> str:

class Downloader:

def __init__(
self,
env: Environment,
output_file: IO = None,
resume: bool = False
):
def __init__(self, env: Environment, output_file: IO = None, resume: bool = False):
"""
:param resume: Should the download resume if partial download
already exists.
Expand All @@ -190,19 +181,17 @@ def pre_request(self, request_headers: dict):

"""
# Ask the server not to encode the content so that we can resume, etc.
request_headers['Accept-Encoding'] = 'identity'
request_headers["Accept-Encoding"] = "identity"
if self._resume:
bytes_have = os.path.getsize(self._output_file.name)
if bytes_have:
# Set ``Range`` header to resume the download
# TODO: Use "If-Range: mtime" to make sure it's fresh?
request_headers['Range'] = f'bytes={bytes_have}-'
request_headers["Range"] = f"bytes={bytes_have}-"
self._resumed_from = bytes_have

def start(
self,
initial_url: str,
final_response: requests.Response
self, initial_url: str, final_response: requests.Response
) -> Tuple[RawStream, IO]:
"""
Initiate and return a stream for `response` body with progress
Expand All @@ -216,13 +205,27 @@ def start(
"""
assert not self.status.time_started

# FIXME: some servers might still send Content-Encoding: gzip
# <https://github.com/httpie/cli/issues/423>
# Some servers may still send a compressed body even though
# we ask for identity encoding. In that case, ``Content-Length``
# refers to the encoded size (RFC 9110 § 8.6), so we disable
# automatic decoding to make our byte tracking match.
try:
total_size = int(final_response.headers['Content-Length'])
total_size = int(final_response.headers["Content-Length"])
except (KeyError, ValueError, TypeError):
total_size = None

content_encoding = final_response.headers.get("Content-Encoding")
if content_encoding:
final_response.raw.decode_content = False

class EncodedHTTPResponse(HTTPResponse):
def iter_body(self, chunk_size=1): # type: ignore[override]
return final_response.raw.stream(chunk_size, decode_content=False)

response_msg = EncodedHTTPResponse(final_response)
else:
response_msg = HTTPResponse(final_response)

if not self._output_file:
self._output_file = self._get_output_file_from_response(
initial_url=initial_url,
Expand All @@ -232,8 +235,7 @@ def start(
# `--output, -o` provided
if self._resume and final_response.status_code == PARTIAL_CONTENT:
total_size = parse_content_range(
final_response.headers.get('Content-Range'),
self._resumed_from
final_response.headers.get("Content-Range"), self._resumed_from
)

else:
Expand All @@ -244,17 +246,19 @@ def start(
except OSError:
pass # stdout

output_options = OutputOptions.from_message(final_response, headers=False, body=True)
output_options = OutputOptions.from_message(
final_response, headers=False, body=True
)
stream = RawStream(
msg=HTTPResponse(final_response),
msg=response_msg,
output_options=output_options,
on_body_chunk_downloaded=self.chunk_downloaded,
)

self.status.started(
output_file=self._output_file,
resumed_from=self._resumed_from,
total_size=total_size
total_size=total_size,
)

return stream, self._output_file
Expand Down Expand Up @@ -292,16 +296,17 @@ def _get_output_file_from_response(
) -> IO:
# Output file not specified. Pick a name that doesn't exist yet.
filename = None
if 'Content-Disposition' in final_response.headers:
if "Content-Disposition" in final_response.headers:
filename = filename_from_content_disposition(
final_response.headers['Content-Disposition'])
final_response.headers["Content-Disposition"]
)
if not filename:
filename = filename_from_url(
url=initial_url,
content_type=final_response.headers.get('Content-Type'),
content_type=final_response.headers.get("Content-Type"),
)
unique_filename = get_unique_filename(filename)
return open(unique_filename, buffering=0, mode='a+b')
return open(unique_filename, buffering=0, mode="a+b")


class DownloadStatus:
Expand All @@ -325,11 +330,11 @@ def started(self, output_file, resumed_from=0, total_size=None):
def start_display(self, output_file):
from httpie.output.ui.rich_progress import (
DummyDisplay,
ProgressDisplay,
StatusDisplay,
ProgressDisplay
)

message = f'Downloading to {output_file.name}'
message = f"Downloading to {output_file.name}"
if self.env.show_displays:
if self.total_size is None:
# Rich does not support progress bars without a total
Expand All @@ -341,9 +346,7 @@ def start_display(self, output_file):
self.display = DummyDisplay(self.env)

self.display.start(
total=self.total_size,
at=self.downloaded,
description=message
total=self.total_size, at=self.downloaded, description=message
)

def chunk_downloaded(self, size):
Expand All @@ -357,10 +360,7 @@ def has_finished(self):

@property
def time_spent(self):
    """
    Return `time_finished - time_started`, or `None` when either of
    the two timestamps has not been set yet.

    """
    if self.time_started is not None and self.time_finished is not None:
        return self.time_finished - self.time_started
    else:
        return None
Expand All @@ -369,9 +369,9 @@ def finished(self):
assert self.time_started is not None
assert self.time_finished is None
self.time_finished = monotonic()
if hasattr(self, 'display'):
if hasattr(self, "display"):
self.display.stop(self.time_spent)

def terminate(self):
    """
    Stop the display, passing it the current `time_spent`.

    Does nothing when no `display` attribute has been set on the
    instance.

    """
    if hasattr(self, "display"):
        self.display.stop(self.time_spent)
Loading
Loading