Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions .bumpversion.cfg

This file was deleted.

14 changes: 6 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,17 @@
VENV=.venv
VENV_CMD=python3 -m venv
ACTIVATE = $(VENV)/bin/activate
CHEESE=https://pypi.python.org/pypi
BUMPTYPE=patch
BUMPPRE=0


$(VENV)/bin/pip3:
$(VENV)/bin/pip:
$(VENV_CMD) $(VENV)

bootstrap: $(VENV)/bin/pip3
$(VENV)/bin/pip3 install -e .[dev]
bootstrap: $(VENV)/bin/pip
$(VENV)/bin/pip install -e .[dev]

format:
$(VENV)/bin/black .
$(VENV)/bin/codespell
$(VENV)/bin/ruff check --fix
$(VENV)/bin/ruff format

doc: $(VENV)/bin/sphinx-build
. $(ACTIVATE);
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Quickstart
sess = requests.session()
cached_sess = CacheControl(sess)

response = cached_sess.get('http://google.com')
response = cached_sess.get('https://google.com')

If the URL contains any caching based headers, it will cache the
result in a simple dictionary.
Expand Down
3 changes: 2 additions & 1 deletion cachecontrol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

Make it easy to import from cachecontrol without long namespaces.
"""

__author__ = "Eric Larson"
__email__ = "[email protected]"
__version__ = "0.13.1"
__version__ = "0.14.3"

from cachecontrol.adapter import CacheControlAdapter
from cachecontrol.controller import CacheController
Expand Down
29 changes: 18 additions & 11 deletions cachecontrol/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import functools
import types
import weakref
import zlib
from typing import TYPE_CHECKING, Any, Collection, Mapping

Expand Down Expand Up @@ -77,7 +78,7 @@ def send(

return resp

def build_response(
def build_response( # type: ignore[override]
self,
request: PreparedRequest,
response: HTTPResponse,
Expand Down Expand Up @@ -125,25 +126,31 @@ def build_response(
else:
# Wrap the response file with a wrapper that will cache the
# response when the stream has been consumed.
response._fp = CallbackFileWrapper( # type: ignore[attr-defined]
response._fp, # type: ignore[attr-defined]
response._fp = CallbackFileWrapper( # type: ignore[assignment]
response._fp, # type: ignore[arg-type]
functools.partial(
self.controller.cache_response, request, response
self.controller.cache_response, request, weakref.ref(response)
),
)
if response.chunked:
super_update_chunk_length = response._update_chunk_length # type: ignore[attr-defined]
super_update_chunk_length = response.__class__._update_chunk_length

def _update_chunk_length(self: HTTPResponse) -> None:
super_update_chunk_length()
def _update_chunk_length(
weak_self: weakref.ReferenceType[HTTPResponse],
) -> None:
self = weak_self()
if self is None:
return

super_update_chunk_length(self)
if self.chunk_left == 0:
self._fp._close() # type: ignore[attr-defined]
self._fp._close() # type: ignore[union-attr]

response._update_chunk_length = types.MethodType( # type: ignore[attr-defined]
_update_chunk_length, response
response._update_chunk_length = functools.partial( # type: ignore[method-assign]
_update_chunk_length, weakref.ref(response)
)

resp: Response = super().build_response(request, response) # type: ignore[no-untyped-call]
resp: Response = super().build_response(request, response)

# See if we should invalidate the cache.
if request.method in self.invalidating_methods and resp.ok:
Expand Down
1 change: 1 addition & 0 deletions cachecontrol/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""

from __future__ import annotations

from threading import Lock
Expand Down
62 changes: 13 additions & 49 deletions cachecontrol/caches/file_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@

import hashlib
import os
import tempfile
from textwrap import dedent
from typing import IO, TYPE_CHECKING
from pathlib import Path

from cachecontrol.cache import BaseCache, SeparateBodyBaseCache
from cachecontrol.controller import CacheController
Expand All @@ -17,53 +19,12 @@
from filelock import BaseFileLock


def _secure_open_write(filename: str, fmode: int) -> IO[bytes]:
    """Create ``filename`` from scratch and return it opened for binary writing.

    Any existing file at ``filename`` is removed first, then the path is
    re-created exclusively so that the requested mode is the mode of the
    resulting file.

    Args:
        filename: Path of the file to (re)create.
        fmode: Permission bits passed to ``os.open`` for the new file.

    Returns:
        A binary file object opened in ``"wb"`` mode on the new file.

    Raises:
        OSError: If the file cannot be created, for example because something
            re-appeared at ``filename`` between the removal and the open.
    """
    # Write-only, and O_CREAT | O_EXCL so the open only ever succeeds for a
    # *new* file: that guarantees the mode we pass is the mode of the file.
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL

    # Do not follow symlinks, to prevent someone from planting a symlink that
    # we would follow and insecurely open a cache file through (the flag is
    # not available on every platform).
    flags |= getattr(os, "O_NOFOLLOW", 0)

    # On Windows, mark the file as binary.
    flags |= getattr(os, "O_BINARY", 0)

    # Best effort: delete whatever is currently there. If removal fails the
    # exclusive open below still refuses to reuse an existing file.
    try:
        os.remove(filename)
    except OSError:
        pass

    # If a race happens between os.remove and this call, O_CREAT | O_EXCL makes
    # the open raise. Because a lockfile is used, that should only happen if
    # someone is attempting to attack us.
    fd = os.open(filename, flags, fmode)
    try:
        return os.fdopen(fd, "wb")
    except BaseException:
        # Wrapping the descriptor in a file object failed: release the
        # descriptor ourselves, then propagate the original error.
        os.close(fd)
        raise


class _FileCacheMixin:
"""Shared implementation for both FileCache variants."""

def __init__(
self,
directory: str,
directory: str | Path,
forever: bool = False,
filemode: int = 0o0600,
dirmode: int = 0o0700,
Expand All @@ -79,7 +40,7 @@ def __init__(
"""
NOTE: In order to use the FileCache you must have
filelock installed. You can install it via pip:
pip install filelock
pip install cachecontrol[filecache]
"""
)
raise ImportError(notice)
Expand Down Expand Up @@ -121,15 +82,18 @@ def _write(self, path: str, data: bytes) -> None:
Safely write the data to the given path.
"""
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(path), self.dirmode)
except OSError:
pass
dirname = os.path.dirname(path)
os.makedirs(dirname, self.dirmode, exist_ok=True)

with self.lock_class(path + ".lock"):
# Write our actual file
with _secure_open_write(path, self.filemode) as fh:
fh.write(data)
(fd, name) = tempfile.mkstemp(dir=dirname)
try:
os.write(fd, data)
finally:
os.close(fd)
os.chmod(name, self.filemode)
os.replace(name, path)

def _delete(self, key: str, suffix: str) -> None:
name = self._fn(key) + suffix
Expand Down
21 changes: 19 additions & 2 deletions cachecontrol/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
"""
The httplib2 algorithms ported for use with requests.
"""

from __future__ import annotations

import calendar
import logging
import re
import time
import weakref
from email.utils import parsedate_tz
from typing import TYPE_CHECKING, Collection, Mapping

Expand Down Expand Up @@ -142,6 +144,11 @@ def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None:
"""
Load a cached response, or return None if it's not available.
"""
# We do not support caching of partial content: so if the request contains a
# Range header then we don't want to load anything from the cache.
if "Range" in request.headers:
return None

cache_url = request.url
assert cache_url is not None
cache_data = self.cache.get(cache_url)
Expand Down Expand Up @@ -317,7 +324,7 @@ def _cache_set(
def cache_response(
self,
request: PreparedRequest,
response: HTTPResponse,
response_or_ref: HTTPResponse | weakref.ReferenceType[HTTPResponse],
body: bytes | None = None,
status_codes: Collection[int] | None = None,
) -> None:
Expand All @@ -326,6 +333,16 @@ def cache_response(

This assumes a requests Response object.
"""
if isinstance(response_or_ref, weakref.ReferenceType):
response = response_or_ref()
if response is None:
# The weakref can be None only in case the user used streamed request
# and did not consume or close it, and holds no reference to requests.Response.
# In such case, we don't want to cache the response.
return
else:
response = response_or_ref

# From httplib2: Don't cache 206's since we aren't going to
# handle byte range requests
cacheable_status_codes = status_codes or self.cacheable_status_codes
Expand Down Expand Up @@ -480,7 +497,7 @@ def update_cached_response(
cached_response.headers.update(
{
k: v
for k, v in response.headers.items() # type: ignore[no-untyped-call]
for k, v in response.headers.items()
if k.lower() not in excluded_headers
}
)
Expand Down
4 changes: 2 additions & 2 deletions cachecontrol/filewrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ def __init__(
self.__callback = callback

def __getattr__(self, name: str) -> Any:
# The vaguaries of garbage collection means that self.__fp is
# The vagaries of garbage collection mean that self.__fp is
# not always set. By using __getattribute__ and the private
# name[0] allows looking up the attribute value and raising an
# AttributeError when it doesn't exist. This stop thigns from
# AttributeError when it doesn't exist. This stops things from
# infinitely recursing calls to getattr in the case where
# self.__fp hasn't been set.
#
Expand Down
5 changes: 4 additions & 1 deletion cachecontrol/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ def update_headers(self, response: HTTPResponse) -> dict[str, str]:

if "expires" not in response.headers:
date = parsedate(response.headers["date"])
expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc)) # type: ignore[misc]
expires = expire_after(
timedelta(days=1),
date=datetime(*date[:6], tzinfo=timezone.utc), # type: ignore[index,misc]
)
headers["expires"] = datetime_to_header(expires)
headers["cache-control"] = "public"
return headers
Expand Down
Loading
Loading