diff --git a/docs/html/topics/caching.md b/docs/html/topics/caching.md
index 8d6c40f112d..a1fe50dad24 100644
--- a/docs/html/topics/caching.md
+++ b/docs/html/topics/caching.md
@@ -139,9 +139,17 @@ The {ref}`pip cache` command can be used to manage pip's cache.
### Listing cached files
-`pip cache list` will list all wheel files from pip's cache.
+`pip cache list` will list locally built wheel files from pip's cache.
-`pip cache list setuptools` will list all setuptools-related wheel files from pip's cache.
+`pip cache list setuptools` will list locally built wheel files related to setuptools from pip's cache.
+
+`pip cache list --http` will list only HTTP cache files. Package names are extracted by inspecting the cached file content (the wheel or tarball structure); files whose package names cannot be identified are not shown.
+
+`pip cache list --all` will list both locally built wheels and HTTP cache files in a unified list.
+
+When using `--all`, HTTP cached files are marked with a `[HTTP cached]` suffix to distinguish them from locally built wheels.
+
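+For example, a cache holding one HTTP-cached download and one locally built wheel might be listed like this (names and sizes are illustrative):
+
+```console
+$ pip cache list --all
+ - setuptools-68.0.0-py3-none-any.whl (804 kB) [HTTP cached]
+ - yyy-1.2.3-py3-none-any.whl (8.1 kB)
+```
+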
+You can also pass `--format=abspath` to print absolute paths instead of human-friendly filenames and sizes.
## Disabling caching
diff --git a/news/10460.feature.rst b/news/10460.feature.rst
new file mode 100644
index 00000000000..c64f238d851
--- /dev/null
+++ b/news/10460.feature.rst
@@ -0,0 +1 @@
+Add ``--http`` and ``--all`` flags to the ``pip cache list`` command. By default, the command shows only locally built wheels (backward compatible). The ``--http`` flag shows only HTTP cached packages, and the ``--all`` flag shows both in a unified list, with HTTP packages marked by a ``[HTTP cached]`` suffix. Package names for HTTP cache entries are extracted offline by inspecting the cached wheel or tarball structure, so the listing shows complete filenames with platform tags and accurate file sizes.
diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py
index c8e7aede687..a118973aa01 100644
--- a/src/pip/_internal/commands/cache.py
+++ b/src/pip/_internal/commands/cache.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
import os
import textwrap
from optparse import Values
@@ -21,7 +23,7 @@ class CacheCommand(Command):
- dir: Show the cache directory.
- info: Show information about the cache.
- - list: List filenames of packages stored in the cache.
+ - list: List filenames of cached packages (locally built wheels and HTTP downloads).
- remove: Remove one or more package from the cache.
- purge: Remove all items from the cache.
@@ -32,7 +34,7 @@ class CacheCommand(Command):
usage = """
%prog dir
%prog info
- %prog list [<pattern>] [--format=[human, abspath]]
+ %prog list [<pattern>] [--format=[human, abspath]] [--http] [--all]
%prog remove <pattern>
%prog purge
"""
@@ -47,6 +49,22 @@ def add_options(self) -> None:
help="Select the output format among: human (default) or abspath",
)
+ self.cmd_opts.add_option(
+ "--http",
+ action="store_true",
+ dest="list_http",
+ default=False,
+ help="List HTTP cached package files",
+ )
+
+ self.cmd_opts.add_option(
+ "--all",
+ action="store_true",
+ dest="list_all",
+ default=False,
+ help="List both HTTP cached and locally built package files",
+ )
+
self.parser.insert_option_group(0, self.cmd_opts)
def handler_map(self) -> dict[str, Callable[[Values, list[str]], None]]:
@@ -141,28 +159,136 @@ def list_cache_items(self, options: Values, args: list[str]) -> None:
else:
pattern = "*"
- files = self._find_wheels(options, pattern)
+ # Determine what to show based on flags
+ # Default: show only wheels (backward compatible)
+ # --http: show only HTTP cache
+ # --all: show both wheels and HTTP cache (unified)
+ if options.list_all:
+ show_wheels = True
+ show_http = True
+ unified = True
+ elif options.list_http:
+ show_wheels = False
+ show_http = True
+ unified = False
+ else:
+ # Default behavior
+ show_wheels = True
+ show_http = False
+ unified = False
+
+ wheel_files = []
+ if show_wheels:
+ wheel_files = self._find_wheels(options, pattern)
+
+ http_files = []
+ if show_http:
+ http_files = self._get_http_cache_files_with_metadata(options)
+
if options.list_format == "human":
- self.format_for_human(files)
+ if unified:
+ self.format_for_human_unified_all(wheel_files, http_files)
+ else:
+ self.format_for_human_separated(
+ wheel_files, http_files, show_http, show_wheels
+ )
else:
- self.format_for_abspath(files)
+ self.format_for_abspath_unified(wheel_files, http_files)
+
+ def format_for_human_separated(
+ self,
+ wheel_files: list[str],
+ http_files: list[tuple[str, str]],
+ show_http: bool,
+ show_wheels: bool,
+ ) -> None:
+ """Format wheel and HTTP cache files in separate sections."""
+ if not wheel_files and not http_files:
+ if show_http:
+ logger.info("No cached files.")
+ else:
+ logger.info("No locally built wheels cached.")
+ return
- def format_for_human(self, files: list[str]) -> None:
- if not files:
- logger.info("No locally built wheels cached.")
+ # When showing HTTP files only, use a separate section
+ if show_http and http_files:
+ logger.info("HTTP cache files:")
+ formatted = []
+ for cache_file, filename in http_files:
+ # Use body file size if available
+ body_file = cache_file + ".body"
+ if os.path.exists(body_file):
+ size = filesystem.format_file_size(body_file)
+ else:
+ size = filesystem.format_file_size(cache_file)
+
+                # Every entry here has a filename: entries without an
+                # extractable name were already filtered out in
+                # _get_http_cache_files_with_metadata.
+ formatted.append(f" - {filename} ({size})")
+
+ logger.info("\n".join(sorted(formatted)))
+
+ # When showing wheels, list them
+ if show_wheels and wheel_files:
+ if show_http and http_files:
+ logger.info("") # Add spacing between sections
+ formatted = []
+ for filename in wheel_files:
+ wheel = os.path.basename(filename)
+ size = filesystem.format_file_size(filename)
+ formatted.append(f" - {wheel} ({size})")
+
+ logger.info("\n".join(sorted(formatted)))
+
+ def format_for_human_unified_all(
+ self,
+ wheel_files: list[str],
+ http_files: list[tuple[str, str]],
+ ) -> None:
+ """Format wheel and HTTP cache files in a unified list with
+ [HTTP cached] suffix.
+ """
+ if not wheel_files and not http_files:
+ logger.info("No cached files.")
return
- results = []
- for filename in files:
+ formatted = []
+
+ # Add HTTP files with suffix
+ for cache_file, filename in http_files:
+ # Use body file size if available
+ body_file = cache_file + ".body"
+ if os.path.exists(body_file):
+ size = filesystem.format_file_size(body_file)
+ else:
+ size = filesystem.format_file_size(cache_file)
+
+ formatted.append(f" - {filename} ({size}) [HTTP cached]")
+
+ # Add wheel files without suffix
+ for filename in wheel_files:
wheel = os.path.basename(filename)
size = filesystem.format_file_size(filename)
- results.append(f" - {wheel} ({size})")
- logger.info("Cache contents:\n")
- logger.info("\n".join(sorted(results)))
+ formatted.append(f" - {wheel} ({size})")
+
+ logger.info("\n".join(sorted(formatted)))
- def format_for_abspath(self, files: list[str]) -> None:
- if files:
- logger.info("\n".join(sorted(files)))
+ def format_for_abspath_unified(
+ self, wheel_files: list[str], http_files: list[tuple[str, str]]
+ ) -> None:
+ """Format wheel and HTTP cache files as absolute paths."""
+ all_files = []
+
+ # Add wheel files
+ all_files.extend(wheel_files)
+
+ # Add HTTP cache files (only those with extracted filenames)
+ for cache_file, _filename in http_files:
+ all_files.append(cache_file)
+
+ if all_files:
+ logger.info("\n".join(sorted(all_files)))
def remove_cache_items(self, options: Values, args: list[str]) -> None:
if len(args) > 1:
@@ -229,3 +355,130 @@ def _find_wheels(self, options: Values, pattern: str) -> list[str]:
pattern = pattern + ("*.whl" if "-" in pattern else "-*.whl")
return filesystem.find_files(wheel_dir, pattern)
+
+ def _get_http_cache_files_with_metadata(
+ self, options: Values
+ ) -> list[tuple[str, str]]:
+ """Get HTTP cache files with filenames from package content inspection.
+
+ Extracts filenames by reading the cached package structure:
+ - Wheel files: Reads .dist-info/WHEEL metadata for complete filename with tags
+ - Tarball files: Reads tar structure to extract package name from root directory
+
+ Returns a list of tuples: (cache_file_path, filename)
+ Only returns files where a filename could be successfully extracted.
+ """
+ from pip._vendor.cachecontrol.serialize import Serializer
+
+ http_files = self._find_http_files(options)
+ result = []
+
+ serializer = Serializer()
+
+ for cache_file in http_files:
+ # Skip .body files as we only want metadata files
+ if cache_file.endswith(".body"):
+ continue
+
+ filename = None
+ try:
+ # Read the cached metadata
+ with open(cache_file, "rb") as f:
+ cached_data = f.read()
+
+                # Parse the cachecontrol header: "cc=<serde_version>," followed
+                # by the msgpack-encoded response metadata.
+                prefix = f"cc={serializer.serde_version},".encode()
+                if cached_data.startswith(prefix):
+                    from pip._vendor import msgpack
+
+                    data = cached_data[len(prefix) :]
+                    cached = msgpack.loads(data, raw=False)
+
+ headers = cached.get("response", {}).get("headers", {})
+ content_type = headers.get("content-type", "")
+
+ # Extract filename from body content
+ body_file = cache_file + ".body"
+ if os.path.exists(body_file):
+ filename = self._extract_filename_from_body(
+ body_file, content_type
+ )
+ except Exception:
+ # If we can't read/parse the file, just skip trying to extract name
+ pass
+
+ # Only include files where we successfully extracted a filename
+ if filename:
+ result.append((cache_file, filename))
+
+ return result
+
+ def _extract_filename_from_body(
+ self, body_file: str, content_type: str
+ ) -> str | None:
+ """Extract filename by inspecting the body content.
+
+ This works offline by examining the downloaded file structure.
+ """
+ try:
+ # Check if it's a wheel file (ZIP format)
+ if "application/octet-stream" in content_type or not content_type:
+ # Try to read as a wheel (ZIP file)
+ import zipfile
+
+ try:
+ with zipfile.ZipFile(body_file, "r") as zf:
+ # Wheel files contain a .dist-info directory
+ names = zf.namelist()
+ dist_info_dir = None
+ for name in names:
+ if ".dist-info/" in name:
+ dist_info_dir = name.split("/")[0]
+ break
+
+ if dist_info_dir and dist_info_dir.endswith(".dist-info"):
+ # Read WHEEL metadata to get the full wheel name
+ wheel_file = f"{dist_info_dir}/WHEEL"
+ if wheel_file in names:
+ wheel_content = zf.read(wheel_file).decode("utf-8")
+                            # Collect "Tag:" entries from the WHEEL metadata
+ tags = []
+ for line in wheel_content.split("\n"):
+ if line.startswith("Tag:"):
+ tag = line.split(":", 1)[1].strip()
+ tags.append(tag)
+
+ if tags:
+ # Use first tag to construct filename
+ # Format: {name}-{version}.dist-info
+ pkg_info = dist_info_dir[: -len(".dist-info")]
+ # Tags format: py3-none-any
+ tag = tags[0]
+ return f"{pkg_info}-{tag}.whl"
+
+ # Fallback: just use name-version.whl
+ pkg_info = dist_info_dir[: -len(".dist-info")]
+ return f"{pkg_info}.whl"
+ except (zipfile.BadZipFile, KeyError, UnicodeDecodeError):
+ pass
+
+ # Try to read as a tarball
+ import tarfile
+
+ try:
+ with tarfile.open(body_file, "r:*") as tf:
+ # Get the first member to determine the package name
+ members = tf.getmembers()
+ if members:
+ # Tarball usually has format: package-version/...
+ first_name = members[0].name
+ pkg_dir = first_name.split("/")[0]
+ if pkg_dir and "-" in pkg_dir:
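+                        # Assume gzip compression (.tar.gz), the common sdist
+                        # format on PyPI, even though the tar was opened "r:*"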
+ return f"{pkg_dir}.tar.gz"
+ except (tarfile.TarError, KeyError):
+ pass
+
+ except Exception:
+ pass
+
+ return None
diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py
index bd1f75a4177..f8292ff1255 100644
--- a/tests/functional/test_cache.py
+++ b/tests/functional/test_cache.py
@@ -52,18 +52,56 @@ def wheel_cache_files(wheel_cache_dir: str) -> list[str]:
@pytest.fixture
def populate_http_cache(http_cache_dir: str) -> list[tuple[str, str]]:
+ import zipfile
+
+    from pip._vendor import msgpack
+    from pip._vendor.cachecontrol.serialize import Serializer
+
destination = os.path.join(http_cache_dir, "arbitrary", "pathname")
os.makedirs(destination)
- files = [
- ("aaaaaaaaa", os.path.join(destination, "aaaaaaaaa")),
- ("bbbbbbbbb", os.path.join(destination, "bbbbbbbbb")),
- ("ccccccccc", os.path.join(destination, "ccccccccc")),
+ files = []
+
+ # Create a few cache entries with proper wheel body files
+ wheel_entries = [
+ ("test_package-1.0.0-py3-none-any.whl", "test_package", "1.0.0"),
+ ("another-2.3.4-py3-none-any.whl", "another", "2.3.4"),
]
- for _name, filename in files:
- with open(filename, "w"):
- pass
+ for wheel_filename, pkg_name, version in wheel_entries:
+ cache_file = os.path.join(destination, "cached_" + wheel_filename)
+ body_file = cache_file + ".body"
+
+ # Create the .body file as a minimal wheel
+ with zipfile.ZipFile(body_file, "w") as zf:
+ dist_info = f"{pkg_name}-{version}.dist-info"
+ # Add WHEEL file
+ wheel_content = "Wheel-Version: 1.0\nTag: py3-none-any\n"
+ zf.writestr(f"{dist_info}/WHEEL", wheel_content)
+ # Add METADATA file
+ metadata_content = (
+ f"Metadata-Version: 2.1\nName: {pkg_name}\nVersion: {version}\n"
+ )
+ zf.writestr(f"{dist_info}/METADATA", metadata_content)
+
+ # Create the cache metadata file
+ cached_data = {
+ "response": {
+ "body": b"",
+ "headers": {
+ "content-type": "application/octet-stream",
+ },
+ "status": 200,
+ "version": 11,
+ "reason": "OK",
+ "decode_content": True,
+ }
+ }
+
+ with open(cache_file, "wb") as f:
+ f.write(b"cc=4,")
+ f.write(msgpack.dumps(cached_data, use_bin_type=True))
+
+ files.append((pkg_name, cache_file))
return files
@@ -370,9 +408,8 @@ def test_cache_purge(
wheels."""
result = script.pip("cache", "purge", "--verbose")
- assert remove_matches_http("aaaaaaaaa", result)
- assert remove_matches_http("bbbbbbbbb", result)
- assert remove_matches_http("ccccccccc", result)
+ assert remove_matches_http("cached_test_package-1.0.0-py3-none-any.whl", result)
+ assert remove_matches_http("cached_another-2.3.4-py3-none-any.whl", result)
assert remove_matches_wheel("yyy-1.2.3", result)
assert remove_matches_wheel("zzz-4.5.6", result)
@@ -400,6 +437,41 @@ def test_cache_purge_too_many_args(
assert os.path.exists(filename)
+@pytest.mark.usefixtures("populate_http_cache")
+def test_cache_list_with_http_flag(script: PipTestEnvironment) -> None:
+ """Running `pip cache list --http` should list HTTP cache files."""
+ result = script.pip("cache", "list", "--http")
+
+ # Should show HTTP cache files section
+ assert "HTTP cache files:" in result.stdout
+
+ # Should list cache files with extracted wheel names
+ assert "test_package-1.0.0-py3-none-any.whl" in result.stdout
+ assert "another-2.3.4-py3-none-any.whl" in result.stdout
+
+
+@pytest.mark.usefixtures("populate_http_cache")
+def test_cache_list_with_http_flag_abspath(script: PipTestEnvironment) -> None:
+ """Running `pip cache list --http --format=abspath` should list full paths."""
+ result = script.pip("cache", "list", "--http", "--format=abspath")
+
+    # Should print at least one line, and every line should be an absolute path
+    lines = [line for line in result.stdout.strip().split("\n") if line]
+    assert lines
+    for line in lines:
+        assert os.path.isabs(line)
+
+
+@pytest.mark.usefixtures("empty_wheel_cache")
+def test_cache_list_with_http_flag_empty(script: PipTestEnvironment) -> None:
+ """Test `pip cache list --http` with empty cache."""
+ result = script.pip("cache", "list", "--http")
+
+ # Should show no cached files message
+ assert "No cached files." in result.stdout
+
+
@pytest.mark.parametrize("command", ["info", "list", "remove", "purge"])
def test_cache_abort_when_no_cache_dir(
script: PipTestEnvironment, command: str
diff --git a/tests/unit/test_cache_command.py b/tests/unit/test_cache_command.py
new file mode 100644
index 00000000000..62f02d65462
--- /dev/null
+++ b/tests/unit/test_cache_command.py
@@ -0,0 +1,202 @@
+"""Tests for the cache command with HTTP cache listing functionality."""
+
+import os
+import tempfile
+from optparse import Values
+
+from pip._vendor.cachecontrol.serialize import Serializer
+
+from pip._internal.commands.cache import CacheCommand
+
+
+class TestGetHttpCacheFilesWithMetadata:
+ """Tests for _get_http_cache_files_with_metadata method."""
+
+ def test_extracts_filename_from_wheel_body(self) -> None:
+ """Test that filenames are extracted from wheel file bodies."""
+ import zipfile
+
+ with tempfile.TemporaryDirectory() as cache_dir:
+ cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e")
+ os.makedirs(cache_subdir, exist_ok=True)
+
+ cache_file = os.path.join(cache_subdir, "test_cache_file")
+
+ # Create a minimal wheel file structure
+ body_file = cache_file + ".body"
+ with zipfile.ZipFile(body_file, "w") as zf:
+ # Wheels have a .dist-info directory
+ zf.writestr("test_package-1.0.0.dist-info/WHEEL", "Wheel-Version: 1.0")
+ zf.writestr(
+ "test_package-1.0.0.dist-info/METADATA", "Name: test-package"
+ )
+
+ # Create cache metadata
+ cache_data = {
+ "response": {
+ "body": b"",
+ "headers": {
+ "content-type": "application/octet-stream",
+ },
+ "status": 200,
+ "version": 11,
+ "reason": "OK",
+ "decode_content": False,
+ },
+ "vary": {},
+ }
+
+ s = Serializer()
+ serialized = s.serialize(cache_data)
+ full_data = f"cc={s.serde_version},".encode() + serialized
+
+ with open(cache_file, "wb") as f:
+ f.write(full_data)
+
+ options = Values()
+ options.cache_dir = cache_dir
+
+ cmd = CacheCommand("cache", "Test cache command")
+ result = cmd._get_http_cache_files_with_metadata(options)
+
+ # Should extract filename from wheel structure
+ assert len(result) == 1
+ assert result[0][0] == cache_file
+ assert result[0][1] == "test_package-1.0.0.whl"
+
+ def test_extracts_filename_from_tarball_body(self) -> None:
+ """Test that filenames are extracted from tarball file bodies."""
+ import tarfile
+
+ with tempfile.TemporaryDirectory() as cache_dir:
+ cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e")
+ os.makedirs(cache_subdir, exist_ok=True)
+
+ cache_file = os.path.join(cache_subdir, "test_cache_file")
+
+ # Create a minimal tarball structure
+ body_file = cache_file + ".body"
+ with tarfile.open(body_file, "w:gz") as tf:
+ # Tarballs typically have package-version/ as root
+ import io
+
+ data = b"test content"
+ tarinfo = tarfile.TarInfo(name="mypackage-2.0.0/setup.py")
+ tarinfo.size = len(data)
+ tf.addfile(tarinfo, io.BytesIO(data))
+
+ # Create cache metadata
+ cache_data = {
+ "response": {
+ "body": b"",
+ "headers": {
+ "content-type": "application/octet-stream",
+ },
+ "status": 200,
+ "version": 11,
+ "reason": "OK",
+ "decode_content": False,
+ },
+ "vary": {},
+ }
+
+ s = Serializer()
+ serialized = s.serialize(cache_data)
+ full_data = f"cc={s.serde_version},".encode() + serialized
+
+ with open(cache_file, "wb") as f:
+ f.write(full_data)
+
+ options = Values()
+ options.cache_dir = cache_dir
+
+ cmd = CacheCommand("cache", "Test cache command")
+ result = cmd._get_http_cache_files_with_metadata(options)
+
+ # Should extract filename from tarball structure
+ assert len(result) == 1
+ assert result[0][0] == cache_file
+ assert result[0][1] == "mypackage-2.0.0.tar.gz"
+
+ def test_handles_files_without_extractable_names(self) -> None:
+ """Test that files without extractable package names are excluded."""
+ with tempfile.TemporaryDirectory() as cache_dir:
+ # Create nested directory structure
+ cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e")
+ os.makedirs(cache_subdir, exist_ok=True)
+
+ # Create a cache file for non-package content (HTML)
+ cache_file = os.path.join(cache_subdir, "test_cache_file")
+
+ cache_data = {
+ "response": {
+ "body": b"",
+ "headers": {
+ "content-type": "text/html",
+ },
+ "status": 200,
+ "version": 11,
+ "reason": "OK",
+ "decode_content": False,
+ },
+ "vary": {},
+ }
+
+ s = Serializer()
+ serialized = s.serialize(cache_data)
+ full_data = f"cc={s.serde_version},".encode() + serialized
+
+ with open(cache_file, "wb") as f:
+ f.write(full_data)
+
+ # Create mock options
+ options = Values()
+ options.cache_dir = cache_dir
+
+ # Test the method
+ cmd = CacheCommand("cache", "Test cache command")
+ result = cmd._get_http_cache_files_with_metadata(options)
+
+ # Should not include files without extractable names
+ assert len(result) == 0
+
+ def test_skips_body_files(self) -> None:
+ """Test that .body files are skipped."""
+ with tempfile.TemporaryDirectory() as cache_dir:
+ cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e")
+ os.makedirs(cache_subdir, exist_ok=True)
+
+ # Create a .body file
+ body_file = os.path.join(cache_subdir, "test_cache_file.body")
+ with open(body_file, "wb") as f:
+ f.write(b"test data")
+
+ options = Values()
+ options.cache_dir = cache_dir
+
+ cmd = CacheCommand("cache", "Test cache command")
+ result = cmd._get_http_cache_files_with_metadata(options)
+
+ # Should not find any files (body files are skipped)
+ assert len(result) == 0
+
+ def test_handles_corrupted_cache_files(self) -> None:
+ """Test that corrupted cache files are handled gracefully."""
+ with tempfile.TemporaryDirectory() as cache_dir:
+ cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e")
+ os.makedirs(cache_subdir, exist_ok=True)
+
+ # Create a corrupted cache file
+ cache_file = os.path.join(cache_subdir, "corrupted_file")
+ with open(cache_file, "wb") as f:
+ f.write(b"not a valid cache file")
+
+ options = Values()
+ options.cache_dir = cache_dir
+
+ cmd = CacheCommand("cache", "Test cache command")
+ result = cmd._get_http_cache_files_with_metadata(options)
+
+ # Should handle the corrupted file without crashing
+ # Corrupted files without extractable names are excluded
+ assert len(result) == 0