diff --git a/docs/html/topics/caching.md b/docs/html/topics/caching.md
index 8d6c40f112d..a1fe50dad24 100644
--- a/docs/html/topics/caching.md
+++ b/docs/html/topics/caching.md
@@ -139,9 +139,17 @@ The {ref}`pip cache` command can be used to manage pip's cache.
 
 ### Listing cached files
 
-`pip cache list` will list all wheel files from pip's cache.
+`pip cache list` will list locally built wheel files from pip's cache.
 
-`pip cache list setuptools` will list all setuptools-related wheel files from pip's cache.
+`pip cache list setuptools` will list locally built wheel files related to setuptools from pip's cache.
+
+`pip cache list --http` will list only HTTP cache files. Package names are extracted by inspecting the cached file content (wheel or tarball structure); files without identifiable package names are not shown.
+
+`pip cache list --all` will list both locally built wheels and HTTP cache files in a single unified list.
+
+When using `--all`, HTTP cached files are marked with an `[HTTP cached]` suffix to distinguish them from locally built wheels.
+
+You can also pass `--format=abspath` to print absolute paths instead of human-friendly filenames and sizes.
 
 ## Disabling caching
 
diff --git a/news/10460.feature.rst b/news/10460.feature.rst
new file mode 100644
index 00000000000..c64f238d851
--- /dev/null
+++ b/news/10460.feature.rst
@@ -0,0 +1 @@
+Add ``--http`` and ``--all`` flags to the ``pip cache list`` command. By default, the command shows only locally built wheels (backward compatible). The ``--http`` flag shows only HTTP cached packages, and the ``--all`` flag shows both in a unified list, with HTTP entries marked by an ``[HTTP cached]`` suffix. Filenames for HTTP cached packages are recovered offline by inspecting the cached wheel and tarball structures, so complete filenames with platform tags and accurate file sizes are displayed.
diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py
index c8e7aede687..a118973aa01 100644
--- a/src/pip/_internal/commands/cache.py
+++ b/src/pip/_internal/commands/cache.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 import textwrap
 from optparse import Values
@@ -21,7 +23,7 @@ class CacheCommand(Command):
 
     - dir: Show the cache directory.
    - info: Show information about the cache.
-    - list: List filenames of packages stored in the cache.
+    - list: List filenames of cached files (wheels and HTTP cached packages).
    - remove: Remove one or more package from the cache.
    - purge: Remove all items from the cache.
 
@@ -32,7 +34,7 @@ class CacheCommand(Command):
     usage = """
         %prog dir
         %prog info
-        %prog list [<pattern>] [--format=[human, abspath]]
+        %prog list [<pattern>] [--format=[human, abspath]] [--http] [--all]
         %prog remove <pattern>
         %prog purge
     """
@@ -47,6 +49,22 @@ def add_options(self) -> None:
             help="Select the output format among: human (default) or abspath",
         )
 
+        self.cmd_opts.add_option(
+            "--http",
+            action="store_true",
+            dest="list_http",
+            default=False,
+            help="List HTTP cached package files",
+        )
+
+        self.cmd_opts.add_option(
+            "--all",
+            action="store_true",
+            dest="list_all",
+            default=False,
+            help="List both HTTP cached and locally built package files",
+        )
+
         self.parser.insert_option_group(0, self.cmd_opts)
 
     def handler_map(self) -> dict[str, Callable[[Values, list[str]], None]]:
@@ -141,28 +159,136 @@ def list_cache_items(self, options: Values, args: list[str]) -> None:
         else:
             pattern = "*"
 
-        files = self._find_wheels(options, pattern)
+        # Determine what to show based on flags
+        # Default: show only wheels (backward compatible)
+        # --http: show only HTTP cache
+        # --all: show both wheels and HTTP cache (unified)
+        if options.list_all:
+            show_wheels = True
+            show_http = True
+            unified = True
+        elif options.list_http:
+            show_wheels = False
+            show_http = True
+            unified = False
+        else:
+            # Default behavior
+            show_wheels = True
+            show_http = False
+            unified = False
+
+        wheel_files = []
+        if show_wheels:
+            wheel_files = self._find_wheels(options, pattern)
+
+        http_files = []
+        if show_http:
+            http_files = self._get_http_cache_files_with_metadata(options)
+
         if options.list_format == "human":
-            self.format_for_human(files)
+            if unified:
+                self.format_for_human_unified_all(wheel_files, http_files)
+            else:
+                self.format_for_human_separated(
+                    wheel_files, http_files, show_http, show_wheels
+                )
         else:
-            self.format_for_abspath(files)
+            self.format_for_abspath_unified(wheel_files, http_files)
+
+    def format_for_human_separated(
+        self,
+        wheel_files: list[str],
+        http_files: list[tuple[str, str]],
+        show_http: bool,
+        show_wheels: bool,
+    ) -> None:
+        """Format wheel and HTTP cache files in separate sections."""
+        if not wheel_files and not http_files:
+            if show_http:
+                logger.info("No cached files.")
+            else:
+                logger.info("No locally built wheels cached.")
+            return
 
-    def format_for_human(self, files: list[str]) -> None:
-        if not files:
-            logger.info("No locally built wheels cached.")
+        # When showing HTTP files only, use a separate section
+        if show_http and http_files:
+            logger.info("HTTP cache files:")
+            formatted = []
+            for cache_file, filename in http_files:
+                # Use the body file size if available
+                body_file = cache_file + ".body"
+                if os.path.exists(body_file):
+                    size = filesystem.format_file_size(body_file)
+                else:
+                    size = filesystem.format_file_size(cache_file)
+
+                # Every entry has an extracted filename, since
+                # _get_http_cache_files_with_metadata filters out files
+                # whose names could not be determined
+                formatted.append(f" - {filename} ({size})")
+
+            logger.info("\n".join(sorted(formatted)))
+
+        # When showing wheels, list them
+        if show_wheels and wheel_files:
+            if show_http and http_files:
+                logger.info("")  # Add spacing between sections
+            formatted = []
+            for filename in wheel_files:
+                wheel = os.path.basename(filename)
+                size = filesystem.format_file_size(filename)
+                formatted.append(f" - {wheel} ({size})")
+
+            logger.info("\n".join(sorted(formatted)))
+
+    def format_for_human_unified_all(
+        self,
+        wheel_files: list[str],
+        http_files: list[tuple[str, str]],
+    ) -> None:
+        """Format wheel and HTTP cache files in a unified list with an
+        [HTTP cached] suffix.
+        """
+        if not wheel_files and not http_files:
+            logger.info("No cached files.")
             return
 
-        results = []
-        for filename in files:
+        formatted = []
+
+        # Add HTTP files with suffix
+        for cache_file, filename in http_files:
+            # Use the body file size if available
+            body_file = cache_file + ".body"
+            if os.path.exists(body_file):
+                size = filesystem.format_file_size(body_file)
+            else:
+                size = filesystem.format_file_size(cache_file)
+
+            formatted.append(f" - {filename} ({size}) [HTTP cached]")
+
+        # Add wheel files without suffix
+        for filename in wheel_files:
             wheel = os.path.basename(filename)
             size = filesystem.format_file_size(filename)
-            results.append(f" - {wheel} ({size})")
-        logger.info("Cache contents:\n")
-        logger.info("\n".join(sorted(results)))
+            formatted.append(f" - {wheel} ({size})")
+
+        logger.info("\n".join(sorted(formatted)))
 
-    def format_for_abspath(self, files: list[str]) -> None:
-        if files:
-            logger.info("\n".join(sorted(files)))
+    def format_for_abspath_unified(
+        self, wheel_files: list[str], http_files: list[tuple[str, str]]
+    ) -> None:
+        """Format wheel and HTTP cache files as absolute paths."""
+        all_files = []
+
+        # Add wheel files
+        all_files.extend(wheel_files)
+
+        # Add HTTP cache files (only those with extracted filenames)
+        for cache_file, _filename in http_files:
+            all_files.append(cache_file)
+
+        if all_files:
+            logger.info("\n".join(sorted(all_files)))
 
     def remove_cache_items(self, options: Values, args: list[str]) -> None:
         if len(args) > 1:
@@ -229,3 +355,130 @@ def _find_wheels(self, options: Values, pattern: str) -> list[str]:
         pattern = pattern + ("*.whl" if "-" in pattern else "-*.whl")
 
         return filesystem.find_files(wheel_dir, pattern)
+
+    def _get_http_cache_files_with_metadata(
+        self, options: Values
+    ) -> list[tuple[str, str]]:
+        """Get HTTP cache files with filenames from package content inspection.
+
+        Extracts filenames by reading the cached package structure:
+        - Wheel files: reads the .dist-info directory and WHEEL metadata for the complete filename with tags
+        - Tarball files: reads the tar structure to extract the package name from the root directory
+
+        Returns a list of tuples: (cache_file_path, filename)
+        Only returns files where a filename could be successfully extracted.
+ """ + from pip._vendor.cachecontrol.serialize import Serializer + + http_files = self._find_http_files(options) + result = [] + + serializer = Serializer() + + for cache_file in http_files: + # Skip .body files as we only want metadata files + if cache_file.endswith(".body"): + continue + + filename = None + try: + # Read the cached metadata + with open(cache_file, "rb") as f: + cached_data = f.read() + + # Try to parse it + if cached_data.startswith(f"cc={serializer.serde_version},".encode()): + # Extract the msgpack data + from pip._vendor import msgpack + + data = cached_data[5:] # Skip "cc=4," + cached = msgpack.loads(data, raw=False) + + headers = cached.get("response", {}).get("headers", {}) + content_type = headers.get("content-type", "") + + # Extract filename from body content + body_file = cache_file + ".body" + if os.path.exists(body_file): + filename = self._extract_filename_from_body( + body_file, content_type + ) + except Exception: + # If we can't read/parse the file, just skip trying to extract name + pass + + # Only include files where we successfully extracted a filename + if filename: + result.append((cache_file, filename)) + + return result + + def _extract_filename_from_body( + self, body_file: str, content_type: str + ) -> str | None: + """Extract filename by inspecting the body content. + + This works offline by examining the downloaded file structure. + """ + try: + # Check if it's a wheel file (ZIP format) + if "application/octet-stream" in content_type or not content_type: + # Try to read as a wheel (ZIP file) + import zipfile + + try: + with zipfile.ZipFile(body_file, "r") as zf: + # Wheel files contain a .dist-info directory + names = zf.namelist() + dist_info_dir = None + for name in names: + if ".dist-info/" in name: + dist_info_dir = name.split("/")[0] + break + + if dist_info_dir and dist_info_dir.endswith(".dist-info"): + # Read WHEEL metadata to get the full wheel name + wheel_file = f"{dist_info_dir}/WHEEL" + if wheel_file in names: + wheel_content = zf.read(wheel_file).decode("utf-8") + # Parse WHEEL file for Root-Is-Purelib and Tag + tags = [] + for line in wheel_content.split("\n"): + if line.startswith("Tag:"): + tag = line.split(":", 1)[1].strip() + tags.append(tag) + + if tags: + # Use first tag to construct filename + # Format: {name}-{version}.dist-info + pkg_info = dist_info_dir[: -len(".dist-info")] + # Tags format: py3-none-any + tag = tags[0] + return f"{pkg_info}-{tag}.whl" + + # Fallback: just use name-version.whl + pkg_info = dist_info_dir[: -len(".dist-info")] + return f"{pkg_info}.whl" + except (zipfile.BadZipFile, KeyError, UnicodeDecodeError): + pass + + # Try to read as a tarball + import tarfile + + try: + with tarfile.open(body_file, "r:*") as tf: + # Get the first member to determine the package name + members = tf.getmembers() + if members: + # Tarball usually has format: package-version/... 
+                        first_name = members[0].name
+                        pkg_dir = first_name.split("/")[0]
+                        if pkg_dir and "-" in pkg_dir:
+                            return f"{pkg_dir}.tar.gz"
+            except (tarfile.TarError, KeyError):
+                pass
+
+        except Exception:
+            pass
+
+        return None
diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py
index bd1f75a4177..f8292ff1255 100644
--- a/tests/functional/test_cache.py
+++ b/tests/functional/test_cache.py
@@ -52,18 +52,56 @@ def wheel_cache_files(wheel_cache_dir: str) -> list[str]:
 
 @pytest.fixture
 def populate_http_cache(http_cache_dir: str) -> list[tuple[str, str]]:
+    import zipfile
+
+    from pip._vendor import msgpack
+
     destination = os.path.join(http_cache_dir, "arbitrary", "pathname")
     os.makedirs(destination)
 
-    files = [
-        ("aaaaaaaaa", os.path.join(destination, "aaaaaaaaa")),
-        ("bbbbbbbbb", os.path.join(destination, "bbbbbbbbb")),
-        ("ccccccccc", os.path.join(destination, "ccccccccc")),
+    files = []
+
+    # Create a few cache entries with proper wheel body files
+    wheel_entries = [
+        ("test_package-1.0.0-py3-none-any.whl", "test_package", "1.0.0"),
+        ("another-2.3.4-py3-none-any.whl", "another", "2.3.4"),
     ]
 
-    for _name, filename in files:
-        with open(filename, "w"):
-            pass
+    for wheel_filename, pkg_name, version in wheel_entries:
+        cache_file = os.path.join(destination, "cached_" + wheel_filename)
+        body_file = cache_file + ".body"
+
+        # Create the .body file as a minimal wheel
+        with zipfile.ZipFile(body_file, "w") as zf:
+            dist_info = f"{pkg_name}-{version}.dist-info"
+            # Add WHEEL file
+            wheel_content = "Wheel-Version: 1.0\nTag: py3-none-any\n"
+            zf.writestr(f"{dist_info}/WHEEL", wheel_content)
+            # Add METADATA file
+            metadata_content = (
+                f"Metadata-Version: 2.1\nName: {pkg_name}\nVersion: {version}\n"
+            )
+            zf.writestr(f"{dist_info}/METADATA", metadata_content)
+
+        # Create the cache metadata file
+        cached_data = {
+            "response": {
+                "body": b"",
+                "headers": {
+                    "content-type": "application/octet-stream",
+                },
+                "status": 200,
+                "version": 11,
+                "reason": "OK",
+                "decode_content": True,
+            }
+        }
+
+        with open(cache_file, "wb") as f:
+            f.write(b"cc=4,")
+            f.write(msgpack.dumps(cached_data, use_bin_type=True))
+
+        files.append((pkg_name, cache_file))
 
     return files
 
@@ -370,9 +408,8 @@ def test_cache_purge(
     wheels."""
     result = script.pip("cache", "purge", "--verbose")
 
-    assert remove_matches_http("aaaaaaaaa", result)
-    assert remove_matches_http("bbbbbbbbb", result)
-    assert remove_matches_http("ccccccccc", result)
+    assert remove_matches_http("cached_test_package-1.0.0-py3-none-any.whl", result)
+    assert remove_matches_http("cached_another-2.3.4-py3-none-any.whl", result)
 
     assert remove_matches_wheel("yyy-1.2.3", result)
     assert remove_matches_wheel("zzz-4.5.6", result)
@@ -400,6 +437,41 @@ def test_cache_purge_too_many_args(
         assert os.path.exists(filename)
 
 
+@pytest.mark.usefixtures("populate_http_cache")
+def test_cache_list_with_http_flag(script: PipTestEnvironment) -> None:
+    """Running `pip cache list --http` should list HTTP cache files."""
+    result = script.pip("cache", "list", "--http")
+
+    # Should show HTTP cache files section
+    assert "HTTP cache files:" in result.stdout
+
+    # Should list cache files with extracted wheel names
+    assert "test_package-1.0.0-py3-none-any.whl" in result.stdout
+    assert "another-2.3.4-py3-none-any.whl" in result.stdout
+
+
+@pytest.mark.usefixtures("populate_http_cache")
+def test_cache_list_with_http_flag_abspath(script: PipTestEnvironment) -> None:
+    """Running `pip cache list --http --format=abspath` should list full paths."""
+    result = script.pip("cache", "list", "--http", "--format=abspath")
+
+    # Should have some output with paths
+    lines = result.stdout.strip().split("\n")
+    assert len(lines) > 0
+    # Each line should be a path
+    for line in lines:
+        assert os.path.isabs(line)
+
+
+@pytest.mark.usefixtures("empty_wheel_cache")
+def test_cache_list_with_http_flag_empty(script: PipTestEnvironment) -> None:
+    """Test `pip cache list --http` with an empty cache."""
+    result = script.pip("cache", "list", "--http")
+
+    # Should show the "no cached files" message
+    assert "No cached files." in result.stdout
+
+
 @pytest.mark.parametrize("command", ["info", "list", "remove", "purge"])
 def test_cache_abort_when_no_cache_dir(
     script: PipTestEnvironment, command: str
diff --git a/tests/unit/test_cache_command.py b/tests/unit/test_cache_command.py
new file mode 100644
index 00000000000..62f02d65462
--- /dev/null
+++ b/tests/unit/test_cache_command.py
@@ -0,0 +1,202 @@
+"""Tests for the cache command with HTTP cache listing functionality."""
+
+import os
+import tempfile
+from optparse import Values
+
+from pip._vendor.cachecontrol.serialize import Serializer
+
+from pip._internal.commands.cache import CacheCommand
+
+
+class TestGetHttpCacheFilesWithMetadata:
+    """Tests for the _get_http_cache_files_with_metadata method."""
+
+    def test_extracts_filename_from_wheel_body(self) -> None:
+        """Test that filenames are extracted from wheel file bodies."""
+        import zipfile
+
+        with tempfile.TemporaryDirectory() as cache_dir:
+            cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e")
+            os.makedirs(cache_subdir, exist_ok=True)
+
+            cache_file = os.path.join(cache_subdir, "test_cache_file")
+
+            # Create a minimal wheel file structure
+            body_file = cache_file + ".body"
+            with zipfile.ZipFile(body_file, "w") as zf:
+                # Wheels have a .dist-info directory
+                zf.writestr("test_package-1.0.0.dist-info/WHEEL", "Wheel-Version: 1.0")
+                zf.writestr(
+                    "test_package-1.0.0.dist-info/METADATA", "Name: test-package"
+                )
+
+            # Create cache metadata
+            cache_data = {
+                "response": {
+                    "body": b"",
+                    "headers": {
+                        "content-type": "application/octet-stream",
+                    },
+                    "status": 200,
+                    "version": 11,
+                    "reason": "OK",
+                    "decode_content": False,
+                },
+                "vary": {},
+            }
+
+            s = Serializer()
+            serialized = s.serialize(cache_data)
+            full_data = f"cc={s.serde_version},".encode() + serialized
+
+            with open(cache_file, "wb") as f:
+                f.write(full_data)
+
+            options = Values()
+            options.cache_dir = cache_dir
+
+            cmd = CacheCommand("cache", "Test cache command")
+            result = cmd._get_http_cache_files_with_metadata(options)
+
+            # Should extract filename from wheel structure
+            assert len(result) == 1
+            assert result[0][0] == cache_file
+            assert result[0][1] == "test_package-1.0.0.whl"
+
+    def test_extracts_filename_from_tarball_body(self) -> None:
+        """Test that filenames are extracted from tarball file bodies."""
+        import tarfile
+
+        with tempfile.TemporaryDirectory() as cache_dir:
+            cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e")
+            os.makedirs(cache_subdir, exist_ok=True)
+
+            cache_file = os.path.join(cache_subdir, "test_cache_file")
+
+            # Create a minimal tarball structure
+            body_file = cache_file + ".body"
+            with tarfile.open(body_file, "w:gz") as tf:
+                # Tarballs typically have package-version/ as root
+                import io
+
+                data = b"test content"
+                tarinfo = tarfile.TarInfo(name="mypackage-2.0.0/setup.py")
+                tarinfo.size = len(data)
+                tf.addfile(tarinfo, io.BytesIO(data))
+
+            # Create cache metadata
+            cache_data = {
+                "response": {
+                    "body": b"",
"headers": { + "content-type": "application/octet-stream", + }, + "status": 200, + "version": 11, + "reason": "OK", + "decode_content": False, + }, + "vary": {}, + } + + s = Serializer() + serialized = s.serialize(cache_data) + full_data = f"cc={s.serde_version},".encode() + serialized + + with open(cache_file, "wb") as f: + f.write(full_data) + + options = Values() + options.cache_dir = cache_dir + + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should extract filename from tarball structure + assert len(result) == 1 + assert result[0][0] == cache_file + assert result[0][1] == "mypackage-2.0.0.tar.gz" + + def test_handles_files_without_extractable_names(self) -> None: + """Test that files without extractable package names are excluded.""" + with tempfile.TemporaryDirectory() as cache_dir: + # Create nested directory structure + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + # Create a cache file for non-package content (HTML) + cache_file = os.path.join(cache_subdir, "test_cache_file") + + cache_data = { + "response": { + "body": b"", + "headers": { + "content-type": "text/html", + }, + "status": 200, + "version": 11, + "reason": "OK", + "decode_content": False, + }, + "vary": {}, + } + + s = Serializer() + serialized = s.serialize(cache_data) + full_data = f"cc={s.serde_version},".encode() + serialized + + with open(cache_file, "wb") as f: + f.write(full_data) + + # Create mock options + options = Values() + options.cache_dir = cache_dir + + # Test the method + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should not include files without extractable names + assert len(result) == 0 + + def test_skips_body_files(self) -> None: + """Test that .body files are skipped.""" + with tempfile.TemporaryDirectory() as cache_dir: + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + # Create a .body file + body_file = os.path.join(cache_subdir, "test_cache_file.body") + with open(body_file, "wb") as f: + f.write(b"test data") + + options = Values() + options.cache_dir = cache_dir + + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should not find any files (body files are skipped) + assert len(result) == 0 + + def test_handles_corrupted_cache_files(self) -> None: + """Test that corrupted cache files are handled gracefully.""" + with tempfile.TemporaryDirectory() as cache_dir: + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + # Create a corrupted cache file + cache_file = os.path.join(cache_subdir, "corrupted_file") + with open(cache_file, "wb") as f: + f.write(b"not a valid cache file") + + options = Values() + options.cache_dir = cache_dir + + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should handle the corrupted file without crashing + # Corrupted files without extractable names are excluded + assert len(result) == 0