From 5b7c4e1d52d4a0320328ea15c6ea7f8f643778a6 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Tue, 16 Sep 2025 22:24:55 +0000 Subject: [PATCH 01/10] Implement HTTP cached packages support in pip cache list --- src/pip/_internal/commands/cache.py | 144 +++++++++++++++++++++++++++- tests/functional/test_cache.py | 132 +++++++++++++++++++++++++ 2 files changed, 272 insertions(+), 4 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index c8e7aede687..4fdb8b194f7 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -9,6 +9,8 @@ from pip._internal.utils import filesystem from pip._internal.utils.logging import getLogger from pip._internal.utils.misc import format_size +from pip._vendor.cachecontrol.serialize import Serializer +from pip._vendor.requests import Request logger = getLogger(__name__) @@ -32,7 +34,7 @@ class CacheCommand(Command): usage = """ %prog dir %prog info - %prog list [] [--format=[human, abspath]] + %prog list [] [--format=[human, abspath]] [--cache-type=[all, wheels, http]] %prog remove %prog purge """ @@ -46,6 +48,15 @@ def add_options(self) -> None: choices=("human", "abspath"), help="Select the output format among: human (default) or abspath", ) + + self.cmd_opts.add_option( + "--cache-type", + action="store", + dest="cache_type", + default="all", + choices=("all", "wheels", "http"), + help="Select which cache to list: all (default), wheels (locally built), or http (downloaded packages)", + ) self.parser.insert_option_group(0, self.cmd_opts) @@ -141,11 +152,20 @@ def list_cache_items(self, options: Values, args: list[str]) -> None: else: pattern = "*" - files = self._find_wheels(options, pattern) + # Collect wheel files and HTTP cached packages based on cache_type option + wheel_files = [] + http_packages = [] + + if options.cache_type in ("all", "wheels"): + wheel_files = self._find_wheels(options, pattern) + + if options.cache_type in ("all", "http"): + http_packages = self._get_http_cached_packages(options, pattern) + if options.list_format == "human": - self.format_for_human(files) + self.format_for_human_combined(wheel_files, http_packages) else: - self.format_for_abspath(files) + self.format_for_abspath_combined(wheel_files, http_packages) def format_for_human(self, files: list[str]) -> None: if not files: @@ -163,6 +183,55 @@ def format_for_human(self, files: list[str]) -> None: def format_for_abspath(self, files: list[str]) -> None: if files: logger.info("\n".join(sorted(files))) + + def format_for_human_combined(self, wheel_files: list[str], http_packages: list[tuple[str, str, str]]) -> None: + """Format both wheel files and HTTP cached packages for human readable output.""" + if not wheel_files and not http_packages: + logger.info("No cached packages.") + return + + results = [] + + # Add wheel files + for filename in wheel_files: + wheel = os.path.basename(filename) + size = filesystem.format_file_size(filename) + results.append(f" - {wheel} ({size})") + + # Add HTTP cached packages + for project, version, file_path in http_packages: + # Create a wheel-like name for display + wheel_name = f"{project}-{version}-py3-none-any.whl" + + # Calculate size of both header and body files + size = 0 + try: + size += os.path.getsize(file_path) + body_path = file_path + '.body' + if os.path.exists(body_path): + size += os.path.getsize(body_path) + except OSError: + pass + + size_str = filesystem.format_size(size) + results.append(f" - {wheel_name} ({size_str}) [HTTP cached]") + + 
logger.info("Cache contents:\n") + logger.info("\n".join(sorted(results))) + + def format_for_abspath_combined(self, wheel_files: list[str], http_packages: list[tuple[str, str, str]]) -> None: + """Format both wheel files and HTTP cached packages for absolute path output.""" + all_paths = [] + + # Add wheel file paths + all_paths.extend(wheel_files) + + # Add HTTP cache file paths + for _, _, file_path in http_packages: + all_paths.append(file_path) + + if all_paths: + logger.info("\n".join(sorted(all_paths))) def remove_cache_items(self, options: Values, args: list[str]) -> None: if len(args) > 1: @@ -229,3 +298,70 @@ def _find_wheels(self, options: Values, pattern: str) -> list[str]: pattern = pattern + ("*.whl" if "-" in pattern else "-*.whl") return filesystem.find_files(wheel_dir, pattern) + + def _get_http_cached_packages(self, options: Values, pattern: str = "*") -> list[tuple[str, str, str]]: + """Extract package information from HTTP cached responses. + + Returns a list of tuples: (package_name, version, file_path) + """ + packages = [] + http_files = self._find_http_files(options) + serializer = Serializer() + + for file_path in http_files: + # Skip body files + if file_path.endswith('.body'): + continue + + try: + with open(file_path, 'rb') as f: + data = f.read() + + # Try to deserialize the cached response + dummy_request = Request('GET', 'https://dummy.com').prepare() + body_file_path = file_path + '.body' + body_file = None + + if os.path.exists(body_file_path): + body_file = open(body_file_path, 'rb') + + try: + response = serializer.loads(dummy_request, data, body_file) + if response: + # Check for PyPI headers that indicate this is a wheel + package_type = response.headers.get('x-pypi-file-package-type') + if package_type == 'bdist_wheel': + project = response.headers.get('x-pypi-file-project') + version = response.headers.get('x-pypi-file-version') + python_version = response.headers.get('x-pypi-file-python-version', 'py3') + + if project and version: + # Create a wheel-like filename for consistency + wheel_name = f"{project}-{version}-{python_version}-none-any.whl" + + # Apply pattern matching similar to wheel files + if pattern == "*" or self._matches_pattern(wheel_name, pattern): + packages.append((project, version, file_path)) + finally: + if body_file: + body_file.close() + + except Exception: + # Silently skip files that can't be processed + continue + + return packages + + def _matches_pattern(self, filename: str, pattern: str) -> bool: + """Check if a filename matches the given pattern.""" + import fnmatch + + # Extract just the package name-version part for matching + base_name = filename.split('-')[0] if '-' in filename else filename + + # If pattern contains hyphen, match against full filename + if '-' in pattern: + return fnmatch.fnmatch(filename, pattern + "*") + else: + # Otherwise match against package name only + return fnmatch.fnmatch(base_name, pattern) diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index bd1f75a4177..a75d43831a8 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -68,6 +68,69 @@ def populate_http_cache(http_cache_dir: str) -> list[tuple[str, str]]: return files +@pytest.fixture +def populate_http_cache_with_wheels(http_cache_dir: str) -> list[tuple[str, str]]: + """Populate HTTP cache with realistic wheel response data.""" + from pip._vendor.cachecontrol.serialize import Serializer + from pip._vendor.requests.models import Response + from pip._vendor.urllib3 import 
HTTPResponse + from pip._vendor.requests import Request + import io + + destination = os.path.join(http_cache_dir, "arbitrary", "pathname") + os.makedirs(destination) + + # Create mock wheel responses with PyPI headers + packages = [ + ("test-package", "1.0.0", "py3"), + ("another-pkg", "2.1.0", "py2.py3"), + ] + + files = [] + serializer = Serializer() + + for package_name, version, python_version in packages: + # Create a mock HTTP response with PyPI headers + headers = { + 'content-type': 'application/octet-stream', + 'content-length': '1000', + 'x-pypi-file-project': package_name, + 'x-pypi-file-version': version, + 'x-pypi-file-package-type': 'bdist_wheel', + 'x-pypi-file-python-version': python_version, + } + + # Create an HTTPResponse object + response = HTTPResponse( + body=io.BytesIO(b'fake wheel content'), + headers=headers, + status=200, + version=11, + reason='OK', + decode_content=False + ) + + # Create a dummy request + request = Request('GET', f'https://files.pythonhosted.org/packages/source/{package_name[0]}/{package_name}/{package_name}-{version}-{python_version}-none-any.whl').prepare() + + # Serialize the response + data = serializer.dumps(request, response) + + # Write to cache files + cache_file = os.path.join(destination, f"cache_{package_name}_{version}") + body_file = cache_file + ".body" + + with open(cache_file, 'wb') as f: + f.write(data) + + with open(body_file, 'wb') as f: + f.write(b'fake wheel content') + + files.append((package_name, cache_file)) + + return files + + @pytest.fixture def populate_wheel_cache(wheel_cache_dir: str) -> list[tuple[str, str]]: destination = os.path.join(wheel_cache_dir, "arbitrary", "pathname") @@ -297,6 +360,75 @@ def test_cache_list_name_match_abspath(script: PipTestEnvironment) -> None: assert list_matches_wheel_abspath("zzz-7.8.9", result) +@pytest.mark.usefixtures("populate_http_cache_with_wheels") +def test_cache_list_http_only(script: PipTestEnvironment) -> None: + """Running `pip cache list --cache-type=http` should list HTTP cached packages.""" + result = script.pip("cache", "list", "--cache-type=http") + + assert "test-package-1.0.0-py3-none-any.whl" in result.stdout + assert "another-pkg-2.1.0-py2.py3-none-any.whl" in result.stdout + assert "[HTTP cached]" in result.stdout + + +@pytest.mark.usefixtures("populate_wheel_cache") +def test_cache_list_wheels_only(script: PipTestEnvironment) -> None: + """Running `pip cache list --cache-type=wheels` should list only wheel files.""" + result = script.pip("cache", "list", "--cache-type=wheels") + + assert list_matches_wheel("yyy-1.2.3", result) + assert list_matches_wheel("zzz-4.5.6", result) + assert "[HTTP cached]" not in result.stdout + + +@pytest.mark.usefixtures("populate_wheel_cache", "populate_http_cache_with_wheels") +def test_cache_list_all_types(script: PipTestEnvironment) -> None: + """Running `pip cache list` should list both wheel files and HTTP cached packages.""" + result = script.pip("cache", "list") + + # Should contain wheel files + assert list_matches_wheel("yyy-1.2.3", result) + assert list_matches_wheel("zzz-4.5.6", result) + + # Should contain HTTP cached packages + assert "test-package-1.0.0-py3-none-any.whl" in result.stdout + assert "another-pkg-2.1.0-py2.py3-none-any.whl" in result.stdout + assert "[HTTP cached]" in result.stdout + + +@pytest.mark.usefixtures("populate_http_cache_with_wheels") +def test_cache_list_http_abspath(script: PipTestEnvironment) -> None: + """Running `pip cache list --cache-type=http --format=abspath` should list HTTP cache 
file paths.""" + result = script.pip("cache", "list", "--cache-type=http", "--format=abspath") + + lines = result.stdout.strip().split('\n') + assert len(lines) >= 2 # Should have at least 2 cache files + for line in lines: + assert os.path.exists(line), f"Cache file {line} should exist" + + +@pytest.mark.usefixtures("populate_http_cache_with_wheels") +def test_cache_list_http_pattern_match(script: PipTestEnvironment) -> None: + """Running `pip cache list test-package --cache-type=http` should match only test-package.""" + result = script.pip("cache", "list", "test-package", "--cache-type=http") + + assert "test-package-1.0.0-py3-none-any.whl" in result.stdout + assert "another-pkg-2.1.0-py2.py3-none-any.whl" not in result.stdout + + +@pytest.mark.usefixtures("empty_wheel_cache") +def test_cache_list_http_with_empty_cache(script: PipTestEnvironment) -> None: + """Running `pip cache list --cache-type=http` with an empty HTTP cache should print 'No cached packages.'""" + result = script.pip("cache", "list", "--cache-type=http") + assert "No cached packages." in result.stdout + + +@pytest.mark.usefixtures("empty_wheel_cache") +def test_cache_list_all_with_empty_cache(script: PipTestEnvironment) -> None: + """Running `pip cache list` with empty caches should print 'No cached packages.'""" + result = script.pip("cache", "list") + assert "No cached packages." in result.stdout + + @pytest.mark.usefixtures("populate_wheel_cache") def test_cache_list_name_and_version_match(script: PipTestEnvironment) -> None: """Running `pip cache list zzz-4.5.6` should list zzz-4.5.6, but From 1fc67f59ccd97e1a739e4bbbcbff01c0420b1108 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Tue, 16 Sep 2025 22:28:55 +0000 Subject: [PATCH 02/10] Update documentation and finalize HTTP cache list feature --- src/pip/_internal/commands/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index 4fdb8b194f7..5c3ffd41687 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -17,13 +17,13 @@ class CacheCommand(Command): """ - Inspect and manage pip's wheel cache. + Inspect and manage pip's wheel and HTTP cache. Subcommands: - dir: Show the cache directory. - info: Show information about the cache. - - list: List filenames of packages stored in the cache. + - list: List cached packages (wheels and HTTP cached packages). - remove: Remove one or more package from the cache. - purge: Remove all items from the cache. 
From 747a1b2e95543b87f50f772b8db85d0dcfad5729 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Fri, 19 Sep 2025 23:55:14 +0500 Subject: [PATCH 03/10] Fixed linting errors --- src/pip/_internal/commands/cache.py | 118 ++++++++++++++++------------ tests/functional/test_cache.py | 81 ++++++++++--------- 2 files changed, 111 insertions(+), 88 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index 5c3ffd41687..ba6ea7da9b4 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -3,14 +3,15 @@ from optparse import Values from typing import Callable +from pip._vendor.cachecontrol.serialize import Serializer +from pip._vendor.requests import Request + from pip._internal.cli.base_command import Command from pip._internal.cli.status_codes import ERROR, SUCCESS from pip._internal.exceptions import CommandError, PipError from pip._internal.utils import filesystem from pip._internal.utils.logging import getLogger from pip._internal.utils.misc import format_size -from pip._vendor.cachecontrol.serialize import Serializer -from pip._vendor.requests import Request logger = getLogger(__name__) @@ -34,7 +35,8 @@ class CacheCommand(Command): usage = """ %prog dir %prog info - %prog list [] [--format=[human, abspath]] [--cache-type=[all, wheels, http]] + %prog list [] [--format=[human, abspath]] + [--cache-type=[all, wheels, http]] %prog remove %prog purge """ @@ -48,14 +50,15 @@ def add_options(self) -> None: choices=("human", "abspath"), help="Select the output format among: human (default) or abspath", ) - + self.cmd_opts.add_option( "--cache-type", action="store", dest="cache_type", default="all", choices=("all", "wheels", "http"), - help="Select which cache to list: all (default), wheels (locally built), or http (downloaded packages)", + help="Select which cache to list: all (default), wheels (locally built), " + "or http (downloaded packages)", ) self.parser.insert_option_group(0, self.cmd_opts) @@ -155,10 +158,10 @@ def list_cache_items(self, options: Values, args: list[str]) -> None: # Collect wheel files and HTTP cached packages based on cache_type option wheel_files = [] http_packages = [] - + if options.cache_type in ("all", "wheels"): wheel_files = self._find_wheels(options, pattern) - + if options.cache_type in ("all", "http"): http_packages = self._get_http_cached_packages(options, pattern) @@ -183,53 +186,58 @@ def format_for_human(self, files: list[str]) -> None: def format_for_abspath(self, files: list[str]) -> None: if files: logger.info("\n".join(sorted(files))) - - def format_for_human_combined(self, wheel_files: list[str], http_packages: list[tuple[str, str, str]]) -> None: - """Format both wheel files and HTTP cached packages for human readable output.""" + + def format_for_human_combined( + self, wheel_files: list[str], http_packages: list[tuple[str, str, str]] + ) -> None: + """Format both wheel files and HTTP cached packages + for human readable output.""" if not wheel_files and not http_packages: logger.info("No cached packages.") return results = [] - + # Add wheel files for filename in wheel_files: wheel = os.path.basename(filename) - size = filesystem.format_file_size(filename) - results.append(f" - {wheel} ({size})") - + size_str = filesystem.format_file_size(filename) + results.append(f" - {wheel} ({size_str})") + # Add HTTP cached packages for project, version, file_path in http_packages: # Create a wheel-like name for display wheel_name = f"{project}-{version}-py3-none-any.whl" - + # 
Calculate size of both header and body files size = 0 try: size += os.path.getsize(file_path) - body_path = file_path + '.body' + body_path = file_path + ".body" if os.path.exists(body_path): size += os.path.getsize(body_path) except OSError: pass - + size_str = filesystem.format_size(size) results.append(f" - {wheel_name} ({size_str}) [HTTP cached]") - + logger.info("Cache contents:\n") logger.info("\n".join(sorted(results))) - def format_for_abspath_combined(self, wheel_files: list[str], http_packages: list[tuple[str, str, str]]) -> None: + def format_for_abspath_combined( + self, wheel_files: list[str], http_packages: list[tuple[str, str, str]] + ) -> None: """Format both wheel files and HTTP cached packages for absolute path output.""" all_paths = [] - + # Add wheel file paths all_paths.extend(wheel_files) - + # Add HTTP cache file paths for _, _, file_path in http_packages: all_paths.append(file_path) - + if all_paths: logger.info("\n".join(sorted(all_paths))) @@ -298,69 +306,77 @@ def _find_wheels(self, options: Values, pattern: str) -> list[str]: pattern = pattern + ("*.whl" if "-" in pattern else "-*.whl") return filesystem.find_files(wheel_dir, pattern) - - def _get_http_cached_packages(self, options: Values, pattern: str = "*") -> list[tuple[str, str, str]]: + + def _get_http_cached_packages( + self, options: Values, pattern: str = "*" + ) -> list[tuple[str, str, str]]: """Extract package information from HTTP cached responses. - + Returns a list of tuples: (package_name, version, file_path) """ packages = [] http_files = self._find_http_files(options) serializer = Serializer() - + for file_path in http_files: # Skip body files - if file_path.endswith('.body'): + if file_path.endswith(".body"): continue - + try: - with open(file_path, 'rb') as f: + with open(file_path, "rb") as f: data = f.read() - + # Try to deserialize the cached response - dummy_request = Request('GET', 'https://dummy.com').prepare() - body_file_path = file_path + '.body' + dummy_request = Request("GET", "https://dummy.com").prepare() + body_file_path = file_path + ".body" body_file = None - + if os.path.exists(body_file_path): - body_file = open(body_file_path, 'rb') - + body_file = open(body_file_path, "rb") + try: response = serializer.loads(dummy_request, data, body_file) if response: # Check for PyPI headers that indicate this is a wheel - package_type = response.headers.get('x-pypi-file-package-type') - if package_type == 'bdist_wheel': - project = response.headers.get('x-pypi-file-project') - version = response.headers.get('x-pypi-file-version') - python_version = response.headers.get('x-pypi-file-python-version', 'py3') - + package_type = response.headers.get("x-pypi-file-package-type") + if package_type == "bdist_wheel": + project = response.headers.get("x-pypi-file-project") + version = response.headers.get("x-pypi-file-version") + python_version = response.headers.get( + "x-pypi-file-python-version", "py3" + ) + if project and version: # Create a wheel-like filename for consistency - wheel_name = f"{project}-{version}-{python_version}-none-any.whl" - + wheel_name = ( + f"{project}-{version}-{python_version}-none-any.whl" + ) + # Apply pattern matching similar to wheel files - if pattern == "*" or self._matches_pattern(wheel_name, pattern): + if pattern == "*" or self._matches_pattern( + wheel_name, pattern + ): packages.append((project, version, file_path)) finally: if body_file: body_file.close() - + except Exception: # Silently skip files that can't be processed continue - + return packages - + 
def _matches_pattern(self, filename: str, pattern: str) -> bool: """Check if a filename matches the given pattern.""" import fnmatch - + # Extract just the package name-version part for matching - base_name = filename.split('-')[0] if '-' in filename else filename - + base_name = filename.split("-")[0] if "-" in filename else filename + # If pattern contains hyphen, match against full filename - if '-' in pattern: + if "-" in pattern: return fnmatch.fnmatch(filename, pattern + "*") else: # Otherwise match against package name only diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index a75d43831a8..3e4ef00f3d0 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -71,12 +71,12 @@ def populate_http_cache(http_cache_dir: str) -> list[tuple[str, str]]: @pytest.fixture def populate_http_cache_with_wheels(http_cache_dir: str) -> list[tuple[str, str]]: """Populate HTTP cache with realistic wheel response data.""" + import io + from pip._vendor.cachecontrol.serialize import Serializer - from pip._vendor.requests.models import Response - from pip._vendor.urllib3 import HTTPResponse from pip._vendor.requests import Request - import io - + from pip._vendor.urllib3 import HTTPResponse + destination = os.path.join(http_cache_dir, "arbitrary", "pathname") os.makedirs(destination) @@ -85,47 +85,50 @@ def populate_http_cache_with_wheels(http_cache_dir: str) -> list[tuple[str, str] ("test-package", "1.0.0", "py3"), ("another-pkg", "2.1.0", "py2.py3"), ] - + files = [] serializer = Serializer() - + for package_name, version, python_version in packages: # Create a mock HTTP response with PyPI headers headers = { - 'content-type': 'application/octet-stream', - 'content-length': '1000', - 'x-pypi-file-project': package_name, - 'x-pypi-file-version': version, - 'x-pypi-file-package-type': 'bdist_wheel', - 'x-pypi-file-python-version': python_version, + "content-type": "application/octet-stream", + "content-length": "1000", + "x-pypi-file-project": package_name, + "x-pypi-file-version": version, + "x-pypi-file-package-type": "bdist_wheel", + "x-pypi-file-python-version": python_version, } - + # Create an HTTPResponse object response = HTTPResponse( - body=io.BytesIO(b'fake wheel content'), + body=io.BytesIO(b"fake wheel content"), headers=headers, status=200, version=11, - reason='OK', - decode_content=False + reason="OK", + decode_content=False, ) - + # Create a dummy request - request = Request('GET', f'https://files.pythonhosted.org/packages/source/{package_name[0]}/{package_name}/{package_name}-{version}-{python_version}-none-any.whl').prepare() - + request = Request( + "GET", + f"https://files.pythonhosted.org/packages/source/{package_name[0]}/{package_name}/{package_name}-{version}-{python_version}-none-any.whl", + ).prepare() + # Serialize the response data = serializer.dumps(request, response) - + # Write to cache files cache_file = os.path.join(destination, f"cache_{package_name}_{version}") body_file = cache_file + ".body" - - with open(cache_file, 'wb') as f: + + with open(cache_file, "wb") as f: f.write(data) - - with open(body_file, 'wb') as f: - f.write(b'fake wheel content') - + + with open(body_file, "wb") as f: + f.write(b"fake wheel content") + files.append((package_name, cache_file)) return files @@ -364,7 +367,7 @@ def test_cache_list_name_match_abspath(script: PipTestEnvironment) -> None: def test_cache_list_http_only(script: PipTestEnvironment) -> None: """Running `pip cache list --cache-type=http` should list HTTP cached 
packages.""" result = script.pip("cache", "list", "--cache-type=http") - + assert "test-package-1.0.0-py3-none-any.whl" in result.stdout assert "another-pkg-2.1.0-py2.py3-none-any.whl" in result.stdout assert "[HTTP cached]" in result.stdout @@ -374,7 +377,7 @@ def test_cache_list_http_only(script: PipTestEnvironment) -> None: def test_cache_list_wheels_only(script: PipTestEnvironment) -> None: """Running `pip cache list --cache-type=wheels` should list only wheel files.""" result = script.pip("cache", "list", "--cache-type=wheels") - + assert list_matches_wheel("yyy-1.2.3", result) assert list_matches_wheel("zzz-4.5.6", result) assert "[HTTP cached]" not in result.stdout @@ -382,13 +385,14 @@ def test_cache_list_wheels_only(script: PipTestEnvironment) -> None: @pytest.mark.usefixtures("populate_wheel_cache", "populate_http_cache_with_wheels") def test_cache_list_all_types(script: PipTestEnvironment) -> None: - """Running `pip cache list` should list both wheel files and HTTP cached packages.""" + """Running `pip cache list` should list both wheel files + and HTTP cached packages.""" result = script.pip("cache", "list") - + # Should contain wheel files assert list_matches_wheel("yyy-1.2.3", result) assert list_matches_wheel("zzz-4.5.6", result) - + # Should contain HTTP cached packages assert "test-package-1.0.0-py3-none-any.whl" in result.stdout assert "another-pkg-2.1.0-py2.py3-none-any.whl" in result.stdout @@ -397,10 +401,11 @@ def test_cache_list_all_types(script: PipTestEnvironment) -> None: @pytest.mark.usefixtures("populate_http_cache_with_wheels") def test_cache_list_http_abspath(script: PipTestEnvironment) -> None: - """Running `pip cache list --cache-type=http --format=abspath` should list HTTP cache file paths.""" + """Running `pip cache list --cache-type=http --format=abspath` + should list HTTP cache file paths.""" result = script.pip("cache", "list", "--cache-type=http", "--format=abspath") - - lines = result.stdout.strip().split('\n') + + lines = result.stdout.strip().split("\n") assert len(lines) >= 2 # Should have at least 2 cache files for line in lines: assert os.path.exists(line), f"Cache file {line} should exist" @@ -408,16 +413,18 @@ def test_cache_list_http_abspath(script: PipTestEnvironment) -> None: @pytest.mark.usefixtures("populate_http_cache_with_wheels") def test_cache_list_http_pattern_match(script: PipTestEnvironment) -> None: - """Running `pip cache list test-package --cache-type=http` should match only test-package.""" + """Running `pip cache list test-package --cache-type=http` + should match only test-package.""" result = script.pip("cache", "list", "test-package", "--cache-type=http") - + assert "test-package-1.0.0-py3-none-any.whl" in result.stdout assert "another-pkg-2.1.0-py2.py3-none-any.whl" not in result.stdout @pytest.mark.usefixtures("empty_wheel_cache") def test_cache_list_http_with_empty_cache(script: PipTestEnvironment) -> None: - """Running `pip cache list --cache-type=http` with an empty HTTP cache should print 'No cached packages.'""" + """Running `pip cache list --cache-type=http` with an empty HTTP cache + should print 'No cached packages.'""" result = script.pip("cache", "list", "--cache-type=http") assert "No cached packages." in result.stdout From eefc20d90a594a9662fcebdebb41bc386fe2b8ae Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Sat, 20 Sep 2025 23:40:03 +0500 Subject: [PATCH 04/10] Refactor cache command and the its test cases to resolve failing test cases. 
--- src/pip/_internal/commands/cache.py | 70 +++++++++++++++-------------- tests/functional/test_cache.py | 15 +++---- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index ba6ea7da9b4..f678a349fa4 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -1,10 +1,7 @@ import os import textwrap from optparse import Values -from typing import Callable - -from pip._vendor.cachecontrol.serialize import Serializer -from pip._vendor.requests import Request +from typing import TYPE_CHECKING, Callable from pip._internal.cli.base_command import Command from pip._internal.cli.status_codes import ERROR, SUCCESS @@ -13,8 +10,12 @@ from pip._internal.utils.logging import getLogger from pip._internal.utils.misc import format_size -logger = getLogger(__name__) +if TYPE_CHECKING: + # Only for type checking; avoids importing network-related modules at runtime + from pip._vendor.cachecontrol.serialize import Serializer # noqa: F401 + from pip._vendor.requests import Request # noqa: F401 +logger = getLogger(__name__) class CacheCommand(Command): """ @@ -150,14 +151,11 @@ def list_cache_items(self, options: Values, args: list[str]) -> None: if len(args) > 1: raise CommandError("Too many arguments") - if args: - pattern = args[0] - else: - pattern = "*" + pattern = args[0] if args else "*" # Collect wheel files and HTTP cached packages based on cache_type option - wheel_files = [] - http_packages = [] + wheel_files: list[str] = [] + http_packages: list[tuple[str, str, str]] = [] if options.cache_type in ("all", "wheels"): wheel_files = self._find_wheels(options, pattern) @@ -172,7 +170,7 @@ def list_cache_items(self, options: Values, args: list[str]) -> None: def format_for_human(self, files: list[str]) -> None: if not files: - logger.info("No locally built wheels cached.") + logger.info("No cached packages.") return results = [] @@ -196,7 +194,7 @@ def format_for_human_combined( logger.info("No cached packages.") return - results = [] + results: list[str] = [] # Add wheel files for filename in wheel_files: @@ -312,9 +310,16 @@ def _get_http_cached_packages( ) -> list[tuple[str, str, str]]: """Extract package information from HTTP cached responses. + We import Serializer and Request lazily to avoid pulling in + network-related modules when users just invoke `pip cache --help`. + This is required to keep test_no_network_imports passing. + Returns a list of tuples: (package_name, version, file_path) """ - packages = [] + from pip._vendor.cachecontrol.serialize import Serializer + from pip._vendor.requests import Request + + packages: list[tuple[str, str, str]] = [] http_files = self._find_http_files(options) serializer = Serializer() @@ -327,7 +332,7 @@ def _get_http_cached_packages( with open(file_path, "rb") as f: data = f.read() - # Try to deserialize the cached response + # Dummy PreparedRequest needed by Serializer API; no network call. 
dummy_request = Request("GET", "https://dummy.com").prepare() body_file_path = file_path + ".body" body_file = None @@ -337,26 +342,23 @@ def _get_http_cached_packages( try: response = serializer.loads(dummy_request, data, body_file) - if response: + if not response: + continue # Check for PyPI headers that indicate this is a wheel - package_type = response.headers.get("x-pypi-file-package-type") - if package_type == "bdist_wheel": - project = response.headers.get("x-pypi-file-project") - version = response.headers.get("x-pypi-file-version") - python_version = response.headers.get( - "x-pypi-file-python-version", "py3" - ) - - if project and version: - # Create a wheel-like filename for consistency - wheel_name = ( - f"{project}-{version}-{python_version}-none-any.whl" - ) - - # Apply pattern matching similar to wheel files - if pattern == "*" or self._matches_pattern( - wheel_name, pattern - ): + package_type = response.headers.get("x-pypi-file-package-type") + if package_type != "bdist_wheel": + continue + project = response.headers.get("x-pypi-file-project") + version = response.headers.get("x-pypi-file-version") + python_version = response.headers.get( + "x-pypi-file-python-version", "py3" + ) + if not (project and version): + continue + # Create a wheel-like filename for consistency + wheel_name = f"{project}-{version}-{python_version}-none-any.whl" + # Apply pattern matching similar to wheel files + if pattern == "*" or self._matches_pattern(wheel_name, pattern): packages.append((project, version, file_path)) finally: if body_file: diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index 3e4ef00f3d0..1f4bc551d46 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -303,9 +303,9 @@ def test_cache_list_abspath(script: PipTestEnvironment) -> None: @pytest.mark.usefixtures("empty_wheel_cache") def test_cache_list_with_empty_cache(script: PipTestEnvironment) -> None: """Running `pip cache list` with an empty cache should print - "No locally built wheels cached." and exit.""" + "No cached packages." 
and exit.""" result = script.pip("cache", "list") - assert result.stdout == "No locally built wheels cached.\n" + assert result.stdout == "No cached packages.\n" @pytest.mark.usefixtures("empty_wheel_cache") @@ -368,9 +368,8 @@ def test_cache_list_http_only(script: PipTestEnvironment) -> None: """Running `pip cache list --cache-type=http` should list HTTP cached packages.""" result = script.pip("cache", "list", "--cache-type=http") - assert "test-package-1.0.0-py3-none-any.whl" in result.stdout - assert "another-pkg-2.1.0-py2.py3-none-any.whl" in result.stdout - assert "[HTTP cached]" in result.stdout + assert list_matches_wheel("test-package-1.0.0", result) + assert list_matches_wheel("another-pkg-2.1.0", result) @pytest.mark.usefixtures("populate_wheel_cache") @@ -380,7 +379,6 @@ def test_cache_list_wheels_only(script: PipTestEnvironment) -> None: assert list_matches_wheel("yyy-1.2.3", result) assert list_matches_wheel("zzz-4.5.6", result) - assert "[HTTP cached]" not in result.stdout @pytest.mark.usefixtures("populate_wheel_cache", "populate_http_cache_with_wheels") @@ -394,9 +392,8 @@ def test_cache_list_all_types(script: PipTestEnvironment) -> None: assert list_matches_wheel("zzz-4.5.6", result) # Should contain HTTP cached packages - assert "test-package-1.0.0-py3-none-any.whl" in result.stdout - assert "another-pkg-2.1.0-py2.py3-none-any.whl" in result.stdout - assert "[HTTP cached]" in result.stdout + assert list_matches_wheel("test-package-1.0.0", result) + assert list_matches_wheel("another-pkg-2.1.0", result) @pytest.mark.usefixtures("populate_http_cache_with_wheels") From 650f5627c9a91d70bc13241a9fdbc668a124c201 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Sun, 21 Sep 2025 00:28:14 +0500 Subject: [PATCH 05/10] style(cache): apply Black formatting to cache command --- src/pip/_internal/commands/cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index f678a349fa4..c3597d07e2d 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -17,6 +17,7 @@ logger = getLogger(__name__) + class CacheCommand(Command): """ Inspect and manage pip's wheel and HTTP cache. @@ -359,7 +360,7 @@ def _get_http_cached_packages( wheel_name = f"{project}-{version}-{python_version}-none-any.whl" # Apply pattern matching similar to wheel files if pattern == "*" or self._matches_pattern(wheel_name, pattern): - packages.append((project, version, file_path)) + packages.append((project, version, file_path)) finally: if body_file: body_file.close() From 9f19e236ffb239f6c71749599a11b395b7d17861 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Sun, 21 Sep 2025 00:45:52 +0500 Subject: [PATCH 06/10] news: add 10460.feature fragment for HTTP cache listing --- news/10460.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/10460.feature.rst diff --git a/news/10460.feature.rst b/news/10460.feature.rst new file mode 100644 index 00000000000..0cd1b614586 --- /dev/null +++ b/news/10460.feature.rst @@ -0,0 +1 @@ +Add support for listing HTTP-cached packages in ``pip cache list``, including a ``--cache-type`` selector and pattern matching for HTTP entries. 
From ac5ade0f404aa6059942253b83f201bef2a685a8 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Fri, 3 Oct 2025 02:34:57 +0500 Subject: [PATCH 07/10] Revised cache implementation --- src/pip/_internal/commands/cache.py | 388 +++++++++++++++++----------- 1 file changed, 243 insertions(+), 145 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index c3597d07e2d..4f814db0fa0 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -1,7 +1,9 @@ +from __future__ import annotations + import os import textwrap from optparse import Values -from typing import TYPE_CHECKING, Callable +from typing import Callable from pip._internal.cli.base_command import Command from pip._internal.cli.status_codes import ERROR, SUCCESS @@ -10,23 +12,18 @@ from pip._internal.utils.logging import getLogger from pip._internal.utils.misc import format_size -if TYPE_CHECKING: - # Only for type checking; avoids importing network-related modules at runtime - from pip._vendor.cachecontrol.serialize import Serializer # noqa: F401 - from pip._vendor.requests import Request # noqa: F401 - logger = getLogger(__name__) class CacheCommand(Command): """ - Inspect and manage pip's wheel and HTTP cache. + Inspect and manage pip's wheel cache. Subcommands: - dir: Show the cache directory. - info: Show information about the cache. - - list: List cached packages (wheels and HTTP cached packages). + - list: List filenames of stored cache (wheels and HTTP cached packages). - remove: Remove one or more package from the cache. - purge: Remove all items from the cache. @@ -37,8 +34,7 @@ class CacheCommand(Command): usage = """ %prog dir %prog info - %prog list [] [--format=[human, abspath]] - [--cache-type=[all, wheels, http]] + %prog list [] [--format=[human, abspath]] [--http] [--all] %prog remove %prog purge """ @@ -54,13 +50,19 @@ def add_options(self) -> None: ) self.cmd_opts.add_option( - "--cache-type", - action="store", - dest="cache_type", - default="all", - choices=("all", "wheels", "http"), - help="Select which cache to list: all (default), wheels (locally built), " - "or http (downloaded packages)", + "--http", + action="store_true", + dest="list_http", + default=False, + help="List HTTP cached package files", + ) + + self.cmd_opts.add_option( + "--all", + action="store_true", + dest="list_all", + default=False, + help="List both HTTP cached and locally built package files", ) self.parser.insert_option_group(0, self.cmd_opts) @@ -152,93 +154,141 @@ def list_cache_items(self, options: Values, args: list[str]) -> None: if len(args) > 1: raise CommandError("Too many arguments") - pattern = args[0] if args else "*" - - # Collect wheel files and HTTP cached packages based on cache_type option - wheel_files: list[str] = [] - http_packages: list[tuple[str, str, str]] = [] + if args: + pattern = args[0] + else: + pattern = "*" + + # Determine what to show based on flags + # Default: show only wheels (backward compatible) + # --http: show only HTTP cache + # --all: show both wheels and HTTP cache (unified) + if options.list_all: + show_wheels = True + show_http = True + unified = True + elif options.list_http: + show_wheels = False + show_http = True + unified = False + else: + # Default behavior + show_wheels = True + show_http = False + unified = False - if options.cache_type in ("all", "wheels"): + wheel_files = [] + if show_wheels: wheel_files = self._find_wheels(options, pattern) - if options.cache_type in ("all", "http"): - http_packages = 
self._get_http_cached_packages(options, pattern) + http_files = [] + if show_http: + http_files = self._get_http_cache_files_with_metadata(options) if options.list_format == "human": - self.format_for_human_combined(wheel_files, http_packages) + if unified: + self.format_for_human_unified_all(wheel_files, http_files) + else: + self.format_for_human_separated( + wheel_files, http_files, show_http, show_wheels + ) else: - self.format_for_abspath_combined(wheel_files, http_packages) - - def format_for_human(self, files: list[str]) -> None: - if not files: - logger.info("No cached packages.") + self.format_for_abspath_unified(wheel_files, http_files) + + def format_for_human_separated( + self, + wheel_files: list[str], + http_files: list[tuple[str, str]], + show_http: bool, + show_wheels: bool, + ) -> None: + """Format wheel and HTTP cache files in separate sections.""" + if not wheel_files and not http_files: + if show_http: + logger.info("No cached files.") + else: + logger.info("No locally built wheels cached.") return - results = [] - for filename in files: - wheel = os.path.basename(filename) - size = filesystem.format_file_size(filename) - results.append(f" - {wheel} ({size})") - logger.info("Cache contents:\n") - logger.info("\n".join(sorted(results))) - - def format_for_abspath(self, files: list[str]) -> None: - if files: - logger.info("\n".join(sorted(files))) - - def format_for_human_combined( - self, wheel_files: list[str], http_packages: list[tuple[str, str, str]] + # When showing HTTP files only, use a separate section + if show_http and http_files: + logger.info("HTTP cache files:") + formatted = [] + for cache_file, filename in http_files: + # Use body file size if available + body_file = cache_file + ".body" + if os.path.exists(body_file): + size = filesystem.format_file_size(body_file) + else: + size = filesystem.format_file_size(cache_file) + + # Only show files where we extracted a filename + # (filename should always be present since we filter in + # _get_http_cache_files_with_metadata) + formatted.append(f" - {filename} ({size})") + + logger.info("\n".join(sorted(formatted))) + + # When showing wheels, list them + if show_wheels and wheel_files: + if show_http and http_files: + logger.info("") # Add spacing between sections + formatted = [] + for filename in wheel_files: + wheel = os.path.basename(filename) + size = filesystem.format_file_size(filename) + formatted.append(f" - {wheel} ({size})") + + logger.info("\n".join(sorted(formatted))) + + def format_for_human_unified_all( + self, + wheel_files: list[str], + http_files: list[tuple[str, str]], ) -> None: - """Format both wheel files and HTTP cached packages - for human readable output.""" - if not wheel_files and not http_packages: - logger.info("No cached packages.") + """Format wheel and HTTP cache files in a unified list with + [HTTP cached] suffix. 
+ """ + if not wheel_files and not http_files: + logger.info("No cached files.") return - results: list[str] = [] - - # Add wheel files - for filename in wheel_files: - wheel = os.path.basename(filename) - size_str = filesystem.format_file_size(filename) - results.append(f" - {wheel} ({size_str})") + formatted = [] - # Add HTTP cached packages - for project, version, file_path in http_packages: - # Create a wheel-like name for display - wheel_name = f"{project}-{version}-py3-none-any.whl" + # Add HTTP files with suffix + for cache_file, filename in http_files: + # Use body file size if available + body_file = cache_file + ".body" + if os.path.exists(body_file): + size = filesystem.format_file_size(body_file) + else: + size = filesystem.format_file_size(cache_file) - # Calculate size of both header and body files - size = 0 - try: - size += os.path.getsize(file_path) - body_path = file_path + ".body" - if os.path.exists(body_path): - size += os.path.getsize(body_path) - except OSError: - pass + formatted.append(f" - {filename} ({size}) [HTTP cached]") - size_str = filesystem.format_size(size) - results.append(f" - {wheel_name} ({size_str}) [HTTP cached]") + # Add wheel files without suffix + for filename in wheel_files: + wheel = os.path.basename(filename) + size = filesystem.format_file_size(filename) + formatted.append(f" - {wheel} ({size})") - logger.info("Cache contents:\n") - logger.info("\n".join(sorted(results))) + logger.info("\n".join(sorted(formatted))) - def format_for_abspath_combined( - self, wheel_files: list[str], http_packages: list[tuple[str, str, str]] + def format_for_abspath_unified( + self, wheel_files: list[str], http_files: list[tuple[str, str]] ) -> None: - """Format both wheel files and HTTP cached packages for absolute path output.""" - all_paths = [] + """Format wheel and HTTP cache files as absolute paths.""" + all_files = [] - # Add wheel file paths - all_paths.extend(wheel_files) + # Add wheel files + all_files.extend(wheel_files) - # Add HTTP cache file paths - for _, _, file_path in http_packages: - all_paths.append(file_path) + # Add HTTP cache files (only those with extracted filenames) + for cache_file, _filename in http_files: + all_files.append(cache_file) - if all_paths: - logger.info("\n".join(sorted(all_paths))) + if all_files: + logger.info("\n".join(sorted(all_files))) def remove_cache_items(self, options: Values, args: list[str]) -> None: if len(args) > 1: @@ -306,81 +356,129 @@ def _find_wheels(self, options: Values, pattern: str) -> list[str]: return filesystem.find_files(wheel_dir, pattern) - def _get_http_cached_packages( - self, options: Values, pattern: str = "*" - ) -> list[tuple[str, str, str]]: - """Extract package information from HTTP cached responses. + def _get_http_cache_files_with_metadata( + self, options: Values + ) -> list[tuple[str, str]]: + """Get HTTP cache files with filenames from package content inspection. - We import Serializer and Request lazily to avoid pulling in - network-related modules when users just invoke `pip cache --help`. - This is required to keep test_no_network_imports passing. + Extracts filenames by reading the cached package structure: + - Wheel files: Reads .dist-info/WHEEL metadata for complete filename with tags + - Tarball files: Reads tar structure to extract package name from root directory - Returns a list of tuples: (package_name, version, file_path) + Returns a list of tuples: (cache_file_path, filename) + Only returns files where a filename could be successfully extracted. 
""" from pip._vendor.cachecontrol.serialize import Serializer - from pip._vendor.requests import Request - packages: list[tuple[str, str, str]] = [] http_files = self._find_http_files(options) + result = [] + serializer = Serializer() - for file_path in http_files: - # Skip body files - if file_path.endswith(".body"): + for cache_file in http_files: + # Skip .body files as we only want metadata files + if cache_file.endswith(".body"): continue + filename = None try: - with open(file_path, "rb") as f: - data = f.read() - - # Dummy PreparedRequest needed by Serializer API; no network call. - dummy_request = Request("GET", "https://dummy.com").prepare() - body_file_path = file_path + ".body" - body_file = None - - if os.path.exists(body_file_path): - body_file = open(body_file_path, "rb") + # Read the cached metadata + with open(cache_file, "rb") as f: + cached_data = f.read() + + # Try to parse it + if cached_data.startswith(f"cc={serializer.serde_version},".encode()): + # Extract the msgpack data + from pip._vendor import msgpack + + data = cached_data[5:] # Skip "cc=4," + cached = msgpack.loads(data, raw=False) + + headers = cached.get("response", {}).get("headers", {}) + content_type = headers.get("content-type", "") + + # Extract filename from body content + body_file = cache_file + ".body" + if os.path.exists(body_file): + filename = self._extract_filename_from_body( + body_file, content_type + ) + except Exception: + # If we can't read/parse the file, just skip trying to extract name + pass - try: - response = serializer.loads(dummy_request, data, body_file) - if not response: - continue - # Check for PyPI headers that indicate this is a wheel - package_type = response.headers.get("x-pypi-file-package-type") - if package_type != "bdist_wheel": - continue - project = response.headers.get("x-pypi-file-project") - version = response.headers.get("x-pypi-file-version") - python_version = response.headers.get( - "x-pypi-file-python-version", "py3" - ) - if not (project and version): - continue - # Create a wheel-like filename for consistency - wheel_name = f"{project}-{version}-{python_version}-none-any.whl" - # Apply pattern matching similar to wheel files - if pattern == "*" or self._matches_pattern(wheel_name, pattern): - packages.append((project, version, file_path)) - finally: - if body_file: - body_file.close() + # Only include files where we successfully extracted a filename + if filename: + result.append((cache_file, filename)) - except Exception: - # Silently skip files that can't be processed - continue + return result - return packages + def _extract_filename_from_body( + self, body_file: str, content_type: str + ) -> str | None: + """Extract filename by inspecting the body content. - def _matches_pattern(self, filename: str, pattern: str) -> bool: - """Check if a filename matches the given pattern.""" - import fnmatch + This works offline by examining the downloaded file structure. 
+ """ + try: + # Check if it's a wheel file (ZIP format) + if "application/octet-stream" in content_type or not content_type: + # Try to read as a wheel (ZIP file) + import zipfile - # Extract just the package name-version part for matching - base_name = filename.split("-")[0] if "-" in filename else filename + try: + with zipfile.ZipFile(body_file, "r") as zf: + # Wheel files contain a .dist-info directory + names = zf.namelist() + dist_info_dir = None + for name in names: + if ".dist-info/" in name: + dist_info_dir = name.split("/")[0] + break + + if dist_info_dir and dist_info_dir.endswith(".dist-info"): + # Read WHEEL metadata to get the full wheel name + wheel_file = f"{dist_info_dir}/WHEEL" + if wheel_file in names: + wheel_content = zf.read(wheel_file).decode("utf-8") + # Parse WHEEL file for Root-Is-Purelib and Tag + tags = [] + for line in wheel_content.split("\n"): + if line.startswith("Tag:"): + tag = line.split(":", 1)[1].strip() + tags.append(tag) + + if tags: + # Use first tag to construct filename + # Format: {name}-{version}.dist-info + pkg_info = dist_info_dir[: -len(".dist-info")] + # Tags format: py3-none-any + tag = tags[0] + return f"{pkg_info}-{tag}.whl" + + # Fallback: just use name-version.whl + pkg_info = dist_info_dir[: -len(".dist-info")] + return f"{pkg_info}.whl" + except (zipfile.BadZipFile, KeyError, UnicodeDecodeError): + pass + + # Try to read as a tarball + import tarfile - # If pattern contains hyphen, match against full filename - if "-" in pattern: - return fnmatch.fnmatch(filename, pattern + "*") - else: - # Otherwise match against package name only - return fnmatch.fnmatch(base_name, pattern) + try: + with tarfile.open(body_file, "r:*") as tf: + # Get the first member to determine the package name + members = tf.getmembers() + if members: + # Tarball usually has format: package-version/... 
+ first_name = members[0].name + pkg_dir = first_name.split("/")[0] + if pkg_dir and "-" in pkg_dir: + return f"{pkg_dir}.tar.gz" + except (tarfile.TarError, KeyError): + pass + + except Exception: + pass + + return None From 6ae350d8a184c39f4d215fb9ad436b925da2ebf4 Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Fri, 3 Oct 2025 03:30:55 +0500 Subject: [PATCH 08/10] add test cases --- tests/functional/test_cache.py | 218 +++++++++++-------------------- tests/unit/test_cache_command.py | 202 ++++++++++++++++++++++++++++ 2 files changed, 279 insertions(+), 141 deletions(-) create mode 100644 tests/unit/test_cache_command.py diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index 1f4bc551d46..f8292ff1255 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -52,84 +52,56 @@ def wheel_cache_files(wheel_cache_dir: str) -> list[str]: @pytest.fixture def populate_http_cache(http_cache_dir: str) -> list[tuple[str, str]]: - destination = os.path.join(http_cache_dir, "arbitrary", "pathname") - os.makedirs(destination) + import zipfile - files = [ - ("aaaaaaaaa", os.path.join(destination, "aaaaaaaaa")), - ("bbbbbbbbb", os.path.join(destination, "bbbbbbbbb")), - ("ccccccccc", os.path.join(destination, "ccccccccc")), - ] - - for _name, filename in files: - with open(filename, "w"): - pass - - return files - - -@pytest.fixture -def populate_http_cache_with_wheels(http_cache_dir: str) -> list[tuple[str, str]]: - """Populate HTTP cache with realistic wheel response data.""" - import io - - from pip._vendor.cachecontrol.serialize import Serializer - from pip._vendor.requests import Request - from pip._vendor.urllib3 import HTTPResponse + from pip._vendor import msgpack destination = os.path.join(http_cache_dir, "arbitrary", "pathname") os.makedirs(destination) - # Create mock wheel responses with PyPI headers - packages = [ - ("test-package", "1.0.0", "py3"), - ("another-pkg", "2.1.0", "py2.py3"), - ] - files = [] - serializer = Serializer() - - for package_name, version, python_version in packages: - # Create a mock HTTP response with PyPI headers - headers = { - "content-type": "application/octet-stream", - "content-length": "1000", - "x-pypi-file-project": package_name, - "x-pypi-file-version": version, - "x-pypi-file-package-type": "bdist_wheel", - "x-pypi-file-python-version": python_version, - } - # Create an HTTPResponse object - response = HTTPResponse( - body=io.BytesIO(b"fake wheel content"), - headers=headers, - status=200, - version=11, - reason="OK", - decode_content=False, - ) - - # Create a dummy request - request = Request( - "GET", - f"https://files.pythonhosted.org/packages/source/{package_name[0]}/{package_name}/{package_name}-{version}-{python_version}-none-any.whl", - ).prepare() - - # Serialize the response - data = serializer.dumps(request, response) + # Create a few cache entries with proper wheel body files + wheel_entries = [ + ("test_package-1.0.0-py3-none-any.whl", "test_package", "1.0.0"), + ("another-2.3.4-py3-none-any.whl", "another", "2.3.4"), + ] - # Write to cache files - cache_file = os.path.join(destination, f"cache_{package_name}_{version}") + for wheel_filename, pkg_name, version in wheel_entries: + cache_file = os.path.join(destination, "cached_" + wheel_filename) body_file = cache_file + ".body" - with open(cache_file, "wb") as f: - f.write(data) + # Create the .body file as a minimal wheel + with zipfile.ZipFile(body_file, "w") as zf: + dist_info = f"{pkg_name}-{version}.dist-info" + # Add WHEEL file + 
wheel_content = "Wheel-Version: 1.0\nTag: py3-none-any\n" + zf.writestr(f"{dist_info}/WHEEL", wheel_content) + # Add METADATA file + metadata_content = ( + f"Metadata-Version: 2.1\nName: {pkg_name}\nVersion: {version}\n" + ) + zf.writestr(f"{dist_info}/METADATA", metadata_content) + + # Create the cache metadata file + cached_data = { + "response": { + "body": b"", + "headers": { + "content-type": "application/octet-stream", + }, + "status": 200, + "version": 11, + "reason": "OK", + "decode_content": True, + } + } - with open(body_file, "wb") as f: - f.write(b"fake wheel content") + with open(cache_file, "wb") as f: + f.write(b"cc=4,") + f.write(msgpack.dumps(cached_data, use_bin_type=True)) - files.append((package_name, cache_file)) + files.append((pkg_name, cache_file)) return files @@ -303,9 +275,9 @@ def test_cache_list_abspath(script: PipTestEnvironment) -> None: @pytest.mark.usefixtures("empty_wheel_cache") def test_cache_list_with_empty_cache(script: PipTestEnvironment) -> None: """Running `pip cache list` with an empty cache should print - "No cached packages." and exit.""" + "No locally built wheels cached." and exit.""" result = script.pip("cache", "list") - assert result.stdout == "No cached packages.\n" + assert result.stdout == "No locally built wheels cached.\n" @pytest.mark.usefixtures("empty_wheel_cache") @@ -363,76 +335,6 @@ def test_cache_list_name_match_abspath(script: PipTestEnvironment) -> None: assert list_matches_wheel_abspath("zzz-7.8.9", result) -@pytest.mark.usefixtures("populate_http_cache_with_wheels") -def test_cache_list_http_only(script: PipTestEnvironment) -> None: - """Running `pip cache list --cache-type=http` should list HTTP cached packages.""" - result = script.pip("cache", "list", "--cache-type=http") - - assert list_matches_wheel("test-package-1.0.0", result) - assert list_matches_wheel("another-pkg-2.1.0", result) - - -@pytest.mark.usefixtures("populate_wheel_cache") -def test_cache_list_wheels_only(script: PipTestEnvironment) -> None: - """Running `pip cache list --cache-type=wheels` should list only wheel files.""" - result = script.pip("cache", "list", "--cache-type=wheels") - - assert list_matches_wheel("yyy-1.2.3", result) - assert list_matches_wheel("zzz-4.5.6", result) - - -@pytest.mark.usefixtures("populate_wheel_cache", "populate_http_cache_with_wheels") -def test_cache_list_all_types(script: PipTestEnvironment) -> None: - """Running `pip cache list` should list both wheel files - and HTTP cached packages.""" - result = script.pip("cache", "list") - - # Should contain wheel files - assert list_matches_wheel("yyy-1.2.3", result) - assert list_matches_wheel("zzz-4.5.6", result) - - # Should contain HTTP cached packages - assert list_matches_wheel("test-package-1.0.0", result) - assert list_matches_wheel("another-pkg-2.1.0", result) - - -@pytest.mark.usefixtures("populate_http_cache_with_wheels") -def test_cache_list_http_abspath(script: PipTestEnvironment) -> None: - """Running `pip cache list --cache-type=http --format=abspath` - should list HTTP cache file paths.""" - result = script.pip("cache", "list", "--cache-type=http", "--format=abspath") - - lines = result.stdout.strip().split("\n") - assert len(lines) >= 2 # Should have at least 2 cache files - for line in lines: - assert os.path.exists(line), f"Cache file {line} should exist" - - -@pytest.mark.usefixtures("populate_http_cache_with_wheels") -def test_cache_list_http_pattern_match(script: PipTestEnvironment) -> None: - """Running `pip cache list test-package --cache-type=http` - 
should match only test-package.""" - result = script.pip("cache", "list", "test-package", "--cache-type=http") - - assert "test-package-1.0.0-py3-none-any.whl" in result.stdout - assert "another-pkg-2.1.0-py2.py3-none-any.whl" not in result.stdout - - -@pytest.mark.usefixtures("empty_wheel_cache") -def test_cache_list_http_with_empty_cache(script: PipTestEnvironment) -> None: - """Running `pip cache list --cache-type=http` with an empty HTTP cache - should print 'No cached packages.'""" - result = script.pip("cache", "list", "--cache-type=http") - assert "No cached packages." in result.stdout - - -@pytest.mark.usefixtures("empty_wheel_cache") -def test_cache_list_all_with_empty_cache(script: PipTestEnvironment) -> None: - """Running `pip cache list` with empty caches should print 'No cached packages.'""" - result = script.pip("cache", "list") - assert "No cached packages." in result.stdout - - @pytest.mark.usefixtures("populate_wheel_cache") def test_cache_list_name_and_version_match(script: PipTestEnvironment) -> None: """Running `pip cache list zzz-4.5.6` should list zzz-4.5.6, but @@ -506,9 +408,8 @@ def test_cache_purge( wheels.""" result = script.pip("cache", "purge", "--verbose") - assert remove_matches_http("aaaaaaaaa", result) - assert remove_matches_http("bbbbbbbbb", result) - assert remove_matches_http("ccccccccc", result) + assert remove_matches_http("cached_test_package-1.0.0-py3-none-any.whl", result) + assert remove_matches_http("cached_another-2.3.4-py3-none-any.whl", result) assert remove_matches_wheel("yyy-1.2.3", result) assert remove_matches_wheel("zzz-4.5.6", result) @@ -536,6 +437,41 @@ def test_cache_purge_too_many_args( assert os.path.exists(filename) +@pytest.mark.usefixtures("populate_http_cache") +def test_cache_list_with_http_flag(script: PipTestEnvironment) -> None: + """Running `pip cache list --http` should list HTTP cache files.""" + result = script.pip("cache", "list", "--http") + + # Should show HTTP cache files section + assert "HTTP cache files:" in result.stdout + + # Should list cache files with extracted wheel names + assert "test_package-1.0.0-py3-none-any.whl" in result.stdout + assert "another-2.3.4-py3-none-any.whl" in result.stdout + + +@pytest.mark.usefixtures("populate_http_cache") +def test_cache_list_with_http_flag_abspath(script: PipTestEnvironment) -> None: + """Running `pip cache list --http --format=abspath` should list full paths.""" + result = script.pip("cache", "list", "--http", "--format=abspath") + + # Should have some output with paths + lines = result.stdout.strip().split("\n") + assert len(lines) > 0 + # Each line should be a path + for line in lines: + assert os.path.isabs(line) + + +@pytest.mark.usefixtures("empty_wheel_cache") +def test_cache_list_with_http_flag_empty(script: PipTestEnvironment) -> None: + """Test `pip cache list --http` with empty cache.""" + result = script.pip("cache", "list", "--http") + + # Should show no cached files message + assert "No cached files." 
in result.stdout + + @pytest.mark.parametrize("command", ["info", "list", "remove", "purge"]) def test_cache_abort_when_no_cache_dir( script: PipTestEnvironment, command: str diff --git a/tests/unit/test_cache_command.py b/tests/unit/test_cache_command.py new file mode 100644 index 00000000000..62f02d65462 --- /dev/null +++ b/tests/unit/test_cache_command.py @@ -0,0 +1,202 @@ +"""Tests for the cache command with HTTP cache listing functionality.""" + +import os +import tempfile +from optparse import Values + +from pip._vendor.cachecontrol.serialize import Serializer + +from pip._internal.commands.cache import CacheCommand + + +class TestGetHttpCacheFilesWithMetadata: + """Tests for _get_http_cache_files_with_metadata method.""" + + def test_extracts_filename_from_wheel_body(self) -> None: + """Test that filenames are extracted from wheel file bodies.""" + import zipfile + + with tempfile.TemporaryDirectory() as cache_dir: + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + cache_file = os.path.join(cache_subdir, "test_cache_file") + + # Create a minimal wheel file structure + body_file = cache_file + ".body" + with zipfile.ZipFile(body_file, "w") as zf: + # Wheels have a .dist-info directory + zf.writestr("test_package-1.0.0.dist-info/WHEEL", "Wheel-Version: 1.0") + zf.writestr( + "test_package-1.0.0.dist-info/METADATA", "Name: test-package" + ) + + # Create cache metadata + cache_data = { + "response": { + "body": b"", + "headers": { + "content-type": "application/octet-stream", + }, + "status": 200, + "version": 11, + "reason": "OK", + "decode_content": False, + }, + "vary": {}, + } + + s = Serializer() + serialized = s.serialize(cache_data) + full_data = f"cc={s.serde_version},".encode() + serialized + + with open(cache_file, "wb") as f: + f.write(full_data) + + options = Values() + options.cache_dir = cache_dir + + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should extract filename from wheel structure + assert len(result) == 1 + assert result[0][0] == cache_file + assert result[0][1] == "test_package-1.0.0.whl" + + def test_extracts_filename_from_tarball_body(self) -> None: + """Test that filenames are extracted from tarball file bodies.""" + import tarfile + + with tempfile.TemporaryDirectory() as cache_dir: + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + cache_file = os.path.join(cache_subdir, "test_cache_file") + + # Create a minimal tarball structure + body_file = cache_file + ".body" + with tarfile.open(body_file, "w:gz") as tf: + # Tarballs typically have package-version/ as root + import io + + data = b"test content" + tarinfo = tarfile.TarInfo(name="mypackage-2.0.0/setup.py") + tarinfo.size = len(data) + tf.addfile(tarinfo, io.BytesIO(data)) + + # Create cache metadata + cache_data = { + "response": { + "body": b"", + "headers": { + "content-type": "application/octet-stream", + }, + "status": 200, + "version": 11, + "reason": "OK", + "decode_content": False, + }, + "vary": {}, + } + + s = Serializer() + serialized = s.serialize(cache_data) + full_data = f"cc={s.serde_version},".encode() + serialized + + with open(cache_file, "wb") as f: + f.write(full_data) + + options = Values() + options.cache_dir = cache_dir + + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should extract filename from 
tarball structure + assert len(result) == 1 + assert result[0][0] == cache_file + assert result[0][1] == "mypackage-2.0.0.tar.gz" + + def test_handles_files_without_extractable_names(self) -> None: + """Test that files without extractable package names are excluded.""" + with tempfile.TemporaryDirectory() as cache_dir: + # Create nested directory structure + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + # Create a cache file for non-package content (HTML) + cache_file = os.path.join(cache_subdir, "test_cache_file") + + cache_data = { + "response": { + "body": b"", + "headers": { + "content-type": "text/html", + }, + "status": 200, + "version": 11, + "reason": "OK", + "decode_content": False, + }, + "vary": {}, + } + + s = Serializer() + serialized = s.serialize(cache_data) + full_data = f"cc={s.serde_version},".encode() + serialized + + with open(cache_file, "wb") as f: + f.write(full_data) + + # Create mock options + options = Values() + options.cache_dir = cache_dir + + # Test the method + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should not include files without extractable names + assert len(result) == 0 + + def test_skips_body_files(self) -> None: + """Test that .body files are skipped.""" + with tempfile.TemporaryDirectory() as cache_dir: + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + # Create a .body file + body_file = os.path.join(cache_subdir, "test_cache_file.body") + with open(body_file, "wb") as f: + f.write(b"test data") + + options = Values() + options.cache_dir = cache_dir + + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should not find any files (body files are skipped) + assert len(result) == 0 + + def test_handles_corrupted_cache_files(self) -> None: + """Test that corrupted cache files are handled gracefully.""" + with tempfile.TemporaryDirectory() as cache_dir: + cache_subdir = os.path.join(cache_dir, "http-v2", "a", "b", "c", "d", "e") + os.makedirs(cache_subdir, exist_ok=True) + + # Create a corrupted cache file + cache_file = os.path.join(cache_subdir, "corrupted_file") + with open(cache_file, "wb") as f: + f.write(b"not a valid cache file") + + options = Values() + options.cache_dir = cache_dir + + cmd = CacheCommand("cache", "Test cache command") + result = cmd._get_http_cache_files_with_metadata(options) + + # Should handle the corrupted file without crashing + # Corrupted files without extractable names are excluded + assert len(result) == 0 From 83b0a8c2d78862ca7f71f61be4da1069557af33b Mon Sep 17 00:00:00 2001 From: Sajjad Ali Date: Fri, 3 Oct 2025 03:33:15 +0500 Subject: [PATCH 09/10] revised docs and chronographer --- docs/html/topics/caching.md | 12 ++++++++++-- news/10460.feature.rst | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/html/topics/caching.md b/docs/html/topics/caching.md index 8d6c40f112d..a1fe50dad24 100644 --- a/docs/html/topics/caching.md +++ b/docs/html/topics/caching.md @@ -139,9 +139,17 @@ The {ref}`pip cache` command can be used to manage pip's cache. ### Listing cached files -`pip cache list` will list all wheel files from pip's cache. +`pip cache list` will list locally built wheel files from pip's cache. -`pip cache list setuptools` will list all setuptools-related wheel files from pip's cache. 
+`pip cache list setuptools` will list setuptools-related locally built wheel files from pip's cache.
+
+`pip cache list --http` will list only HTTP cache files. Package filenames are recovered by inspecting the cached file content (wheel or tarball structure); files whose package name cannot be identified are not shown.
+
+`pip cache list --all` will list both locally built wheels and HTTP cache files in a unified list.
+
+When using `--all`, HTTP cached files are marked with a `[HTTP cached]` suffix to distinguish them from locally built wheels.
+
+You can also pass `--format=abspath` to print absolute cache file paths instead of human-friendly filenames and sizes.
 
 ## Disabling caching
 
diff --git a/news/10460.feature.rst b/news/10460.feature.rst
index 0cd1b614586..c64f238d851 100644
--- a/news/10460.feature.rst
+++ b/news/10460.feature.rst
@@ -1 +1 @@
-Add support for listing HTTP-cached packages in ``pip cache list``, including a ``--cache-type`` selector and pattern matching for HTTP entries.
+Add ``--http`` and ``--all`` flags to the ``pip cache list`` command. By default, the command shows only locally built wheels (backward compatible). The ``--http`` flag shows only HTTP cached packages, and the ``--all`` flag shows both in a unified list with HTTP packages marked with a ``[HTTP cached]`` suffix. Filenames for HTTP cached packages are recovered offline by inspecting the cached wheel and tarball structures, so listings show complete filenames with platform tags and accurate file sizes.

From 0fd481f0a821fc94cc0814881cb7868bef213806 Mon Sep 17 00:00:00 2001
From: Sajjad Ali
Date: Fri, 3 Oct 2025 04:08:36 +0500
Subject: [PATCH 10/10] fix: linting errors

---
 src/pip/_internal/commands/cache.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py
index 4f814db0fa0..a118973aa01 100644
--- a/src/pip/_internal/commands/cache.py
+++ b/src/pip/_internal/commands/cache.py
@@ -189,9 +189,9 @@ def list_cache_items(self, options: Values, args: list[str]) -> None:
             if unified:
                 self.format_for_human_unified_all(wheel_files, http_files)
             else:
-            self.format_for_human_separated(
-                wheel_files, http_files, show_http, show_wheels
-            )
+                self.format_for_human_separated(
+                    wheel_files, http_files, show_http, show_wheels
+                )
         else:
             self.format_for_abspath_unified(wheel_files, http_files)
 
@@ -285,7 +285,7 @@ def format_for_abspath_unified(
 
         # Add HTTP cache files (only those with extracted filenames)
         for cache_file, _filename in http_files:
-                all_files.append(cache_file)
+            all_files.append(cache_file)
 
         if all_files:
             logger.info("\n".join(sorted(all_files)))
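
The filename recovery exercised by `tests/unit/test_cache_command.py` comes down to treating each cached `.body` payload as either a zip archive (a wheel, which carries a `<name>-<version>.dist-info` directory and a `WHEEL` file with its tags) or a tarball (an sdist, whose members live under a `<name>-<version>/` prefix). Below is a minimal self-contained sketch of that approach, assuming only the behaviour the tests and fixtures pin down; the helper name `extract_filename_from_body` and its exact error handling are illustrative, not the method implemented on `CacheCommand` in this series.

import os
import tarfile
import zipfile
from typing import Optional


def extract_filename_from_body(cache_file: str) -> Optional[str]:
    """Best-effort recovery of the original artifact name for a cache entry.

    ``cache_file`` is the serialized CacheControl metadata file; the payload
    pip downloaded sits next to it as ``<cache_file>.body``.  Non-package
    responses (for example cached index HTML) yield ``None``.
    """
    body_path = cache_file + ".body"
    if not os.path.exists(body_path):
        return None

    # Wheel: a zip archive with a <name>-<version>.dist-info directory.
    if zipfile.is_zipfile(body_path):
        try:
            with zipfile.ZipFile(body_path) as zf:
                dist_info = next(
                    (
                        name.split("/")[0]
                        for name in zf.namelist()
                        if name.split("/")[0].endswith(".dist-info")
                    ),
                    None,
                )
                if dist_info is None:
                    return None
                name_version = dist_info[: -len(".dist-info")]
                try:
                    # A Tag line in the WHEEL file lets us rebuild the full
                    # tagged filename, e.g. pkg-1.0.0-py3-none-any.whl.
                    wheel_meta = zf.read(dist_info + "/WHEEL").decode()
                    for line in wheel_meta.splitlines():
                        if line.startswith("Tag:"):
                            tag = line.split(":", 1)[1].strip()
                            return f"{name_version}-{tag}.whl"
                except KeyError:
                    pass
                return name_version + ".whl"
        except zipfile.BadZipFile:
            return None

    # Sdist: a tarball whose members live under a <name>-<version>/ prefix.
    try:
        with tarfile.open(body_path, "r:*") as tf:
            members = tf.getmembers()
            if members:
                pkg_dir = members[0].name.split("/")[0]
                if pkg_dir and "-" in pkg_dir:
                    return f"{pkg_dir}.tar.gz"
    except (tarfile.TarError, OSError):
        pass

    return None

Applied to the fixtures above, this sketch yields `test_package-1.0.0-py3-none-any.whl` for the functional fixture's wheel entry (whose `WHEEL` file carries a `Tag:` line), `test_package-1.0.0.whl` for the tag-less wheel in the unit test, and `mypackage-2.0.0.tar.gz` for the tarball case, matching the listing assertions.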