diff --git a/.github/workflows/pmg-e2e.yml b/.github/workflows/pmg-e2e.yml index 9645234..7886bfa 100644 --- a/.github/workflows/pmg-e2e.yml +++ b/.github/workflows/pmg-e2e.yml @@ -81,9 +81,9 @@ jobs: run: | echo "Testing NPM single package installation..." mkdir npm-test && cd npm-test - pmg npm init -y - pmg npm install express@5.2.1 - pmg npm install lodash@4.17.21 + pmg --proxy-mode=false npm init -y + pmg --proxy-mode=false npm install express@5.2.1 + pmg --proxy-mode=false npm install lodash@4.17.21 # Verification: npm added packages present and manifest updated test -d node_modules/express @@ -93,7 +93,7 @@ jobs: echo "Testing NPM manifest installation..." rm -rf node_modules package-lock.json - pmg npm install + pmg --proxy-mode=false npm install # Verification: npm lockfile and installed modules exist after manifest install test -f package-lock.json @@ -144,13 +144,81 @@ jobs: cd .. && rm -rf npm-proxy-test + - name: Test PyPI - Proxy Mode + run: | + echo "Testing PyPI package managers with proxy-based interception..." + mkdir pypi-proxy-test && cd pypi-proxy-test + + echo "Setting up Python virtual environment for pip and pip3 tests..." + python -m venv venv && source venv/bin/activate + python --version + pip --version + + echo "Testing pip single package installation via proxy mode..." + pmg pip install requests==2.32.4 + pmg pip install numpy==2.3.5 + + # Verification: packages installed and importable + python -c "import requests, numpy; print('pip ok:', requests.__version__, numpy.__version__)" + + echo "Testing pip manifest installation via proxy mode..." + pmg pip freeze > requirements.txt + pmg pip uninstall -y requests numpy + pmg pip install -r requirements.txt + python -c "import requests, numpy; print('pip manifest ok:', requests.__version__, numpy.__version__)" + deactivate + + echo "Setting up Python virtual environment for pip3 tests..." 
+ python -m venv venv3 && source venv3/bin/activate + python --version + pip3 --version + + echo "Testing pip3 single package installation via proxy mode..." + pmg pip3 install requests==2.32.4 + pmg pip3 install numpy==2.3.5 + + # Verification: packages installed and importable + python -c "import requests, numpy; print('pip3 ok:', requests.__version__, numpy.__version__)" + + echo "Testing pip3 manifest installation via proxy mode..." + pmg pip3 freeze > requirements3.txt + pmg pip3 uninstall -y requests numpy + pmg pip3 install -r requirements3.txt + python -c "import requests, numpy; print('pip3 manifest ok:', requests.__version__, numpy.__version__)" + deactivate + + echo "Testing uv add and uv pip install via proxy mode..." + mkdir uv-proxy && cd uv-proxy + pmg uv init --no-readme + pmg uv add requests==2.32.4 + pmg uv add numpy==2.3.5 + + # Verification: pyproject.toml lists expected dependencies + test -f pyproject.toml + grep -q 'requests' pyproject.toml + grep -q 'numpy' pyproject.toml + + echo "Sync environment and verify installations..." + pmg uv sync + pmg uv pip show requests >/dev/null + pmg uv pip show numpy >/dev/null + + echo "Testing uv pip install from requirements via proxy mode..." + pmg uv pip freeze > requirements.txt + pmg uv pip install -r requirements.txt + pmg uv pip show requests >/dev/null + pmg uv pip show numpy >/dev/null + cd .. + + cd .. && rm -rf pypi-proxy-test + - name: Test PNPM - Single Package & Manifest run: | echo "Testing PNPM single package installation..." mkdir pnpm-test && cd pnpm-test - pmg pnpm init - pmg pnpm add express@5.2.1 - pmg pnpm add lodash@4.17.21 + pmg --proxy-mode=false pnpm init + pmg --proxy-mode=false pnpm add express@5.2.1 + pmg --proxy-mode=false pnpm add lodash@4.17.21 # Verification: pnpm packages installed and lockfile created test -d node_modules/express @@ -159,7 +227,7 @@ jobs: echo "Testing PNPM manifest installation..." 
rm -rf node_modules pnpm-lock.yaml - pmg pnpm install + pmg --proxy-mode=false pnpm install # Verification: pnpm lockfile and modules exist after manifest install test -f pnpm-lock.yaml @@ -171,9 +239,9 @@ jobs: run: | echo "Testing Bun single package installation..." mkdir bun-test && cd bun-test - pmg bun init -y - pmg bun add express@5.2.1 - pmg bun add lodash@4.17.21 + pmg --proxy-mode=false bun init -y + pmg --proxy-mode=false bun add express@5.2.1 + pmg --proxy-mode=false bun add lodash@4.17.21 # Verification: bun packages installed and lockfile created test -d node_modules/express @@ -182,7 +250,7 @@ jobs: echo "Testing Bun manifest installation..." rm -rf node_modules bun.lock - pmg bun install + pmg --proxy-mode=false bun install # Verification: bun lockfile and modules exist after manifest install test -f bun.lock @@ -198,9 +266,9 @@ jobs: yarn --version mkdir yarn-test && cd yarn-test - pmg yarn init -y - pmg yarn add express@5.2.1 - pmg yarn add lodash@4.17.21 + pmg --proxy-mode=false yarn init -y + pmg --proxy-mode=false yarn add express@5.2.1 + pmg --proxy-mode=false yarn add lodash@4.17.21 # Verification: yarn packages installed and lockfile created test -d node_modules/express @@ -209,7 +277,7 @@ jobs: echo "Testing Yarn manifest installation..." rm -rf node_modules yarn.lock - pmg yarn install + pmg --proxy-mode=false yarn install # Verification: yarn lockfile and modules exist after manifest install test -f yarn.lock @@ -223,19 +291,19 @@ jobs: mkdir npx-test && cd npx-test echo "Testing npx with a simple package..." - pmg npx cowsay@1.6.0 "Hello from pmg npx" | tee npx-output.txt + pmg --proxy-mode=false npx cowsay@1.6.0 "Hello from pmg npx" | tee npx-output.txt # Verification: cowsay output contains our message grep -q "Hello from pmg npx" npx-output.txt echo "Testing npx with --package flag..." 
- pmg npx --package cowsay@1.6.0 -- cowsay "Hello with package flag" | tee npx-pkg-output.txt + pmg --proxy-mode=false npx --package cowsay@1.6.0 -- cowsay "Hello with package flag" | tee npx-pkg-output.txt # Verification: package flag execution produces expected output grep -q "Hello with package flag" npx-pkg-output.txt echo "Testing npx dry-run mode..." - pmg --dry-run npx cowsay@1.6.0 "This should not execute" | tee npx-dry-output.txt + pmg --proxy-mode=false --dry-run npx cowsay@1.6.0 "This should not execute" | tee npx-dry-output.txt # Verification: dry-run should NOT produce cowsay ASCII art (cow face ^__^ should not appear) ! grep -q '\^__\^' npx-dry-output.txt @@ -248,19 +316,19 @@ jobs: mkdir pnpx-test && cd pnpx-test echo "Testing pnpx with a simple package..." - pmg pnpx cowsay@1.6.0 "Hello from pmg pnpx" | tee pnpx-output.txt + pmg --proxy-mode=false pnpx cowsay@1.6.0 "Hello from pmg pnpx" | tee pnpx-output.txt # Verification: cowsay output contains our message grep -q "Hello from pmg pnpx" pnpx-output.txt echo "Testing pnpx with --package flag..." - pmg pnpx --package cowsay@1.6.0 -- cowsay "Hello with package flag" | tee pnpx-pkg-output.txt + pmg --proxy-mode=false pnpx --package cowsay@1.6.0 -- cowsay "Hello with package flag" | tee pnpx-pkg-output.txt # Verification: package flag execution produces expected output grep -q "Hello with package flag" pnpx-pkg-output.txt echo "Testing pnpx dry-run mode..." - pmg --dry-run pnpx cowsay@1.6.0 "This should not execute" | tee pnpx-dry-output.txt + pmg --proxy-mode=false --dry-run pnpx cowsay@1.6.0 "This should not execute" | tee pnpx-dry-output.txt # Verification: dry-run should NOT produce cowsay ASCII art (cow face ^__^ should not appear) ! grep -q '\^__\^' pnpx-dry-output.txt @@ -272,9 +340,9 @@ jobs: echo "Testing Pip single package installation..." 
mkdir pip-test && cd pip-test python -m venv venv && source venv/bin/activate - pmg pip install requests==2.32.4 - pmg pip install numpy==2.3.5 - pmg pip freeze > requirements.txt + pmg --proxy-mode=false pip install requests==2.32.4 + pmg --proxy-mode=false pip install numpy==2.3.5 + pmg --proxy-mode=false pip freeze > requirements.txt # Verification: requirements.txt contains expected packages test -s requirements.txt @@ -282,8 +350,8 @@ jobs: grep -E '^numpy==' requirements.txt echo "Testing Pip manifest installation..." - pmg pip uninstall -y requests numpy - pmg pip install -r requirements.txt + pmg --proxy-mode=false pip uninstall -y requests numpy + pmg --proxy-mode=false pip install -r requirements.txt # Verification: imported packages are available in the environment python -c "import requests, numpy; print(requests.__version__); print(numpy.__version__)" @@ -295,9 +363,9 @@ jobs: echo "Testing Pip3 single package installation..." mkdir pip3-test && cd pip3-test python -m venv venv && source venv/bin/activate - pmg pip3 install requests==2.32.4 - pmg pip3 install numpy==2.3.5 - pmg pip3 freeze > requirements.txt + pmg --proxy-mode=false pip3 install requests==2.32.4 + pmg --proxy-mode=false pip3 install numpy==2.3.5 + pmg --proxy-mode=false pip3 freeze > requirements.txt # Verification: requirements.txt contains expected packages test -s requirements.txt @@ -305,8 +373,8 @@ jobs: grep -E '^numpy==' requirements.txt echo "Testing Pip3 manifest installation..." - pmg pip3 uninstall -y requests numpy - pmg pip3 install -r requirements.txt + pmg --proxy-mode=false pip3 uninstall -y requests numpy + pmg --proxy-mode=false pip3 install -r requirements.txt # Verification: imported packages are available in the environment python -c "import requests, numpy; print(requests.__version__); print(numpy.__version__)" @@ -317,9 +385,9 @@ jobs: run: | echo "Testing UV single package installation..." 
mkdir uv-test && cd uv-test - pmg uv init --no-readme - pmg uv add requests==2.32.4 - pmg uv add numpy==2.3.5 + pmg --proxy-mode=false uv init --no-readme + pmg --proxy-mode=false uv add requests==2.32.4 + pmg --proxy-mode=false uv add numpy==2.3.5 # Verification: pyproject.toml lists expected dependencies test -f pyproject.toml @@ -328,31 +396,31 @@ jobs: echo "Testing UV manifest installation..." rm -rf .venv uv.lock - pmg uv sync + pmg --proxy-mode=false uv sync # Verification: uv lockfile and virtualenv created; packages present test -d .venv test -f uv.lock - pmg uv pip show requests >/dev/null - pmg uv pip show numpy >/dev/null + pmg --proxy-mode=false uv pip show requests >/dev/null + pmg --proxy-mode=false uv pip show numpy >/dev/null echo "Testing UV pip commands..." - pmg uv pip freeze > requirements.txt - pmg uv pip install -r requirements.txt - pmg uv pip sync requirements.txt + pmg --proxy-mode=false uv pip freeze > requirements.txt + pmg --proxy-mode=false uv pip install -r requirements.txt + pmg --proxy-mode=false uv pip sync requirements.txt # Verification: uv pip can show installed packages after requirements sync - pmg uv pip show requests >/dev/null - pmg uv pip show numpy >/dev/null + pmg --proxy-mode=false uv pip show requests >/dev/null + pmg --proxy-mode=false uv pip show numpy >/dev/null cd .. && rm -rf uv-test - name: Test Poetry - Single Package & Manifest run: | echo "Testing Poetry single package installation..." mkdir poetry-test && cd poetry-test - pmg poetry init --name poetry-test --no-interaction --quiet - pmg poetry add requests==2.32.4 - pmg poetry add numpy==2.3.5 + pmg --proxy-mode=false poetry init --name poetry-test --no-interaction --quiet + pmg --proxy-mode=false poetry add requests==2.32.4 + pmg --proxy-mode=false poetry add numpy==2.3.5 # Verification: pyproject.toml dependencies updated test -f pyproject.toml @@ -361,15 +429,15 @@ jobs: echo "Testing Poetry manifest installation..." 
rm -rf .venv poetry.lock - pmg poetry install --no-root + pmg --proxy-mode=false poetry install --no-root cd .. && rm -rf poetry-test - name: Test Malicious Package Detection run: | echo "Testing malicious package detection..." mkdir malicious-test && cd malicious-test - pmg npm init -y - ! pmg npm install nyc-config@10.0.0 || echo "Malicious package correctly blocked" + pmg --proxy-mode=false npm init -y + if pmg --proxy-mode=false npm install nyc-config@10.0.0; then echo "ERROR: malicious package was not blocked" >&2; exit 1; else echo "Malicious package correctly blocked"; fi cd .. && rm -rf malicious-test - name: Test safedep-test-pkg is Blocked using Proxy mode @@ -399,29 +467,29 @@ jobs: mkdir pmg-modes-test && cd pmg-modes-test pmg npm init -y # Mode: --dry-run should not create node_modules or lockfiles - pmg --dry-run npm install express + pmg --proxy-mode=false --dry-run npm install express # Verification: no files created during dry-run test ! -d node_modules test ! -f package-lock.json # Mode: --silent should install without noisy output - pmg --silent npm install express + pmg --proxy-mode=false --silent npm install express # Verification: package installed test -d node_modules/express # Clean and test --verbose installation rm -rf node_modules package-lock.json - pmg --verbose npm install express + pmg --proxy-mode=false --verbose npm install express # Verification: package installed test -d node_modules/express # Clean and test --debug with log output rm -rf node_modules package-lock.json - pmg --debug --log debug.json npm install express + pmg --proxy-mode=false --debug --log debug.json npm install express # Verification: debug log written test -f debug.json # Mode: --paranoid may require cloud credentials; run non-blocking with dry-run - pmg --paranoid --dry-run npm install express || true + pmg --proxy-mode=false --paranoid --dry-run npm install express || true cd ..
&& rm -rf pmg-modes-test sandbox-e2e-macos: diff --git a/cmd/pypi/pip.go b/cmd/pypi/pip.go index 619daa9..53c2016 100644 --- a/cmd/pypi/pip.go +++ b/cmd/pypi/pip.go @@ -53,5 +53,9 @@ func executePipFlow(ctx context.Context, args []string) error { return fmt.Errorf("failed to create dependency resolver: %w", err) } + if config.IsProxyModeEnabled() { + return flows.ProxyFlow(packageManager, packageResolver).Run(ctx, args, parsedCommand) + } + return flows.Common(packageManager, packageResolver).Run(ctx, args, parsedCommand) } diff --git a/cmd/pypi/pip3.go b/cmd/pypi/pip3.go index 2fbe87f..898a4b2 100644 --- a/cmd/pypi/pip3.go +++ b/cmd/pypi/pip3.go @@ -53,5 +53,9 @@ func executePip3Flow(ctx context.Context, args []string) error { return fmt.Errorf("failed to create dependency resolver: %w", err) } + if config.IsProxyModeEnabled() { + return flows.ProxyFlow(packageManager, packageResolver).Run(ctx, args, parsedCommand) + } + return flows.Common(packageManager, packageResolver).Run(ctx, args, parsedCommand) } diff --git a/cmd/pypi/poetry.go b/cmd/pypi/poetry.go index 24a610a..bcae4dc 100644 --- a/cmd/pypi/poetry.go +++ b/cmd/pypi/poetry.go @@ -52,5 +52,9 @@ func executePoetryFlow(ctx context.Context, args []string) error { return fmt.Errorf("failed to create dependency resolver: %w", err) } + if config.IsProxyModeEnabled() { + return flows.ProxyFlow(packageManager, packageResolver).Run(ctx, args, parsedCommand) + } + return flows.Common(packageManager, packageResolver).Run(ctx, args, parsedCommand) } diff --git a/cmd/pypi/uv.go b/cmd/pypi/uv.go index d98a10b..706e7d3 100644 --- a/cmd/pypi/uv.go +++ b/cmd/pypi/uv.go @@ -52,5 +52,9 @@ func executeUvFlow(ctx context.Context, args []string) error { return fmt.Errorf("failed to create dependency resolver: %w", err) } + if config.IsProxyModeEnabled() { + return flows.ProxyFlow(packageManager, packageResolver).Run(ctx, args, parsedCommand) + } + return flows.Common(packageManager, packageResolver).Run(ctx, args, 
parsedCommand) } diff --git a/docs/proxy-mode.md b/docs/proxy-mode.md index a2cea7d..fd91ac6 100644 --- a/docs/proxy-mode.md +++ b/docs/proxy-mode.md @@ -31,6 +31,6 @@ proxy_mode: true | `pnpx` | ✅ | | `bun` | ✅ | | `yarn` | ✅ | -| `pip` | 🕒 | -| `uv` | 🕒 | -| `poetry` | 🕒 | +| `pip` | ✅ | +| `uv` | ✅ | +| `poetry` | ✅ | diff --git a/go.mod b/go.mod index 9e2e80e..18b8982 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( buf.build/gen/go/safedep/api/grpc/go v1.5.1-20250418165058-162f6b0cc319.2 buf.build/gen/go/safedep/api/protocolbuffers/go v1.36.6-20250418165058-162f6b0cc319.1 github.com/Masterminds/semver v1.5.0 - github.com/elazarl/goproxy v1.7.2 + github.com/elazarl/goproxy v1.8.1 github.com/fatih/color v1.18.0 github.com/google/osv-scalibr v0.2.1 github.com/google/uuid v1.6.0 @@ -20,7 +20,7 @@ require ( github.com/spf13/pflag v1.0.10 github.com/spf13/viper v1.21.0 github.com/stretchr/testify v1.11.1 - golang.org/x/term v0.34.0 + golang.org/x/term v0.39.0 google.golang.org/grpc v1.72.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -213,12 +213,12 @@ require ( go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect golang.org/x/exp/typeparams v0.0.0-20250210185358-939b2ce775ac // indirect - golang.org/x/mod v0.26.0 // indirect - golang.org/x/net v0.42.0 // indirect - golang.org/x/sync v0.16.0 // indirect - golang.org/x/sys v0.35.0 // indirect - golang.org/x/text v0.28.0 // indirect - golang.org/x/tools v0.35.0 // indirect + golang.org/x/mod v0.31.0 // indirect + golang.org/x/net v0.49.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.40.0 // indirect + golang.org/x/text v0.33.0 // indirect + golang.org/x/tools v0.40.0 // indirect golang.org/x/tools/go/expect v0.1.1-deprecated // indirect golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e // indirect diff --git a/go.sum b/go.sum index 6fa78a5..d966f98 
100644 --- a/go.sum +++ b/go.sum @@ -86,6 +86,8 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 h1:Om6kYQYDUk5wWbT0t0q6pvyM49i9XZAv9dDrkDA7gjk= github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g= +github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/curioswitch/go-reassign v0.3.0 h1:dh3kpQHuADL3cobV/sSGETA8DOv457dwl+fbBAhrQPs= github.com/curioswitch/go-reassign v0.3.0/go.mod h1:nApPCCTtqLJN/s8HfItCcKV0jIPwluBOvZP+dsJGA88= @@ -99,8 +101,8 @@ github.com/denis-tingaikin/go-header v0.5.0 h1:SRdnP5ZKvcO9KKRP1KJrhFR3RrlGuD+42 github.com/denis-tingaikin/go-header v0.5.0/go.mod h1:mMenU5bWrok6Wl2UsZjy+1okegmwQ3UgWl4V1D8gjlY= github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= -github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= +github.com/elazarl/goproxy v1.8.1 h1:/qGpPJGgIPOTZ7IoIQvjavocp//qYSe9LQnIGCgRY5k= +github.com/elazarl/goproxy v1.8.1/go.mod h1:b5xm6W48AUHNpRTCvlnd0YVh+JafCCtsLsJZvvNTz+E= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -563,8 +565,8 @@ 
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg= -golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -584,8 +586,8 @@ golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= -golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= -golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -598,8 +600,8 @@ golang.org/x/sync 
v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= -golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= -golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -624,8 +626,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= @@ -634,8 +636,8 @@ golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term 
v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= -golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= -golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= +golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= +golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -646,8 +648,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -673,8 +675,8 @@ golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= golang.org/x/tools v0.13.0/go.mod 
h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= -golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= -golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= golang.org/x/tools/go/expect v0.1.1-deprecated h1:jpBZDwmgPhXsKZC6WhL20P4b/wmnpsEAGHaNy0n/rJM= golang.org/x/tools/go/expect v0.1.1-deprecated/go.mod h1:eihoPOH+FgIqa3FpoTwguz/bVUSGBlGQU67vpBeOrBY= golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated h1:1h2MnaIAIXISqTFKdENegdpAgUXz6NrPEsbIeWaBRvM= diff --git a/internal/flows/proxy_flow.go b/internal/flows/proxy_flow.go index 75c2c98..f37911e 100644 --- a/internal/flows/proxy_flow.go +++ b/internal/flows/proxy_flow.go @@ -299,6 +299,7 @@ func (f *proxyFlow) setupEnvForProxy(proxyAddr, caCertPath string) []string { fmt.Sprintf("REQUESTS_CA_BUNDLE=%s", caCertPath), fmt.Sprintf("PIP_CERT=%s", caCertPath), fmt.Sprintf("PIP_PROXY=%s", proxyURL), + "PIP_RETRIES=0", ) return env diff --git a/proxy/interceptors/factory.go b/proxy/interceptors/factory.go index f760afc..cdcd091 100644 --- a/proxy/interceptors/factory.go +++ b/proxy/interceptors/factory.go @@ -43,6 +43,14 @@ func (f *InterceptorFactory) CreateInterceptor(ecosystem packagev1.Ecosystem) (p f.confirmationChan, ), nil + case packagev1.Ecosystem_ECOSYSTEM_PYPI: + return NewPypiRegistryInterceptor( + f.analyzer, + f.cache, + f.statsCollector, + f.confirmationChan, + ), nil + default: return nil, fmt.Errorf("proxy-based interception not yet supported for ecosystem: %s", ecosystem.String()) } @@ -52,6 +60,7 @@ func (f *InterceptorFactory) CreateInterceptor(ecosystem packagev1.Ecosystem) (p func SupportedEcosystems() []packagev1.Ecosystem { return []packagev1.Ecosystem{ packagev1.Ecosystem_ECOSYSTEM_NPM, + 
packagev1.Ecosystem_ECOSYSTEM_PYPI, } } diff --git a/proxy/interceptors/pypi_registry.go b/proxy/interceptors/pypi_registry.go new file mode 100644 index 0000000..5ba7a42 --- /dev/null +++ b/proxy/interceptors/pypi_registry.go @@ -0,0 +1,131 @@ +package interceptors + +import ( + packagev1 "buf.build/gen/go/safedep/api/protocolbuffers/go/safedep/messages/package/v1" + "github.com/safedep/dry/log" + "github.com/safedep/pmg/analyzer" + "github.com/safedep/pmg/proxy" +) + +var pypiRegistryDomains = registryConfigMap{ + "files.pythonhosted.org": { + Host: "files.pythonhosted.org", + SupportedForAnalysis: true, + Parser: pypiFilesParser{}, + }, + "pypi.org": { + Host: "pypi.org", + SupportedForAnalysis: true, + Parser: pypiOrgParser{}, + }, + // Test PyPI instance + "test.pypi.org": { + Host: "test.pypi.org", + SupportedForAnalysis: false, // Skip analysis for test PyPI + Parser: pypiOrgParser{}, + }, + "test-files.pythonhosted.org": { + Host: "test-files.pythonhosted.org", + SupportedForAnalysis: false, // Skip analysis for test PyPI files + Parser: pypiFilesParser{}, + }, +} + +// PypiRegistryInterceptor intercepts PyPI registry requests and analyzes packages for malware +// It embeds baseRegistryInterceptor to reuse ecosystem agnostic functionality +type PypiRegistryInterceptor struct { + baseRegistryInterceptor +} + +var _ proxy.Interceptor = (*PypiRegistryInterceptor)(nil) + +// NewPypiRegistryInterceptor creates a new PyPI registry interceptor +func NewPypiRegistryInterceptor( + analyzer analyzer.PackageVersionAnalyzer, + cache AnalysisCache, + statsCollector *AnalysisStatsCollector, + confirmationChan chan *ConfirmationRequest, +) *PypiRegistryInterceptor { + return &PypiRegistryInterceptor{ + baseRegistryInterceptor: baseRegistryInterceptor{ + analyzer: analyzer, + cache: cache, + statsCollector: statsCollector, + confirmationChan: confirmationChan, + }, + } +} + +// Name returns the interceptor name for logging +func (i *PypiRegistryInterceptor) Name() string { 
+ return "pypi-registry-interceptor" +} + +// ShouldIntercept determines if this interceptor should handle the given request +func (i *PypiRegistryInterceptor) ShouldIntercept(ctx *proxy.RequestContext) bool { + return pypiRegistryDomains.ContainsHostname(ctx.Hostname) +} + +// HandleRequest processes the request and returns response action +// We take a fail-open approach here, allowing requests that we can't parse the package information from the URL. +func (i *PypiRegistryInterceptor) HandleRequest(ctx *proxy.RequestContext) (*proxy.InterceptorResponse, error) { + log.Debugf("[%s] Handling PyPI registry request: %s", ctx.RequestID, ctx.URL.Path) + + // Get registry configuration + config := pypiRegistryDomains.GetConfigForHostname(ctx.Hostname) + if config == nil { + // Shouldn't happen if ShouldIntercept is working correctly + log.Warnf("[%s] No registry config found for hostname: %s", ctx.RequestID, ctx.Hostname) + return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil + } + + // Skip analysis for registries that are not supported for analysis + if !config.SupportedForAnalysis { + log.Debugf("[%s] Skipping analysis for %s registry (not supported for analysis): %s", + ctx.RequestID, config.Host, ctx.URL.String()) + return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil + } + + // Parse URL using registry-specific strategy + pkgInfo, err := config.Parser.ParseURL(ctx.URL.Path) + if err != nil { + log.Warnf("[%s] Failed to parse PyPI registry URL %s for %s: %v", + ctx.RequestID, ctx.URL.Path, config.Host, err) + return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil + } + + // Only analyze actual file downloads (sdist or wheel) + // Metadata requests (Simple API or JSON API) are allowed through + if !pkgInfo.IsFileDownload() { + log.Debugf("[%s] Skipping analysis for metadata request: %s", ctx.RequestID, pkgInfo.GetName()) + return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil + } + + // Ensure we have both name and 
version for analysis + if pkgInfo.GetName() == "" || pkgInfo.GetVersion() == "" { + log.Warnf("[%s] Incomplete package info from URL %s: name=%s, version=%s", + ctx.RequestID, ctx.URL.Path, pkgInfo.GetName(), pkgInfo.GetVersion()) + return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil + } + + // Get file type for logging if available + fileType := "" + if pypiInfo, ok := pkgInfo.(*pypiPackageInfo); ok { + fileType = pypiInfo.FileType() + } + log.Debugf("[%s] Analyzing PyPI package: %s@%s (type: %s)", + ctx.RequestID, pkgInfo.GetName(), pkgInfo.GetVersion(), fileType) + + result, err := i.analyzePackage( + ctx, + packagev1.Ecosystem_ECOSYSTEM_PYPI, + pkgInfo.GetName(), + pkgInfo.GetVersion(), + ) + if err != nil { + log.Errorf("[%s] Failed to analyze package %s@%s: %v", ctx.RequestID, pkgInfo.GetName(), pkgInfo.GetVersion(), err) + return &proxy.InterceptorResponse{Action: proxy.ActionAllow}, nil + } + + return i.handleAnalysisResult(ctx, packagev1.Ecosystem_ECOSYSTEM_PYPI, pkgInfo.GetName(), pkgInfo.GetVersion(), result) +} diff --git a/proxy/interceptors/pypi_url_parser.go b/proxy/interceptors/pypi_url_parser.go new file mode 100644 index 0000000..38fb81c --- /dev/null +++ b/proxy/interceptors/pypi_url_parser.go @@ -0,0 +1,374 @@ +package interceptors + +import ( + "fmt" + "regexp" + "strings" +) + +// pypiPackageInfo represents parsed package information from a PyPI registry URL +type pypiPackageInfo struct { + name string + version string + isDownload bool // True if this is a file download (sdist or wheel) + fileType string // "sdist", "wheel", or empty for non-download requests +} + +// Ensure pypiPackageInfo implements packageInfo interface +var _ packageInfo = (*pypiPackageInfo)(nil) + +// GetName returns the package name +func (p *pypiPackageInfo) GetName() string { + return p.name +} + +// GetVersion returns the package version +func (p *pypiPackageInfo) GetVersion() string { + return p.version +} + +// IsFileDownload returns true if this is a 
file download (sdist or wheel) +func (p *pypiPackageInfo) IsFileDownload() bool { + return p.isDownload +} + +// FileType returns the file type ("sdist", "wheel", or empty) +func (p *pypiPackageInfo) FileType() string { + return p.fileType +} + +// pypiFilesParser parses URLs from files.pythonhosted.org +// This is where PyPI serves package files (sdists and wheels) +type pypiFilesParser struct{} + +// Ensure pypiFilesParser implements RegistryURLParser interface +var _ registryURLParser = pypiFilesParser{} + +// ParseURL parses files.pythonhosted.org URL paths +// URL patterns: +// - /packages/{hash_dirs}/{filename} +// Where filename can be: +// - {name}-{version}.tar.gz (sdist) +// - {name}-{version}.zip (sdist) +// - {name}-{version}(-{build})?-{python}-{abi}-{platform}.whl (wheel) +func (p pypiFilesParser) ParseURL(urlPath string) (packageInfo, error) { + // Remove leading and trailing slashes + urlPath = strings.Trim(urlPath, "/") + + if urlPath == "" { + return nil, fmt.Errorf("empty URL path") + } + + // Split path into segments + segments := strings.Split(urlPath, "/") + + // files.pythonhosted.org paths start with "packages" + // Format: packages/{hash_prefix}/{filename} + // The hash prefix can be variable length (typically 2-3 directory levels) + if len(segments) < 2 { + return nil, fmt.Errorf("invalid PyPI files URL: not enough segments") + } + + // The filename is always the last segment + filename := segments[len(segments)-1] + + // Check if it's a packages download path + if segments[0] != "packages" { + return nil, fmt.Errorf("invalid PyPI files URL: expected 'packages' prefix, got %s", segments[0]) + } + + return parseFilename(filename) +} + +// pypiOrgParser parses URLs from pypi.org (Simple API and JSON API) +type pypiOrgParser struct{} + +// Ensure pypiOrgParser implements RegistryURLParser interface +var _ registryURLParser = pypiOrgParser{} + +// ParseURL parses pypi.org URL paths +// URL patterns: +// - /simple/{package}/ (Simple API - 
package index) +// - /simple/{package}/{filename} (Simple API - file redirect, rare) +// - /pypi/{package}/json (JSON API - package metadata) +// - /pypi/{package}/{version}/json (JSON API - version metadata) +func (p pypiOrgParser) ParseURL(urlPath string) (packageInfo, error) { + // Remove leading and trailing slashes + urlPath = strings.Trim(urlPath, "/") + + if urlPath == "" { + return nil, fmt.Errorf("empty URL path") + } + + // Split path into segments + segments := strings.Split(urlPath, "/") + + if len(segments) < 2 { + return nil, fmt.Errorf("invalid pypi.org URL: not enough segments") + } + + switch segments[0] { + case "simple": + // Simple API: /simple/{package}/ or /simple/{package}/{filename} + return parseSimpleAPIURL(segments[1:]) + case "pypi": + // JSON API: /pypi/{package}/json or /pypi/{package}/{version}/json + return parseJSONAPIURL(segments[1:]) + default: + return nil, fmt.Errorf("unknown pypi.org path prefix: %s", segments[0]) + } +} + +// parseSimpleAPIURL parses Simple API URL paths +func parseSimpleAPIURL(segments []string) (*pypiPackageInfo, error) { + if len(segments) == 0 { + return nil, fmt.Errorf("invalid Simple API URL: missing package name") + } + + packageName := segments[0] + + // Simple API index request: /simple/{package}/ + if len(segments) == 1 { + return &pypiPackageInfo{ + name: denormalizePyPIPackageName(packageName), + isDownload: false, + }, nil + } + + // Simple API might include filename (for redirects): /simple/{package}/{filename} + if len(segments) == 2 { + filename := segments[1] + info, err := parseFilename(filename) + if err != nil { + // If we can't parse the filename, treat it as a non-download request + return &pypiPackageInfo{ + name: denormalizePyPIPackageName(packageName), + isDownload: false, + }, nil + } + return info, nil + } + + return nil, fmt.Errorf("invalid Simple API URL format: too many segments") +} + +// parseJSONAPIURL parses JSON API URL paths +func parseJSONAPIURL(segments []string) 
(*pypiPackageInfo, error) { + if len(segments) == 0 { + return nil, fmt.Errorf("invalid JSON API URL: missing package name") + } + + packageName := segments[0] + + // /pypi/{package}/json - package metadata (no specific version) + if len(segments) == 2 && segments[1] == "json" { + return &pypiPackageInfo{ + name: denormalizePyPIPackageName(packageName), + isDownload: false, + }, nil + } + + // /pypi/{package}/{version}/json - version metadata + if len(segments) == 3 && segments[2] == "json" { + return &pypiPackageInfo{ + name: denormalizePyPIPackageName(packageName), + version: segments[1], + isDownload: false, + }, nil + } + + return nil, fmt.Errorf("invalid JSON API URL format") +} + +// parseFilename extracts package name and version from a PyPI distribution filename +func parseFilename(filename string) (*pypiPackageInfo, error) { + // Try to parse as wheel first + if strings.HasSuffix(filename, ".whl") { + return parseWheelFilename(filename) + } + + // Try to parse as sdist (tar.gz or zip) + if strings.HasSuffix(filename, ".tar.gz") || strings.HasSuffix(filename, ".zip") { + return parseSdistFilename(filename) + } + + // Check for other archive formats that PyPI might serve + if strings.HasSuffix(filename, ".tar.bz2") || strings.HasSuffix(filename, ".tgz") { + return parseSdistFilename(filename) + } + + return nil, fmt.Errorf("unsupported file type: %s", filename) +} + +// parseWheelFilename parses a wheel filename to extract package info +// Wheel filename format: {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl +// Examples: +// - requests-2.28.0-py3-none-any.whl +// - numpy-1.24.0-cp311-cp311-linux_x86_64.whl +// - package_name-1.0.0-1-py3-none-any.whl (with build tag) +func parseWheelFilename(filename string) (*pypiPackageInfo, error) { + // Remove .whl extension + basename := strings.TrimSuffix(filename, ".whl") + + // Split by '-' to get components + // Minimum: name-version-python-abi-platform (5 parts) + // With build 
tag: name-version-build-python-abi-platform (6 parts) + parts := strings.Split(basename, "-") + + if len(parts) < 5 { + return nil, fmt.Errorf("invalid wheel filename: not enough components in %s", filename) + } + + // The last 3 parts are always: python_tag, abi_tag, platform_tag + // Before that is either: name, version OR name, version, build_tag + // We need to find where the version is + + // Work backwards: last 3 are tags + // If 6+ parts, could have build tag + // If 5 parts, no build tag + + var name, version string + + if len(parts) == 5 { + // name-version-python-abi-platform + name = parts[0] + version = parts[1] + } else if len(parts) == 6 { + // Could be: + // - name-version-build-python-abi-platform (6 parts, with build tag) + // - name_with_underscore-version-python-abi-platform (can't be this, underscores in names are normalized) + // Build tags are numeric (PEP 427) + if isBuildTag(parts[2]) { + name = parts[0] + version = parts[1] + } else { + // The name might contain a hyphen that wasn't normalized + // This shouldn't happen with properly normalized names, but handle it + name = parts[0] + "_" + parts[1] + version = parts[2] + } + } else { + // More than 6 parts - name contains hyphens or there's a build tag + // Try to find version by looking for semver-like pattern + name, version = extractNameVersionFromParts(parts[:len(parts)-3]) + if name == "" || version == "" { + return nil, fmt.Errorf("could not parse wheel filename: %s", filename) + } + } + + return &pypiPackageInfo{ + name: denormalizePyPIPackageName(name), + version: version, + isDownload: true, + fileType: "wheel", + }, nil +} + +// isBuildTag checks if a string looks like a wheel build tag (numeric) +func isBuildTag(s string) bool { + if s == "" { + return false + } + for _, c := range s { + if c < '0' || c > '9' { + return false + } + } + return true +} + +// parseSdistFilename parses a source distribution filename to extract package info +// Sdist filename format: 
{name}-{version}.tar.gz or {name}-{version}.zip +// Examples: +// - requests-2.28.0.tar.gz +// - Flask-RESTful-0.3.10.tar.gz (note: hyphens in name) +func parseSdistFilename(filename string) (*pypiPackageInfo, error) { + // Remove extension + basename := filename + for _, ext := range []string{".tar.gz", ".tar.bz2", ".tgz", ".zip"} { + if strings.HasSuffix(basename, ext) { + basename = strings.TrimSuffix(basename, ext) + break + } + } + + // Find the version by looking for the last hyphen followed by a version-like string + // This is tricky because package names can contain hyphens + name, version := extractNameVersionFromSdist(basename) + if name == "" || version == "" { + return nil, fmt.Errorf("could not parse sdist filename: %s", filename) + } + + return &pypiPackageInfo{ + name: denormalizePyPIPackageName(name), + version: version, + isDownload: true, + fileType: "sdist", + }, nil +} + +// extractNameVersionFromSdist extracts name and version from a sdist basename +// The challenge is that package names can contain hyphens, so we need to find +// where the name ends and the version begins +func extractNameVersionFromSdist(basename string) (string, string) { + // Version pattern: starts with a digit, may contain digits, dots, and pre-release suffixes + versionPattern := regexp.MustCompile(`^\d+(\.\d+)*([._-]?(a|alpha|b|beta|c|rc|pre|post|dev|final)\.?\d*)*(\+[a-zA-Z0-9._-]+)?$`) + + // Split by hyphen and try to find where version starts + parts := strings.Split(basename, "-") + + // Try from the end, looking for version-like parts + for i := len(parts) - 1; i > 0; i-- { + potentialVersion := strings.Join(parts[i:], "-") + // Check if this could be a version + if versionPattern.MatchString(potentialVersion) { + name := strings.Join(parts[:i], "-") + return name, potentialVersion + } + + // Also try just the single part as version + if versionPattern.MatchString(parts[i]) { + name := strings.Join(parts[:i], "-") + return name, parts[i] + } + } + + return "", "" 
+} + +// extractNameVersionFromParts extracts name and version from wheel filename parts +// (excluding the python-abi-platform tags) +func extractNameVersionFromParts(parts []string) (string, string) { + if len(parts) < 2 { + return "", "" + } + + // Version pattern for wheels + versionPattern := regexp.MustCompile(`^\d+(\.\d+)*([._]?(a|alpha|b|beta|c|rc|pre|post|dev|final)\d*)*(\+[a-zA-Z0-9._]+)?$`) + + // Try from the end, looking for version-like parts + for i := len(parts) - 1; i > 0; i-- { + if versionPattern.MatchString(parts[i]) { + // Check if next part is a build tag (numeric only) + if i+1 < len(parts) && isBuildTag(parts[i+1]) { + // This is the version, parts[i+1] is build tag + name := strings.Join(parts[:i], "_") + return name, parts[i] + } + name := strings.Join(parts[:i], "_") + return name, parts[i] + } + } + + // Fallback: assume first part is name, second is version + return parts[0], parts[1] +} + +// denormalizePyPIPackageName converts a normalized package name back to a more canonical form +// PyPI normalizes names by replacing [-_.] 
// denormalizePyPIPackageName returns the PEP 503 normalized form of a PyPI
// project name: lowercased, with every run of '-', '_' and '.' collapsed into
// a single '-'. The original spelling cannot be recovered from a filename, but
// the normalized form is what PyPI itself uses for lookups, so names taken
// from sdists, wheels and API paths all map to the same key
// (e.g. "zope.interface", "zope_interface" and "Zope-Interface" all become
// "zope-interface").
func denormalizePyPIPackageName(name string) string {
	var b strings.Builder
	b.Grow(len(name))

	pendingSeparator := false
	for _, r := range strings.ToLower(name) {
		if r == '-' || r == '_' || r == '.' {
			// Defer writing so that a run of separators emits one hyphen.
			pendingSeparator = true
			continue
		}
		if pendingSeparator {
			b.WriteByte('-')
			pendingSeparator = false
		}
		b.WriteRune(r)
	}
	if pendingSeparator {
		b.WriteByte('-')
	}
	return b.String()
}
wantFileType: "sdist", + wantErr: false, + }, + { + name: "sdist with post version", + urlPath: "/packages/aa/bb/cc/package-1.0.0.post1.tar.gz", + wantName: "package", + wantVersion: "1.0.0.post1", + wantIsDownload: true, + wantFileType: "sdist", + wantErr: false, + }, + { + name: "sdist with local version identifier", + urlPath: "/packages/dd/ee/ff/mylib-1.2.3+local.tar.gz", + wantName: "mylib", + wantVersion: "1.2.3+local", + wantIsDownload: true, + wantFileType: "sdist", + wantErr: false, + }, + + // Wheel files + { + name: "wheel simple package", + urlPath: "/packages/ab/cd/ef12/requests-2.28.0-py3-none-any.whl", + wantName: "requests", + wantVersion: "2.28.0", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + { + name: "wheel with platform-specific tags", + urlPath: "/packages/12/34/56/numpy-1.24.0-cp311-cp311-linux_x86_64.whl", + wantName: "numpy", + wantVersion: "1.24.0", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + { + name: "wheel with manylinux platform", + urlPath: "/packages/aa/bb/cc/cryptography-41.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + wantName: "cryptography", + wantVersion: "41.0.0", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + { + name: "wheel with underscore in name (normalized)", + urlPath: "/packages/11/22/33/some_package-1.0.0-py3-none-any.whl", + wantName: "some-package", + wantVersion: "1.0.0", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + { + name: "wheel with build tag", + urlPath: "/packages/ff/ee/dd/mypackage-1.0.0-1-py3-none-any.whl", + wantName: "mypackage", + wantVersion: "1.0.0", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + { + name: "wheel windows platform", + urlPath: "/packages/aa/bb/cc/pywin32-306-cp311-cp311-win_amd64.whl", + wantName: "pywin32", + wantVersion: "306", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + { + name: "wheel macos 
platform", + urlPath: "/packages/dd/ee/ff/tensorflow-2.15.0-cp311-cp311-macosx_10_15_x86_64.whl", + wantName: "tensorflow", + wantVersion: "2.15.0", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + + // Real-world examples + { + name: "real django sdist", + urlPath: "/packages/b8/50/71e60c5e9148c20de37c37f3e4cd1da1f63f7d0f7ea4c7e9c8a2f5c8d9e1/Django-4.2.7.tar.gz", + wantName: "django", + wantVersion: "4.2.7", + wantIsDownload: true, + wantFileType: "sdist", + wantErr: false, + }, + { + name: "real pandas wheel", + urlPath: "/packages/a1/b2/c3d4e5f6/pandas-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + wantName: "pandas", + wantVersion: "2.1.3", + wantIsDownload: true, + wantFileType: "wheel", + wantErr: false, + }, + + // Error cases + { + name: "empty URL path", + urlPath: "", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantFileType: "", + wantErr: true, + }, + { + name: "just slash", + urlPath: "/", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantFileType: "", + wantErr: true, + }, + { + name: "invalid path without packages prefix", + urlPath: "/files/ab/cd/requests-2.28.0.tar.gz", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantFileType: "", + wantErr: true, + }, + { + name: "unsupported file type", + urlPath: "/packages/ab/cd/ef/readme.txt", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantFileType: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + parser := pypiFilesParser{} + got, err := parser.ParseURL(tt.urlPath) + + if tt.wantErr { + assert.Error(t, err) + return + } + + assert.NoError(t, err) + assert.Equal(t, tt.wantName, got.GetName()) + assert.Equal(t, tt.wantVersion, got.GetVersion()) + assert.Equal(t, tt.wantIsDownload, got.IsFileDownload()) + + // Check file type via type assertion - must succeed for pypi packages + pypiInfo, ok := got.(*pypiPackageInfo) + assert.True(t, ok, 
"expected *pypiPackageInfo type") + if ok { + assert.Equal(t, tt.wantFileType, pypiInfo.FileType()) + } + }) + } +} + +func TestPypiOrgParser_ParseURL(t *testing.T) { + tests := []struct { + name string + urlPath string + wantName string + wantVersion string + wantIsDownload bool + wantErr bool + }{ + // Simple API + { + name: "simple api package index", + urlPath: "/simple/requests/", + wantName: "requests", + wantVersion: "", + wantIsDownload: false, + wantErr: false, + }, + { + name: "simple api package without trailing slash", + urlPath: "/simple/django", + wantName: "django", + wantVersion: "", + wantIsDownload: false, + wantErr: false, + }, + { + name: "simple api normalized name", + urlPath: "/simple/flask-restful/", + wantName: "flask-restful", + wantVersion: "", + wantIsDownload: false, + wantErr: false, + }, + + // JSON API + { + name: "json api package metadata", + urlPath: "/pypi/requests/json", + wantName: "requests", + wantVersion: "", + wantIsDownload: false, + wantErr: false, + }, + { + name: "json api version metadata", + urlPath: "/pypi/requests/2.28.0/json", + wantName: "requests", + wantVersion: "2.28.0", + wantIsDownload: false, + wantErr: false, + }, + { + name: "json api with normalized name", + urlPath: "/pypi/flask-restful/0.3.10/json", + wantName: "flask-restful", + wantVersion: "0.3.10", + wantIsDownload: false, + wantErr: false, + }, + + // Error cases + { + name: "empty URL path", + urlPath: "", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantErr: true, + }, + { + name: "just slash", + urlPath: "/", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantErr: true, + }, + { + name: "unknown path prefix", + urlPath: "/unknown/requests/", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantErr: true, + }, + { + name: "simple api missing package name", + urlPath: "/simple/", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantErr: true, + }, + { + name: "json api missing package 
name", + urlPath: "/pypi/json", + wantName: "", + wantVersion: "", + wantIsDownload: false, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + parser := pypiOrgParser{} + got, err := parser.ParseURL(tt.urlPath) + + if tt.wantErr { + assert.Error(t, err) + return + } + + assert.NoError(t, err) + assert.Equal(t, tt.wantName, got.GetName()) + assert.Equal(t, tt.wantVersion, got.GetVersion()) + assert.Equal(t, tt.wantIsDownload, got.IsFileDownload()) + }) + } +} + +func TestParseWheelFilename(t *testing.T) { + tests := []struct { + name string + filename string + wantName string + wantVersion string + wantErr bool + }{ + { + name: "simple wheel", + filename: "requests-2.28.0-py3-none-any.whl", + wantName: "requests", + wantVersion: "2.28.0", + wantErr: false, + }, + { + name: "wheel with cpython tag", + filename: "numpy-1.24.0-cp311-cp311-linux_x86_64.whl", + wantName: "numpy", + wantVersion: "1.24.0", + wantErr: false, + }, + { + name: "wheel with build tag", + filename: "package-1.0.0-1-py3-none-any.whl", + wantName: "package", + wantVersion: "1.0.0", + wantErr: false, + }, + { + name: "wheel with underscore name", + filename: "my_package-1.0.0-py3-none-any.whl", + wantName: "my-package", + wantVersion: "1.0.0", + wantErr: false, + }, + { + name: "invalid wheel - too few parts", + filename: "invalid-1.0.0.whl", + wantName: "", + wantVersion: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseWheelFilename(tt.filename) + + if tt.wantErr { + assert.Error(t, err) + return + } + + assert.NoError(t, err) + assert.Equal(t, tt.wantName, got.GetName()) + assert.Equal(t, tt.wantVersion, got.GetVersion()) + assert.True(t, got.IsFileDownload()) + assert.Equal(t, "wheel", got.FileType()) + }) + } +} + +func TestParseSdistFilename(t *testing.T) { + tests := []struct { + name string + filename string + wantName string + wantVersion string + wantErr bool + }{ + { + name: 
"simple tar.gz", + filename: "requests-2.28.0.tar.gz", + wantName: "requests", + wantVersion: "2.28.0", + wantErr: false, + }, + { + name: "zip format", + filename: "django-4.2.0.zip", + wantName: "django", + wantVersion: "4.2.0", + wantErr: false, + }, + { + name: "hyphenated name", + filename: "Flask-RESTful-0.3.10.tar.gz", + wantName: "flask-restful", + wantVersion: "0.3.10", + wantErr: false, + }, + { + name: "prerelease version", + filename: "package-1.0.0rc1.tar.gz", + wantName: "package", + wantVersion: "1.0.0rc1", + wantErr: false, + }, + { + name: "dev version", + filename: "package-0.1.0.dev1.tar.gz", + wantName: "package", + wantVersion: "0.1.0.dev1", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseSdistFilename(tt.filename) + + if tt.wantErr { + assert.Error(t, err) + return + } + + assert.NoError(t, err) + assert.Equal(t, tt.wantName, got.GetName()) + assert.Equal(t, tt.wantVersion, got.GetVersion()) + assert.True(t, got.IsFileDownload()) + assert.Equal(t, "sdist", got.FileType()) + }) + } +} + +func TestDenormalizePyPIPackageName(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"requests", "requests"}, + {"Flask_RESTful", "flask-restful"}, + {"My_Package", "my-package"}, + {"UPPERCASE", "uppercase"}, + {"under_score", "under-score"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + result := denormalizePyPIPackageName(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestGetPypiRegistryConfigForHostname(t *testing.T) { + tests := []struct { + name string + hostname string + expectConfig bool + expectHost string + }{ + { + name: "exact match files.pythonhosted.org", + hostname: "files.pythonhosted.org", + expectConfig: true, + expectHost: "files.pythonhosted.org", + }, + { + name: "exact match pypi.org", + hostname: "pypi.org", + expectConfig: true, + expectHost: "pypi.org", + }, + { + name: "subdomain match", + 
hostname: "cdn.files.pythonhosted.org", + expectConfig: true, + expectHost: "files.pythonhosted.org", + }, + { + name: "test pypi", + hostname: "test.pypi.org", + expectConfig: true, + expectHost: "test.pypi.org", + }, + { + name: "unknown hostname", + hostname: "example.com", + expectConfig: false, + expectHost: "", + }, + { + name: "partial match should not work", + hostname: "fakepypi.org", + expectConfig: false, + expectHost: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := pypiRegistryDomains.GetConfigForHostname(tt.hostname) + + if !tt.expectConfig { + assert.Nil(t, config) + return + } + + assert.NotNil(t, config) + assert.Equal(t, tt.expectHost, config.Host) + }) + } +} + +func TestPypiRegistryDomains_ContainsHostname(t *testing.T) { + tests := []struct { + name string + hostname string + want bool + }{ + { + name: "exact match", + hostname: "files.pythonhosted.org", + want: true, + }, + { + name: "subdomain match", + hostname: "cdn.files.pythonhosted.org", + want: true, + }, + { + name: "no match", + hostname: "example.com", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := pypiRegistryDomains.ContainsHostname(tt.hostname) + assert.Equal(t, tt.want, got) + }) + } +}