diff --git a/internal/cli/build.go b/internal/cli/build.go index 946623846..86b17661f 100644 --- a/internal/cli/build.go +++ b/internal/cli/build.go @@ -60,6 +60,8 @@ func buildCmd() *cobra.Command { var includePaths []string var ignoreSignatures bool var sizeLimits options.SizeLimits + var extraPythonPackages []string + var extraPythonIndexes []string cmd := &cobra.Command{ Use: "build", @@ -119,6 +121,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin build.WithIncludePaths(includePaths), build.WithIgnoreSignatures(ignoreSignatures), build.WithSizeLimits(sizeLimits), + build.WithExtraEcosystemPackages("python", extraPythonPackages), + build.WithExtraEcosystemIndexes("python", extraPythonIndexes), ) }, } @@ -139,6 +143,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin cmd.Flags().StringVar(&lockfile, "lockfile", "", "a path to .lock.json file (e.g. produced by apko lock) that constraints versions of packages to the listed ones (default '' means no additional constraints)") cmd.Flags().StringSliceVar(&includePaths, "include-paths", []string{}, "Additional include paths where to look for input files (config, base image, etc.). By default apko will search for paths only in workdir. Include paths may be absolute, or relative. Relative paths are interpreted relative to workdir. 
For adding extra paths for packages, use --repository-append.") cmd.Flags().BoolVar(&ignoreSignatures, "ignore-signatures", false, "ignore repository signature verification") + cmd.Flags().StringSliceVar(&extraPythonPackages, "ecosystem-python-package-append", []string{}, "extra Python packages to include (e.g., flask==3.0.0)") + cmd.Flags().StringSliceVar(&extraPythonIndexes, "ecosystem-python-index-append", []string{}, "extra Python package index URLs to use") addClientLimitFlags(cmd, &sizeLimits) return cmd } diff --git a/internal/cli/lock.go b/internal/cli/lock.go index 67c10d6ff..22d46382f 100644 --- a/internal/cli/lock.go +++ b/internal/cli/lock.go @@ -35,6 +35,8 @@ import ( apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/build" "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" + _ "chainguard.dev/apko/pkg/ecosystem/python" pkglock "chainguard.dev/apko/pkg/lock" ) @@ -245,6 +247,30 @@ func LockCmd(ctx context.Context, output string, archs []types.Architecture, opt } } + // Resolve ecosystem packages + for name, ecoConfig := range ic.Contents.Ecosystems { + installer, ok := ecosystem.Get(name) + if !ok { + return fmt.Errorf("unknown ecosystem: %s", name) + } + for _, arch := range archs { + resolved, err := installer.Resolve(ctx, ecoConfig, arch) + if err != nil { + return fmt.Errorf("resolving %s packages for %s: %w", name, arch, err) + } + for _, pkg := range resolved { + lock.Contents.EcosystemPackages = append(lock.Contents.EcosystemPackages, pkglock.LockEcosystemPkg{ + Ecosystem: pkg.Ecosystem, + Name: pkg.Name, + Version: pkg.Version, + URL: pkg.URL, + Checksum: pkg.Checksum, + Architecture: arch.ToAPK(), + }) + } + } + } + // Sort keyrings by name for reproducible lock files sort.Slice(lock.Contents.Keyrings, func(i, j int) bool { return lock.Contents.Keyrings[i].Name < lock.Contents.Keyrings[j].Name diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index 2200af5f6..68794e454 
100644 --- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -36,6 +36,8 @@ import ( ldsocache "chainguard.dev/apko/internal/ldso-cache" "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/ecosystem" + _ "chainguard.dev/apko/pkg/ecosystem/python" // Register python ecosystem installer. "chainguard.dev/apko/pkg/lock" "chainguard.dev/apko/pkg/options" ) @@ -177,6 +179,23 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error) } } + // Install ecosystem packages (python, etc.) after APK packages so that + // the language runtime is available for version detection. + if len(bc.ic.Contents.Ecosystems) > 0 { + env, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch) + if err != nil { + return nil, fmt.Errorf("installing ecosystem packages: %w", err) + } + if len(env) > 0 { + if bc.ic.Environment == nil { + bc.ic.Environment = make(map[string]string) + } + for k, v := range env { + bc.ic.Environment[k] = v + } + } + } + // For now adding additional accounts is banned when using base image. On the other hand, we don't want to // wipe out the users set in base. // If one wants to add a support for adding additional users they would need to look into this piece of code. diff --git a/pkg/build/options.go b/pkg/build/options.go index 0e25edd0c..85a9a1d7d 100644 --- a/pkg/build/options.go +++ b/pkg/build/options.go @@ -266,3 +266,35 @@ func WithSizeLimits(limits options.SizeLimits) Option { return nil } } + +// WithExtraEcosystemPackages adds extra ecosystem packages to the build. +func WithExtraEcosystemPackages(ecosystem string, packages []string) Option { + return func(bc *Context) error { + if len(packages) == 0 { + return nil + } + if bc.ic.Contents.Ecosystems == nil { + bc.ic.Contents.Ecosystems = make(map[string]types.EcosystemConfig) + } + eco := bc.ic.Contents.Ecosystems[ecosystem] + eco.Packages = append(eco.Packages, packages...) 
+ bc.ic.Contents.Ecosystems[ecosystem] = eco + return nil + } +} + +// WithExtraEcosystemIndexes adds extra ecosystem indexes to the build. +func WithExtraEcosystemIndexes(ecosystem string, indexes []string) Option { + return func(bc *Context) error { + if len(indexes) == 0 { + return nil + } + if bc.ic.Contents.Ecosystems == nil { + bc.ic.Contents.Ecosystems = make(map[string]types.EcosystemConfig) + } + eco := bc.ic.Contents.Ecosystems[ecosystem] + eco.Indexes = append(eco.Indexes, indexes...) + bc.ic.Contents.Ecosystems[ecosystem] = eco + return nil + } +} diff --git a/pkg/build/types/image_configuration.go b/pkg/build/types/image_configuration.go index 245452a1f..04b9e6a8e 100644 --- a/pkg/build/types/image_configuration.go +++ b/pkg/build/types/image_configuration.go @@ -181,6 +181,27 @@ func (i *ImageContents) MergeInto(target *ImageContents) error { if target.BaseImage == nil { target.BaseImage = i.BaseImage } + // Merge ecosystem configs + if len(i.Ecosystems) > 0 { + if target.Ecosystems == nil { + target.Ecosystems = make(map[string]EcosystemConfig) + } + for name, eco := range i.Ecosystems { + if existing, ok := target.Ecosystems[name]; ok { + existing.Indexes = slices.Concat(eco.Indexes, existing.Indexes) + existing.Packages = slices.Concat(eco.Packages, existing.Packages) + if existing.PythonVersion == "" { + existing.PythonVersion = eco.PythonVersion + } + if existing.Venv == "" { + existing.Venv = eco.Venv + } + target.Ecosystems[name] = existing + } else { + target.Ecosystems[name] = eco + } + } + } return nil } @@ -295,6 +316,14 @@ func (ic *ImageConfiguration) Summarize(ctx context.Context) { log.Infof(" - gid=%d(%s) members=%v", g.GID, g.GroupName, g.Members) } } + if len(ic.Contents.Ecosystems) > 0 { + log.Infof(" ecosystems:") + for name, eco := range ic.Contents.Ecosystems { + log.Infof(" %s:", name) + log.Infof(" indexes: %v", eco.Indexes) + log.Infof(" packages: %v", eco.Packages) + } + } if len(ic.Annotations) > 0 { log.Infof(" 
annotations:") for k, v := range ic.Annotations { diff --git a/pkg/build/types/types.go b/pkg/build/types/types.go index e920acc39..357512c7a 100644 --- a/pkg/build/types/types.go +++ b/pkg/build/types/types.go @@ -104,6 +104,20 @@ type BaseImageDescriptor struct { APKIndex string `json:"apkindex,omitempty" yaml:"apkindex,omitempty"` } +// EcosystemConfig holds configuration for a non-APK package ecosystem (e.g., python). +type EcosystemConfig struct { + // Indexes is a list of package index URLs (e.g., PyPI simple API URLs). + Indexes []string `json:"indexes,omitempty" yaml:"indexes,omitempty"` + // Packages is a list of package specifications (e.g., "flask==3.0.0"). + Packages []string `json:"packages,omitempty" yaml:"packages,omitempty"` + // PythonVersion overrides auto-detection of the Python version (e.g., "3.12"). + PythonVersion string `json:"python_version,omitempty" yaml:"python_version,omitempty"` + // Venv is an optional path for a virtual environment (e.g., "/app/venv"). + // When set, packages are installed into the venv instead of the system site-packages, + // and VIRTUAL_ENV / PATH are set automatically. + Venv string `json:"venv,omitempty" yaml:"venv,omitempty"` +} + type ImageContents struct { // A list of apk repositories to use for pulling packages at build time, // which are not installed into /etc/apk/repositories in the image (to @@ -122,6 +136,8 @@ type ImageContents struct { Packages []string `json:"packages,omitempty" yaml:"packages,omitempty"` // Optional: Base image to build on top of. Warning: Experimental. BaseImage *BaseImageDescriptor `json:"baseimage,omitempty" yaml:"baseimage,omitempty" apko:"experimental"` + // Optional: Non-APK ecosystem packages to install (e.g., pip packages). 
+ Ecosystems map[string]EcosystemConfig `json:"ecosystems,omitempty" yaml:"ecosystems,omitempty"` } // MarshalYAML implements yaml.Marshaler for ImageContents, redacting URLs in @@ -138,6 +154,13 @@ func (i ImageContents) MarshalYAML() (any, error) { return nil, err } + for name, eco := range ri.Ecosystems { + if err := processRepositoryURLs(eco.Indexes); err != nil { + return nil, err + } + ri.Ecosystems[name] = eco + } + for idx, key := range ri.Keyring { rawURL := key parsed, err := url.Parse(rawURL) diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go new file mode 100644 index 000000000..cc475e07b --- /dev/null +++ b/pkg/ecosystem/ecosystem.go @@ -0,0 +1,95 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ecosystem + +import ( + "context" + "fmt" + "sync" + + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/build/types" +) + +// ResolvedPackage represents a package that has been resolved to a specific +// version and download URL. +type ResolvedPackage struct { + Ecosystem string + Name string + Version string + URL string + Checksum string // "sha256:" +} + +// Installer is the interface that ecosystem package installers must implement. +type Installer interface { + // Name returns the ecosystem name (e.g., "python"). + Name() string + // Resolve resolves the requested packages to specific versions and URLs. 
+ Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ResolvedPackage, error) + // Install extracts resolved packages into the filesystem. + // Returns environment variables that should be set in the image configuration. + Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage, config types.EcosystemConfig) (map[string]string, error) +} + +var ( + registryMu sync.RWMutex + registry = map[string]func() Installer{} +) + +// Register registers an ecosystem installer factory. +func Register(name string, factory func() Installer) { + registryMu.Lock() + defer registryMu.Unlock() + registry[name] = factory +} + +// Get returns an installer for the named ecosystem. +func Get(name string) (Installer, bool) { + registryMu.RLock() + defer registryMu.RUnlock() + factory, ok := registry[name] + if !ok { + return nil, false + } + return factory(), true +} + +// InstallAll installs packages for all configured ecosystems. +// Returns environment variables that should be set in the image configuration. 
+func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture) (map[string]string, error) { + env := map[string]string{} + for name, config := range ecosystems { + installer, ok := Get(name) + if !ok { + return nil, fmt.Errorf("unknown ecosystem: %s", name) + } + resolved, err := installer.Resolve(ctx, config, arch) + if err != nil { + return nil, fmt.Errorf("resolving %s packages: %w", name, err) + } + if len(resolved) == 0 { + continue + } + vars, err := installer.Install(ctx, fs, resolved, config) + if err != nil { + return nil, fmt.Errorf("installing %s packages: %w", name, err) + } + for k, v := range vars { + env[k] = v + } + } + return env, nil +} diff --git a/pkg/ecosystem/python/platform.go b/pkg/ecosystem/python/platform.go new file mode 100644 index 000000000..c1e9dd06b --- /dev/null +++ b/pkg/ecosystem/python/platform.go @@ -0,0 +1,222 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "fmt" + "strings" + + "chainguard.dev/apko/pkg/build/types" +) + +// platformTags returns the list of compatible wheel platform tags for the +// given architecture, ordered from most specific to least specific. 
+func platformTags(arch types.Architecture) []string { + switch arch { + case types.ParseArchitecture("amd64"): + return []string{ + "manylinux_2_17_x86_64", + "manylinux2014_x86_64", + "manylinux_2_5_x86_64", + "manylinux1_x86_64", + "linux_x86_64", + } + case types.ParseArchitecture("arm64"): + return []string{ + "manylinux_2_17_aarch64", + "manylinux2014_aarch64", + "linux_aarch64", + } + case types.ParseArchitecture("arm/v7"): + return []string{ + "manylinux_2_17_armv7l", + "manylinux2014_armv7l", + "linux_armv7l", + } + case types.ParseArchitecture("arm/v6"): + return []string{ + "manylinux_2_17_armv6l", + "linux_armv6l", + } + case types.ParseArchitecture("386"): + return []string{ + "manylinux_2_17_i686", + "manylinux2014_i686", + "manylinux_2_5_i686", + "manylinux1_i686", + "linux_i686", + } + case types.ParseArchitecture("ppc64le"): + return []string{ + "manylinux_2_17_ppc64le", + "manylinux2014_ppc64le", + "linux_ppc64le", + } + case types.ParseArchitecture("s390x"): + return []string{ + "manylinux_2_17_s390x", + "manylinux2014_s390x", + "linux_s390x", + } + case types.ParseArchitecture("riscv64"): + return []string{ + "manylinux_2_17_riscv64", + "linux_riscv64", + } + default: + return []string{"any"} + } +} + +// wheelFileParts holds the parsed components of a wheel filename per PEP 427. +// Format: {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl +type wheelFileParts struct { + Distribution string + Version string + BuildTag string + PythonTag string + ABITag string + PlatformTag string +} + +// parseWheelFilename parses a wheel filename per PEP 427. 
+func parseWheelFilename(filename string) (wheelFileParts, error) { + name := strings.TrimSuffix(filename, ".whl") + if name == filename { + return wheelFileParts{}, fmt.Errorf("not a wheel file: %s", filename) + } + + parts := strings.Split(name, "-") + switch len(parts) { + case 5: + return wheelFileParts{ + Distribution: parts[0], + Version: parts[1], + PythonTag: parts[2], + ABITag: parts[3], + PlatformTag: parts[4], + }, nil + case 6: + return wheelFileParts{ + Distribution: parts[0], + Version: parts[1], + BuildTag: parts[2], + PythonTag: parts[3], + ABITag: parts[4], + PlatformTag: parts[5], + }, nil + default: + return wheelFileParts{}, fmt.Errorf("invalid wheel filename: %s", filename) + } +} + +// isCompatibleWheel checks whether a wheel file is compatible with the given +// Python version and architecture. +func isCompatibleWheel(w wheelFileParts, pythonVersion string, arch types.Architecture) bool { + // Check python tag compatibility + if !isCompatiblePythonTag(w.PythonTag, pythonVersion) { + return false + } + + // Check ABI compatibility + if !isCompatibleABI(w.ABITag, pythonVersion) { + return false + } + + // Check platform compatibility + return isCompatiblePlatform(w.PlatformTag, arch) +} + +// isCompatiblePythonTag checks if the wheel's python tag is compatible. +// E.g., "py3", "cp312", "py2.py3" +func isCompatiblePythonTag(tag, pythonVersion string) bool { + cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") + for _, t := range strings.Split(tag, ".") { + if t == "py3" || t == "py2.py3" || t == cpTag { + return true + } + } + return false +} + +// isCompatibleABI checks if the wheel's ABI tag is compatible. 
+func isCompatibleABI(tag, pythonVersion string) bool { + if tag == "none" { + return true + } + cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") + for _, t := range strings.Split(tag, ".") { + if t == "abi3" || t == cpTag { + return true + } + } + return false +} + +// isCompatiblePlatform checks if the wheel's platform tag is compatible. +func isCompatiblePlatform(tag string, arch types.Architecture) bool { + if tag == "any" { + return true + } + compatible := platformTags(arch) + for _, t := range strings.Split(tag, ".") { + for _, c := range compatible { + if t == c { + return true + } + } + } + return false +} + +// wheelScore returns a priority score for the wheel. Higher is better. +// Binary wheels for the exact platform are preferred over pure-Python wheels. +func wheelScore(w wheelFileParts, pythonVersion string, arch types.Architecture) int { + score := 0 + + // Prefer exact CPython tag over generic py3 + cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") + for _, t := range strings.Split(w.PythonTag, ".") { + if t == cpTag { + score += 100 + break + } + } + + // Prefer specific ABI over none/abi3 + for _, t := range strings.Split(w.ABITag, ".") { + if t == cpTag { + score += 50 + } else if t == "abi3" { + score += 25 + } + } + + // Prefer specific platform over any + if w.PlatformTag != "any" { + platTags := platformTags(arch) + for i, pt := range platTags { + for _, t := range strings.Split(w.PlatformTag, ".") { + if t == pt { + // More specific platforms (earlier in list) get higher scores + score += 10 * (len(platTags) - i) + break + } + } + } + } + + return score +} diff --git a/pkg/ecosystem/python/platform_test.go b/pkg/ecosystem/python/platform_test.go new file mode 100644 index 000000000..7edd62ed9 --- /dev/null +++ b/pkg/ecosystem/python/platform_test.go @@ -0,0 +1,202 @@ +// Copyright 2024 Chainguard, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "testing" + + "chainguard.dev/apko/pkg/build/types" +) + +func TestPlatformTags(t *testing.T) { + tests := []struct { + arch string + wantLen int + wantAny string // At least one tag should contain this + }{ + {"amd64", 5, "x86_64"}, + {"arm64", 3, "aarch64"}, + {"arm/v7", 3, "armv7l"}, + {"386", 5, "i686"}, + {"ppc64le", 3, "ppc64le"}, + {"s390x", 3, "s390x"}, + } + + for _, tt := range tests { + t.Run(tt.arch, func(t *testing.T) { + tags := platformTags(types.ParseArchitecture(tt.arch)) + if len(tags) != tt.wantLen { + t.Errorf("platformTags(%s) returned %d tags, want %d", tt.arch, len(tags), tt.wantLen) + } + found := false + for _, tag := range tags { + if contains(tag, tt.wantAny) { + found = true + break + } + } + if !found { + t.Errorf("platformTags(%s) = %v, none contain %q", tt.arch, tags, tt.wantAny) + } + }) + } +} + +func contains(s, substr string) bool { + return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstr(s, substr)) +} + +func containsSubstr(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} + +func TestParseWheelFilename(t *testing.T) { + tests := []struct { + filename string + wantDist string + wantVer string + wantPy string + wantABI string + wantPlat string + wantErr bool + }{ + { + filename: "Flask-3.0.0-py3-none-any.whl", + wantDist: 
"Flask", + wantVer: "3.0.0", + wantPy: "py3", + wantABI: "none", + wantPlat: "any", + }, + { + filename: "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.whl", + wantDist: "numpy", + wantVer: "1.26.0", + wantPy: "cp312", + wantABI: "cp312", + wantPlat: "manylinux_2_17_x86_64", + }, + { + filename: "notawheel.tar.gz", + wantErr: true, + }, + { + filename: "bad-name.whl", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + parts, err := parseWheelFilename(tt.filename) + if tt.wantErr { + if err == nil { + t.Error("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if parts.Distribution != tt.wantDist { + t.Errorf("Distribution = %q, want %q", parts.Distribution, tt.wantDist) + } + if parts.Version != tt.wantVer { + t.Errorf("Version = %q, want %q", parts.Version, tt.wantVer) + } + if parts.PythonTag != tt.wantPy { + t.Errorf("PythonTag = %q, want %q", parts.PythonTag, tt.wantPy) + } + if parts.ABITag != tt.wantABI { + t.Errorf("ABITag = %q, want %q", parts.ABITag, tt.wantABI) + } + if parts.PlatformTag != tt.wantPlat { + t.Errorf("PlatformTag = %q, want %q", parts.PlatformTag, tt.wantPlat) + } + }) + } +} + +func TestIsCompatibleWheel(t *testing.T) { + tests := []struct { + name string + wheel wheelFileParts + pyVer string + arch string + want bool + }{ + { + name: "pure python wheel is always compatible", + wheel: wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"}, + pyVer: "3.12", + arch: "amd64", + want: true, + }, + { + name: "cpython binary for matching arch", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"}, + pyVer: "3.12", + arch: "amd64", + want: true, + }, + { + name: "cpython binary for wrong arch", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_aarch64"}, + pyVer: "3.12", + arch: "amd64", + want: false, + }, + { + name: "wrong python version", 
+ wheel: wheelFileParts{PythonTag: "cp311", ABITag: "cp311", PlatformTag: "any"}, + pyVer: "3.12", + arch: "amd64", + want: false, + }, + { + name: "abi3 is compatible", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "abi3", PlatformTag: "manylinux_2_17_x86_64"}, + pyVer: "3.12", + arch: "amd64", + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isCompatibleWheel(tt.wheel, tt.pyVer, types.ParseArchitecture(tt.arch)) + if got != tt.want { + t.Errorf("isCompatibleWheel() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestWheelScore(t *testing.T) { + pureWheel := wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"} + binaryWheel := wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"} + + pureScore := wheelScore(pureWheel, "3.12", types.ParseArchitecture("amd64")) + binaryScore := wheelScore(binaryWheel, "3.12", types.ParseArchitecture("amd64")) + + if binaryScore <= pureScore { + t.Errorf("binary wheel score (%d) should be higher than pure wheel score (%d)", binaryScore, pureScore) + } +} diff --git a/pkg/ecosystem/python/python.go b/pkg/ecosystem/python/python.go new file mode 100644 index 000000000..d4d5320c9 --- /dev/null +++ b/pkg/ecosystem/python/python.go @@ -0,0 +1,201 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + "context" + "fmt" + "io" + "net/http" + "path/filepath" + "strings" + + "github.com/chainguard-dev/clog" + + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" +) + +func init() { + ecosystem.Register("python", func() ecosystem.Installer { + return &installer{} + }) +} + +type installer struct{} + +func (i *installer) Name() string { return "python" } + +func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { + if len(config.Packages) == 0 { + return nil, nil + } + + specs := make([]packageSpec, 0, len(config.Packages)) + for _, pkg := range config.Packages { + specs = append(specs, parsePackageSpec(pkg)) + } + + indexes := config.Indexes + if len(indexes) == 0 { + indexes = []string{defaultIndex} + } + + pythonVersion := config.PythonVersion + if pythonVersion == "" { + pythonVersion = "3.12" + } + + return resolvePackages(ctx, specs, indexes, pythonVersion, arch) +} + +func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage, config types.EcosystemConfig) (map[string]string, error) { + log := clog.FromContext(ctx) + + pythonVersion := detectPythonVersion(fsys) + if pythonVersion == "" { + return nil, fmt.Errorf("no Python installation found in filesystem; install python3 via APK first") + } + log.Infof("detected Python %s for python ecosystem install", pythonVersion) + + var sitePackagesPath string + if config.Venv != "" { + venvPath := strings.TrimPrefix(config.Venv, "/") + if err := createVenv(fsys, venvPath, pythonVersion); err != nil { + return nil, fmt.Errorf("creating virtual environment at %s: %w", config.Venv, err) + } + sitePackagesPath = filepath.Join(venvPath, "lib", "python"+pythonVersion, "site-packages") + log.Infof("using virtual environment at %s", config.Venv) + } else { + sitePackagesPath = 
fmt.Sprintf("usr/lib/python%s/site-packages", pythonVersion) + } + + if err := fsys.MkdirAll(sitePackagesPath, 0755); err != nil { + return nil, fmt.Errorf("creating site-packages directory: %w", err) + } + + for _, pkg := range packages { + log.Infof("installing python package %s==%s", pkg.Name, pkg.Version) + + data, err := downloadWheel(ctx, pkg.URL) + if err != nil { + return nil, fmt.Errorf("downloading %s: %w", pkg.Name, err) + } + + if err := verifyChecksum(data, pkg.Checksum); err != nil { + return nil, fmt.Errorf("verifying %s: %w", pkg.Name, err) + } + + if err := extractWheel(fsys, data, sitePackagesPath); err != nil { + return nil, fmt.Errorf("extracting %s: %w", pkg.Name, err) + } + + if err := writeInstallerFile(fsys, sitePackagesPath, data); err != nil { + log.Debugf("could not write INSTALLER file for %s: %v", pkg.Name, err) + } + } + + // When using a venv, set VIRTUAL_ENV and prepend its bin/ to PATH. + if config.Venv != "" { + venvBin := filepath.Join(config.Venv, "bin") + return map[string]string{ + "VIRTUAL_ENV": config.Venv, + "PATH": venvBin + ":/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + }, nil + } + + return nil, nil +} + +// createVenv sets up a virtual environment directory structure. 
+func createVenv(fsys apkfs.FullFS, venvPath, pythonVersion string) error { + // Create directory structure + dirs := []string{ + filepath.Join(venvPath, "bin"), + filepath.Join(venvPath, "include"), + filepath.Join(venvPath, "lib", "python"+pythonVersion, "site-packages"), + } + for _, dir := range dirs { + if err := fsys.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating %s: %w", dir, err) + } + } + + // Write pyvenv.cfg + cfg := fmt.Sprintf( + "home = /usr/bin\ninclude-system-site-packages = false\nversion = %s\n", + pythonVersion, + ) + cfgPath := filepath.Join(venvPath, "pyvenv.cfg") + if err := fsys.WriteFile(cfgPath, []byte(cfg), 0644); err != nil { + return fmt.Errorf("writing pyvenv.cfg: %w", err) + } + + // Create symlinks in bin/ + pythonBin := "/usr/bin/python" + pythonVersion + binPath := filepath.Join(venvPath, "bin") + symlinks := map[string]string{ + "python": pythonBin, + "python3": pythonBin, + "python" + pythonVersion: pythonBin, + } + for name, target := range symlinks { + linkPath := filepath.Join(binPath, name) + if err := fsys.Symlink(target, linkPath); err != nil { + return fmt.Errorf("creating symlink %s: %w", linkPath, err) + } + } + + return nil +} + +// detectPythonVersion scans the filesystem for a Python installation and +// returns the version string (e.g., "3.12"). +func detectPythonVersion(fsys apkfs.FullFS) string { + entries, err := fsys.ReadDir("usr/lib") + if err != nil { + return "" + } + + for _, entry := range entries { + name := entry.Name() + if strings.HasPrefix(name, "python3.") && entry.IsDir() { + return strings.TrimPrefix(name, "python") + } + } + + return "" +} + +// downloadWheel downloads a wheel file from the given URL. 
+func downloadWheel(ctx context.Context, url string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d downloading %s", resp.StatusCode, url) + } + + return io.ReadAll(resp.Body) +} diff --git a/pkg/ecosystem/python/python_test.go b/pkg/ecosystem/python/python_test.go new file mode 100644 index 000000000..dd85cb305 --- /dev/null +++ b/pkg/ecosystem/python/python_test.go @@ -0,0 +1,117 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + "testing" + + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/ecosystem" +) + +func TestCreateVenv(t *testing.T) { + fs := apkfs.NewMemFS() + if err := fs.MkdirAll("usr/bin", 0755); err != nil { + t.Fatal(err) + } + + err := createVenv(fs, "app/venv", "3.12") + if err != nil { + t.Fatalf("createVenv() error: %v", err) + } + + // Check pyvenv.cfg + data, err := fs.ReadFile("app/venv/pyvenv.cfg") + if err != nil { + t.Fatalf("reading pyvenv.cfg: %v", err) + } + cfg := string(data) + if !contains(cfg, "home = /usr/bin") { + t.Errorf("pyvenv.cfg missing home, got: %q", cfg) + } + if !contains(cfg, "version = 3.12") { + t.Errorf("pyvenv.cfg missing version, got: %q", cfg) + } + + // Check directories exist + for _, dir := range []string{ + "app/venv/bin", + "app/venv/include", + "app/venv/lib/python3.12/site-packages", + } { + if _, err := fs.Stat(dir); err != nil { + t.Errorf("directory %s should exist: %v", dir, err) + } + } + + // Check symlinks + for _, name := range []string{"python", "python3", "python3.12"} { + target, err := fs.Readlink("app/venv/bin/" + name) + if err != nil { + t.Errorf("symlink %s should exist: %v", name, err) + continue + } + if target != "/usr/bin/python3.12" { + t.Errorf("symlink %s = %q, want %q", name, target, "/usr/bin/python3.12") + } + } +} + + +func TestInstallerRegistration(t *testing.T) { + inst, ok := ecosystem.Get("python") + if !ok { + t.Fatal("python installer not registered") + } + if inst.Name() != "python" { + t.Errorf("Name() = %q, want %q", inst.Name(), "python") + } +} + +func TestDetectPythonVersion(t *testing.T) { + fs := apkfs.NewMemFS() + + // No python installed + if v := detectPythonVersion(fs); v != "" { + t.Errorf("detectPythonVersion() = %q on empty fs, want empty", v) + } + + // Create python directory + if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil { + t.Fatal(err) + } + + v := detectPythonVersion(fs) + if v != "3.12" { + 
t.Errorf("detectPythonVersion() = %q, want %q", v, "3.12") + } +} + +func TestDetectPythonVersionMultiple(t *testing.T) { + fs := apkfs.NewMemFS() + + // Create multiple python versions - should return whichever is found first + if err := fs.MkdirAll("usr/lib/python3.11/site-packages", 0755); err != nil { + t.Fatal(err) + } + if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil { + t.Fatal(err) + } + + v := detectPythonVersion(fs) + if v != "3.11" && v != "3.12" { + t.Errorf("detectPythonVersion() = %q, want 3.11 or 3.12", v) + } +} diff --git a/pkg/ecosystem/python/resolve.go b/pkg/ecosystem/python/resolve.go new file mode 100644 index 000000000..f6205193b --- /dev/null +++ b/pkg/ecosystem/python/resolve.go @@ -0,0 +1,642 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + neturl "net/url" + "regexp" + "strings" + + "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" + + "github.com/chainguard-dev/clog" +) + +const defaultIndex = "https://pypi.org/simple/" +const pypiJSONBaseDefault = "https://pypi.org/pypi/" + +// pypiJSONBaseOverride allows tests to redirect the JSON API to a mock server. 
+var pypiJSONBaseOverride string + +func pypiJSONBase() string { + if pypiJSONBaseOverride != "" { + return pypiJSONBaseOverride + } + return pypiJSONBaseDefault +} + +// packageSpec represents a parsed package requirement (e.g., "flask==3.0.0"). +type packageSpec struct { + Name string + Operator string // "==", ">=", "<=", "!=", "~=", "" + Version string + Extras []string + Markers string +} + +// parsePackageSpec parses a PEP 508-style requirement string. +func parsePackageSpec(spec string) packageSpec { + ps := packageSpec{} + + // Strip environment markers + if idx := strings.Index(spec, ";"); idx != -1 { + ps.Markers = strings.TrimSpace(spec[idx+1:]) + spec = strings.TrimSpace(spec[:idx]) + } + + // Strip extras + if lbIdx := strings.Index(spec, "["); lbIdx != -1 { + if rbIdx := strings.Index(spec, "]"); rbIdx != -1 { + extras := spec[lbIdx+1 : rbIdx] + ps.Extras = strings.Split(extras, ",") + for i := range ps.Extras { + ps.Extras[i] = strings.TrimSpace(ps.Extras[i]) + } + spec = spec[:lbIdx] + spec[rbIdx+1:] + } + } + + spec = strings.TrimSpace(spec) + + // Handle parenthesized version constraints: "package (>=1.0)" + if lpIdx := strings.Index(spec, "("); lpIdx != -1 { + if rpIdx := strings.LastIndex(spec, ")"); rpIdx > lpIdx { + ps.Name = strings.TrimSpace(spec[:lpIdx]) + inner := strings.TrimSpace(spec[lpIdx+1 : rpIdx]) + parts := strings.SplitN(inner, ",", 2) + constraint := strings.TrimSpace(parts[0]) + for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { + if strings.HasPrefix(constraint, op) { + ps.Operator = op + ps.Version = strings.TrimSpace(constraint[len(op):]) + return ps + } + } + return ps + } + } + + // Find the first operator by position in the string + bestIdx := -1 + bestOp := "" + for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { + idx := strings.Index(spec, op) + if idx != -1 && (bestIdx == -1 || idx < bestIdx) { + bestIdx = idx + bestOp = op + } + } + if bestIdx != -1 { + ps.Name = 
strings.TrimSpace(spec[:bestIdx]) + ps.Operator = bestOp + version := strings.TrimSpace(spec[bestIdx+len(bestOp):]) + if commaIdx := strings.Index(version, ","); commaIdx != -1 { + version = version[:commaIdx] + } + ps.Version = version + return ps + } + + ps.Name = spec + return ps +} + +// normalizeName normalizes a Python package name per PEP 503. +func normalizeName(name string) string { + return strings.ToLower(regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")) +} + +// --- PyPI JSON API types --- + +// pypiPackageJSON is the response from https://pypi.org/pypi/{name}/{version}/json +type pypiPackageJSON struct { + Info pypiInfo `json:"info"` + URLs []pypiURL `json:"urls"` +} + +type pypiInfo struct { + Name string `json:"name"` + Version string `json:"version"` + RequiresDist []string `json:"requires_dist"` +} + +type pypiURL struct { + Filename string `json:"filename"` + URL string `json:"url"` + PackageType string `json:"packagetype"` + Digests pypiDigests `json:"digests"` +} + +type pypiDigests struct { + SHA256 string `json:"sha256"` +} + +// pypiVersionsJSON is a minimal parse of https://pypi.org/pypi/{name}/json +// to list available versions. +type pypiVersionsJSON struct { + Releases map[string][]pypiURL `json:"releases"` +} + +// --- Resolution --- + +// resolvePackages resolves package specs to specific wheel URLs, +// including transitive dependencies discovered via the PyPI JSON API. 
+func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { + log := clog.FromContext(ctx) + + if len(indexes) == 0 { + indexes = []string{defaultIndex} + } + + var resolved []ecosystem.ResolvedPackage + seen := map[string]bool{} + + // BFS queue + queue := make([]packageSpec, len(specs)) + copy(queue, specs) + + for len(queue) > 0 { + spec := queue[0] + queue = queue[1:] + + name := normalizeName(spec.Name) + if seen[name] { + continue + } + + pkg, deps, err := resolveOneWithDeps(ctx, spec, indexes, pythonVersion, arch) + if err != nil { + return nil, fmt.Errorf("resolving %s: %w", spec.Name, err) + } + seen[name] = true + resolved = append(resolved, pkg) + log.Debugf("resolved %s==%s from %s", pkg.Name, pkg.Version, pkg.URL) + + for _, dep := range deps { + if !seen[normalizeName(dep.Name)] { + log.Debugf("discovered transitive dependency: %s (from %s)", dep.Name, pkg.Name) + queue = append(queue, dep) + } + } + } + + return resolved, nil +} + +// resolveOneWithDeps resolves a package and returns both the resolved package +// and its transitive dependencies. It tries the PyPI JSON API first (which +// gives us clean metadata), falling back to the Simple API for non-PyPI indexes. 
+func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { + // Try PyPI JSON API first — it gives us metadata + wheel URLs in one call + if usesDefaultPyPI(indexes) { + pkg, deps, err := resolveViaJSON(ctx, spec, pythonVersion, arch) + if err == nil { + return pkg, deps, nil + } + clog.FromContext(ctx).Debugf("JSON API failed for %s, falling back to Simple API: %v", spec.Name, err) + } + + // Fall back to Simple API + pkg, err := resolveViaSimple(ctx, spec, indexes, pythonVersion, arch) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + return pkg, nil, nil +} + +func usesDefaultPyPI(indexes []string) bool { + if pypiJSONBaseOverride != "" { + return true + } + for _, idx := range indexes { + if strings.Contains(idx, "pypi.org") { + return true + } + } + return false +} + +// resolveViaJSON resolves a package using the PyPI JSON API. +// Returns the resolved package and its parsed Requires-Dist as deps. 
+func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { + name := normalizeName(spec.Name) + + // If we have an exact version, fetch that directly + if spec.Operator == "==" { + return resolveJSONVersion(ctx, name, spec.Name, spec.Version, pythonVersion, arch) + } + + // Otherwise, list all versions and pick the best + versionsURL := pypiJSONBase() + name + "/json" + data, err := httpGet(ctx, versionsURL) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + var versionsResp pypiVersionsJSON + if err := json.Unmarshal(data, &versionsResp); err != nil { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("parsing PyPI versions JSON: %w", err) + } + + // Find the best matching version + bestVersion := "" + for version := range versionsResp.Releases { + if !matchesVersionSpec(version, spec) { + continue + } + // Skip pre-releases unless explicitly requested + if isPreRelease(version) && spec.Operator != "==" { + continue + } + if bestVersion == "" || compareVersions(version, bestVersion) > 0 { + bestVersion = version + } + } + if bestVersion == "" { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("no matching version for %s%s%s", spec.Name, spec.Operator, spec.Version) + } + + return resolveJSONVersion(ctx, name, spec.Name, bestVersion, pythonVersion, arch) +} + +// resolveJSONVersion fetches a specific version from the PyPI JSON API. 
+func resolveJSONVersion(ctx context.Context, normalizedName, originalName, version, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { + versionURL := pypiJSONBase() + normalizedName + "/" + version + "/json" + data, err := httpGet(ctx, versionURL) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + var pkgResp pypiPackageJSON + if err := json.Unmarshal(data, &pkgResp); err != nil { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("parsing PyPI JSON: %w", err) + } + + // Find the best wheel from the URLs + wheelURL, checksum, err := selectBestWheelFromJSON(pkgResp.URLs, pythonVersion, arch) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + // Parse dependencies from requires_dist + var deps []packageSpec + for _, req := range pkgResp.Info.RequiresDist { + dep := parsePackageSpec(req) + if dep.Markers != "" && !evaluateMarkers(dep.Markers, nil) { + continue + } + deps = append(deps, dep) + } + + return ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: originalName, + Version: pkgResp.Info.Version, + URL: wheelURL, + Checksum: checksum, + }, deps, nil +} + +// selectBestWheelFromJSON picks the best compatible wheel from PyPI JSON API URLs. 
+func selectBestWheelFromJSON(urls []pypiURL, pythonVersion string, arch types.Architecture) (string, string, error) { + var bestURL *pypiURL + var bestParts wheelFileParts + bestScore := -1 + + for i, u := range urls { + if u.PackageType != "bdist_wheel" { + continue + } + parts, err := parseWheelFilename(u.Filename) + if err != nil { + continue + } + if !isCompatibleWheel(parts, pythonVersion, arch) { + continue + } + + score := wheelScore(parts, pythonVersion, arch) + if bestURL == nil || score > bestScore { + bestURL = &urls[i] + bestParts = parts + _ = bestParts // used for future scoring + bestScore = score + } + } + + if bestURL == nil { + return "", "", fmt.Errorf("no compatible wheel found") + } + + checksum := "" + if bestURL.Digests.SHA256 != "" { + checksum = "sha256:" + bestURL.Digests.SHA256 + } + return bestURL.URL, checksum, nil +} + +// isPreRelease returns true if a version string looks like a pre-release. +func isPreRelease(version string) bool { + v := strings.ToLower(version) + for _, tag := range []string{"a", "b", "rc", "alpha", "beta", "dev", "pre"} { + if strings.Contains(v, tag) { + return true + } + } + return false +} + +// --- Simple API fallback (for non-PyPI indexes) --- + +// wheelLink represents a parsed link from a PEP 503 Simple API response. +type wheelLink struct { + Filename string + URL string + Checksum string // "sha256:" + RequiresPython string +} + +// parseSimpleIndex parses the HTML from a PEP 503 Simple Repository API response. 
+func parseSimpleIndex(body string, baseURL string) []wheelLink { + var links []wheelLink + + linkRe := regexp.MustCompile(`]*href="([^"]*)"[^>]*>([^<]*)`) + requiresPythonRe := regexp.MustCompile(`data-requires-python="([^"]*)"`) + + for _, match := range linkRe.FindAllStringSubmatch(body, -1) { + href := match[1] + filename := strings.TrimSpace(match[2]) + + if !strings.HasSuffix(filename, ".whl") { + continue + } + + var checksum string + if hashIdx := strings.Index(href, "#sha256="); hashIdx != -1 { + checksum = "sha256:" + href[hashIdx+8:] + href = href[:hashIdx] + } + + linkURL := href + if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") { + if base, err := neturl.Parse(baseURL); err == nil { + if ref, err := neturl.Parse(href); err == nil { + linkURL = base.ResolveReference(ref).String() + } + } + } + + var requiresPython string + tagStart := strings.LastIndex(body[:strings.Index(body, match[0])+1], "= 0 { + tagEnd := strings.Index(body[tagStart:], ">") + tagStart + tag := body[tagStart : tagEnd+1] + if rpMatch := requiresPythonRe.FindStringSubmatch(tag); rpMatch != nil { + requiresPython = strings.ReplaceAll(rpMatch[1], ">", ">") + requiresPython = strings.ReplaceAll(requiresPython, "<", "<") + requiresPython = strings.ReplaceAll(requiresPython, "&", "&") + } + } + + links = append(links, wheelLink{ + Filename: filename, + URL: linkURL, + Checksum: checksum, + RequiresPython: requiresPython, + }) + } + + return links +} + +// resolveViaSimple resolves a package using the PEP 503 Simple API. +// Does not return transitive deps (no metadata available without downloading). 
+func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, error) { + name := normalizeName(spec.Name) + + for _, index := range indexes { + indexURL := strings.TrimSuffix(index, "/") + "/" + name + "/" + + body, err := fetchSimpleIndex(ctx, indexURL) + if err != nil { + clog.FromContext(ctx).Debugf("index %s: %v", indexURL, err) + continue + } + + links := parseSimpleIndex(body, indexURL) + if len(links) == 0 { + continue + } + + best, err := selectBestWheel(links, spec, pythonVersion, arch) + if err != nil { + continue + } + + return ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: spec.Name, + Version: best.version, + URL: best.url, + Checksum: best.checksum, + }, nil + } + + return ecosystem.ResolvedPackage{}, fmt.Errorf("package %s not found in any index", spec.Name) +} + +type selectedWheel struct { + version string + url string + checksum string +} + +// selectBestWheel selects the best compatible wheel from Simple API links. 
+func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, arch types.Architecture) (selectedWheel, error) { + var bestLink *wheelLink + var bestParts wheelFileParts + bestScore := -1 + + for i, link := range links { + parts, err := parseWheelFilename(link.Filename) + if err != nil { + continue + } + if !isCompatibleWheel(parts, pythonVersion, arch) { + continue + } + if !matchesVersionSpec(parts.Version, spec) { + continue + } + + score := wheelScore(parts, pythonVersion, arch) + if bestLink == nil || compareVersions(parts.Version, bestParts.Version) > 0 || (compareVersions(parts.Version, bestParts.Version) == 0 && score > bestScore) { + bestLink = &links[i] + bestParts = parts + bestScore = score + } + } + + if bestLink == nil { + return selectedWheel{}, fmt.Errorf("no compatible wheel found") + } + + return selectedWheel{ + version: bestParts.Version, + url: bestLink.URL, + checksum: bestLink.Checksum, + }, nil +} + +// --- Version comparison --- + +func matchesVersionSpec(version string, spec packageSpec) bool { + if spec.Operator == "" { + return true + } + switch spec.Operator { + case "==": + return version == spec.Version + case "!=": + return version != spec.Version + case ">=": + return compareVersions(version, spec.Version) >= 0 + case "<=": + return compareVersions(version, spec.Version) <= 0 + case ">": + return compareVersions(version, spec.Version) > 0 + case "<": + return compareVersions(version, spec.Version) < 0 + case "~=": + if compareVersions(version, spec.Version) < 0 { + return false + } + specParts := strings.Split(spec.Version, ".") + verParts := strings.Split(version, ".") + if len(specParts) < 2 || len(verParts) < 2 { + return false + } + for i := 0; i < len(specParts)-1 && i < len(verParts); i++ { + if verParts[i] != specParts[i] { + return false + } + } + return true + } + return false +} + +func compareVersions(a, b string) int { + aParts := strings.Split(a, ".") + bParts := strings.Split(b, ".") + + maxLen := 
len(aParts) + if len(bParts) > maxLen { + maxLen = len(bParts) + } + + for i := 0; i < maxLen; i++ { + var aVal, bVal string + if i < len(aParts) { + aVal = aParts[i] + } else { + aVal = "0" + } + if i < len(bParts) { + bVal = bParts[i] + } else { + bVal = "0" + } + if aVal == bVal { + continue + } + aNum := parseVersionPart(aVal) + bNum := parseVersionPart(bVal) + if aNum != bNum { + if aNum < bNum { + return -1 + } + return 1 + } + if aVal < bVal { + return -1 + } + return 1 + } + return 0 +} + +func parseVersionPart(s string) int { + n := 0 + for _, c := range s { + if c >= '0' && c <= '9' { + n = n*10 + int(c-'0') + } else { + break + } + } + return n +} + +// --- HTTP helpers --- + +func fetchSimpleIndex(ctx context.Context, url string) (string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return "", err + } + req.Header.Set("Accept", "text/html") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("HTTP %d for %s", resp.StatusCode, url) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + return string(body), nil +} + +func httpGet(ctx context.Context, url string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url) + } + + return io.ReadAll(resp.Body) +} diff --git a/pkg/ecosystem/python/resolve_test.go b/pkg/ecosystem/python/resolve_test.go new file mode 100644 index 000000000..8f24b113b --- /dev/null +++ b/pkg/ecosystem/python/resolve_test.go @@ -0,0 +1,369 @@ +// Copyright 2024 Chainguard, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "chainguard.dev/apko/pkg/build/types" +) + +func TestParsePackageSpec(t *testing.T) { + tests := []struct { + input string + name string + op string + version string + markers string + }{ + {"flask==3.0.0", "flask", "==", "3.0.0", ""}, + {"requests>=2.31.0", "requests", ">=", "2.31.0", ""}, + {"numpy", "numpy", "", "", ""}, + {"foo~=1.4.2", "foo", "~=", "1.4.2", ""}, + {"bar!=2.0", "bar", "!=", "2.0", ""}, + {`baz>=1.0; python_version>="3.8"`, "baz", ">=", "1.0", `python_version>="3.8"`}, + {"typing-extensions (>=4.10.0)", "typing-extensions", ">=", "4.10.0", ""}, + {"packaging (>=22.0,<25.0)", "packaging", ">=", "22.0", ""}, + {"mpmath<1.4,>=1.1.0", "mpmath", "<", "1.4", ""}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + spec := parsePackageSpec(tt.input) + if spec.Name != tt.name { + t.Errorf("Name = %q, want %q", spec.Name, tt.name) + } + if spec.Operator != tt.op { + t.Errorf("Operator = %q, want %q", spec.Operator, tt.op) + } + if spec.Version != tt.version { + t.Errorf("Version = %q, want %q", spec.Version, tt.version) + } + if spec.Markers != tt.markers { + t.Errorf("Markers = %q, want %q", spec.Markers, tt.markers) + } + }) + } +} + +func TestNormalizeName(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"Flask", "flask"}, + 
{"my-package", "my-package"}, + {"my_package", "my-package"}, + {"My.Package", "my-package"}, + {"My---Package", "my-package"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := normalizeName(tt.input) + if got != tt.want { + t.Errorf("normalizeName(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestParseSimpleIndex(t *testing.T) { + body := ` + +Flask-3.0.0-py3-none-any.whl +Flask-2.3.0-py3-none-any.whl +Flask-3.0.0.tar.gz + +` + links := parseSimpleIndex(body, "https://pypi.org/simple/flask/") + if len(links) != 2 { + t.Fatalf("expected 2 wheel links, got %d", len(links)) + } + + if links[0].Filename != "Flask-3.0.0-py3-none-any.whl" { + t.Errorf("links[0].Filename = %q", links[0].Filename) + } + if links[0].Checksum != "sha256:abc123" { + t.Errorf("links[0].Checksum = %q", links[0].Checksum) + } +} + +func TestCompareVersions(t *testing.T) { + tests := []struct { + a, b string + want int + }{ + {"1.0.0", "1.0.0", 0}, + {"2.0.0", "1.0.0", 1}, + {"1.0.0", "2.0.0", -1}, + {"1.10.0", "1.9.0", 1}, + {"1.0", "1.0.0", 0}, + } + + for _, tt := range tests { + t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) { + got := compareVersions(tt.a, tt.b) + if got != tt.want { + t.Errorf("compareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.want) + } + }) + } +} + +func TestMatchesVersionSpec(t *testing.T) { + tests := []struct { + version string + spec packageSpec + want bool + }{ + {"3.0.0", packageSpec{Operator: "==", Version: "3.0.0"}, true}, + {"3.0.1", packageSpec{Operator: "==", Version: "3.0.0"}, false}, + {"3.0.0", packageSpec{Operator: ">=", Version: "2.0.0"}, true}, + {"1.0.0", packageSpec{Operator: ">=", Version: "2.0.0"}, false}, + {"3.0.0", packageSpec{Operator: "", Version: ""}, true}, + {"1.4.3", packageSpec{Operator: "~=", Version: "1.4.2"}, true}, + {"2.0.0", packageSpec{Operator: "~=", Version: "1.4.2"}, false}, + } + + for _, tt := range tests { + t.Run(tt.version+"_"+tt.spec.Operator+tt.spec.Version, 
func(t *testing.T) { + got := matchesVersionSpec(tt.version, tt.spec) + if got != tt.want { + t.Errorf("matchesVersionSpec(%q, %v) = %v, want %v", tt.version, tt.spec, got, tt.want) + } + }) + } +} + +func TestIsPreRelease(t *testing.T) { + tests := []struct { + version string + want bool + }{ + {"3.0.0", false}, + {"3.0.0rc1", true}, + {"3.0.0a1", true}, + {"3.0.0b2", true}, + {"3.0.0.dev1", true}, + {"1.14.0rc2", true}, + } + for _, tt := range tests { + t.Run(tt.version, func(t *testing.T) { + got := isPreRelease(tt.version) + if got != tt.want { + t.Errorf("isPreRelease(%q) = %v, want %v", tt.version, got, tt.want) + } + }) + } +} + +// servePyPIJSON creates a mock server that serves PyPI JSON API responses. +func servePyPIJSON(t *testing.T, packages map[string]pypiPackageJSON) *httptest.Server { + t.Helper() + mux := http.NewServeMux() + for name, pkg := range packages { + name := normalizeName(name) + pkg := pkg + + // Serve /pypi/{name}/{version}/json + mux.HandleFunc("/pypi/"+name+"/"+pkg.Info.Version+"/json", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(pkg) + }) + + // Serve /pypi/{name}/json (versions listing) + mux.HandleFunc("/pypi/"+name+"/json", func(w http.ResponseWriter, r *http.Request) { + resp := pypiVersionsJSON{ + Releases: map[string][]pypiURL{ + pkg.Info.Version: pkg.URLs, + }, + } + json.NewEncoder(w).Encode(resp) + }) + + // Serve Simple API as fallback + mux.HandleFunc("/simple/"+name+"/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + html := "\n" + for _, u := range pkg.URLs { + html += `` + u.Filename + "\n" + } + html += "" + w.Write([]byte(html)) + }) + } + return httptest.NewServer(mux) +} + +func TestResolveWithMockJSON(t *testing.T) { + server := servePyPIJSON(t, map[string]pypiPackageJSON{ + "flask": { + Info: pypiInfo{ + Name: "Flask", + Version: "3.0.0", + }, + URLs: []pypiURL{{ + Filename: "Flask-3.0.0-py3-none-any.whl", + URL: 
"https://files.example.com/Flask-3.0.0-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "abc123"}, + }}, + }, + }) + defer server.Close() + + // Override the JSON API base for the test + origBase := pypiJSONBase + defer func() { pypiJSONBaseOverride = ""; _ = origBase }() + pypiJSONBaseOverride = server.URL + "/pypi/" + + specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + if len(resolved) != 1 { + t.Fatalf("expected 1 resolved package, got %d", len(resolved)) + } + if resolved[0].Name != "flask" { + t.Errorf("Name = %q, want %q", resolved[0].Name, "flask") + } + if resolved[0].Version != "3.0.0" { + t.Errorf("Version = %q, want %q", resolved[0].Version, "3.0.0") + } + if resolved[0].Checksum != "sha256:abc123" { + t.Errorf("Checksum = %q, want %q", resolved[0].Checksum, "sha256:abc123") + } +} + +func TestResolveTransitiveDeps(t *testing.T) { + server := servePyPIJSON(t, map[string]pypiPackageJSON{ + "flask": { + Info: pypiInfo{ + Name: "Flask", + Version: "3.0.0", + RequiresDist: []string{ + "Werkzeug>=3.0.0", + "click>=8.0", + "devtools; extra == \"dev\"", + }, + }, + URLs: []pypiURL{{ + Filename: "Flask-3.0.0-py3-none-any.whl", + URL: "https://files.example.com/Flask-3.0.0-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "aaa"}, + }}, + }, + "werkzeug": { + Info: pypiInfo{ + Name: "Werkzeug", + Version: "3.0.1", + RequiresDist: []string{ + "MarkupSafe>=2.1.1", + }, + }, + URLs: []pypiURL{{ + Filename: "Werkzeug-3.0.1-py3-none-any.whl", + URL: "https://files.example.com/Werkzeug-3.0.1-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "bbb"}, + }}, + }, + "click": { + Info: pypiInfo{ + Name: "click", + Version: "8.1.7", + }, + URLs: 
[]pypiURL{{ + Filename: "click-8.1.7-py3-none-any.whl", + URL: "https://files.example.com/click-8.1.7-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "ccc"}, + }}, + }, + "markupsafe": { + Info: pypiInfo{ + Name: "MarkupSafe", + Version: "2.1.5", + }, + URLs: []pypiURL{{ + Filename: "MarkupSafe-2.1.5-py3-none-any.whl", + URL: "https://files.example.com/MarkupSafe-2.1.5-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "ddd"}, + }}, + }, + }) + defer server.Close() + + pypiJSONBaseOverride = server.URL + "/pypi/" + defer func() { pypiJSONBaseOverride = "" }() + + specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + names := map[string]bool{} + for _, pkg := range resolved { + names[normalizeName(pkg.Name)] = true + } + + for _, want := range []string{"flask", "werkzeug", "click", "markupsafe"} { + if !names[want] { + t.Errorf("missing transitive dependency: %s (resolved: %v)", want, names) + } + } + if names["devtools"] { + t.Error("should NOT include devtools (gated on extra)") + } + if len(resolved) != 4 { + t.Errorf("expected 4 resolved packages, got %d: %v", len(resolved), names) + } +} + +func TestResolveSimpleApiFallback(t *testing.T) { + // Test that non-PyPI indexes use the Simple API + mux := http.NewServeMux() + mux.HandleFunc("/simple/mypackage/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` +mypackage-1.0.0-py3-none-any.whl +`)) + }) + server := httptest.NewServer(mux) + defer server.Close() + + specs := []packageSpec{{Name: "mypackage", Operator: "==", Version: "1.0.0"}} + // Use a non-pypi index so it doesn't try the JSON API + resolved, err := resolvePackages(context.Background(), specs, 
[]string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + if len(resolved) != 1 { + t.Fatalf("expected 1 resolved package, got %d", len(resolved)) + } + if resolved[0].Version != "1.0.0" { + t.Errorf("Version = %q, want %q", resolved[0].Version, "1.0.0") + } +} diff --git a/pkg/ecosystem/python/wheel.go b/pkg/ecosystem/python/wheel.go new file mode 100644 index 000000000..6ade0eeb5 --- /dev/null +++ b/pkg/ecosystem/python/wheel.go @@ -0,0 +1,303 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "archive/zip" + "bytes" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "path/filepath" + "strings" + + apkfs "chainguard.dev/apko/pkg/apk/fs" +) + +// extractWheel extracts a wheel (.whl) file into the filesystem at the given +// site-packages path. A .whl file is a ZIP archive. 
+func extractWheel(fsys apkfs.FullFS, wheelData []byte, sitePackagesPath string) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return fmt.Errorf("opening wheel as zip: %w", err) + } + + for _, f := range reader.File { + targetPath := filepath.Join(sitePackagesPath, f.Name) + + if f.FileInfo().IsDir() { + if err := fsys.MkdirAll(targetPath, 0755); err != nil { + return fmt.Errorf("creating directory %s: %w", targetPath, err) + } + continue + } + + // Ensure parent directory exists + dir := filepath.Dir(targetPath) + if err := fsys.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating parent directory %s: %w", dir, err) + } + + rc, err := f.Open() + if err != nil { + return fmt.Errorf("opening %s in wheel: %w", f.Name, err) + } + + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + return fmt.Errorf("reading %s from wheel: %w", f.Name, err) + } + + if err := fsys.WriteFile(targetPath, data, 0644); err != nil { + return fmt.Errorf("writing %s: %w", targetPath, err) + } + } + + return nil +} + +// writeInstallerFile writes the PEP 376 INSTALLER file into the .dist-info directory. +func writeInstallerFile(fsys apkfs.FullFS, sitePackagesPath string, wheelData []byte) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return err + } + + // Find the .dist-info directory + for _, f := range reader.File { + if strings.HasSuffix(f.Name, ".dist-info/METADATA") { + distInfoDir := filepath.Dir(f.Name) + installerPath := filepath.Join(sitePackagesPath, distInfoDir, "INSTALLER") + return fsys.WriteFile(installerPath, []byte("apko\n"), 0644) + } + } + + return nil +} + +// evaluateMarkers performs a simplified evaluation of PEP 508 environment markers. +// It handles the most common cases: +// - extra == "..." 
//     — only satisfied if the extra was requested
//   - os_name, sys_platform, platform_system — always Linux
//   - python_version — assumed satisfied (we already filtered wheels)
//   - implementation_name, platform_python_implementation — CPython
//
// Compound markers are evaluated recursively: "or" binds looser than "and",
// a fully parenthesised group is unwrapped and evaluated as a sub-expression,
// and text inside quoted literals is never treated as an operator, separator
// or parenthesis. Anything that cannot be parsed is treated as satisfied so a
// dependency is never dropped because of a marker this evaluator does not
// understand.
//
// requestedExtras lists the extras selected for the requirement being
// resolved. When it is empty, "extra" markers are evaluated against the empty
// string (as pip does), so `extra == "dev"` is false and `extra != "dev"` is
// true.
func evaluateMarkers(markers string, requestedExtras []string) bool {
	markers = strings.TrimSpace(markers)
	if markers == "" {
		// No marker at all: the requirement is unconditional.
		return true
	}

	// Handle "or" — if any branch is true, the whole thing is true.
	if orParts := splitMarkerOr(markers); len(orParts) > 1 {
		for _, part := range orParts {
			if evaluateMarkers(part, requestedExtras) {
				return true
			}
		}
		return false
	}

	// Handle "and" — all branches must be true.
	if andParts := splitMarkerAnd(markers); len(andParts) > 1 {
		for _, part := range andParts {
			if !evaluateMarkers(part, requestedExtras) {
				return false
			}
		}
		return true
	}

	// A fully parenthesised group may itself contain and/or, so it has to be
	// evaluated as a marker expression rather than parsed as one comparison.
	if inner, ok := stripMarkerParens(markers); ok {
		return evaluateMarkers(inner, requestedExtras)
	}

	// Parse single comparison: key op value.
	key, op, value := parseMarkerExpr(markers)
	if key == "" {
		// Can't parse — be permissive, include the dep.
		return true
	}

	switch key {
	case "extra":
		// With no extras requested the marker variable is the empty string.
		extras := requestedExtras
		if len(extras) == 0 {
			extras = []string{""}
		}
		for _, e := range extras {
			if matchMarkerOp(e, op, value) {
				return true
			}
		}
		return false
	case "os_name":
		return matchMarkerOp("posix", op, value)
	case "sys_platform":
		return matchMarkerOp("linux", op, value)
	case "platform_system":
		return matchMarkerOp("Linux", op, value)
	case "implementation_name":
		return matchMarkerOp("cpython", op, value)
	case "platform_python_implementation":
		return matchMarkerOp("CPython", op, value)
	case "python_version", "python_full_version", "platform_machine",
		"platform_release", "platform_version", "implementation_version":
		// Be permissive for version-related markers — we've already
		// filtered wheels by Python version compatibility.
		return true
	default:
		// Unknown marker — be permissive.
		return true
	}
}

// stripMarkerParens removes one pair of parentheses that wraps the whole of s.
// It reports false (and returns s unchanged) when s is not wrapped by a single
// matching pair, e.g. "(a) and (b)" or an unbalanced expression.
func stripMarkerParens(s string) (string, bool) {
	if len(s) < 2 || s[0] != '(' || s[len(s)-1] != ')' {
		return s, false
	}
	depth := 0
	var quote byte
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '(':
			depth++
		case c == ')':
			depth--
			if depth == 0 && i != len(s)-1 {
				// The opening parenthesis closes before the end, so the
				// first and last characters are not a matching pair.
				return s, false
			}
		}
	}
	if depth != 0 || quote != 0 {
		return s, false
	}
	return strings.TrimSpace(s[1 : len(s)-1]), true
}

// splitMarkerOr splits on " or " at the top level (not inside parens or quotes).
func splitMarkerOr(s string) []string {
	return splitMarkerBool(s, " or ")
}

// splitMarkerAnd splits on " and " at the top level (not inside parens or quotes).
func splitMarkerAnd(s string) []string {
	return splitMarkerBool(s, " and ")
}

// splitMarkerBool splits s on every occurrence of sep that is outside
// parentheses and outside quoted literals. Each part is whitespace-trimmed.
// The result always has at least one element; a single element means sep did
// not occur at the top level.
func splitMarkerBool(s, sep string) []string {
	var parts []string
	depth, start := 0, 0
	var quote byte // active quote character, 0 when outside a literal
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '(':
			depth++
		case c == ')':
			depth--
		case depth == 0 && strings.HasPrefix(s[i:], sep):
			parts = append(parts, strings.TrimSpace(s[start:i]))
			start = i + len(sep)
			i = start - 1 // the loop increment moves to the first byte after sep
		}
	}
	return append(parts, strings.TrimSpace(s[start:]))
}

// findMarkerOperator returns the position and text of the first comparison
// operator in expr that lies outside a quoted literal, or (-1, "") if there is
// none. Longer operators are tried first at each position so that "===" is not
// read as "==" and " not in " is not read as " in ".
func findMarkerOperator(expr string) (int, string) {
	operators := [...]string{"===", "~=", "==", "!=", ">=", "<=", ">", "<", " not in ", " in "}
	var quote byte
	for i := 0; i < len(expr); i++ {
		c := expr[i]
		if quote != 0 {
			if c == quote {
				quote = 0
			}
			continue
		}
		if c == '"' || c == '\'' {
			quote = c
			continue
		}
		for _, operator := range operators {
			if strings.HasPrefix(expr[i:], operator) {
				return i, operator
			}
		}
	}
	return -1, ""
}

// parseMarkerExpr parses "key op 'value'" or "'value' op key".
//
// It returns the marker variable as key, the operator as seen from the
// variable's side (see flipOp), and the unquoted literal as value. All three
// results are empty when no operator is found.
func parseMarkerExpr(expr string) (key, op, value string) {
	expr = strings.TrimSpace(expr)

	idx, operator := findMarkerOperator(expr)
	if idx < 0 {
		return "", "", ""
	}
	lhs := stripQuotes(strings.TrimSpace(expr[:idx]))
	rhs := stripQuotes(strings.TrimSpace(expr[idx+len(operator):]))
	operator = strings.TrimSpace(operator)

	// Figure out which side is the key vs the value.
	if isMarkerVar(lhs) {
		return lhs, operator, rhs
	}
	if isMarkerVar(rhs) {
		return rhs, flipOp(operator), lhs
	}
	// Both look like values — treat lhs as key.
	return lhs, operator, rhs
}

// stripQuotes removes one pair of matching single or double quotes that
// encloses s; any other input is returned unchanged.
func stripQuotes(s string) string {
	if len(s) >= 2 && ((s[0] == '"' && s[len(s)-1] == '"') || (s[0] == '\'' && s[len(s)-1] == '\'')) {
		return s[1 : len(s)-1]
	}
	return s
}

// isMarkerVar reports whether s is one of the environment marker variable
// names this evaluator recognises.
func isMarkerVar(s string) bool {
	switch s {
	case "os_name", "sys_platform", "platform_machine", "platform_python_implementation",
		"platform_release", "platform_system", "platform_version",
		"python_version", "python_full_version", "implementation_name",
		"implementation_version", "extra":
		return true
	}
	return false
}

// flipOp rewrites op for an expression whose operands were swapped so that the
// marker variable ends up on the left. Ordering operators are mirrored, and
// the membership operators become "contains" / "not contains" because
// `"x" in var` asks whether the variable contains the literal. Symmetric
// operators are returned unchanged.
func flipOp(op string) string {
	switch op {
	case ">":
		return "<"
	case "<":
		return ">"
	case ">=":
		return "<="
	case "<=":
		return ">="
	case "in":
		return "contains"
	case "not in":
		return "not contains"
	}
	return op
}

// matchMarkerOp applies op to the environment value actual and the marker
// literal expected. Ordering operators compare the strings lexicographically.
// "contains" / "not contains" are the swapped-operand forms produced by
// flipOp. Unsupported operators (for example "~=") are treated as satisfied.
func matchMarkerOp(actual, op, expected string) bool {
	switch op {
	case "==", "===":
		return actual == expected
	case "!=":
		return actual != expected
	case "in":
		return strings.Contains(expected, actual)
	case "not in":
		return !strings.Contains(expected, actual)
	case "contains":
		return strings.Contains(actual, expected)
	case "not contains":
		return !strings.Contains(actual, expected)
	case ">=":
		return actual >= expected
	case "<=":
		return actual <= expected
	case ">":
		return actual > expected
	case "<":
		return actual < expected
	default:
		return true
	}
}

// verifyChecksum verifies the SHA256 checksum of data against the expected value.
//
// expected must have the form "sha256:<hex digest>"; the digest is compared
// case-insensitively. An empty expected value skips verification and returns
// nil. Any other prefix, or a digest mismatch, yields an error.
func verifyChecksum(data []byte, expected string) error {
	if expected == "" {
		return nil
	}

	const prefix = "sha256:"
	if !strings.HasPrefix(expected, prefix) {
		return fmt.Errorf("unsupported checksum format: %s", expected)
	}
	expectedHex := expected[len(prefix):]

	h := sha256.Sum256(data)
	actualHex := hex.EncodeToString(h[:])

	// Hex digests are case-insensitive; indexes may publish either case.
	if !strings.EqualFold(actualHex, expectedHex) {
		return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedHex, actualHex)
	}

	return nil
}

// diff --git a/pkg/ecosystem/python/wheel_test.go b/pkg/ecosystem/python/wheel_test.go
// new file mode 100644
// index 000000000..4d87c8a4a
// --- /dev/null
// +++ b/pkg/ecosystem/python/wheel_test.go
// @@ -0,0 +1,158 @@
//
// Copyright 2024 Chainguard, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package python

import (
	"archive/zip"
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"testing"

	apkfs "chainguard.dev/apko/pkg/apk/fs"
)

// createTestWheel builds an in-memory zip archive containing one entry per
// element of files (entry name -> entry content) and returns the archive
// bytes. Any zip error fails the test immediately. Entries are written in map
// iteration order, so their order inside the archive is not stable.
func createTestWheel(t *testing.T, files map[string]string) []byte {
	t.Helper()
	var buf bytes.Buffer
	w := zip.NewWriter(&buf)
	for name, content := range files {
		f, err := w.Create(name)
		if err != nil {
			t.Fatalf("creating file in zip: %v", err)
		}
		if _, err := f.Write([]byte(content)); err != nil {
			t.Fatalf("writing file in zip: %v", err)
		}
	}
	// Close writes the zip central directory; without it the archive is invalid.
	if err := w.Close(); err != nil {
		t.Fatalf("closing zip: %v", err)
	}
	return buf.Bytes()
}

// TestExtractWheel extracts a small synthetic wheel into an in-memory
// filesystem and checks that the two module files land under the
// site-packages directory with their original contents.
func TestExtractWheel(t *testing.T) {
	wheelData := createTestWheel(t, map[string]string{
		"mypackage/__init__.py":              "# init",
		"mypackage/module.py":                "def hello(): pass",
		"mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n",
		"mypackage-1.0.0.dist-info/RECORD":   "",
	})

	// The target directory is created up front, before extraction.
	fs := apkfs.NewMemFS()
	if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil {
		t.Fatalf("MkdirAll: %v", err)
	}

	err := extractWheel(fs, wheelData, "usr/lib/python3.12/site-packages")
	if err != nil {
		t.Fatalf("extractWheel() error: %v", err)
	}

	// Check that files were extracted
	data, err := fs.ReadFile("usr/lib/python3.12/site-packages/mypackage/__init__.py")
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if string(data) != "# init" {
		t.Errorf("content = %q, want %q", string(data), "# init")
	}

	data, err = fs.ReadFile("usr/lib/python3.12/site-packages/mypackage/module.py")
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if string(data) != "def hello(): pass" {
		t.Errorf("content = %q, want %q", string(data), "def hello(): pass")
	}
}

// TestWriteInstallerFile checks that writeInstallerFile creates an INSTALLER
// file containing "apko\n" inside the wheel's dist-info directory.
func TestWriteInstallerFile(t *testing.T) {
	wheelData := createTestWheel(t, map[string]string{
		"mypackage/__init__.py":              "# init",
		"mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n",
	})

	// The dist-info directory is pre-created here; the wheel itself is not
	// extracted in this test.
	// NOTE(review): presumably writeInstallerFile derives the dist-info name
	// from the wheel contents — confirm against its implementation.
	fs := apkfs.NewMemFS()
	if err := fs.MkdirAll("usr/lib/python3.12/site-packages/mypackage-1.0.0.dist-info", 0755); err != nil {
		t.Fatalf("MkdirAll: %v", err)
	}

	err := writeInstallerFile(fs, "usr/lib/python3.12/site-packages", wheelData)
	if err != nil {
		t.Fatalf("writeInstallerFile() error: %v", err)
	}

	data, err := fs.ReadFile("usr/lib/python3.12/site-packages/mypackage-1.0.0.dist-info/INSTALLER")
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if string(data) != "apko\n" {
		t.Errorf("INSTALLER content = %q, want %q", string(data), "apko\n")
	}
}

// TestEvaluateMarkers is a table-driven test of evaluateMarkers covering the
// empty marker, "extra" markers with and without requested extras, the
// platform markers, and simple "and" / "or" combinations.
func TestEvaluateMarkers(t *testing.T) {
	tests := []struct {
		name    string
		markers string
		extras  []string
		want    bool
	}{
		{"no markers", "", nil, true},
		{"extra not requested", `extra == "dev"`, nil, false},
		{"extra requested", `extra == "dev"`, []string{"dev"}, true},
		{"wrong extra", `extra == "dev"`, []string{"test"}, false},
		{"os_name posix", `os_name == "posix"`, nil, true},
		{"os_name nt", `os_name == "nt"`, nil, false},
		{"sys_platform linux", `sys_platform == "linux"`, nil, true},
		{"sys_platform win32", `sys_platform == "win32"`, nil, false},
		{"platform_system Linux", `platform_system == "Linux"`, nil, true},
		{"python_version", `python_version >= "3.8"`, nil, true},
		{"compound and true", `python_version >= "3.8" and os_name == "posix"`, nil, true},
		{"compound and false", `os_name == "nt" and python_version >= "3.8"`, nil, false},
		{"compound or true", `os_name == "nt" or os_name == "posix"`, nil, true},
		{"compound or false", `os_name == "nt" or sys_platform == "win32"`, nil, false},
		{"extra and platform", `extra == "dev" and os_name == "posix"`, []string{"dev"}, true},
		{"extra and wrong platform", `extra == "dev" and os_name == "nt"`, []string{"dev"}, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := evaluateMarkers(tt.markers, tt.extras)
			if got != tt.want {
				t.Errorf("evaluateMarkers(%q, %v) = %v, want %v", tt.markers, tt.extras, got, tt.want)
			}
		})
	}
}

// TestVerifyChecksum checks verifyChecksum against a matching digest (no
// error), a non-matching digest (error) and an empty expected value (no
// error).
func TestVerifyChecksum(t *testing.T) {
	data := []byte("hello world")
	h := sha256.Sum256(data)
	validChecksum := "sha256:" + hex.EncodeToString(h[:])

	if err := verifyChecksum(data, validChecksum); err != nil {
		t.Errorf("verifyChecksum() with valid checksum: %v", err)
	}

	if err := verifyChecksum(data, "sha256:0000000000000000000000000000000000000000000000000000000000000000"); err == nil {
		t.Error("verifyChecksum() with invalid checksum should return error")
	}

	if err := verifyChecksum(data, ""); err != nil {
		t.Error("verifyChecksum() with empty checksum should return nil")
	}
}

// diff --git a/pkg/lock/lock.go b/pkg/lock/lock.go
// index aceb4e395..870661f8c 100644
// --- a/pkg/lock/lock.go
// +++ b/pkg/lock/lock.go
// @@ -24,12 +24,23 @@ type Config struct {
// }

type LockContents struct {
	Keyrings                []LockKeyring `json:"keyring"`
	BuildRepositories       []LockRepo    `json:"build_repositories"`
	RuntimeOnlyRepositories []LockRepo    `json:"runtime_repositories"`
	Repositories            []LockRepo    `json:"repositories"`
	// Packages in order of installation -> for a single architecture.
	Packages []LockPkg `json:"packages"`
	// EcosystemPackages holds the resolved non-APK packages. The field is
	// omitted from the JSON output when empty.
	EcosystemPackages []LockEcosystemPkg `json:"ecosystem_packages,omitempty"`
}

// LockEcosystemPkg represents a locked non-APK ecosystem package.
type LockEcosystemPkg struct {
	// Ecosystem names the package ecosystem the entry belongs to.
	Ecosystem string `json:"ecosystem"`
	// Name is the package name within that ecosystem.
	Name string `json:"name"`
	// Version is the resolved package version.
	Version string `json:"version"`
	// URL is the location of the resolved package artifact.
	URL string `json:"url"`
	// Checksum is the artifact checksum.
	// NOTE(review): presumably in "sha256:<hex>" form — confirm against the
	// resolver that fills it in.
	Checksum string `json:"checksum"`
	// Architecture is the architecture this entry was resolved for.
	Architecture string `json:"architecture"`
}

type LockPkg struct {