Skip to content

Commit ca90e4e

Browse files
committed
apk: repo: introduce backtracking support for better virtual package resolution
We have identified a situation in which apko dependency resolver doesn't correctly resolve virtual packages competing to satisfy the dependency tree. Take this scenario as example: * an image installs package-A and package-B * package-A depends on so:libfoo (unversioned) * package-B depends on so:libfoo-15 (versioned) * both so:libfoo and so:libfoo-15 can be satisfied by the libfoo-15 package * but there is also libfoo-16, which also provides so:libfoo (unversioned) In this scenario, before this patch, apko resolve will happily install libfoo-16 and thus break installation of package-B, resulting in an image build failure. This patch introduces a "backtracking" logic that re-evaluates the dependency three when this case is found. This is achieved with this strategy: * by introducing the notion of "retryable" errors in the resolver loop, and * snapshotting the resolver state when evaluating candidate packages * evaluate package candidates and if a retryable error is found, restore resolver state from the snapshot This change has been co-authored with Claude Code. Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero@chainguard.dev>
1 parent 0fb082b commit ca90e4e

File tree

2 files changed

+581
-55
lines changed

2 files changed

+581
-55
lines changed

pkg/apk/apk/repo.go

Lines changed: 251 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -333,14 +333,9 @@ func (p *PkgResolver) disqualifyProviders(constraint string, dq map[*RepositoryP
333333
}
334334

335335
func (p *PkgResolver) conflictingVersion(constraint ParsedConstraint, conflict *repositoryPackage) bool {
336-
// If the constraint is not a virtual, everything will conflict with it.
337-
// TODO: Figure out if this logic is actually equivalent to how apk disqualifies virtual.
338-
if constraint.Version != "" {
339-
return true
340-
}
341-
342-
// From here on, "the same version" really means it's a virtual, but let's keep the diff small until we revisit.
343-
336+
// Two packages conflict when they provide the same virtual at *different* versions.
337+
// Same-version co-providers are allowed: if neither package is explicitly requested
338+
// by name, having two packages both advertising "helper=1.0" is harmless.
344339
if conflict.Name == constraint.Name {
345340
return conflict.Version != constraint.Version
346341
}
@@ -392,6 +387,22 @@ func (p *PkgResolver) disqualifyConflicts(pkg *RepositoryPackage, dq map[*Reposi
392387
}
393388
}
394389

390+
// packageProvidesVersion reports whether pkg provides name at the given version,
391+
// either because pkg.Name == name && pkg.Version == version, or because one of
392+
// pkg.Provides contains "name=version".
393+
func packageProvidesVersion(pkg *RepositoryPackage, name, version string) bool {
394+
if pkg.Name == name && pkg.Version == version {
395+
return true
396+
}
397+
for _, prov := range pkg.Provides {
398+
pc := cachedResolvePackageNameVersionPin(prov)
399+
if pc.Name == name && pc.Version == version {
400+
return true
401+
}
402+
}
403+
return false
404+
}
405+
395406
func (p *PkgResolver) pick(pkg *RepositoryPackage) error {
396407
if conflict, ok := p.selected[pkg.Name]; ok {
397408
// Trying to re-select the same thing is fine actually.
@@ -406,21 +417,75 @@ func (p *PkgResolver) pick(pkg *RepositoryPackage) error {
406417

407418
for _, prov := range pkg.Provides {
408419
constraint := cachedResolvePackageNameVersionPin(prov)
409-
if conflict, ok := p.selected[constraint.Name]; ok {
410-
return fmt.Errorf("selecting package %s conflicts with %s on %q", pkg.Filename(), conflict.Filename(), constraint.Name)
411-
}
412420

413-
// We don't care about virtuals, actually.
421+
// We don't track unversioned provides in p.selected.
414422
if constraint.Version == "" {
415423
continue
416424
}
417425

426+
if conflict, ok := p.selected[constraint.Name]; ok {
427+
// Same-version co-providers are valid: the dep slot is already
428+
// satisfied by an equivalent provider, so there is no real conflict.
429+
if packageProvidesVersion(conflict, constraint.Name, constraint.Version) {
430+
continue
431+
}
432+
return fmt.Errorf("selecting package %s conflicts with %s on %q", pkg.Filename(), conflict.Filename(), constraint.Name)
433+
}
434+
418435
p.selected[constraint.Name] = pkg
419436
}
420437

421438
return nil
422439
}
423440

441+
// resolverSnapshot captures the mutable resolver and call-local state at a
442+
// choice point so it can be restored if the chosen candidate fails.
443+
type resolverSnapshot struct {
444+
selected map[string]*RepositoryPackage
445+
dq map[*RepositoryPackage]string
446+
existing map[string]*RepositoryPackage
447+
existingOrigins map[string]bool
448+
depsLen int
449+
conflictsLen int
450+
}
451+
452+
// snapshotState captures the current mutable state for later restoration.
453+
func (p *PkgResolver) snapshotState(
454+
dq map[*RepositoryPackage]string,
455+
existing map[string]*RepositoryPackage,
456+
existingOrigins map[string]bool,
457+
deps []*RepositoryPackage,
458+
conflicts []string,
459+
) resolverSnapshot {
460+
return resolverSnapshot{
461+
selected: maps.Clone(p.selected),
462+
dq: maps.Clone(dq),
463+
existing: maps.Clone(existing),
464+
existingOrigins: maps.Clone(existingOrigins),
465+
depsLen: len(deps),
466+
conflictsLen: len(conflicts),
467+
}
468+
}
469+
470+
// restoreState reverts all mutable state to a previously captured snapshot.
471+
// Because dq and existing are function-local maps (not struct fields), the
472+
// restored copies are returned so callers can reassign their local variables.
473+
func (p *PkgResolver) restoreState(
474+
snap resolverSnapshot,
475+
deps []*RepositoryPackage,
476+
conflicts []string,
477+
) (
478+
dq map[*RepositoryPackage]string,
479+
existing map[string]*RepositoryPackage,
480+
existingOrigins map[string]bool,
481+
restoredDeps []*RepositoryPackage,
482+
restoredConflicts []string,
483+
) {
484+
p.selected = snap.selected
485+
return snap.dq, snap.existing, snap.existingOrigins,
486+
deps[:snap.depsLen], conflicts[:snap.conflictsLen]
487+
}
488+
424489
func (p *PkgResolver) disqualify(dq map[*RepositoryPackage]string, pkg *RepositoryPackage, reason string) {
425490
dq[pkg] = reason
426491

@@ -508,6 +573,35 @@ func (p *PkgResolver) GetPackagesWithDependencies(ctx context.Context, packages
508573
return nil, nil, fmt.Errorf("constraining initial packages: %w", err)
509574
}
510575

576+
// Pre-constrain: apply versioned deps from each top-level package's best
577+
// candidate before Phase 1 greedily selects providers via disqualifyConflicts.
578+
// Without this, pkg-A's greedy provider selection can DQ the only provider
579+
// that satisfies pkg-B's stricter version requirement of the same virtual.
580+
// constrain() is monotone — it only DQs packages that provably fail a versioned
581+
// requirement, so this cannot produce false failures.
582+
{
583+
var preConstraints []string
584+
for _, pkgName := range constraints {
585+
parsed := cachedResolvePackageNameVersionPin(pkgName)
586+
candidates, ok := p.nameMap[parsed.Name]
587+
if !ok {
588+
continue
589+
}
590+
filtered := filterPackages(candidates, dq, withVersion(parsed.Version, parsed.dep))
591+
if len(filtered) == 0 {
592+
continue
593+
}
594+
best := p.bestPackage(filtered, nil, parsed.Name, nil, nil, parsed.pin)
595+
if best == nil {
596+
continue
597+
}
598+
preConstraints = append(preConstraints, best.Dependencies...)
599+
}
600+
if err := p.constrain(preConstraints, dq); err != nil {
601+
return nil, nil, fmt.Errorf("pre-constraining top-level deps: %w", err)
602+
}
603+
}
604+
511605
for len(constraints) != 0 {
512606
next, err := p.nextPackage(constraints, dq)
513607
if err != nil {
@@ -530,30 +624,76 @@ func (p *PkgResolver) GetPackagesWithDependencies(ctx context.Context, packages
530624
p.disqualifyConflicts(pkg, dq)
531625
}
532626

533-
// now get the dependencies for each package
534-
for _, pkgName := range packages {
535-
pkg, deps, confs, err := p.GetPackageWithDependencies(ctx, pkgName, dependenciesMap, dq)
536-
if err != nil {
537-
return toInstall, nil, &ConstraintError{pkgName, err}
627+
// Snapshot state at the Phase 1 / Phase 2 boundary. When a package's
628+
// dependency resolution fails retryably (e.g. because an earlier package's
629+
// greedy provider choice DQed the provider the failing package needs), we
630+
// reset to this snapshot and retry with the failing package moved to the
631+
// front of the processing order. Processing it first lets its transitive
632+
// versioned constraints propagate into dq before the other packages run
633+
// their greedy selection.
634+
phase2DQ := maps.Clone(dq)
635+
phase2Selected := maps.Clone(p.selected)
636+
phase2DepsMap := maps.Clone(dependenciesMap)
637+
packageOrder := slices.Clone(packages)
638+
639+
var lastPhase2Err error
640+
for attempt := range len(packages) {
641+
if attempt > 0 {
642+
// Reset to Phase 2 initial state.
643+
clear(dq)
644+
maps.Copy(dq, phase2DQ)
645+
p.selected = maps.Clone(phase2Selected)
646+
clear(dependenciesMap)
647+
maps.Copy(dependenciesMap, phase2DepsMap)
648+
toInstall = toInstall[:0]
649+
installTracked = map[string]*RepositoryPackage{}
650+
conflicts = conflicts[:0]
538651
}
539652

540-
for _, dep := range deps {
541-
if _, ok := installTracked[dep.Name]; !ok {
542-
toInstall = append(toInstall, dep)
543-
installTracked[dep.Name] = dep
653+
lastPhase2Err = nil
654+
for i, pkgName := range packageOrder {
655+
pkg, deps, confs, err := p.GetPackageWithDependencies(ctx, pkgName, dependenciesMap, dq)
656+
if err != nil {
657+
if !isRetryable(err) {
658+
return nil, nil, &ConstraintError{pkgName, err}
659+
}
660+
lastPhase2Err = &ConstraintError{pkgName, err}
661+
// Move the failing package to front so its transitive
662+
// constraints run before the other packages next attempt.
663+
newOrder := make([]string, 0, len(packageOrder))
664+
newOrder = append(newOrder, pkgName)
665+
newOrder = append(newOrder, packageOrder[:i]...)
666+
newOrder = append(newOrder, packageOrder[i+1:]...)
667+
packageOrder = newOrder
668+
break
669+
}
670+
671+
for _, dep := range deps {
672+
if _, ok := installTracked[dep.Name]; !ok {
673+
toInstall = append(toInstall, dep)
674+
installTracked[dep.Name] = dep
675+
}
676+
if _, ok := dependenciesMap[dep.Name]; !ok {
677+
dependenciesMap[dep.Name] = dep
678+
}
544679
}
545-
if _, ok := dependenciesMap[dep.Name]; !ok {
546-
dependenciesMap[dep.Name] = dep
680+
if _, ok := installTracked[pkg.Name]; !ok {
681+
toInstall = append(toInstall, pkg)
682+
installTracked[pkg.Name] = pkg
547683
}
684+
if _, ok := dependenciesMap[pkg.Name]; !ok {
685+
dependenciesMap[pkg.Name] = pkg
686+
}
687+
conflicts = append(conflicts, confs...)
548688
}
549-
if _, ok := installTracked[pkg.Name]; !ok {
550-
toInstall = append(toInstall, pkg)
551-
installTracked[pkg.Name] = pkg
552-
}
553-
if _, ok := dependenciesMap[pkg.Name]; !ok {
554-
dependenciesMap[pkg.Name] = pkg
689+
690+
if lastPhase2Err == nil {
691+
break
555692
}
556-
conflicts = append(conflicts, confs...)
693+
}
694+
695+
if lastPhase2Err != nil {
696+
return nil, nil, lastPhase2Err
557697
}
558698

559699
conflicts = uniqify(conflicts)
@@ -824,7 +964,8 @@ func (p *PkgResolver) getPackageDependencies(ctx context.Context, pkg *Repositor
824964
}
825965

826966
// We already selected something to satisfy "name" and it does not match the "version" we need now.
827-
return nil, nil, fmt.Errorf("we already selected \"%s=%s\" which conflicts with %q", picked.Name, picked.Version, dep)
967+
// This is retryable: a different upstream provider choice might avoid this conflict.
968+
return nil, nil, &retryableError{fmt.Errorf("we already selected \"%s=%s\" which conflicts with %q", picked.Name, picked.Version, dep)}
828969
}
829970

830971
// first see if it is a name of a package
@@ -841,7 +982,13 @@ func (p *PkgResolver) getPackageDependencies(ctx context.Context, pkg *Repositor
841982
withInstalledPackage(existing[name]),
842983
)
843984
if len(pkgs) == 0 {
844-
return nil, nil, &ConstraintError{dep, maybedqerror(depPkgWithVersions, dq)}
985+
dqErr := maybedqerror(depPkgWithVersions, dq)
986+
// If all candidates exist but were disqualified, a different upstream
987+
// provider choice might un-disqualify one — mark as retryable.
988+
if isDQBasedError(dqErr) {
989+
return nil, nil, &retryableError{&ConstraintError{dep, dqErr}}
990+
}
991+
return nil, nil, &ConstraintError{dep, dqErr}
845992
}
846993
options[dep] = pkgs
847994
}
@@ -871,38 +1018,65 @@ func (p *PkgResolver) getPackageDependencies(ctx context.Context, pkg *Repositor
8711018
return s == lowest
8721019
})
8731020

874-
best := p.bestPackage(pkgs, nil, name, existing, existingOrigins, "")
875-
if best == nil {
876-
return nil, nil, &ConstraintError{name, fmt.Errorf("could not find package for %q", name)}
877-
}
878-
879-
depPkg := best.RepositoryPackage
880-
p.disqualifyConflicts(depPkg, dq)
1021+
// Sort candidates in preference order (best first). We try them in
1022+
// order and backtrack on retryable failures.
1023+
p.sortPackages(pkgs, nil, name, existing, existingOrigins, "")
8811024

882-
// and then recurse to its children
883-
// each child gets the parental chain, but should not affect any others,
884-
// so we duplicate the map for the child
1025+
// Build the parent chain once; it is the same for all candidates.
8851026
childParents := map[string]bool{}
8861027
for k := range parents {
8871028
childParents[k] = true
8881029
}
8891030
childParents[pkg.Name] = true
8901031

891-
if err := p.pick(pkg); err != nil {
892-
return nil, nil, err
893-
}
1032+
var (
1033+
lastErr error
1034+
succeeded bool
1035+
)
1036+
for _, candidate := range pkgs {
1037+
// Snapshot all mutable state before attempting this candidate.
1038+
snap := p.snapshotState(dq, existing, existingOrigins, dependencies, conflicts)
1039+
1040+
depPkg := candidate.RepositoryPackage
1041+
p.disqualifyConflicts(depPkg, dq)
1042+
if err := p.pick(depPkg); err != nil {
1043+
// This should not happen: disqualifyConflicts removes conflicting
1044+
// providers before we reach pick, and conflictingVersion now allows
1045+
// same-version co-providers. Log and restore rather than silently
1046+
// ignoring or panicking.
1047+
clog.FromContext(ctx).Warnf("unexpected pick conflict for %s: %v", depPkg.Filename(), err)
1048+
dq, existing, existingOrigins, dependencies, conflicts =
1049+
p.restoreState(snap, dependencies, conflicts)
1050+
lastErr = &retryableError{err}
1051+
continue
1052+
}
8941053

895-
subDeps, confs, err := p.getPackageDependencies(ctx, depPkg, allowPin, childParents, existing, existingOrigins, dq)
896-
if err != nil {
897-
return nil, nil, &ConstraintError{name, &DepError{depPkg, err}}
1054+
subDeps, confs, err := p.getPackageDependencies(ctx, depPkg, allowPin, childParents, existing, existingOrigins, dq)
1055+
if err != nil {
1056+
dq, existing, existingOrigins, dependencies, conflicts =
1057+
p.restoreState(snap, dependencies, conflicts)
1058+
if !isRetryable(err) {
1059+
// Hard failure — propagate immediately.
1060+
return nil, nil, &ConstraintError{name, &DepError{depPkg, err}}
1061+
}
1062+
lastErr = err
1063+
continue
1064+
}
1065+
1066+
// Candidate succeeded — commit results (depth-first order).
1067+
dependencies = append(dependencies, subDeps...)
1068+
dependencies = append(dependencies, depPkg)
1069+
conflicts = append(conflicts, confs...)
1070+
for _, dep := range subDeps {
1071+
existing[dep.Name] = dep
1072+
existingOrigins[dep.Origin] = true
1073+
}
1074+
succeeded = true
1075+
break
8981076
}
899-
// first add the children, then the parent (depth-first)
900-
dependencies = append(dependencies, subDeps...)
901-
dependencies = append(dependencies, depPkg)
902-
conflicts = append(conflicts, confs...)
903-
for _, dep := range subDeps {
904-
existing[dep.Name] = dep
905-
existingOrigins[dep.Origin] = true
1077+
1078+
if !succeeded {
1079+
return nil, nil, &ConstraintError{name, lastErr}
9061080
}
9071081
}
9081082
return dependencies, conflicts, nil
@@ -1140,6 +1314,28 @@ func maybedqerror(pkgs []*repositoryPackage, dq map[*RepositoryPackage]string) e
11401314
return errors.New("not in indexes")
11411315
}
11421316

1317+
// retryableError wraps an error to signal that the caller should try the
1318+
// next candidate rather than propagating failure immediately. An error is
1319+
// retryable when a different upstream package selection might resolve it.
1320+
type retryableError struct{ wrapped error }
1321+
1322+
func (e *retryableError) Error() string { return e.wrapped.Error() }
1323+
func (e *retryableError) Unwrap() error { return e.wrapped }
1324+
1325+
// isRetryable reports whether err (or any error in its chain) is a retryableError.
1326+
func isRetryable(err error) bool {
1327+
var r *retryableError
1328+
return errors.As(err, &r)
1329+
}
1330+
1331+
// isDQBasedError reports whether err contains a DisqualifiedError, meaning all
1332+
// candidates exist in the index but were disqualified. A different upstream
1333+
// provider choice might un-disqualify one of them.
1334+
func isDQBasedError(err error) bool {
1335+
var d *DisqualifiedError
1336+
return errors.As(err, &d)
1337+
}
1338+
11431339
func disqualifyDifference(ctx context.Context, byArch map[string][]NamedIndex) map[*RepositoryPackage]string {
11441340
dq := map[*RepositoryPackage]string{}
11451341

0 commit comments

Comments
 (0)