Skip to content

Commit 9ed4c5a

Browse files
authored
index: avoid quadratic complexity through bulk update (spack#48771)
Refactors `ProviderIndex` and `TagIndex` to support bulk updates. Previously, updating the index with N packages involved iterating through the entire index for each package to remove stale entries, resulting in O(N * IndexSize) complexity. This change splits the update process into two steps: 1. Bulk removal of stale entries for all updated packages. 2. Bulk insertion of new entries. This reduces the complexity of full index generation from quadratic to linear. Signed-off-by: Harmen Stoppels <me@harmenstoppels.nl>
1 parent 6e390cb commit 9ed4c5a

File tree

6 files changed

+107
-106
lines changed

6 files changed

+107
-106
lines changed

lib/spack/spack/patch.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import os
77
import pathlib
88
import sys
9-
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type, Union
9+
from typing import TYPE_CHECKING, Any, Dict, Optional, Set, Tuple, Type, Union
1010

1111
import spack
1212
import spack.error
@@ -455,36 +455,38 @@ def patch_for_package(self, sha256: str, pkg: "spack.package_base.PackageBase")
455455
patch_dict["sha256"] = sha256
456456
return from_dict(patch_dict, repository=self.repository)
457457

458-
def update_package(self, pkg_fullname: str) -> None:
458+
def update_packages(self, pkgs_fullname: Set[str]) -> None:
459459
"""Update the patch cache.
460460
461461
Args:
462462
pkg_fullname: package to update.
463463
"""
464464
# remove this package from any patch entries that reference it.
465-
empty = []
466-
for sha256, package_to_patch in self.index.items():
467-
remove = []
468-
for fullname, patch_dict in package_to_patch.items():
469-
if patch_dict["owner"] == pkg_fullname:
470-
remove.append(fullname)
465+
if self.index:
466+
empty = []
467+
for sha256, package_to_patch in self.index.items():
468+
remove = []
469+
for fullname, patch_dict in package_to_patch.items():
470+
if patch_dict["owner"] in pkgs_fullname:
471+
remove.append(fullname)
471472

472-
for fullname in remove:
473-
package_to_patch.pop(fullname)
473+
for fullname in remove:
474+
package_to_patch.pop(fullname)
474475

475-
if not package_to_patch:
476-
empty.append(sha256)
476+
if not package_to_patch:
477+
empty.append(sha256)
477478

478-
# remove any entries that are now empty
479-
for sha256 in empty:
480-
del self.index[sha256]
479+
# remove any entries that are now empty
480+
for sha256 in empty:
481+
del self.index[sha256]
481482

482483
# update the index with per-package patch indexes
483-
pkg_cls = self.repository.get_pkg_class(pkg_fullname)
484-
partial_index = self._index_patches(pkg_cls, self.repository)
485-
for sha256, package_to_patch in partial_index.items():
486-
p2p = self.index.setdefault(sha256, {})
487-
p2p.update(package_to_patch)
484+
for pkg_fullname in pkgs_fullname:
485+
pkg_cls = self.repository.get_pkg_class(pkg_fullname)
486+
partial_index = self._index_patches(pkg_cls, self.repository)
487+
for sha256, package_to_patch in partial_index.items():
488+
p2p = self.index.setdefault(sha256, {})
489+
p2p.update(package_to_patch)
488490

489491
def update(self, other: "PatchCache") -> None:
490492
"""Update this cache with the contents of another.

lib/spack/spack/provider_index.py

Lines changed: 40 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,8 @@ def __init__(
5353
self.repository = repository
5454
self.restrict = restrict
5555
self.providers = {}
56-
57-
specs = specs or []
58-
for spec in specs:
59-
if isinstance(spec, str):
60-
from spack.spec import Spec
61-
62-
spec = Spec(spec)
63-
64-
if self.repository.is_virtual_safe(spec.name):
65-
continue
66-
67-
self.update(spec)
56+
if specs:
57+
self.update_packages(specs)
6858

6959
def providers_for(self, virtual: Union[str, "spack.spec.Spec"]) -> List["spack.spec.Spec"]:
7060
"""Return a list of specs of all packages that provide virtual packages with the supplied
@@ -122,57 +112,55 @@ def __str__(self):
122112
def __repr__(self):
123113
return repr(self.providers)
124114

125-
def update(self, spec: Union[str, "spack.spec.Spec"]) -> None:
115+
def update_packages(self, specs: Iterable[Union[str, "spack.spec.Spec"]]):
126116
"""Update the provider index with additional virtual specs.
127117
128118
Args:
129119
spec: spec potentially providing additional virtual specs
130120
"""
131-
if isinstance(spec, str):
132-
from spack.spec import Spec
133-
134-
spec = Spec(spec)
121+
from spack.spec import Spec
135122

136-
if not spec.name:
137-
# Empty specs do not have a package
138-
return
123+
for spec in specs:
124+
if not isinstance(spec, Spec):
125+
spec = Spec(spec)
139126

140-
msg = "cannot update an index passing the virtual spec '{}'".format(spec.name)
141-
assert not self.repository.is_virtual_safe(spec.name), msg
127+
if not spec.name or self.repository.is_virtual_safe(spec.name):
128+
# Only non-virtual packages with name can provide virtual specs.
129+
continue
142130

143-
pkg_cls = self.repository.get_pkg_class(spec.name)
144-
for provider_spec_readonly, provided_specs in pkg_cls.provided.items():
145-
for provided_spec in provided_specs:
146-
# TODO: fix this comment.
147-
# We want satisfaction other than flags
148-
provider_spec = provider_spec_readonly.copy()
149-
provider_spec.compiler_flags = spec.compiler_flags.copy()
131+
pkg_cls = self.repository.get_pkg_class(spec.name)
132+
for provider_spec_readonly, provided_specs in pkg_cls.provided.items():
133+
for provided_spec in provided_specs:
134+
# TODO: fix this comment.
135+
# We want satisfaction other than flags
136+
provider_spec = provider_spec_readonly.copy()
137+
provider_spec.compiler_flags = spec.compiler_flags.copy()
150138

151-
if spec.intersects(provider_spec, deps=False):
152-
provided_name = provided_spec.name
139+
if spec.intersects(provider_spec, deps=False):
140+
provided_name = provided_spec.name
153141

154-
provider_map = self.providers.setdefault(provided_name, {})
155-
if provided_spec not in provider_map:
156-
provider_map[provided_spec] = set()
142+
provider_map = self.providers.setdefault(provided_name, {})
143+
if provided_spec not in provider_map:
144+
provider_map[provided_spec] = set()
157145

158-
if self.restrict:
159-
provider_set = provider_map[provided_spec]
146+
if self.restrict:
147+
provider_set = provider_map[provided_spec]
160148

161-
# If this package existed in the index before,
162-
# need to take the old versions out, as they're
163-
# now more constrained.
164-
old = set([s for s in provider_set if s.name == spec.name])
165-
provider_set.difference_update(old)
149+
# If this package existed in the index before,
150+
# need to take the old versions out, as they're
151+
# now more constrained.
152+
old = {s for s in provider_set if s.name == spec.name}
153+
provider_set.difference_update(old)
166154

167-
# Now add the new version.
168-
provider_set.add(spec)
155+
# Now add the new version.
156+
provider_set.add(spec)
169157

170-
else:
171-
# Before putting the spec in the map, constrain
172-
# it so that it provides what was asked for.
173-
constrained = spec.copy()
174-
constrained.constrain(provider_spec)
175-
provider_map[provided_spec].add(constrained)
158+
else:
159+
# Before putting the spec in the map, constrain
160+
# it so that it provides what was asked for.
161+
constrained = spec.copy()
162+
constrained.constrain(provider_spec)
163+
provider_map[provided_spec].add(constrained)
176164

177165
def to_json(self, stream=None):
178166
"""Dump a JSON representation of this object.
@@ -207,14 +195,14 @@ def merge(self, other):
207195

208196
spdict[provided_spec] = spdict[provided_spec].union(opdict[provided_spec])
209197

210-
def remove_provider(self, pkg_name):
198+
def remove_providers(self, pkg_names: Set[str]):
211199
"""Remove a provider from the ProviderIndex."""
212200
empty_pkg_dict = []
213201
for pkg, pkg_dict in self.providers.items():
214202
empty_pset = []
215203
for provided, pset in pkg_dict.items():
216-
same_name = set(p for p in pset if p.fullname == pkg_name)
217-
pset.difference_update(same_name)
204+
to_remove = {spec for spec in pset if spec.name in pkg_names}
205+
pset.difference_update(to_remove)
218206

219207
if not pset:
220208
empty_pset.append(provided)

lib/spack/spack/repo.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ def read(self, stream):
485485
"""Read this index from a provided file object."""
486486

487487
@abc.abstractmethod
488-
def update(self, pkg_fullname):
488+
def update(self, pkgs_fullname: Set[str]):
489489
"""Update the index in memory with information about a package."""
490490

491491
@abc.abstractmethod
@@ -502,8 +502,8 @@ def _create(self) -> spack.tag.TagIndex:
502502
def read(self, stream):
503503
self.index = spack.tag.TagIndex.from_json(stream)
504504

505-
def update(self, pkg_fullname):
506-
self.index.update_package(pkg_fullname.split(".")[-1], self.repository)
505+
def update(self, pkgs_fullname: Set[str]):
506+
self.index.update_packages({p.split(".")[-1] for p in pkgs_fullname}, self.repository)
507507

508508
def write(self, stream):
509509
self.index.to_json(stream)
@@ -518,15 +518,15 @@ def _create(self) -> "spack.provider_index.ProviderIndex":
518518
def read(self, stream):
519519
self.index = spack.provider_index.ProviderIndex.from_json(stream, self.repository)
520520

521-
def update(self, pkg_fullname):
522-
name = pkg_fullname.split(".")[-1]
521+
def update(self, pkgs_fullname: Set[str]):
523522
is_virtual = (
524-
not self.repository.exists(name) or self.repository.get_pkg_class(name).virtual
523+
lambda name: not self.repository.exists(name)
524+
or self.repository.get_pkg_class(name).virtual
525525
)
526-
if is_virtual:
527-
return
528-
self.index.remove_provider(pkg_fullname)
529-
self.index.update(pkg_fullname)
526+
non_virtual_pkgs_fullname = {p for p in pkgs_fullname if not is_virtual(p.split(".")[-1])}
527+
non_virtual_pkgs_names = {p.split(".")[-1] for p in non_virtual_pkgs_fullname}
528+
self.index.remove_providers(non_virtual_pkgs_names)
529+
self.index.update_packages(non_virtual_pkgs_fullname)
530530

531531
def write(self, stream):
532532
self.index.to_json(stream)
@@ -551,8 +551,8 @@ def read(self, stream):
551551
def write(self, stream):
552552
self.index.to_json(stream)
553553

554-
def update(self, pkg_fullname):
555-
self.index.update_package(pkg_fullname)
554+
def update(self, pkgs_fullname: Set[str]):
555+
self.index.update_packages(pkgs_fullname)
556556

557557

558558
class RepoIndex:
@@ -644,9 +644,7 @@ def _build_index(self, name: str, indexer: Indexer):
644644
if new_index_mtime != index_mtime:
645645
needs_update = self.checker.modified_since(new_index_mtime)
646646

647-
for pkg_name in needs_update:
648-
indexer.update(f"{self.namespace}.{pkg_name}")
649-
647+
indexer.update({f"{self.namespace}.{pkg_name}" for pkg_name in needs_update})
650648
indexer.write(new)
651649

652650
return indexer.index

lib/spack/spack/tag.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
44
"""Classes and functions to manage package tags"""
5-
from typing import TYPE_CHECKING, Dict, List
5+
from typing import TYPE_CHECKING, Dict, List, Set
66

77
import spack.error
88
import spack.util.spack_json as sjson
@@ -51,26 +51,28 @@ def merge(self, other: "TagIndex") -> None:
5151
else:
5252
self.tags[tag] = sorted({*self.tags[tag], *pkgs})
5353

54-
def update_package(self, pkg_name: str, repo: "spack.repo.Repo") -> None:
55-
"""Updates a package in the tag index.
54+
def update_packages(self, pkg_names: Set[str], repo: "spack.repo.Repo") -> None:
55+
"""Updates packages in the tag index.
5656
5757
Args:
58-
pkg_name: name of the package to be updated
58+
pkg_names: names of the packages to be updated
59+
repo: the repository to get package classes from
5960
"""
60-
pkg_cls = repo.get_pkg_class(pkg_name)
61-
62-
# Remove the package from the list of packages, if present
61+
# Remove the packages from the list of packages, if present
6362
for pkg_list in self.tags.values():
64-
if pkg_name in pkg_list:
65-
pkg_list.remove(pkg_name)
66-
67-
# Add it again under the appropriate tags
68-
for tag in getattr(pkg_cls, "tags", []):
69-
tag = tag.lower()
70-
if tag not in self.tags:
71-
self.tags[tag] = [pkg_cls.name]
72-
else:
73-
self.tags[tag].append(pkg_cls.name)
63+
if pkg_names.isdisjoint(pkg_list):
64+
continue
65+
pkg_list[:] = [pkg for pkg in pkg_list if pkg not in pkg_names]
66+
67+
# Add them again under the appropriate tags
68+
for pkg_name in pkg_names:
69+
pkg_cls = repo.get_pkg_class(pkg_name)
70+
for tag in getattr(pkg_cls, "tags", []):
71+
tag = tag.lower()
72+
if tag not in self.tags:
73+
self.tags[tag] = [pkg_cls.name]
74+
else:
75+
self.tags[tag].append(pkg_cls.name)
7476

7577

7678
class TagIndexError(spack.error.SpackError):

lib/spack/spack/test/provider_index.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,15 @@ def test_copy(mock_packages):
7272
p = ProviderIndex(specs=spack.repo.all_package_names(), repository=spack.repo.PATH)
7373
q = p.copy()
7474
assert p == q
75+
76+
77+
def test_remove_providers(mock_packages):
78+
"""Test removing providers from the index."""
79+
p = ProviderIndex(specs=["mpich"], repository=spack.repo.PATH)
80+
# Check that mpich is a provider for mpi
81+
providers = p.providers_for("mpi")
82+
assert any(spec.name == "mpich" for spec in providers)
83+
p.remove_providers({"mpich"})
84+
# After removal, mpich should no longer be a provider for mpi
85+
providers = p.providers_for("mpi")
86+
assert not any(spec.name == "mpich" for spec in providers)

lib/spack/spack/test/tag.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,6 @@ def test_tag_no_tags(mock_packages):
146146
def test_tag_update_package(mock_packages):
147147
mock_index = mock_packages.tag_index
148148
index = spack.tag.TagIndex()
149-
for name in spack.repo.all_package_names():
150-
index.update_package(name, repo=mock_packages)
149+
index.update_packages(set(spack.repo.all_package_names()), repo=mock_packages)
151150

152151
ensure_tags_results_equal(mock_index.tags, index.tags)

0 commit comments

Comments
 (0)