Skip to content

Commit 35f9cac

Browse files
authored
refactor: replace windowed_query with yield_per (#17361)
1 parent 24a2693 commit 35f9cac

File tree

4 files changed: 25 additions (+25) and 149 deletions (-149).

4 files changed: 25 additions (+25) and 149 deletions (-149).

tests/unit/utils/db/test_windowed_query.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

warehouse/search/tasks.py

Lines changed: 23 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323
from opensearchpy.helpers import parallel_bulk
2424
from redis.lock import Lock
2525
from sqlalchemy import func, select, text
26-
from sqlalchemy.orm import aliased
2726
from urllib3.util import parse_url
2827

2928
from warehouse import tasks
@@ -36,28 +35,10 @@
3635
)
3736
from warehouse.packaging.search import Project as ProjectDocument
3837
from warehouse.search.utils import get_index
39-
from warehouse.utils.db import windowed_query
4038

4139

42-
def _project_docs(db, project_name=None):
43-
releases_list = (
44-
select(Release.id)
45-
.filter(Release.yanked.is_(False), Release.files.any())
46-
.order_by(
47-
Release.project_id,
48-
Release.is_prerelease.nullslast(),
49-
Release._pypi_ordering.desc(),
50-
)
51-
.distinct(Release.project_id)
52-
)
53-
54-
if project_name:
55-
releases_list = releases_list.join(Project).filter(Project.name == project_name)
56-
57-
releases_list = releases_list.subquery()
58-
rlist = aliased(Release, releases_list)
59-
60-
classifiers = (
40+
def _project_docs(db, project_name: str | None = None):
41+
classifiers_subquery = (
6142
select(func.array_agg(Classifier.classifier))
6243
.select_from(ReleaseClassifiers)
6344
.join(Classifier, Classifier.id == ReleaseClassifiers.trove_id)
@@ -66,8 +47,7 @@ def _project_docs(db, project_name=None):
6647
.scalar_subquery()
6748
.label("classifiers")
6849
)
69-
70-
release_data = (
50+
projects_to_index = (
7151
select(
7252
Description.raw.label("description"),
7353
Release.author,
@@ -80,18 +60,32 @@ def _project_docs(db, project_name=None):
8060
Release.platform,
8161
Release.download_url,
8262
Release.created,
83-
classifiers,
63+
classifiers_subquery,
8464
Project.normalized_name,
8565
Project.name,
8666
)
87-
.select_from(rlist)
88-
.join(Release, Release.id == rlist.id)
67+
.select_from(Release)
8968
.join(Description)
90-
.outerjoin(Release.project)
69+
.join(Project)
70+
.filter(
71+
Release.yanked.is_(False),
72+
Release.files.any(),
73+
# Filter by project_name if provided
74+
Project.name == project_name if project_name else text("TRUE"),
75+
)
76+
.order_by(
77+
Project.name,
78+
Release.is_prerelease.nullslast(),
79+
Release._pypi_ordering.desc(),
80+
)
81+
.distinct(Project.name)
82+
.execution_options(yield_per=25000)
9183
)
9284

93-
for chunk in windowed_query(db, release_data, Project.name, 25000):
94-
for release in chunk:
85+
results = db.execute(projects_to_index)
86+
87+
for partition in results.partitions():
88+
for release in partition:
9589
p = ProjectDocument.from_db(release)
9690
p._index = None
9791
p.full_clean()

warehouse/utils/db/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,6 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
from warehouse.utils.db.windowed_query import windowed_query
13+
from warehouse.utils.db.query_printer import print_query
1414

15-
__all__ = ["windowed_query"]
15+
__all__ = ["print_query"]

warehouse/utils/db/windowed_query.py

Lines changed: 0 additions & 77 deletions
This file was deleted.

0 commit comments

Comments (0)