23
23
from opensearchpy .helpers import parallel_bulk
24
24
from redis .lock import Lock
25
25
from sqlalchemy import func , select , text
26
- from sqlalchemy .orm import aliased
27
26
from urllib3 .util import parse_url
28
27
29
28
from warehouse import tasks
36
35
)
37
36
from warehouse .packaging .search import Project as ProjectDocument
38
37
from warehouse .search .utils import get_index
39
- from warehouse .utils .db import windowed_query
40
38
41
39
42
- def _project_docs (db , project_name = None ):
43
- releases_list = (
44
- select (Release .id )
45
- .filter (Release .yanked .is_ (False ), Release .files .any ())
46
- .order_by (
47
- Release .project_id ,
48
- Release .is_prerelease .nullslast (),
49
- Release ._pypi_ordering .desc (),
50
- )
51
- .distinct (Release .project_id )
52
- )
53
-
54
- if project_name :
55
- releases_list = releases_list .join (Project ).filter (Project .name == project_name )
56
-
57
- releases_list = releases_list .subquery ()
58
- rlist = aliased (Release , releases_list )
59
-
60
- classifiers = (
40
+ def _project_docs (db , project_name : str | None = None ):
41
+ classifiers_subquery = (
61
42
select (func .array_agg (Classifier .classifier ))
62
43
.select_from (ReleaseClassifiers )
63
44
.join (Classifier , Classifier .id == ReleaseClassifiers .trove_id )
@@ -66,8 +47,7 @@ def _project_docs(db, project_name=None):
66
47
.scalar_subquery ()
67
48
.label ("classifiers" )
68
49
)
69
-
70
- release_data = (
50
+ projects_to_index = (
71
51
select (
72
52
Description .raw .label ("description" ),
73
53
Release .author ,
@@ -80,18 +60,32 @@ def _project_docs(db, project_name=None):
80
60
Release .platform ,
81
61
Release .download_url ,
82
62
Release .created ,
83
- classifiers ,
63
+ classifiers_subquery ,
84
64
Project .normalized_name ,
85
65
Project .name ,
86
66
)
87
- .select_from (rlist )
88
- .join (Release , Release .id == rlist .id )
67
+ .select_from (Release )
89
68
.join (Description )
90
- .outerjoin (Release .project )
69
+ .join (Project )
70
+ .filter (
71
+ Release .yanked .is_ (False ),
72
+ Release .files .any (),
73
+ # Filter by project_name if provided
74
+ Project .name == project_name if project_name else text ("TRUE" ),
75
+ )
76
+ .order_by (
77
+ Project .name ,
78
+ Release .is_prerelease .nullslast (),
79
+ Release ._pypi_ordering .desc (),
80
+ )
81
+ .distinct (Project .name )
82
+ .execution_options (yield_per = 25000 )
91
83
)
92
84
93
- for chunk in windowed_query (db , release_data , Project .name , 25000 ):
94
- for release in chunk :
85
+ results = db .execute (projects_to_index )
86
+
87
+ for partition in results .partitions ():
88
+ for release in partition :
95
89
p = ProjectDocument .from_db (release )
96
90
p ._index = None
97
91
p .full_clean ()
0 commit comments