Skip to content

Commit cda052a

Browse files
refactor(collections): use builder classes for collection request builders (issue #3272)
Signed-off-by: Sukuna0007Abhi <[email protected]>
1 parent 8ec768d commit cda052a

File tree

2 files changed

+205
-53
lines changed

2 files changed

+205
-53
lines changed

augur/tasks/start_tasks.py

Lines changed: 104 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22
import logging
3+
from abc import ABC, abstractmethod
4+
from typing import Any, List
35
import os
46
#from celery.result import AsyncResult
57
from celery import group, chain
@@ -157,84 +159,133 @@ def non_repo_domain_tasks(self):
157159
tasks.apply_async()
158160

159161

160-
def build_primary_repo_collect_request(session, logger, enabled_phase_names, days_until_collect_again = 15):
161-
#Add all required tasks to a list and pass it to the CollectionRequest
162-
primary_enabled_phases = []
163-
primary_gitlab_enabled_phases = []
162+
class CollectionRequestBuilder(ABC):
163+
"""Base class for collection request builders.
164164
165-
#Primary jobs
166-
if prelim_phase.__name__ in enabled_phase_names:
167-
primary_enabled_phases.append(prelim_phase)
165+
Enforces the `build(session, logger, enabled_phase_names, days_until_collect_again)`
166+
signature so all builders accept the same arguments.
167+
"""
168168

169-
primary_enabled_phases.append(primary_repo_collect_phase)
170-
primary_gitlab_enabled_phases.append(primary_repo_collect_phase_gitlab)
169+
@abstractmethod
170+
def build(self, session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int) -> "CollectionRequest":
171+
"""Build and return a `CollectionRequest` configured for this builder."""
172+
pass
171173

172-
#task success is scheduled no matter what the config says.
173-
def core_task_success_util_gen(repo_git, full_collection):
174-
return core_task_success_util.si(repo_git)
175174

176-
primary_enabled_phases.append(core_task_success_util_gen)
177-
primary_gitlab_enabled_phases.append(core_task_success_util_gen)
175+
class PrimaryCollectionRequestBuilder(CollectionRequestBuilder):
176+
def build(self, session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 15) -> "CollectionRequest":
177+
"""Return a `CollectionRequest` for primary collections.
178178
179-
primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=days_until_collect_again, gitlab_phases=primary_gitlab_enabled_phases)
180-
primary_request.get_valid_repos(session)
181-
return primary_request
179+
Parameters mirror the runtime API used across task builders so callers can
180+
pass `enabled_phase_names` and an interval `days_until_collect_again`.
181+
"""
182+
# Build enabled phases
183+
primary_enabled_phases: List = []
184+
primary_gitlab_enabled_phases: List = []
182185

183-
def build_secondary_repo_collect_request(session, logger, enabled_phase_names, days_until_collect_again = 1):
184-
#Deal with secondary collection
185-
secondary_enabled_phases = []
186+
# Primary jobs
187+
if prelim_phase.__name__ in enabled_phase_names:
188+
primary_enabled_phases.append(prelim_phase)
186189

187-
if prelim_phase.__name__ in enabled_phase_names:
188-
secondary_enabled_phases.append(prelim_phase_secondary)
190+
primary_enabled_phases.append(primary_repo_collect_phase)
191+
primary_gitlab_enabled_phases.append(primary_repo_collect_phase_gitlab)
189192

193+
# Add success task
194+
def core_task_success_util_gen(repo_git, full_collection):
195+
return core_task_success_util.si(repo_git)
190196

191-
secondary_enabled_phases.append(secondary_repo_collect_phase)
197+
primary_enabled_phases.append(core_task_success_util_gen)
198+
primary_gitlab_enabled_phases.append(core_task_success_util_gen)
192199

193-
def secondary_task_success_util_gen(repo_git, full_collection):
194-
return secondary_task_success_util.si(repo_git)
200+
primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=days_until_collect_again, gitlab_phases=primary_gitlab_enabled_phases)
201+
primary_request.get_valid_repos(session)
202+
return primary_request
195203

196-
secondary_enabled_phases.append(secondary_task_success_util_gen)
197-
198-
request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=days_until_collect_again)
199204

200-
request.get_valid_repos(session)
201-
return request
205+
class SecondaryCollectionRequestBuilder(CollectionRequestBuilder):
206+
def build(self, session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 1) -> "CollectionRequest":
207+
"""Return a `CollectionRequest` for secondary collections."""
208+
# Secondary collection
209+
secondary_enabled_phases: List = []
210+
211+
if prelim_phase.__name__ in enabled_phase_names:
212+
secondary_enabled_phases.append(prelim_phase_secondary)
213+
214+
215+
secondary_enabled_phases.append(secondary_repo_collect_phase)
216+
217+
def secondary_task_success_util_gen(repo_git, full_collection):
218+
return secondary_task_success_util.si(repo_git)
219+
220+
secondary_enabled_phases.append(secondary_task_success_util_gen)
221+
222+
request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=days_until_collect_again)
223+
224+
request.get_valid_repos(session)
225+
return request
226+
227+
228+
class FacadeCollectionRequestBuilder(CollectionRequestBuilder):
229+
def build(self, session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 10) -> "CollectionRequest":
230+
"""Return a `CollectionRequest` for facade (git) collections."""
231+
# Facade collection
232+
facade_enabled_phases: List = []
233+
234+
facade_enabled_phases.append(facade_phase)
235+
236+
def facade_task_success_util_gen(repo_git, full_collection):
237+
return facade_task_success_util.si(repo_git)
238+
239+
facade_enabled_phases.append(facade_task_success_util_gen)
240+
241+
def facade_task_update_weight_util_gen(repo_git, full_collection):
242+
return git_update_commit_count_weight.si(repo_git)
243+
244+
facade_enabled_phases.append(facade_task_update_weight_util_gen)
245+
246+
request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=days_until_collect_again)
247+
248+
request.get_valid_repos(session)
249+
return request
250+
202251

252+
class MLCollectionRequestBuilder(CollectionRequestBuilder):
253+
def build(self, session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 40) -> "CollectionRequest":
254+
"""Return a `CollectionRequest` for machine-learning related collections."""
255+
ml_enabled_phases: List = []
203256

204-
def build_facade_repo_collect_request(session, logger, enabled_phase_names, days_until_collect_again = 10):
205-
#Deal with facade collection
206-
facade_enabled_phases = []
257+
ml_enabled_phases.append(machine_learning_phase)
207258

208-
facade_enabled_phases.append(facade_phase)
259+
def ml_task_success_util_gen(repo_git, full_collection):
260+
return ml_task_success_util.si(repo_git)
209261

210-
def facade_task_success_util_gen(repo_git, full_collection):
211-
return facade_task_success_util.si(repo_git)
262+
ml_enabled_phases.append(ml_task_success_util_gen)
212263

213-
facade_enabled_phases.append(facade_task_success_util_gen)
264+
request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=days_until_collect_again)
265+
request.get_valid_repos(session)
266+
return request
214267

215-
def facade_task_update_weight_util_gen(repo_git, full_collection):
216-
return git_update_commit_count_weight.si(repo_git)
217268

218-
facade_enabled_phases.append(facade_task_update_weight_util_gen)
269+
# Backwards-compatible thin wrappers (keep API stable)
219270

220-
request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=days_until_collect_again)
271+
def build_primary_repo_collect_request(session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 15) -> "CollectionRequest":
272+
"""Thin wrapper around `PrimaryCollectionRequestBuilder.build` to preserve API compatibility."""
273+
return PrimaryCollectionRequestBuilder().build(session, logger, enabled_phase_names, days_until_collect_again)
221274

222-
request.get_valid_repos(session)
223-
return request
224275

225-
def build_ml_repo_collect_request(session, logger, enabled_phase_names, days_until_collect_again = 40):
226-
ml_enabled_phases = []
276+
def build_secondary_repo_collect_request(session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 1) -> "CollectionRequest":
277+
"""Thin wrapper around `SecondaryCollectionRequestBuilder.build` to preserve API compatibility."""
278+
return SecondaryCollectionRequestBuilder().build(session, logger, enabled_phase_names, days_until_collect_again)
227279

228-
ml_enabled_phases.append(machine_learning_phase)
229280

230-
def ml_task_success_util_gen(repo_git, full_collection):
231-
return ml_task_success_util.si(repo_git)
281+
def build_facade_repo_collect_request(session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 10) -> "CollectionRequest":
282+
"""Thin wrapper around `FacadeCollectionRequestBuilder.build` to preserve API compatibility."""
283+
return FacadeCollectionRequestBuilder().build(session, logger, enabled_phase_names, days_until_collect_again)
232284

233-
ml_enabled_phases.append(ml_task_success_util_gen)
234285

235-
request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=days_until_collect_again)
236-
request.get_valid_repos(session)
237-
return request
286+
def build_ml_repo_collect_request(session: Any, logger: logging.Logger, enabled_phase_names: List[str], days_until_collect_again: int = 40) -> "CollectionRequest":
287+
"""Thin wrapper around `MLCollectionRequestBuilder.build` to preserve API compatibility."""
288+
return MLCollectionRequestBuilder().build(session, logger, enabled_phase_names, days_until_collect_again)
238289

239290
@celery.task(bind=True)
240291
def augur_collection_monitor(self):
@@ -246,7 +297,7 @@ def augur_collection_monitor(self):
246297
logger.info("Checking for repos to collect")
247298

248299

249-
#Get list of enabled phases
300+
# Get enabled phases
250301
enabled_phase_names = get_enabled_phase_names_from_config(engine, logger)
251302

252303
enabled_collection_hooks = []
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import inspect
2+
3+
from augur.tasks.start_tasks import (
4+
CollectionRequestBuilder,
5+
PrimaryCollectionRequestBuilder,
6+
SecondaryCollectionRequestBuilder,
7+
FacadeCollectionRequestBuilder,
8+
MLCollectionRequestBuilder,
9+
build_primary_repo_collect_request,
10+
build_secondary_repo_collect_request,
11+
build_facade_repo_collect_request,
12+
build_ml_repo_collect_request,
13+
)
14+
15+
16+
def test_builders_are_subclasses():
17+
assert issubclass(PrimaryCollectionRequestBuilder, CollectionRequestBuilder)
18+
assert issubclass(SecondaryCollectionRequestBuilder, CollectionRequestBuilder)
19+
assert issubclass(FacadeCollectionRequestBuilder, CollectionRequestBuilder)
20+
assert issubclass(MLCollectionRequestBuilder, CollectionRequestBuilder)
21+
22+
23+
def test_builders_have_build_method():
24+
for cls in (
25+
PrimaryCollectionRequestBuilder,
26+
SecondaryCollectionRequestBuilder,
27+
FacadeCollectionRequestBuilder,
28+
MLCollectionRequestBuilder,
29+
):
30+
assert hasattr(cls, "build")
31+
assert inspect.isfunction(cls.build) or inspect.ismethod(cls.build)
32+
33+
34+
def test_wrapper_functions_exist():
35+
assert callable(build_primary_repo_collect_request)
36+
assert callable(build_secondary_repo_collect_request)
37+
assert callable(build_facade_repo_collect_request)
38+
assert callable(build_ml_repo_collect_request)
39+
40+
41+
def test_build_method_signature_contains_expected_params():
42+
# Ensure the builders follow the expected signature so callers can pass enabled phase names
43+
for cls in (
44+
PrimaryCollectionRequestBuilder,
45+
SecondaryCollectionRequestBuilder,
46+
FacadeCollectionRequestBuilder,
47+
MLCollectionRequestBuilder,
48+
):
49+
sig = inspect.signature(cls.build)
50+
params = list(sig.parameters.keys())
51+
assert "enabled_phase_names" in params
52+
assert "days_until_collect_again" in params
53+
54+
55+
def test_wrapper_signatures_match_build():
56+
# Wrapper functions should accept the same parameters as the builders' build method
57+
builders_and_wrappers = [
58+
(PrimaryCollectionRequestBuilder, build_primary_repo_collect_request),
59+
(SecondaryCollectionRequestBuilder, build_secondary_repo_collect_request),
60+
(FacadeCollectionRequestBuilder, build_facade_repo_collect_request),
61+
(MLCollectionRequestBuilder, build_ml_repo_collect_request),
62+
]
63+
64+
for builder_cls, wrapper in builders_and_wrappers:
65+
builder_sig = inspect.signature(builder_cls.build)
66+
wrapper_sig = inspect.signature(wrapper)
67+
# Compare parameter names ignoring 'self' on the bound method
68+
builder_params = [p for p in list(builder_sig.parameters.keys()) if p != 'self']
69+
wrapper_params = list(wrapper_sig.parameters.keys())
70+
assert builder_params == wrapper_params
71+
72+
73+
def test_build_returns_collection_request(monkeypatch):
74+
import logging
75+
from augur.tasks.start_tasks import PrimaryCollectionRequestBuilder
76+
from augur.tasks.util.collection_util import CollectionRequest
77+
78+
# Avoid DB interaction by stubbing out get_valid_repos
79+
monkeypatch.setattr(CollectionRequest, "get_valid_repos", lambda self, session: None)
80+
81+
builder = PrimaryCollectionRequestBuilder()
82+
result = builder.build(session=None, logger=logging.getLogger("test"), enabled_phase_names=[], days_until_collect_again=1)
83+
84+
assert isinstance(result, CollectionRequest)
85+
86+
87+
def test_wrapper_is_passthrough(monkeypatch):
88+
import logging
89+
from augur.tasks.start_tasks import PrimaryCollectionRequestBuilder, build_primary_repo_collect_request
90+
from augur.tasks.util.collection_util import CollectionRequest
91+
92+
monkeypatch.setattr(CollectionRequest, "get_valid_repos", lambda self, session: None)
93+
94+
b = PrimaryCollectionRequestBuilder()
95+
res_builder = b.build(session=None, logger=logging.getLogger("test"), enabled_phase_names=["a"], days_until_collect_again=2)
96+
res_wrapper = build_primary_repo_collect_request(session=None, logger=logging.getLogger("test"), enabled_phase_names=["a"], days_until_collect_again=2)
97+
98+
# Basic sanity checks to ensure wrapper returns what the builder builds
99+
assert isinstance(res_wrapper, CollectionRequest)
100+
assert res_builder.name == res_wrapper.name
101+
assert res_builder.max_repo == res_wrapper.max_repo

0 commit comments

Comments
 (0)