Skip to content

Commit a6390fb

Browse files
authored
feat: add periodic task to update domain statuses (#18014)
1 parent a685531 commit a6390fb

File tree

13 files changed

+177
-18
lines changed

13 files changed

+177
-18
lines changed

requirements/tests.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ freezegun
44
pretend
55
pytest>=3.0.0
66
pytest-icdiff
7+
pytest-mock
78
pytest-postgresql>=3.1.3,<8.0.0
89
pytest-randomly
910
pytest-socket

requirements/tests.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ pytest==8.3.5 \
252252
# via
253253
# -r requirements/tests.in
254254
# pytest-icdiff
255+
# pytest-mock
255256
# pytest-postgresql
256257
# pytest-randomly
257258
# pytest-socket
@@ -261,6 +262,10 @@ pytest-icdiff==0.9 \
261262
--hash=sha256:13aede616202e57fcc882568b64589002ef85438046f012ac30a8d959dac8b75 \
262263
--hash=sha256:efee0da3bd1b24ef2d923751c5c547fbb8df0a46795553fba08ef57c3ca03d82
263264
# via -r requirements/tests.in
265+
pytest-mock==3.14.0 \
266+
--hash=sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f \
267+
--hash=sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0
268+
# via -r requirements/tests.in
264269
pytest-postgresql==7.0.1 \
265270
--hash=sha256:7723dfbfc57ea6f6f9876c2828e7b36f8b0e60b6cb040b1ddd444a60eed06e0a \
266271
--hash=sha256:cbc6a67bbad5128b1f00def8cca5cf597020acc79893723f7a9cb60981b6840f

tests/conftest.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,8 +550,10 @@ def search_service():
550550

551551

552552
@pytest.fixture
553-
def domain_status_service():
554-
return account_services.NullDomainStatusService()
553+
def domain_status_service(mocker):
554+
service = account_services.NullDomainStatusService()
555+
mocker.spy(service, "get_domain_status")
556+
return service
555557

556558

557559
class QueryRecorder:

tests/unit/accounts/test_services.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1691,7 +1691,7 @@ def __init__(self):
16911691
session=session, client_id="some_client_id"
16921692
)
16931693

1694-
assert svc.get_domain_status("example.com") == []
1694+
assert svc.get_domain_status("example.com") is None
16951695
assert session.get.calls == [
16961696
pretend.call(
16971697
"https://api.domainr.com/v2/status",

tests/unit/accounts/test_tasks.py

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@
1717

1818
from warehouse.accounts import tasks
1919
from warehouse.accounts.models import TermsOfServiceEngagement
20-
from warehouse.accounts.tasks import compute_user_metrics, notify_users_of_tos_update
20+
from warehouse.accounts.tasks import (
21+
batch_update_email_domain_status,
22+
compute_user_metrics,
23+
notify_users_of_tos_update,
24+
)
2125

2226
from ...common.db.accounts import EmailFactory, UserFactory
2327
from ...common.db.packaging import ProjectFactory, ReleaseFactory
@@ -192,3 +196,55 @@ def test_compute_user_metrics(db_request, metrics):
192196
],
193197
),
194198
]
199+
200+
201+
def test_update_email_domain_status(db_request, domain_status_service, mocker):
    """
    Test that the batch update performs the correct queries and updates

    Emails that were never checked, or whose last check is 30 days old or
    more, are looked up (never-checked first, then oldest first); an email
    checked one day ago is left untouched.
    """
    # Single reference time so the four fixtures are offset consistently.
    now = datetime.now(tz=timezone.utc)

    # NOTE(review): the original addresses were redacted by the page this was
    # captured from; each email only needs a distinct domain so the expected
    # lookups below can be told apart — confirm against the repository.
    never_checked = EmailFactory.create(
        email="me@never-checked.example", domain_last_checked=None
    )
    over_threshold = EmailFactory.create(
        email="me@over-threshold.example",
        domain_last_checked=now - timedelta(days=90),
    )
    # The task computes its own cutoff after this point and compares with a
    # strict `<`, so a timestamp taken exactly 30 days before `now` is
    # already older than the cutoff by the time the task runs.
    on_threshold = EmailFactory.create(
        email="me@on-threshold.example",
        domain_last_checked=now - timedelta(days=30),
    )
    under_threshold = EmailFactory.create(
        email="me@under-threshold.example",
        domain_last_checked=now - timedelta(days=1),
    )

    batch_update_email_domain_status(db_request)

    assert domain_status_service.get_domain_status.call_count == 3
    domain_status_service.get_domain_status.assert_has_calls(
        [
            mocker.call(never_checked.domain),
            mocker.call(over_threshold.domain),
            mocker.call(on_threshold.domain),
        ]
    )

    assert never_checked.domain_last_status == ["active"]
    assert over_threshold.domain_last_status == ["active"]
    assert on_threshold.domain_last_status == ["active"]
    assert under_threshold.domain_last_status is None  # no default, not updated
236+
237+
238+
def test_update_email_domain_status_does_not_update_if_not_needed(
    db_request, domain_status_service, mocker
):
    """
    Test that a failed lookup (service returns None) leaves the email untouched
    """
    # Force the status service to report "no answer" for every domain.
    mocker.patch.object(domain_status_service, "get_domain_status", return_value=None)
    unresolved_email = EmailFactory.create()

    batch_update_email_domain_status(db_request)

    # The domain was looked up exactly once ...
    domain_status_service.get_domain_status.assert_called_once_with(
        unresolved_email.domain
    )
    # ... but neither the timestamp nor the status was recorded.
    assert unresolved_email.domain_last_checked is None
    assert unresolved_email.domain_last_status is None

warehouse/accounts/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@
3232
TokenServiceFactory,
3333
database_login_factory,
3434
)
35-
from warehouse.accounts.tasks import compute_user_metrics, notify_users_of_tos_update
35+
from warehouse.accounts.tasks import (
36+
batch_update_email_domain_status,
37+
compute_user_metrics,
38+
notify_users_of_tos_update,
39+
)
3640
from warehouse.accounts.utils import UserContext
3741
from warehouse.admin.flags import AdminFlagValue
3842
from warehouse.macaroons.security_policy import MacaroonSecurityPolicy
@@ -215,3 +219,5 @@ def includeme(config):
215219
# Add a periodic task to generate Account metrics
216220
config.add_periodic_task(crontab(minute="*/20"), compute_user_metrics)
217221
config.add_periodic_task(crontab(minute="*"), notify_users_of_tos_update)
222+
# TODO: After initial backfill, this can be done less frequently
223+
config.add_periodic_task(crontab(minute="*/5"), batch_update_email_domain_status)

warehouse/accounts/interfaces.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ def get_email_breach_count(email: str) -> int | None:
301301

302302

303303
class IDomainStatusService(Interface):
304-
def get_domain_status(domain: str) -> list[str]:
304+
def get_domain_status(domain: str) -> list[str] | None:
305305
"""
306306
Returns a list of status strings for the given domain, or None if the status could not be retrieved.
307307
"""

warehouse/accounts/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ class Email(db.ModelBase):
427427
# Domain validation information
428428
domain_last_checked: Mapped[datetime.datetime | None] = mapped_column(
429429
comment="Last time domain was checked with the domain validation service.",
430+
index=True,
430431
)
431432
domain_last_status: Mapped[list[str] | None] = mapped_column(
432433
ARRAY(String),

warehouse/accounts/services.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@ def create_service(cls, _context, request: Request) -> DomainrDomainStatusServic
992992
domainr_client_id = request.registry.settings.get("domain_status.client_id")
993993
return cls(session=request.http, client_id=domainr_client_id)
994994

995-
def get_domain_status(self, domain: str) -> list[str]:
995+
def get_domain_status(self, domain: str) -> list[str] | None:
996996
"""
997997
Check if a domain is available or not.
998998
See https://domainr.com/docs/api/v2/status
@@ -1006,6 +1006,6 @@ def get_domain_status(self, domain: str) -> list[str]:
10061006
resp.raise_for_status()
10071007
except requests.RequestException as exc:
10081008
logger.warning("Error contacting Domainr: %r", exc)
1009-
return []
1009+
return None
10101010

10111011
return resp.json()["status"][0]["status"].split()

warehouse/accounts/tasks.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
from datetime import datetime, timedelta, timezone
13+
from __future__ import annotations
1414

15-
from sqlalchemy import func
15+
import typing
16+
17+
from datetime import UTC, datetime, timedelta, timezone
18+
19+
from sqlalchemy import func, nullsfirst, or_, select
1620

1721
from warehouse import tasks
1822
from warehouse.accounts.models import (
@@ -22,10 +26,14 @@
2226
UserTermsOfServiceEngagement,
2327
)
2428
from warehouse.accounts.services import IUserService
29+
from warehouse.accounts.utils import update_email_domain_status
2530
from warehouse.email import send_user_terms_of_service_updated
2631
from warehouse.metrics import IMetricsService
2732
from warehouse.packaging.models import Release
2833

34+
if typing.TYPE_CHECKING:
35+
from pyramid.request import Request
36+
2937

3038
@tasks.task(ignore_result=True, acks_late=True)
3139
def notify_users_of_tos_update(request):
@@ -136,3 +144,32 @@ def compute_user_metrics(request):
136144
"primary:true",
137145
],
138146
)
147+
148+
149+
@tasks.task(ignore_result=True, acks_late=True)
def batch_update_email_domain_status(request: Request) -> None:
    """
    Refresh the domain status of emails never checked or last checked
    more than 30 days ago.

    30 days is roughly the time between a domain's expiration
    and when it enters a renewal grace period.
    Each TLD may express their own grace period, 30 days is an estimate
    of time before the registrar is likely to sell it.

    At most 10,000 emails are selected per run, never-checked ones first,
    then the least recently checked. Each selected email is handed to
    ``update_email_domain_status`` together with the request.
    """
    # Anything checked before this instant counts as stale.
    stale_before = datetime.now(tz=UTC) - timedelta(days=30)

    # TODO: After completely backfilled, remove the `or_` for None
    needs_check = or_(
        Email.domain_last_checked.is_(None),
        Email.domain_last_checked < stale_before,
    )

    stmt = (
        select(Email)
        .where(needs_check)
        .order_by(nullsfirst(Email.domain_last_checked.asc()))
        .limit(10_000)
        # Fetch rows in chunks of 1,000 rather than loading the whole
        # result set into memory at once.
        .execution_options(yield_per=1_000)
    )

    for email in request.db.scalars(stmt):
        update_email_domain_status(email, request)

0 commit comments

Comments
 (0)