Skip to content

Commit 0bb2fe3

Browse files
KevinMindeviljeff
authored andcommitted
Add bulk add functionality for disposable email domains (#23726)
* Add bulk add functionality for disposable email domains - Implemented the `bulk_add_disposable_email_domains` task to handle the addition of these domains. - Added a management command to facilitate bulk addition from a CSV file. - Created unit tests to ensure the functionality works as expected, including handling of duplicate entries. * Fix comments * TMP: Respond to comments. * Robustify the testimajigger. * A great offering to the oracle of beautiful python. * Strip whitespace from domain * Fix broken test
1 parent 6edc442 commit 0bb2fe3

File tree

5 files changed

+292
-2
lines changed

5 files changed

+292
-2
lines changed

src/olympia/lib/settings_base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,7 @@ def get_db_config(environ_var, atomic_requests=True):
825825
},
826826
'olympia.users.tasks.sync_suppressed_emails_task': {'queue': 'cron'},
827827
'olympia.users.tasks.send_suppressed_email_confirmation': {'queue': 'devhub'},
828+
'olympia.users.tasks.bulk_add_disposable_email_domains': {'queue': 'devhub'},
828829
# Reviewers.
829830
'olympia.reviewers.tasks.recalculate_post_review_weight': {'queue': 'reviewers'},
830831
# Admin.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import csv
2+
from pathlib import Path
3+
4+
from django.core.management.base import BaseCommand, CommandError
5+
6+
from olympia.core import logger
7+
from olympia.users.tasks import bulk_add_disposable_email_domains
8+
9+
10+
logger = logger.getLogger('z.users')
11+
12+
13+
class Command(BaseCommand):
14+
help = 'Bulk add disposable email domains'
15+
16+
def add_arguments(self, parser):
17+
parser.add_argument('file', type=str)
18+
19+
def handle(self, *args, **options):
20+
file_path = Path(options['file'])
21+
if not file_path.exists():
22+
raise CommandError(f'File {file_path} does not exist')
23+
24+
records = []
25+
with file_path.open(newline='') as f:
26+
reader = csv.reader(f)
27+
next(reader, None) # skip header row
28+
for row in reader:
29+
if len(row) >= 2:
30+
domain, provider = row[0], row[1]
31+
records.append((domain.strip(), provider.strip()))
32+
33+
result = bulk_add_disposable_email_domains.apply(args=[records])
34+
logger.info(result)

src/olympia/users/tasks.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import urllib.parse
55

66
from django.conf import settings
7+
from django.core.exceptions import ValidationError
8+
from django.db.utils import InterfaceError, OperationalError
79
from django.urls import reverse
810
from django.utils.translation import gettext
911

@@ -24,6 +26,7 @@
2426

2527
from .models import (
2628
BannedUserContent,
29+
DisposableEmailDomainRestriction,
2730
SuppressedEmail,
2831
SuppressedEmailVerification,
2932
UserProfile,
@@ -196,3 +199,53 @@ def send_suppressed_email_confirmation(suppressed_email_verification_id):
196199
)
197200

198201
verification.save()
202+
203+
204+
@task(
205+
autoretry_for=(OperationalError, InterfaceError), max_retries=5, retry_backoff=True
206+
)
207+
def bulk_add_disposable_email_domains(entries: list[tuple[str, str]], batch_size=1000):
208+
if not isinstance(batch_size, int) or batch_size <= 0:
209+
raise ValueError('batch_size must be a positive integer')
210+
211+
task_log.info(f'Adding {len(entries)} disposable email domains')
212+
213+
records = []
214+
errors = []
215+
216+
for entry in entries:
217+
[domain, provider] = entry
218+
record = DisposableEmailDomainRestriction(
219+
domain=domain,
220+
reason=f'Disposable email domain of {provider}',
221+
)
222+
223+
try:
224+
record.full_clean()
225+
records.append(record)
226+
except ValidationError as e:
227+
errors.append(e)
228+
229+
if not records:
230+
task_log.info('No valid entries provided')
231+
return
232+
233+
processed_domains = []
234+
235+
for i in range(0, len(records), batch_size):
236+
batch = records[i : i + batch_size]
237+
created_objects = DisposableEmailDomainRestriction.objects.bulk_create(
238+
batch,
239+
batch_size,
240+
ignore_conflicts=True,
241+
)
242+
processed_domains.extend(created_objects)
243+
task_log.info(
244+
f'Successfully processed {len(created_objects)} '
245+
f'of {len(batch)} domains in this batch'
246+
)
247+
248+
task_log.info(
249+
f'Processed {len(processed_domains)} domains: '
250+
f'{[obj.domain for obj in processed_domains]}'
251+
)

src/olympia/users/tests/test_commands.py

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
import json
33
import os
44
import re
5+
import tempfile
56
import uuid
67
from ipaddress import IPv4Address
78
from unittest.mock import ANY, patch
89

910
from django.core.files.base import ContentFile
10-
from django.core.management import call_command
11+
from django.core.management import CommandError, call_command
1112

13+
from celery.result import EagerResult
1214
from waffle.testutils import override_switch
1315

1416
from olympia import amo
@@ -17,7 +19,11 @@
1719
from olympia.amo.tests import TestCase, addon_factory, user_factory
1820
from olympia.amo.utils import SafeStorage
1921
from olympia.users.management.commands.createsuperuser import Command as CreateSuperUser
20-
from olympia.users.models import UserProfile, UserRestrictionHistory
22+
from olympia.users.models import (
23+
DisposableEmailDomainRestriction,
24+
UserProfile,
25+
UserRestrictionHistory,
26+
)
2127

2228

2329
@patch('olympia.users.management.commands.createsuperuser.input')
@@ -473,3 +479,143 @@ def test_migrate_twice(self):
473479
self.test_migrate()
474480
# Running the migration command again shouldn't do anything.
475481
call_command('migrate_user_photos')
482+
483+
484+
class TestBulkAddDisposableEmailDomains(TestCase):
485+
def test_missing_file_raises_command_error(self):
486+
"""Test that the command raises CommandError if the file does not exist"""
487+
488+
for file, message in [
489+
(None, 'Error: the following arguments are required: file'),
490+
('none', 'File none does not exist'),
491+
]:
492+
with self.assertRaises(CommandError) as e:
493+
args = ['bulk_add_disposable_domains']
494+
if file:
495+
args.append(file)
496+
call_command(*args)
497+
assert message == e.exception.args[0]
498+
499+
def test_valid_file_triggers_bulk_add(self):
500+
"""Test that a valid CSV file triggers
501+
the bulk_add_disposable_email_domains task
502+
and creates DisposableDomainRestriction objects.
503+
"""
504+
505+
csv_content = (
506+
'Domain,Provider\n'
507+
'mailfast.pro,incognitomail.co\n'
508+
'foo.com,mail.tm\n'
509+
'mailpro.live,incognitomail.co\n'
510+
)
511+
512+
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv') as tmp:
513+
tmp.write(csv_content)
514+
tmp.flush()
515+
tmp_path = tmp.name
516+
517+
assert DisposableEmailDomainRestriction.objects.count() == 0
518+
519+
call_command('bulk_add_disposable_domains', tmp_path)
520+
521+
expected = [
522+
('mailfast.pro', 'incognitomail.co'),
523+
('foo.com', 'mail.tm'),
524+
('mailpro.live', 'incognitomail.co'),
525+
]
526+
for domain, provider in expected:
527+
assert DisposableEmailDomainRestriction.objects.filter(
528+
domain=domain, reason=f'Disposable email domain of {provider}'
529+
).exists()
530+
531+
assert DisposableEmailDomainRestriction.objects.count() == len(expected)
532+
533+
def test_file_with_missing_columns(self):
534+
"""Test that rows with missing columns are ignored or handled gracefully."""
535+
csv_content = (
536+
'Domain,Provider\n'
537+
'mailfast.pro,incognitomail.co\n'
538+
',mail.tm\n' # Missing domain
539+
'mailpro.live,incognitomail.co\n'
540+
)
541+
542+
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv') as tmp:
543+
tmp.write(csv_content)
544+
tmp.flush()
545+
tmp_path = tmp.name
546+
547+
assert DisposableEmailDomainRestriction.objects.count() == 0
548+
549+
call_command('bulk_add_disposable_domains', tmp_path)
550+
551+
expected = [
552+
('mailfast.pro', 'incognitomail.co'),
553+
('mailpro.live', 'incognitomail.co'),
554+
]
555+
for domain, provider in expected:
556+
assert DisposableEmailDomainRestriction.objects.filter(
557+
domain=domain, reason=f'Disposable email domain of {provider}'
558+
).exists()
559+
560+
assert not DisposableEmailDomainRestriction.objects.filter(domain='').exists()
561+
assert DisposableEmailDomainRestriction.objects.count() == len(expected)
562+
563+
def test_file_with_header_only(self):
564+
"""Test that a file with only a header row does not trigger any additions."""
565+
csv_content = 'Domain,Provider\n'
566+
567+
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv') as tmp:
568+
tmp.write(csv_content)
569+
tmp.flush()
570+
tmp_path = tmp.name
571+
572+
call_command('bulk_add_disposable_domains', tmp_path)
573+
assert DisposableEmailDomainRestriction.objects.count() == 0
574+
575+
@patch('olympia.users.management.commands.bulk_add_disposable_domains.logger')
576+
def test_bulk_add_result_is_printed(self, mock_logger):
577+
"""result of bulk_add_disposable_email_domains task logged."""
578+
579+
csv_content = (
580+
'Domain,Provider\n'
581+
'mailfast.pro,incognitomail.co\n'
582+
'mailpro.live,incognitomail.co\n'
583+
)
584+
585+
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv') as tmp:
586+
tmp.write(csv_content)
587+
tmp.flush()
588+
tmp_path = tmp.name
589+
590+
call_command('bulk_add_disposable_domains', tmp_path)
591+
result = mock_logger.info.call_args[0][0]
592+
assert isinstance(result, EagerResult)
593+
assert result.state == 'SUCCESS'
594+
595+
def test_trims_domains_of_whitespace(self):
596+
"""Test that domains with whitespace are trimmed."""
597+
csv_content = (
598+
'Domain,Provider\n'
599+
' mailfast.pro ,incognitomail.co\n'
600+
'foo.com,mail.tm\n'
601+
'mailpro.live,incognitomail.co\n'
602+
)
603+
604+
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv') as tmp:
605+
tmp.write(csv_content)
606+
tmp.flush()
607+
tmp_path = tmp.name
608+
609+
assert DisposableEmailDomainRestriction.objects.count() == 0
610+
611+
call_command('bulk_add_disposable_domains', tmp_path)
612+
613+
expected = [
614+
('mailfast.pro', 'incognitomail.co'),
615+
('foo.com', 'mail.tm'),
616+
('mailpro.live', 'incognitomail.co'),
617+
]
618+
for domain, provider in expected:
619+
assert DisposableEmailDomainRestriction.objects.filter(
620+
domain=domain, reason=f'Disposable email domain of {provider}'
621+
).exists()

src/olympia/users/tests/test_tasks.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from django.conf import settings
99
from django.core import mail
10+
from django.db.utils import OperationalError
1011
from django.urls import reverse
1112

1213
import pytest
@@ -21,10 +22,12 @@
2122
from olympia.amo.utils import SafeStorage
2223
from olympia.users.models import (
2324
BannedUserContent,
25+
DisposableEmailDomainRestriction,
2426
SuppressedEmail,
2527
SuppressedEmailVerification,
2628
)
2729
from olympia.users.tasks import (
30+
bulk_add_disposable_email_domains,
2831
delete_photo,
2932
resize_photo,
3033
send_suppressed_email_confirmation,
@@ -414,3 +417,56 @@ def test_retry_existing_verification(
414417
verification.reload().status
415418
== SuppressedEmailVerification.STATUS_CHOICES.Pending
416419
)
420+
421+
422+
class TestBulkAddDisposableEmailDomains(TestCase):
423+
def setUp(self):
424+
self.user_profile = user_factory()
425+
self.entries = [
426+
(f'test-{i}-{j}.com', f'provider-{i}')
427+
for i in range(10)
428+
for j in range(100)
429+
]
430+
# Ensure that the task runs with 2 batches by default
431+
self.batch_size = len(self.entries) // 2
432+
433+
def test_bulk_add_disposable_email_domains_success(self):
434+
assert DisposableEmailDomainRestriction.objects.count() == 0
435+
436+
result = bulk_add_disposable_email_domains.apply(
437+
args=[self.entries, self.batch_size]
438+
)
439+
440+
assert result.status == 'SUCCESS'
441+
assert DisposableEmailDomainRestriction.objects.count() == len(self.entries)
442+
443+
def test_bulk_add_disposable_email_domains_skips_duplicate_entries(self):
444+
[domain, provider] = self.entries[0]
445+
DisposableEmailDomainRestriction.objects.create(
446+
domain=domain, reason=f'Disposable email domain of {provider}'
447+
)
448+
assert DisposableEmailDomainRestriction.objects.count() == 1
449+
450+
result = bulk_add_disposable_email_domains.apply(
451+
args=[self.entries, self.batch_size]
452+
)
453+
454+
assert result.status == 'SUCCESS'
455+
assert DisposableEmailDomainRestriction.objects.count() == len(self.entries)
456+
457+
def test_zero_batch_size_raises_error(self):
458+
with pytest.raises(ValueError):
459+
bulk_add_disposable_email_domains.apply(args=[self.entries, 0])
460+
461+
def test_retries_on_db_timeout(self):
462+
def always_raise_operational_error(*args, **kwargs):
463+
raise OperationalError()
464+
465+
with mock.patch(
466+
'olympia.users.tasks.DisposableEmailDomainRestriction.objects.bulk_create',
467+
side_effect=always_raise_operational_error,
468+
):
469+
with pytest.raises(Retry):
470+
bulk_add_disposable_email_domains.apply(
471+
args=[self.entries, self.batch_size]
472+
).get()

0 commit comments

Comments
 (0)