import datetime
import time

from django.db.models import OuterRef

from trove.util.django import pk_chunked

from share import models as share_db
from share.management.commands import BaseShareCommand
from trove import models as trove_db


class Command(BaseShareCommand):
    # copy all non-null values from `RawDatum.expiration_date` to `SupplementaryIndexcardRdf.expiration_date`
    # (while being overly cautious to avoid joins on `RawDatum` or `SourceUniqueIdentifier`)
    # meant to be run after trove migration 0008_expiration_dates, before share.RawDatum is deleted

    def add_arguments(self, parser):
        parser.add_argument('--chunk-size', type=int, default=666)
        parser.add_argument('--today', type=datetime.date.fromisoformat, default=datetime.date.today())
        parser.add_argument('--continue-after', type=str, default=None)

    def handle(self, *args, chunk_size: int, today: datetime.date, continue_after, **kwargs):
        _before = time.perf_counter()
        _total_updated = 0
        _raw_qs = (
            share_db.RawDatum.objects.latest_for_each_suid()
            .filter(expiration_date__gt=today)  # ignore the expired (and the non-expiring)
        )
        if continue_after is not None:
            _raw_qs = _raw_qs.filter(pk__gt=continue_after)
        for _raw_pk_chunk in pk_chunked(_raw_qs, chunk_size):
            _supp_qs = trove_db.SupplementaryIndexcardRdf.objects.filter(
                from_raw_datum_id__in=_raw_pk_chunk,
                expiration_date__isnull=True,  # avoid overwriting non-null values
            )
            _updated_count = _supp_qs.update(
                expiration_date=share_db.RawDatum.objects.filter(
                    id=OuterRef('from_raw_datum_id'),
                ).values('expiration_date'),
            )
            _total_updated += _updated_count
            _last_pk = _raw_pk_chunk[-1]
            _elapsed = time.perf_counter() - _before
            self.stdout.write(
                f'{_elapsed:.2f}: migrated {_updated_count} of {len(_raw_pk_chunk)} --continue-after={_last_pk}',
            )
        _total_seconds = time.perf_counter() - _before
        self.stdout.write(
            self.style.SUCCESS(f'done! migrated {_total_updated} in {_total_seconds}s'),
        )
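
The command name comes from this file's name under `share/management/commands/`, which the diff doesn't show, so the invocation below is a hypothetical sketch. `--today` accepts an ISO date (defaulting to the current date), and `--continue-after` takes the pk printed at the end of a chunk's progress line, so an interrupted run can resume past already-migrated chunks:

    python manage.py <command_name> --chunk-size 500 --continue-after 12345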
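For context, `pk_chunked` is imported from `trove.util.django`, which this diff also doesn't show; the loop above treats it as a generator of primary-key lists no longer than `chunk_size`. A minimal sketch, assuming keyset pagination ordered by pk, might look like:

    def pk_chunked(queryset, chunk_size):
        # hypothetical sketch -- the real helper lives in trove.util.django
        # yield lists of primary keys from `queryset`, at most `chunk_size` per list,
        # paging by pk so each chunk is a separate, bounded query
        last_pk = None
        while True:
            _qs = queryset.order_by('pk')
            if last_pk is not None:
                _qs = _qs.filter(pk__gt=last_pk)
            _pk_chunk = list(_qs.values_list('pk', flat=True)[:chunk_size])
            if not _pk_chunk:
                return
            yield _pk_chunk
            last_pk = _pk_chunk[-1]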