77
88class Command (BaseShareCommand ):
def add_arguments(self, parser):
    """Register this command's command-line options on `parser`.

    Options:
        --chunksize: number of RawData per DELETE; a positive integer
            (default 1024).
        --really-really: skip the final confirmation prompt before deleting.
    """
    import argparse  # local import: only needed for the validation error type

    def _positive_int(raw_value):
        # `chunksize - 1` is later used as a queryset slice start, so zero or
        # a negative number must be rejected here, with a clear message,
        # rather than failing obscurely mid-command
        try:
            _value = int(raw_value)
        except ValueError:
            raise argparse.ArgumentTypeError(f'invalid int value: {raw_value!r}') from None
        if _value < 1:
            raise argparse.ArgumentTypeError(f'must be a positive integer (got {_value})')
        return _value

    parser.add_argument(
        '--chunksize',
        type=_positive_int,
        default=1024,
        help='number of RawData per DELETE',
    )
    parser.add_argument(
        '--really-really',
        action='store_true',
        help='skip final confirmation prompt before really deleting',
    )
1112
12- def handle (self , * args , really_really : bool , ** kwargs ):
13+ def handle (self , * args , chunksize : int , really_really : bool , ** kwargs ):
1314 # note: `share.transform` deleted; `transformer_key` always null for trove-ingested rdf
1415 _pretrove_configs = _db .SourceConfig .objects .filter (transformer_key__isnull = False )
1516 _pretrove_configs_with_rawdata = (
@@ -23,17 +24,50 @@ def handle(self, *args, really_really: bool, **kwargs):
2324 if not _pretrove_configs_with_rawdata .exists ():
2425 self .stdout .write (self .style .SUCCESS (_ ('nothing to delete' )))
2526 return
27+ _sourceconfig_ids_and_labels = list (
28+ _pretrove_configs_with_rawdata .values_list ('id' , 'label' ),
29+ )
2630 self .stdout .write (self .style .WARNING (_ ('pre-trove source-configs with deletable rawdata:' )))
27- for _label in _pretrove_configs_with_rawdata . values_list ( 'label' , flat = True ) :
28- self .stdout .write (f'\t { _label } ' )
31+ for __ , _sourceconfig_label in _sourceconfig_ids_and_labels :
32+ self .stdout .write (f'\t { _sourceconfig_label } ' )
2933 if really_really or self .input_confirm (self .style .WARNING (_ ('really DELETE ALL raw metadata records belonging to these source-configs? (y/n)' ))):
30- self .stdout .write (_ ('deleting...' ))
31- _rawdata_to_delete = (
32- _db .RawDatum .objects
33- .filter (suid__source_config_id__in = _pretrove_configs )
34- )
35- _deleted_total , _deleted_counts = _rawdata_to_delete .delete ()
36- for _name , _count in _deleted_counts .items ():
37- self .stdout .write (self .style .SUCCESS (f'{ _name } : deleted { _count } ' ))
34+ _total_deleted = 0
35+ for _sourceconfig_id , _sourceconfig_label in _sourceconfig_ids_and_labels :
36+ _total_deleted += self ._do_delete_rawdata (_sourceconfig_id , _sourceconfig_label , chunksize )
37+ self .stdout .write (self .style .SUCCESS (_ ('deleted %(count)s items' ) % {'count' : _total_deleted }))
3838 else :
39- self .stdout .write (self .style .SUCCESS ('deleted nothing' ))
39+ self .stdout .write (self .style .SUCCESS (_ ('deleted nothing' )))
40+
def _do_delete_rawdata(self, sourceconfig_id, sourceconfig_label, chunksize) -> int:
    """Delete all RawDatum rows for one source-config, one chunk at a time.

    `.delete()` cannot be called on sliced querysets, so each chunk is bounded
    by a "sentinel" pk instead: find the pk at position `chunksize` (in pk
    order) among the remaining rows, then delete every remaining row whose pk
    is at or below it.

    Args:
        sourceconfig_id: value matched against `suid__source_config_id`.
        sourceconfig_label: used only in progress messages.
        chunksize: how many rows to bound each DELETE to; must be >= 1.

    Returns:
        The sum of the counts reported by each chunk's `.delete()`.

    Raises:
        ValueError: if `chunksize` is less than 1.
        RuntimeError: if a sentinel pk is not greater than the previous one,
            or if a delete reports any model other than `share.RawDatum`.
    """
    if chunksize < 1:
        # `chunksize - 1` is the slice start below; zero or a negative value
        # would make it a negative index, which querysets do not support
        raise ValueError(f'chunksize must be a positive integer (got {chunksize!r})')
    _rawdata_qs = (
        _db.RawDatum.objects
        .filter(suid__source_config_id=sourceconfig_id)
        .order_by('pk')  # for consistent chunking
    )
    self.stdout.write(_('%(label)s: deleting all rawdata...') % {'label': sourceconfig_label})
    _prior_sentinel_pk = None
    _deleted_count = 0
    while True:  # for each chunk:
        _pk_qs = _rawdata_qs.values_list('pk', flat=True)
        # get the last pk in the chunk -- compare against None explicitly so
        # a falsy pk (0) is not mistaken for "no such row"
        _sentinel_pk = _pk_qs[chunksize - 1:chunksize].first()
        if _sentinel_pk is None:
            # fewer than `chunksize` rows remain; the chunk ends at the last one
            _sentinel_pk = _pk_qs.last()
        if _sentinel_pk is None:
            break  # nothing left to delete
        # sanity check: each chunk must end strictly after the previous one
        if (_prior_sentinel_pk is not None) and (_sentinel_pk <= _prior_sentinel_pk):
            raise RuntimeError(f'sentinel pks not ascending?? got {_sentinel_pk} after {_prior_sentinel_pk} ')
        _prior_sentinel_pk = _sentinel_pk
        _chunk_deleted_count, _by_model = _rawdata_qs.filter(pk__lte=_sentinel_pk).delete()
        # sanity check: the per-model counts should mention only RawDatum
        if _by_model and set(_by_model.keys()) != {'share.RawDatum'}:
            raise RuntimeError(f'deleted models other than RawDatum?? {_by_model} ')
        self.stdout.write(
            _('%(label)s: deleted %(count)s') % {'label': sourceconfig_label, 'count': _chunk_deleted_count},
        )
        _deleted_count += _chunk_deleted_count
    self.stdout.write(self.style.SUCCESS(
        _('%(label)s: done; deleted %(count)s') % {'label': sourceconfig_label, 'count': _deleted_count},
    ))
    return _deleted_count
0 commit comments