33import ora
44
55from ..base import _InternalProgram , ProgramRunning , ProgramSuccess
6- from apsis .lib .json import check_schema
6+ from apsis .lib .json import check_schema , nkey
77from apsis .lib .parse import parse_duration
88from apsis .runs import template_expand
99
@@ -23,7 +23,7 @@ class ArchiveProgram(_InternalProgram):
2323 skipped for archiving.
2424 """
2525
26- def __init__ (self , * , age , path , count ):
26+ def __init__ (self , * , age , path , count , chunk_size = None , chunk_sleep = None ):
2727 """
2828 If this archive file doesn't exist, it is created automatically on
2929 first use; the containing directory must exist.
@@ -35,38 +35,59 @@ def __init__(self, *, age, path, count):
3535 Apsis database file.
3636 :param count:
3737 Maximum number of runs to archive per run of this program.
38+ :param chunk_size:
39+ Number of runs to archive in one chunk. Each chunk is blocking.
40+ :param chunk_sleep:
41+ Time in seconds to wait between chunks.
3842 """
39- self .__age = age
40- self .__path = path
41- self .__count = count
43+ self .__age = age
44+ self .__path = path
45+ self .__count = count
46+ self .__chunk_size = chunk_size
47+ self .__chunk_sleep = chunk_sleep
4248
4349
def __str__(self):
    """
    Returns a short human-readable summary: the archive age and the
    archive file path.
    """
    return f"archive age {self.__age} → {self.__path}"
4652
4753
def bind(self, args):
    """
    Returns a new instance of this program with its parameters bound.

    Each parameter is expanded as a template against `args` and then
    converted: `age` is parsed as a duration, `count` is converted to
    `int`.  The optional `chunk_size` and `chunk_sleep` stay `None` when
    unset; otherwise they are expanded and converted to `int` and `float`
    respectively.

    :param args:
      Values passed to `template_expand` for each templated parameter.
    :return:
      A new instance of the same type as `self`.
    """
    def expand(template):
        return template_expand(template, args)

    # Convert in the same order as the constructor arguments, so the first
    # malformed parameter is the one that raises.
    age = parse_duration(expand(self.__age))
    path = expand(self.__path)
    count = int(expand(self.__count))

    # The chunking parameters are optional; don't try to expand `None`.
    if self.__chunk_size is None:
        chunk_size = None
    else:
        chunk_size = int(expand(self.__chunk_size))

    if self.__chunk_sleep is None:
        chunk_sleep = None
    else:
        chunk_sleep = float(expand(self.__chunk_sleep))

    return type(self)(
        age=age,
        path=path,
        count=count,
        chunk_size=chunk_size,
        chunk_sleep=chunk_sleep,
    )
5364
5465
@classmethod
def from_jso(cls, jso):
    """
    Constructs an instance from its JSO representation.

    Reads the required keys `age`, `path` (a `str`) and `count` (an `int`),
    and the optional keys `chunk_size` (an `int`) and `chunk_sleep` (a
    `float`), each of which defaults to `None` when absent.

    :param jso:
      The JSO mapping to read; it is consumed through `check_schema`.
    :return:
      A new instance of `cls`.
    """
    with check_schema(jso) as pop:
        age = pop("age")
        path = pop("path", str)
        count = pop("count", int)
        # Chunking is optional; `None` means the key was not given.
        chunk_size = pop("chunk_size", int, None)
        chunk_sleep = pop("chunk_sleep", float, None)

    return cls(
        age=age,
        path=path,
        count=count,
        chunk_size=chunk_size,
        chunk_sleep=chunk_sleep,
    )
6281
6382
def to_jso(self):
    """
    Returns the JSO representation of this program.

    Extends the base class's JSO with `age`, `path` and `count`, plus the
    `chunk_size` and `chunk_sleep` entries produced by `nkey`.
    """
    return {
        **super().to_jso(),
        "age": self.__age,
        "path": self.__path,
        "count": self.__count,
        # NOTE(review): `nkey` presumably yields an empty mapping for a
        # `None` value, so unset chunk parameters are omitted -- confirm
        # against `apsis.lib.json.nkey`.
        **nkey("chunk_size", self.__chunk_size),
        **nkey("chunk_sleep", self.__chunk_sleep),
    }
7192
7293
@@ -78,28 +99,47 @@ async def wait(self, apsis):
7899 # FIXME: Private attributes.
79100 db = apsis ._Apsis__db
80101
81- run_ids = db .get_archive_run_ids (
82- before = ora .now () - self .__age ,
83- count = self .__count ,
84- )
85-
86- # Make sure all runs are retired; else skip them.
87- run_ids = [ r for r in run_ids if apsis .run_store .retire (r ) ]
88-
89- if len (run_ids ) > 0 :
90- # Archive these runs.
91- row_counts = db .archive (self .__path , run_ids )
92- # Also vacuum to free space.
93- db .vacuum ()
102+ if not (self .__chunk_size is None or 0 < self .__chunk_size ):
103+ raise ValueError ("nonpositive chunk size" )
94104
95- else :
96- row_counts = {}
105+ row_counts = {}
106+ meta = {
107+ "run count" : 0 ,
108+ "run_ids" : [],
109+ "row counts" : row_counts
110+ }
97111
98- return ProgramSuccess (meta = {
99- "run count" : len (run_ids ),
100- "run_ids" : run_ids ,
101- "row counts" : row_counts ,
102- })
112+ count = self .__count
113+ while count > 0 :
114+ chunk = (
115+ count if self .__chunk_size is None
116+ else min (count , self .__chunk_size )
117+ )
118+ run_ids = db .get_archive_run_ids (
119+ before = ora .now () - self .__age ,
120+ count = chunk ,
121+ )
122+ count -= chunk
123+
124+ # Make sure all runs are retired; else skip them.
125+ run_ids = [ r for r in run_ids if apsis .run_store .retire (r ) ]
126+
127+ if len (run_ids ) > 0 :
128+ # Archive these runs.
129+ chunk_row_counts = db .archive (self .__path , run_ids )
130+ # Accumulate metadata.
131+ meta ["run count" ] += len (run_ids )
132+ meta ["run_ids" ].append (run_ids )
133+ for key , value in chunk_row_counts .items ():
134+ row_counts [key ] = row_counts .get (key , 0 ) + value
135+ # Also vacuum to free space.
136+ db .vacuum ()
137+
138+ if count > 0 and self .__chunk_sleep is not None :
139+ # Yield to the event loop.
140+ await asyncio .sleep (self .__chunk_sleep )
141+
142+ return ProgramSuccess (meta = meta )
103143
104144
105145 def reconnect (self , run_id , run_state , apsis ):
0 commit comments