11/*
2- * Copyright 2024 Oxide Computer Company
2+ * Copyright 2025 Oxide Computer Company
33 */
44
55use core:: mem:: size_of;
@@ -981,10 +981,11 @@ fn archive_jobs_sync_work(
981981 waiting : _,
982982
983983 /*
984- * This field tracks when the job was successfully archived, and thus
985- * cannot appear in the archive itself.
984+ * These fields track when the job was successfully archived or purged,
985+ * and thus cannot appear in the archive itself.
986986 */
987987 time_archived : _,
988+ time_purged : _,
988989
989990 /*
990991 * We use the target_id value we already fetched above, so ignore it
@@ -1001,7 +1002,7 @@ fn archive_jobs_sync_work(
10011002 * This structure should store things that don't grow substantially for
10021003 * longer-running jobs that produce more output. In particular, we store
10031004 * the job event stream in a different way. This is important because we
1004- * wil need to deserialize this object (at a cost of CPU and memory) in
1005+ * will need to deserialise this object (at a cost of CPU and memory) in
10051006 * order to answer questions about the job: heavier jobs should not use more
10061007 * resources than lighter jobs.
10071008 */
@@ -1198,3 +1199,100 @@ pub(crate) async fn archive_jobs(log: Logger, c: Arc<Central>) -> Result<()> {
11981199 tokio:: time:: sleep ( delay) . await ;
11991200 }
12001201}
1202+
1203+ fn purge_jobs_sync_work (
1204+ log : & Logger ,
1205+ c : & Central ,
1206+ ) -> Result < Option < db:: JobId > > {
1207+ let ( reason, job) = if let Some ( job) =
1208+ c. inner . lock ( ) . unwrap ( ) . purge_queue . pop_front ( )
1209+ {
1210+ /*
1211+ * Service explicit requests from the operator to purge a job first.
1212+ */
1213+ let job = c. db . job ( job) ?;
1214+ if !job. is_archived ( ) {
1215+ warn ! ( log, "job {} not archived; cannot purge yet" , job. id) ;
1216+ return Ok ( None ) ;
1217+ }
1218+ if job. is_purged ( ) {
1219+ warn ! ( log, "job {} was already purged; ignoring request" , job. id) ;
1220+ return Ok ( None ) ;
1221+ }
1222+ ( "operator request" , job)
1223+ } else if c. config . job . auto_purge {
1224+ /*
1225+ * Otherwise, if auto-purging is enabled, purge the next as-yet
1226+ * unpurged job.
1227+ */
1228+ if let Some ( job) = c. db . job_next_unpurged ( ) ? {
1229+ if let Some ( time) = job. time_archived {
1230+ if time. age ( ) . as_secs ( ) < 14 * 86400 {
1231+ /*
1232+ * Only purge once the job has been archived for at least a
1233+ * fortnight.
1234+ */
1235+ return Ok ( None ) ;
1236+ }
1237+ }
1238+
1239+ ( "automatic" , job)
1240+ } else {
1241+ return Ok ( None ) ;
1242+ }
1243+ } else {
1244+ return Ok ( None ) ;
1245+ } ;
1246+
1247+ assert ! ( job. complete) ;
1248+ assert ! ( job. time_archived. is_some( ) ) ;
1249+ assert ! ( job. time_purged. is_none( ) ) ;
1250+
1251+ info ! ( log, "purging job {} [{reason}]..." , job. id) ;
1252+
1253+ /*
1254+ * Purging a job from the database involves removing the live records. This
1255+ * should have no impact on access to details about the job, as we will
1256+ * fetch those details from the archive file once the job has been archived.
1257+ */
1258+ c. db . job_purge ( job. id ) ?;
1259+
1260+ Ok ( Some ( job. id ) )
1261+ }
1262+
1263+ async fn purge_jobs_one ( log : & Logger , c : & Arc < Central > ) -> Result < bool > {
1264+ let start = Instant :: now ( ) ;
1265+
1266+ /*
1267+ * The work to purge a job is synchronous and may take several seconds for
1268+ * larger jobs. Avoid holding up other async tasks while we wait:
1269+ */
1270+ let id = tokio:: task:: block_in_place ( || purge_jobs_sync_work ( log, c) ) ?;
1271+
1272+ if let Some ( id) = id {
1273+ let dur = Instant :: now ( ) . saturating_duration_since ( start) ;
1274+ info ! ( log, "job {id} purged" ; "duration_msec" => dur. as_millis( ) ) ;
1275+ }
1276+
1277+ Ok ( id. is_some ( ) )
1278+ }
1279+
1280+ pub ( crate ) async fn purge_jobs ( log : Logger , c : Arc < Central > ) -> Result < ( ) > {
1281+ let delay = Duration :: from_secs ( 1 ) ;
1282+ let ok_delay = Duration :: from_millis ( c. config . job . purge_delay_msec ) ;
1283+
1284+ info ! ( log, "start job purge task" ) ;
1285+
1286+ loop {
1287+ match purge_jobs_one ( & log, & c) . await {
1288+ Ok ( true ) => {
1289+ tokio:: time:: sleep ( ok_delay) . await ;
1290+ continue ;
1291+ }
1292+ Ok ( false ) => ( ) ,
1293+ Err ( e) => error ! ( log, "job purge task error: {:?}" , e) ,
1294+ }
1295+
1296+ tokio:: time:: sleep ( delay) . await ;
1297+ }
1298+ }
0 commit comments