Skip to content

Commit 7c67b3c

Browse files
authored
RS - Remove orphan attachments from db after a record is updated (#7294)
* fix: remove orphaned attachments when records update * fix: add test that fails with orphan cleanup * fix: lint
1 parent 9b60561 commit 7c67b3c

File tree

1 file changed

+196
-0
lines changed

1 file changed

+196
-0
lines changed

components/remote_settings/src/storage.rs

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ impl Storage {
231231

232232
Self::update_record_rows(&tx, collection_url, records)?;
233233
Self::update_collection_metadata(&tx, collection_url, last_modified, metadata)?;
234+
Self::cleanup_orphaned_attachments(&tx, collection_url)?;
234235
tx.commit()?;
235236
Ok(())
236237
}
@@ -326,6 +327,23 @@ impl Storage {
326327
tx.commit()?;
327328
Ok(())
328329
}
330+
331+
/// Remove attachments that are no longer referenced by any current record.
332+
/// When the server updates an attachment, the record gets a new UUID/hash in its location
333+
/// field. Without this cleanup, the old attachment blob persists forever, causing unbounded
334+
/// database growth (1+ GB observed in production for `quicksuggest-amp.sql`).
335+
fn cleanup_orphaned_attachments(tx: &Transaction<'_>, collection_url: &str) -> Result<()> {
336+
tx.execute(
337+
"DELETE FROM attachments
338+
WHERE collection_url = ?1
339+
AND NOT EXISTS (
340+
SELECT 1 FROM records WHERE collection_url = ?1
341+
AND json_extract(data, '$.attachment.location') = attachments.id
342+
)",
343+
params![collection_url],
344+
)?;
345+
Ok(())
346+
}
329347
}
330348

331349
/// Stores the SQLite connection, which is lazily constructed and can be closed/shutdown.
@@ -564,6 +582,184 @@ mod tests {
564582
Ok(())
565583
}
566584

585+
/// Test that orphaned attachments are cleaned up when a record's attachment location changes.
586+
/// This reproduces the 1.1GB bloat observed in production. The `quicksuggest-amp` collection
587+
/// has records like:
588+
/// {
589+
/// "type": "amp",
590+
/// "country": "US",
591+
/// "form_factor": "phone",
592+
/// "filter_expression": "env.country == 'US' && env.formFactor == 'phone'",
593+
/// "id": "sponsored-suggestions-us-phone",
594+
/// "attachment": { "hash": "992ed42aa...", "size": 9795975, "filename": "sponsored-suggestions-us-phone.json", ... }
595+
/// }
596+
/// When the data refreshes (schema timestamp changes), the record keeps its ID but gets
597+
/// a new attachment with a different hash/location. The old cached blob was never cleaned up.
598+
#[test]
599+
fn test_storage_orphaned_attachments_cleaned_up_on_update() -> Result<()> {
600+
let mut storage = Storage::new(":memory:".into());
601+
let collection_url = "https://example.com/api";
602+
603+
let attachment_v1 = b"version 1 data";
604+
let attachment_v2 = b"version 2 data";
605+
606+
let attachment_meta_v1 = Attachment {
607+
filename: "sponsored-suggestions-us-phone.json".to_string(),
608+
mimetype: "application/json".to_string(),
609+
location: "main-workspace/quicksuggest-amp/attachment-v1.json".to_string(),
610+
hash: format!("{:x}", Sha256::digest(attachment_v1)),
611+
size: attachment_v1.len() as u64,
612+
};
613+
614+
let attachment_meta_v2 = Attachment {
615+
filename: "sponsored-suggestions-us-phone.json".to_string(),
616+
mimetype: "application/json".to_string(),
617+
location: "main-workspace/quicksuggest-amp/attachment-v2.json".to_string(),
618+
hash: format!("{:x}", Sha256::digest(attachment_v2)),
619+
size: attachment_v2.len() as u64,
620+
};
621+
622+
// Initially record points to attachment_v1
623+
let records_v1 = vec![RemoteSettingsRecord {
624+
id: "sponsored-suggestions-us-phone".to_string(),
625+
last_modified: 100,
626+
deleted: false,
627+
attachment: Some(attachment_meta_v1.clone()),
628+
fields: serde_json::json!({"type": "amp"})
629+
.as_object()
630+
.unwrap()
631+
.clone(),
632+
}];
633+
634+
storage.insert_collection_content(
635+
collection_url,
636+
&records_v1,
637+
100,
638+
CollectionMetadata::default(),
639+
)?;
640+
storage.set_attachment(collection_url, &attachment_meta_v1.location, attachment_v1)?;
641+
642+
let fetched = storage.get_attachment(collection_url, attachment_meta_v1.clone())?;
643+
assert!(fetched.is_some(), "v1 attachment should be stored");
644+
645+
// Simulate a server data refresh while keeping the same record ID
646+
// but a new attachment location
647+
let records_v2 = vec![RemoteSettingsRecord {
648+
id: "sponsored-suggestions-us-phone".to_string(),
649+
last_modified: 200,
650+
deleted: false,
651+
attachment: Some(attachment_meta_v2.clone()),
652+
fields: serde_json::json!({"type": "amp"})
653+
.as_object()
654+
.unwrap()
655+
.clone(),
656+
}];
657+
658+
storage.insert_collection_content(
659+
collection_url,
660+
&records_v2,
661+
200,
662+
CollectionMetadata::default(),
663+
)?;
664+
storage.set_attachment(collection_url, &attachment_meta_v2.location, attachment_v2)?;
665+
666+
let fetched_v2 = storage.get_attachment(collection_url, attachment_meta_v2)?;
667+
assert!(fetched_v2.is_some(), "v2 attachment should be stored");
668+
669+
let fetched_v1 = storage.get_attachment(collection_url, attachment_meta_v1)?;
670+
assert!(
671+
fetched_v1.is_none(),
672+
"v1 attachment should be cleaned up after record points to v2"
673+
);
674+
675+
Ok(())
676+
}
677+
678+
/// Test that orphaned attachments are cleaned up when a record is deleted via tombstone.
679+
/// The `quicksuggest-amp` changeset uses filter_expression to target specific countries
680+
/// and form factors. If the server removes a record (e.g. drops a country), a tombstone
681+
/// is sent:
682+
/// {
683+
/// "id": "sponsored-suggestions-gb-phone",
684+
/// "last_modified": 1774549156905,
685+
/// "deleted": true
686+
/// }
687+
/// The record row gets deleted, but the cached attachment blob was never cleaned up.
688+
#[test]
689+
fn test_storage_orphaned_attachments_cleaned_up_on_delete() -> Result<()> {
690+
let mut storage = Storage::new(":memory:".into());
691+
let collection_url = "https://example.com/api";
692+
693+
let attachment_data = b"sponsored suggestions for GB phone";
694+
695+
let attachment_meta = Attachment {
696+
filename: "sponsored-suggestions-gb-phone.json".to_string(),
697+
mimetype: "application/json".to_string(),
698+
location: "main-workspace/quicksuggest-amp/attachment-gb.json".to_string(),
699+
hash: format!("{:x}", Sha256::digest(attachment_data)),
700+
size: attachment_data.len() as u64,
701+
};
702+
703+
// Initially we have two records, one with an attachment
704+
let initial_records = vec![
705+
RemoteSettingsRecord {
706+
id: "sponsored-suggestions-gb-phone".to_string(),
707+
last_modified: 100,
708+
deleted: false,
709+
attachment: Some(attachment_meta.clone()),
710+
fields: serde_json::json!({"type": "amp"})
711+
.as_object()
712+
.unwrap()
713+
.clone(),
714+
},
715+
RemoteSettingsRecord {
716+
id: "sponsored-suggestions-us-phone".to_string(),
717+
last_modified: 100,
718+
deleted: false,
719+
attachment: None,
720+
fields: serde_json::json!({"type": "amp"})
721+
.as_object()
722+
.unwrap()
723+
.clone(),
724+
},
725+
];
726+
727+
storage.insert_collection_content(
728+
collection_url,
729+
&initial_records,
730+
100,
731+
CollectionMetadata::default(),
732+
)?;
733+
storage.set_attachment(collection_url, &attachment_meta.location, attachment_data)?;
734+
735+
let fetched = storage.get_attachment(collection_url, attachment_meta.clone())?;
736+
assert!(fetched.is_some(), "GB attachment should be stored");
737+
738+
// Simulate server sending a tombstone, GB record is deleted
739+
let updated_records = vec![RemoteSettingsRecord {
740+
id: "sponsored-suggestions-gb-phone".to_string(),
741+
last_modified: 200,
742+
deleted: true,
743+
attachment: None,
744+
fields: RsJsonObject::new(),
745+
}];
746+
747+
storage.insert_collection_content(
748+
collection_url,
749+
&updated_records,
750+
200,
751+
CollectionMetadata::default(),
752+
)?;
753+
754+
let fetched = storage.get_attachment(collection_url, attachment_meta)?;
755+
assert!(
756+
fetched.is_none(),
757+
"GB attachment should be cleaned up after record is deleted via tombstone"
758+
);
759+
760+
Ok(())
761+
}
762+
567763
#[test]
568764
fn test_storage_get_attachment_not_found() -> Result<()> {
569765
let mut storage = Storage::new(":memory:".into());

0 commit comments

Comments
 (0)