@@ -231,6 +231,7 @@ impl Storage {
231231
232232 Self :: update_record_rows ( & tx, collection_url, records) ?;
233233 Self :: update_collection_metadata ( & tx, collection_url, last_modified, metadata) ?;
234+ Self :: cleanup_orphaned_attachments ( & tx, collection_url) ?;
234235 tx. commit ( ) ?;
235236 Ok ( ( ) )
236237 }
@@ -326,6 +327,23 @@ impl Storage {
326327 tx. commit ( ) ?;
327328 Ok ( ( ) )
328329 }
330+
331+ /// Remove attachments that are no longer referenced by any current record.
332+ /// When the server updates an attachment, the record gets a new UUID/hash in its location
333+ /// field. Without this cleanup, the old attachment blob persists forever, causing unbounded
334+ /// database growth (1+ GB observed in production for `quicksuggest-amp.sql`).
335+ fn cleanup_orphaned_attachments ( tx : & Transaction < ' _ > , collection_url : & str ) -> Result < ( ) > {
336+ tx. execute (
337+ "DELETE FROM attachments
338+ WHERE collection_url = ?1
339+ AND NOT EXISTS (
340+ SELECT 1 FROM records WHERE collection_url = ?1
341+ AND json_extract(data, '$.attachment.location') = attachments.id
342+ )" ,
343+ params ! [ collection_url] ,
344+ ) ?;
345+ Ok ( ( ) )
346+ }
329347}
330348
331349/// Stores the SQLite connection, which is lazily constructed and can be closed/shutdown.
@@ -564,6 +582,184 @@ mod tests {
564582 Ok ( ( ) )
565583 }
566584
585+ /// Test that orphaned attachments are cleaned up when a record's attachment location changes.
586+ /// This reproduces the 1.1GB bloat observed in production. The `quicksuggest-amp` collection
587+ /// has records like:
588+ /// {
589+ /// "type": "amp",
590+ /// "country": "US",
591+ /// "form_factor": "phone",
592+ /// "filter_expression": "env.country == 'US' && env.formFactor == 'phone'",
593+ /// "id": "sponsored-suggestions-us-phone",
594+ /// "attachment": { "hash": "992ed42aa...", "size": 9795975, "filename": "sponsored-suggestions-us-phone.json", ... }
595+ /// }
596+ /// When the data refreshes (schema timestamp changes), the record keeps its ID but gets
597+ /// a new attachment with a different hash/location. The old cached blob was never cleaned up.
598+ #[ test]
599+ fn test_storage_orphaned_attachments_cleaned_up_on_update ( ) -> Result < ( ) > {
600+ let mut storage = Storage :: new ( ":memory:" . into ( ) ) ;
601+ let collection_url = "https://example.com/api" ;
602+
603+ let attachment_v1 = b"version 1 data" ;
604+ let attachment_v2 = b"version 2 data" ;
605+
606+ let attachment_meta_v1 = Attachment {
607+ filename : "sponsored-suggestions-us-phone.json" . to_string ( ) ,
608+ mimetype : "application/json" . to_string ( ) ,
609+ location : "main-workspace/quicksuggest-amp/attachment-v1.json" . to_string ( ) ,
610+ hash : format ! ( "{:x}" , Sha256 :: digest( attachment_v1) ) ,
611+ size : attachment_v1. len ( ) as u64 ,
612+ } ;
613+
614+ let attachment_meta_v2 = Attachment {
615+ filename : "sponsored-suggestions-us-phone.json" . to_string ( ) ,
616+ mimetype : "application/json" . to_string ( ) ,
617+ location : "main-workspace/quicksuggest-amp/attachment-v2.json" . to_string ( ) ,
618+ hash : format ! ( "{:x}" , Sha256 :: digest( attachment_v2) ) ,
619+ size : attachment_v2. len ( ) as u64 ,
620+ } ;
621+
622+ // Initially record points to attachment_v1
623+ let records_v1 = vec ! [ RemoteSettingsRecord {
624+ id: "sponsored-suggestions-us-phone" . to_string( ) ,
625+ last_modified: 100 ,
626+ deleted: false ,
627+ attachment: Some ( attachment_meta_v1. clone( ) ) ,
628+ fields: serde_json:: json!( { "type" : "amp" } )
629+ . as_object( )
630+ . unwrap( )
631+ . clone( ) ,
632+ } ] ;
633+
634+ storage. insert_collection_content (
635+ collection_url,
636+ & records_v1,
637+ 100 ,
638+ CollectionMetadata :: default ( ) ,
639+ ) ?;
640+ storage. set_attachment ( collection_url, & attachment_meta_v1. location , attachment_v1) ?;
641+
642+ let fetched = storage. get_attachment ( collection_url, attachment_meta_v1. clone ( ) ) ?;
643+ assert ! ( fetched. is_some( ) , "v1 attachment should be stored" ) ;
644+
645+ // Simulate a server data refresh while keeping the same record ID
646+ // but a new attachment location
647+ let records_v2 = vec ! [ RemoteSettingsRecord {
648+ id: "sponsored-suggestions-us-phone" . to_string( ) ,
649+ last_modified: 200 ,
650+ deleted: false ,
651+ attachment: Some ( attachment_meta_v2. clone( ) ) ,
652+ fields: serde_json:: json!( { "type" : "amp" } )
653+ . as_object( )
654+ . unwrap( )
655+ . clone( ) ,
656+ } ] ;
657+
658+ storage. insert_collection_content (
659+ collection_url,
660+ & records_v2,
661+ 200 ,
662+ CollectionMetadata :: default ( ) ,
663+ ) ?;
664+ storage. set_attachment ( collection_url, & attachment_meta_v2. location , attachment_v2) ?;
665+
666+ let fetched_v2 = storage. get_attachment ( collection_url, attachment_meta_v2) ?;
667+ assert ! ( fetched_v2. is_some( ) , "v2 attachment should be stored" ) ;
668+
669+ let fetched_v1 = storage. get_attachment ( collection_url, attachment_meta_v1) ?;
670+ assert ! (
671+ fetched_v1. is_none( ) ,
672+ "v1 attachment should be cleaned up after record points to v2"
673+ ) ;
674+
675+ Ok ( ( ) )
676+ }
677+
678+ /// Test that orphaned attachments are cleaned up when a record is deleted via tombstone.
679+ /// The `quicksuggest-amp` changeset uses filter_expression to target specific countries
680+ /// and form factors. If the server removes a record (e.g. drops a country), a tombstone
681+ /// is sent:
682+ /// {
683+ /// "id": "sponsored-suggestions-gb-phone",
684+ /// "last_modified": 1774549156905,
685+ /// "deleted": true
686+ /// }
687+ /// The record row gets deleted, but the cached attachment blob was never cleaned up.
688+ #[ test]
689+ fn test_storage_orphaned_attachments_cleaned_up_on_delete ( ) -> Result < ( ) > {
690+ let mut storage = Storage :: new ( ":memory:" . into ( ) ) ;
691+ let collection_url = "https://example.com/api" ;
692+
693+ let attachment_data = b"sponsored suggestions for GB phone" ;
694+
695+ let attachment_meta = Attachment {
696+ filename : "sponsored-suggestions-gb-phone.json" . to_string ( ) ,
697+ mimetype : "application/json" . to_string ( ) ,
698+ location : "main-workspace/quicksuggest-amp/attachment-gb.json" . to_string ( ) ,
699+ hash : format ! ( "{:x}" , Sha256 :: digest( attachment_data) ) ,
700+ size : attachment_data. len ( ) as u64 ,
701+ } ;
702+
703+ // Initially we have two records, one with an attachment
704+ let initial_records = vec ! [
705+ RemoteSettingsRecord {
706+ id: "sponsored-suggestions-gb-phone" . to_string( ) ,
707+ last_modified: 100 ,
708+ deleted: false ,
709+ attachment: Some ( attachment_meta. clone( ) ) ,
710+ fields: serde_json:: json!( { "type" : "amp" } )
711+ . as_object( )
712+ . unwrap( )
713+ . clone( ) ,
714+ } ,
715+ RemoteSettingsRecord {
716+ id: "sponsored-suggestions-us-phone" . to_string( ) ,
717+ last_modified: 100 ,
718+ deleted: false ,
719+ attachment: None ,
720+ fields: serde_json:: json!( { "type" : "amp" } )
721+ . as_object( )
722+ . unwrap( )
723+ . clone( ) ,
724+ } ,
725+ ] ;
726+
727+ storage. insert_collection_content (
728+ collection_url,
729+ & initial_records,
730+ 100 ,
731+ CollectionMetadata :: default ( ) ,
732+ ) ?;
733+ storage. set_attachment ( collection_url, & attachment_meta. location , attachment_data) ?;
734+
735+ let fetched = storage. get_attachment ( collection_url, attachment_meta. clone ( ) ) ?;
736+ assert ! ( fetched. is_some( ) , "GB attachment should be stored" ) ;
737+
738+ // Simulate server sending a tombstone, GB record is deleted
739+ let updated_records = vec ! [ RemoteSettingsRecord {
740+ id: "sponsored-suggestions-gb-phone" . to_string( ) ,
741+ last_modified: 200 ,
742+ deleted: true ,
743+ attachment: None ,
744+ fields: RsJsonObject :: new( ) ,
745+ } ] ;
746+
747+ storage. insert_collection_content (
748+ collection_url,
749+ & updated_records,
750+ 200 ,
751+ CollectionMetadata :: default ( ) ,
752+ ) ?;
753+
754+ let fetched = storage. get_attachment ( collection_url, attachment_meta) ?;
755+ assert ! (
756+ fetched. is_none( ) ,
757+ "GB attachment should be cleaned up after record is deleted via tombstone"
758+ ) ;
759+
760+ Ok ( ( ) )
761+ }
762+
567763 #[ test]
568764 fn test_storage_get_attachment_not_found ( ) -> Result < ( ) > {
569765 let mut storage = Storage :: new ( ":memory:" . into ( ) ) ;
0 commit comments