1+ use std:: {
2+ env:: VarError ,
3+ time:: { Duration , Instant } ,
4+ } ;
5+
16use crate :: {
27 distance:: Cosine ,
8+ key:: { KeyCodec , Prefix , PrefixCodec } ,
9+ node:: { Links , Node , NodeCodec } ,
10+ node_id:: NodeMode ,
311 tests:: { create_database_indices_with_items, DatabaseHandle } ,
412 Database , Reader , Writer ,
513} ;
614use arbitrary:: { Arbitrary , Unstructured } ;
715use heed:: RoTxn ;
8- use rand:: { self , rngs:: StdRng , Rng , SeedableRng } ;
16+ use rand:: {
17+ self ,
18+ distributions:: Uniform ,
19+ rngs:: { StdRng , ThreadRng } ,
20+ Rng , SeedableRng ,
21+ } ;
922use roaring:: RoaringBitmap ;
1023use tracing:: info;
1124
12- #[ derive( Debug ) ]
13- struct Item < const M : usize > {
14- id : u32 ,
15- data : [ f32 ; M ] ,
16- }
17-
18- impl < ' a , const M : usize > Arbitrary < ' a > for Item < M > {
19- fn arbitrary ( u : & mut arbitrary:: Unstructured < ' a > ) -> arbitrary:: Result < Self > {
20- let data: [ f32 ; M ] = u. arbitrary ( ) ?;
21- let id: u32 = u. arbitrary ( ) ?;
22-
23- Ok ( Item { data, id } )
24- }
25- }
26-
/// One randomized write operation drawn from unstructured bytes by the fuzz test.
///
/// The `u32` payload of each variant is an item id; `random_read_writes` reduces it
/// modulo `NUMEL` before handing it to the writer.
2725#[ derive( Arbitrary , Debug ) ]
2826enum WriteOp < const M : usize > {
29- Add ( Item < M > ) ,
// Add an item under this id; the vector itself is generated by the test loop.
27+ Add ( u32 ) ,
// Delete the item stored under this id.
3028 Del ( u32 ) ,
3129}
3230
@@ -38,44 +36,106 @@ fn assert_all_readable<const DIM: usize>(rtxn: &RoTxn, database: Database<Cosine
3836 assert_eq ! ( & RoaringBitmap :: from_iter( found. into_iter( ) . map( |( id, _) | id) ) , reader. item_ids( ) )
3937}
4038
39+ fn assert_deleted_items_are_gone (
40+ rtxn : & RoTxn ,
41+ database : Database < Cosine > ,
42+ deleted : & RoaringBitmap ,
43+ ) {
44+ // assert the reader cannot find any deleted vectors
45+ let reader = Reader :: < Cosine > :: open ( & rtxn, 0 , database) . unwrap ( ) ;
46+ let item_intersection = deleted & reader. item_ids ( ) ;
47+ assert ! ( item_intersection. is_empty( ) , "{:?} should be deleted!" , item_intersection) ;
48+
49+ // now iter over ALL links and assert no connection exists to a deleted item
50+ let mut cursor = database
51+ . remap_types :: < PrefixCodec , NodeCodec < Cosine > > ( )
52+ . prefix_iter ( rtxn, & Prefix :: links ( 0 ) )
53+ . unwrap ( )
54+ . remap_key_type :: < KeyCodec > ( ) ;
55+
56+ while let Some ( ( key, node) ) = cursor. next ( ) . transpose ( ) . unwrap ( ) {
57+ assert ! (
58+ !deleted. contains( key. node. item) ,
59+ "the item and its data should be deleted!\n {:?}" ,
60+ & key
61+ ) ;
62+
63+ match key. node . mode {
64+ NodeMode :: Links => {
65+ if let Node :: Links ( Links { links : links_bitmap } ) = node {
66+ let link_intersection = deleted & links_bitmap. as_ref ( ) ;
67+ assert ! (
68+ link_intersection. is_empty( ) ,
69+ "LINKS VIOLATION: {:?} should be empty" ,
70+ link_intersection
71+ ) ;
72+ }
73+ }
74+ _ => continue ,
75+ }
76+ }
77+ }
78+
4179#[ test]
42- #[ ignore = "if working properly this should run infinitely" ]
4380fn random_read_writes ( ) {
4481 let seed: u64 = rand:: random ( ) ;
4582 let mut rng = StdRng :: seed_from_u64 ( seed) ;
4683
47- const DIM : usize = 768 ;
84+ const DIM : usize = 32 ;
4885 const NUMEL : usize = 1000 ;
4986 const M : usize = 16 ;
50- const M0 : usize = 32 ;
87+ const M0 : usize = 768 ;
5188
5289 let DatabaseHandle { env, database, tempdir : _ } =
5390 create_database_indices_with_items :: < Cosine , DIM , M , M0 , _ > ( 0 ..1 , NUMEL , & mut rng) ;
5491
55- for _ in 0 .. {
92+ let mut deleted = RoaringBitmap :: new ( ) ;
93+ let mut vec_rng = rand:: thread_rng ( ) ;
94+
95+ // util for generating new vectors on the fly
96+ fn gen_vec ( rng : & mut ThreadRng ) -> [ f32 ; DIM ] {
97+ let unif = Uniform :: new ( -1.0 , 1.0 ) ;
98+ std:: array:: from_fn ( |_| rng. sample ( unif) )
99+ }
100+
101+ let duration = match std:: env:: var ( "HANNOY_FUZZ_DURATION_SEC" ) {
102+ Ok ( value) => Duration :: from_secs ( value. parse ( ) . expect ( "valid number of seconds" ) ) ,
103+ Err ( VarError :: NotPresent ) => Duration :: from_secs ( 20 ) ,
104+ Err ( VarError :: NotUnicode ( e) ) => panic ! ( "Invalid duration: {e:?}" ) ,
105+ } ;
106+
107+ let before = Instant :: now ( ) ;
108+ while before. elapsed ( ) < duration {
56109 let rtxn = env. read_txn ( ) . unwrap ( ) ;
57110 assert_all_readable :: < DIM > ( & rtxn, database) ;
111+ assert_deleted_items_are_gone ( & rtxn, database, & deleted) ;
112+ deleted. clear ( ) ;
58113
59114 // get batch of write operations and apply them
60115 info ! ( "WRITING" ) ;
61116 let mut data = [ 0_u8 ; 1024 * 1024 * 1 ] ;
62117 rng. fill ( & mut data) ;
63118 let mut u = Unstructured :: new ( & data) ;
64- let ops: Vec < WriteOp < DIM > > = ( 0 ..1000 ) . map ( |_| u. arbitrary ( ) . unwrap ( ) ) . collect ( ) ;
119+ let ops: Vec < WriteOp < DIM > > = ( 0 ..100 ) . map ( |_| u. arbitrary ( ) . unwrap ( ) ) . collect ( ) ;
65120
66121 let mut wtxn = env. write_txn ( ) . unwrap ( ) ;
67122 let writer = Writer :: new ( database, 0 , DIM ) ;
68123
69124 for op in ops {
70125 match op {
71- WriteOp :: Add ( item) => {
72- let Item { data, id } = item;
126+ WriteOp :: Add ( id) => {
73127 let id = id % ( NUMEL as u32 ) ;
74- writer. add_item ( & mut wtxn, id, & data) . unwrap ( ) ;
128+ let vector = gen_vec ( & mut vec_rng) ;
129+ assert ! ( vector != [ 0.0f32 ; DIM ] ) ;
130+ writer. add_item ( & mut wtxn, id, & vector) . unwrap ( ) ;
131+
132+ // ensure added random id isn't registered in deleted
133+ let _ = deleted. remove ( id) ;
75134 }
76135 WriteOp :: Del ( id) => {
77136 let id = id % ( NUMEL as u32 ) ;
78137 let _ = writer. del_item ( & mut wtxn, id) . unwrap ( ) ;
138+ deleted. insert ( id) ;
79139 }
80140 }
81141 }
0 commit comments