@@ -3,13 +3,14 @@ use std::fmt;
33use std:: mem:: size_of;
44
55use bytemuck:: { bytes_of, cast_slice, pod_read_unaligned} ;
6- use byteorder:: { ByteOrder , NativeEndian } ;
6+ use byteorder:: { BigEndian , ByteOrder , NativeEndian } ;
77use heed:: { BoxedError , BytesDecode , BytesEncode } ;
88use roaring:: RoaringBitmap ;
99
1010use crate :: distance:: Distance ;
11+ use crate :: node_id:: NodeId ;
1112use crate :: unaligned_vector:: UnalignedVector ;
12- use crate :: { ItemId , NodeId } ;
13+ use crate :: ItemId ;
1314
1415#[ derive( Clone , Debug ) ]
1516pub enum Node < ' a , D : Distance > {
@@ -18,6 +19,15 @@ pub enum Node<'a, D: Distance> {
1819 SplitPlaneNormal ( SplitPlaneNormal < ' a , D > ) ,
1920}
2021
22+ /// A node generic over the version of the database.
23+ /// Should only be used while reading from the database.
24+ #[ derive( Clone , Debug ) ]
25+ pub enum GenericReadNode < ' a , D : Distance > {
26+ Leaf ( Leaf < ' a , D > ) ,
27+ Descendants ( Descendants < ' a > ) ,
28+ SplitPlaneNormal ( GenericReadSplitPlaneNormal < ' a , D > ) ,
29+ }
30+
2131const LEAF_TAG : u8 = 0 ;
2232const DESCENDANTS_TAG : u8 = 1 ;
2333const SPLIT_PLANE_NORMAL_TAG : u8 = 2 ;
@@ -113,8 +123,8 @@ impl fmt::Debug for ItemIds<'_> {
113123}
114124
115125pub struct SplitPlaneNormal < ' a , D : Distance > {
116- pub left : NodeId ,
117- pub right : NodeId ,
126+ pub left : ItemId ,
127+ pub right : ItemId ,
118128 pub normal : Option < Cow < ' a , UnalignedVector < D :: VectorCodec > > > ,
119129}
120130
@@ -138,6 +148,35 @@ impl<D: Distance> Clone for SplitPlaneNormal<'_, D> {
138148 }
139149}
140150
151+ pub struct GenericReadSplitPlaneNormal < ' a , D : Distance > {
152+ // Before version 0.7.0 the split plane normal was stored as a `NodeId` and could point directly to items.
153+ pub left : NodeId ,
154+ pub right : NodeId ,
155+ // Before version 0.7.0 instead of storing `None` for a missing normal, we were
156+ // storing a vector filled with zeros, that will be overwritten while creating this type.
157+ pub normal : Option < Cow < ' a , UnalignedVector < D :: VectorCodec > > > ,
158+ }
159+
160+ impl < D : Distance > fmt:: Debug for GenericReadSplitPlaneNormal < ' _ , D > {
161+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
162+ let name = format ! ( "GenericReadSplitPlaneNormal<{}>" , D :: name( ) ) ;
163+ let mut debug = f. debug_struct ( & name) ;
164+
165+ debug. field ( "left" , & self . left ) . field ( "right" , & self . right ) ;
166+ match & self . normal {
167+ Some ( normal) => debug. field ( "normal" , & normal) ,
168+ None => debug. field ( "normal" , & "none" ) ,
169+ } ;
170+ debug. finish ( )
171+ }
172+ }
173+
174+ impl < D : Distance > Clone for GenericReadSplitPlaneNormal < ' _ , D > {
175+ fn clone ( & self ) -> Self {
176+ Self { left : self . left , right : self . right , normal : self . normal . clone ( ) }
177+ }
178+ }
179+
141180/// The codec used internally to encode and decode nodes.
142181pub struct NodeCodec < D > ( D ) ;
143182
@@ -154,8 +193,8 @@ impl<'a, D: Distance> BytesEncode<'a> for NodeCodec<D> {
154193 }
155194 Node :: SplitPlaneNormal ( SplitPlaneNormal { normal, left, right } ) => {
156195 bytes. push ( SPLIT_PLANE_NORMAL_TAG ) ;
157- bytes. extend_from_slice ( & left. to_bytes ( ) ) ;
158- bytes. extend_from_slice ( & right. to_bytes ( ) ) ;
196+ bytes. extend_from_slice ( & left. to_be_bytes ( ) ) ;
197+ bytes. extend_from_slice ( & right. to_be_bytes ( ) ) ;
159198 if let Some ( normal) = normal {
160199 bytes. extend_from_slice ( normal. as_bytes ( ) ) ;
161200 }
@@ -182,8 +221,10 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec<D> {
182221 Ok ( Node :: Leaf ( Leaf { header, vector } ) )
183222 }
184223 [ SPLIT_PLANE_NORMAL_TAG , bytes @ ..] => {
185- let ( left, bytes) = NodeId :: from_bytes ( bytes) ;
186- let ( right, bytes) = NodeId :: from_bytes ( bytes) ;
224+ let left = BigEndian :: read_u32 ( bytes) ;
225+ let bytes = & bytes[ std:: mem:: size_of_val ( & left) ..] ;
226+ let right = BigEndian :: read_u32 ( bytes) ;
227+ let bytes = & bytes[ std:: mem:: size_of_val ( & right) ..] ;
187228 let normal = if bytes. is_empty ( ) {
188229 None
189230 } else {
@@ -194,7 +235,108 @@ impl<'a, D: Distance> BytesDecode<'a> for NodeCodec<D> {
194235 [ DESCENDANTS_TAG , bytes @ ..] => Ok ( Node :: Descendants ( Descendants {
195236 descendants : Cow :: Owned ( RoaringBitmap :: deserialize_from ( bytes) ?) ,
196237 } ) ) ,
197- unknown => panic ! ( "What the fuck is an {unknown:?}" ) ,
238+ unknown => panic ! (
239+ "Did not recognize node tag type: {unknown:?} while decoding a node from v0.7.0"
240+ ) ,
241+ }
242+ }
243+ }
244+
245+ /// The codec used internally during read operations to decode nodes to a common interface from the v0.4.0.
246+ pub struct GenericReadNodeCodecFromV0_4_0 < D > ( D ) ;
247+
248+ impl < ' a , D : Distance > BytesDecode < ' a > for GenericReadNodeCodecFromV0_4_0 < D > {
249+ type DItem = GenericReadNode < ' a , D > ;
250+
251+ fn bytes_decode ( bytes : & ' a [ u8 ] ) -> Result < Self :: DItem , BoxedError > {
252+ match bytes {
253+ [ LEAF_TAG , bytes @ ..] => {
254+ let ( header_bytes, remaining) = bytes. split_at ( size_of :: < D :: Header > ( ) ) ;
255+ let header = pod_read_unaligned ( header_bytes) ;
256+ let vector = UnalignedVector :: < D :: VectorCodec > :: from_bytes ( remaining) ?;
257+
258+ Ok ( GenericReadNode :: Leaf ( Leaf { header, vector } ) )
259+ }
260+ [ SPLIT_PLANE_NORMAL_TAG , bytes @ ..] => {
261+ // From v0.4.0 to v0.5.0 included, the children were stored as `NodeId` and could point directly to items.
262+ let ( left, bytes) = NodeId :: from_bytes ( bytes) ;
263+ let ( right, bytes) = NodeId :: from_bytes ( bytes) ;
264+ // And the normal could not be null, but it could be a vector filled with zeros.
265+ let normal = UnalignedVector :: < D :: VectorCodec > :: from_bytes ( bytes) ?;
266+ let normal = if normal. is_zero ( ) {
267+ None
268+ } else {
269+ Some ( normal)
270+ } ;
271+ Ok ( GenericReadNode :: SplitPlaneNormal ( GenericReadSplitPlaneNormal { normal, left, right } ) )
272+ }
273+ [ DESCENDANTS_TAG , bytes @ ..] => Ok ( GenericReadNode :: Descendants ( Descendants {
274+ descendants : Cow :: Owned ( RoaringBitmap :: deserialize_from ( bytes) ?) ,
275+ } ) ) ,
276+ unknown => panic ! ( "Did not recognize node tag type: {unknown:?} while decoding a generic read node from v0.4.0" ) ,
277+ }
278+ }
279+ }
280+
281+ /// The codec used internally during read operations to decode nodes to a common interface from the v0.7.0.
282+ pub struct GenericReadNodeCodecFromV0_7_0 < D > ( D ) ;
283+
284+ impl < ' a , D : Distance > BytesDecode < ' a > for GenericReadNodeCodecFromV0_7_0 < D > {
285+ type DItem = GenericReadNode < ' a , D > ;
286+
287+ fn bytes_decode ( bytes : & ' a [ u8 ] ) -> Result < Self :: DItem , BoxedError > {
288+ NodeCodec :: bytes_decode ( bytes) . map ( |node| match node {
289+ Node :: SplitPlaneNormal ( split_plane_normal) => {
290+ GenericReadNode :: SplitPlaneNormal ( GenericReadSplitPlaneNormal {
291+ // From v0.6.0 the split plane normal always points to a tree node.
292+ left : NodeId :: tree ( split_plane_normal. left ) ,
293+ right : NodeId :: tree ( split_plane_normal. right ) ,
294+ normal : split_plane_normal. normal ,
295+ } )
296+ }
297+ Node :: Descendants ( descendants) => GenericReadNode :: Descendants ( descendants) ,
298+ Node :: Leaf ( leaf) => GenericReadNode :: Leaf ( leaf) ,
299+ } )
300+ }
301+ }
302+
303+ /// The codec used internally during read operations to decode nodes to a common interface from the v0.4.0.
304+ pub struct WriteNodeCodecForV0_5_0 < D > ( D ) ;
305+
306+ impl < ' a , D : Distance > BytesEncode < ' a > for WriteNodeCodecForV0_5_0 < D > {
307+ // Since the dimension of the vector has been lost while converting to a generic node, we need to get it back.
308+ type EItem = ( GenericReadNode < ' a , D > , usize ) ;
309+
310+ fn bytes_encode ( item : & Self :: EItem ) -> Result < Cow < ' a , [ u8 ] > , BoxedError > {
311+ // It's ok to clone and be slow because that only happens once when upgrading from v0.4.0 to v0.5.0.
312+ match & item. 0 {
313+ // The leaf didn't change between v0.4.0 and today.
314+ GenericReadNode :: Leaf ( leaf) => {
315+ Ok ( NodeCodec :: bytes_encode ( & Node :: Leaf ( leaf. clone ( ) ) ) ?. into_owned ( ) . into ( ) )
316+ }
317+ // The descendants didn't change between v0.4.0 and today.
318+ GenericReadNode :: Descendants ( descendants) => {
319+ Ok ( NodeCodec :: bytes_encode ( & Node :: < D > :: Descendants ( descendants. clone ( ) ) ) ?
320+ . into_owned ( )
321+ . into ( ) )
322+ }
323+ GenericReadNode :: SplitPlaneNormal ( GenericReadSplitPlaneNormal {
324+ left,
325+ right,
326+ normal,
327+ } ) => {
328+ // Original code at: https://github.com/meilisearch/arroy/blob/5b748bac2c69c65a97980901b02067a3a545e357/src/node.rs#L152-L157
329+ let mut bytes = Vec :: new ( ) ;
330+ bytes. push ( SPLIT_PLANE_NORMAL_TAG ) ;
331+ bytes. extend_from_slice ( & left. to_bytes ( ) ) ;
332+ bytes. extend_from_slice ( & right. to_bytes ( ) ) ;
333+ match normal {
334+ Some ( normal) => bytes. extend_from_slice ( normal. as_bytes ( ) ) ,
335+ // If the normal is None, we need to write a vector filled with zeros.
336+ None => bytes. extend_from_slice ( & vec ! [ 0 ; item. 1 ] ) ,
337+ }
338+ Ok ( Cow :: Owned ( bytes) )
339+ }
198340 }
199341 }
200342}
0 commit comments