@@ -1100,6 +1100,268 @@ pub mod primitive {
11001100 }
11011101 }
11021102
1103+ /// Columnar stores for non-decreasing `u64`, stored in various ways.
1104+ ///
1105+ /// The venerable `Vec<u64>` works as a general container for arbitrary offsets,
1106+ /// but it can be non-optimal for various patterns of offset, including constant
1107+ /// inter-offset spacing, and relatively short runs (compared to a `RankSelect`).
1108+ pub mod offsets {
1109+
1110+ pub use array:: Fixeds ;
1111+ pub use stride:: Strides ;
1112+
1113+ /// An offset container that encodes a constant spacing in its type.
1114+ ///
1115+ /// Any attempt to push any value will result in pushing the next value
1116+ /// at the specified spacing. This type is only appropriate in certain
1117+ /// contexts, for example when storing `[T; K]` array types, or having
1118+ /// introspected a `Strides` and found it to be only one constant stride.
1119+ mod array {
1120+
1121+ use crate :: { Container , Index , Len , Push } ;
1122+ use crate :: common:: index:: CopyAs ;
1123+
/// Offsets at a constant spacing of `K`, represented only by how many there are.
///
/// No offsets are stored: the value at position `i` is always `(i + 1) * K`,
/// so `count` is the only state. `CC` is the representation of the count,
/// `u64` for the owned container and `&u64` for its borrowed form.
#[derive(Copy, Clone, Debug, Default)]
pub struct Fixeds<const K: u64, CC = u64> {
    /// The number of offsets in the container.
    pub count: CC,
}
1127+
1128+ impl < const K : u64 > Container for Fixeds < K > {
1129+ type Ref < ' a > = u64 ;
1130+ type Borrowed < ' a > = Fixeds < K , & ' a u64 > ;
1131+ #[ inline( always) ]
1132+ fn borrow < ' a > ( & ' a self ) -> Self :: Borrowed < ' a > { Fixeds { count : & self . count } }
1133+ #[ inline( always) ]
1134+ fn reborrow < ' b , ' a : ' b > ( thing : Self :: Borrowed < ' a > ) -> Self :: Borrowed < ' b > where Self : ' a {
1135+ Fixeds { count : thing. count }
1136+ }
1137+ #[ inline( always) ]
1138+ fn reborrow_ref < ' b , ' a : ' b > ( thing : Self :: Ref < ' a > ) -> Self :: Ref < ' b > where Self : ' a { thing }
1139+
1140+ #[ inline( always) ]
1141+ fn extend_from_self ( & mut self , _other : Self :: Borrowed < ' _ > , range : std:: ops:: Range < usize > ) {
1142+ self . count += range. len ( ) as u64 ;
1143+ }
1144+ }
1145+
1146+ impl < const K : u64 , CC : CopyAs < u64 > + Copy > Len for Fixeds < K , CC > {
1147+ #[ inline( always) ] fn len ( & self ) -> usize { self . count . copy_as ( ) as usize }
1148+ }
1149+
1150+ impl < const K : u64 , CC > Index for Fixeds < K , CC > {
1151+ type Ref = u64 ;
1152+ #[ inline( always) ]
1153+ fn get ( & self , index : usize ) -> Self :: Ref { ( index as u64 + 1 ) * K }
1154+ }
1155+ impl < ' a , const K : u64 , CC > Index for & ' a Fixeds < K , CC > {
1156+ type Ref = u64 ;
1157+ #[ inline( always) ]
1158+ fn get ( & self , index : usize ) -> Self :: Ref { ( index as u64 + 1 ) * K }
1159+ }
1160+
1161+ impl < ' a , const K : u64 , T > Push < T > for Fixeds < K > {
1162+ // TODO: check for overflow?
1163+ #[ inline( always) ]
1164+ fn push ( & mut self , _item : T ) { self . count += 1 ; }
1165+ #[ inline( always) ]
1166+ fn extend ( & mut self , iter : impl IntoIterator < Item =T > ) {
1167+ self . count += iter. into_iter ( ) . count ( ) as u64 ;
1168+ }
1169+ }
1170+
1171+ impl < const K : u64 > crate :: HeapSize for Fixeds < K > {
1172+ #[ inline( always) ]
1173+ fn heap_size ( & self ) -> ( usize , usize ) { ( 0 , 0 ) }
1174+ }
1175+ impl < const K : u64 > crate :: Clear for Fixeds < K > {
1176+ #[ inline( always) ]
1177+ fn clear ( & mut self ) { self . count = 0 ; }
1178+ }
1179+
1180+ impl < ' a , const K : u64 > crate :: AsBytes < ' a > for Fixeds < K , & ' a u64 > {
1181+ #[ inline( always) ]
1182+ fn as_bytes ( & self ) -> impl Iterator < Item =( u64 , & ' a [ u8 ] ) > {
1183+ std:: iter:: once ( ( 8 , bytemuck:: cast_slice ( std:: slice:: from_ref ( self . count ) ) ) )
1184+ }
1185+ }
1186+ impl < ' a , const K : u64 > crate :: FromBytes < ' a > for Fixeds < K , & ' a u64 > {
1187+ #[ inline( always) ]
1188+ fn from_bytes ( bytes : & mut impl Iterator < Item =& ' a [ u8 ] > ) -> Self {
1189+ Self { count : & bytemuck:: try_cast_slice ( bytes. next ( ) . expect ( "Iterator exhausted prematurely" ) ) . unwrap ( ) [ 0 ] }
1190+ }
1191+ }
1192+
1193+ use super :: Strides ;
1194+ impl < const K : u64 , BC : Len , CC : CopyAs < u64 > +Copy > std:: convert:: TryFrom < Strides < BC , CC > > for Fixeds < K , CC > {
1195+ // On error we return the original.
1196+ type Error = Strides < BC , CC > ;
1197+ fn try_from ( item : Strides < BC , CC > ) -> Result < Self , Self :: Error > {
1198+ if item. strided ( ) == Some ( K ) { Ok ( Self { count : item. length } ) } else { Err ( item) }
1199+ }
1200+ }
1201+ }
1202+
1203+ /// A general offset container optimized for fixed inter-offset sizes.
1204+ ///
1205+ /// Although it can handle general offsets, it starts with the optimistic
1206+ /// assumption that the offsets will be evenly spaced from zero, and while
1207+ /// that holds it will maintain the stride and length. Should it stop being
1208+ /// true, when a non-conforming offset is pushed, it will start to store
1209+ /// the offsets in a general container.
1210+ mod stride {
1211+
1212+ use std:: ops:: Deref ;
1213+ use crate :: { Container , Index , Len , Push , Clear , AsBytes , FromBytes } ;
1214+ use crate :: common:: index:: CopyAs ;
1215+
/// Offsets stored as an implicit strided prefix followed by an explicit tail.
///
/// The first `length` offsets are not stored: position `i` holds
/// `stride * (i + 1)`. Every later offset is stored verbatim in `bounds`.
/// When `length` is zero the strided prefix is empty, and the first value
/// pushed becomes the stride.
#[derive(Copy, Clone, Debug, Default)]
pub struct Strides<BC = Vec<u64>, CC = u64> {
    /// The spacing between consecutive offsets in the strided prefix.
    pub stride: CC,
    /// The number of offsets covered by the strided prefix.
    pub length: CC,
    /// The offsets that follow the strided prefix, stored explicitly.
    pub bounds: BC,
}
1228+
1229+ impl Container for Strides {
1230+ type Ref < ' a > = u64 ;
1231+ type Borrowed < ' a > = Strides < & ' a [ u64 ] , & ' a u64 > ;
1232+
1233+ #[ inline( always) ] fn borrow < ' a > ( & ' a self ) -> Self :: Borrowed < ' a > { Strides { stride : & self . stride , length : & self . length , bounds : & self . bounds [ ..] } }
1234+ /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1235+ #[ inline( always) ] fn reborrow < ' b , ' a : ' b > ( item : Self :: Borrowed < ' a > ) -> Self :: Borrowed < ' b > where Self : ' a {
1236+ Strides { stride : item. stride , length : item. length , bounds : item. bounds }
1237+ }
1238+ /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1239+ #[ inline( always) ] fn reborrow_ref < ' b , ' a : ' b > ( item : Self :: Ref < ' a > ) -> Self :: Ref < ' b > where Self : ' a { item }
1240+ }
1241+
1242+ impl < ' a > Push < & ' a u64 > for Strides { #[ inline( always) ] fn push ( & mut self , item : & ' a u64 ) { self . push ( * item) } }
1243+ impl Push < u64 > for Strides { #[ inline( always) ] fn push ( & mut self , item : u64 ) { self . push ( item) } }
1244+ impl Clear for Strides { #[ inline( always) ] fn clear ( & mut self ) { self . clear ( ) } }
1245+
1246+ impl < BC : Len , CC : CopyAs < u64 > + Copy > Len for Strides < BC , CC > {
1247+ #[ inline( always) ]
1248+ fn len ( & self ) -> usize { self . length . copy_as ( ) as usize + self . bounds . len ( ) }
1249+ }
1250+ impl Index for Strides < & [ u64 ] , & u64 > {
1251+ type Ref = u64 ;
1252+ #[ inline( always) ]
1253+ fn get ( & self , index : usize ) -> Self :: Ref {
1254+ let index = index as u64 ;
1255+ if index < * self . length { ( index+1 ) * self . stride } else { self . bounds [ ( index - self . length ) as usize ] }
1256+ }
1257+ }
1258+
1259+ impl < ' a , BC : AsBytes < ' a > > AsBytes < ' a > for Strides < BC , & ' a u64 > {
1260+ #[ inline( always) ]
1261+ fn as_bytes ( & self ) -> impl Iterator < Item =( u64 , & ' a [ u8 ] ) > {
1262+ let stride = std:: iter:: once ( ( 8 , bytemuck:: cast_slice ( std:: slice:: from_ref ( self . stride ) ) ) ) ;
1263+ let length = std:: iter:: once ( ( 8 , bytemuck:: cast_slice ( std:: slice:: from_ref ( self . length ) ) ) ) ;
1264+ let bounds = self . bounds . as_bytes ( ) ;
1265+ crate :: chain ( stride, crate :: chain ( length, bounds) )
1266+ }
1267+ }
1268+ impl < ' a , BC : FromBytes < ' a > > FromBytes < ' a > for Strides < BC , & ' a u64 > {
1269+ #[ inline( always) ]
1270+ fn from_bytes ( bytes : & mut impl Iterator < Item =& ' a [ u8 ] > ) -> Self {
1271+ let stride = & bytemuck:: try_cast_slice ( bytes. next ( ) . expect ( "Iterator exhausted prematurely" ) ) . unwrap ( ) [ 0 ] ;
1272+ let length = & bytemuck:: try_cast_slice ( bytes. next ( ) . expect ( "Iterator exhausted prematurely" ) ) . unwrap ( ) [ 0 ] ;
1273+ let bounds = BC :: from_bytes ( bytes) ;
1274+ Self { stride, length, bounds }
1275+ }
1276+ }
1277+
1278+ impl Strides {
1279+ pub fn new ( stride : u64 , length : u64 ) -> Self {
1280+ Self { stride, length, bounds : Vec :: default ( ) }
1281+ }
1282+ #[ inline( always) ]
1283+ pub fn push ( & mut self , item : u64 ) {
1284+ if self . length == 0 {
1285+ self . stride = item;
1286+ self . length = 1 ;
1287+ }
1288+ else if !self . bounds . is_empty ( ) {
1289+ self . bounds . push ( item) ;
1290+ }
1291+ else if item == self . stride * ( self . length + 1 ) {
1292+ self . length += 1 ;
1293+ }
1294+ else {
1295+ self . bounds . push ( item) ;
1296+ }
1297+ }
1298+ #[ inline( always) ]
1299+ pub fn clear ( & mut self ) {
1300+ self . stride = 0 ;
1301+ self . length = 0 ;
1302+ self . bounds . clear ( ) ;
1303+ }
1304+ }
1305+
1306+ impl < BC : Deref < Target =[ u64 ] > , CC : CopyAs < u64 > +Copy > Strides < BC , CC > {
1307+ #[ inline( always) ]
1308+ pub fn bounds ( & self , index : usize ) -> ( usize , usize ) {
1309+ let stride = self . stride . copy_as ( ) ;
1310+ let length = self . length . copy_as ( ) ;
1311+ let index = index as u64 ;
1312+ let lower = if index == 0 { 0 } else {
1313+ let index = index - 1 ;
1314+ if index < length { ( index+1 ) * stride } else { self . bounds [ ( index - length) as usize ] }
1315+ } as usize ;
1316+ let upper = if index < length { ( index+1 ) * stride } else { self . bounds [ ( index - length) as usize ] } as usize ;
1317+ ( lower, upper)
1318+ }
1319+ }
1320+ impl < BC : Len , CC : CopyAs < u64 > +Copy > Strides < BC , CC > {
1321+ #[ inline( always) ] pub fn strided ( & self ) -> Option < u64 > {
1322+ if self . bounds . is_empty ( ) {
1323+ Some ( self . stride . copy_as ( ) )
1324+ }
1325+ else { None }
1326+ }
1327+ }
1328+ }
1329+
#[cfg(test)]
mod test {
    /// Converts `Strides` bounds to `Fixeds<3>` and back, checking that regular
    /// offsets convert and that an irregular push prevents the conversion.
    #[test]
    fn round_trip() {

        use crate::common::{Index, Push, Len};
        use crate::{Container, Vecs};
        use crate::primitive::offsets::{Strides, Fixeds};

        // One hundred slices of length three give evenly spaced offsets.
        let mut strided = Vecs::<Vec<i32>, Strides>::default();
        for last in 0..100 {
            strided.push(&[1i32, 2, last]);
        }

        // Evenly spaced offsets convert to the constant-stride representation.
        let Vecs { bounds, values } = strided;
        let fixed = Vecs {
            bounds: TryInto::<Fixeds<3>>::try_into(bounds).unwrap(),
            values,
        };

        assert_eq!(fixed.borrow().len(), 100);
        for position in 0..100 {
            assert_eq!(fixed.borrow().get(position).len(), 3);
        }

        // Rebuild a `Strides` from the count, then break the pattern with a slice of length two.
        let Vecs { bounds, values } = fixed;
        let mut irregular = Vecs {
            bounds: Strides::new(3, bounds.count),
            values,
        };

        irregular.push(&[0, 0]);
        assert!(TryInto::<Fixeds<3>>::try_into(irregular.bounds).is_err());
    }
}
1363+ }
1364+
11031365 pub use empty:: Empties ;
11041366 /// A columnar store for `()`.
11051367 mod empty {
0 commit comments