Skip to content

Commit 635f738

Browse files
Merge pull request #55 from frankmcsherry/offsets
Containers for integer offsets
2 parents bda04ef + 6a30da0 commit 635f738

File tree

1 file changed

+262
-0
lines changed

1 file changed

+262
-0
lines changed

src/lib.rs

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,268 @@ pub mod primitive {
11001100
}
11011101
}
11021102

1103+
/// Columnar stores for non-decreasing `u64`, stored in various ways.
1104+
///
1105+
/// The venerable `Vec<u64>` works as a general container for arbitrary offsets,
1106+
/// but it can be non-optimal for various patterns of offset, including constant
1107+
/// inter-offset spacing, and relatively short runs (compared to a `RankSelect`).
1108+
pub mod offsets {
1109+
1110+
pub use array::Fixeds;
1111+
pub use stride::Strides;
1112+
1113+
/// An offset container that encodes a constant spacing in its type.
1114+
///
1115+
/// Any attempt to push any value will result in pushing the next value
1116+
/// at the specified spacing. This type is only appropriate in certain
1117+
/// contexts, for example when storing `[T; K]` array types, or having
1118+
/// introspected a `Strides` and found it to be only one constant stride.
1119+
mod array {
1120+
1121+
use crate::{Container, Index, Len, Push};
1122+
use crate::common::index::CopyAs;
1123+
1124+
/// Offsets at a compile-time constant spacing of `K`.
///
/// Only the number of offsets is stored; the offset at position `i` is
/// computed as `(i + 1) * K` instead of being read from memory.
#[derive(Copy, Clone, Debug, Default)]
pub struct Fixeds<const K: u64, CC = u64> {
    /// The number of offsets in the container.
    pub count: CC,
}
1127+
1128+
impl<const K: u64> Container for Fixeds<K> {
1129+
type Ref<'a> = u64;
1130+
type Borrowed<'a> = Fixeds<K, &'a u64>;
1131+
#[inline(always)]
1132+
fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Fixeds { count: &self.count } }
1133+
#[inline(always)]
1134+
fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1135+
Fixeds { count: thing.count }
1136+
}
1137+
#[inline(always)]
1138+
fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1139+
1140+
#[inline(always)]
1141+
fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1142+
self.count += range.len() as u64;
1143+
}
1144+
}
1145+
1146+
impl<const K: u64, CC: CopyAs<u64> + Copy> Len for Fixeds<K, CC> {
    /// The number of offsets, read from `count`.
    #[inline(always)]
    fn len(&self) -> usize {
        let count: u64 = self.count.copy_as();
        count as usize
    }
}
1149+
1150+
impl<const K: u64, CC> Index for Fixeds<K, CC> {
    type Ref = u64;

    /// Computes the offset at `index` as `K` times its one-based position.
    ///
    /// `index` is not compared against `count`.
    #[inline(always)]
    fn get(&self, index: usize) -> Self::Ref {
        let position = index as u64 + 1;
        position * K
    }
}
1155+
impl<'a, const K: u64, CC> Index for &'a Fixeds<K, CC> {
    type Ref = u64;

    /// Computes the offset at `index` as `K` times its one-based position.
    ///
    /// `index` is not compared against `count`.
    #[inline(always)]
    fn get(&self, index: usize) -> Self::Ref {
        let position = index as u64 + 1;
        position * K
    }
}
1160+
1161+
impl<const K: u64, T> Push<T> for Fixeds<K> {
    // TODO: check for overflow?

    /// Records one more offset.
    ///
    /// The pushed value is ignored: whatever is pushed, the new offset is
    /// the next multiple of `K`.
    #[inline(always)]
    fn push(&mut self, _item: T) {
        self.count += 1;
    }

    /// Records one more offset for each element of `iter`, ignoring the
    /// elements themselves.
    #[inline(always)]
    fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
        self.count += iter.into_iter().count() as u64;
    }
}
1170+
1171+
impl<const K: u64> crate::HeapSize for Fixeds<K> {
    /// Always reports `(0, 0)`; the only state is the inline `u64` count.
    #[inline(always)]
    fn heap_size(&self) -> (usize, usize) {
        (0, 0)
    }
}
1175+
impl<const K: u64> crate::Clear for Fixeds<K> {
    /// Empties the container by resetting the count to zero.
    #[inline(always)]
    fn clear(&mut self) {
        *self = Self { count: 0 };
    }
}
1179+
1180+
impl<'a, const K: u64> crate::AsBytes<'a> for Fixeds<K, &'a u64> {
1181+
#[inline(always)]
1182+
fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1183+
std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1184+
}
1185+
}
1186+
impl<'a, const K: u64> crate::FromBytes<'a> for Fixeds<K, &'a u64> {
1187+
#[inline(always)]
1188+
fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1189+
Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0] }
1190+
}
1191+
}
1192+
1193+
use super::Strides;
1194+
impl<const K: u64, BC: Len, CC: CopyAs<u64>+Copy> std::convert::TryFrom<Strides<BC, CC>> for Fixeds<K, CC> {
1195+
// On error we return the original.
1196+
type Error = Strides<BC, CC>;
1197+
fn try_from(item: Strides<BC, CC>) -> Result<Self, Self::Error> {
1198+
if item.strided() == Some(K) { Ok( Self { count: item.length } ) } else { Err(item) }
1199+
}
1200+
}
1201+
}
1202+
1203+
/// A general offset container optimized for fixed inter-offset sizes.
1204+
///
1205+
/// Although it can handle general offsets, it starts with the optimistic
1206+
/// assumption that the offsets will be evenly spaced from zero, and while
1207+
/// that holds it will maintain the stride and length. Should it stop being
1208+
/// true, when a non-conforming offset is pushed, it will start to store
1209+
/// the offsets in a general container.
1210+
mod stride {
1211+
1212+
use std::ops::Deref;
1213+
use crate::{Container, Index, Len, Push, Clear, AsBytes, FromBytes};
1214+
use crate::common::index::CopyAs;
1215+
1216+
/// Offsets held as a strided prefix followed by explicitly stored bounds.
///
/// The first `length` offsets are implicit: the offset at position `i` is
/// `stride * (i + 1)`. When pushing, the first item pushed into an empty
/// container becomes the stride. Offsets after the strided prefix are kept
/// verbatim in `bounds`. A `length` of zero means the collection is empty.
#[derive(Copy, Clone, Debug, Default)]
pub struct Strides<BC = Vec<u64>, CC = u64> {
    /// The spacing between consecutive offsets of the strided prefix.
    pub stride: CC,
    /// The number of offsets in the strided prefix.
    pub length: CC,
    /// The offsets that follow the strided prefix, stored explicitly.
    pub bounds: BC,
}
1228+
1229+
impl Container for Strides {
1230+
type Ref<'a> = u64;
1231+
type Borrowed<'a> = Strides<&'a [u64], &'a u64>;
1232+
1233+
#[inline(always)] fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Strides { stride: &self.stride, length: &self.length, bounds: &self.bounds[..] } }
1234+
/// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1235+
#[inline(always)] fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1236+
Strides { stride: item.stride, length: item.length, bounds: item.bounds }
1237+
}
1238+
/// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
1239+
#[inline(always)]fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { item }
1240+
}
1241+
1242+
impl<'a> Push<&'a u64> for Strides { #[inline(always)] fn push(&mut self, item: &'a u64) { self.push(*item) } }
1243+
impl Push<u64> for Strides { #[inline(always)] fn push(&mut self, item: u64) { self.push(item) } }
1244+
impl Clear for Strides { #[inline(always)] fn clear(&mut self) { self.clear() } }
1245+
1246+
impl<BC: Len, CC: CopyAs<u64> + Copy> Len for Strides<BC, CC> {
    /// The total number of offsets: the strided prefix plus the explicitly
    /// stored bounds.
    #[inline(always)]
    fn len(&self) -> usize {
        let strided = self.length.copy_as() as usize;
        let listed = self.bounds.len();
        strided + listed
    }
}
1250+
impl Index for Strides<&[u64], &u64> {
    type Ref = u64;

    /// Returns the offset at `index`.
    ///
    /// Positions inside the strided prefix are computed from the stride;
    /// later positions are read from `bounds`, re-based past the prefix.
    ///
    /// # Panics
    ///
    /// Panics if `index` lies past the strided prefix and outside `bounds`.
    #[inline(always)]
    fn get(&self, index: usize) -> Self::Ref {
        let (stride, length) = (*self.stride, *self.length);
        let position = index as u64;
        if position >= length {
            self.bounds[(position - length) as usize]
        } else {
            (position + 1) * stride
        }
    }
}
1258+
1259+
impl<'a, BC: AsBytes<'a>> AsBytes<'a> for Strides<BC, &'a u64> {
1260+
#[inline(always)]
1261+
fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1262+
let stride = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.stride))));
1263+
let length = std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.length))));
1264+
let bounds = self.bounds.as_bytes();
1265+
crate::chain(stride, crate::chain(length, bounds))
1266+
}
1267+
}
1268+
impl<'a, BC: FromBytes<'a>> FromBytes<'a> for Strides<BC, &'a u64> {
1269+
#[inline(always)]
1270+
fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1271+
let stride = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1272+
let length = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1273+
let bounds = BC::from_bytes(bytes);
1274+
Self { stride, length, bounds }
1275+
}
1276+
}
1277+
1278+
impl Strides {
1279+
pub fn new(stride: u64, length: u64) -> Self {
1280+
Self { stride, length, bounds: Vec::default() }
1281+
}
1282+
#[inline(always)]
1283+
pub fn push(&mut self, item: u64) {
1284+
if self.length == 0 {
1285+
self.stride = item;
1286+
self.length = 1;
1287+
}
1288+
else if !self.bounds.is_empty() {
1289+
self.bounds.push(item);
1290+
}
1291+
else if item == self.stride * (self.length + 1) {
1292+
self.length += 1;
1293+
}
1294+
else {
1295+
self.bounds.push(item);
1296+
}
1297+
}
1298+
#[inline(always)]
1299+
pub fn clear(&mut self) {
1300+
self.stride = 0;
1301+
self.length = 0;
1302+
self.bounds.clear();
1303+
}
1304+
}
1305+
1306+
impl<BC: Deref<Target=[u64]>, CC: CopyAs<u64>+Copy> Strides<BC, CC> {
    /// Returns the `(lower, upper)` offsets that delimit entry `index`.
    ///
    /// `upper` is the offset at `index`. `lower` is the offset at
    /// `index - 1`, or zero for the first entry.
    ///
    /// # Panics
    ///
    /// Panics if a required position lies past the strided prefix and
    /// outside `self.bounds`.
    #[inline(always)]
    pub fn bounds(&self, index: usize) -> (usize, usize) {
        let stride = self.stride.copy_as();
        let length = self.length.copy_as();

        // Offset at a position: computed inside the strided prefix, read from
        // the explicit bounds (re-based past the prefix) otherwise.
        let offset_at = |position: u64| -> usize {
            let offset = if position < length {
                (position + 1) * stride
            } else {
                self.bounds[(position - length) as usize]
            };
            offset as usize
        };

        let index = index as u64;
        let lower = if index == 0 { 0 } else { offset_at(index - 1) };
        let upper = offset_at(index);
        (lower, upper)
    }
}
1320+
impl<BC: Len, CC: CopyAs<u64>+Copy> Strides<BC, CC> {
    /// Returns the stride when no bounds are stored explicitly, and `None`
    /// otherwise.
    #[inline(always)]
    pub fn strided(&self) -> Option<u64> {
        self.bounds.is_empty().then(|| self.stride.copy_as())
    }
}
1328+
}
1329+
1330+
#[cfg(test)]
mod test {
    /// Fills a `Strides`-bounded `Vecs` with equally sized lists, converts the
    /// bounds to `Fixeds<3>` and back, and checks that one irregular push then
    /// makes the conversion fail.
    #[test]
    fn round_trip() {

        use crate::common::{Index, Push, Len};
        use crate::{Container, Vecs};
        use crate::primitive::offsets::{Strides, Fixeds};

        // One hundred lists of exactly three elements each.
        let mut strided = Vecs::<Vec::<i32>, Strides>::default();
        for i in 0 .. 100 {
            strided.push(&[1i32, 2, i]);
        }

        // A constant stride of three must convert to `Fixeds<3>`.
        let Vecs { bounds, values } = strided;
        let fixed = Vecs {
            bounds: TryInto::<Fixeds<3>>::try_into(bounds).unwrap(),
            values,
        };

        assert_eq!(fixed.borrow().len(), 100);
        for i in 0 .. 100 {
            assert_eq!(fixed.borrow().get(i).len(), 3);
        }

        // Back to `Strides`, carrying over the count.
        let mut relaxed = Vecs {
            bounds: Strides::new(3, fixed.bounds.count),
            values: fixed.values,
        };

        // A two-element list breaks the stride, so the conversion must fail.
        relaxed.push(&[0, 0]);
        assert!(TryInto::<Fixeds<3>>::try_into(relaxed.bounds).is_err());
    }
}
1363+
}
1364+
11031365
pub use empty::Empties;
11041366
/// A columnar store for `()`.
11051367
mod empty {

0 commit comments

Comments
 (0)