diff --git a/CHANGELOG.md b/CHANGELOG.md index 27a158441..31a6992e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). DDlog, which will crash DDlog programs at runtime if they overflow the weights attached to data values. This may be preferable to generating incorrect results. +- Rust compilation option `unbounded_weights` for the code generated by + DDlog which will never crash DDlog programs at runtime but may be slower. ## [0.48.2] - Sep 13, 2021 diff --git a/doc/tutorial/tutorial.md b/doc/tutorial/tutorial.md index 5c486c8f4..e8fecb750 100644 --- a/doc/tutorial/tutorial.md +++ b/doc/tutorial/tutorial.md @@ -3797,12 +3797,19 @@ Weight overflow in turns leads to silent wrong results at runtime [bug 878](https://github.com/vmware/differential-datalog/issues/878). To mitigate this problem, the generated Rust code can be compiled with -the a feature named `checked_weights`. This causes programs that +a feature named `checked_weights`. This causes programs that overflow the weights to crash with a Rust `panic` at runtime instead of producing incorrect results. This can be done e.g., by compiling the result produced by the ddlog compiler with a command line like `cargo build --release --features=checked_weights`. +Alternatively, the generated Rust code can be compiled with a feature +named `unbounded_weights`. This may cause the program to execute +slower and use more memory, but the program will never produce +incorrect results. This can be done e.g., by compiling +the result produced by the ddlog compiler with a command line like +`cargo build --release --features=unbounded_weights`. + ## Profiling DDlog's profiling features are designed to help the programmer identify diff --git a/lib/ddlog_std.rs b/lib/ddlog_std.rs index 9d06df8f6..437702b86 100644 --- a/lib/ddlog_std.rs +++ b/lib/ddlog_std.rs @@ -1264,7 +1264,7 @@ impl Clone for Group { key: key.clone(), group: group .iter() - .map(|(v, w)| tuple2(project(v).clone(), i64::from(*w) as DDWeight)) + .map(|(v, w)| tuple2(project(v).clone(), i64::from(w.clone()) as DDWeight)) .collect(), }, GroupEnum::ByVal { key, group } => GroupEnum::ByVal { @@ -1435,7 +1435,7 @@ impl<'a, V: Clone> Iterator for GroupIter<'a, V> { match self { GroupIter::ByRef { iter, project } => match iter.next() { None => None, - Some((x, w)) => Some(tuple2(project(x).clone(), i64::from(*w) as DDWeight)), + Some((x, w)) => Some(tuple2(project(x).clone(), i64::from(w.clone()) as DDWeight)), }, GroupIter::ByVal { iter } => match iter.next() { None => None, @@ -1531,7 +1531,7 @@ impl Iterator for GroupIntoIter { match self { GroupIntoIter::ByRef { iter, project } => match iter.next() { None => None, - Some((x, w)) => Some(tuple2(project(x).clone(), i64::from(*w) as DDWeight)), + Some((x, w)) => Some(tuple2(project(x).clone(), i64::from(w.clone()) as DDWeight)), }, GroupIntoIter::ByVal { iter } => match iter.next() { None => None, diff --git a/rust/ddlog_benches/Cargo.toml b/rust/ddlog_benches/Cargo.toml index b0f9088bf..0779cbc07 100644 --- a/rust/ddlog_benches/Cargo.toml +++ b/rust/ddlog_benches/Cargo.toml @@ -5,10 +5,9 @@ edition = "2018" license = "MIT" [features] -# Use the following to run with checked_weights: -# panic on weight overflow. -#default = ["benchmarks_ddlog/checked_weights"] default = [] +checked_weights = ["benchmarks_ddlog/checked_weights"] +unbounded_weights = ["benchmarks_ddlog/unbounded_weights"] [dependencies] criterion = "0.3.3" diff --git a/rust/ddlog_benches/Makefile.toml b/rust/ddlog_benches/Makefile.toml index fbda62657..0e0f7fc80 100644 --- a/rust/ddlog_benches/Makefile.toml +++ b/rust/ddlog_benches/Makefile.toml @@ -5,19 +5,19 @@ dependencies = ["bench-twitter", "bench-livejournal"] # Runs the benchmark suite on the citations dataset [tasks.bench-livejournal] command = "cargo" -args = ["bench", "LiveJournal", "--bench", "live_journal"] +args = ["bench", "LiveJournal", "--bench", "live_journal", "${@}"] dependencies = ["build-ddlog", "download-livejournal"] # Runs the benchmark suite on the twitter dataset [tasks.bench-twitter] command = "cargo" -args = ["bench", "--bench", "twitter"] +args = ["bench", "--bench", "twitter", "${@}"] dependencies = ["build-ddlog", "download-twitter"] # Run twitter microbenchmarks only [tasks.bench-twitter-micro] command = "cargo" -args = ["bench", "twitter-micro", "--bench", "twitter"] +args = ["bench", "twitter-micro", "--bench", "twitter", "${@}"] dependencies = ["build-ddlog", "download-twitter"] # Runs `ddlog` to generate ddlog code diff --git a/rust/ddlog_benches/README.md b/rust/ddlog_benches/README.md index abd255a25..e407ec0bf 100644 --- a/rust/ddlog_benches/README.md +++ b/rust/ddlog_benches/README.md @@ -30,3 +30,19 @@ For info on writing benchmarks see the [criterion user guide] and the [criterion [`cargo-make`]: https://github.com/sagiegurari/cargo-make [criterion user guide]: https://bheisler.github.io/criterion.rs/book/index.html [criterion docs]: https://docs.rs/criterion + +### Supported features + +The benchmarks can be run using checked_weights, which will cause a +Rust panic on weight overflow. This can be done by running: + +```sh +cargo make benchmarks --features checked_weights +``` + +The benchmarks can be run using unbounded_weights, which will always +produce correct results, but may run slower. This can be done by running: + +```sh +cargo make benchmarks --features unbounded_weights +``` diff --git a/rust/template/Cargo.toml b/rust/template/Cargo.toml index b43cb2907..1abda58bb 100644 --- a/rust/template/Cargo.toml +++ b/rust/template/Cargo.toml @@ -14,6 +14,7 @@ command-line = ["cmd_parser", "rustop"] nested_ts_32 = ["differential_datalog/nested_ts_32"] c_api = ["differential_datalog/c_api"] checked_weights = ["differential_datalog/checked_weights"] +unbounded_weights = ["differential_datalog/unbounded_weights"] [dependencies] abomonation = "0.7" diff --git a/rust/template/differential_datalog/Cargo.toml b/rust/template/differential_datalog/Cargo.toml index 0a831f975..97de0cc67 100644 --- a/rust/template/differential_datalog/Cargo.toml +++ b/rust/template/differential_datalog/Cargo.toml @@ -11,6 +11,8 @@ nested_ts_32 = [] c_api = [] # panic on weight overflow checked_weights = [] +# unbounded weights make the program slower but correct +unbounded_weights = [] [dependencies] #differential-dataflow = "0.11.0" diff --git a/rust/template/differential_datalog/src/api/update_handler.rs b/rust/template/differential_datalog/src/api/update_handler.rs index cba9a4161..4a4104034 100644 --- a/rust/template/differential_datalog/src/api/update_handler.rs +++ b/rust/template/differential_datalog/src/api/update_handler.rs @@ -440,8 +440,10 @@ impl MTUpdateHandler for MTChainedUpdateHandler { self.handlers.iter().map(|h| h.mt_update_cb()).collect(); Arc::new(move |relid, v, w| { + // not all weight implementations support copy + #[allow(clippy::clone_on_copy)] for cb in cbs.iter() { - cb(relid, v, w); + cb(relid, v, w.clone()); } }) } diff --git a/rust/template/differential_datalog/src/program/mod.rs b/rust/template/differential_datalog/src/program/mod.rs index 4bbc71ae7..64415a047 100644 --- a/rust/template/differential_datalog/src/program/mod.rs +++ b/rust/template/differential_datalog/src/program/mod.rs @@ -32,6 +32,7 @@ use crate::{ RenderContext, }, }; +use abomonation::Abomonation; use abomonation_derive::Abomonation; use arrange::{ antijoin_arranged, Arrangement as DataflowArrangement, ArrangementFlavor, Arrangements, @@ -39,7 +40,7 @@ use arrange::{ use config::SelfProfilingRig; use crossbeam_channel::{Receiver, Sender}; use fnv::{FnvHashMap, FnvHashSet}; -use num::{One, Zero}; +use num::{BigInt, One, ToPrimitive, Zero}; use std::{ any::Any, borrow::Cow, @@ -47,6 +48,7 @@ use std::{ collections::{hash_map, BTreeSet}, fmt::{self, Debug, Formatter}, iter::{self, Cycle, Skip}, + mem::size_of, ops::{Add, AddAssign, Mul, Neg, Range}, sync::{ atomic::{AtomicBool, Ordering}, @@ -88,6 +90,8 @@ type TKeyAgent = TraceAgent>; type TValEnter = TraceEnter, T>; type TKeyEnter = TraceEnter, T>; +// Checked weights are integers whose arithmetic is checked for overflow +// and causes a panic on overflow. #[derive(Abomonation, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, Debug, Clone)] #[repr(transparent)] pub struct CheckedWeight { @@ -134,6 +138,16 @@ impl Mul for CheckedWeight { } } +impl Mul<&CheckedWeight> for &CheckedWeight { + type Output = CheckedWeight; + fn mul(self, rhs: &CheckedWeight) -> CheckedWeight { + // intentional panic on overflow + CheckedWeight { + value: self.value.checked_mul(rhs.value).expect("Weight overflow"), + } + } +} + impl Neg for CheckedWeight { type Output = Self; @@ -172,18 +186,265 @@ impl From for CheckedWeight { } } +impl From for CheckedWeight { + fn from(item: i8) -> Self { + Self { value: item as i32 } + } +} + impl From for i64 { fn from(item: CheckedWeight) -> Self { item.value as i64 } } +impl From<&CheckedWeight> for i64 { + fn from(item: &CheckedWeight) -> Self { + item.value as i64 + } +} + +// The type used to represent small weights. +type SmallType = i64; + +/// Convert a BigInt into a value of type SmallType +fn big_to_small(b: &BigInt) -> Option { + // Change this method if you modify SmallType above + b.to_i64() +} + +// Unbounded weights store essentially an unbounded integer, but are +// optimized for small weights. +#[derive(Ord, PartialOrd, Eq, PartialEq, Hash, Debug, Clone)] +pub enum UnboundedWeight { + Small(SmallType), + Big(BigInt), +} + +impl Semigroup for UnboundedWeight { + fn is_zero(&self) -> bool { + match &self { + UnboundedWeight::Small(v) => *v == 0, + UnboundedWeight::Big(v) => v.is_zero(), + } + } +} + +fn to_unbounded_weight(v: BigInt) -> UnboundedWeight { + // The invariant of this representation is: + // - if the value fits into SmallType it is stored as a Small, + // - else a Big(BigInt) is used. + let i = big_to_small(&v); + match i { + None => UnboundedWeight::Big(v), + Some(r) => UnboundedWeight::Small(r), + } +} + +fn to_bigint(v: &SmallType) -> BigInt { + BigInt::from(*v) +} + +impl<'a> AddAssign<&'a Self> for UnboundedWeight { + fn add_assign(&mut self, other: &'a Self) { + match (&self, &other) { + (UnboundedWeight::Small(v1), UnboundedWeight::Small(v2)) => { + let res = v1.checked_add(*v2); + match res { + Some(v) => *self = UnboundedWeight::Small(v), + None => *self = UnboundedWeight::Big((to_bigint(v1)).add(to_bigint(v2))), + }; + } + (UnboundedWeight::Big(v1), UnboundedWeight::Small(v2)) => { + let res = v1.add(to_bigint(v2)); + *self = to_unbounded_weight(res); + } + (UnboundedWeight::Small(v1), UnboundedWeight::Big(v2)) => { + let res = to_bigint(v1).add(v2); + *self = to_unbounded_weight(res); + } + (UnboundedWeight::Big(v1), UnboundedWeight::Big(v2)) => { + let res = v1.add(v2); + *self = to_unbounded_weight(res); + } + } + } +} + +impl Add for UnboundedWeight { + type Output = Self; + + fn add(self, other: Self) -> Self { + match (&self, &other) { + (UnboundedWeight::Small(v1), UnboundedWeight::Small(v2)) => { + let res = v1.checked_add(*v2); + match res { + Some(v) => UnboundedWeight::Small(v), + None => UnboundedWeight::Big(to_bigint(v1).add(to_bigint(v2))), + } + } + (UnboundedWeight::Big(v1), UnboundedWeight::Small(v2)) => { + let res = v1.add(to_bigint(v2)); + to_unbounded_weight(res) + } + (UnboundedWeight::Small(v1), UnboundedWeight::Big(v2)) => { + let res = to_bigint(v1).add(v2); + to_unbounded_weight(res) + } + (UnboundedWeight::Big(v1), UnboundedWeight::Big(v2)) => { + let res = v1.add(v2); + to_unbounded_weight(res) + } + } + } +} + +impl Mul for UnboundedWeight { + type Output = Self; + fn mul(self, other: Self) -> Self::Output { + match (&self, &other) { + (UnboundedWeight::Small(v1), UnboundedWeight::Small(v2)) => { + let res = v1.checked_mul(*v2); + match res { + Some(v) => UnboundedWeight::Small(v), + None => UnboundedWeight::Big((to_bigint(v1)).mul(to_bigint(v2))), + } + } + (UnboundedWeight::Big(v1), UnboundedWeight::Small(v2)) => { + let res = v1.mul(to_bigint(v2)); + to_unbounded_weight(res) + } + (UnboundedWeight::Small(v1), UnboundedWeight::Big(v2)) => { + let res = to_bigint(v1).mul(v2); + to_unbounded_weight(res) + } + (UnboundedWeight::Big(v1), UnboundedWeight::Big(v2)) => { + let res = v1.mul(v2); + to_unbounded_weight(res) + } + } + } +} + +impl Mul<&UnboundedWeight> for &UnboundedWeight { + type Output = UnboundedWeight; + fn mul(self, other: &UnboundedWeight) -> Self::Output { + match (&self, &other) { + (UnboundedWeight::Small(v1), UnboundedWeight::Small(v2)) => { + let res = v1.checked_mul(*v2); + match res { + Some(v) => UnboundedWeight::Small(v), + None => UnboundedWeight::Big((to_bigint(v1)).mul(to_bigint(v2))), + } + } + (UnboundedWeight::Big(v1), UnboundedWeight::Small(v2)) => { + let res = v1.mul(to_bigint(v2)); + to_unbounded_weight(res) + } + (UnboundedWeight::Small(v1), UnboundedWeight::Big(v2)) => { + let res = to_bigint(v1).mul(v2); + to_unbounded_weight(res) + } + (UnboundedWeight::Big(v1), UnboundedWeight::Big(v2)) => { + let res = v1.mul(v2); + to_unbounded_weight(res) + } + } + } +} + +impl Monoid for UnboundedWeight { + fn zero() -> Self { + Self::Small(0) + } +} + +impl One for UnboundedWeight { + fn one() -> Self { + UnboundedWeight::Small(1) + } +} + +impl Zero for UnboundedWeight { + fn zero() -> Self { + UnboundedWeight::Small(0) + } + fn is_zero(&self) -> bool { + matches!(self, UnboundedWeight::Small(0)) + } +} + +impl From for UnboundedWeight { + fn from(item: SmallType) -> Self { + Self::Small(item) + } +} + +impl From for UnboundedWeight { + fn from(item: i8) -> Self { + UnboundedWeight::Small(item as SmallType) + } +} + +impl Neg for UnboundedWeight { + type Output = Self; + + fn neg(self) -> Self::Output { + match &self { + UnboundedWeight::Small(v1) => { + let res = v1.checked_neg(); + match res { + Some(v) => UnboundedWeight::Small(v), + None => UnboundedWeight::Big(to_bigint(v1).neg()), + } + } + UnboundedWeight::Big(v1) => { + let res = v1.neg(); + to_unbounded_weight(res) + } + } + } +} + +impl From for i64 { + fn from(item: UnboundedWeight) -> Self { + match item { + UnboundedWeight::Small(v1) => { + // If this is not true this may need to panic as well + assert!(size_of::() <= size_of::()); + v1 as i64 + } + UnboundedWeight::Big(v2) => v2.to_i64().expect("Weight too large for 64 bits"), + } + } +} + +impl From<&UnboundedWeight> for i64 { + fn from(item: &UnboundedWeight) -> Self { + match &item { + UnboundedWeight::Small(v1) => { + // If this is not true this may need to panic as well + assert!(size_of::() <= size_of::()); + *v1 as i64 + } + UnboundedWeight::Big(v2) => v2.to_i64().expect("Weight too large for 64 bits"), + } + } +} + +// Abomonation is not really used, but it is required. +impl Abomonation for UnboundedWeight {} + /// Weight is a diff associated with records in differential dataflow #[cfg(feature = "checked_weights")] pub type Weight = CheckedWeight; /// Weight is a diff associated with records in differential dataflow -#[cfg(not(feature = "checked_weights"))] +#[cfg(feature = "unbounded_weights")] +pub type Weight = UnboundedWeight; + +/// Weight is a diff associated with records in differential dataflow +#[cfg(all(not(feature = "checked_weights"), not(feature = "unbounded_weights")))] pub type Weight = i32; /// Message buffer for profiling messages @@ -1441,8 +1702,10 @@ impl Program { ifun, ref next, } => { + // not all weight implementations support copy + #[allow(clippy::clone_on_copy)] let inspect = with_prof_context(description, || { - col.inspect(move |(v, ts, w)| ifun(v, ts.to_tuple_ts(), *w)) + col.inspect(move |(v, ts, w)| ifun(v, ts.to_tuple_ts(), w.clone())) }); Self::xform_collection(inspect, &*next, arrangements, lookup_collection) } @@ -1467,7 +1730,7 @@ impl Program { &collection_with_keys, arr, |(k, _), key| *key = k.clone(), - move |v1, &w1, v2, &w2| (jfun(&v1.1, v2), w1 * w2), + move |v1, w1, v2, w2| (jfun(&v1.1, v2), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), @@ -1500,7 +1763,7 @@ impl Program { &collection_with_keys, arr, |(k, _), key| *key = k.clone(), - move |v1, &w1, _, &w2| (jfun(&v1.1), w1 * w2), + move |v1, w1, _, w2| (jfun(&v1.1), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), @@ -1667,8 +1930,10 @@ impl Program { ifun, ref next, } => { + // not all weight implementations support copy + #[allow(clippy::clone_on_copy)] let inspect = with_prof_context(description, || { - col.inspect(move |(v, ts, w)| ifun(v, ts.to_tuple_ts(), *w)) + col.inspect(move |(v, ts, w)| ifun(v, ts.to_tuple_ts(), w.clone())) }); Self::streamless_xform_collection(inspect, &*next, arrangements, lookup_collection) } @@ -1693,7 +1958,7 @@ impl Program { &collection_with_keys, arr, |(k, _), key| *key = k.clone(), - move |v1, &w1, v2, &w2| (jfun(&v1.1, v2), w1 * w2), + move |v1, w1, v2, w2| (jfun(&v1.1, v2), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), @@ -1726,7 +1991,7 @@ impl Program { &collection_with_keys, arr, |(k, _), key| *key = k.clone(), - move |v1, &w1, _, &w2| (jfun(&v1.1), w1 * w2), + move |v1, w1, _, w2| (jfun(&v1.1), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), @@ -1934,7 +2199,7 @@ impl Program { &collection_with_keys, arr.clone(), |(k, _), key| *key = k.clone(), - move |v1, &w1, v2, &w2| (jfun(v2, &v1.1), w1 * w2), + move |v1, w1, v2, w2| (jfun(v2, &v1.1), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), @@ -1945,7 +2210,7 @@ impl Program { &collection_with_keys, arr.filter(move |_, v| f(v)), |(k, _), key| *key = k.clone(), - move |v1, &w1, v2, &w2| (jfun(v2, &v1.1), w1 * w2), + move |v1, w1, v2, w2| (jfun(v2, &v1.1), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), @@ -1985,7 +2250,7 @@ impl Program { &collection_keys, arr.clone(), |k, key| *key = k.clone(), - move |_, &w1, v2, &w2| (jfun(v2), w1 * w2), + move |_, w1, v2, w2| (jfun(v2), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), @@ -1996,7 +2261,7 @@ impl Program { &collection_keys, arr.filter(move |_, v| f(v)), |k, key| *key = k.clone(), - move |_, &w1, v2, &w2| (jfun(v2), w1 * w2), + move |_, w1, v2, w2| (jfun(v2), w1 * w2), ().into_ddvalue(), ().into_ddvalue(), ().into_ddvalue(), diff --git a/rust/template/differential_datalog/src/program/worker.rs b/rust/template/differential_datalog/src/program/worker.rs index 14f6c181c..70a5aebe0 100644 --- a/rust/template/differential_datalog/src/program/worker.rs +++ b/rust/template/differential_datalog/src/program/worker.rs @@ -386,7 +386,7 @@ impl<'a> DDlogWorker<'a> { let mut values = BTreeSet::new(); while cursor.val_valid(&storage) && *cursor.key(&storage) == k { let mut weight = Weight::zero(); - cursor.map_times(&storage, |_, &diff| weight += &Weight::from(diff)); + cursor.map_times(&storage, |_, diff| weight += diff); //assert!(weight >= 0); // FIXME: this will add the value to the set even if `weight < 0`, @@ -409,7 +409,7 @@ impl<'a> DDlogWorker<'a> { while cursor.key_valid(&storage) { while cursor.val_valid(&storage) { let mut weight = Weight::zero(); - cursor.map_times(&storage, |_, &diff| weight += &diff); + cursor.map_times(&storage, |_, diff| weight += diff); //assert!(weight >= 0); if !weight.is_zero() { @@ -612,11 +612,13 @@ impl<'a> DDlogWorker<'a> { let vcol = with_prof_context( &format!("join {} with 'Enabled' relation", drel.id), || { + // not all weight implementations support copy + #[allow(clippy::clone_on_copy)] lookup_map( &v, enabled_arrangement.clone(), |_: &DDValue, key| *key = (), - move |x, w, _, _| (x.clone(), *w), + move |x, w: &Weight, _, _| (x.clone(), w.clone()), (), (), (), @@ -681,11 +683,12 @@ impl<'a> DDlogWorker<'a> { with_prof_context(&format!("consolidate {}", relid), || { collection.consolidate() }); - + // not all weight implementations support copy + #[allow(clippy::clone_on_copy)] let inspected = with_prof_context(&format!("inspect {}", relid), || { consolidated.inspect(move |x| { // assert!(x.2 == 1 || x.2 == -1, "x: {:?}", x); - (relation_callback)(relid, &x.0, x.2) + (relation_callback)(relid, &x.0, x.2.clone()) }) }); diff --git a/test/datalog_tests/test-overflow.sh b/test/datalog_tests/test-overflow.sh index 909c49afb..8fd1fee89 100755 --- a/test/datalog_tests/test-overflow.sh +++ b/test/datalog_tests/test-overflow.sh @@ -8,3 +8,9 @@ if [ $? -eq 0 ]; then echo "Test should have failed" false fi + +RUSTFEATURES="unbounded_weights" ./run-test.sh overflow release +if [ $? -ne 0 ]; then + echo "Test should have passed" + false +fi