Skip to content

Commit 72834fd

Browse files
committed
graph, store: Make sure vid batching works with large vids
Changing to the new vid scheme of `block_num << 32 + sequence_num` revealed some numerical problems in the batching code.
1 parent 45dda04 commit 72834fd

File tree

2 files changed

+137
-14
lines changed

2 files changed

+137
-14
lines changed

graph/src/util/ogive.rs

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::{internal_error, prelude::StoreError};
1919
/// more fun to say.
2020
pub struct Ogive {
2121
/// The breakpoints of the piecewise linear function
22-
points: Vec<f64>,
22+
points: Vec<i64>,
2323
/// The size of each bin; the linear piece from `points[i]` to
2424
/// `points[i+1]` rises by this much
2525
bin_size: f64,
@@ -46,7 +46,6 @@ impl Ogive {
4646
let bins = points.len() - 1;
4747
let bin_size = total as f64 / bins as f64;
4848
let range = points[0]..=points[bins];
49-
let points = points.into_iter().map(|p| p as f64).collect();
5049
Ok(Self {
5150
points,
5251
bin_size,
@@ -90,7 +89,6 @@ impl Ogive {
9089
fn interval_start(&self, point: i64) -> Result<usize, StoreError> {
9190
self.check_in_range(point)?;
9291

93-
let point = point as f64;
9492
let idx = self
9593
.points
9694
.iter()
@@ -102,35 +100,61 @@ impl Ogive {
102100

103101
/// Return the value of the ogive at `point`, i.e., `f(point)`. It is an
104102
/// error if `point` is outside the range of points of this ogive.
103+
///
104+
/// If `i` is such that
105+
/// `points[i] <= point < points[i+1]`, then
106+
/// ```text
107+
/// f(point) = i * bin_size + (point - points[i]) / (points[i+1] - points[i]) * bin_size
108+
/// ```
109+
// See the comment on `inverse` for numerical considerations
105110
fn value(&self, point: i64) -> Result<i64, StoreError> {
106111
if self.points.len() == 1 {
107112
return Ok(*self.range.end());
108113
}
109114

110115
let idx = self.interval_start(point)?;
111-
let bin_size = self.bin_size as f64;
112116
let (a, b) = (self.points[idx], self.points[idx + 1]);
113-
let point = point as f64;
114-
let value = (idx as f64 + (point - a) / (b - a)) * bin_size;
117+
let offset = (point - a) as f64 / (b - a) as f64;
118+
let value = (idx as f64 + offset) * self.bin_size;
115119
Ok(value as i64)
116120
}
117121

118122
/// Return the value of the inverse ogive at `value`, i.e., `g(value)`.
119123
/// It is an error if `value` is negative. If `value` is greater than
120124
/// the total count of the ogive, the maximum point of the ogive is
121125
/// returned.
126+
///
127+
/// For `points[j] <= v < points[j+1]`, the value of `g(v)` is
128+
/// ```text
129+
/// g(v) = (1-lambda)*points[j] + lambda * points[j+1]
130+
/// ```
131+
/// where `lambda = (v - j * bin_size) / bin_size`
132+
///
133+
// Note that in the definition of `lambda`, the numerator is
134+
// `v.rem_euclid(bin_size)`
135+
//
136+
// Numerical consideration: in these calculations, we need to be careful
137+
// to never convert one of the points directly to f64 since they can be
138+
// so large that the conversion from i64 to f64 loses precision. That
139+
// loss of precision can cause the convex combination of `points[j]` and
140+
// `points[j+1]` above to lie outside of that interval when `(points[j]
141+
// as f64) as i64 < points[j]`
142+
//
143+
// We therefore try to only convert differences between points to f64
144+
// which are much smaller.
122145
fn inverse(&self, value: i64) -> Result<i64, StoreError> {
123-
let value = value as f64;
124-
if value < 0.0 {
146+
if value < 0 {
125147
return Err(internal_error!("value {} can not be negative", value));
126148
}
127-
let idx = (value / self.bin_size) as usize;
128-
if idx >= self.points.len() - 1 {
149+
let j = (value / self.bin_size as i64) as usize;
150+
if j >= self.points.len() - 1 {
129151
return Ok(*self.range.end());
130152
}
131-
let (a, b) = (self.points[idx] as f64, self.points[idx + 1] as f64);
132-
let lambda = (value - idx as f64 * self.bin_size) / self.bin_size;
133-
let x = (1.0 - lambda) * a + lambda * b;
153+
let (a, b) = (self.points[j], self.points[j + 1]);
154+
// This is the same calculation as in the comment above, but
155+
// rewritten to be more friendly to lossy calculations with f64
156+
let offset = (value as f64).rem_euclid(self.bin_size) * (b - a) as f64;
157+
let x = a + (offset / self.bin_size) as i64;
134158
Ok(x as i64)
135159
}
136160

store/postgres/src/vid_batcher.rs

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ impl VidBatcher {
244244
}
245245
}
246246

247-
#[derive(Copy, Clone, QueryableByName)]
247+
#[derive(Debug, Copy, Clone, QueryableByName)]
248248
pub(crate) struct VidRange {
249249
#[diesel(sql_type = BigInt, column_name = "min_vid")]
250250
pub min: i64,
@@ -470,4 +470,103 @@ mod tests {
470470
assert_eq!(1, ogive.start());
471471
assert_eq!(100_000, ogive.end());
472472
}
473+
474+
#[test]
475+
fn vid_batcher_handles_large_vid() {
476+
// An example with very large `vid` values which come from the new
477+
// schema of setting the `vid` to `block_num << 32 + sequence_num`.
478+
// These values are taken from an actual example subgraph and cuased
479+
// errors because of numerical roundoff issues
480+
const MIN: i64 = 186155521970012263;
481+
const MAX: i64 = 187989601854423140;
482+
const BOUNDS: &[i64] = &[
483+
186155521970012263,
484+
186155552034783334,
485+
186166744719556711,
486+
187571594162339943,
487+
187571628522078310,
488+
187576619274076263,
489+
187576649338847334,
490+
187580570643988583,
491+
187590242910339175,
492+
187590268680142950,
493+
187963647367053415,
494+
187970828552372324,
495+
187986749996138596,
496+
187989601854423140,
497+
];
498+
499+
// The start, end, and batch size we expect when we run through the
500+
// `vid_batcher` we set up below with `MIN`, `MAX` and `BOUNDS`
501+
const STEPS: &[(i64, i64, i64)] = &[
502+
(186155521970012263, 186155521970012265, 2),
503+
(186155521970012266, 186155521970012269, 3),
504+
(186155521970012270, 186155521970012276, 6),
505+
(186155521970012277, 186155521970012289, 12),
506+
(186155521970012290, 186155521970012312, 22),
507+
(186155521970012313, 186155521970012353, 40),
508+
(186155521970012354, 186155521970012426, 72),
509+
(186155521970012427, 186155521970012557, 130),
510+
(186155521970012558, 186155521970012792, 234),
511+
(186155521970012793, 186155521970013215, 422),
512+
(186155521970013216, 186155521970013976, 760),
513+
(186155521970013977, 186155521970015346, 1369),
514+
(186155521970015347, 186155521970017812, 2465),
515+
(186155521970017813, 186155521970022250, 4437),
516+
(186155521970022251, 186155521970030238, 7987),
517+
(186155521970030239, 186155521970044616, 14377),
518+
(186155521970044617, 186155521970070495, 25878),
519+
(186155521970070496, 186155521970117077, 46581),
520+
(186155521970117078, 186155521970200925, 83847),
521+
(186155521970200926, 186155521970351851, 150925),
522+
(186155521970351852, 186155521970623517, 271665),
523+
(186155521970623518, 186155521971112515, 488997),
524+
(186155521971112516, 186155521971992710, 880194),
525+
(186155521971992711, 186155521973577061, 1584350),
526+
(186155521973577062, 186155521976428893, 2851831),
527+
(186155521976428894, 186155521981562190, 5133296),
528+
(186155521981562191, 186155521990802124, 9239933),
529+
(186155521990802125, 186155522007434004, 16631879),
530+
(186155522007434005, 186155522037371388, 29937383),
531+
(186155522037371389, 186155522091258678, 53887289),
532+
(186155522091258679, 186155522188255800, 96997121),
533+
(186155522188255801, 186155522362850619, 174594818),
534+
(186155522362850620, 186155522677121292, 314270672),
535+
(186155522677121293, 186155523242808503, 565687210),
536+
(186155523242808504, 186155524261045483, 1018236979),
537+
(186155524261045484, 186155526093872046, 1832826562),
538+
(186155526093872047, 186155529392959859, 3299087812),
539+
(186155529392959860, 186155535331317922, 5938358062),
540+
(186155535331317923, 186155546020362436, 10689044513),
541+
(186155546020362437, 186160475833232786, 4929812870349),
542+
(186160475833232787, 186998193536485260, 837717703252473),
543+
(186998193536485261, 187574948946679478, 576755410194217),
544+
(187574948946679479, 187590253155585376, 15304208905897),
545+
(187590253155585377, 187989601854423140, 399348698837763),
546+
];
547+
548+
let vid_range = VidRange::new(MIN, MAX);
549+
let batch_size = AdaptiveBatchSize {
550+
size: 10000,
551+
target: Duration::from_secs(180),
552+
};
553+
554+
let mut vid_batcher = VidBatcher::new(BOUNDS.to_vec(), vid_range, batch_size).unwrap();
555+
vid_batcher.step_timer.set(Duration::from_secs(100));
556+
557+
// Run through the entire `vid_batcher`, collecting start and end in
558+
// `steps`
559+
let steps = std::iter::from_fn(|| {
560+
vid_batcher
561+
.step(|start, end| Ok((start, end, end - start)))
562+
.unwrap()
563+
.1
564+
})
565+
.fold(Vec::new(), |mut steps, (start, end, step)| {
566+
steps.push((start, end, step));
567+
steps
568+
});
569+
570+
assert_eq!(STEPS, &steps);
571+
}
473572
}

0 commit comments

Comments
 (0)