Skip to content

Commit 86c2e69

Browse files
committed
graph, store: Load estimates for the block range distribution in tables
1 parent 3cd3860 commit 86c2e69

File tree

3 files changed

+72
-3
lines changed

3 files changed

+72
-3
lines changed

graph/src/components/store/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,14 @@ pub struct VersionStats {
928928
pub ratio: f64,
929929
/// The last block to which this table was pruned
930930
pub last_pruned_block: Option<BlockNumber>,
931+
/// Histograms for the lower and upper bounds of the block ranges in
932+
/// this table. Each histogram bucket contains roughly the same number
933+
/// of rows; values might be repeated to achieve that. The vectors are
934+
/// empty if the table hasn't been analyzed, the subgraph is stored in
935+
/// Postgres version 16 or lower, or if the table doesn't have a
936+
/// block_range column.
937+
pub block_range_lower: Vec<BlockNumber>,
938+
pub block_range_upper: Vec<BlockNumber>,
931939
}
932940

933941
/// What phase of pruning we are working on

store/postgres/src/catalog.rs

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ use std::time::Duration;
1919
use graph::prelude::anyhow::anyhow;
2020
use graph::{
2121
data::subgraph::schema::POI_TABLE,
22-
prelude::{lazy_static, StoreError},
22+
prelude::{lazy_static, StoreError, BLOCK_NUMBER_MAX},
2323
};
2424

2525
use crate::{
26+
block_range::BLOCK_RANGE_COLUMN,
2627
pool::ForeignServer,
2728
primary::{Namespace, Site, NAMESPACE_PUBLIC},
2829
relational::SqlName,
@@ -190,7 +191,6 @@ pub struct Catalog {
190191
/// Whether the column `pg_stats.range_bounds_histogram` introduced in
191192
/// Postgres 17 exists. See the [Postgres
192193
/// docs](https://www.postgresql.org/docs/17/view-pg-stats.html)
193-
#[allow(dead_code)]
194194
pg_stats_has_range_bounds_histogram: bool,
195195
}
196196

@@ -305,10 +305,42 @@ impl Catalog {
305305
tablename: s.tablename,
306306
ratio: s.ratio,
307307
last_pruned_block: s.last_pruned_block,
308+
block_range_lower: vec![],
309+
block_range_upper: vec![],
308310
}
309311
}
310312
}
311313

314+
/// One result row of the query issued by `block_range_histogram`: the
/// block range histogram bounds that `pg_stats` reports for a single
/// table.
#[derive(Queryable, QueryableByName)]
315+
struct RangeHistogram {
316+
/// Name of the table (`pg_stats.tablename`) the histogram belongs to
#[diesel(sql_type = Text)]
317+
tablename: String,
318+
/// The `lower(..)` bound of every range in the histogram, in the
/// order in which `array_agg` produced them
#[diesel(sql_type = Array<Integer>)]
319+
lower: Vec<i32>,
320+
/// The `upper(..)` bound of every range in the histogram; a `NULL`
/// upper bound is replaced by `BLOCK_NUMBER_MAX` in the query
#[diesel(sql_type = Array<Integer>)]
321+
upper: Vec<i32>,
322+
}
323+
324+
/// Load the histograms that Postgres keeps in
/// `pg_stats.range_bounds_histogram` for the `BLOCK_RANGE_COLUMN` column
/// of the tables in the schema `namespace`.
///
/// Returns one `RangeHistogram` per table for which `pg_stats` yields
/// histogram entries, ordered by table name. Any error from running the
/// query is propagated to the caller.
///
/// NOTE(review): the query reads `range_bounds_histogram`
/// unconditionally, so this presumably must only be called when that
/// column exists (see `pg_stats_has_range_bounds_histogram`) — confirm
/// against callers.
fn block_range_histogram(
325+
conn: &mut PgConnection,
326+
namespace: &Namespace,
327+
) -> Result<Vec<RangeHistogram>, StoreError> {
328+
// The histogram is cast via `text` to `int4range[]` and unnested so
// that `lower()`/`upper()` can be applied to each range. The two
// constants are interpolated by `format!`; only the schema name is
// passed as a bind parameter (`$1`).
let query = format!(
329+
"select tablename, \
330+
array_agg(lower(block_range)) lower, \
331+
array_agg(coalesce(upper(block_range), {BLOCK_NUMBER_MAX})) upper \
332+
from (select tablename,
333+
unnest(range_bounds_histogram::text::int4range[]) block_range
334+
from pg_stats where schemaname = $1 and attname = '{BLOCK_RANGE_COLUMN}') a
335+
group by tablename
336+
order by tablename"
337+
);
338+
// Bind the namespace as `$1` and load all result rows
let result = sql_query(query)
339+
.bind::<Text, _>(namespace.as_str())
340+
.get_results::<RangeHistogram>(conn)?;
341+
Ok(result)
342+
}
343+
312344
// Get an estimate of number of rows (pg_class.reltuples) and number of
313345
// distinct entities (based on the planner's idea of how many distinct
314346
// values there are in the `id` column). See the [Postgres
@@ -342,7 +374,34 @@ impl Catalog {
342374
.load::<DbStats>(conn)
343375
.map_err(StoreError::from)?;
344376

345-
Ok(stats.into_iter().map(|s| s.into()).collect())
377+
let mut range_histogram = if self.pg_stats_has_range_bounds_histogram {
378+
block_range_histogram(conn, &self.site.namespace)?
379+
} else {
380+
vec![]
381+
};
382+
383+
let stats = stats
384+
.into_iter()
385+
.map(|s| {
386+
let pos = range_histogram
387+
.iter()
388+
.position(|h| h.tablename == s.tablename);
389+
let (mut lower, mut upper) = pos
390+
.map(|pos| range_histogram.swap_remove(pos))
391+
.map(|h| (h.lower, h.upper))
392+
.unwrap_or((vec![], vec![]));
393+
// Since lower and upper are supposed to be histograms, we
394+
// sort them
395+
lower.sort_unstable();
396+
upper.sort_unstable();
397+
let mut vs = VersionStats::from(s);
398+
vs.block_range_lower = lower;
399+
vs.block_range_upper = upper;
400+
vs
401+
})
402+
.collect::<Vec<_>>();
403+
404+
Ok(stats)
346405
}
347406
}
348407

store/test-store/tests/postgres/graft.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,8 @@ fn prune() {
648648
tablename: USER.to_ascii_lowercase(),
649649
ratio: 3.0 / 5.0,
650650
last_pruned_block: None,
651+
block_range_lower: vec![],
652+
block_range_upper: vec![],
651653
};
652654
assert_eq!(
653655
Some(strategy),

0 commit comments

Comments
 (0)