Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions docs/graphman.md
Original file line number Diff line number Diff line change
Expand Up @@ -371,21 +371,27 @@ Inspect all blocks after block `13000000`:

Remove the call cache of the specified chain.

If block numbers are not mentioned in `--from` and `--to`, then all the call cache will be removed.
Either remove entries in the range `--from` and `--to`, remove stale contracts which have not been accessed for a specified duration with `--ttl-days`, or remove the entire cache with `--remove-entire-cache`. Removing the entire cache can reduce indexing performance significantly and should generally be avoided.

USAGE:
graphman chain call-cache <CHAIN_NAME> remove [OPTIONS]
Usage: graphman chain call-cache <CHAIN_NAME> remove [OPTIONS]

Options:
--remove-entire-cache
Remove the entire cache

--ttl-days <TTL_DAYS>
Remove the cache for contracts that have not been accessed in the last <TTL_DAYS> days

OPTIONS:
-f, --from <FROM>
Starting block number

-h, --help
Print help information

-t, --to <TO>
Ending block number

-h, --help
Print help (see a summary with '-h')


### DESCRIPTION

Remove the call cache of a specified chain.
Expand All @@ -404,6 +410,12 @@ the first block number will be used as the starting block number.
The `to` option is used to specify the ending block number of the block range. In the absence of `to` option,
the last block number will be used as the ending block number.

#### `--remove-entire-cache`
The `--remove-entire-cache` option is used to remove the entire call cache of the specified chain.

#### `--ttl-days <TTL_DAYS>`
The `--ttl-days` option is used to remove stale contracts based on the `call_meta.accessed_at` field. For example, if `--ttl-days` is set to 7, all calls to a contract that has not been accessed in the last 7 days will be removed from the call cache.

### EXAMPLES

Remove the call cache for all blocks numbered from 10 to 20:
Expand All @@ -412,5 +424,9 @@ Remove the call cache for all blocks numbered from 10 to 20:

Remove all the call cache of the specified chain:

graphman --config config.toml chain call-cache ethereum remove
graphman --config config.toml chain call-cache ethereum remove --remove-entire-cache

Remove stale contracts from the call cache that have not been accessed in the last 7 days:

graphman --config config.toml chain call-cache ethereum remove --ttl-days 7

3 changes: 3 additions & 0 deletions graph/src/blockchain/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,9 @@ impl ChainStore for MockChainStore {
async fn clear_call_cache(&self, _from: BlockNumber, _to: BlockNumber) -> Result<(), Error> {
unimplemented!()
}
/// Mock stub for clearing stale call cache entries.
///
/// Not implemented: the `_ttl_days` argument is ignored and calling this
/// method panics via `unimplemented!()`.
async fn clear_stale_call_cache(&self, _ttl_days: i32) -> Result<(), Error> {
unimplemented!()
}
fn chain_identifier(&self) -> Result<ChainIdentifier, Error> {
unimplemented!()
}
Expand Down
3 changes: 3 additions & 0 deletions graph/src/components/store/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,9 @@ pub trait ChainStore: ChainHeadStore {
/// Clears call cache of the chain for the given `from` and `to` block number.
async fn clear_call_cache(&self, from: BlockNumber, to: BlockNumber) -> Result<(), Error>;

/// Clears stale call cache entries for the given TTL in days.
async fn clear_stale_call_cache(&self, ttl_days: i32) -> Result<(), Error>;

/// Return the chain identifier for this store.
fn chain_identifier(&self) -> Result<ChainIdentifier, Error>;

Expand Down
17 changes: 15 additions & 2 deletions node/src/bin/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -555,14 +555,18 @@ pub enum ChainCommand {
pub enum CallCacheCommand {
/// Remove the call cache of the specified chain.
///
/// Either remove entries in the range `--from` and `--to`, or remove
/// the entire cache with `--remove-entire-cache`. Removing the entire
/// Either remove entries in the range `--from` and `--to`,
/// remove the cache for contracts that have not been accessed for the specified duration `--ttl-days`,
/// or remove the entire cache with `--remove-entire-cache`. Removing the entire
/// cache can reduce indexing performance significantly and should
/// generally be avoided.
Remove {
/// Remove the entire cache
#[clap(long, conflicts_with_all = &["from", "to"])]
remove_entire_cache: bool,
/// Remove the cache for contracts that have not been accessed in the last <TTL_DAYS> days
#[clap(long, conflicts_with_all = &["from", "to", "remove-entire-cache"])]
ttl_days: Option<i32>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't allow negative values here

/// Starting block number
#[clap(long, short, conflicts_with = "remove-entire-cache", requires = "to")]
from: Option<i32>,
Expand Down Expand Up @@ -1472,8 +1476,17 @@ async fn main() -> anyhow::Result<()> {
from,
to,
remove_entire_cache,
ttl_days,
} => {
let chain_store = ctx.chain_store(&chain_name)?;
if let Some(ttl_days) = ttl_days {
return commands::chain::clear_stale_call_cache(
chain_store,
ttl_days,
)
.await;
}

if !remove_entire_cache && from.is_none() && to.is_none() {
bail!("you must specify either --from and --to or --remove-entire-cache");
}
Expand Down
12 changes: 12 additions & 0 deletions node/src/manager/commands/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ pub async fn clear_call_cache(
Ok(())
}

pub async fn clear_stale_call_cache(
chain_store: Arc<ChainStore>,
ttl_days: i32,
) -> Result<(), Error> {
println!(
"Removing stale entries from the call cache for `{}`",
chain_store.chain
);
chain_store.clear_stale_call_cache(ttl_days).await?;
Ok(())
}

pub async fn info(
primary: ConnectionPool,
store: Arc<BlockStore>,
Expand Down
129 changes: 128 additions & 1 deletion store/postgres/src/chain_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ pub use data::Storage;

/// Encapsulate access to the blocks table for a chain.
mod data {
use crate::diesel::dsl::IntervalDsl;
use diesel::sql_types::{Array, Binary, Bool, Nullable};
use diesel::{connection::SimpleConnection, insert_into};
use diesel::{delete, prelude::*, sql_query};
Expand All @@ -104,7 +105,7 @@ mod data {
use graph::prelude::transaction_receipt::LightTransactionReceipt;
use graph::prelude::web3::types::H256;
use graph::prelude::{
serde_json as json, BlockNumber, BlockPtr, CachedEthereumCall, Error, StoreError,
serde_json as json, BlockNumber, BlockPtr, CachedEthereumCall, Error, Logger, StoreError,
};
use std::collections::HashMap;
use std::convert::TryFrom;
Expand Down Expand Up @@ -1398,6 +1399,126 @@ mod data {
}
}

/// Removes call cache entries that belong to stale contracts, i.e. contracts
/// whose `accessed_at` value in the call meta table is older than `ttl_days`
/// days.
///
/// The addresses of all stale contracts are loaded first; if there are none,
/// the function returns `Ok(())` without deleting anything. Otherwise the
/// cache rows of those contracts are deleted in batches of `batch_size`
/// rows, and afterwards the contracts' rows in the call meta table are
/// deleted as well.
///
/// Any database error is propagated to the caller.
///
/// NOTE(review): a negative `ttl_days` puts the cutoff in the future, which
/// would presumably make every contract count as stale — confirm that
/// callers reject negative values.
pub fn clear_stale_call_cache(
&self,
conn: &mut PgConnection,
logger: &Logger,
ttl_days: i32,
) -> Result<(), Error> {
// Delete cache entries in batches since there could be thousands of cache entries per contract
let mut total_deleted = 0;
let batch_size = 5000;

// Both storage layouts perform the same three steps (find stale contracts,
// delete their cache rows in batches, delete their meta rows). The shared
// layout uses the diesel DSL on the `public` tables, the private layout
// builds SQL text around the tables' `qname`.
match self {
Storage::Shared => {
use public::eth_call_cache as cache;
use public::eth_call_meta as meta;

// Contracts whose last access lies before `now - ttl_days` days.
let stale_contracts = meta::table
.select(meta::contract_address)
.filter(
meta::accessed_at
.lt(diesel::dsl::date(diesel::dsl::now - ttl_days.days())),
)
.get_results::<Vec<u8>>(conn)?;

// Nothing is stale: return without touching the cache. Note that,
// unlike the private branch, nothing is logged here.
if stale_contracts.is_empty() {
return Ok(());
}

// Each iteration selects up to `batch_size` cache row ids that belong
// to stale contracts and then deletes exactly those rows.
loop {
let next_batch = cache::table
.select(cache::id)
.filter(cache::contract_address.eq_any(&stale_contracts))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to batch by contracts rather than cache entries? Im wondering because the number of stale_contracts itself can be very huge, in our production i just checked and the number for >7days is 9624844.
So for each row postgres would need to do a linear scan through this number of address to filter.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is this case, probably both have to be batched

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw is that number in the public eth_call_meta table?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no its just for ethereum in the private schema

Copy link
Member Author

@dimitrovmaksim dimitrovmaksim Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll look into adjusting the queries to make them more efficient, but In general I doubt this approach is preferable, even if optimized for such a number of records. Depending on the avg number of call cache records per contract, it could take days to clean up this many.

.limit(batch_size as i64)
.get_results::<Vec<u8>>(conn)?;
let deleted_count =
diesel::delete(cache::table.filter(cache::id.eq_any(&next_batch)))
.execute(conn)?;

total_deleted += deleted_count;

// A batch smaller than `batch_size` means no further matching rows
// were found, so the loop can stop.
if deleted_count < batch_size {
break;
}
}

graph::slog::info!(
logger,
"Cleaned call cache: deleted {} entries for {} contracts",
total_deleted,
stale_contracts.len()
);

// Finally drop the meta rows of the contracts whose cache was removed.
diesel::delete(
meta::table.filter(meta::contract_address.eq_any(&stale_contracts)),
)
.execute(conn)?;

Ok(())
}
Storage::Private(Schema {
call_cache,
call_meta,
..
}) => {
// Same staleness rule as the shared branch, expressed as SQL text
// against the chain's own call meta table.
let select_query = format!(
"SELECT contract_address FROM {} \
WHERE accessed_at < CURRENT_DATE - interval '{} days'",
call_meta.qname, ttl_days
);

// Row type used to read the single `contract_address` column returned
// by the raw query above.
#[derive(QueryableByName)]
struct ContractAddress {
#[diesel(sql_type = Bytea)]
contract_address: Vec<u8>,
}

let all_stale_contracts: Vec<Vec<u8>> = sql_query(select_query)
.load::<ContractAddress>(conn)?
.into_iter()
.map(|row| row.contract_address)
.collect();

if all_stale_contracts.is_empty() {
graph::slog::info!(logger, "Cleaned call cache: no stale entries found");
return Ok(());
}

// Delete up to `batch_size` cache rows of stale contracts per
// statement; the full address list is bound as the `$1` array each time.
loop {
let delete_cache_query = format!(
"DELETE FROM {} WHERE id IN (
SELECT id FROM {}
WHERE contract_address = ANY($1)
LIMIT {}
)",
call_cache.qname, call_cache.qname, batch_size
);

let deleted_count = sql_query(delete_cache_query)
.bind::<Array<Bytea>, _>(&all_stale_contracts)
.execute(conn)?;

total_deleted += deleted_count;

// A batch smaller than `batch_size` means no further matching rows
// were found, so the loop can stop.
if deleted_count < batch_size {
break;
}
}

// NOTE(review): `total_deleted` is accumulated above but, unlike in the
// shared branch, no summary is logged in this branch.
let delete_meta_query = format!(
"DELETE FROM {} WHERE contract_address = ANY($1)",
call_meta.qname
);
sql_query(delete_meta_query)
.bind::<Array<Bytea>, _>(&all_stale_contracts)
.execute(conn)?;

Ok(())
}
}
}

pub(super) fn update_accessed_at(
&self,
conn: &mut PgConnection,
Expand Down Expand Up @@ -2508,6 +2629,12 @@ impl ChainStoreTrait for ChainStore {
Ok(())
}

/// Clears call cache entries of contracts not accessed within the last
/// `ttl_days` days, running the cleanup of the underlying storage on a
/// connection obtained from `get_conn`.
async fn clear_stale_call_cache(&self, ttl_days: i32) -> Result<(), Error> {
    let mut pooled = self.get_conn()?;
    self.storage
        .clear_stale_call_cache(&mut pooled, &self.logger, ttl_days)
}

async fn transaction_receipts_in_block(
&self,
block_hash: &H256,
Expand Down
33 changes: 33 additions & 0 deletions store/test-store/tests/postgres/chain_head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,39 @@ fn eth_call_cache() {
})
}

#[test]
/// Smoke test for `clear_stale_call_cache`.
///
/// Seeds the call cache with one fresh entry, runs the cleanup with a TTL of
/// 7 days and checks that it succeeds and that the fresh entry is still
/// readable afterwards.
///
/// NOTE(review): a just-inserted entry is presumably not older than 7 days,
/// so this most likely only exercises the stale-contract lookup and not the
/// delete queries — see the note in the body for why stale data cannot be
/// set up from here.
fn test_clear_stale_call_cache() {
    let chain = vec![];
    run_test_async(chain, |store, _, _| async move {
        let logger = LOGGER.cheap_clone();
        let address = H160([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]);
        let call: [u8; 6] = [1, 2, 3, 4, 5, 6];
        let return_value: [u8; 3] = [7, 8, 9];

        // Seed the cache so there is an entry the cleanup could wrongly remove.
        let call = call::Request::new(address, call.to_vec(), 0);
        store
            .set_call(
                &logger,
                call.cheap_clone(),
                BLOCK_ONE.block_ptr(),
                call::Retval::Value(Bytes::from(return_value)),
            )
            .unwrap();

        // Confirm the call cache entry is there
        let ret = store.get_call(&call, BLOCK_ONE.block_ptr()).unwrap();
        assert!(ret.is_some());

        // Note: The storage field is not accessible from here, so we cannot fetch the Schema for the private chain
        // and manually populate the cache and meta tables or alter the accessed_at timestamp.
        // `expect` (rather than `assert!(result.is_ok())`) keeps the underlying
        // error visible in the test output if the cleanup fails.
        store
            .clear_stale_call_cache(7)
            .await
            .expect("clearing the stale call cache should succeed");

        // The entry was written moments ago, so a 7 day TTL must not remove it.
        let ret = store.get_call(&call, BLOCK_ONE.block_ptr()).unwrap();
        assert!(
            ret.is_some(),
            "fresh call cache entry was removed by the stale cleanup"
        );
    });
}

#[test]
/// Tests only query correctness. No data is involved.
fn test_transaction_receipts_in_block_function() {
Expand Down
Loading