
Commit b4e00b8

pageserver: refuse to load tenants with suspiciously old indices in old generations (#9719)

## Problem

Historically, if a control component passed a pageserver "generation: 1", this could be a quick way to corrupt a tenant by loading a historic index.

Follows #9383
Closes #6951

## Summary of changes

- Introduce a `Fatal` variant to `DownloadError`, so that index downloads can signal when they have encountered a situation scary enough that we shouldn't proceed to load the tenant.
- Handle this variant by putting the tenant into a Broken state (no matter which timeline within the tenant reported it).
- Add a test for this case.

In the event that this behavior fires when we don't want it to, we have ways to intervene:
- "Touch" an affected index to update its mtime (download+upload the S3 object; see the sketch below).
- If this behavior is triggered, it indicates we're attaching in some old generation, so we should be able to fix that by manually bumping generation numbers in the storage controller database (this should never happen, but it's an option if it does).
1 parent 10aaa36 commit b4e00b8
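
As a reference for the "touch" mitigation above: a server-side self-copy achieves the same effect as a download+re-upload. This is a minimal sketch, not part of this commit, assuming the aws-sdk-s3 crate and a hypothetical `touch_index` helper; bucket and key values are placeholders.

```rust
// Hypothetical mitigation sketch: refresh an index_part object's mtime by
// copying it onto itself. S3 rejects a self-copy unless metadata is replaced,
// so MetadataDirective::Replace is what forces a new LastModified timestamp.
use aws_sdk_s3::{types::MetadataDirective, Client};

async fn touch_index(client: &Client, bucket: &str, key: &str) -> Result<(), aws_sdk_s3::Error> {
    client
        .copy_object()
        .copy_source(format!("{bucket}/{key}")) // source is the object itself
        .bucket(bucket)
        .key(key)
        .metadata_directive(MetadataDirective::Replace)
        .send()
        .await?;
    Ok(())
}
```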

File tree

- libs/remote_storage/src/error.rs
- pageserver/src/tenant.rs
- pageserver/src/tenant/remote_timeline_client.rs
- test_runner/regress/test_pageserver_generations.py

4 files changed: +85 -3 lines changed

libs/remote_storage/src/error.rs

Lines changed: 5 additions & 1 deletion

```diff
@@ -15,6 +15,9 @@ pub enum DownloadError {
     ///
     /// Concurrency control is not timed within timeout.
     Timeout,
+    /// Some integrity/consistency check failed during download. This is used during
+    /// timeline loads to cancel the load of a tenant if some timeline detects fatal corruption.
+    Fatal(String),
     /// The file was found in the remote storage, but the download failed.
     Other(anyhow::Error),
 }
@@ -29,6 +32,7 @@ impl std::fmt::Display for DownloadError {
             DownloadError::Unmodified => write!(f, "File was not modified"),
             DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
             DownloadError::Timeout => write!(f, "timeout"),
+            DownloadError::Fatal(why) => write!(f, "Fatal read error: {why}"),
             DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
         }
     }
@@ -41,7 +45,7 @@ impl DownloadError {
     pub fn is_permanent(&self) -> bool {
         use DownloadError::*;
         match self {
-            BadInput(_) | NotFound | Unmodified | Cancelled => true,
+            BadInput(_) | NotFound | Unmodified | Fatal(_) | Cancelled => true,
             Timeout | Other(_) => false,
         }
     }
```
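
Because `Fatal(_)` is classified as permanent, retry loops give up on it immediately instead of retrying a doomed download. A minimal sketch (not the pageserver's actual download machinery) of how a caller might consult `is_permanent()`, using the `DownloadError` type from the diff above:

```rust
// Sketch: retry transient failures (Timeout, Other) up to a limit, but give
// up at once on permanent errors, including the new Fatal variant.
fn download_with_retries<T>(
    mut attempt: impl FnMut() -> Result<T, DownloadError>,
    max_retries: usize,
) -> Result<T, DownloadError> {
    let mut tries = 0;
    loop {
        match attempt() {
            Ok(v) => return Ok(v),
            Err(e) if e.is_permanent() || tries >= max_retries => return Err(e),
            Err(_) => tries += 1, // transient: try again
        }
    }
}
```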

pageserver/src/tenant.rs

Lines changed: 6 additions & 0 deletions

```diff
@@ -1433,6 +1433,12 @@ impl Tenant {
                     info!(%timeline_id, "index_part not found on remote");
                     continue;
                 }
+                Err(DownloadError::Fatal(why)) => {
+                    // If, while loading one remote timeline, we saw an indication that our generation
+                    // number is likely invalid, then we should not load the whole tenant.
+                    error!(%timeline_id, "Fatal error loading timeline: {why}");
+                    anyhow::bail!(why.to_string());
+                }
                 Err(e) => {
                     // Some (possibly ephemeral) error happened during index_part download.
                     // Pretend the timeline exists to not delete the timeline directory,
```
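
The surrounding control flow is not visible in the hunk, so here is a heavily simplified, self-contained sketch of its shape, with hypothetical stand-in types rather than the real pageserver definitions: the `bail!` above propagates out of the tenant load, and the caller is what moves the tenant into Broken state.

```rust
// Hypothetical, simplified stand-ins (not the real pageserver types):
enum DownloadError {
    Fatal(String),
    Other(String),
}

enum TenantState {
    Active,
    Broken(String),
}

type TimelineId = u128; // stand-in; the real type is an ID newtype

fn download_index_part(_timeline_id: TimelineId) -> Result<(), DownloadError> {
    // Stub: the real function fetches index_part.json from remote storage.
    Err(DownloadError::Fatal(
        "Index age exceeds threshold and a newer index exists".into(),
    ))
}

// The shape of the loop in Tenant::load_remote_timelines: the first Fatal
// error aborts the whole tenant load, regardless of which timeline hit it.
fn load_remote_timelines(timeline_ids: &[TimelineId]) -> TenantState {
    for &timeline_id in timeline_ids {
        match download_index_part(timeline_id) {
            Ok(_index_part) => { /* construct and activate this timeline */ }
            Err(DownloadError::Fatal(why)) => return TenantState::Broken(why),
            Err(_) => { /* ephemeral errors: handled as before, load continues */ }
        }
    }
    TenantState::Active
}
```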

pageserver/src/tenant/remote_timeline_client.rs

Lines changed: 7 additions & 1 deletion

```diff
@@ -574,12 +574,18 @@ impl RemoteTimelineClient {
 
         if latest_index_generation > index_generation {
             // Unexpected! Why are we loading such an old index if a more recent one exists?
-            tracing::warn!(
+            // We will refuse to proceed, as there is no reasonable scenario where this should happen, but
+            // there _is_ a clear bug/corruption scenario where it would happen (controller sets the generation
+            // backwards).
+            tracing::error!(
                 ?index_generation,
                 ?latest_index_generation,
                 ?latest_index_mtime,
                 "Found a newer index while loading an old one"
             );
+            return Err(DownloadError::Fatal(
+                "Index age exceeds threshold and a newer index exists".into(),
+            ));
         }
     }
```
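The check combines two signals: the index being loaded is old (its mtime is past a threshold, per the error string) and a newer generation's index exists. A condensed sketch of that predicate, reusing `DownloadError` from the diff above; the threshold constant, the `u32` generations, and the exact gating around the hunk are assumptions, not the real pageserver code:

```rust
use std::time::{Duration, SystemTime};

// Hypothetical threshold: the real constant lives in the pageserver source.
const OLD_INDEX_AGE_THRESHOLD: Duration = Duration::from_secs(14 * 24 * 3600);

/// Refuse to load a suspiciously old, non-latest index.
fn validate_index_freshness(
    index_generation: u32,        // generation we were asked to load
    index_mtime: SystemTime,      // LastModified of that index object
    latest_index_generation: u32, // newest generation found by listing
) -> Result<(), DownloadError> {
    let age = SystemTime::now()
        .duration_since(index_mtime)
        .unwrap_or_default();
    if age > OLD_INDEX_AGE_THRESHOLD && latest_index_generation > index_generation {
        // A newer index exists and ours is ancient: almost certainly a
        // generation number that went backwards. Do not proceed.
        return Err(DownloadError::Fatal(
            "Index age exceeds threshold and a newer index exists".into(),
        ));
    }
    Ok(())
}
```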

test_runner/regress/test_pageserver_generations.py

Lines changed: 67 additions & 1 deletion

```diff
@@ -35,9 +35,10 @@
     wait_for_upload,
 )
 from fixtures.remote_storage import (
+    LocalFsStorage,
     RemoteStorageKind,
 )
-from fixtures.utils import wait_until
+from fixtures.utils import run_only_on_default_postgres, wait_until
 from fixtures.workload import Workload
 
 if TYPE_CHECKING:
@@ -728,3 +729,68 @@ def test_upgrade_generationless_local_file_paths(
     )
     # We should download into the same local path we started with
     assert os.path.exists(victim_path)
+
+
+@run_only_on_default_postgres("Only tests index logic")
+def test_old_index_time_threshold(
+    neon_env_builder: NeonEnvBuilder,
+):
+    """
+    Exercise pageserver's detection of trying to load an ancient non-latest index.
+    (see https://github.com/neondatabase/neon/issues/6951)
+    """
+
+    # Run with local_fs because we will interfere with mtimes by local filesystem access
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
+    env = neon_env_builder.init_start()
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init()
+    workload.write_rows(32)
+
+    # Remember generation 1's index path
+    assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
+    index_path = env.pageserver_remote_storage.index_path(tenant_id, timeline_id)
+
+    # Increment generation by detaching+attaching, and write+flush some data to get a new remote index
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": "Detached"})
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": {"Attached": 0}})
+    env.storage_controller.reconcile_until_idle()
+    workload.churn_rows(32)
+
+    # A new index should have been written
+    assert env.pageserver_remote_storage.index_path(tenant_id, timeline_id) != index_path
+
+    # Hack the mtime on the generation 1 index
+    log.info(f"Setting old mtime on {index_path}")
+    os.utime(index_path, times=(time.time(), time.time() - 30 * 24 * 3600))
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*Found a newer index while loading an old one.*",
+            ".*Index age exceeds threshold and a newer index exists.*",
+        ]
+    )
+
+    # Detach from storage controller + attach in an old generation directly on the pageserver.
+    workload.stop()
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": "Detached"})
+    env.storage_controller.reconcile_until_idle()
+    env.storage_controller.tenant_policy_update(tenant_id, {"scheduling": "Stop"})
+    env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy")
+
+    # The controller would not do this (attach in an old generation): we are doing it to simulate
+    # a hypothetical profound bug in the controller.
+    env.pageserver.http_client().tenant_location_conf(
+        tenant_id, {"generation": 1, "mode": "AttachedSingle", "tenant_conf": {}}
+    )
+
+    # The pageserver should react to this situation by refusing to attach the tenant and putting
+    # it into Broken state
+    env.pageserver.allowed_errors.append(".*tenant is broken.*")
+    with pytest.raises(
+        PageserverApiException,
+        match="tenant is broken: Index age exceeds threshold and a newer index exists",
+    ):
+        env.pageserver.http_client().timeline_detail(tenant_id, timeline_id)
```
