     wait_for_upload,
 )
 from fixtures.remote_storage import (
+    LocalFsStorage,
     RemoteStorageKind,
 )
-from fixtures.utils import wait_until
+from fixtures.utils import run_only_on_default_postgres, wait_until
 from fixtures.workload import Workload

 if TYPE_CHECKING:
@@ -728,3 +729,68 @@ def test_upgrade_generationless_local_file_paths(
     )
     # We should download into the same local path we started with
     assert os.path.exists(victim_path)
+
+
+@run_only_on_default_postgres("Only tests index logic")
+def test_old_index_time_threshold(
+    neon_env_builder: NeonEnvBuilder,
+):
+    """
+    Exercise the pageserver's detection of attempts to load an ancient, non-latest index
+    (see https://github.com/neondatabase/neon/issues/6951).
+    """
+
+    # Run with local_fs remote storage because we will tamper with index mtimes via
+    # direct filesystem access.
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
+    env = neon_env_builder.init_start()
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
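+    # Write some initial data so that generation 1 uploads an index to remote storage.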
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init()
+    workload.write_rows(32)
+
+    # Remember generation 1's index path
+    assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
+    index_path = env.pageserver_remote_storage.index_path(tenant_id, timeline_id)
+
+    # Increment the generation by detaching and re-attaching, then write and flush some
+    # data to get a new remote index.
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": "Detached"})
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": {"Attached": 0}})
+    env.storage_controller.reconcile_until_idle()
+    workload.churn_rows(32)
+
+    # A new index should have been written
+    assert env.pageserver_remote_storage.index_path(tenant_id, timeline_id) != index_path
+
+    # Hack the mtime on the generation 1 index
+    log.info(f"Setting old mtime on {index_path}")
+    os.utime(index_path, times=(time.time(), time.time() - 30 * 24 * 3600))
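+    # 30 days back is assumed to be comfortably past the pageserver's old-index age
+    # threshold; tolerate the errors it is expected to log on encountering the stale index.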
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*Found a newer index while loading an old one.*",
+            ".*Index age exceeds threshold and a newer index exists.*",
+        ]
+    )
+
+    # Detach from the storage controller + attach in an old generation directly on the
+    # pageserver.
+    workload.stop()
+    env.storage_controller.tenant_policy_update(tenant_id, {"placement": "Detached"})
+    env.storage_controller.reconcile_until_idle()
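+    # Stop scheduling so the storage controller cannot reconcile the tenant back into a
+    # fresh generation while we attach it manually below.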
+    env.storage_controller.tenant_policy_update(tenant_id, {"scheduling": "Stop"})
+    env.storage_controller.allowed_errors.append(".*Scheduling is disabled by policy")
+
+    # The controller would not do this (attach in an old generation): we are doing it to
+    # simulate a hypothetical profound bug in the controller.
+    env.pageserver.http_client().tenant_location_conf(
+        tenant_id, {"generation": 1, "mode": "AttachedSingle", "tenant_conf": {}}
+    )
+
+    # The pageserver should react to this situation by refusing to attach the tenant and
+    # putting it into Broken state.
+    env.pageserver.allowed_errors.append(".*tenant is broken.*")
+    with pytest.raises(
+        PageserverApiException,
+        match="tenant is broken: Index age exceeds threshold and a newer index exists",
+    ):
+        env.pageserver.http_client().timeline_detail(tenant_id, timeline_id)