Skip to content

Commit 9484c9f

Browse files
authored
feat(cli): add search index management tools (#18840)
* feat(cli): add search index management tools Two tools: - print out current state of cluster indices - remove older, non-current-alias indices Signed-off-by: Mike Fiedler <[email protected]> * Use choices Signed-off-by: Mike Fiedler <[email protected]> --------- Signed-off-by: Mike Fiedler <[email protected]>
1 parent 7b7d6cb commit 9484c9f

File tree

2 files changed

+250
-1
lines changed

2 files changed

+250
-1
lines changed

tests/unit/cli/test_search.py

Lines changed: 183 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
import pretend
44

5-
from warehouse.cli.search import reindex
5+
from opensearchpy.exceptions import NotFoundError
6+
7+
from warehouse.cli.search import delete_older_indices, print_indices, reindex
68
from warehouse.search.tasks import reindex as _reindex
79

810

@@ -21,3 +23,183 @@ def test_reindex(self, cli):
2123
assert config.task.calls == [pretend.call(_reindex), pretend.call(_reindex)]
2224
assert task.get_request.calls == [pretend.call()]
2325
assert task.run.calls == [pretend.call(request)]
26+
27+
def test_print_indices(self, cli):
    """print_indices echoes the cat table, then one line per alias mapping."""
    index_pattern = "production*,staging*"
    cat_table = (
        "health status index id pri rep docs.count docs.deleted store.size creation.date.string\n"  # noqa: E501
        "green open production-2024-01-01 abc 1 0 1000 0 1mb 2024-01-01T00:00:00Z"
    )
    alias_map = {
        "production-2024-01-01": {"aliases": {"production": {}}},
        "staging-2024-01-01": {"aliases": {"staging": {}}},
    }

    # Recorders stand in for the two OpenSearch calls the command makes.
    list_indices = pretend.call_recorder(lambda **kw: cat_table)
    fetch_aliases = pretend.call_recorder(lambda **kw: alias_map)
    opensearch = pretend.stub(
        cat=pretend.stub(indices=list_indices),
        indices=pretend.stub(get_alias=fetch_aliases),
    )
    config = pretend.stub(registry={"opensearch.client": opensearch})

    result = cli.invoke(print_indices, obj=config)

    assert result.exit_code == 0
    # The cat API is asked for a fixed column set, newest index first.
    assert list_indices.calls == [
        pretend.call(
            index=index_pattern,
            h="health,status,index,id,pri,rep,docs.count,docs.deleted,store.size,creation.date.string",  # noqa: E501
            s="creation.date.string:desc",
            v=True,
        )
    ]
    # Aliases are looked up for the same index pattern.
    assert fetch_aliases.calls == [pretend.call(index=index_pattern)]
    # Both the table header and every alias line reach the output.
    for snippet in (
        "health status index",
        "Current Aliases:",
        "production -> production-2024-01-01",
        "staging -> staging-2024-01-01",
    ):
        assert snippet in result.output
66+
67+
def test_delete_older_indices_success(self, cli):
    """With three indices, only the oldest one is deleted."""
    # The alias targets 2024-03-01 and 2024-02-01 survives as the backup,
    # which leaves 2024-01-01 as the single deletion candidate.
    known_indices = {
        "production-2024-03-01": {},
        "production-2024-02-01": {},
        "production-2024-01-01": {},
    }
    get_alias = pretend.call_recorder(
        lambda **kw: {"production-2024-03-01": {"aliases": {"production": {}}}}
    )
    get = pretend.call_recorder(lambda **kw: known_indices)
    delete = pretend.call_recorder(lambda **kw: {"acknowledged": True})
    opensearch = pretend.stub(
        indices=pretend.stub(get_alias=get_alias, get=get, delete=delete)
    )
    config = pretend.stub(registry={"opensearch.client": opensearch})

    result = cli.invoke(delete_older_indices, ["production"], obj=config)

    assert result.exit_code == 0
    # The alias is resolved by name, then every matching index is listed.
    assert get_alias.calls == [pretend.call(name="production")]
    assert get.calls == [pretend.call(index="production-*")]
    # Only the oldest index is removed; the latest two are kept.
    assert delete.calls == [pretend.call(index="production-2024-01-01")]
    for snippet in (
        "Current index: production-2024-03-01",
        "Found 1 older indices to delete.",
        "Deleting index: production-2024-01-01",
        "Done.",
    ):
        assert snippet in result.output
103+
104+
def test_delete_older_indices_multiple_deletions(self, cli):
    """With five indices, the three oldest are deleted newest-first."""
    # The alias targets 2024-04-01 and the newer 2024-05-01 is retained as
    # the most recent non-alias index; everything older must be deleted.
    known_indices = {
        "production-2024-05-01": {},
        "production-2024-04-01": {},
        "production-2024-03-01": {},
        "production-2024-02-01": {},
        "production-2024-01-01": {},
    }
    get_alias = pretend.call_recorder(
        lambda **kw: {"production-2024-04-01": {"aliases": {"production": {}}}}
    )
    get = pretend.call_recorder(lambda **kw: known_indices)
    delete = pretend.call_recorder(lambda **kw: {"acknowledged": True})
    opensearch = pretend.stub(
        indices=pretend.stub(get_alias=get_alias, get=get, delete=delete)
    )
    config = pretend.stub(registry={"opensearch.client": opensearch})

    result = cli.invoke(delete_older_indices, ["production"], obj=config)

    assert result.exit_code == 0
    # Every index older than the two retained ones gets its own delete call.
    expected_deletions = [
        pretend.call(index="production-2024-03-01"),
        pretend.call(index="production-2024-02-01"),
        pretend.call(index="production-2024-01-01"),
    ]
    assert delete.calls == expected_deletions
    for snippet in (
        "Current index: production-2024-04-01",
        "Found 3 older indices to delete.",
        "Done.",
    ):
        assert snippet in result.output
139+
140+
def test_delete_older_indices_no_alias(self, cli):
    """A missing alias aborts the command with exit code 1."""

    def alias_lookup_fails(**kw):
        # Simulate OpenSearch answering the alias lookup with a 404.
        raise NotFoundError(404, "index_not_found_exception", "no such index")

    get_alias = pretend.call_recorder(alias_lookup_fails)
    opensearch = pretend.stub(indices=pretend.stub(get_alias=get_alias))
    config = pretend.stub(registry={"opensearch.client": opensearch})

    result = cli.invoke(delete_older_indices, ["production"], obj=config)

    assert result.exit_code == 1
    # The lookup was attempted exactly once before aborting.
    assert get_alias.calls == [pretend.call(name="production")]
    # The operator is told why nothing happened.
    assert "No alias found for production, aborting." in result.output
156+
157+
def test_delete_older_indices_only_current(self, cli):
    """When the aliased index is the only index, nothing is deleted."""
    get_alias = pretend.call_recorder(
        lambda **kw: {"production-2024-03-01": {"aliases": {"production": {}}}}
    )
    get = pretend.call_recorder(lambda **kw: {"production-2024-03-01": {}})
    delete = pretend.call_recorder(lambda **kw: {"acknowledged": True})
    opensearch = pretend.stub(
        indices=pretend.stub(get_alias=get_alias, get=get, delete=delete)
    )
    config = pretend.stub(registry={"opensearch.client": opensearch})

    result = cli.invoke(delete_older_indices, ["production"], obj=config)

    assert result.exit_code == 0
    # No backup and no older indices exist, so delete is never invoked.
    assert delete.calls == []
    for snippet in (
        "Current index: production-2024-03-01",
        "Found 0 older indices to delete.",
        "Done.",
    ):
        assert snippet in result.output
178+
179+
def test_delete_older_indices_keeps_two(self, cli):
    """With exactly two indices (current + one backup), both are kept."""
    known_indices = {
        "production-2024-02-01": {},
        "production-2024-01-01": {},
    }
    get_alias = pretend.call_recorder(
        lambda **kw: {"production-2024-02-01": {"aliases": {"production": {}}}}
    )
    get = pretend.call_recorder(lambda **kw: known_indices)
    delete = pretend.call_recorder(lambda **kw: {"acknowledged": True})
    opensearch = pretend.stub(
        indices=pretend.stub(get_alias=get_alias, get=get, delete=delete)
    )
    config = pretend.stub(registry={"opensearch.client": opensearch})

    result = cli.invoke(delete_older_indices, ["production"], obj=config)

    assert result.exit_code == 0
    # The latest two indices are retained, so delete is never invoked.
    assert delete.calls == []
    for snippet in (
        "Current index: production-2024-02-01",
        "Found 0 older indices to delete.",
        "Done.",
    ):
        assert snippet in result.output

warehouse/cli/search.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import click
44

5+
from opensearchpy.exceptions import NotFoundError
6+
57
from warehouse.cli import warehouse
68
from warehouse.search.tasks import reindex as _reindex
79

@@ -22,3 +24,68 @@ def reindex(config):
2224

2325
request = config.task(_reindex).get_request()
2426
config.task(_reindex).run(request)
27+
28+
29+
@search.command()
@click.pass_obj
def print_indices(config):
    """
    Print details about all existing search indices.
    """
    client = config.registry["opensearch.client"]

    # A single pattern feeds both calls below, so the index table and the
    # alias listing always describe the same set of indices.
    index_pattern = "production*,staging*"
    # Columns requested from the CAT API, in display order.
    columns = ",".join(
        (
            "health",
            "status",
            "index",
            "id",
            "pri",
            "rep",
            "docs.count",
            "docs.deleted",
            "store.size",
            "creation.date.string",
        )
    )

    # CAT Client https://docs.opensearch.org/latest/api-reference/cat/index/
    # https://opensearch-project.github.io/opensearch-py/api-ref/clients/cat_client.html
    index_table = client.cat.indices(
        index=index_pattern,
        h=columns,
        s="creation.date.string:desc",  # newest index first
        v=True,  # include column headers for easier reading
    )
    click.echo(index_table)

    # The response is keyed by index name; each value carries an "aliases"
    # mapping whose keys are the alias names pointing at that index.
    aliases = client.indices.get_alias(index=index_pattern)
    click.echo("\nCurrent Aliases:")
    for index_name, details in aliases.items():
        for alias_name in details["aliases"]:
            click.echo(f"{alias_name} -> {index_name}")
52+
53+
54+
@search.command()
@click.argument("env_name", type=click.Choice(["production", "staging", "development"]))
@click.pass_obj
def delete_older_indices(config, env_name):
    """
    Delete older search indices, keeping the latest two.
    Ensure the current alias is pointing to the latest index before running this.

    ENV_NAME: Environment name (e.g., 'production' or 'staging')
    """
    client = config.registry["opensearch.client"]

    # Resolve the alias to the "live" index (or indices); those are never
    # deleted. A 404 and an empty response are both treated as "no alias".
    try:
        alias = client.indices.get_alias(name=env_name)
    except NotFoundError:
        alias = {}
    if not alias:
        click.echo(f"No alias found for {env_name}, aborting.", err=True)
        raise click.Abort()

    # The response is keyed by index name. Protect every index the alias
    # targets, not just the first key, so a multi-index alias cannot lose a
    # live index.
    live_indices = set(alias)
    for current_index in sorted(live_indices):
        click.echo(f"Current index: {current_index}")

    indices = client.indices.get(index=f"{env_name}-*")
    # Index names sort chronologically (the tests use dated suffixes such as
    # "production-2024-03-01"), so reverse order puts the newest first.
    # Filtering instead of list.remove() avoids a ValueError when the live
    # index does not match the "<env_name>-*" pattern.
    candidates = sorted(
        (name for name in indices if name not in live_indices), reverse=True
    )
    # Keep the most recent non-alias index as a backup; the rest are older
    # than the two retained indices and get deleted.
    stale_indices = candidates[1:]
    click.echo(f"Found {len(stale_indices)} older indices to delete.")

    for index in stale_indices:
        click.echo(f"Deleting index: {index}")
        client.indices.delete(index=index)

    click.echo("Done.")

0 commit comments

Comments
 (0)