Skip to content

Commit 59463b5

Browse files
committed
indexer: add a new group for indexing
* INSPIR-3238
1 parent 1defd3f commit 59463b5

File tree

23 files changed

+68
-162
lines changed

23 files changed

+68
-162
lines changed

backend/inspirehep/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from inspirehep.factory import create_app
1313
from inspirehep.files.cli import files
1414
from inspirehep.fixtures import fixtures
15+
from inspirehep.indexer.cli import indexer
1516
from inspirehep.mailing.cli import mailing
1617
from inspirehep.orcid.cli import orcid
1718
from inspirehep.records.cli import citations, importer, jobs
@@ -27,3 +28,4 @@
2728
cli.add_command(mailing)
2829
cli.add_command(files)
2930
cli.add_command(sitemap)
31+
cli.add_command(indexer)

backend/inspirehep/indexer/cli.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,18 @@
1717
from invenio_db import db
1818
from invenio_pidstore.models import PersistentIdentifier, PIDStatus
1919
from invenio_search import current_search
20-
from invenio_search.cli import index
2120

21+
from inspirehep.indexer.tasks import batch_index
2222
from inspirehep.records.api import InspireRecord
23-
from inspirehep.records.indexer.tasks import batch_index
2423

2524
LOGGER = structlog.getLogger()
2625

2726

27+
@click.group()
28+
def indexer():
29+
"""Command group Inspire indexing & remap operations. (DO NOT USE ``index`` from Invenio)"""
30+
31+
2832
def next_batch(iterator, batch_size):
2933
"""Get first batch_size elements from the iterable, or remaining if less.
3034
@@ -68,7 +72,7 @@ def _prepare_logdir(log_path):
6872
makedirs(path.dirname(log_path))
6973

7074

71-
@index.command("reindex")
75+
@indexer.command("reindex")
7276
@click.option("--all", is_flag=True, help="Reindex all the records.", show_default=True)
7377
@click.option(
7478
"-p",
@@ -242,7 +246,7 @@ def _finished_tasks_count():
242246
LOGGER.warning(failure)
243247

244248

245-
@index.command(
249+
@indexer.command(
246250
"remap",
247251
help="Remaps specified indexes. Removes all data from index during this process.",
248252
)

backend/inspirehep/records/indexer/tasks.py

Lines changed: 0 additions & 149 deletions
This file was deleted.

backend/tests/integration/records/indexer/test_remap_cli.py renamed to backend/tests/integration/indexer/test_cli.py

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,63 @@
44
#
55
# inspirehep is free software; you can redistribute it and/or modify it under
66
# the terms of the MIT License; see LICENSE file for more details.
7+
78
import random
89

910
from click.testing import CliRunner
10-
from invenio_search import current_search
11+
from helpers.providers.faker import faker
12+
from invenio_search import current_search, current_search_client
1113
from invenio_search.utils import build_index_name
1214
from mock import patch
1315

14-
from inspirehep.indexer.cli import index
16+
from inspirehep.indexer.cli import indexer
17+
from inspirehep.search.api import (
18+
AuthorsSearch,
19+
ConferencesSearch,
20+
JobsSearch,
21+
LiteratureSearch,
22+
)
23+
24+
25+
def test_reindex_all_types_records(
26+
base_app, db, es_clear, create_record_factory, script_info
27+
):
28+
runner = CliRunner()
29+
30+
record_lit = create_record_factory("lit")
31+
record_aut = create_record_factory("aut")
32+
record_job = create_record_factory("job")
33+
record_con = create_record_factory("con")
34+
35+
result = runner.invoke(indexer, ["reindex", "--all"], obj=script_info)
36+
current_search.flush_and_refresh("*")
37+
results_lit_uuid = LiteratureSearch().execute().hits.hits[0]["_id"]
38+
results_aut_uuid = AuthorsSearch().execute().hits.hits[0]["_id"]
39+
results_con_uuid = ConferencesSearch().execute().hits.hits[0]["_id"]
40+
results_job_uuid = JobsSearch().execute().hits.hits[0]["_id"]
41+
42+
assert str(record_lit.id) == results_lit_uuid
43+
assert str(record_aut.id) == results_aut_uuid
44+
assert str(record_con.id) == results_con_uuid
45+
assert str(record_job.id) == results_job_uuid
46+
47+
48+
def test_reindex_one_type_of_record(
49+
base_app, db, es_clear, create_record_factory, script_info
50+
):
51+
runner = CliRunner()
52+
53+
record_lit = create_record_factory("lit")
54+
record_aut = create_record_factory("aut")
55+
56+
result = runner.invoke(indexer, ["reindex", "-p", "lit"], obj=script_info)
57+
current_search.flush_and_refresh("*")
58+
expected_aut_len = 0
59+
results_lit_uuid = LiteratureSearch().execute().hits.hits[0]["_id"]
60+
results_aut_len = len(AuthorsSearch().execute().hits.hits)
61+
62+
assert str(record_lit.id) == results_lit_uuid
63+
assert expected_aut_len == results_aut_len
1564

1665

1766
def test_remap_one_index(base_app, es_clear, script_info):
@@ -20,7 +69,7 @@ def test_remap_one_index(base_app, es_clear, script_info):
2069
# Generate new suffix to distinguish new indexes easier
2170
current_search._current_suffix = f"-{random.getrandbits(64)}"
2271
result = runner.invoke(
23-
index, ["remap", "--index", "records-hep", "--yes-i-know"], obj=script_info
72+
indexer, ["remap", "--index", "records-hep", "--yes-i-know"], obj=script_info
2473
)
2574
current_search.flush_and_refresh("*")
2675
assert result.exit_code == 0
@@ -36,7 +85,7 @@ def test_remap_two_indexex(base_app, es_clear, script_info):
3685
indexes_before = set(current_search.client.indices.get("*").keys())
3786
current_search._current_suffix = f"-{random.getrandbits(64)}"
3887
result = runner.invoke(
39-
index,
88+
indexer,
4089
[
4190
"remap",
4291
"--index",
@@ -66,7 +115,7 @@ def test_remap_index_with_wrong_name(base_app, es_clear, script_info):
66115

67116
indexes_before = set(current_search.client.indices.get("*").keys())
68117
result = runner.invoke(
69-
index, ["remap", "--index", "records-author", "--yes-i-know"], obj=script_info
118+
indexer, ["remap", "--index", "records-author", "--yes-i-know"], obj=script_info
70119
)
71120
current_search.flush_and_refresh("*")
72121

@@ -84,7 +133,7 @@ def test_remap_index_which_is_missing_in_es(base_app, es_clear, script_info):
84133
with patch.dict(base_app.config, config):
85134
indexes_before = set(current_search.client.indices.get("*").keys())
86135
result = runner.invoke(
87-
index,
136+
indexer,
88137
["remap", "--index", "records-authors", "--yes-i-know"],
89138
obj=script_info,
90139
)
@@ -107,7 +156,7 @@ def test_remap_index_which_is_missing_in_es_but_ignore_checks(
107156
with patch.dict(base_app.config, config):
108157
indexes_before = set(current_search.client.indices.get("*").keys())
109158
result = runner.invoke(
110-
index,
159+
indexer,
111160
["remap", "--index", "records-authors", "--yes-i-know", "--ignore-checks"],
112161
obj=script_info,
113162
)
@@ -133,7 +182,7 @@ def test_remap_index_when_there_are_more_than_one_indexes_with_same_name_but_dif
133182
current_search._current_suffix = f"-{random.getrandbits(64)}"
134183
indexes_before = set(current_search.client.indices.get("*").keys())
135184
result = runner.invoke(
136-
index, ["remap", "--index", "records-data", "--yes-i-know"], obj=script_info
185+
indexer, ["remap", "--index", "records-data", "--yes-i-know"], obj=script_info
137186
)
138187
current_search.flush_and_refresh("*")
139188

@@ -155,7 +204,7 @@ def test_remap_index_when_there_are_more_than_one_indexes_with_same_name_but_dif
155204
current_search._current_suffix = f"-{random.getrandbits(64)}"
156205
indexes_before = set(current_search.client.indices.get("*").keys())
157206
result = runner.invoke(
158-
index,
207+
indexer,
159208
["remap", "--index", "records-data", "--yes-i-know", "--ignore-checks"],
160209
obj=script_info,
161210
)

0 commit comments

Comments
 (0)