Skip to content

Commit 61ea9be

Browse files
authored
Merge pull request #955 from hubmapconsortium/karlburke/AdaptFreshIndicesToNewBaseImage
Karlburke/adapt fresh indices to new base image
2 parents f468aec + e7e798a commit 61ea9be

File tree

5 files changed

+121
-74
lines changed

5 files changed

+121
-74
lines changed

README.md

Lines changed: 5 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -19,28 +19,6 @@ git submodule update --init --remote
1919
Front end developers who need to work on the `portal` index should start in
2020
[the `addl_index_transformations/portal` subdirectory](https://github.com/hubmapconsortium/search-api/tree/main/hubmap-translation/src/hubmap_translation/addl_index_transformations/portal);
2121

22-
23-
### Local development
24-
After checking out the repo, installing the dependencies,
25-
and starting a local Elasticsearch instance, tests should pass:
26-
```shell
27-
pip install -r src/requirements.txt
28-
pip install -r src/requirements-dev.txt
29-
30-
# on mac:
31-
brew tap elastic/tap
32-
brew install elastic/tap/elasticsearch-full
33-
34-
## On macOS 13, elasticsearch is not compatible with the default JDK. To work around this, install openjdk and disable the machine learning functionality.
35-
brew install openjdk
36-
echo 'export ES_JAVA_HOME="/opt/homebrew/opt/openjdk"' >> ~/.zshrc
37-
echo 'xpack.ml.enabled: false' >> /opt/homebrew/etc/elasticsearch/elasticsearch.yml
38-
39-
elasticsearch & # Wait for it to start...
40-
41-
./test.sh
42-
```
43-
4422
### To release via TEST infrastructure
4523
- Make new feature or bug fix branches from `main` branch (the default branch)
4624
- Make PRs to `main`
@@ -58,9 +36,9 @@ elasticsearch & # Wait for it to start...
5836

5937
The search-api base URL for each deployment environment:
6038

61-
- DEV: `https://search-api.dev.hubmapconsortium.org`
62-
- TEST: `https://search-api.test.hubmapconsortium.org`
63-
- PROD: `https://search.api.hubmapconsortium.org`
39+
- DEV: `https://search-api.dev.hubmapconsortium.org/v3/`
40+
- TEST: `https://search-api.test.hubmapconsortium.org/v3/`
41+
- PROD: `https://search.api.hubmapconsortium.org/v3/`
6442

6543
## Request endpoints
6644

@@ -162,7 +140,7 @@ query_dict = {
162140
}
163141
}
164142
response = requests.post(
165-
'https://search-api.dev.hubmapconsortium.org/search',
143+
'https://search-api.dev.hubmapconsortium.org/v3/search',
166144
json = query_dict,
167145
headers = {'Authorization': 'Bearer ' + nexus_token})
168146
hits = response.json()['hits']['hits']
@@ -224,7 +202,7 @@ There are a few configurable environment variables to keep in mind:
224202
We can set and verify the environment variable like below:
225203

226204
````
227-
export COMMONS_BRANCH=master
205+
export COMMONS_BRANCH=main
228206
echo $COMMONS_BRANCH
229207
````
230208

@@ -244,11 +222,6 @@ cd docker
244222
./docker-deployment.sh [start|stop|down]
245223
```
246224

247-
For the release candidate (RC) instance, use a separate script:
248-
249-
```
250-
./docker-rc.sh [start|stop|down]
251-
```
252225

253226
## Updating API Documentation
254227

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.6.6
1+
3.6.7

scripts/fresh_indices/es_manager.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def get_document_agg_value(self, index_name, field_name, agg_name_enum: AggQuery
2828
raise Exception(f"agg_name_enum='{agg_name_enum}' is not a supported aggregation.")
2929

3030
headers = {'Content-Type': 'application/json'}
31-
agg_field_query = f'{{ "aggs": {{"agg_query_result": {{"{agg_name_enum}": {{"field": "{field_name}"}}}}}}}}'
31+
agg_field_query = f'{{ "aggs": {{"agg_query_result": {{"{agg_name_enum.value}": {{"field": "{field_name}"}}}}}}}}'
3232
try:
3333
rspn = requests.post(f"{self.elasticsearch_url}/{index_name}/_search?size=0"
3434
,headers=headers
@@ -205,18 +205,18 @@ def empty_index(self, index_name):
205205
# e.g. PUT your_index/_settings {"index": {"blocks.read_only": false}}
206206
# https://opensearch.org/docs/latest/api-reference/cluster-api/cluster-settings/
207207
# https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-blocks.html
208-
def set_index_block(self, index_name, block_name):
209-
if block_name not in IndexBlockType:
210-
raise ValueError(f"'{block_name}' is not a block name supported by IndexBlockType")
208+
def set_index_block(self, index_name: str, block_type_enum: IndexBlockType) -> None:
209+
if block_type_enum not in IndexBlockType:
210+
raise ValueError(f"'{block_type_enum}' is not a block name supported by IndexBlockType")
211211
try:
212-
if block_name is IndexBlockType.NONE:
212+
if block_type_enum is IndexBlockType.NONE:
213213
headers = {'Content-Type': 'application/json'}
214214
payload_json = '{"index": {"blocks.write": false, "blocks.read_only": false, "blocks.read_only_allow_delete": false}}'
215215
rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_settings"
216216
,headers=headers
217217
,data=payload_json)
218218
else:
219-
rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_block/{block_name}")
219+
rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_block/{block_type_enum.value}")
220220
except Exception as e:
221221
msg = "Exception encountered during executing ESManager.set_index_block()"
222222
# Log the full stack trace, prepend a line with our message
@@ -233,12 +233,12 @@ def set_index_block(self, index_name, block_name):
233233
# "blocked": true
234234
# }]
235235
# }
236-
logger.info(f"Set '{block_name}' block on index: {index_name}")
236+
logger.info(f"Set '{block_type_enum.value}' block on index: {index_name}")
237237
return
238238
else:
239-
logger.error(f"Failed to set '{block_name}' block on index: {index_name}")
239+
logger.error(f"Failed to set '{block_type_enum.value}' block on index: {index_name}")
240240
logger.error(f"Error Message: {rspn.text}")
241-
raise Exception(f"Failed to set '{block_name}' block on"
241+
raise Exception(f"Failed to set '{block_type_enum.value}' block on"
242242
f" index: {index_name}, with"
243243
f" status_code {rspn.status_code}. See logs.")
244244

scripts/fresh_indices/fresh_indices.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ def get_translator():
230230

231231
a_translator = Translator(INDICES, appcfg['APP_CLIENT_ID'], appcfg['APP_CLIENT_SECRET'], token,
232232
appcfg['ONTOLOGY_API_BASE_URL'])
233+
a_translator.log_configuration()
233234

234235
# Skip the UUID comparison step, which is only needed for the live /reindex-all PUT call
235236
a_translator.skip_comparision = True
@@ -350,12 +351,12 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
350351
flush_index=destination_index.replace('fill','flush')
351352

352353
# Block writing on the indices, even though services which write to them should probably be down.
353-
logger.debug(f"Set {IndexBlockType.WRITE} block on source_index={source_index}.")
354+
logger.debug(f"Set {IndexBlockType.WRITE.value} block on source_index={source_index}.")
354355
es_mgr.set_index_block(index_name=source_index
355-
, block_name=IndexBlockType.WRITE)
356-
logger.debug(f"Set {IndexBlockType.WRITE} block on destination_index={destination_index}.")
356+
, block_type_enum=IndexBlockType.WRITE)
357+
logger.debug(f"Set {IndexBlockType.WRITE.value} block on destination_index={destination_index}.")
357358
es_mgr.set_index_block(index_name=destination_index
358-
, block_name=IndexBlockType.WRITE)
359+
, block_type_enum=IndexBlockType.WRITE)
359360
# Make sure the source_index health is "green" before proceeding.
360361
es_mgr.wait_until_index_green(index_name=source_index
361362
,wait_in_secs=30)
@@ -370,9 +371,9 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
370371
es_mgr.wait_until_index_green(index_name=flush_index
371372
,wait_in_secs=30)
372373
logger.debug(f"Health of flush_index={flush_index} is green.")
373-
logger.debug(f"Set {IndexBlockType.NONE} block on source_index={source_index}.")
374+
logger.debug(f"Set {IndexBlockType.NONE.value} block on source_index={source_index}.")
374375
es_mgr.set_index_block(index_name=source_index
375-
, block_name=IndexBlockType.NONE)
376+
, block_type_enum=IndexBlockType.NONE)
376377
es_mgr.delete_index(index_name=source_index)
377378
logger.debug(f"Deleted source_index={source_index}.")
378379
op_data_supplement['golive']['swap_info'].append(f"Deleted {source_index}")
@@ -387,21 +388,21 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
387388
es_mgr.wait_until_index_green(index_name=source_index
388389
,wait_in_secs=30)
389390
logger.debug(f"Health of source_index={source_index} is green.")
390-
logger.debug(f"Set {IndexBlockType.NONE} block on destination_index={destination_index}.")
391+
logger.debug(f"Set {IndexBlockType.NONE.value} block on destination_index={destination_index}.")
391392
es_mgr.set_index_block(index_name=destination_index
392-
, block_name=IndexBlockType.NONE)
393+
, block_type_enum=IndexBlockType.NONE)
393394
es_mgr.delete_index(index_name=destination_index)
394395
logger.debug(f"Deleted destination_index={destination_index}.")
395396
op_data_supplement['golive']['swap_info'].append(f"Deleted {destination_index}")
396397

397398
# Assure that the index which will be actively used by Search API and the
398399
# backup of the previous version are writeable.
399-
logger.debug(f"Set {IndexBlockType.NONE} block on source_index={source_index}.")
400+
logger.debug(f"Set {IndexBlockType.NONE.value} block on source_index={source_index}.")
400401
es_mgr.set_index_block(index_name=source_index
401-
, block_name=IndexBlockType.NONE)
402-
logger.debug(f"Set {IndexBlockType.NONE} block on flush_index={flush_index}.")
402+
, block_type_enum=IndexBlockType.NONE)
403+
logger.debug(f"Set {IndexBlockType.NONE.value} block on flush_index={flush_index}.")
403404
es_mgr.set_index_block(index_name=flush_index
404-
, block_name=IndexBlockType.NONE)
405+
, block_type_enum=IndexBlockType.NONE)
405406
else:
406407
logger.error(f"Unable to 'rename' indices for fill_strategy={fill_strategy}")
407408

scripts/fresh_indices/fresh_indices.sh

Lines changed: 93 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,45 @@ Help()
6262
################################################################################
6363
# Verify the needs of this script are available, the version is acceptable, etc.
6464
################################################################################
65-
StartupVerifications()
66-
{
67-
# No version requirement for Python 3, but don't expect it to report
68-
# a version if it is unavailable
69-
if ! python3 --version | grep '^Python 3.[0-9]' > /dev/null; then
70-
bail_out_errors+=("Python 3 does not seem to be available")
71-
elif [[ "$arg_verbose" == true ]]; then
72-
echo Python 3 found - `python3 --version`
65+
StartupVerifications() {
66+
# Check Python version >= 3.13 using Python itself, capturing
67+
# output of heredoc to python_output variable.
68+
python_output="$(
69+
python3.13 - << 'EOF'
70+
import sys
71+
72+
required = (3, 13)
73+
current = sys.version_info
74+
75+
if current >= required:
76+
# Print exact found version on success for the Bash wrapper to capture
77+
print(f"OK {current.major}.{current.minor}.{current.micro}")
78+
raise SystemExit(0)
79+
80+
print(
81+
f"Python ≥ {required[0]}.{required[1]} is required, "
82+
f"but found {current.major}.{current.minor}.{current.micro}"
83+
)
84+
raise SystemExit(2)
85+
EOF
86+
)"
87+
88+
status=$?
89+
90+
if [[ $status -eq 0 ]]; then
91+
# If verbose, print the discovered version
92+
if [[ "$arg_verbose" == true ]]; then
93+
# Extract version after "OK "
94+
python_version=${python_output#"OK "}
95+
echo "Python 3 found – $python_version"
96+
fi
97+
else
98+
# Append failure message to the global error array
99+
bail_out_errors+=("$python_output")
73100
fi
74101

102+
# Make sure an admin group token has been placed in a file so
103+
# it can be passed in on the Python command line.
75104
if [[ ! -f "./token_holder" ]]; then
76105
bail_out_errors+=("The file 'token_holder' is not found in `pwd`")
77106
fi
@@ -87,17 +116,6 @@ printf -v date_stamp '%(%Y-%m-%d)T' -1
87116
# Commands accepted in the script arguments after the options, as described in Help()
88117
recognized_commands=("create","catch-up","go-live")
89118

90-
# Pull the names of the destination indices from the same YAML which will be
91-
# used for reindexing.
92-
readarray -t entities_portal_indices < <(
93-
python -c 'import yaml,sys; \
94-
y=yaml.safe_load(sys.stdin); \
95-
print(y["indices"]["entities"]["public"]); \
96-
print(y["indices"]["entities"]["private"]); \
97-
print(y["indices"]["portal"]["public"]); \
98-
print(y["indices"]["portal"]["private"])' < ../../src/instance/search-config.yaml
99-
)
100-
101119
################################################################################
102120
# Set internal variables used by this script
103121
################################################################################
@@ -146,9 +164,64 @@ else
146164
esac
147165
fi
148166

167+
LoadEntitiesPortableIndices() {
168+
169+
# Assign the argument passed in to the config_file variable
170+
local config_file="$1"
171+
172+
# Reset the array on each call
173+
entities_portal_indices=()
174+
175+
# Capture *stdout and stderr* from Python into python_output
176+
python_output="$(
177+
python3.13 - "$config_file" << 'EOF' 2>&1
178+
import yaml, sys
179+
180+
try:
181+
with open(sys.argv[1]) as f:
182+
y = yaml.safe_load(f)
183+
except Exception:
184+
print(f"Unable to find configuration file: {sys.argv[1]}")
185+
raise SystemExit(2)
186+
187+
try:
188+
print(y["indices"]["entities"]["public"])
189+
print(y["indices"]["entities"]["private"])
190+
print(y["indices"]["portal"]["public"])
191+
print(y["indices"]["portal"]["private"])
192+
except KeyError as ke:
193+
raise SystemExit(f"Missing key in {sys.argv[1]}: {ke}")
194+
EOF
195+
)"
196+
197+
local status=$?
198+
199+
if [[ $status -eq 0 ]]; then
200+
# Success, split python_output into array lines
201+
readarray -t entities_portal_indices <<< "$python_output"
202+
203+
if [[ "$arg_verbose" == true ]]; then
204+
echo "Loaded indices from: $config_file"
205+
for index in "${entities_portal_indices[@]%,}"; do
206+
printf "\t%s\n" "$index"
207+
done
208+
fi
209+
210+
return 0
211+
else
212+
# Failure, add the *entire python_output* to the error array
213+
bail_out_errors+=("$python_output")
214+
return $status
215+
fi
216+
}
217+
149218
# Verify resources this script needs are available.
150219
StartupVerifications
151220

221+
# Load the index names from the project's YAML configuration file
222+
config_file="../../src/instance/search-config.yaml"
223+
LoadEntitiesPortableIndices "$config_file"
224+
152225
# Verify the specified output directory is writeable.
153226
if [ ! -w $arg_output_dir ]; then
154227
bail_out_errors+=("Unable to write files to '${arg_output_dir}'.")
@@ -182,6 +255,6 @@ else
182255
echo "Unexpectedly tried to execute with cmd='$cmd'"
183256
fi
184257
MYPYPATH=../../src:../../src/search-adaptor/src:../../src/search-adaptor/src/libs:../../src/search-adaptor/src/translator
185-
PYTHONPATH=$MYPYPATH python3 fresh_indices.py $cmd `cat ./token_holder`
258+
PYTHONPATH=$MYPYPATH python3.13 fresh_indices.py $cmd `cat ./token_holder`
186259

187260
exit $EXIT_SUCCESS

0 commit comments

Comments
 (0)