diff --git a/README.md b/README.md
index 943db541..90db0ccf 100644
--- a/README.md
+++ b/README.md
@@ -19,28 +19,6 @@ git submodule update --init --remote
 
 Front end developers who need to work on the `portal` index should start in [the `addl_index_transformations/portal` subdirectory](https://github.com/hubmapconsortium/search-api/tree/main/hubmap-translation/src/hubmap_translation/addl_index_transformations/portal);
-
-### Local development
-After checking out the repo, installing the dependencies,
-and starting a local Elasticsearch instance, tests should pass:
-```shell
-pip install -r src/requirements.txt
-pip install -r src/requirements-dev.txt
-
-# on mac:
-brew tap elastic/tap
-brew install elastic/tap/elasticsearch-full
-
-## On MacOS 13, elasticsearch is not compatible with the default jdk. To workaround this, install openjdk and disable the machine learning functionality.
-brew install openjdk
-echo 'export ES_JAVA_HOME="/opt/homebrew/opt/openjdk"' >> ~/.zshrc
-echo 'xpack.ml.enabled: false' >> /opt/homebrew/etc/elasticsearch/elasticsearch.yml
-
-elasticsearch & # Wait for it to start...
-
-./test.sh
-```
-
 ### To release via TEST infrastructure
 - Make new feature or bug fix branches from `main` branch (the default branch)
 - Make PRs to `main`
@@ -58,9 +36,9 @@ elasticsearch & # Wait for it to start...
 
 The search-api base URL for each deployment environment:
 
-- DEV: `https://search-api.dev.hubmapconsortium.org`
-- TEST: `https://search-api.test.hubmapconsortium.org`
-- PROD: `https://search.api.hubmapconsortium.org`
+- DEV: `https://search-api.dev.hubmapconsortium.org/v3/`
+- TEST: `https://search-api.test.hubmapconsortium.org/v3/`
+- PROD: `https://search.api.hubmapconsortium.org/v3/`
 
 ## Request endpoints
 
@@ -162,7 +140,7 @@ query_dict = {
     }
 }
 response = requests.post(
-    'https://search-api.dev.hubmapconsortium.org/search',
+    'https://search-api.dev.hubmapconsortium.org/v3/search',
     json = query_dict,
     headers = {'Authorization': 'Bearer ' + nexus_token})
 hits = response.json()['hits']['hits']
@@ -224,7 +202,7 @@ There are a few configurable environment variables to keep in mind:
 
 We can set and verify the environment variable like below:
 
 ````
-export COMMONS_BRANCH=master
+export COMMONS_BRANCH=main
 echo $COMMONS_BRANCH
 ````
@@ -244,11 +222,6 @@ cd docker
 ./docker-deployment.sh [start|stop|down]
 ```
 
-For the Release candicate (RC) instance use a separate script:
-
-```
-./docker-rc.sh [start|stop|down]
-```
 
 ## Updating API Documentation
 
diff --git a/VERSION b/VERSION
index 4f2c1d15..5b341314 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.6.6
+3.6.7
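Note (editorial, outside the patch): the README hunk above adds a `/v3/` prefix to every deployment base URL. Client code that joins endpoint paths onto these bases should keep the trailing slash, or the version segment is silently dropped. A minimal sketch using only the URLs shown above; everything else here is illustrative, not code from the repo:

```python
from urllib.parse import urljoin

# Versioned base URLs, copied from the README change above.
SEARCH_API_BASE = {
    'DEV': 'https://search-api.dev.hubmapconsortium.org/v3/',
    'TEST': 'https://search-api.test.hubmapconsortium.org/v3/',
    'PROD': 'https://search.api.hubmapconsortium.org/v3/',
}

# urljoin() preserves the /v3/ segment only because the base ends with '/'
# and the relative path does not start with '/'.
assert urljoin(SEARCH_API_BASE['DEV'], 'search') == \
    'https://search-api.dev.hubmapconsortium.org/v3/search'

# A leading slash on the relative path would discard the version prefix:
assert urljoin(SEARCH_API_BASE['DEV'], '/search') == \
    'https://search-api.dev.hubmapconsortium.org/search'
```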
diff --git a/scripts/fresh_indices/es_manager.py b/scripts/fresh_indices/es_manager.py
index 3d81c5c6..e0bd0ecb 100644
--- a/scripts/fresh_indices/es_manager.py
+++ b/scripts/fresh_indices/es_manager.py
@@ -28,7 +28,7 @@ def get_document_agg_value(self, index_name, field_name, agg_name_enum: AggQuery
             raise Exception(f"agg_name_enum='{agg_name_enum}' is not a supported aggregation.")
 
         headers = {'Content-Type': 'application/json'}
-        agg_field_query = f'{{ "aggs": {{"agg_query_result": {{"{agg_name_enum}": {{"field": "{field_name}"}}}}}}}}'
+        agg_field_query = f'{{ "aggs": {{"agg_query_result": {{"{agg_name_enum.value}": {{"field": "{field_name}"}}}}}}}}'
         try:
             rspn = requests.post(f"{self.elasticsearch_url}/{index_name}/_search?size=0"
                                  ,headers=headers
@@ -205,18 +205,18 @@ def empty_index(self, index_name):
     # e.g. PUT your_index/_settings {"index": {"blocks.read_only": false}}
     # https://opensearch.org/docs/latest/api-reference/cluster-api/cluster-settings/
     # https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-blocks.html
-    def set_index_block(self, index_name, block_name):
-        if block_name not in IndexBlockType:
-            raise ValueError(f"'{block_name}' is not a block name supported by IndexBlockType")
+    def set_index_block(self, index_name: str, block_type_enum: IndexBlockType) -> None:
+        if block_type_enum not in IndexBlockType:
+            raise ValueError(f"'{block_type_enum}' is not a block name supported by IndexBlockType")
         try:
-            if block_name is IndexBlockType.NONE:
+            if block_type_enum is IndexBlockType.NONE:
                 headers = {'Content-Type': 'application/json'}
                 payload_json = '{"index": {"blocks.write": false, "blocks.read_only": false, "blocks.read_only_allow_delete": false}}'
                 rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_settings"
                                     ,headers=headers
                                     ,data=payload_json)
             else:
-                rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_block/{block_name}")
+                rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_block/{block_type_enum.value}")
         except Exception as e:
             msg = "Exception encountered during executing ESManager.set_index_block()"
             # Log the full stack trace, prepend a line with our message
@@ -233,12 +233,12 @@ def set_index_block(self, index_name, block_name):
             #     "blocked": true
             #   }]
             # }
-            logger.info(f"Set '{block_name}' block on index: {index_name}")
+            logger.info(f"Set '{block_type_enum.value}' block on index: {index_name}")
             return
         else:
-            logger.error(f"Failed to set '{block_name}' block on index: {index_name}")
+            logger.error(f"Failed to set '{block_type_enum.value}' block on index: {index_name}")
             logger.error(f"Error Message: {rspn.text}")
-            raise Exception(f"Failed to set '{block_name}' block on"
+            raise Exception(f"Failed to set '{block_type_enum.value}' block on"
                             f" index: {index_name}, with"
                             f" status_code {rspn.status_code}. See logs.")
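Note (editorial, outside the patch): the `.value` changes in `es_manager.py` above change what is actually sent to Elasticsearch. Interpolating a plain `Enum` member into an f-string renders its qualified name, not the string the aggregation body or the `_block` URL expects. A minimal sketch of the difference; `IndexBlockType` below is a hypothetical stand-in (the member names appear in the diff, the string values are assumed):

```python
from enum import Enum

class IndexBlockType(Enum):   # stand-in only; values are assumed, not copied from the repo
    WRITE = 'write'
    NONE = 'none'

block = IndexBlockType.WRITE

# What the old f-string produced vs. what the API expects:
print(f"/my_index/_block/{block}")        # /my_index/_block/IndexBlockType.WRITE
print(f"/my_index/_block/{block.value}")  # /my_index/_block/write
```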
diff --git a/scripts/fresh_indices/fresh_indices.py b/scripts/fresh_indices/fresh_indices.py
index 65da72be..8610f077 100644
--- a/scripts/fresh_indices/fresh_indices.py
+++ b/scripts/fresh_indices/fresh_indices.py
@@ -230,6 +230,7 @@ def get_translator():
     a_translator = Translator(INDICES, appcfg['APP_CLIENT_ID'], appcfg['APP_CLIENT_SECRET'],
                               token, appcfg['ONTOLOGY_API_BASE_URL'])
+    a_translator.log_configuration()
 
     # Skip the uuids comparision step that is only needed for live /reindex-all PUT call
     a_translator.skip_comparision = True
@@ -350,12 +351,12 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
         flush_index=destination_index.replace('fill','flush')
 
         # Block writing on the indices, even though services which write to them should probably be down.
-        logger.debug(f"Set {IndexBlockType.WRITE} block on source_index={source_index}.")
+        logger.debug(f"Set {IndexBlockType.WRITE.value} block on source_index={source_index}.")
         es_mgr.set_index_block(index_name=source_index
-                               , block_name=IndexBlockType.WRITE)
-        logger.debug(f"Set {IndexBlockType.WRITE} block on destination_index={destination_index}.")
+                               , block_type_enum=IndexBlockType.WRITE)
+        logger.debug(f"Set {IndexBlockType.WRITE.value} block on destination_index={destination_index}.")
         es_mgr.set_index_block(index_name=destination_index
-                               , block_name=IndexBlockType.WRITE)
+                               , block_type_enum=IndexBlockType.WRITE)
 
         # Make sure the source_index health is "green" before proceeding.
         es_mgr.wait_until_index_green(index_name=source_index
                                       ,wait_in_secs=30)
@@ -370,9 +371,9 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
             es_mgr.wait_until_index_green(index_name=flush_index
                                           ,wait_in_secs=30)
             logger.debug(f"Health of flush_index={flush_index} is green.")
-            logger.debug(f"Set {IndexBlockType.NONE} block on source_index={source_index}.")
+            logger.debug(f"Set {IndexBlockType.NONE.value} block on source_index={source_index}.")
             es_mgr.set_index_block(index_name=source_index
-                                   , block_name=IndexBlockType.NONE)
+                                   , block_type_enum=IndexBlockType.NONE)
             es_mgr.delete_index(index_name=source_index)
             logger.debug(f"Deleted source_index={source_index}.")
             op_data_supplement['golive']['swap_info'].append(f"Deleted {source_index}")
@@ -387,21 +388,21 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
             es_mgr.wait_until_index_green(index_name=source_index
                                           ,wait_in_secs=30)
             logger.debug(f"Health of source_index={source_index} is green.")
-            logger.debug(f"Set {IndexBlockType.NONE} block on destination_index={destination_index}.")
+            logger.debug(f"Set {IndexBlockType.NONE.value} block on destination_index={destination_index}.")
             es_mgr.set_index_block(index_name=destination_index
-                                   , block_name=IndexBlockType.NONE)
+                                   , block_type_enum=IndexBlockType.NONE)
             es_mgr.delete_index(index_name=destination_index)
             logger.debug(f"Deleted destination_index={destination_index}.")
             op_data_supplement['golive']['swap_info'].append(f"Deleted {destination_index}")
 
         # Assure that the index which will be actively used by Search API and the
         # backup of the previous version are writeable.
-        logger.debug(f"Set {IndexBlockType.NONE} block on source_index={source_index}.")
+        logger.debug(f"Set {IndexBlockType.NONE.value} block on source_index={source_index}.")
         es_mgr.set_index_block(index_name=source_index
-                               , block_name=IndexBlockType.NONE)
-        logger.debug(f"Set {IndexBlockType.NONE} block on flush_index={flush_index}.")
+                               , block_type_enum=IndexBlockType.NONE)
+        logger.debug(f"Set {IndexBlockType.NONE.value} block on flush_index={flush_index}.")
         es_mgr.set_index_block(index_name=flush_index
-                               , block_name=IndexBlockType.NONE)
+                               , block_type_enum=IndexBlockType.NONE)
     else:
         logger.error(f"Unable to 'rename' indices for fill_strategy={fill_strategy}")
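Note (editorial, outside the patch): for reviewers of the go-live flow above, this is roughly what the two `set_index_block` branches amount to at the HTTP level, reconstructed from the `_block/{...}` URL and the `_settings` payload visible in `es_manager.py`. The endpoint and index name below are placeholders:

```python
import requests

ES = "http://localhost:9200"   # placeholder Elasticsearch/OpenSearch URL
INDEX = "some_fill_index"      # placeholder index name

# IndexBlockType.WRITE branch: add a write block before swapping index names.
requests.put(f"{ES}/{INDEX}/_block/write").raise_for_status()

# IndexBlockType.NONE branch: clear all blocks once the swap is finished.
requests.put(
    f"{ES}/{INDEX}/_settings",
    headers={"Content-Type": "application/json"},
    json={"index": {"blocks.write": False,
                    "blocks.read_only": False,
                    "blocks.read_only_allow_delete": False}},
).raise_for_status()
```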
diff --git a/scripts/fresh_indices/fresh_indices.sh b/scripts/fresh_indices/fresh_indices.sh
index 8bef4363..9f652b40 100755
--- a/scripts/fresh_indices/fresh_indices.sh
+++ b/scripts/fresh_indices/fresh_indices.sh
@@ -62,16 +62,45 @@ Help()
 ################################################################################
 # Verify the needs of this script are available, the version is acceptable, etc.
 ################################################################################
-StartupVerifications()
-{
-  # No version requirement for Python 3, but don't expect it to report
-  # a version if it is unavailable
-  if ! python3 --version | grep '^Python 3.[0-9]' > /dev/null; then
-    bail_out_errors+=("Python 3 does not seem to be available")
-  elif [[ "$arg_verbose" == true ]]; then
-    echo Python 3 found - `python3 --version`
+StartupVerifications() {
+  # Check Python version >= 3.13 using Python itself, capturing
+  # the output of the heredoc in the python_output variable.
+  python_output="$(
+python3.13 - << 'EOF'
+import sys
+
+required = (3, 13)
+current = sys.version_info
+
+if current >= required:
+    # Print exact found version on success for the Bash wrapper to capture
+    print(f"OK {current.major}.{current.minor}.{current.micro}")
+    raise SystemExit(0)
+
+print(
+    f"Python ≥ {required[0]}.{required[1]} is required, "
+    f"but found {current.major}.{current.minor}.{current.micro}"
+)
+raise SystemExit(2)
+EOF
+  )"
+
+  status=$?
+
+  if [[ $status -eq 0 ]]; then
+    # If verbose, print the discovered version
+    if [[ "$arg_verbose" == true ]]; then
+      # Extract version after "OK "
+      python_version=${python_output#"OK "}
+      echo "Python 3 found - $python_version"
+    fi
+  else
+    # Append failure message to the global error array
+    bail_out_errors+=("$python_output")
   fi
+
+  # Make sure an admin group token has been placed in a file so
+  # it can be passed in on the Python command line.
   if [[ ! -f "./token_holder" ]]; then
     bail_out_errors+=("The file 'token_holder' is not found in `pwd`")
   fi
@@ -87,17 +116,6 @@
 printf -v date_stamp '%(%Y-%m-%d)T' -1
 
 # Commands accepted in the script arguments after the options, as described in Help()
 recognized_commands=("create","catch-up","go-live")
 
-# Pull the names of the destination indices from the same YAML which will be
-# used for reindexing.
-readarray -t entities_portal_indices < <(
-python -c 'import yaml,sys; \
-  y=yaml.safe_load(sys.stdin); \
-  print(y["indices"]["entities"]["public"]); \
-  print(y["indices"]["entities"]["private"]); \
-  print(y["indices"]["portal"]["public"]); \
-  print(y["indices"]["portal"]["private"])' < ../../src/instance/search-config.yaml
-)
-
 ################################################################################
 # Set internal variables used by this script
 ################################################################################
@@ -146,9 +164,64 @@ else
     esac
 fi
 
+LoadEntitiesPortableIndices() {
+
+  # Assign the argument passed in to the config_file variable
+  local config_file="$1"
+
+  # Reset the array on each call
+  entities_portal_indices=()
+
+  # Capture *stdout and stderr* from Python into python_output
+  python_output="$(
+    python3.13 - "$config_file" << 'EOF' 2>&1
+import yaml, sys
+
+try:
+    with open(sys.argv[1]) as f:
+        y = yaml.safe_load(f)
+except Exception:
+    print(f"Unable to find configuration file: {sys.argv[1]}")
+    raise SystemExit(2)
+
+try:
+    print(y["indices"]["entities"]["public"])
+    print(y["indices"]["entities"]["private"])
+    print(y["indices"]["portal"]["public"])
+    print(y["indices"]["portal"]["private"])
+except KeyError as ke:
+    raise SystemExit(f"Missing key in {sys.argv[1]}: {ke}")
+EOF
+  )"
+
+  local status=$?
+
+  if [[ $status -eq 0 ]]; then
+    # Success, split python_output into array lines
+    readarray -t entities_portal_indices <<< "$python_output"
+
+    if [[ "$arg_verbose" == true ]]; then
+      echo "Loaded indices from: $config_file"
+      for index in "${entities_portal_indices[@]%,}"; do
+        printf "\t%s\n" "$index"
+      done
+    fi
+
+    return 0
+  else
+    # Failure, add the *entire python_output* to the error array
+    bail_out_errors+=("$python_output")
+    return $status
+  fi
+}
+
 # Verify resources this script needs are available.
 StartupVerifications
 
+# Load the index names from the YAML file for the project
+config_file="../../src/instance/search-config.yaml"
+LoadEntitiesPortableIndices "$config_file"
+
 # Verify the specified output directory is writeable.
 if [ ! -w $arg_output_dir ]; then
     bail_out_errors+=("Unable to write files to '${arg_output_dir}'.")
 fi
@@ -182,6 +255,6 @@ else
     echo "Unexpectedly tried to execute with cmd='$cmd'"
 fi
 MYPYPATH=../../src:../../src/search-adaptor/src:../../src/search-adaptor/src/libs:../../src/search-adaptor/src/translator
-PYTHONPATH=$MYPYPATH python3 fresh_indices.py $cmd `cat ./token_holder`
+PYTHONPATH=$MYPYPATH python3.13 fresh_indices.py $cmd `cat ./token_holder`
 
 exit $EXIT_SUCCESS