|
@@ -8,6 +8,7 @@
 import re
 import sys
 import time
+from redis import Redis, ConnectionError, RedisError
 from yaml import safe_load, YAMLError
 from http.client import HTTPException
 from enum import Enum
|
@@ -27,6 +28,12 @@
 
 logger = logging.getLogger(__name__)
 
+config = {}
+app = Flask(__name__, instance_path=os.path.join(os.path.abspath(os.path.dirname(__file__)), 'instance'),
+            instance_relative_config=True)
+app.config.from_pyfile('app.cfg')
+config['INDICES'] = safe_load((Path(__file__).absolute().parent / 'instance/search-config.yaml').read_text())
+
 # This list contains fields that are added to the top-level at index runtime
 entity_properties_list = [
     'donor',
@@ -685,6 +692,147 @@ def _transform_and_write_entity_to_index_group(self, entity:dict, index_group:st |
                      f" entity['uuid']={entity['uuid']},"
                      f" entity['entity_type']={entity['entity_type']}")
 
+    def enqueue_reindex(self, entity_id, reindex_queue, priority):
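+        """Enqueue a reindex job for the given entity and return its job id.
+
+        For Collections/Epicollections and Uploads, also enqueue jobs for their
+        member datasets (and any associated publication); for all other entity
+        types, fan out jobs for ancestors, descendants, revisions, collections
+        and uploads at a lower priority."""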
+        try:
+            logger.info(f"Start executing enqueue_reindex() on entity_id: {entity_id}")
+            entity = self.call_entity_api(entity_id=entity_id, endpoint_base='documents')
+            logger.info(f"Enqueueing reindex for {entity['entity_type']} of uuid: {entity_id}")
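+            # Fan-out jobs for related entities use a priority value of at least 2,
+            # assuming larger values mean lower priority in this queue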
+            subsequent_priority = max(priority, 2)
+
+            job_id = reindex_queue.enqueue(
+                task_func=reindex_entity_queued_wrapper,
+                entity_id=entity_id,
+                args=[entity_id, self.token],
+                priority=priority
+            )
+            collection_associations = []
+            upload_associations = []
+            if entity['entity_type'] in ['Collection', 'Epicollection']:
+                collection = self.get_collection_doc(entity_id=entity_id)
+                if 'datasets' in collection:
+                    logger.info(f"Enqueueing {len(collection['datasets'])} datasets for {entity['entity_type']} {entity_id}")
+                    collection_associations.extend(ds['uuid'] for ds in collection['datasets'])
+                if 'associated_publication' in collection and collection['associated_publication']:
+                    logger.info(f"Enqueueing associated_publication for {entity['entity_type']} {entity_id}")
+                    collection_associations.append(collection['associated_publication'])
+
+                # Enqueue the collected associations here; the early return below
+                # bypasses the related-entity fan-out at the end of this method
+                for associated_id in collection_associations:
+                    reindex_queue.enqueue(
+                        task_func=reindex_entity_queued_wrapper,
+                        entity_id=associated_id,
+                        args=[associated_id, self.token],
+                        priority=subsequent_priority
+                    )
+
+                logger.info(f"Finished executing enqueue_reindex() for {entity['entity_type']} of uuid: {entity_id}")
+                return job_id
+
+            if entity['entity_type'] == 'Upload':
+                if 'datasets' in entity:
+                    logger.info(f"Enqueueing {len(entity['datasets'])} datasets for Upload {entity_id}")
+                    for dataset in entity['datasets']:
+                        upload_associations.append(dataset['uuid'])
+
+                # Enqueue the collected datasets here as well; the early return
+                # below bypasses the related-entity fan-out at the end of this method
+                for associated_id in upload_associations:
+                    reindex_queue.enqueue(
+                        task_func=reindex_entity_queued_wrapper,
+                        entity_id=associated_id,
+                        args=[associated_id, self.token],
+                        priority=subsequent_priority
+                    )
+
+                logger.info(f"Finished executing enqueue_reindex() for Upload of uuid: {entity_id}")
+                return job_id
+
+            logger.info(f"Calculating related entities for {entity_id}")
+
+            neo4j_ancestor_ids = self.call_entity_api(
+                entity_id=entity_id,
+                endpoint_base='ancestors',
+                endpoint_suffix=None,
+                url_property='uuid'
+            )
+
+            neo4j_descendant_ids = self.call_entity_api(
+                entity_id=entity_id,
+                endpoint_base='descendants',
+                endpoint_suffix=None,
+                url_property='uuid'
+            )
+
+            previous_revision_ids = []
+            next_revision_ids = []
+            neo4j_collection_ids = []
+            neo4j_upload_ids = []
+
+            if entity['entity_type'] in ['Dataset', 'Publication']:
+                previous_revision_ids = self.call_entity_api(
+                    entity_id=entity_id,
+                    endpoint_base='previous_revisions',
+                    endpoint_suffix=None,
+                    url_property='uuid'
+                )
+
+                next_revision_ids = self.call_entity_api(
+                    entity_id=entity_id,
+                    endpoint_base='next_revisions',
+                    endpoint_suffix=None,
+                    url_property='uuid'
+                )
+
+                neo4j_collection_ids = self.call_entity_api(
+                    entity_id=entity_id,
+                    endpoint_base='entities',
+                    endpoint_suffix='collections',
+                    url_property='uuid'
+                )
+
+                neo4j_upload_ids = self.call_entity_api(
+                    entity_id=entity_id,
+                    endpoint_base='entities',
+                    endpoint_suffix='uploads',
+                    url_property='uuid'
+                )
+
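+            # Union all the relationship lists, deduplicating entities that are
+            # reachable through more than one path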
+            target_ids = set(
+                neo4j_ancestor_ids +
+                neo4j_descendant_ids +
+                previous_revision_ids +
+                next_revision_ids +
+                neo4j_collection_ids +
+                neo4j_upload_ids +
+                upload_associations +
+                collection_associations
+            )
+
+            logger.info(f"Enqueueing {len(target_ids)} related entities for {entity_id}")
+
+            for related_entity_id in target_ids:
+                reindex_queue.enqueue(
+                    task_func=reindex_entity_queued_wrapper,
+                    entity_id=related_entity_id,
+                    args=[related_entity_id, self.token],
+                    priority=subsequent_priority
+                )
+            logger.info(f"Finished executing enqueue_reindex() on {entity['entity_type']} of uuid: {entity_id}")
+            return job_id
+        except (ValueError, RedisError):
+            # Re-raise as-is so callers can distinguish these failure modes;
+            # re-wrapping would discard the original traceback
+            raise
+        except Exception:
+            msg = "Exception while executing enqueue_reindex()"
+            logger.exception(msg)
+            raise
+
+    def reindex_entity_queued(self, entity_id):
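+        """Reindex a single entity from a queued job.
+
+        Collections/Epicollections and Uploads go through their dedicated
+        translate paths; all other entity types are indexed directly, deleting
+        any existing document first."""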
+        try:
+            logger.info(f"Start executing reindex_entity_queued() on uuid: {entity_id}")
+            entity = self.call_entity_api(entity_id=entity_id, endpoint_base='documents')
+            logger.info(f"Reindexing {entity['entity_type']} of uuid: {entity_id}")
+
+            if entity['entity_type'] in ['Collection', 'Epicollection']:
+                self.translate_collection(entity_id, reindex=True)
+
+            elif entity['entity_type'] == 'Upload':
+                self.translate_upload(entity_id, reindex=True)
+
+            else:
+                self._call_indexer(entity=entity, delete_existing_doc_first=True)
+
+            logger.info(f"Finished executing reindex_entity_queued() on {entity['entity_type']} of uuid: {entity_id}")
+
+        except Exception:
+            msg = f"Exception during reindex_entity_queued() for uuid: {entity_id}"
+            logger.exception(msg)
+            raise
+
     # Used by individual live reindex call
     def translate(self, entity_id):
         try:
@@ -2027,6 +2175,18 @@ def get_organ_types(self): |
 # Running full reindex script in command line
 # This approach is different from the live /reindex-all PUT call
 # It'll delete all the existing indices and recreate them, then index everything
+
+
+def reindex_entity_queued_wrapper(entity_id, token):
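+    """Queue-worker entry point: build a Translator from the module-level app
+    config and the caller's token, then reindex the given entity."""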
+    translator = Translator(
+        indices=config['INDICES'],
+        app_client_id=app.config['APP_CLIENT_ID'],
+        app_client_secret=app.config['APP_CLIENT_SECRET'],
+        token=token,
+        ontology_api_base_url=app.config['ONTOLOGY_API_BASE_URL']
+    )
+    translator.reindex_entity_queued(entity_id)
+
 if __name__ == "__main__":
     # Specify the absolute path of the instance folder and use the config file relative to the instance path
     app = Flask(__name__, instance_path=os.path.join(os.path.abspath(os.path.dirname(__file__)), '../src/instance'),
|