diff --git a/.gitignore b/.gitignore index 7a6b33b..a162c1e 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ __pycache__/ *$py.class pip-log.txt pip-delete-this-directory.txt +*/test_results/ # OS-generated junk # ##################### diff --git a/comptox_ai/db/graph_db.py b/comptox_ai/db/graph_db.py index 7869f40..e0eef6c 100644 --- a/comptox_ai/db/graph_db.py +++ b/comptox_ai/db/graph_db.py @@ -19,7 +19,7 @@ from neo4j.api import Version from yaml import load, Loader from dataclasses import dataclass -from typing import List, Dict +from typing import cast, Union, List, Dict from textwrap import dedent import ipdb @@ -38,6 +38,7 @@ def _get_default_config_file(): default_config_file = os.path.join(root_dir, 'CONFIG-default.yaml') return default_config_file + @dataclass class Metagraph: """ @@ -63,6 +64,7 @@ class Metagraph: relationship_types: List[str] relationship_path_schema: Dict[str, Dict[str, int]] + class Node(dict): """ A node in ComptoxAI's graph database. @@ -95,10 +97,12 @@ class Node(dict): 'synonyms': '', ...} """ + def __init__(self, db, node_type, search_params, return_first_match=False): # build search query - where_clause = ", ".join([f"n.{k} = '{v}'" for k, v in search_params.items()]) - + where_clause = ", ".join( + [f"n.{k} = '{v}'" for k, v in search_params.items()]) + node_search_query = f""" MATCH (n:{node_type}) WHERE {where_clause} @@ -112,10 +116,12 @@ def __init__(self, db, node_type, search_params, return_first_match=False): raise Exception("Error: Node not found in graph database") elif len(node_search_res) > 1: if return_first_match == False: - raise Exception("Error: Multiple nodes match query. Use `return_first_match=True` to bypass error and use first matching node.") + raise Exception( + "Error: Multiple nodes match query. Use `return_first_match=True` to bypass error and use first matching node.") + + self.update(node_search_res[0]['n'].items()) + - self.update(node_search_res[0]['n'].items()) - class Graph(object): """ A Neo4j graph, as defined by the Neo4j Graph Data Science Library. In @@ -147,6 +153,7 @@ class GraphDB(object): Sets verbosity to on or off. If True, status information will be returned to the user occasionally. """ + def __init__(self, username=None, password=None, hostname=None, verbose=False): self.is_connected = False self.verbose = verbose @@ -155,20 +162,20 @@ def __init__(self, username=None, password=None, hostname=None, verbose=False): self.username = username self.password = password self.hostname = hostname - + self._connect() self.exporter = comptox_ai.db.GraphExporter(self) def __repr__(self): - return( + return ( dedent(f"""\ ------------------------ ComptoxAI graph database ------------------------ Hostname: {self.hostname} Username: {self.username}""" - ) + ) ) def _connect(self): @@ -186,22 +193,28 @@ def _connect(self): if (username is None) and (password is None): self._driver = GraphDatabase.driver(uri) else: - self._driver = GraphDatabase.driver(uri, auth=(username, password)) + self._driver = GraphDatabase.driver( + uri, auth=(username, password)) except AuthError as e: - raise RuntimeError("Could not find a database using the configuration provided.") + raise RuntimeError( + "Could not find a database using the configuration provided.") # Test the connection to make sure we are connected to a database try: with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "The configuration may change in the future.") + warnings.filterwarnings( + "ignore", "The configuration may change in the future.") conn_result = self._driver.verify_connectivity() except ServiceUnavailable: - raise RuntimeError("Neo4j driver created but we couldn't connect to any routing servers. You might be using an invalid hostname.") + raise RuntimeError( + "Neo4j driver created but we couldn't connect to any routing servers. You might be using an invalid hostname.") except ValueError: - raise RuntimeError("Neo4j driver created but the host address couldn't be resolved. Check your hostname, port, and/or protocol.") - + raise RuntimeError( + "Neo4j driver created but the host address couldn't be resolved. Check your hostname, port, and/or protocol.") + if (conn_result is None): - raise RuntimeError("Neo4j driver created but a valid connection hasn't been established. You might be using an invalid hostname.") + raise RuntimeError( + "Neo4j driver created but a valid connection hasn't been established. You might be using an invalid hostname.") def _disconnect(self): self._driver.close() @@ -234,12 +247,13 @@ def run_cypher(self, qry_str, verbose=True): """ with self._driver.session() as session: if self.verbose: - if verbose: # users can still override verbosity at the run_cypher level + if verbose: # users can still override verbosity at the run_cypher level print(f"Writing Cypher transaction: \n {qry_str}") try: res = session.write_transaction(self._run_transaction, qry_str) except CypherSyntaxError as e: - warnings.warn("Neo4j returned a Cypher syntax error. Please check your query and try again.") + warnings.warn( + "Neo4j returned a Cypher syntax error. Please check your query and try again.") print(f"\nThe original error returned by Neo4j is:\n\n {e}") return None return res @@ -266,8 +280,9 @@ def get_graph_statistics(self): response = response[0] - stats = {k:response[k] for k in ('nodeCount', 'relCount', 'labelCount', 'relTypeCount') if k in response} - + stats = {k: response[k] for k in ( + 'nodeCount', 'relCount', 'labelCount', 'relTypeCount') if k in response} + return stats def fetch(self, field, operator, value, what='both', register_graph=True, @@ -293,7 +308,7 @@ def fetch(self, field, operator, value, what='both', register_graph=True, method call may be only the nodes or edges contained in that subgraph. filter : str 'Cypher-like' filter statement, equivalent to a `WHERE` clause used in - a Neo4j Cypher query (analogous to SQL `WHERE` clauses). + a Neo4j Cypher query (analogous to SQL `WHERE` clauses). query_type : {'cypher', 'native'} Whether to create a graph using a Cypher projection or a native projection. The 'standard' approach is to use a Cypher projection, but @@ -303,14 +318,16 @@ def fetch(self, field, operator, value, what='both', register_graph=True, information, as well as https://neo4j.com/docs/graph-data-science/current/management-ops/graph-catalog-ops/#catalog-graph-create. """ - raise NotImplementedError("Error: GraphDB.fetch() not yet implemented - see documentation notes.") + raise NotImplementedError( + "Error: GraphDB.fetch() not yet implemented - see documentation notes.") if query_type == 'cypher': new_graph = self.build_graph_cypher_projection() elif query_type == 'native': new_graph = self.build_graph_native_projection() else: - raise ValueError("'query_type' must be either 'cypher' or 'native'") + raise ValueError( + "'query_type' must be either 'cypher' or 'native'") # consume results @@ -324,17 +341,20 @@ def find_node(self, name=None, properties=None): """ if name: # search by name - query = "MATCH (n {{ commonName: \"{0}\" }}) RETURN n LIMIT 1;".format(name) + query = "MATCH (n {{ commonName: \"{0}\" }}) RETURN n LIMIT 1;".format( + name) else: if not properties: - raise ValueError("Error: Must provide a value for `name` or `properties`.") + raise ValueError( + "Error: Must provide a value for `name` or `properties`.") # search by properties # first, separate out properties with special meaning (e.g., `id`) # then, construct a MATCH clause suing the remaining properties - # strings should be enclosed in - prop_string = ", ".join([f"{k}: '{v}'" if type(v) == str else f"{k}: {v}" for k, v in properties.items()]) + # strings should be enclosed in + prop_string = ", ".join([f"{k}: '{v}'" if type( + v) == str else f"{k}: {v}" for k, v in properties.items()]) match_clause = f"MATCH (n {{ {prop_string} }})" # assemble the complete query @@ -343,13 +363,14 @@ def find_node(self, name=None, properties=None): node_response = self.run_cypher(query) if len(node_response) < 1: - warnings.warn("Warning: No node found matching the query you provided.") + warnings.warn( + "Warning: No node found matching the query you provided.") return False elif len(node_response) > 1: - warnings.warn("Warning: Multiple nodes found for query - only returning one (see `find_nodes` if you want all results).") - - return node_response[0]['n'] + warnings.warn( + "Warning: Multiple nodes found for query - only returning one (see `find_nodes` if you want all results).") + return node_response[0]['n'] def find_nodes(self, properties={}, node_types=[]): """ @@ -380,12 +401,15 @@ def find_nodes(self, properties={}, node_types=[]): rather than a list. """ if (not properties) and (len(node_types) == 0): - raise ValueError("Error: Query must contain at least one node property or node type.") + raise ValueError( + "Error: Query must contain at least one node property or node type.") if not properties: - warnings.warn("Warning: No property filters given - the query result may be very large!") + warnings.warn( + "Warning: No property filters given - the query result may be very large!") - prop_string = ", ".join([f"{k}: '{v}'" if type(v) == str else f"{k}: {v}" for k, v in properties.items()]) + prop_string = ", ".join([f"{k}: '{v}'" if type( + v) == str else f"{k}: {v}" for k, v in properties.items()]) # Use a WHERE clause when multiple node types are given if len(node_types) == 1: @@ -425,7 +449,7 @@ def build_graph_native_projection(self, graph_name, node_types, graph_name : str A (string) name for identifying the new graph. If a graph already exists with this name, a ValueError will be raised. - node_proj : str, list of str, or dict of + node_proj : str, list of str, or dict of Node projection for the new graph. This can be either a single node label, a list of node labels, or a node projection @@ -459,7 +483,7 @@ def build_graph_native_projection(self, graph_name, node_types, } } - where ``node-label-i`` is a name for a node label in the projected graph + where ``node-label-i`` is a name for a node label in the projected graph (it can be the same as or different from the label already in neo4j), ``neo4j-label`` is a node label to match against in the graph database, and ``node-property-mappings`` are filters against Neo4j node properties, as @@ -468,7 +492,7 @@ def build_graph_native_projection(self, graph_name, node_types, NODE PROPERTY MAPPINGS: RELATIONSHIP PROJECTIONS: - + Examples -------- >>> g = GraphDB() @@ -477,7 +501,7 @@ def build_graph_native_projection(self, graph_name, node_types, node_proj = ['Gene', 'StructuralEntity'], relationship_proj = "*" ) - >>> + >>> """ create_graph_query_template = """ @@ -490,9 +514,10 @@ def build_graph_native_projection(self, graph_name, node_types, node_proj_str = self._make_node_projection_str(node_types) # relationship_proj_str = "'{0}'".format(relationship_proj) - relationship_proj_str = self._make_node_projection_str(relationship_types) + relationship_proj_str = self._make_node_projection_str( + relationship_types) - #config_dict_str = str(config_dict) + # config_dict_str = str(config_dict) if config_dict is None: config_dict_str = "" else: @@ -521,9 +546,9 @@ def build_graph_cypher_projection(self, graph_name, node_query, -------- >>> g = GraphDB() >>> g.build_graph_cypher_projection(...) - >>> + >>> """ - + create_graph_query_template = """ CALL gds.graph.create.cypher({0},{1},{2}{3}) YIELD graphName, nodeCount, relationshipCount, createMillis; @@ -592,6 +617,58 @@ def fetch_nodes(self, node_type, property, values): """ raise NotImplementedError + def fetch_chemicals( + self, + property: str, + values: Union[str, List[str]] + ) -> List[Dict[str, str]]: + """ + Fetch all chemicals that matches the values of the specified property + type. + + Parameters + ---------- + property : str + Node property to match against. + + values : str or list + Value or list of values on which to match `property`. + + Returns + ------- + List[Dict[str, str]] + Each element in the list corresponds to a single chemical node. If + no matches are found in the database,an empty list will be + returned. + """ + if (not property): + raise ValueError( + "Error: Query must contain a node property.") + + # Check if input value is a list or a single string + if isinstance(values, list): + query = ( + f"MATCH (c:Chemical) " + f"WHERE c.{property} IN {values} " + f"RETURN c" + ) + else: + query = ( + f"MATCH (c:Chemical) " + f"WHERE c.{property} = '{values}' " + f"RETURN c" + ) + fetched_chemicals = self.run_cypher(query) + + chemicals: List[Dict[str, str]] = [] + + if fetched_chemicals is not None: + for chemical in fetched_chemicals: + if isinstance(chemical, dict) and 'c' in chemical: + chemicals.append(chemical['c']) + + return chemicals + def fetch_chemical_list(self, list_name): """ Fetch all chemicals that are members of a chemical list. @@ -609,7 +686,8 @@ def fetch_chemical_list(self, list_name): chemicals : list of dict Chemical nodes that are members of the chemical list """ - res = self.run_cypher(f"MATCH (l:ChemicalList {{ listAcronym: \"{list_name}\" }})-[:LISTINCLUDESCHEMICAL]->(c:Chemical) RETURN l, c") + res = self.run_cypher( + f"MATCH (l:ChemicalList {{ listAcronym: \"{list_name}\" }})-[:LISTINCLUDESCHEMICAL]->(c:Chemical) RETURN l, c") return (res[0]['l'], [r['c'] for r in res]) @@ -632,18 +710,18 @@ def fetch_node_type(self, node_label): that this method may take a very long time to run and/or be very demanding on computing resources. """ - + res = self.run_cypher(f"MATCH (n:{node_label}) RETURN n;") return (r['n'] for r in res) - def fetch_relationships(self, relationship_type, from_label, to_label): """ Fetch edges (relationships) from the Neo4j graph database. """ - - res = self.run_cypher(f"MATCH (s:{from_label})-[r:{relationship_type}]->(o:{to_label}) RETURN s, r, o;") + + res = self.run_cypher( + f"MATCH (s:{from_label})-[r:{relationship_type}]->(o:{to_label}) RETURN s, r, o;") return ((r['r'][0]['uri'], r['r'][1], r['r'][2]['uri']) for r in res) @@ -658,14 +736,15 @@ def get_metagraph(self): prudent to start doing that at some point in the future. It's not an extremely quick operation, but it's also not prohibitively slow. """ - + meta = self.run_cypher("CALL apoc.meta.graph();")[0] node_labels = [] for n in meta['nodes']: node_labels.append(n['name']) - + node_labels = [n['name'] for n in meta['nodes']] - node_label_counts = dict([(n['name'], n['count']) for n in meta['nodes']]) + node_label_counts = dict([(n['name'], n['count']) + for n in meta['nodes']]) rel_types = [] rel_path_schema = dict() @@ -673,7 +752,7 @@ def get_metagraph(self): if r[1] not in rel_types: rel_types.append(r[1]) rel_path_schema[r[1]] = [] - + rel_path_schema[r[1]].append({ 'from': r[0]['name'], 'to': r[2]['name'] @@ -685,7 +764,7 @@ def get_metagraph(self): relationship_types=rel_types, relationship_path_schema=rel_path_schema ) - + return metagraph def list_existing_graphs(self): @@ -703,7 +782,8 @@ def list_existing_graphs(self): if len(graphs) == 0: print("Graph catalog is currently empty.") else: - print("Number of graphs currently in GDS graph catalog: {0}".format(len(graphs))) + print("Number of graphs currently in GDS graph catalog: {0}".format( + len(graphs))) return graphs def drop_existing_graph(self, graph_name): @@ -715,7 +795,7 @@ def drop_existing_graph(self, graph_name): graph_name : str A name of a graph, corresponding to the `'graphName'` field in the graph's entry within the GDS graph catalog. - + Returns ------- dict @@ -769,9 +849,11 @@ def export_graph(self, graph_name, to='db'): graph's entry within the GDS graph catalog. """ if to == 'csv': - res = self.run_cypher(f"CALL gds.beta.graph.export('{graph_name}', {{exportName: '{graph_name}'}})") + res = self.run_cypher( + f"CALL gds.beta.graph.export('{graph_name}', {{exportName: '{graph_name}'}})") elif to == 'db': - res = self.run_cypher(f"CALL gds.graph.export('{graph_name}', {{dbName: '{graph_name}'}});") + res = self.run_cypher( + f"CALL gds.graph.export('{graph_name}', {{dbName: '{graph_name}'}});") return res def stream_named_graph(self, graph_name): @@ -852,10 +934,10 @@ def convert_ids(self, node_type, from_id, to_id, ids): id_list = [x['id'] for x in res] return id_list - + def find_shortest_paths(self, node1, node2, cleaned=True): """ Parameters ---------- node1 : comptox - """ \ No newline at end of file + """ diff --git a/tests/test_db.py b/tests/test_db.py index 63b219d..cb12253 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -3,6 +3,8 @@ import pytest import warnings import os +from pprint import pprint, pformat +import sys TEST_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -10,86 +12,188 @@ # Module-level scope so we don't keep reconnecting with every test @pytest.fixture(scope="module") def G(): - G = GraphDB(verbose=True, hostname="neo4j.comptox.ai") - return G + G = GraphDB(verbose=True, hostname="neo4j.comptox.ai") + return G + class TestGraphDB(object): - -<<<<<<< HEAD -======= - ->>>>>>> master - def test_neo4j_connection_does_exist(self, G): - with warnings.catch_warnings(): - # Supress the ExperimentalWarning for now - warnings.simplefilter("ignore") - assert G._driver.verify_connectivity() is not None - - def test_cypher_query_does_run(self, G): - x = G.run_cypher("RETURN 'hello';") - assert len(x[0]) > 0 - - def test_dsstox_to_casrn_converts(self, G): - converted_ids = G.convert_ids( - node_type='Chemical', - from_id='xrefDTXSID', - to_id='xrefCasRN', - ids=['DTXSID40857898', 'DTXSID40858749'] - ) - - # Hopefully DSSTOX -> CASRN mappings are stable between versions... - assert converted_ids == ['69313-80-0', '4559-79-9'] - - ## THE FOLLOWING ARE OBSOLETE UNTIL GDS GRAPH CATALOG IS COMPATIBLE WITH - # def test_raise_when_config_file_not_found(self): - # with pytest.raises(RuntimeError) as e_info: - # G_pre = GraphDB(config_file="/dev/null") - - # def test_raise_when_bad_config_given(self): - # bad_config_file = os.path.join(TEST_DIR, 'badconfig.txt') - # with pytest.raises(RuntimeError) as e_info: - # G_pre = GraphDB(config_file=bad_config_file) - - # def test_raise_when_database_unavailable(self): - # unavail_config_file = os.path.join(TEST_DIR, 'unavailconfig.txt') - # with pytest.raises(RuntimeError) as e_info: - # G_pre = GraphDB(config_file=unavail_config_file) - ## STRING PROPERTIES: - - # def test_gds_list_existing_graphs(self, G): - # x = G.list_existing_graphs() - # assert isinstance(x, list) - - # def test_gds_delete_existing_graphs(self, G): - # x = G.drop_all_existing_graphs() - - # y = G.list_existing_graphs() - - # assert len(y) is 0 - - # def test_gds_create_graph_native_projection(self, G): - # newgraph1 = G.build_graph_native_projection( - # "testgraph1", - # ["Chemical", "Disease"], - # "*", - # ) - - # def test_gds_new_num_graphs_is_1(self, G): - # y = G.list_existing_graphs() - # assert len(y) == 1 - - # def test_gds_delete_graph_native_projection(self, G): - # x = G.drop_existing_graph("testgraph1") - # assert x['graphName'] == "testgraph1" - - # def test_gds_create_graph_cypher_projection(self, G): - # newgraph2 = G.build_graph_cypher_projection( - # "testgraph2", - # "MATCH (n) WHERE n:Chemical OR n:Disease RETURN id(n) AS id, labels(n) AS labels", - # "MATCH (c:Chemical)-->(d:Disease) RETURN id(c) as source, id(d) as target" - # ) - - # def test_gds_delete_graph_cypher_projection(self, G): - # # Note: this test will fail if the previous test fails - # x = G.drop_existing_graph("testgraph2") - # assert x['graphName'] == "testgraph2" \ No newline at end of file + + def test_neo4j_connection_does_exist(self, G): + with warnings.catch_warnings(): + # Supress the ExperimentalWarning for now + warnings.simplefilter("ignore") + assert G._driver.verify_connectivity() is not None + + def test_cypher_query_does_run(self, G): + x = G.run_cypher("RETURN 'hello';") + assert len(x[0]) > 0 + + def test_dsstox_to_casrn_converts(self, G): + converted_ids = G.convert_ids( + node_type='Chemical', + from_id='xrefDTXSID', + to_id='xrefCasRN', + ids=['DTXSID40857898', 'DTXSID40858749'] + ) + + # Hopefully DSSTOX -> CASRN mappings are stable between versions... + assert converted_ids == ['69313-80-0', '4559-79-9'] + + def test_fetch_chemical_one_chemical_by_commonName(self, G): + expected_chemicals = { + 'commonName': 'Trisodium hydrogen diphosphate', + 'maccs': '0000000000000000000000000000100000100000000100011000000000000000000010000000000000000000100000000000010001000001000000000001010001000001001100000101000000000010000101', + 'sMILES': '[Na+].[Na+].[Na+].OP([O-])(=O)OP([O-])([O-])=O', + 'synonyms': 'Trisodium diphosphate|Diphosphoric acid, trisodium salt|Diphosphoric acid, sodium salt (1:3)', + 'uri': 'http://jdr.bio/ontologies/comptox.owl#chemical_dtxsid70872534', + 'xrefCasRN': '14691-80-6', + 'xrefDTXSID': 'DTXSID70872534', + 'xrefPubchemCID': '161081', + 'xrefPubchemSID': '316388586' + } + + # Perform the fetch operation + fetched_chemicals = G.fetch_chemicals( + property='commonName', + values='Trisodium hydrogen diphosphate' + ) + + # Assert that the result is a list + assert isinstance(fetched_chemicals, list) + + # Assert that the result is not empty + assert fetched_chemicals + + # Assert that the expected chemical data is in the result + assert expected_chemicals in fetched_chemicals + + def test_fetch_chemical_two_chemicals_by_CasRN(self, G): + expected_chemicals = [ + { + 'commonName': 'Silidianin', + 'maccs': '0000000000000000001000000000000000000000000000000100100010000100000000010001000000100000111010010110000010001001100000010010111000100001101100111100010111001011011110', + 'sMILES': 'COC1=C(O)C=CC(=C1)[C@@H]1[C@H]2CO[C@]3(O)[C@H]2C(=C[C@H]1C3=O)[C@H]1OC2=C(C(O)=CC(O)=C2)C(=O)[C@@H]1O', + 'synonyms': 'Silydianin|3,6-Methanobenzofuran-7(6H)-one, 4-[(2R,3R)-3,4-dihydro-3,5,7-trihydroxy-4-oxo-2H-1-benzopyran-2-yl]-2,3,3a,7a-tetrahydro-7a-hydroxy-8-(4-hydroxy-3-methoxyphenyl)-, (3R,3aR,6R,7aR,8R)-|EINECS 249-848-5|Silidianina|Silidianine|Silidianinum|UNII-7P89L7W179', + 'uri': 'http://jdr.bio/ontologies/comptox.owl#chemical_dtxsid70858696', + 'xrefCasRN': '29782-68-1', + 'xrefDTXSID': 'DTXSID70858696', + 'xrefMeSH': 'MESH:C015505', + 'xrefPubchemCID': '11982272', + 'xrefPubchemSID': '316388226', + }, + { + 'commonName': '3-Chloro-6-fluoro-2H-indazole', + 'maccs': '0000000000000000000000000000000000000000010000000001000000000000100010000000000100100010000001010000001010100000000000011001000000000100100001000000001000010000111010', + 'sMILES': 'FC1=CC2=NNC(Cl)=C2C=C1', + 'synonyms': '', + 'uri': 'http://jdr.bio/ontologies/comptox.owl#chemical_dtxsid80857655', + 'xrefCasRN': '1243360-12-4', + 'xrefDTXSID': 'DTXSID80857655', + 'xrefPubchemCID': '71748561', + 'xrefPubchemSID': '316387196', + } + ] + + # Perform the fetch operation + fetched_chemicals = G.fetch_chemicals( + property='xrefCasRN', + values=['29782-68-1', '1243360-12-4'] + ) + + # Assert that the result is a list + assert isinstance(fetched_chemicals, + list), f"Result is not a list: {fetched_chemicals}" + + # Assert that the result is not empty + assert fetched_chemicals, f"Result is empty: {fetched_chemicals}" + + # Define the path to the test_results directory + test_results_dir = "./test_results" + + # Ensure the directory exists; create it if it doesn't + os.makedirs(test_results_dir, exist_ok=True) + + # Specify the result file name with the full path + result_filename = os.path.join( + test_results_dir, "test_fetch_chemical_two_chemicals_by_CasRN.txt") + + # Open the result file for writing + with open(result_filename, "w") as result_file: + # Redirect print output to the result file + original_stdout = sys.stdout + sys.stdout = result_file + + # Print the result to the file + print(fetched_chemicals) + + # Compare the expected_chemicals with res + for i, (expected, fetched) in enumerate(zip(expected_chemicals, fetched_chemicals), start=1): + for key in expected.keys(): + if expected[key] != fetched[key]: + print(f"Difference in chemical {i}, key: {key}") + print(f"Expected: {expected[key]}") + print(f"Result: {fetched[key]}") + print() + + # Restore the original stdout + sys.stdout = original_stdout + + # Check if each expected chemical is in fetched_chemicals + for expected in expected_chemicals: + assert ( + expected in fetched_chemicals + ), f"Expected chemical not found in result: {expected}" + + # THE FOLLOWING ARE OBSOLETE UNTIL GDS GRAPH CATALOG IS COMPATIBLE WITH + # def test_raise_when_config_file_not_found(self): + # with pytest.raises(RuntimeError) as e_info: + # G_pre = GraphDB(config_file="/dev/null") + + # def test_raise_when_bad_config_given(self): + # bad_config_file = os.path.join(TEST_DIR, 'badconfig.txt') + # with pytest.raises(RuntimeError) as e_info: + # G_pre = GraphDB(config_file=bad_config_file) + + # def test_raise_when_database_unavailable(self): + # unavail_config_file = os.path.join(TEST_DIR, 'unavailconfig.txt') + # with pytest.raises(RuntimeError) as e_info: + # G_pre = GraphDB(config_file=unavail_config_file) + # STRING PROPERTIES: + + # def test_gds_list_existing_graphs(self, G): + # x = G.list_existing_graphs() + # assert isinstance(x, list) + + # def test_gds_delete_existing_graphs(self, G): + # x = G.drop_all_existing_graphs() + + # y = G.list_existing_graphs() + + # assert len(y) is 0 + + # def test_gds_create_graph_native_projection(self, G): + # newgraph1 = G.build_graph_native_projection( + # "testgraph1", + # ["Chemical", "Disease"], + # "*", + # ) + + # def test_gds_new_num_graphs_is_1(self, G): + # y = G.list_existing_graphs() + # assert len(y) == 1 + + # def test_gds_delete_graph_native_projection(self, G): + # x = G.drop_existing_graph("testgraph1") + # assert x['graphName'] == "testgraph1" + + # def test_gds_create_graph_cypher_projection(self, G): + # newgraph2 = G.build_graph_cypher_projection( + # "testgraph2", + # "MATCH (n) WHERE n:Chemical OR n:Disease RETURN id(n) AS id, labels(n) AS labels", + # "MATCH (c:Chemical)-->(d:Disease) RETURN id(c) as source, id(d) as target" + # ) + + # def test_gds_delete_graph_cypher_projection(self, G): + # # Note: this test will fail if the previous test fails + # x = G.drop_existing_graph("testgraph2") + # assert x['graphName'] == "testgraph2"