# Copyright © 2024 Battelle Memorial Institute
# All rights reserved.

import hypernetx as hnx
import pandas as pd
import json
import fastjsonschema
import requests
from copy import deepcopy
from .exception import HyperNetXError

schema_url = "https://raw.githubusercontent.com/pszufe/HIF_validators/main/schemas/hif_schema_v0.1.0.json"
resp = requests.get(schema_url)
schema = json.loads(resp.text)
validator = fastjsonschema.compile(schema)
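# Note: the compiled `validator` is a callable that raises
# fastjsonschema.JsonSchemaException when its argument does not conform to the
# HIF schema. A minimal illustrative check (toy data, not part of this module):
#
#   validator({"incidences": [], "nodes": [], "edges": []})
#
# A conforming document passes silently; a nonconforming one raises.
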
def normalize_dataframe(df):
    """
    Moves attributes that are not HIF keys into misc_properties for
    translation into HIF.

    Parameters
    ----------
    df : pd.DataFrame
        HypergraphView.dataframe

    Returns
    -------
    pd.DataFrame
        columns are limited to weight, direction (if present), and
        misc_properties, the keys allowed by HIF
    """
    default_cols = (
        ["weight"]
        + list(set(df.columns).intersection(["direction"]))
        + ["misc_properties"]
    )
    cols = list(set(df.columns).difference(default_cols))
    dfdict = df[cols].T.to_dict()
    newdf = df[default_cols]
    for uid in newdf.index:
        # Each misc_properties cell holds a dict; fold the remaining columns
        # into it in place.
        newdf.loc[uid]["misc_properties"].update(dfdict[uid])
    return newdf.fillna("nil")
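
# Illustrative sketch of the normalization (toy values, not part of the API):
#
#   df = pd.DataFrame(
#       {"weight": [1, 1], "misc_properties": [{}, {}], "color": ["red", "blue"]},
#       index=["e1", "e2"],
#   )
#   normalize_dataframe(df)
#   #     weight   misc_properties
#   # e1       1   {'color': 'red'}
#   # e2       1   {'color': 'blue'}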

def to_hif(hg, filename=None, network_type="undirected", metadata=None):
    """
    Returns a dictionary object valid under the HIF JSON schema

    Parameters
    ----------
    hg : hnx.Hypergraph

    filename : str, optional
        filepath where the JSON object is to be stored, by default None
    network_type : str, optional
        One of 'undirected', 'directed', 'asc', by default 'undirected'
    metadata : dict, optional
        Additional information to store, by default None

    Returns
    -------
    hif : dict
        format is defined by the HIF schema
    """
    hyp_objs = ["nodes", "edges", "incidences"]
    defaults = {
        part: dict(getattr(hg, part).property_store._defaults) for part in hyp_objs
    }
    # HIF stores each part's default miscellaneous properties under "attrs".
    for part in hyp_objs:
        misc_properties = defaults[part].pop("misc_properties", {})
        defaults[part]["attrs"] = dict(misc_properties)

    incj = deepcopy(hg.incidences.to_dataframe)
    incj.index.names = ["edge", "node"]
    incj = normalize_dataframe(incj)
    incj = incj.rename(columns={"misc_properties": "attrs"})
    incj = incj.reset_index().to_dict(orient="records")

    edgj = deepcopy(hg.edges.to_dataframe)
    edgj.index.names = ["edge"]
    edgj = normalize_dataframe(edgj)
    edgj = edgj.rename(columns={"misc_properties": "attrs"})
    edgj = edgj.reset_index().to_dict(orient="records")

    nodj = deepcopy(hg.nodes.to_dataframe)
    nodj.index.names = ["node"]
    nodj = normalize_dataframe(nodj)
    nodj = nodj.rename(columns={"misc_properties": "attrs"})
    nodj = nodj.reset_index().to_dict(orient="records")

    if isinstance(metadata, dict):
        # dict.update mutates in place and returns None, so do not reassign
        # its result to metadata.
        metadata.update({"default_attrs": defaults})
    else:
        metadata = {"default_attrs": defaults}
    if hg.name is not None:
        metadata["name"] = hg.name

    hif = {
        "edges": edgj,
        "nodes": nodj,
        "incidences": incj,
        "network-type": network_type,
        "metadata": metadata,
    }
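    # Illustrative shape of the object just assembled (toy values):
    #   {
    #       "edges": [{"edge": "e1", "weight": 1, "attrs": {}}, ...],
    #       "nodes": [{"node": "a", "weight": 1, "attrs": {}}, ...],
    #       "incidences": [{"edge": "e1", "node": "a", "weight": 1, "attrs": {}}, ...],
    #       "network-type": "undirected",
    #       "metadata": {"default_attrs": {...}},
    #   }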
    try:
        validator(hif)
        if filename is not None:
            with open(filename, "w") as f:
                json.dump(hif, f)
        return hif
    except Exception as ex:
        raise HyperNetXError(ex) from ex

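# Illustrative usage (assumes a toy hypergraph; "toy_hif.json" is a
# hypothetical path):
#
#   H = hnx.Hypergraph({"e1": ["a", "b"], "e2": ["b", "c"]})
#   hif = to_hif(H, filename="toy_hif.json")
#   sorted(hif)
#   # ['edges', 'incidences', 'metadata', 'network-type', 'nodes']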

def from_hif(hif=None, filename=None):
    """
    Reads an HIF-formatted dict or file and returns the corresponding
    hnx.Hypergraph

    Parameters
    ----------
    hif : dict, optional
        Useful if the file has already been read by json and inspected before
        turning it into a hypergraph, by default None
    filename : str, optional
        Full path to the location of an HIF-formatted JSON file in storage,
        by default None

    Returns
    -------
    hnx.Hypergraph
    """
    if hif is None and filename is not None:
        with open(filename, "r") as f:
            hif = json.load(f)
    if hif is None:
        raise HyperNetXError("No data given")
    try:
        validator(hif)
    except Exception as ex:
        raise HyperNetXError(ex) from ex

    # Fallback defaults for parts the HIF metadata does not describe.
    mkdd = lambda: {"weight": 1, "attrs": {}}
    hifex = deepcopy(hif)
    parts = {
        part: deepcopy(pd.DataFrame(hifex.get(part, {})))
        for part in ["nodes", "edges", "incidences"]
    }
    metadata = hifex.get("metadata", {})
    defaults = metadata.get("default_attrs", {})
    defaults = {part: defaults.get(part, mkdd()) for part in parts}
    default_weights = {part: defaults[part].get("weight", 1) for part in parts}
    for part in parts:
        if len(parts[part]) == 0:
            continue
        thispart = parts[part]
        d = deepcopy(defaults[part])
        # Default attributes other than weight/attrs become their own columns,
        # pulled out of each row's attrs dict with the default as fallback.
        dkeys = [k for k in d.keys() if k not in ["weight", "attrs"]]
        if len(dkeys) > 0:
            for attr in dkeys:
                thispart[attr] = [
                    row.attrs.pop(attr, d[attr]) for row in thispart.itertuples()
                ]
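    # Illustrative sketch (toy values): if defaults["nodes"] were
    #   {"weight": 1, "color": "blue", "attrs": {}}
    # then dkeys == ["color"]; a node row whose attrs were
    #   {"color": "red", "size": 3}
    # ends up with a "color" column value of "red" and attrs {"size": 3},
    # while a row without "color" receives the default "blue".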
    hyp_objects = dict()
    for part in ["nodes", "edges"]:
        if len(parts[part]) > 0:
            # Put the uid column ("node" or "edge") first.
            uid = part[:-1]
            cols = [uid] + list(set(parts[part].columns).difference([uid]))
            hyp_objects[part] = parts[part][cols]
        else:
            hyp_objects[part] = None
    cols = ["edge", "node"] + list(
        set(parts["incidences"].columns).difference(["edge", "node"])
    )
    incidences = parts["incidences"][cols]
    name = metadata.get("name", None)
    return hnx.Hypergraph(
        incidences,
        default_cell_weight=default_weights["incidences"],
        misc_cell_properties_col="attrs",
        node_properties=hyp_objects["nodes"],
        default_edge_weight=default_weights["edges"],
        edge_properties=hyp_objects["edges"],
        default_node_weight=default_weights["nodes"],
        misc_properties_col="attrs",
        name=name,
    )
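
# Illustrative round trip (continuing the toy example above; the file path is
# hypothetical):
#
#   H2 = from_hif(filename="toy_hif.json")
#   H2.shape  # (#nodes, #edges) of the reconstructed hypergraph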