Skip to content

Commit 87b65c1

Browse files
committed
flora using folders
1 parent f4c2d5b commit 87b65c1

File tree

1 file changed

+67
-16
lines changed

1 file changed

+67
-16
lines changed

dataherb/flora.py

Lines changed: 67 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import json
2+
import shutil
23
import sys
34
from pathlib import Path
45

56
from loguru import logger
67

78
from dataherb.core.base import Herb
8-
from dataherb.core.search import search_by_ids_in_flora as _search_by_ids_in_flora
9-
from dataherb.core.search import (
10-
search_by_keywords_in_flora as _search_by_keywords_in_flora,
11-
)
9+
from dataherb.core.search import \
10+
search_by_ids_in_flora as _search_by_ids_in_flora
11+
from dataherb.core.search import \
12+
search_by_keywords_in_flora as _search_by_keywords_in_flora
1213
from dataherb.fetch.remote import get_data_from_url as _get_data_from_url
1314
from dataherb.parse.model_json import MetaData
1415

@@ -21,12 +22,15 @@ class Flora(object):
2122
DataHerb is the container of datasets.
2223
"""
2324

24-
def __init__(self, flora):
25+
def __init__(self, flora, is_aggregated=False):
2526
"""
2627
:param flora: API of the DataHerb service, defaults to dataherb official or list of Herbs, defaults to everything from the API
2728
"""
29+
self.is_aggregated = is_aggregated
2830

2931
if isinstance(flora, str):
32+
if flora.endswith(".json"):
33+
self.is_aggregated = True
3034
self.workdir = Path(flora).parent.parent
3135
self.flora_config = flora
3236
flora = self._get_flora(flora)
@@ -42,8 +46,18 @@ def _get_flora(self, flora_config):
4246
_get_flora fetch flora from the provided API.
4347
"""
4448
if Path(flora_config).exists():
45-
with open(flora_config, "r") as f:
46-
flora = json.load(f)
49+
if self.is_aggregated:
50+
with open(flora_config, "r") as f:
51+
flora = json.load(f)
52+
else:
53+
# We do not have aggregated flora
54+
# read dataherb.json from all the folders here
55+
flora_folder = Path(flora_config)
56+
herb_paths = [f for f in flora_folder.iterdir() if f.is_dir()]
57+
flora = [
58+
json.load(open(f.joinpath("dataherb.json"), "r"))
59+
for f in herb_paths
60+
]
4761
else:
4862
# assuming the config is a url if the local file does not exist
4963
flora_request = _get_data_from_url(flora_config)
@@ -91,16 +105,18 @@ def remove(self, herb_id):
91105
"""
92106
remove a herb from the flora
93107
"""
94-
95108
for id in [i.id for i in self.flora]:
96109
if id == herb_id:
97110
logger.debug(f"found herb id = {herb_id}")
98111

99112
self.flora = [h for h in self.flora if h.id != herb_id]
100113

101-
self.save()
114+
if self.is_aggregated:
115+
self.save()
116+
else:
117+
self.remove_herb_from_flora(herb_id)
102118

103-
def save(self, path=None):
119+
def save(self, path=None, id=None):
104120
"""save flora metadata to json file"""
105121

106122
if path is None:
@@ -110,16 +126,51 @@ def save(self, path=None):
110126
f"type of a herb in flora: {type(self.flora[0])}\n{self.flora[0].metadata}"
111127
)
112128

113-
serialized_flora = []
114-
for h in self.flora:
115-
logger.debug(f"herb (type {type(h)}): {h}")
116-
serialized_flora.append(h.metadata)
129+
if self.is_aggregated:
130+
serialized_flora = []
131+
for h in self.flora:
132+
logger.debug(f"herb (type {type(h)}): {h}")
133+
serialized_flora.append(h.metadata)
134+
135+
with open(path, "w") as fp:
136+
json.dump(
137+
serialized_flora, fp, sort_keys=True, indent=4, separators=(",", ": ")
138+
)
139+
else:
140+
if not id:
141+
raise Exception("dataherb id must be provided")
142+
143+
def save_herb_meta(self, id, path=None):
    """Save the metadata of a single herb to ``dataherb.json`` in its folder.

    :param id: id of the herb; it is passed to ``self.herb_meta`` to obtain
        the metadata and used as the folder name under ``self.workdir``
        when ``path`` is not given
    :param path: folder to write ``dataherb.json`` into, as a ``str`` or a
        ``pathlib.Path``; defaults to ``self.workdir / id``
    """
    # Accept plain strings as well as Path objects.
    path = self.workdir / f"{id}" if path is None else Path(path)

    # exist_ok avoids the check-then-create race of testing exists() first.
    path.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Will replace dataherb id {id}")
    with open(path / "dataherb.json", "w") as fp:
        json.dump(
            self.herb_meta(id),
            fp,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
        )
122157

158+
def remove_herb_from_flora(self, id, path=None):
    """Remove a herb's folder, including its metadata file, from the flora.

    :param id: id of the herb to remove; used as the folder name under
        ``self.workdir`` when ``path`` is not given
    :param path: folder of the herb, as a ``str`` or a ``pathlib.Path``;
        defaults to ``self.workdir / id``
    """
    # Accept plain strings as well as Path objects.
    path = self.workdir / f"{id}" if path is None else Path(path)

    if not path.exists():
        logger.debug(f"dataherb {id} doesn't exist")
        return

    try:
        shutil.rmtree(path)
    except OSError as e:
        # Best effort: report the failure instead of aborting the caller.
        logger.error(
            f"Can not remove herb id {id}: {e.filename} - {e.strerror}."
        )
173+
123174
def search(self, keywords):
124175
"""
125176
search finds the datasets that matches the keywords

0 commit comments

Comments
 (0)