11import json
2+ import shutil
23import sys
34from pathlib import Path
45
56from loguru import logger
67
78from dataherb .core .base import Herb
8- from dataherb .core .search import search_by_ids_in_flora as _search_by_ids_in_flora
9- from dataherb . core . search import (
10- search_by_keywords_in_flora as _search_by_keywords_in_flora ,
11- )
9+ from dataherb .core .search import \
10+ search_by_ids_in_flora as _search_by_ids_in_flora
11+ from dataherb . core . search import \
12+ search_by_keywords_in_flora as _search_by_keywords_in_flora
1213from dataherb .fetch .remote import get_data_from_url as _get_data_from_url
1314from dataherb .parse .model_json import MetaData
1415
@@ -21,12 +22,15 @@ class Flora(object):
2122 DataHerb is the container of datasets.
2223 """
2324
24- def __init__ (self , flora ):
25+ def __init__ (self , flora , is_aggregated = False ):
2526 """
2627 :param flora: API of the DataHerb service, defaults to dataherb official or list of Herbs, defaults to everything from the API
2728 """
29+ self .is_aggregated = is_aggregated
2830
2931 if isinstance (flora , str ):
32+ if flora .endswith (".json" ):
33+ self .is_aggregated = True
3034 self .workdir = Path (flora ).parent .parent
3135 self .flora_config = flora
3236 flora = self ._get_flora (flora )
@@ -42,8 +46,18 @@ def _get_flora(self, flora_config):
4246 _get_flora fetch flora from the provided API.
4347 """
4448 if Path (flora_config ).exists ():
45- with open (flora_config , "r" ) as f :
46- flora = json .load (f )
49+ if self .is_aggregated :
50+ with open (flora_config , "r" ) as f :
51+ flora = json .load (f )
52+ else :
53+ # We do not have aggregated flora
54+ # read dataherb.json from all the folders here
55+ flora_folder = Path (flora_config )
56+ herb_paths = [f for f in flora_folder .iterdir () if f .is_dir ()]
57+ flora = [
58+ json .load (open (f .joinpath ("dataherb.json" ), "r" ))
59+ for f in herb_paths
60+ ]
4761 else :
4862 # assuming the config is a url if the local file does not exist
4963 flora_request = _get_data_from_url (flora_config )
@@ -91,16 +105,18 @@ def remove(self, herb_id):
91105 """
92106 add add a herb to the flora
93107 """
94-
95108 for id in [i .id for i in self .flora ]:
96109 if id == herb_id :
97110 logger .debug (f"found herb id = { herb_id } " )
98111
99112 self .flora = [h for h in self .flora if h .id != herb_id ]
100113
101- self .save ()
114+ if self .is_aggregated :
115+ self .save ()
116+ else :
117+ self .remove_herb_from_flora (herb_id )
102118
103- def save (self , path = None ):
119+ def save (self , path = None , id = None ):
104120 """save flora metadata to json file"""
105121
106122 if path is None :
@@ -110,16 +126,51 @@ def save(self, path=None):
110126 f"type of a herb in flora: { type (self .flora [0 ])} \n { self .flora [0 ].metadata } "
111127 )
112128
113- serialized_flora = []
114- for h in self .flora :
115- logger .debug (f"herb (type { type (h )} ): { h } " )
116- serialized_flora .append (h .metadata )
129+ if self .is_aggregated :
130+ serialized_flora = []
131+ for h in self .flora :
132+ logger .debug (f"herb (type { type (h )} ): { h } " )
133+ serialized_flora .append (h .metadata )
134+
135+ with open (path , "w" ) as fp :
136+ json .dump (
137+ serialized_flora , fp , sort_keys = True , indent = 4 , separators = ("," , ": " )
138+ )
139+ else :
140+ if not id :
141+ raise Exception ("dataherb id must be provided" )
142+
def save_herb_meta(self, id, path=None):
    """Save the metadata of a single herb to ``<path>/dataherb.json``.

    :param id: id of the herb to save; it is handed to ``self.herb_meta``
        and whatever that call returns is dumped as JSON
    :param path: folder that receives ``dataherb.json``, defaults to
        ``self.workdir / id``; a plain string is accepted as well
    """
    # Coerce to Path so that callers may pass a str without breaking the
    # `/` join used for the target file below.
    herb_dir = self.workdir / f"{id}" if path is None else Path(path)

    # exist_ok makes folder creation safe even if the folder shows up
    # between an existence check and the mkdir call.
    herb_dir.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Will replace dataherb id {id}")
    with open(herb_dir / "dataherb.json", "w") as fp:
        json.dump(
            self.herb_meta(id),
            fp,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
        )
122157
def remove_herb_from_flora(self, id, path=None):
    """Delete the folder of a herb from the flora working directory.

    :param id: id of the herb to remove
    :param path: folder to delete, defaults to ``self.workdir / id``;
        a plain string is accepted as well

    Removal is best effort: a missing folder is only reported at debug
    level and an ``OSError`` raised while deleting is logged, not raised.
    """
    # Coerce to Path so that callers may pass a str.
    herb_dir = self.workdir / f"{id}" if path is None else Path(path)

    if not herb_dir.exists():
        logger.debug(f"dataherb {id} doesn't exist")
        return

    try:
        shutil.rmtree(herb_dir)
    except OSError as e:
        logger.error(
            f"Can not remove herb id {id}: {e.filename} - {e.strerror}."
        )
173+
123174 def search (self , keywords ):
124175 """
125176 search finds the datasets that matches the keywords
0 commit comments