3333from pathlib import Path
3434from urllib .parse import urljoin
3535
36+ from packaging .version import Version
37+
3638from .memory_buffer import MemoryBuffer
3739from .model import (
3840 ComputationalWorkflow ,
6365from .metadata import read_metadata , find_root_entity_id
6466
6567
# "@type" values that mark a JSON-LD entity as a data entity.
DATA_ENTITY_TYPES = {"File", "Dataset"}


def is_data_entity(entity):
    """Tell whether a JSON-LD entity dict looks like a data entity.

    An entity qualifies when its "@id" does not start with "#" and at
    least one of its "@type" values is listed in ``DATA_ENTITY_TYPES``.

    Args:
        entity: JSON-LD entity dict. Must contain "@id"; "@type" is
            optional and is normalized with ``as_list`` before the check.

    Returns:
        bool: True if the entity is considered a data entity.

    Raises:
        KeyError: if the entity has no "@id" key.
    """
    # Identifiers starting with "#" are never treated as data entities,
    # whatever their declared type.
    if entity["@id"].startswith("#"):
        return False
    declared_types = as_list(entity.get("@type", []))
    # Always return a real bool rather than the (possibly empty) intersection set.
    return not DATA_ENTITY_TYPES.isdisjoint(declared_types)
75+
76+
6677def pick_type (json_entity , type_map , fallback = None ):
6778 try :
6879 t = json_entity ["@type" ]
@@ -172,12 +183,14 @@ def __read_data_entities(self, entities, source, gen_preview):
172183
173184 def __add_parts (self , parts , entities , source ):
174185 type_map = OrderedDict ((_ .__name__ , _ ) for _ in subclasses (FileOrDir ))
175- for data_entity_ref in parts :
176- id_ = data_entity_ref ['@id' ]
177- try :
178- entity = entities .pop (id_ )
179- except KeyError :
186+ for ref in parts :
187+ id_ = ref ['@id' ]
188+ if id_ not in entities :
180189 continue
190+ if self .version_obj >= Version ("1.2" ):
191+ if not is_data_entity (entities [id_ ]):
192+ continue
193+ entity = entities .pop (id_ )
181194 assert id_ == entity .pop ('@id' )
182195 cls = pick_type (entity , type_map , fallback = DataEntity )
183196 if cls is DataEntity :
@@ -193,11 +206,13 @@ def __add_parts(self, parts, entities, source):
193206
194207 def __read_contextual_entities (self , entities ):
195208 type_map = {_ .__name__ : _ for _ in subclasses (ContextEntity )}
196- # types *commonly* used for data entities
197- data_entity_types = {"File" , "Dataset" }
198209 for identifier , entity in entities .items ():
199- if data_entity_types .intersection (as_list (entity .get ("@type" , []))):
200- warnings .warn (f"{ entity ['@id' ]} looks like a data entity but it's not listed in the root dataset's hasPart" )
210+ if is_data_entity (entity ):
211+ id_ = entity ['@id' ]
212+ if self .version_obj >= Version ("1.2" ):
213+ raise ValueError (f"'{ id_ } ' is a data entity but it's not linked to from the root dataset's hasPart" )
214+ else :
215+ warnings .warn (f"'{ id_ } ' looks like a data entity but it's not listed in the root dataset's hasPart" )
201216 assert identifier == entity .pop ('@id' )
202217 cls = pick_type (entity , type_map , fallback = ContextEntity )
203218 self .add (cls (self , identifier , entity ))
@@ -311,6 +326,10 @@ def mainEntity(self, value):
311326 def version (self ):
312327 return self .metadata .version
313328
329+ @property
330+ def version_obj (self ):
331+ return self .metadata .version_obj
332+
314333 @property
315334 def test_dir (self ):
316335 rval = self .dereference ("test" )
0 commit comments