Skip to content

Commit d3c5ad1

Browse files
committed
added logic to create file references and added logging
1 parent a416023 commit d3c5ad1

File tree

13 files changed

+833
-412
lines changed

13 files changed

+833
-412
lines changed

bia_ro_crate/cli.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
Protocol,
1414
SpecimenImagingPreparationProtocol,
1515
Study,
16+
FileReference
1617
)
1718
from .bia_to_zarr_crate.conversion import create_ro_crate_for_image
1819
from pathlib import Path
@@ -78,15 +79,9 @@ def convert(
7879
),
7980
] = Path(__file__).parents[1],
8081
):
81-
# crate_path = (
82-
# Path(__file__).parents[0]
83-
# / "model"
84-
# / "example"
85-
# / "S-BIAD1494"
86-
# / "ro-crate-version"
87-
# )
8882

89-
crate = crate_read(crate_path)
83+
# Just for validation
84+
crate_read(crate_path)
9085

9186
entities = process_ro_crate(crate_path)
9287

@@ -97,7 +92,8 @@ def convert(
9792

9893
study_uuid = study.uuid
9994

100-
api_objects += Dataset.create_api_dataset(entities, study_uuid)
95+
datasets = Dataset.create_api_dataset(entities, study_uuid)
96+
api_objects += datasets
10197
api_objects += AnnotationMethod.create_api_image_acquisition_protocol(
10298
entities, study_uuid
10399
)
@@ -110,6 +106,11 @@ def convert(
110106
entities, study_uuid
111107
)
112108

109+
110+
api_objects += FileReference.create_file_reference(
111+
entities, study_uuid, crate_path
112+
)
113+
113114
ApiModels = RootModel[list]
114115
write_out = ApiModels(api_objects)
115116

bia_ro_crate/model/example/S-BIAD1494/ro-crate-version/ro-crate-metadata.json

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"@context": {
3+
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
34
"bia": "http://bia/",
45
"schema": "http://schema.org/",
56
"name": {
@@ -226,7 +227,7 @@
226227
"@type": "@id"
227228
},
228229
"association": {
229-
"@id": "biashapes:association",
230+
"@id": "bias:association",
230231
"@type": "@id"
231232

232233
},
@@ -498,7 +499,7 @@
498499
},
499500
{
500501
"@id": "_:a0",
501-
"@type": "biashape:Association",
502+
"@type": "bia:Association",
502503
"associated_bio_sample": [
503504
"Nup133 mouse stem cells"
504505
],
@@ -521,7 +522,7 @@
521522
},
522523
{
523524
"@id": "_:a1",
524-
"@type": "biashape:Association",
525+
"@type": "bia:Association",
525526
"associated_bio_sample": [
526527
"Nup133 mouse stem cells"
527528
],
@@ -544,7 +545,7 @@
544545
},
545546
{
546547
"@id": "_:a2",
547-
"@type": "biashape:Association",
548+
"@type": "bia:Association",
548549
"associated_bio_sample": [
549550
"Nup133 mouse stem cells"
550551
],
@@ -567,7 +568,7 @@
567568
},
568569
{
569570
"@id": "_:a3",
570-
"@type": "biashape:Association",
571+
"@type": "bia:Association",
571572
"associated_bio_sample": [
572573
"Nup133 mouse stem cells"
573574
],
@@ -590,7 +591,7 @@
590591
},
591592
{
592593
"@id": "_:a4",
593-
"@type": "biashape:Association",
594+
"@type": "bia:Association",
594595
"associated_bio_sample": [
595596
"Nup133 mouse stem cells"
596597
],
@@ -613,7 +614,7 @@
613614
},
614615
{
615616
"@id": "_:a5",
616-
"@type": "biashape:Association",
617+
"@type": "bia:Association",
617618
"associated_bio_sample": [
618619
"Nup133 mouse stem cells"
619620
],
@@ -636,7 +637,7 @@
636637
},
637638
{
638639
"@id": "_:a6",
639-
"@type": "biashape:Association",
640+
"@type": "bia:Association",
640641
"associated_bio_sample": [
641642
"Nup133 mouse stem cells"
642643
],
@@ -659,7 +660,7 @@
659660
},
660661
{
661662
"@id": "_:a7",
662-
"@type": "biashape:Association",
663+
"@type": "bia:Association",
663664
"associated_bio_sample": [
664665
"Nup133 mouse stem cells"
665666
],
@@ -682,7 +683,7 @@
682683
},
683684
{
684685
"@id": "_:a8",
685-
"@type": "biashape:Association",
686+
"@type": "bia:Association",
686687
"associated_bio_sample": [
687688
"Nup133 mouse stem cells"
688689
],
@@ -705,7 +706,7 @@
705706
},
706707
{
707708
"@id": "_:a9",
708-
"@type": "biashape:Association",
709+
"@type": "bia:Association",
709710
"associated_bio_sample": [
710711
"Wild type mouse stem cells"
711712
],
@@ -728,7 +729,7 @@
728729
},
729730
{
730731
"@id": "_:a10",
731-
"@type": "biashape:Association",
732+
"@type": "bia:Association",
732733
"associated_bio_sample": [
733734
"Wild type mouse stem cells"
734735
],
@@ -751,7 +752,7 @@
751752
},
752753
{
753754
"@id": "_:a11",
754-
"@type": "biashape:Association",
755+
"@type": "bia:Association",
755756
"associated_bio_sample": [
756757
"Wild type mouse stem cells"
757758
],
@@ -774,7 +775,7 @@
774775
},
775776
{
776777
"@id": "_:a12",
777-
"@type": "biashape:Association",
778+
"@type": "bia:Association",
778779
"associated_bio_sample": [
779780
"Wild type mouse stem cells"
780781
],
@@ -797,7 +798,7 @@
797798
},
798799
{
799800
"@id": "_:a13",
800-
"@type": "biashape:Association",
801+
"@type": "bia:Association",
801802
"associated_bio_sample": [
802803
"Wild type mouse stem cells"
803804
],
@@ -820,7 +821,7 @@
820821
},
821822
{
822823
"@id": "_:a14",
823-
"@type": "biashape:Association",
824+
"@type": "bia:Association",
824825
"associated_bio_sample": [
825826
"Wild type mouse stem cells"
826827
],
@@ -843,7 +844,7 @@
843844
},
844845
{
845846
"@id": "_:a15",
846-
"@type": "biashape:Association",
847+
"@type": "bia:Association",
847848
"associated_bio_sample": [
848849
"Wild type mouse stem cells"
849850
],
@@ -866,7 +867,7 @@
866867
},
867868
{
868869
"@id": "_:a16",
869-
"@type": "biashape:Association",
870+
"@type": "bia:Association",
870871
"associated_bio_sample": [
871872
"Wild type mouse stem cells"
872873
],
@@ -889,7 +890,7 @@
889890
},
890891
{
891892
"@id": "_:a17",
892-
"@type": "biashape:Association",
893+
"@type": "bia:Association",
893894
"associated_bio_sample": [
894895
"Wild type mouse stem cells"
895896
],

bia_ro_crate/ro_crate_to_bia/crate_reader.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
import bia_ro_crate.ro_crate_to_bia.ingest_models as ingest_models
77
import inspect
88
import pyld
9+
import rdflib
10+
import logging
11+
12+
logger = logging.getLogger("__main__." + __name__)
913

1014

1115
def read_json_from_ro_crate(crate_path: str) -> dict:
@@ -49,7 +53,13 @@ def load_entities(data: dict) -> dict[str, ROCrateModel]:
4953
crate_objects_by_id[object.id] = object
5054
break
5155
if len(crate_objects_by_id) == start_len:
52-
print(f"Could not find class for {entity}")
56+
if "ro-crate-metadata.json" == entity.get("@id"):
57+
logger.info("Skipping ro-crate-metadata.json entity.")
58+
elif str(rdflib.RDF.Property) in entity_type:
59+
logger.info(f"Skipping RDF.Property: {entity.get('name')}. Though we may want to processes these in some way later")
60+
else:
61+
logger.warning(f"Could not find class for entity of types: {entity_type}")
62+
logger.debug(f"Entity: {entity}")
5363
return crate_objects_by_id
5464

5565

bia_ro_crate/ro_crate_to_bia/entity_conversion/AnnotationMethod.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
from uuid import UUID
21
from bia_ro_crate.ro_crate_to_bia.pydantic_ld.ROCrateModel import ROCrateModel
32
from bia_shared_datamodels import uuid_creation
43
from bia_integrator_api.models import AnnotationMethod as APIAnnotationMethod
54
import bia_ro_crate.ro_crate_to_bia.ingest_models as ROCrateModels
5+
import logging
6+
7+
logger = logging.getLogger("__main__." + __name__)
68

79
def create_api_image_acquisition_protocol(
810
crate_objects_by_id: dict[str, ROCrateModel], study_uuid: str
@@ -27,7 +29,7 @@ def create_api_image_acquisition_protocol(
2729
def convert_annotation_method(
2830
ro_crate_annotation_method: ROCrateModels.AnnotationMethod,
2931
crate_objects_by_id: dict[str, ROCrateModel],
30-
study_uuid: UUID,
32+
study_uuid: str,
3133
) -> APIAnnotationMethod:
3234
iap = {
3335
"uuid": uuid_creation.create_annotation_method_uuid(

bia_ro_crate/ro_crate_to_bia/entity_conversion/BioSample.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
from uuid import UUID
21
from bia_ro_crate.ro_crate_to_bia.pydantic_ld.ROCrateModel import ROCrateModel
32
from bia_shared_datamodels import uuid_creation
43
import bia_integrator_api.models as APIModels
54
import bia_ro_crate.ro_crate_to_bia.ingest_models as ROCrateModels
5+
import logging
6+
7+
logger = logging.getLogger("__main__." + __name__)
68

79

810
def create_api_bio_sample(
@@ -25,7 +27,7 @@ def create_api_bio_sample(
2527
def convert_bio_sample(
2628
ro_crate_bio_sample: ROCrateModels.BioSample,
2729
crate_objects_by_id: dict[str, ROCrateModel],
28-
study_uuid: UUID,
30+
study_uuid: str,
2931
) -> APIModels.BioSample:
3032

3133
taxons = []

bia_ro_crate/ro_crate_to_bia/entity_conversion/Dataset.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
from uuid import UUID
21
from bia_ro_crate.ro_crate_to_bia.pydantic_ld.ROCrateModel import ROCrateModel
32
from bia_shared_datamodels import uuid_creation
43
import bia_integrator_api.models as APIModels
54
import bia_ro_crate.ro_crate_to_bia.ingest_models as ROCrateModels
5+
import logging
6+
7+
logger = logging.getLogger("__main__." + __name__)
68

79

810
def create_api_dataset(
@@ -26,7 +28,7 @@ def create_api_dataset(
2628
def convert_image_acquisition_protocol(
2729
ro_crate_dataset: ROCrateModels.Dataset,
2830
crate_objects_by_id: dict[str, ROCrateModel],
29-
study_uuid: UUID,
31+
study_uuid: str,
3032
) -> APIModels.Dataset:
3133

3234
title = None
@@ -37,7 +39,7 @@ def convert_image_acquisition_protocol(
3739

3840
dataset = {
3941
"uuid": str(
40-
uuid_creation.create_image_acquisition_protocol_uuid(
42+
uuid_creation.create_dataset_uuid(
4143
ro_crate_dataset.id, study_uuid
4244
)
4345
),
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
from bia_ro_crate.ro_crate_to_bia.pydantic_ld.ROCrateModel import ROCrateModel
2+
from bia_shared_datamodels import uuid_creation
3+
import bia_integrator_api.models as APIModels
4+
import bia_ro_crate.ro_crate_to_bia.ingest_models as ROCrateModels
5+
import pathlib
6+
import glob
7+
import logging
8+
9+
logger = logging.getLogger("__main__." + __name__)
10+
11+
12+
def create_file_reference(
    crate_objects_by_id: dict[str, ROCrateModel],
    study_uuid: str,
    crate_path: pathlib.Path,
) -> list[APIModels.FileReference]:
    """Build API file references for the files of every dataset in the crate.

    Args:
        crate_objects_by_id: Crate entities keyed by their id; only the
            values that are ``ROCrateModels.Dataset`` instances are used.
        study_uuid: UUID of the owning study, used to derive the dataset
            UUID and passed on to each file reference.
        crate_path: Root directory of the RO-Crate on disk.

    Returns:
        One file reference per path reported by ``find_files``, grouped in
        the order the datasets appear in ``crate_objects_by_id``.
    """
    references = []

    for crate_object in crate_objects_by_id.values():
        # Anything that is not a dataset has no files to reference.
        if not isinstance(crate_object, ROCrateModels.Dataset):
            continue

        dataset_files = find_files(crate_object, crate_path)
        owning_dataset_uuid = str(
            uuid_creation.create_dataset_uuid(crate_object.id, study_uuid)
        )

        references.extend(
            create_api_file_reference(
                dataset_file, study_uuid, owning_dataset_uuid, crate_path
            )
            for dataset_file in dataset_files
        )

    return references
35+
36+
37+
def find_files(dataset: "ROCrateModels.Dataset", crate_path: pathlib.Path) -> list[str]:
    """Return the paths of all regular files stored under a dataset's directory.

    The dataset directory is ``crate_path / dataset.id``. The search descends
    into sub-directories, and directories themselves are not returned.

    Args:
        dataset: Dataset entity; only its ``id`` is read, as the directory
            name relative to ``crate_path``.
        crate_path: Root directory of the RO-Crate on disk.

    Returns:
        Sorted list of file paths as strings, each prefixed with
        ``crate_path``. Empty when the directory is missing or has no files.
    """
    dataset_dir = pathlib.Path(crate_path) / dataset.id

    # Escape the directory part so characters such as "[" or "*" in the crate
    # path or dataset id are matched literally instead of acting as wildcards.
    # ``recursive=True`` only has an effect together with a "**" component.
    pattern = str(pathlib.Path(glob.escape(str(dataset_dir))) / "**" / "*")

    # Callers stat each result as a file, so directories are filtered out.
    # Sorting keeps the output order deterministic across runs.
    return sorted(
        candidate
        for candidate in glob.glob(pattern, recursive=True)
        if pathlib.Path(candidate).is_file()
    )
41+
42+
43+
def create_api_file_reference(
    file_path: str, study_uuid: str, dataset_uuid: str, crate_path: pathlib.Path
) -> APIModels.FileReference:
    """Create a single API ``FileReference`` for one file inside the crate.

    Args:
        file_path: Path of the file on disk; must be located under
            ``crate_path``.
        study_uuid: UUID of the owning study, used to derive the file
            reference UUID.
        dataset_uuid: UUID of the dataset the file was submitted with.
        crate_path: Root directory of the RO-Crate; the stored ``file_path``
            is made relative to it.

    Returns:
        The populated ``APIModels.FileReference`` (a single object, not a
        list).

    Raises:
        ValueError: If ``file_path`` is not located under ``crate_path``.
        OSError: If the file cannot be stat'ed (e.g. it does not exist).
    """
    path = pathlib.Path(file_path)
    relative_path = path.relative_to(crate_path)

    # TODO: Work out how file URI would be generated.
    # NOTE(review): relative_path is handed to create_file_reference_uuid as a
    # pathlib.Path, not a str - confirm the helper accepts that.
    file_reference = {
        "uuid": str(uuid_creation.create_file_reference_uuid(relative_path, study_uuid)),
        "submission_dataset_uuid": dataset_uuid,
        "file_path": str(relative_path),
        "version": 1,
        "size_in_bytes": path.stat().st_size,
        # Suffix includes the leading dot (e.g. ".tif"); empty if none.
        "format": path.suffix,
        # Placeholder until URI generation is decided (see TODO above).
        "uri": "None?",
    }

    return APIModels.FileReference(**file_reference)

0 commit comments

Comments
 (0)