|
| 1 | +import json |
| 2 | +import os |
| 3 | + |
| 4 | +import numpy as np |
| 5 | +import pandas as pd |
| 6 | +from flamingo_tools.s3_utils import create_s3_target, BUCKET_NAME |
| 7 | + |
| 8 | + |
def open_json(fs, path):
    """Load a JSON document stored under ``BUCKET_NAME``.

    Args:
        fs: Filesystem-like object exposing ``open(path, mode)`` as a context
            manager; in this script it is the value returned by
            ``create_s3_target()``.
        path: Key of the JSON file, relative to ``BUCKET_NAME``.

    Returns:
        The decoded JSON content.
    """
    # Function-local import so this block does not depend on the file header.
    import posixpath

    # Object-store keys use "/" as separator on every platform, whereas
    # os.path.join would insert "\\" on Windows and yield a non-existent key.
    s3_path = posixpath.join(BUCKET_NAME, path)
    with fs.open(s3_path, "r") as f:
        return json.load(f)
| 14 | + |
| 15 | + |
def open_tsv(fs, path):
    """Read a tab-separated table stored under ``BUCKET_NAME``.

    Args:
        fs: Filesystem-like object exposing ``open(path, mode)`` as a context
            manager; in this script it is the value returned by
            ``create_s3_target()``.
        path: Key of the TSV file, relative to ``BUCKET_NAME``.

    Returns:
        The table parsed by ``pandas.read_csv`` with ``sep="\\t"``.
    """
    # Function-local import so this block does not depend on the file header.
    import posixpath

    # Object-store keys use "/" as separator on every platform, whereas
    # os.path.join would insert "\\" on Windows and yield a non-existent key.
    s3_path = posixpath.join(BUCKET_NAME, path)
    with fs.open(s3_path, "r") as f:
        return pd.read_csv(f, sep="\t")
| 21 | + |
| 22 | + |
def _count_sgns(component_labels):
    """Return ``(total, largest)`` for the entries with a non-zero label.

    ``total`` is the number of entries whose component label is not 0 and
    ``largest`` is the size of the most populated non-zero component. Both
    are 0 when no entry has a non-zero label.
    """
    assigned = component_labels[component_labels != 0]
    if assigned.size == 0:
        # max() of an empty count array would raise ValueError.
        return 0, 0
    _, n_per_component = np.unique(assigned, return_counts=True)
    return len(assigned), int(n_per_component.max())


def main():
    """Print per-dataset counts for every source whose name starts with "SGN".

    Reads ``project.json`` from the bucket, then for each listed dataset
    (except "fens", which is skipped) loads ``dataset.json`` and, for each
    matching source, the ``default.tsv`` table referenced by its segmentation
    metadata. For each table it prints the number of rows with a non-zero
    ``component_labels`` value and the size of the largest such component.
    """
    # Function-local import so this block does not depend on the file header.
    import posixpath

    fs = create_s3_target()
    project_info = open_json(fs, "project.json")
    for dataset in project_info["datasets"]:
        if dataset == "fens":
            continue
        print(dataset)

        # Keys inside the bucket are "/"-separated regardless of the host OS.
        dataset_info = open_json(fs, posixpath.join(dataset, "dataset.json"))
        for source, source_info in dataset_info["sources"].items():
            if not source.startswith("SGN"):
                continue
            assert "segmentation" in source_info, (
                f"Source {source!r} in dataset {dataset!r} has no 'segmentation' entry"
            )
            seg_info = source_info["segmentation"]
            table_path = seg_info["tableData"]["tsv"]["relativePath"]
            table = open_tsv(fs, posixpath.join(dataset, table_path, "default.tsv"))

            n_total, n_largest = _count_sgns(table["component_labels"].to_numpy())
            print(source)
            print("Number of SGNs (all components) :", n_total)
            print("Number of SGNs (largest component):", n_largest)
| 45 | + |
| 46 | + |
# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments