| 
 | 1 | +import logging  | 
 | 2 | +from argparse import ArgumentParser  | 
 | 3 | +from wkcuber.api.Dataset import WKDataset  | 
 | 4 | +from wkcuber.api.bounding_box import BoundingBox  | 
 | 5 | +import numpy as np  | 
 | 6 | + | 
 | 7 | +from .utils import (  | 
 | 8 | +    add_verbose_flag,  | 
 | 9 | +    open_wkw,  | 
 | 10 | +    WkwDatasetInfo,  | 
 | 11 | +    add_distribution_flags,  | 
 | 12 | +    get_executor_for_args,  | 
 | 13 | +    wait_and_ensure_success,  | 
 | 14 | +    setup_logging,  | 
 | 15 | +)  | 
 | 16 | +from .metadata import detect_resolutions, detect_bbox, detect_layers  | 
 | 17 | +import functools  | 
 | 18 | +from .compress import BACKUP_EXT  | 
 | 19 | + | 
 | 20 | +CHUNK_SIZE = 1024  | 
 | 21 | + | 
 | 22 | + | 
 | 23 | +def named_partial(func, *args, **kwargs):  | 
 | 24 | +    # Propagate __name__ and __doc__ attributes to partial function  | 
 | 25 | +    partial_func = functools.partial(func, *args, **kwargs)  | 
 | 26 | +    functools.update_wrapper(partial_func, func)  | 
 | 27 | +    if hasattr(func, "__annotations__"):  | 
 | 28 | +        # Generic types cannot be pickled in Python <= 3.6, see https://github.com/python/typing/issues/511  | 
 | 29 | +        partial_func.__annotations__ = {}  | 
 | 30 | +    return partial_func  | 
 | 31 | + | 
 | 32 | + | 
 | 33 | +def create_parser():  | 
 | 34 | +    parser = ArgumentParser()  | 
 | 35 | + | 
 | 36 | +    parser.add_argument("source_path", help="Path to input WKW dataset")  | 
 | 37 | + | 
 | 38 | +    parser.add_argument(  | 
 | 39 | +        "target_path", help="WKW dataset with which to compare the input dataset."  | 
 | 40 | +    )  | 
 | 41 | + | 
 | 42 | +    parser.add_argument(  | 
 | 43 | +        "--layer_name",  | 
 | 44 | +        "-l",  | 
 | 45 | +        help="Name of the layer to compare (if not provided, all layers are compared)",  | 
 | 46 | +        default=None,  | 
 | 47 | +    )  | 
 | 48 | + | 
 | 49 | +    add_verbose_flag(parser)  | 
 | 50 | +    add_distribution_flags(parser)  | 
 | 51 | + | 
 | 52 | +    return parser  | 
 | 53 | + | 
 | 54 | + | 
 | 55 | +def assert_equality_for_chunk(  | 
 | 56 | +    source_path: str, target_path: str, layer_name: str, mag, sub_box  | 
 | 57 | +):  | 
 | 58 | +    wk_dataset = WKDataset(source_path)  | 
 | 59 | +    layer = wk_dataset.layers[layer_name]  | 
 | 60 | +    backup_wkw_info = WkwDatasetInfo(target_path, layer_name, mag, header=None)  | 
 | 61 | +    with open_wkw(backup_wkw_info) as backup_wkw:  | 
 | 62 | +        mag_ds = layer.get_mag(mag)  | 
 | 63 | +        logging.info(f"Checking sub_box: {sub_box}")  | 
 | 64 | + | 
 | 65 | +        data = mag_ds.read(sub_box.size, sub_box.topleft)  | 
 | 66 | +        backup_data = backup_wkw.read(sub_box.topleft, sub_box.size)  | 
 | 67 | +        assert np.all(  | 
 | 68 | +            data == backup_data  | 
 | 69 | +        ), f"Data differs in bounding box {sub_box} for layer {layer_name} with mag {mag}"  | 
 | 70 | + | 
 | 71 | + | 
 | 72 | +def check_equality(source_path: str, target_path: str, args=None):  | 
 | 73 | + | 
 | 74 | +    logging.info(f"Comparing {source_path} with {target_path}")  | 
 | 75 | + | 
 | 76 | +    wk_src_dataset = WKDataset(source_path)  | 
 | 77 | +    src_layer_names = wk_src_dataset.layers.keys()  | 
 | 78 | +    target_layer_names = [  | 
 | 79 | +        layer["name"] for layer in detect_layers(target_path, 0, False)  | 
 | 80 | +    ]  | 
 | 81 | +    assert set(src_layer_names) == set(  | 
 | 82 | +        target_layer_names  | 
 | 83 | +    ), f"The provided input datasets have different layers: {src_layer_names} != {target_layer_names}"  | 
 | 84 | + | 
 | 85 | +    existing_layer_names = src_layer_names  | 
 | 86 | + | 
 | 87 | +    if args.layer_name is not None:  | 
 | 88 | +        assert (  | 
 | 89 | +            args.layer_name in existing_layer_names  | 
 | 90 | +        ), f"Provided layer {args.layer_name} does not exist in input dataset."  | 
 | 91 | +        existing_layer_names = [args.layer_name]  | 
 | 92 | + | 
 | 93 | +    for layer_name in existing_layer_names:  | 
 | 94 | + | 
 | 95 | +        logging.info(f"Checking layer_name: {layer_name}")  | 
 | 96 | + | 
 | 97 | +        source_mags = list(detect_resolutions(source_path, layer_name))  | 
 | 98 | +        target_mags = list(detect_resolutions(target_path, layer_name))  | 
 | 99 | +        source_mags.sort()  | 
 | 100 | +        target_mags.sort()  | 
 | 101 | +        mags = source_mags  | 
 | 102 | + | 
 | 103 | +        assert (  | 
 | 104 | +            source_mags == target_mags  | 
 | 105 | +        ), f"The mags between {source_path}/{layer_name} and {target_path}/{layer_name} are not equal: {source_mags} != {target_mags}"  | 
 | 106 | + | 
 | 107 | +        layer_properties = wk_src_dataset.properties.data_layers[layer_name]  | 
 | 108 | + | 
 | 109 | +        official_bbox = layer_properties.get_bounding_box()  | 
 | 110 | + | 
 | 111 | +        for mag in mags:  | 
 | 112 | +            inferred_src_bbox = BoundingBox.from_auto(  | 
 | 113 | +                detect_bbox(source_path, layer_name, mag)  | 
 | 114 | +            )  | 
 | 115 | +            inferred_target_bbox = BoundingBox.from_auto(  | 
 | 116 | +                detect_bbox(target_path, layer_name, mag)  | 
 | 117 | +            )  | 
 | 118 | + | 
 | 119 | +            bbox = inferred_src_bbox.extended_by(inferred_target_bbox).extended_by(  | 
 | 120 | +                official_bbox  | 
 | 121 | +            )  | 
 | 122 | +            logging.info(f"Start verification of {layer_name} in mag {mag} in {bbox}")  | 
 | 123 | + | 
 | 124 | +            with get_executor_for_args(args) as executor:  | 
 | 125 | +                boxes = list(  | 
 | 126 | +                    bbox.chunk([CHUNK_SIZE, CHUNK_SIZE, CHUNK_SIZE], [CHUNK_SIZE])  | 
 | 127 | +                )  | 
 | 128 | +                assert_fn = named_partial(  | 
 | 129 | +                    assert_equality_for_chunk, source_path, target_path, layer_name, mag  | 
 | 130 | +                )  | 
 | 131 | + | 
 | 132 | +                wait_and_ensure_success(executor.map_to_futures(assert_fn, boxes))  | 
 | 133 | + | 
 | 134 | +    logging.info(  | 
 | 135 | +        f"The following datasets seem to be equal (with regard to the layers: {existing_layer_names}):"  | 
 | 136 | +    )  | 
 | 137 | +    logging.info(source_path)  | 
 | 138 | +    logging.info(target_path)  | 
 | 139 | + | 
 | 140 | + | 
 | 141 | +if __name__ == "__main__":  | 
 | 142 | +    args = create_parser().parse_args()  | 
 | 143 | +    setup_logging(args)  | 
 | 144 | + | 
 | 145 | +    if args.target_path is None:  | 
 | 146 | +        target_path = args.source_path + BACKUP_EXT  | 
 | 147 | +    else:  | 
 | 148 | +        target_path = args.target_path  | 
 | 149 | +    check_equality(args.source_path, target_path, args)  | 
0 commit comments