|
22 | 22 | - RandomMosaic |
23 | 23 | - RandomZoom |
24 | 24 | - RandomColorMode |
| 25 | +- RandomElasticTransform |
25 | 26 |
|
26 | 27 | Implemented audio augmentors: |
27 | 28 | - RandomAudioNoise |
@@ -494,14 +495,14 @@ def __init__( |
494 | 495 | self, |
495 | 496 | random_chance: float = 0.5, |
496 | 497 | log_level: int = logging.INFO, |
497 | | - augment_annotation: bool = False, |
| 498 | + augment_annotation: bool = True, |
498 | 499 | ) -> None: |
499 | 500 | """ Randomly mirror image |
500 | 501 | |
501 | 502 | Args: |
502 | 503 | random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5. |
503 | 504 | log_level (int): Log level for the augmentor. Defaults to logging.INFO. |
504 | | - augment_annotation (bool): Whether to augment the annotation. Defaults to False. |
| 505 | + augment_annotation (bool): Whether to augment the annotation. Defaults to True. |
505 | 506 | """ |
506 | 507 | super(RandomMirror, self).__init__(random_chance, log_level, augment_annotation) |
507 | 508 |
|
@@ -534,14 +535,14 @@ def __init__( |
534 | 535 | self, |
535 | 536 | random_chance: float = 0.5, |
536 | 537 | log_level: int = logging.INFO, |
537 | | - augment_annotation: bool = False, |
| 538 | + augment_annotation: bool = True, |
538 | 539 | ) -> None: |
539 | 540 | """ Randomly flip image |
540 | 541 | |
541 | 542 | Args: |
542 | 543 | random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5. |
543 | 544 | log_level (int): Log level for the augmentor. Defaults to logging.INFO. |
544 | | - augment_annotation (bool): Whether to augment the annotation. Defaults to False. |
| 545 | + augment_annotation (bool): Whether to augment the annotation. Defaults to True. |
545 | 546 | """ |
546 | 547 | super(RandomFlip, self).__init__(random_chance, log_level, augment_annotation) |
547 | 548 |
|
@@ -839,6 +840,110 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, |
839 | 840 | return image, annotation |
840 | 841 |
|
841 | 842 |
|
| 843 | +class RandomElasticTransform(Augmentor): |
| 844 | + """ Randomly apply elastic transform to an image |
| 845 | + |
| 846 | + Attributes: |
| 847 | + random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5. |
| 848 | + alpha_range (tuple): Tuple of 2 floats, setting bounds for random alpha value. Defaults to (0, 0.1). |
| 849 | + sigma_range (tuple): Tuple of 2 floats, setting bounds for random sigma value. Defaults to (0.01, 0.02). |
| 850 | + log_level (int): Log level for the augmentor. Defaults to logging.INFO. |
| 851 | + augment_annotation (bool): Whether to augment the annotation. Defaults to True. |
| 852 | + """ |
| 853 | + def __init__( |
| 854 | + self, |
| 855 | + random_chance: float = 0.5, |
| 856 | + alpha_range: tuple = (0, 0.1), |
| 857 | + sigma_range: tuple = (0.01, 0.02), |
| 858 | + log_level: int = logging.INFO, |
| 859 | + augment_annotation: bool = True, |
| 860 | + ) -> None: |
| 861 | + super(RandomElasticTransform, self).__init__(random_chance, log_level, augment_annotation) |
| 862 | + self.alpha_range = alpha_range |
| 863 | + self.sigma_range = sigma_range |
| 864 | + |
| 865 | + @staticmethod |
| 866 | + def elastic_transform(image: np.ndarray, alpha: float, sigma: float) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]: |
| 867 | + """ Apply elastic transform to an image |
| 868 | +
|
| 869 | + Args: |
| 870 | + image (np.ndarray): Image to be used for elastic transform |
| 871 | + alpha (float): Alpha value for elastic transform |
| 872 | + sigma (float): Sigma value for elastic transform |
| 873 | +
|
| 874 | + Returns: |
| 875 | + remap_fn (np.ndarray): Elastic transformed image |
| 876 | + dx (np.ndarray): X-axis displacement |
| 877 | + dy (np.ndarray): Y-axis displacement |
| 878 | + """ |
| 879 | + height, width, channels = image.shape |
| 880 | + dx = np.random.rand(height, width).astype(np.float32) * 2 - 1 |
| 881 | + dy = np.random.rand(height, width).astype(np.float32) * 2 - 1 |
| 882 | + |
| 883 | + cv2.GaussianBlur(dx, (0, 0), sigma, dst=dx) |
| 884 | + cv2.GaussianBlur(dy, (0, 0), sigma, dst=dy) |
| 885 | + |
| 886 | + dx *= alpha |
| 887 | + dy *= alpha |
| 888 | + |
| 889 | + x, y = np.meshgrid(np.arange(width), np.arange(height)) |
| 890 | + |
| 891 | + map_x = np.float32(x + dx) |
| 892 | + map_y = np.float32(y + dy) |
| 893 | + |
| 894 | + remap_fn = cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT) |
| 895 | + |
| 896 | + return remap_fn, dx, dy |
| 897 | + |
| 898 | + @randomness_decorator |
| 899 | + def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]: |
| 900 | + """ Randomly apply elastic transform to an image |
| 901 | +
|
| 902 | + Args: |
| 903 | + image (Image): Image to be used for elastic transform |
| 904 | + annotation (typing.Any): Annotation to be used for elastic transform |
| 905 | +
|
| 906 | + Returns: |
| 907 | + image (Image): Elastic transformed image |
| 908 | + annotation (typing.Any): Elastic transformed annotation if necessary |
| 909 | + """ |
| 910 | + alpha = image.width * np.random.uniform(*self.alpha_range) |
| 911 | + sigma = image.width * np.random.uniform(*self.sigma_range) |
| 912 | + new_image, dx, dy = self.elastic_transform(image.numpy(), alpha, sigma) |
| 913 | + image.update(new_image) |
| 914 | + |
| 915 | + if isinstance(annotation, Detections) and self._augment_annotation: |
| 916 | + detections = [] |
| 917 | + for detection in annotation: |
| 918 | + x_min, y_min, x_max, y_max = detection.xyxy_abs |
| 919 | + new_x_min = min(max(0, x_min + dx[y_min, x_min]), image.width - 1) |
| 920 | + new_y_min = min(max(0, y_min + dy[y_min, x_min]), image.height - 1) |
| 921 | + new_x_max = min(max(0, x_max + dx[y_max, x_max]), image.width - 1) |
| 922 | + new_y_max = min(max(0, y_max + dy[y_max, x_max]), image.height - 1) |
| 923 | + detections.append( |
| 924 | + Detection( |
| 925 | + [new_x_min, new_y_min, new_x_max, new_y_max], |
| 926 | + label=detection.label, |
| 927 | + labels=detection.labels, |
| 928 | + confidence=detection.confidence, |
| 929 | + image_path=detection.image_path, |
| 930 | + width=image.width, |
| 931 | + height=image.height, |
| 932 | + relative=False, |
| 933 | + bbox_type = BboxType.XYXY |
| 934 | + ) |
| 935 | + ) |
| 936 | + |
| 937 | + annotation = Detections( |
| 938 | + labels=annotation.labels, |
| 939 | + width=image.width, |
| 940 | + height=image.height, |
| 941 | + detections=detections |
| 942 | + ) |
| 943 | + |
| 944 | + return image, annotation |
| 945 | + |
| 946 | + |
842 | 947 | class RandomAudioNoise(Augmentor): |
843 | 948 | """ Randomly add noise to audio |
844 | 949 |
|
|
0 commit comments