From 6dfd8627a1771acfa2fc6c46c8854049cee04a23 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Tue, 12 Apr 2022 10:03:50 -0400
Subject: [PATCH 1/4] Add feature to generate objects (i.e. alpha-masked objects
 from PNG images) to aid AI training for object localization.

To generate a sample dataset, run:

run.py -c 60 -b 100 --text_color black -num -let -id images -w 200 -w 300 -wd 600 -k 20 -rk -d 3 -na 3 -bl 2 -rbl -i /tmp/alpha.txt -it object

where /tmp/alpha.txt is a text file listing PNG file paths, one per line, like the following:

/tmp/car.png
/tmp/person.png
/tmp/banana.png
---
 trdg/object_generator.py              |  13 +
 trdg/object_localization_generator.py | 394 ++++++++++++++++++++++++++
 trdg/run.py                           | 121 +++++---
 3 files changed, 489 insertions(+), 39 deletions(-)
 create mode 100644 trdg/object_generator.py
 create mode 100644 trdg/object_localization_generator.py

diff --git a/trdg/object_generator.py b/trdg/object_generator.py
new file mode 100644
index 000000000..a21d26ba4
--- /dev/null
+++ b/trdg/object_generator.py
@@ -0,0 +1,13 @@
+import random as rnd
+
+from PIL import Image, ImageColor, ImageFont, ImageDraw, ImageFilter
+
+
+def generate(
+    imagePath
+):
+    _img = Image.open(imagePath)
+    _mask = Image.new("RGB", (_img.width, _img.height), (0, 0, 0))
+
+
+    return _img, _mask
diff --git a/trdg/object_localization_generator.py b/trdg/object_localization_generator.py
new file mode 100644
index 000000000..71da74cbb
--- /dev/null
+++ b/trdg/object_localization_generator.py
@@ -0,0 +1,394 @@
+import os
+import random as rnd
+
+from PIL import Image, ImageFilter, ImageStat
+from PIL import ImageFont, ImageDraw
+
+from trdg import computer_text_generator, background_generator, distorsion_generator, object_generator
+from trdg.utils import mask_to_bboxes
+
+try:
+    from trdg import handwritten_text_generator
+except ImportError as e:
+    print("Missing modules for handwritten text generation.")
+
+
+class FakeObjectDataGenerator(object):
+    @classmethod
+    def generate_from_tuple(cls, t):
+        """
+            Same as generate, but takes all parameters as one tuple
+        """
+        cls.generate(*t)
+
+    @classmethod
+    def generate(
+        cls,
+        index,
+        strings,
+        name,
+        output_image_width,
+        output_image_height,
+        object_width,
+        skewing_angle,
+        random_skew,
+        blur,
+        random_blur,
+        background,
+        distorsion_type,
+        distorsion_orientation,
+        background_type,
+        name_format,
+        out_dir,
+    ):
+        image = None
+        print(strings)
+        OUTPUT_IMAGE_WIDTH = output_image_width
+        OUTPUT_IMAGE_HEIGHT = output_image_height
+        image_dir = "images"
+        OUTPUT_FILE_NAME=name
+        OBJECT_WIDTH=object_width
+
+
+        #margin_top, margin_left, margin_bottom, margin_right = margins
+        #horizontal_margin = margin_left + margin_right
+        #vertical_margin = margin_top + margin_bottom
+
+        ##########################
+        # Create picture of text #
+        ##########################
+        if os.path.exists(strings):
+            image, mask = object_generator.generate(
+                strings)
+        else:
+            image = Image.new(
+                "RGB", (object_width, object_width*3), (255, 255, 255, 0)
+            )
+            draw = ImageDraw.Draw(image)
+            font = ImageFont.truetype(r'/usr/share/fonts/truetype/ttf-bitstream-vera/Vera.ttf', 32)
+            draw.text((10, 20), strings, font=font, fill=(100, 100 , 100, 255))
+            mask = image
+
+
+        """
+        random_angle = rnd.randint(0 - skewing_angle, skewing_angle)
+
+        rotated_img = image.rotate(
+            skewing_angle if not random_skew else random_angle, expand=1
+        )
+
+        rotated_mask = mask.rotate(
+            skewing_angle if not random_skew else random_angle, expand=1
+        )
+            """
+
+        random_angle = rnd.randint(0 - skewing_angle, skewing_angle)
+        image = image.convert('RGBA')
+        rotated_img = image.rotate(
+            skewing_angle if not random_skew else random_angle, expand=1, fillcolor = (255,255,255,0)
+        )
+
+        rotated_mask = mask.rotate(
+            skewing_angle if not random_skew else random_angle, expand=1
+        )
+
+        #############################
+        # Apply distorsion to image #
+        #############################
+        if distorsion_type == 0:
+            distorted_img = rotated_img  # Mind = blown
+            distorted_mask = rotated_mask
+        elif distorsion_type == 1:
+            distorted_img, distorted_mask = distorsion_generator.sin(
+                rotated_img,
+                rotated_mask,
+                vertical=(distorsion_orientation == 0 or distorsion_orientation == 2),
+                horizontal=(distorsion_orientation == 1 or distorsion_orientation == 2),
+            )
+        elif distorsion_type == 2:
+            distorted_img, distorted_mask = distorsion_generator.cos(
+                rotated_img,
+                rotated_mask,
+                vertical=(distorsion_orientation == 0 or distorsion_orientation == 2),
+                horizontal=(distorsion_orientation == 1 or distorsion_orientation == 2),
+            )
+        else:
+            distorted_img, distorted_mask = distorsion_generator.random(
+                rotated_img,
+                rotated_mask,
+                vertical=(distorsion_orientation == 0 or distorsion_orientation == 2),
+                horizontal=(distorsion_orientation == 1 or distorsion_orientation == 2),
+            )
+
+        ##################################
+        # Resize image to desired format #
+        ##################################
+
+        # Horizontal text
+        size=OBJECT_WIDTH
+        horizontal_margin = 0
+        vertical_margin = 0
+
+        new_width = int(
+                distorted_img.size[0]
+                * (float(size - vertical_margin) / float(distorted_img.size[1]))
+        )
+        resized_img = distorted_img.resize(
+                (new_width, size - vertical_margin), Image.ANTIALIAS
+        )
+        resized_mask = distorted_mask.resize((new_width, size - vertical_margin), Image.NEAREST)
+
+        #############################
+        # Generate background image #
+        #############################
+        background_width = OUTPUT_IMAGE_WIDTH
+        background_height = OUTPUT_IMAGE_HEIGHT
+
+        if background_type >= 100:
+            background_type = rnd.randint(0, 3)
+
+        if background_type == 0:
+            background_img = background_generator.gaussian_noise(
+                background_height, background_width
+            )
+        elif background_type == 1:
+            background_img = background_generator.plain_white(
+                background_height, background_width
+            )
+        elif background_type == 2:
+            background_img = background_generator.quasicrystal(
+                background_height, background_width
+            )
+        else:
+            background_img = background_generator.image(
+                background_height, background_width, image_dir
+            )
+        background_mask = Image.new(
+            "RGB", (background_width, background_height), (0, 0, 0)
+        )
+
+        ##############################################################
+        # Comparing average pixel value of text and background image #
+        ##############################################################
+        try:
+            background_mask = background_img
+            resized_mask = resized_img
+            resized_img_st = ImageStat.Stat(resized_img, resized_mask.split()[2])
+            background_img_st = ImageStat.Stat(background_img)
+            resized_img_px_mean = sum(resized_img_st.mean[:2]) / 3
+            background_img_px_mean = sum(background_img_st.mean) / 3
+            if abs(resized_img_px_mean - background_img_px_mean) < 15:
+                print("value of mean pixel is too similar. Ignore this image")
+                print("resized_img_st \n {}".format(resized_img_st.mean))
+                print("background_img_st \n {}".format(background_img_st.mean))
+
+                return
+        except Exception as err:
+            print("Cannot compute image stats")
+            print(err)
+            return
+
+        #############################
+        # Place text with alignment #
+        #############################
+
+        new_text_width, _ = resized_img.size
+        width=-10
+        alignment=3
+        if alignment == 0 or width == -1:
+            background_img.paste(resized_img, (margin_left, margin_top), resized_img)
+            background_mask.paste(resized_mask, (margin_left, margin_top))
+        elif alignment == 1:
+            background_img.paste(
+                resized_img,
+                (int(background_width / 2 - new_text_width / 2), margin_top),
+                resized_img,
+            )
+            background_mask.paste(
+                resized_mask,
+                (int(background_width / 2 - new_text_width / 2), margin_top),
+            )
+        else:
+            object_offset_x = rnd.randint(0,background_img.size[0] - resized_img.size[0] )
+            object_offset_y = rnd.randint(0, background_img.size[1] - resized_img.size[1])
+            background_img.paste(
+                resized_img.copy(),
+                ((object_offset_x , object_offset_y)),
+                resized_img.convert("RGBA"),
+            )
+
+        #######################
+        # Apply gaussian blur #
+        #######################
+        gaussian_filter = ImageFilter.GaussianBlur(
+            radius=blur if not random_blur else rnd.randint(0, blur)
+        )
+        final_image = background_img.filter(gaussian_filter)
+        final_mask = background_mask.filter(gaussian_filter)
+        
+        ############################################
+        # Change image mode (RGB, grayscale, etc.) #
+        ############################################
+        image_mode="RGB"
+        final_image = final_image.convert(image_mode)
+        final_mask = final_mask.convert(image_mode) 
+
+        #####################################
+        # Generate name for resulting image #
+        #####################################
+        # We remove spaces if space_width == 0
+        space_width = 0
+        text=OUTPUT_FILE_NAME
+        if space_width == 0:
+            text = text.replace(" ", "")
+        if name_format == 0:
+            name = "{}_{}".format(text, str(index))
+        elif name_format == 1:
+            name = "{}_{}".format(str(index), text)
+        elif name_format == 2:
+            name = str(index)
+        else:
+            print("{} is not a valid name format. Using default.".format(name_format))
+            name = "{}_{}".format(text, str(index))
+
+        extension="jpg"
+        image_name = "{}.{}".format(name, extension)
+        mask_name = "{}_mask.png".format(name)
+        box_name = "{}_boxes.txt".format(name)
+        tess_box_name = "{}.box".format(name)
+
+
+        # Save the image
+        output_mask = 0
+        output_bboxes = 1
+        if out_dir is not None:
+            final_image.save(os.path.join(out_dir, image_name))
+            if output_mask == 1:
+                final_mask.save(os.path.join(out_dir, mask_name))
+            if output_bboxes == 1:
+                bboxes = [  (object_offset_x, object_offset_y, object_offset_x + resized_img.size[0], object_offset_y + resized_img.size[1]) ]
+                save_to_voc_xml(os.path.splitext(image_name)[0], "out", final_image, bboxes)
+                with open(os.path.join(out_dir, box_name), "w") as f:
+                    for bbox in bboxes:
+                        f.write(" ".join([str(v) for v in bbox]) + "\n")
+            if output_bboxes == 2:
+                bboxes = mask_to_bboxes(final_mask, tess=True)
+                with open(os.path.join(out_dir, tess_box_name), "w") as f:
+                    for bbox, char in zip(bboxes, text):
+                        f.write(" ".join([char] + [str(v) for v in bbox] + ['0']) + "\n")
+        else:
+            if output_mask == 1:
+                return final_image, final_mask
+            return final_image
+
+
+
+
+def save_to_voc_xml(image_name, save_folder="out", skiImage=None, bboxes=None, cat_np=None):
+    """
+    Static Method
+    bboxes = None, use internal dataset
+    """
+    if bboxes is None:
+        bboxes = []
+        return
+
+    #if cat_np is None:
+    #    cat_np = self._cat_index
+    if skiImage is None:
+        skiImage = self._image_np
+
+    if len(bboxes) == 0:
+        return
+
+    width=skiImage.size[0]
+    height = skiImage.size[1]
+    depth=3
+    """
+    with Image.fromarray((skiImage).astype(np.uint8)) as img:
+        width, height = img.size
+        if img.mode == 'YCbCr':
+            depth = 3
+        else:
+            depth = len(img.mode)
+
+        if image_name is None or image_name == "":
+            #md5hash = hashlib.md5(img.tobytes())
+            #_file_name = md5hash.hexdigest()
+            _file_name = "test"
+        else:
+            _file_name = image_name
+            """
+    img = skiImage
+    _file_name = image_name
+    objects = ''
+    counter = 0
+    database_name = "default"
+    image_folder_name = "default"
+    image_name = "default"
+    for bbox in bboxes:
+        # conversion of normalized b-boxes
+        if (bbox[0] + bbox[1] + bbox[2] + bbox[3]) < 4:
+            bbox[0] = bbox[0] * height
+            bbox[1] = bbox[1] * width
+            bbox[2] = bbox[2] * height
+            bbox[3] = bbox[3] * width
+
+        try:
+            _cat_name = bbox[5]
+        except:
+            _cat_name = "unknown"
+            pass
+
+        objects = objects + '''
+        	<object>
+        		<name>{category_name}</name>
+        		<pose>Unspecified</pose>
+        		<truncated>0</truncated>
+        		<difficult>0</difficult>
+        		<bndbox>
+        			<xmin>{xmin}</xmin>
+        			<ymin>{ymin}</ymin>
+        			<xmax>{xmax}</xmax>
+        			<ymax>{ymax}</ymax>
+        		</bndbox>
+        	</object>'''.format(
+            category_name=_cat_name,
+            xmin=bbox[0],
+            ymin=bbox[1],
+            xmax=bbox[2],
+            ymax=bbox[3]
+        )
+        counter = counter + 1
+
+    xml = '''<annotation>
+        	<folder>{image_folder_name}</folder>
+        	<filename>{image_name}</filename>
+        	<source>
+        		<database>{database_name}</database>
+        	</source>
+        	<size>
+        		<width>{width}</width>
+        		<height>{height}</height>
+        		<depth>{depth}</depth>
+        	</size>
+        	<segmented>0</segmented>{objects}
+        </annotation>'''.format(
+        image_folder_name=image_folder_name,
+        image_name=_file_name + ".jpg",
+        database_name=database_name,
+        width=width,
+        height=height,
+        depth=depth,
+        objects=objects
+    )
+
+    try:
+        os.mkdir(save_folder)
+    except OSError:
+        pass
+
+    anno_path = os.path.join(save_folder, _file_name + '.xml')
+    with open(anno_path, 'w') as file:
+        file.write(xml)
+        img.save(save_folder + "/" + _file_name + ".jpg", "JPEG")
\ No newline at end of file
diff --git a/trdg/run.py b/trdg/run.py
index bec40497a..4d3d3d251 100755
--- a/trdg/run.py
+++ b/trdg/run.py
@@ -3,7 +3,11 @@
 import os
 import sys
 
-sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+# adding Folder_2 to the system path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
+#sys.path.insert(0, "/projects/src/python/sandbox/tensorflow/workspace/ups_leak_detection/tools/apps/ups/lib/TextRecognitionDataGenerator/trdg")
+
+#sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
 
 import random as rnd
 import string
@@ -13,6 +17,8 @@
 from tqdm import tqdm
 
 from trdg.data_generator import FakeTextDataGenerator
+from trdg.object_localization_generator import FakeObjectDataGenerator
+
 from trdg.string_generator import (create_strings_from_dict,
                                    create_strings_from_file,
                                    create_strings_from_wikipedia,
@@ -20,6 +26,7 @@
 from trdg.utils import load_dict, load_fonts
 
 
+
 def margins(margin):
     margins = margin.split(",")
     if len(margins) == 1:
@@ -46,6 +53,14 @@ def parse_arguments():
         help="When set, this argument uses a specified text file as source for the text",
         default="",
     )
+    parser.add_argument(
+        "-it",
+        "--input_type",
+        type=str,
+        nargs="?",
+        help="font or object",
+        default="font",
+    )
     parser.add_argument(
         "-l",
         "--language",
@@ -429,45 +444,73 @@ def main():
     string_count = len(strings)
 
     p = Pool(args.thread_count)
-    for _ in tqdm(
-        p.imap_unordered(
-            FakeTextDataGenerator.generate_from_tuple,
-            zip(
-                [i for i in range(0, string_count)],
-                strings,
-                [fonts[rnd.randrange(0, len(fonts))] for _ in range(0, string_count)],
-                [args.output_dir] * string_count,
-                [args.format] * string_count,
-                [args.extension] * string_count,
-                [args.skew_angle] * string_count,
-                [args.random_skew] * string_count,
-                [args.blur] * string_count,
-                [args.random_blur] * string_count,
-                [args.background] * string_count,
-                [args.distorsion] * string_count,
-                [args.distorsion_orientation] * string_count,
-                [args.handwritten] * string_count,
-                [args.name_format] * string_count,
-                [args.width] * string_count,
-                [args.alignment] * string_count,
-                [args.text_color] * string_count,
-                [args.orientation] * string_count,
-                [args.space_width] * string_count,
-                [args.character_spacing] * string_count,
-                [args.margins] * string_count,
-                [args.fit] * string_count,
-                [args.output_mask] * string_count,
-                [args.word_split] * string_count,
-                [args.image_dir] * string_count,
-                [args.stroke_width] * string_count,
-                [args.stroke_fill] * string_count,
-                [args.image_mode] * string_count,
-                [args.output_bboxes] * string_count,
+
+    if args.input_type == "object":
+        for _ in tqdm(
+                p.imap_unordered(
+                    FakeObjectDataGenerator.generate_from_tuple,
+                    zip(
+                        [i for i in range(0, string_count)],
+                        strings,
+                        [str(i) for i in range(0, string_count)],
+                        [args.width] * string_count,
+                        [args.length] * string_count,
+                        [rnd.randrange(10, 160) for i in range(0, string_count)],
+                        [args.skew_angle] * string_count,
+                        [args.random_skew] * string_count,
+                        [args.blur] * string_count,
+                        [args.random_blur] * string_count,
+                        [args.background] * string_count,
+                        [args.distorsion] * string_count,
+                        [args.distorsion_orientation] * string_count,
+                        [args.background] * string_count,
+                        [args.format] * string_count,
+                        [args.output_dir] * string_count,
+                    ),
+                ),
+                total=2,
+        ):
+            pass
+    else:
+        for _ in tqdm(
+            p.imap_unordered(
+                FakeTextDataGenerator.generate_from_tuple,
+                zip(
+                    [i for i in range(0, string_count)],
+                    strings,
+                    [fonts[rnd.randrange(0, len(fonts))] for _ in range(0, string_count)],
+                    [args.output_dir] * string_count,
+                    [args.format] * string_count,
+                    [args.extension] * string_count,
+                    [args.skew_angle] * string_count,
+                    [args.random_skew] * string_count,
+                    [args.blur] * string_count,
+                    [args.random_blur] * string_count,
+                    [args.background] * string_count,
+                    [args.distorsion] * string_count,
+                    [args.distorsion_orientation] * string_count,
+                    [args.handwritten] * string_count,
+                    [args.name_format] * string_count,
+                    [args.width] * string_count,
+                    [args.alignment] * string_count,
+                    [args.text_color] * string_count,
+                    [args.orientation] * string_count,
+                    [args.space_width] * string_count,
+                    [args.character_spacing] * string_count,
+                    [args.margins] * string_count,
+                    [args.fit] * string_count,
+                    [args.output_mask] * string_count,
+                    [args.word_split] * string_count,
+                    [args.image_dir] * string_count,
+                    [args.stroke_width] * string_count,
+                    [args.stroke_fill] * string_count,
+                    [args.image_mode] * string_count,
+                    [args.output_bboxes] * string_count,
+                ),
             ),
-        ),
-        total=args.count,
-    ):
-        pass
+            total=args.count,
+        ):
+            pass
     p.terminate()
 
     if args.name_format == 2:

From bb1de8dc9d54a8792802369f56015cd0c97ce0a3 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Tue, 12 Apr 2022 14:08:46 -0400
Subject: [PATCH 2/4] fix a bug when an object's size is bigger than
 background's size

---
 trdg/object_localization_generator.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/trdg/object_localization_generator.py b/trdg/object_localization_generator.py
index 71da74cbb..2c6a612a3 100644
--- a/trdg/object_localization_generator.py
+++ b/trdg/object_localization_generator.py
@@ -209,8 +209,15 @@ def generate(
                 (int(background_width / 2 - new_text_width / 2), margin_top),
             )
         else:
-            object_offset_x = rnd.randint(0,background_img.size[0] - resized_img.size[0] )
-            object_offset_y = rnd.randint(0, background_img.size[1] - resized_img.size[1])
+            print(" bg.x: {}, object.x: {}".format(background_img.size[0], resized_img.size[0] ))
+            if resized_img.size[0] > background_img.size[0]:
+                object_offset_x = 0
+            else:
+                object_offset_x = rnd.randint(0,background_img.size[0] - resized_img.size[0] )
+            if resized_img.size[1] > background_img.size[1]:
+                object_offset_y = 0
+            else:
+                object_offset_y = rnd.randint(0, background_img.size[1] - resized_img.size[1])
             background_img.paste(
                 resized_img.copy(),
                 ((object_offset_x , object_offset_y)),

From f5ce82e13975cdc9dc25ecfe02d783ba79d4e634 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Wed, 13 Apr 2022 08:00:52 -0400
Subject: [PATCH 3/4] fix a bug when image's width is less than or equal to
 zero

---
 trdg/data_generator.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/trdg/data_generator.py b/trdg/data_generator.py
index d67d81ae6..97a837933 100644
--- a/trdg/data_generator.py
+++ b/trdg/data_generator.py
@@ -134,6 +134,8 @@ def generate(
                 (new_width, size - vertical_margin), Image.ANTIALIAS
             )
             resized_mask = distorted_mask.resize((new_width, size - vertical_margin), Image.NEAREST)
+            if new_width + horizontal_margin <= 0:
+                horizontal_margin = 1 - new_width
             background_width = width if width > 0 else new_width + horizontal_margin
             background_height = size
         # Vertical text

From cfc3d74f3a62a1f854f472c86da9ac30babcfa01 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Tue, 19 Apr 2022 12:17:04 -0400
Subject: [PATCH 4/4] do not try to create a quasicrystal background when the
 width or height of the image is 1 pixel, a scenario that causes division by
 zero

---
 trdg/background_generator.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/trdg/background_generator.py b/trdg/background_generator.py
index c383b7c81..6d6ba5836 100644
--- a/trdg/background_generator.py
+++ b/trdg/background_generator.py
@@ -33,13 +33,15 @@ def quasicrystal(height, width):
     """
         Create a background with quasicrystal (https://en.wikipedia.org/wiki/Quasicrystal)
     """
-
     image = Image.new("L", (width, height))
     pixels = image.load()
 
     frequency = rnd.random() * 30 + 20  # frequency
     phase = rnd.random() * 2 * math.pi  # phase
     rotation_count = rnd.randint(10, 20)  # of rotations
+    
+    if width == 1 or height == 1:
+        return image.convert("RGBA")
 
     for kw in range(width):
         y = float(kw) / (width - 1) * 4 * math.pi - 2 * math.pi