From 6dfd8627a1771acfa2fc6c46c8854049cee04a23 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 12 Apr 2022 10:03:50 -0400 Subject: [PATCH 1/4] Add feature to generate objects (i.e. alpha-masked objects from png images) to aid AI training for object localization. To generate a sample dataset, run: run.py -c 60 -b 100 --text_color black -num -let -id images -w 200 -w 300 -wd 600 -k 20 -rk -d 3 -na 3 -bl 2 -rbl -i /tmp/alpha.txt -it object where /tmp/alpha.txt is a listing of png files like the following: /tmp/car.png /tmp/person.png /tmp/banana.png --- trdg/object_generator.py | 13 + trdg/object_localization_generator.py | 394 ++++++++++++++++++++++++++ trdg/run.py | 121 +++++--- 3 files changed, 489 insertions(+), 39 deletions(-) create mode 100644 trdg/object_generator.py create mode 100644 trdg/object_localization_generator.py diff --git a/trdg/object_generator.py b/trdg/object_generator.py new file mode 100644 index 000000000..a21d26ba4 --- /dev/null +++ b/trdg/object_generator.py @@ -0,0 +1,13 @@ +import random as rnd + +from PIL import Image, ImageColor, ImageFont, ImageDraw, ImageFilter + + +def generate( + imagePath +): + _img = Image.open(imagePath) + _mask = Image.new("RGB", (_img.width, _img.height), (0, 0, 0)) + + + return _img, _mask diff --git a/trdg/object_localization_generator.py b/trdg/object_localization_generator.py new file mode 100644 index 000000000..71da74cbb --- /dev/null +++ b/trdg/object_localization_generator.py @@ -0,0 +1,394 @@ +import os +import random as rnd + +from PIL import Image, ImageFilter, ImageStat +from PIL import ImageFont, ImageDraw + +from trdg import computer_text_generator, background_generator, distorsion_generator, object_generator +from trdg.utils import mask_to_bboxes + +try: + from trdg import handwritten_text_generator +except ImportError as e: + print("Missing modules for handwritten text generation.") + + +class FakeObjectDataGenerator(object): + @classmethod + def generate_from_tuple(cls, t): + """ + Same as
generate, but takes all parameters as one tuple + """ + cls.generate(*t) + + @classmethod + def generate( + cls, + index, + strings, + name, + output_image_width, + output_image_height, + object_width, + skewing_angle, + random_skew, + blur, + random_blur, + background, + distorsion_type, + distorsion_orientation, + background_type, + name_format, + out_dir, + ): + image = None + print(strings) + OUTPUT_IMAGE_WIDTH = output_image_width + OUTPUT_IMAGE_HEIGHT = output_image_height + image_dir = "images" + OUTPUT_FILE_NAME=name + OBJECT_WIDTH=object_width + + + #margin_top, margin_left, margin_bottom, margin_right = margins + #horizontal_margin = margin_left + margin_right + #vertical_margin = margin_top + margin_bottom + + ########################## + # Create picture of text # + ########################## + if os.path.exists(strings): + image, mask = object_generator.generate( + strings) + else: + image = Image.new( + "RGB", (object_width, object_width*3), (255, 255, 255, 0) + ) + draw = ImageDraw.Draw(image) + font = ImageFont.truetype(r'/usr/share/fonts/truetype/ttf-bitstream-vera/Vera.ttf', 32) + draw.text((10, 20), strings, font=font, fill=(100, 100 , 100, 255)) + mask = image + + + """ + random_angle = rnd.randint(0 - skewing_angle, skewing_angle) + + rotated_img = image.rotate( + skewing_angle if not random_skew else random_angle, expand=1 + ) + + rotated_mask = mask.rotate( + skewing_angle if not random_skew else random_angle, expand=1 + ) + """ + + random_angle = rnd.randint(0 - skewing_angle, skewing_angle) + image = image.convert('RGBA') + rotated_img = image.rotate( + skewing_angle if not random_skew else random_angle, expand=1, fillcolor = (255,255,255,0) + ) + + rotated_mask = mask.rotate( + skewing_angle if not random_skew else random_angle, expand=1 + ) + + ############################# + # Apply distorsion to image # + ############################# + if distorsion_type == 0: + distorted_img = rotated_img # Mind = blown + distorted_mask = rotated_mask 
+ elif distorsion_type == 1: + distorted_img, distorted_mask = distorsion_generator.sin( + rotated_img, + rotated_mask, + vertical=(distorsion_orientation == 0 or distorsion_orientation == 2), + horizontal=(distorsion_orientation == 1 or distorsion_orientation == 2), + ) + elif distorsion_type == 2: + distorted_img, distorted_mask = distorsion_generator.cos( + rotated_img, + rotated_mask, + vertical=(distorsion_orientation == 0 or distorsion_orientation == 2), + horizontal=(distorsion_orientation == 1 or distorsion_orientation == 2), + ) + else: + distorted_img, distorted_mask = distorsion_generator.random( + rotated_img, + rotated_mask, + vertical=(distorsion_orientation == 0 or distorsion_orientation == 2), + horizontal=(distorsion_orientation == 1 or distorsion_orientation == 2), + ) + + ################################## + # Resize image to desired format # + ################################## + + # Horizontal text + size=OBJECT_WIDTH + horizontal_margin = 0 + vertical_margin = 0 + + new_width = int( + distorted_img.size[0] + * (float(size - vertical_margin) / float(distorted_img.size[1])) + ) + resized_img = distorted_img.resize( + (new_width, size - vertical_margin), Image.ANTIALIAS + ) + resized_mask = distorted_mask.resize((new_width, size - vertical_margin), Image.NEAREST) + + ############################# + # Generate background image # + ############################# + background_width = OUTPUT_IMAGE_WIDTH + background_height = OUTPUT_IMAGE_HEIGHT + + if background_type >= 100: + background_type = rnd.randint(0, 3) + + if background_type == 0: + background_img = background_generator.gaussian_noise( + background_height, background_width + ) + elif background_type == 1: + background_img = background_generator.plain_white( + background_height, background_width + ) + elif background_type == 2: + background_img = background_generator.quasicrystal( + background_height, background_width + ) + else: + background_img = background_generator.image( + 
background_height, background_width, image_dir + ) + background_mask = Image.new( + "RGB", (background_width, background_height), (0, 0, 0) + ) + + ############################################################## + # Comparing average pixel value of text and background image # + ############################################################## + try: + background_mask = background_img + resized_mask = resized_img + resized_img_st = ImageStat.Stat(resized_img, resized_mask.split()[2]) + background_img_st = ImageStat.Stat(background_img) + resized_img_px_mean = sum(resized_img_st.mean[:2]) / 3 + background_img_px_mean = sum(background_img_st.mean) / 3 + if abs(resized_img_px_mean - background_img_px_mean) < 15: + print("value of mean pixel is too similar. Ignore this image") + print("resized_img_st \n {}".format(resized_img_st.mean)) + print("background_img_st \n {}".format(background_img_st.mean)) + + return + except Exception as err: + print("Cannot compute image stats") + print(err) + return + + ############################# + # Place text with alignment # + ############################# + + new_text_width, _ = resized_img.size + width=-10 + alignment=3 + if alignment == 0 or width == -1: + background_img.paste(resized_img, (margin_left, margin_top), resized_img) + background_mask.paste(resized_mask, (margin_left, margin_top)) + elif alignment == 1: + background_img.paste( + resized_img, + (int(background_width / 2 - new_text_width / 2), margin_top), + resized_img, + ) + background_mask.paste( + resized_mask, + (int(background_width / 2 - new_text_width / 2), margin_top), + ) + else: + object_offset_x = rnd.randint(0,background_img.size[0] - resized_img.size[0] ) + object_offset_y = rnd.randint(0, background_img.size[1] - resized_img.size[1]) + background_img.paste( + resized_img.copy(), + ((object_offset_x , object_offset_y)), + resized_img.convert("RGBA"), + ) + + ####################### + # Apply gaussian blur # + ####################### + gaussian_filter = 
ImageFilter.GaussianBlur( + radius=blur if not random_blur else rnd.randint(0, blur) + ) + final_image = background_img.filter(gaussian_filter) + final_mask = background_mask.filter(gaussian_filter) + + ############################################ + # Change image mode (RGB, grayscale, etc.) # + ############################################ + image_mode="RGB" + final_image = final_image.convert(image_mode) + final_mask = final_mask.convert(image_mode) + + ##################################### + # Generate name for resulting image # + ##################################### + # We remove spaces if space_width == 0 + space_width = 0 + text=OUTPUT_FILE_NAME + if space_width == 0: + text = text.replace(" ", "") + if name_format == 0: + name = "{}_{}".format(text, str(index)) + elif name_format == 1: + name = "{}_{}".format(str(index), text) + elif name_format == 2: + name = str(index) + else: + print("{} is not a valid name format. Using default.".format(name_format)) + name = "{}_{}".format(text, str(index)) + + extension="jpg" + image_name = "{}.{}".format(name, extension) + mask_name = "{}_mask.png".format(name) + box_name = "{}_boxes.txt".format(name) + tess_box_name = "{}.box".format(name) + + + # Save the image + output_mask = 0 + output_bboxes = 1 + if out_dir is not None: + final_image.save(os.path.join(out_dir, image_name)) + if output_mask == 1: + final_mask.save(os.path.join(out_dir, mask_name)) + if output_bboxes == 1: + bboxes = [ (object_offset_x, object_offset_y, object_offset_x + resized_img.size[0], object_offset_y + resized_img.size[1]) ] + save_to_voc_xml(os.path.splitext(image_name)[0], "out", final_image, bboxes) + with open(os.path.join(out_dir, box_name), "w") as f: + for bbox in bboxes: + f.write(" ".join([str(v) for v in bbox]) + "\n") + if output_bboxes == 2: + bboxes = mask_to_bboxes(final_mask, tess=True) + with open(os.path.join(out_dir, tess_box_name), "w") as f: + for bbox, char in zip(bboxes, text): + f.write(" ".join([char] + [str(v) for v 
in bbox] + ['0']) + "\n") + else: + if output_mask == 1: + return final_image, final_mask + return final_image + + + + +def save_to_voc_xml(image_name, save_folder="out", skiImage=None, bboxes=None, cat_np=None): + """ + Static Method + bboxes = None, use internal dataset + """ + if bboxes is None: + bboxes = [] + return + + #if cat_np is None: + # cat_np = self._cat_index + if skiImage is None: + skiImage = self._image_np + + if len(bboxes) == 0: + return + + width=skiImage.size[0] + height = skiImage.size[1] + depth=3 + """ + with Image.fromarray((skiImage).astype(np.uint8)) as img: + width, height = img.size + if img.mode == 'YCbCr': + depth = 3 + else: + depth = len(img.mode) + + if image_name is None or image_name == "": + #md5hash = hashlib.md5(img.tobytes()) + #_file_name = md5hash.hexdigest() + _file_name = "test" + else: + _file_name = image_name + """ + img = skiImage + _file_name = image_name + objects = '' + counter = 0 + database_name = "default" + image_folder_name = "default" + image_name = "default" + for bbox in bboxes: + # conversion of normalized b-boxes + if (bbox[0] + bbox[1] + bbox[2] + bbox[3]) < 4: + bbox[0] = bbox[0] * height + bbox[1] = bbox[1] * width + bbox[2] = bbox[2] * height + bbox[3] = bbox[3] * width + + try: + _cat_name = bbox[5] + except: + _cat_name = "unknown" + pass + + objects = objects + ''' + + {category_name} + Unspecified + 0 + 0 + + {xmin} + {ymin} + {xmax} + {ymax} + + '''.format( + category_name=_cat_name, + xmin=bbox[0], + ymin=bbox[1], + xmax=bbox[2], + ymax=bbox[3] + ) + counter = counter + 1 + + xml = ''' + {image_folder_name} + {image_name} + + {database_name} + + + {width} + {height} + {depth} + + 0{objects} + '''.format( + image_folder_name=image_folder_name, + image_name=_file_name + ".jpg", + database_name=database_name, + width=width, + height=height, + depth=depth, + objects=objects + ) + + try: + os.mkdir(save_folder) + except OSError: + pass + + anno_path = os.path.join(save_folder, _file_name + '.xml') + 
with open(anno_path, 'w') as file: + file.write(xml) + img.save(save_folder + "/" + _file_name + ".jpg", "JPEG") \ No newline at end of file diff --git a/trdg/run.py b/trdg/run.py index bec40497a..4d3d3d251 100755 --- a/trdg/run.py +++ b/trdg/run.py @@ -3,7 +3,11 @@ import os import sys -sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +# adding Folder_2 to the system path +sys.path.insert(0, os.path.join(os.path.dirname(__file__))) +#sys.path.insert(0, "/projects/src/python/sandbox/tensorflow/workspace/ups_leak_detection/tools/apps/ups/lib/TextRecognitionDataGenerator/trdg") + +#sys.path.append(os.path.join(os.path.dirname(__file__), "..")) import random as rnd import string @@ -13,6 +17,8 @@ from tqdm import tqdm from trdg.data_generator import FakeTextDataGenerator +from trdg.object_localization_generator import FakeObjectDataGenerator + from trdg.string_generator import (create_strings_from_dict, create_strings_from_file, create_strings_from_wikipedia, @@ -20,6 +26,7 @@ from trdg.utils import load_dict, load_fonts + def margins(margin): margins = margin.split(",") if len(margins) == 1: @@ -46,6 +53,14 @@ def parse_arguments(): help="When set, this argument uses a specified text file as source for the text", default="", ) + parser.add_argument( + "-it", + "--input_type", + type=str, + nargs="?", + help="font or object", + default="font", + ) parser.add_argument( "-l", "--language", @@ -429,45 +444,73 @@ def main(): string_count = len(strings) p = Pool(args.thread_count) - for _ in tqdm( - p.imap_unordered( - FakeTextDataGenerator.generate_from_tuple, - zip( - [i for i in range(0, string_count)], - strings, - [fonts[rnd.randrange(0, len(fonts))] for _ in range(0, string_count)], - [args.output_dir] * string_count, - [args.format] * string_count, - [args.extension] * string_count, - [args.skew_angle] * string_count, - [args.random_skew] * string_count, - [args.blur] * string_count, - [args.random_blur] * string_count, - [args.background] * 
string_count, - [args.distorsion] * string_count, - [args.distorsion_orientation] * string_count, - [args.handwritten] * string_count, - [args.name_format] * string_count, - [args.width] * string_count, - [args.alignment] * string_count, - [args.text_color] * string_count, - [args.orientation] * string_count, - [args.space_width] * string_count, - [args.character_spacing] * string_count, - [args.margins] * string_count, - [args.fit] * string_count, - [args.output_mask] * string_count, - [args.word_split] * string_count, - [args.image_dir] * string_count, - [args.stroke_width] * string_count, - [args.stroke_fill] * string_count, - [args.image_mode] * string_count, - [args.output_bboxes] * string_count, + + if args.input_type == "object": + for _ in tqdm( + p.imap_unordered( + FakeObjectDataGenerator.generate_from_tuple, + zip( + [i for i in range(0, string_count)], + strings, + [str(i) for i in range(0, string_count)], + [args.width] * string_count, + [args.length] * string_count, + [rnd.randrange(10, 160) for i in range(0, string_count)], + [args.skew_angle] * string_count, + [args.random_skew] * string_count, + [args.blur] * string_count, + [args.random_blur] * string_count, + [args.background] * string_count, + [args.distorsion] * string_count, + [args.distorsion_orientation] * string_count, + [args.background] * string_count, + [args.format] * string_count, + [args.output_dir] * string_count, + ), + ), + total=2, + ): + pass + else: + for _ in tqdm( + p.imap_unordered( + FakeTextDataGenerator.generate_from_tuple, + zip( + [i for i in range(0, string_count)], + strings, + [fonts[rnd.randrange(0, len(fonts))] for _ in range(0, string_count)], + [args.output_dir] * string_count, + [args.format] * string_count, + [args.extension] * string_count, + [args.skew_angle] * string_count, + [args.random_skew] * string_count, + [args.blur] * string_count, + [args.random_blur] * string_count, + [args.background] * string_count, + [args.distorsion] * string_count, + 
[args.distorsion_orientation] * string_count, + [args.handwritten] * string_count, + [args.name_format] * string_count, + [args.width] * string_count, + [args.alignment] * string_count, + [args.text_color] * string_count, + [args.orientation] * string_count, + [args.space_width] * string_count, + [args.character_spacing] * string_count, + [args.margins] * string_count, + [args.fit] * string_count, + [args.output_mask] * string_count, + [args.word_split] * string_count, + [args.image_dir] * string_count, + [args.stroke_width] * string_count, + [args.stroke_fill] * string_count, + [args.image_mode] * string_count, + [args.output_bboxes] * string_count, + ), ), - ), - total=args.count, - ): - pass + total=args.count, + ): + pass p.terminate() if args.name_format == 2: From bb1de8dc9d54a8792802369f56015cd0c97ce0a3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 12 Apr 2022 14:08:46 -0400 Subject: [PATCH 2/4] fix a bug when an object's size is bigger than background's size --- trdg/object_localization_generator.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/trdg/object_localization_generator.py b/trdg/object_localization_generator.py index 71da74cbb..2c6a612a3 100644 --- a/trdg/object_localization_generator.py +++ b/trdg/object_localization_generator.py @@ -209,8 +209,15 @@ def generate( (int(background_width / 2 - new_text_width / 2), margin_top), ) else: - object_offset_x = rnd.randint(0,background_img.size[0] - resized_img.size[0] ) - object_offset_y = rnd.randint(0, background_img.size[1] - resized_img.size[1]) + print(" bg.x: {}, object.x: {}".format(background_img.size[0], resized_img.size[0] )) + if resized_img.size[0] > background_img.size[0]: + object_offset_x = 0 + else: + object_offset_x = rnd.randint(0,background_img.size[0] - resized_img.size[0] ) + if resized_img.size[1] > background_img.size[1]: + object_offset_y = 0 + else: + object_offset_y = rnd.randint(0, background_img.size[1] - resized_img.size[1]) 
background_img.paste( resized_img.copy(), ((object_offset_x , object_offset_y)), From f5ce82e13975cdc9dc25ecfe02d783ba79d4e634 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 13 Apr 2022 08:00:52 -0400 Subject: [PATCH 3/4] fix a bug when image's width is less than or equal to zero --- trdg/data_generator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/trdg/data_generator.py b/trdg/data_generator.py index d67d81ae6..97a837933 100644 --- a/trdg/data_generator.py +++ b/trdg/data_generator.py @@ -134,6 +134,8 @@ def generate( (new_width, size - vertical_margin), Image.ANTIALIAS ) resized_mask = distorted_mask.resize((new_width, size - vertical_margin), Image.NEAREST) + if new_width + horizontal_margin <= 0: + horizontal_margin = 1 - new_width background_width = width if width > 0 else new_width + horizontal_margin background_height = size # Vertical text From cfc3d74f3a62a1f854f472c86da9ac30babcfa01 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 19 Apr 2022 12:17:04 -0400 Subject: [PATCH 4/4] do not try to create a quasicrystal background when the height or width of the image is 1 pixel, a scenario that causes division by zero --- trdg/background_generator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/trdg/background_generator.py b/trdg/background_generator.py index c383b7c81..6d6ba5836 100644 --- a/trdg/background_generator.py +++ b/trdg/background_generator.py @@ -33,13 +33,15 @@ def quasicrystal(height, width): """ Create a background with quasicrystal (https://en.wikipedia.org/wiki/Quasicrystal) """ - image = Image.new("L", (width, height)) pixels = image.load() frequency = rnd.random() * 30 + 20 # frequency phase = rnd.random() * 2 * math.pi # phase rotation_count = rnd.randint(10, 20) # of rotations + + if width == 1 or height == 1: + return image.convert("RGBA") for kw in range(width): y = float(kw) / (width - 1) * 4 * math.pi - 2 * math.pi