|
| 1 | +# Copyright (c) OpenMMLab. All rights reserved. |
| 2 | +from typing import List, Optional, Tuple |
| 3 | + |
| 4 | +from mmocr.utils import bbox2poly |
| 5 | +from ..data_preparer import DATA_PARSERS |
| 6 | +from .base import BaseParser |
| 7 | + |
| 8 | + |
@DATA_PARSERS.register_module()
class SROIETextDetAnnParser(BaseParser):
    """SROIE Txt Format Text Detection Annotation Parser.

    The original annotation format of this dataset is stored in txt files,
    which is formed as the following format:
        x1, y1, x2, y2, x3, y3, x4, y4, transcription

    Args:
        separator (str): The separator between each element in a line. Defaults
            to ','.
        ignore (str): The text to be ignored. Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8-sig'.
        nproc (int): The number of processes to parse the annotation. Defaults
            to 1.
        remove_strs (List[str], Optional): Used to remove redundant strings in
            the transcription. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. Defaults to None.
    """

    def __init__(self,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8-sig',
                 nproc: int = 1,
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(nproc=nproc)

    def parse_file(self, file: Tuple, split: str) -> Tuple:
        """Parse single annotation.

        Args:
            file (Tuple): A pair of ``(img_file, txt_file)``.
            split (str): Not used by this method.

        Returns:
            Tuple: ``(img_file, instances)``, where ``instances`` is a list
            of dicts with the keys ``poly``, ``text`` and ``ignore``.
        """
        img_file, txt_file = file
        instances = list()
        try:
            # there might be some illegal symbols in the annotation
            # which cannot be parsed by loader
            for anno in self.loader(txt_file, self.sep, self.format,
                                    self.encoding):
                fields = list(anno.values())
                if self.remove_strs is not None:
                    for redundant in self.remove_strs:
                        fields = [
                            field.replace(redundant, '')
                            if redundant in field else field
                            for field in fields
                        ]
                # Every field but the last is a coordinate; the last one is
                # the transcription.
                poly = list(map(float, fields[:-1]))
                if self.mode is not None:
                    poly = bbox2poly(poly, self.mode)
                    poly = poly.tolist()
                text = fields[-1]
                instances.append(
                    dict(poly=poly, text=text, ignore=text == self.ignore))
        except Exception as exc:
            # Parsing stays best-effort: instances collected before the
            # failure are kept, but the problem is reported instead of being
            # silently dropped.
            import warnings
            warnings.warn(
                f'Stopped parsing {txt_file} after {len(instances)} '
                f'instance(s): {exc!r}')

        return img_file, instances
0 commit comments