Skip to content

Commit 965d0f6

Browse files
authored
use black formatting (#70)
* use black formatting
* format setup.py
1 parent 3172363 commit 965d0f6

File tree

21 files changed

+591
-366
lines changed

21 files changed

+591
-366
lines changed

.github/workflows/black.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
name: Lint
2+
3+
on: [push, pull_request]
4+
5+
jobs:
6+
lint:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: actions/checkout@v3
10+
- uses: psf/black@stable

manga_ocr/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
__version__ = '0.1.11'
1+
__version__ = "0.1.11"
22

33
from manga_ocr.ocr import MangaOcr

manga_ocr/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ def main():
77
fire.Fire(run)
88

99

10-
if __name__ == '__main__':
10+
if __name__ == "__main__":
1111
main()

manga_ocr/ocr.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,45 @@
99

1010

1111
class MangaOcr:
12-
def __init__(self, pretrained_model_name_or_path='kha-white/manga-ocr-base', force_cpu=False):
13-
logger.info(f'Loading OCR model from {pretrained_model_name_or_path}')
14-
self.processor = ViTImageProcessor.from_pretrained(pretrained_model_name_or_path)
12+
def __init__(
13+
self, pretrained_model_name_or_path="kha-white/manga-ocr-base", force_cpu=False
14+
):
15+
logger.info(f"Loading OCR model from {pretrained_model_name_or_path}")
16+
self.processor = ViTImageProcessor.from_pretrained(
17+
pretrained_model_name_or_path
18+
)
1519
self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
16-
self.model = VisionEncoderDecoderModel.from_pretrained(pretrained_model_name_or_path)
20+
self.model = VisionEncoderDecoderModel.from_pretrained(
21+
pretrained_model_name_or_path
22+
)
1723

1824
if not force_cpu and torch.cuda.is_available():
19-
logger.info('Using CUDA')
25+
logger.info("Using CUDA")
2026
self.model.cuda()
2127
elif not force_cpu and torch.backends.mps.is_available():
22-
logger.info('Using MPS')
23-
self.model.to('mps')
28+
logger.info("Using MPS")
29+
self.model.to("mps")
2430
else:
25-
logger.info('Using CPU')
31+
logger.info("Using CPU")
2632

27-
example_path = Path(__file__).parent / 'assets/example.jpg'
33+
example_path = Path(__file__).parent / "assets/example.jpg"
2834
if not example_path.is_file():
29-
example_path = Path(__file__).parent.parent / 'assets/example.jpg'
35+
example_path = Path(__file__).parent.parent / "assets/example.jpg"
3036
self(example_path)
3137

32-
logger.info('OCR ready')
38+
logger.info("OCR ready")
3339

3440
def __call__(self, img_or_path):
3541
if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
3642
img = Image.open(img_or_path)
3743
elif isinstance(img_or_path, Image.Image):
3844
img = img_or_path
3945
else:
40-
raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
46+
raise ValueError(
47+
f"img_or_path must be a path or PIL.Image, instead got: {img_or_path}"
48+
)
4149

42-
img = img.convert('L').convert('RGB')
50+
img = img.convert("L").convert("RGB")
4351

4452
x = self._preprocess(img)
4553
x = self.model.generate(x[None].to(self.model.device), max_length=300)[0].cpu()
@@ -53,9 +61,9 @@ def _preprocess(self, img):
5361

5462

5563
def post_process(text):
56-
text = ''.join(text.split())
57-
text = text.replace('…', '...')
58-
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
64+
text = "".join(text.split())
65+
text = text.replace("…", "...")
66+
text = re.sub("[・.]{2,}", lambda x: (x.end() - x.start()) * ".", text)
5967
text = jaconv.h2z(text, ascii=True, digit=True)
6068

6169
return text

manga_ocr/run.py

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,30 +27,33 @@ def process_and_write_results(mocr, img_or_path, write_to):
2727
text = mocr(img_or_path)
2828
t1 = time.time()
2929

30-
logger.info(f'Text recognized in {t1 - t0:0.03f} s: {text}')
30+
logger.info(f"Text recognized in {t1 - t0:0.03f} s: {text}")
3131

32-
if write_to == 'clipboard':
32+
if write_to == "clipboard":
3333
pyperclip.copy(text)
3434
else:
3535
write_to = Path(write_to)
36-
if write_to.suffix != '.txt':
37-
raise ValueError('write_to must be either "clipboard" or a path to a text file')
36+
if write_to.suffix != ".txt":
37+
raise ValueError(
38+
'write_to must be either "clipboard" or a path to a text file'
39+
)
3840

39-
with write_to.open('a', encoding="utf-8") as f:
40-
f.write(text + '\n')
41+
with write_to.open("a", encoding="utf-8") as f:
42+
f.write(text + "\n")
4143

4244

4345
def get_path_key(path):
4446
return path, path.lstat().st_mtime
4547

4648

47-
def run(read_from='clipboard',
48-
write_to='clipboard',
49-
pretrained_model_name_or_path='kha-white/manga-ocr-base',
50-
force_cpu=False,
51-
delay_secs=0.1,
52-
verbose=False
53-
):
49+
def run(
50+
read_from="clipboard",
51+
write_to="clipboard",
52+
pretrained_model_name_or_path="kha-white/manga-ocr-base",
53+
force_cpu=False,
54+
delay_secs=0.1,
55+
verbose=False,
56+
):
5457
"""
5558
Run OCR in the background, waiting for new images to appear either in system clipboard, or a directory.
5659
Recognized texts can be either saved to system clipboard, or appended to a text file.
@@ -65,21 +68,25 @@ def run(read_from='clipboard',
6568

6669
mocr = MangaOcr(pretrained_model_name_or_path, force_cpu)
6770

68-
if sys.platform not in ('darwin', 'win32') and write_to == 'clipboard':
71+
if sys.platform not in ("darwin", "win32") and write_to == "clipboard":
6972
# Check if the system is using Wayland
7073
import os
71-
if os.environ.get('WAYLAND_DISPLAY'):
74+
75+
if os.environ.get("WAYLAND_DISPLAY"):
7276
# Check if the wl-clipboard package is installed
7377
if os.system("which wl-copy > /dev/null") == 0:
7478
pyperclip.set_clipboard("wl-clipboard")
7579
else:
76-
msg = 'Your session uses wayland and does not have wl-clipboard installed. ' \
77-
'Install wl-clipboard for write in clipboard to work.'
80+
msg = (
81+
"Your session uses wayland and does not have wl-clipboard installed. "
82+
"Install wl-clipboard for write in clipboard to work."
83+
)
7884
raise NotImplementedError(msg)
7985

80-
if read_from == 'clipboard':
86+
if read_from == "clipboard":
8187
from PIL import ImageGrab
82-
logger.info('Reading from clipboard')
88+
89+
logger.info("Reading from clipboard")
8390

8491
img = None
8592
while True:
@@ -95,19 +102,25 @@ def run(read_from='clipboard',
95102
# Pillow error when clipboard contains text (Linux, X11)
96103
pass
97104
else:
98-
logger.warning('Error while reading from clipboard ({})'.format(error))
105+
logger.warning(
106+
"Error while reading from clipboard ({})".format(error)
107+
)
99108
else:
100-
if isinstance(img, Image.Image) and not are_images_identical(img, old_img):
109+
if isinstance(img, Image.Image) and not are_images_identical(
110+
img, old_img
111+
):
101112
process_and_write_results(mocr, img, write_to)
102113

103114
time.sleep(delay_secs)
104115

105116
else:
106117
read_from = Path(read_from)
107118
if not read_from.is_dir():
108-
raise ValueError('read_from must be either "clipboard" or a path to a directory')
119+
raise ValueError(
120+
'read_from must be either "clipboard" or a path to a directory'
121+
)
109122

110-
logger.info(f'Reading from directory {read_from}')
123+
logger.info(f"Reading from directory {read_from}")
111124

112125
old_paths = set()
113126
for path in read_from.iterdir():
@@ -123,12 +136,12 @@ def run(read_from='clipboard',
123136
img = Image.open(path)
124137
img.load()
125138
except (UnidentifiedImageError, OSError) as e:
126-
logger.warning(f'Error while reading file {path}: {e}')
139+
logger.warning(f"Error while reading file {path}: {e}")
127140
else:
128141
process_and_write_results(mocr, img, write_to)
129142

130143
time.sleep(delay_secs)
131144

132145

133-
if __name__ == '__main__':
146+
if __name__ == "__main__":
134147
fire.Fire(run)

manga_ocr_dev/data/generate_backgrounds.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ def find_rectangle(mask, y, x, aspect_ratio_range=(0.33, 3.0)):
4747

4848

4949
def generate_backgrounds(crops_per_page=5, min_size=40):
50-
data = pd.read_csv(MANGA109_ROOT / 'data.csv')
51-
frames_df = pd.read_csv(MANGA109_ROOT / 'frames.csv')
50+
data = pd.read_csv(MANGA109_ROOT / "data.csv")
51+
frames_df = pd.read_csv(MANGA109_ROOT / "frames.csv")
5252

5353
BACKGROUND_DIR.mkdir(parents=True, exist_ok=True)
5454

@@ -57,11 +57,11 @@ def generate_backgrounds(crops_per_page=5, min_size=40):
5757
page = cv2.imread(str(MANGA109_ROOT / page_path))
5858
mask = np.zeros((page.shape[0], page.shape[1]), dtype=bool)
5959
for row in data[data.page_path == page_path].itertuples():
60-
mask[row.ymin:row.ymax, row.xmin:row.xmax] = True
60+
mask[row.ymin : row.ymax, row.xmin : row.xmax] = True
6161

6262
frames_mask = np.zeros((page.shape[0], page.shape[1]), dtype=bool)
6363
for row in frames_df[frames_df.page_path == page_path].itertuples():
64-
frames_mask[row.ymin:row.ymax, row.xmin:row.xmax] = True
64+
frames_mask[row.ymin : row.ymax, row.xmin : row.xmax] = True
6565

6666
mask = mask | ~frames_mask
6767

@@ -76,10 +76,12 @@ def generate_backgrounds(crops_per_page=5, min_size=40):
7676
crop = page[ymin:ymax, xmin:xmax]
7777

7878
if crop.shape[0] >= min_size and crop.shape[1] >= min_size:
79-
out_filename = '_'.join(
80-
Path(page_path).with_suffix('').parts[-2:]) + f'_{ymin}_{ymax}_{xmin}_{xmax}.png'
79+
out_filename = (
80+
"_".join(Path(page_path).with_suffix("").parts[-2:])
81+
+ f"_{ymin}_{ymax}_{xmin}_{xmax}.png"
82+
)
8183
cv2.imwrite(str(BACKGROUND_DIR / out_filename), crop)
8284

8385

84-
if __name__ == '__main__':
86+
if __name__ == "__main__":
8587
generate_backgrounds()

0 commit comments

Comments
 (0)