-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathdataset.py
More file actions
112 lines (90 loc) · 4.23 KB
/
dataset.py
File metadata and controls
112 lines (90 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Copyright 2022 Dakewe Biotech Corporation. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
import imgproc
__all__ = [
"train_collate_fn", "valid_test_collate_fn",
"ImageDataset"
]
def train_collate_fn(batch: [torch.Tensor, torch.Tensor, torch.Tensor]) -> [torch.Tensor,
torch.Tensor,
torch.Tensor]:
images, target, target_length = zip(*batch)
images = torch.stack(images, 0)
target = torch.cat(target, 0)
target_length = torch.cat(target_length, 0)
return images, target, target_length
def valid_test_collate_fn(batch: [str, torch.Tensor, str]) -> [str, torch.Tensor, str]:
image_path, images, target = zip(*batch)
images = torch.stack(images, 0)
return image_path, images, target
class ImageDataset(Dataset):
def __init__(self,
dataroot: str = None,
annotation_file_name: str = None,
labels_dict: dict = None,
image_width: int = None,
image_height: int = None,
mean: list = None,
std: list = None,
mode: str = None):
self.dataroot = dataroot
self.annotation_file_name = annotation_file_name
self.labels_dict = labels_dict
self.image_width = image_width
self.image_height = image_height
self.mean = mean
self.std = std
self.mode = mode
self.images_path, self.images_target = self.load_image_label_from_file()
def load_image_label_from_file(self):
# Initialize the definition of image path, image text information, etc.
images_path = []
images_target = []
# Read image path and corresponding text information
with open(os.path.join(self.dataroot, self.annotation_file_name), "r", encoding="UTF-8") as f:
for line in f.readlines():
image_path, image_target = line.strip().split(" ")
images_path.append(os.path.join(self.dataroot, image_path))
images_target.append(image_target)
return images_path, images_target
def __getitem__(self, index: int) -> [str, torch.Tensor, torch.Tensor, torch.Tensor]:
image_path = self.images_path[index]
# Read the image and convert it to grayscale
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Scale to the size of the image that the model can accept
image = cv2.resize(image, (self.image_width, self.image_height), interpolation=cv2.INTER_CUBIC)
image = np.reshape(image, (self.image_height, self.image_width, 1))
# Normalize and convert to Tensor format
image = imgproc.image2tensor(image, mean=self.mean, std=self.std)
if self.mode == "train" and self.labels_dict is not None:
# Read images target
target = self.images_target[index]
target = [self.labels_dict[character] for character in target]
target = torch.LongTensor(target)
target_length = torch.LongTensor([len(target)])
return image, target, target_length
elif self.mode == "valid" or self.mode == "test":
# Read images target
target = self.images_target[index]
return image_path, image, target
else:
raise ValueError("Unsupported data processing model, please use `train`, `valid` or `test`.")
def __len__(self):
return len(self.images_path)