
Commit 6d5b14a (1 parent: c12bb90)

consistently use double quotes for strings across the entire library + some styling adjustments

42 files changed: +280 −250 lines (only a subset of the diff is reproduced below)
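A note on the change itself: single-quoted and double-quoted string literals are identical at runtime in Python, so this commit changes style only, never behavior. Formatters such as Black enforce the same double-quote convention automatically.

    # Style-only change: both spellings produce the same str value.
    assert 'A B C' == "A B C"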

Tests/test_text_utils.py

Lines changed: 29 additions & 28 deletions
@@ -10,28 +10,28 @@ def test_edit_distance(self):
         errors. It also includes a test case for empty input.
         """
         # Test simple case with no errors
-        prediction_tokens = ['A', 'B', 'C']
-        reference_tokens = ['A', 'B', 'C']
+        prediction_tokens = ["A", "B", "C"]
+        reference_tokens = ["A", "B", "C"]
         self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 0)

         # Test simple case with one substitution error
-        prediction_tokens = ['A', 'B', 'D']
-        reference_tokens = ['A', 'B', 'C']
+        prediction_tokens = ["A", "B", "D"]
+        reference_tokens = ["A", "B", "C"]
         self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)

         # Test simple case with one insertion error
-        prediction_tokens = ['A', 'B', 'C']
-        reference_tokens = ['A', 'B', 'C', 'D']
+        prediction_tokens = ["A", "B", "C"]
+        reference_tokens = ["A", "B", "C", "D"]
         self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)

         # Test simple case with one deletion error
-        prediction_tokens = ['A', 'B']
-        reference_tokens = ['A', 'B', 'C']
+        prediction_tokens = ["A", "B"]
+        reference_tokens = ["A", "B", "C"]
         self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)

         # Test more complex case with multiple errors
-        prediction_tokens = ['A', 'B', 'C', 'D', 'E']
-        reference_tokens = ['A', 'C', 'B', 'F', 'E']
+        prediction_tokens = ["A", "B", "C", "D", "E"]
+        reference_tokens = ["A", "C", "B", "F", "E"]
         self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 3)

         # Test empty input
@@ -41,18 +41,18 @@ def test_edit_distance(self):

     def test_get_cer(self):
         # Test simple case with no errors
-        preds = ['A B C']
-        target = ['A B C']
+        preds = ["A B C"]
+        target = ["A B C"]
         self.assertEqual(get_cer(preds, target), 0)

         # Test simple case with one character error
-        preds = ['A B C']
-        target = ['A B D']
+        preds = ["A B C"]
+        target = ["A B D"]
         self.assertEqual(get_cer(preds, target), 1/5)

         # Test simple case with multiple character errors
-        preds = ['A B C']
-        target = ['D E F']
+        preds = ["A B C"]
+        target = ["D E F"]
         self.assertEqual(get_cer(preds, target), 3/5)

         # Test empty input
@@ -61,24 +61,24 @@ def test_get_cer(self):
         self.assertEqual(get_cer(preds, target), 0)

         # Test simple case with different word lengths
-        preds = ['ABC']
-        target = ['ABCDEFG']
+        preds = ["ABC"]
+        target = ["ABCDEFG"]
         self.assertEqual(get_cer(preds, target), 4/7)

     def test_get_wer(self):
         # Test simple case with no errors
-        preds = 'A B C'
-        target = 'A B C'
+        preds = "A B C"
+        target = "A B C"
         self.assertEqual(get_wer(preds, target), 0)

         # Test simple case with one word error
-        preds = 'A B C'
-        target = 'A B D'
+        preds = "A B C"
+        target = "A B D"
         self.assertEqual(get_wer(preds, target), 1/3)

         # Test simple case with multiple word errors
-        preds = 'A B C'
-        target = 'D E F'
+        preds = "A B C"
+        target = "D E F"
         self.assertEqual(get_wer(preds, target), 1)

         # Test empty input
@@ -87,9 +87,10 @@ def test_get_wer(self):
         self.assertEqual(get_wer(preds, target), 0)

         # Test simple case with different sentence lengths
-        preds = ['ABC']
-        target = ['ABC DEF']
+        preds = ["ABC"]
+        target = ["ABC DEF"]
         self.assertEqual(get_wer(preds, target), 1)

-if __name__ == '__main__':
-    unittest.main()
+
+if __name__ == "__main__":
+    unittest.main()
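For context on what these assertions pin down, below is a minimal sketch of the three metrics, assuming the standard dynamic-programming Levenshtein distance. It is not the mltu source, but any implementation must reproduce the expected values above; note in particular that get_wer has to treat list inputs as ready-made token sequences, since that is the only reading under which get_wer(["ABC"], ["ABC DEF"]) equals 1.

    def edit_distance(prediction_tokens, reference_tokens):
        # dp[i][j] = edits to turn prediction_tokens[:i] into reference_tokens[:j]
        dp = [[0] * (len(reference_tokens) + 1) for _ in range(len(prediction_tokens) + 1)]
        for i in range(len(prediction_tokens) + 1):
            dp[i][0] = i  # delete everything
        for j in range(len(reference_tokens) + 1):
            dp[0][j] = j  # insert everything
        for i, p in enumerate(prediction_tokens, 1):
            for j, r in enumerate(reference_tokens, 1):
                dp[i][j] = min(dp[i - 1][j] + 1,             # deletion
                               dp[i][j - 1] + 1,             # insertion
                               dp[i - 1][j - 1] + (p != r))  # substitution
        return dp[-1][-1]

    def get_cer(preds, target):
        # Character error rate: character-level edit distance per pair,
        # normalized by the total number of reference characters.
        errors = sum(edit_distance(list(p), list(t)) for p, t in zip(preds, target))
        total = sum(len(t) for t in target)
        return errors / total if total else 0

    def get_wer(preds, target):
        # Word error rate: strings are split into words, lists are taken as
        # token sequences, so ["ABC"] vs ["ABC DEF"] is one substitution
        # over a single reference token.
        preds = preds.split() if isinstance(preds, str) else preds
        target = target.split() if isinstance(target, str) else target
        return edit_distance(preds, target) / len(target) if target else 0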

Tutorials/01_image_to_word/configs.py

Lines changed: 3 additions & 2 deletions
@@ -3,11 +3,12 @@

 from mltu.configs import BaseModelConfigs

+
 class ModelConfigs(BaseModelConfigs):
     def __init__(self):
         super().__init__()
-        self.model_path = os.path.join('Models/1_image_to_word', datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
-        self.vocab = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
+        self.model_path = os.path.join("Models/1_image_to_word", datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
+        self.vocab = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
         self.height = 32
         self.width = 128
         self.max_text_length = 23
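The model_path pattern above gives every training run its own timestamped directory; a quick illustration of what it resolves to:

    import os
    from datetime import datetime

    # e.g. "Models/1_image_to_word/202301071435"; the suffix is the current
    # date and time (%Y%m%d%H%M), so repeated runs never overwrite each other.
    model_path = os.path.join("Models/1_image_to_word", datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
    print(model_path)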

Tutorials/01_image_to_word/model.py

Lines changed: 3 additions & 2 deletions
@@ -3,7 +3,8 @@

 from mltu.tensorflow.model_utils import residual_block

-def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):
+
+def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):

     inputs = layers.Input(shape=input_dim, name="input")

@@ -24,7 +25,7 @@ def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):

     blstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(squeezed)

-    output = layers.Dense(output_dim + 1, activation='softmax', name="output")(blstm)
+    output = layers.Dense(output_dim + 1, activation="softmax", name="output")(blstm)

     model = Model(inputs=inputs, outputs=output)
     return model
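One detail worth flagging in the head touched above: the Dense layer has output_dim + 1 units because CTC-trained recognizers, which the val_CER callbacks in the train.py diffs suggest these are, reserve one extra class for the blank token. A hypothetical greedy decoder for that per-timestep softmax, assuming the Keras convention of blank as the last class:

    import numpy as np

    def ctc_greedy_decode(probs, vocab):
        # probs: (time_steps, output_dim + 1) softmax output for one sample.
        # Collapse repeated argmax indices, then drop the blank class.
        blank = probs.shape[-1] - 1  # assumed blank index (last class)
        best = np.argmax(probs, axis=-1)
        decoded, prev = [], None
        for idx in best:
            if idx != prev and idx != blank:
                decoded.append(vocab[idx])
            prev = idx
        return "".join(decoded)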

Tutorials/01_image_to_word/train.py

Lines changed: 7 additions & 7 deletions
@@ -2,7 +2,7 @@
 from tqdm import tqdm
 import tensorflow as tf

-try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices('GPU')]
+try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices("GPU")]
 except: pass

 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
@@ -89,11 +89,11 @@ def read_annotation_file(annotation_path):
 os.makedirs(configs.model_path, exist_ok=True)

 # Define callbacks
-earlystopper = EarlyStopping(monitor='val_CER', patience=10, verbose=1)
-checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor='val_CER', verbose=1, save_best_only=True, mode='min')
+earlystopper = EarlyStopping(monitor="val_CER", patience=10, verbose=1)
+checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
 trainLogger = TrainLogger(configs.model_path)
-tb_callback = TensorBoard(f'{configs.model_path}/logs', update_freq=1)
-reduceLROnPlat = ReduceLROnPlateau(monitor='val_CER', factor=0.9, min_delta=1e-10, patience=5, verbose=1, mode='auto')
+tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)
+reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=5, verbose=1, mode="auto")
 model2onnx = Model2onnx(f"{configs.model_path}/model.h5")

 # Train the model
@@ -106,5 +106,5 @@ def read_annotation_file(annotation_path):
 )

 # Save training and validation datasets as csv files
-train_data_provider.to_csv(os.path.join(configs.model_path, 'train.csv'))
-val_data_provider.to_csv(os.path.join(configs.model_path, 'val.csv'))
+train_data_provider.to_csv(os.path.join(configs.model_path, "train.csv"))
+val_data_provider.to_csv(os.path.join(configs.model_path, "val.csv"))
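The one-line memory-growth guard at the top of this script (repeated in the other train.py files below) is dense; an equivalent, more explicit form for reference:

    import tensorflow as tf

    # Allocate GPU memory on demand instead of grabbing it all at startup.
    # set_memory_growth raises RuntimeError once a device has been initialized,
    # which is what the original bare except silently swallows.
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError:
            pass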

Tutorials/02_captcha_to_text/configs.py

Lines changed: 3 additions & 2 deletions
@@ -3,11 +3,12 @@

 from mltu.configs import BaseModelConfigs

+
 class ModelConfigs(BaseModelConfigs):
     def __init__(self):
         super().__init__()
-        self.model_path = os.path.join('Models/02_captcha_to_text', datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
-        self.vocab = ''
+        self.model_path = os.path.join("Models/02_captcha_to_text", datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
+        self.vocab = ""
         self.height = 50
         self.width = 200
         self.max_text_length = 0
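vocab and max_text_length start empty here because this tutorial derives them from the data: the "dataset, vocab, max_len" loop in the train.py diff below builds both while scanning the captcha files. A sketch of that flow (the dataset.append pairing is assumed; the rest is visible in the diff):

    import os

    captcha_path = os.path.join("Datasets", "captcha_images_v2")
    dataset, vocab, max_len = [], set(), 0
    for file in os.listdir(captcha_path):
        label = os.path.splitext(file)[0]  # file name without extension is the label
        dataset.append([os.path.join(captcha_path, file), label])  # assumed pairing
        vocab.update(label)                 # set of characters seen across labels
        max_len = max(max_len, len(label))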

Tutorials/02_captcha_to_text/model.py

Lines changed: 4 additions & 3 deletions
@@ -3,7 +3,8 @@

 from mltu.tensorflow.model_utils import residual_block

-def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):
+
+def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):

     inputs = layers.Input(shape=input_dim, name="input")

@@ -29,7 +30,7 @@ def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):
     blstm = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(squeezed)
     blstm = layers.Dropout(dropout)(blstm)

-    output = layers.Dense(output_dim + 1, activation='softmax', name="output")(blstm)
+    output = layers.Dense(output_dim + 1, activation="softmax", name="output")(blstm)

     model = Model(inputs=inputs, outputs=output)
-    return model
+    return model

Tutorials/02_captcha_to_text/train.py

Lines changed: 14 additions & 11 deletions
@@ -1,5 +1,5 @@
 import tensorflow as tf
-try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices('GPU')]
+try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices("GPU")]
 except: pass

 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
@@ -22,17 +22,20 @@
 from io import BytesIO
 from zipfile import ZipFile

-def download_and_unzip(url, extract_to='Datasets'):
+
+def download_and_unzip(url, extract_to="Datasets"):
     http_response = urlopen(url)
     zipfile = ZipFile(BytesIO(http_response.read()))
     zipfile.extractall(path=extract_to)

-if not os.path.exists(os.path.join('Datasets', 'captcha_images_v2')):
-    download_and_unzip('https://github.com/AakashKumarNain/CaptchaCracker/raw/master/captcha_images_v2.zip', extract_to='Datasets')
+
+if not os.path.exists(os.path.join("Datasets", "captcha_images_v2")):
+    download_and_unzip("https://github.com/AakashKumarNain/CaptchaCracker/raw/master/captcha_images_v2.zip",
+                       extract_to="Datasets")

 # Create a list of all the images and labels in the dataset
 dataset, vocab, max_len = [], set(), 0
-captcha_path = os.path.join('Datasets', 'captcha_images_v2')
+captcha_path = os.path.join("Datasets", "captcha_images_v2")
 for file in os.listdir(captcha_path):
     file_path = os.path.join(captcha_path, file)
     label = os.path.splitext(file)[0]  # Get the file name without the extension
@@ -83,11 +86,11 @@ def download_and_unzip(url, extract_to='Datasets'):
 os.makedirs(configs.model_path, exist_ok=True)

 # Define callbacks
-earlystopper = EarlyStopping(monitor='val_CER', patience=50, verbose=1)
-checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor='val_CER', verbose=1, save_best_only=True, mode='min')
+earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1)
+checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
 trainLogger = TrainLogger(configs.model_path)
-tb_callback = TensorBoard(f'{configs.model_path}/logs', update_freq=1)
-reduceLROnPlat = ReduceLROnPlateau(monitor='val_CER', factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode='auto')
+tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)
+reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="auto")
 model2onnx = Model2onnx(f"{configs.model_path}/model.h5")

 # Train the model
@@ -100,5 +103,5 @@ def download_and_unzip(url, extract_to='Datasets'):
 )

 # Save training and validation datasets as csv files
-train_data_provider.to_csv(os.path.join(configs.model_path, 'train.csv'))
-val_data_provider.to_csv(os.path.join(configs.model_path, 'val.csv'))
+train_data_provider.to_csv(os.path.join(configs.model_path, "train.csv"))
+val_data_provider.to_csv(os.path.join(configs.model_path, "val.csv"))
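Why download_and_unzip buffers through BytesIO: ZipFile needs a seekable file-like object, and the raw urlopen response is not seekable. For archives too large to hold in memory, a hypothetical alternative (not part of this commit) is to stream to disk first, since files are seekable:

    import shutil
    from urllib.request import urlopen
    from zipfile import ZipFile

    def download_and_unzip_via_disk(url, archive_path, extract_to="Datasets"):
        # Stream the response to a file, then let ZipFile read it from disk
        # without holding the whole archive in RAM.
        with urlopen(url) as response, open(archive_path, "wb") as fh:
            shutil.copyfileobj(response, fh)
        ZipFile(archive_path).extractall(path=extract_to)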

Tutorials/03_handwriting_recognition/configs.py

Lines changed: 2 additions & 2 deletions
@@ -6,8 +6,8 @@
 class ModelConfigs(BaseModelConfigs):
     def __init__(self):
         super().__init__()
-        self.model_path = os.path.join('Models/03_handwriting_recognition', datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
-        self.vocab = ''
+        self.model_path = os.path.join("Models/03_handwriting_recognition", datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
+        self.vocab = ""
         self.height = 32
         self.width = 128
         self.max_text_length = 0

Tutorials/03_handwriting_recognition/model.py

Lines changed: 4 additions & 3 deletions
@@ -3,7 +3,8 @@

 from mltu.tensorflow.model_utils import residual_block

-def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):
+
+def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):

     inputs = layers.Input(shape=input_dim, name="input")

@@ -29,7 +30,7 @@ def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):
     blstm = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(squeezed)
     blstm = layers.Dropout(dropout)(blstm)

-    output = layers.Dense(output_dim + 1, activation='softmax', name="output")(blstm)
+    output = layers.Dense(output_dim + 1, activation="softmax", name="output")(blstm)

     model = Model(inputs=inputs, outputs=output)
-    return model
+    return model

Tutorials/03_handwriting_recognition/train.py

Lines changed: 13 additions & 12 deletions
@@ -1,5 +1,5 @@
 import tensorflow as tf
-try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices('GPU')]
+try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices("GPU")]
 except: pass

 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
@@ -24,20 +24,21 @@
 from io import BytesIO
 from zipfile import ZipFile

-def download_and_unzip(url, extract_to='Datasets', chunk_size=1024*1024):
+
+def download_and_unzip(url, extract_to="Datasets", chunk_size=1024*1024):
     http_response = urlopen(url)

-    data = b''
+    data = b""
     iterations = http_response.length // chunk_size + 1
     for _ in tqdm(range(iterations)):
         data += http_response.read(chunk_size)

     zipfile = ZipFile(BytesIO(data))
     zipfile.extractall(path=extract_to)

-dataset_path = os.path.join('Datasets', 'IAM_Words')
+dataset_path = os.path.join("Datasets", "IAM_Words")
 if not os.path.exists(dataset_path):
-    download_and_unzip('https://git.io/J0fjL', extract_to='Datasets')
+    download_and_unzip("https://git.io/J0fjL", extract_to="Datasets")

 file = tarfile.open(os.path.join(dataset_path, "words.tgz"))
 file.extractall(os.path.join(dataset_path, "words"))
@@ -57,7 +58,7 @@ def download_and_unzip(url, extract_to='Datasets', chunk_size=1024*1024):
     folder1 = line_split[0][:3]
     folder2 = "-".join(line_split[0].split("-")[:2])
     file_name = line_split[0] + ".png"
-    label = line_split[-1].rstrip('\n')
+    label = line_split[-1].rstrip("\n")

     rel_path = os.path.join(dataset_path, "words", folder1, folder2, file_name)
     if not os.path.exists(rel_path):
@@ -115,11 +116,11 @@ def download_and_unzip(url, extract_to='Datasets', chunk_size=1024*1024):
 model.summary(line_length=110)

 # Define callbacks
-earlystopper = EarlyStopping(monitor='val_CER', patience=20, verbose=1)
-checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor='val_CER', verbose=1, save_best_only=True, mode='min')
+earlystopper = EarlyStopping(monitor="val_CER", patience=20, verbose=1)
+checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
 trainLogger = TrainLogger(configs.model_path)
-tb_callback = TensorBoard(f'{configs.model_path}/logs', update_freq=1)
-reduceLROnPlat = ReduceLROnPlateau(monitor='val_CER', factor=0.9, min_delta=1e-10, patience=10, verbose=1, mode='auto')
+tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)
+reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=10, verbose=1, mode="auto")
 model2onnx = Model2onnx(f"{configs.model_path}/model.h5")

 # Train the model
@@ -132,5 +133,5 @@ def download_and_unzip(url, extract_to='Datasets', chunk_size=1024*1024):
 )

 # Save training and validation datasets as csv files
-train_data_provider.to_csv(os.path.join(configs.model_path, 'train.csv'))
-val_data_provider.to_csv(os.path.join(configs.model_path, 'val.csv'))
+train_data_provider.to_csv(os.path.join(configs.model_path, "train.csv"))
+val_data_provider.to_csv(os.path.join(configs.model_path, "val.csv"))
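For readers following the label-parsing hunk above: the slicing maps onto the IAM words.txt fields roughly as below. The sample line is an assumption based on the standard IAM release (word id, segmentation status, graylevel, bounding box, tag, transcription), not taken from this commit.

    line = "a01-000u-00-00 ok 154 408 768 27 51 AT A\n"   # assumed example line
    line_split = line.split(" ")
    folder1 = line_split[0][:3]                       # "a01"
    folder2 = "-".join(line_split[0].split("-")[:2])  # "a01-000u"
    file_name = line_split[0] + ".png"                # "a01-000u-00-00.png"
    label = line_split[-1].rstrip("\n")               # "A"
    # resolves to Datasets/IAM_Words/words/a01/a01-000u/a01-000u-00-00.png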
