Skip to content

Commit 94bcb4f

Browse files
committed
Cleaning unnecessary code
1 parent 7e2baa3 commit 94bcb4f

File tree

4 files changed

+3
-102
lines changed

4 files changed

+3
-102
lines changed

Tutorials/01_image_to_word/train.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
# https://github.com/rajesh-bhat/spark-ai-summit-2020-text-extraction
2-
# https://github.com/tensorflow/benchmarks/blob/master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
3-
# https://www.robots.ox.ac.uk/~vgg/data/text/#sec-chars
41
import stow
52
from tqdm import tqdm
63
import tensorflow as tf

Tutorials/02_captcha_to_text/inferenceModel.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ def predict(self, image: np.ndarray):
2121

2222
return text
2323

24-
2524
if __name__ == "__main__":
2625
import pandas as pd
2726
from tqdm import tqdm
@@ -42,12 +41,6 @@ def predict(self, image: np.ndarray):
4241
cer = get_cer(prediction_text, label)
4342
print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")
4443

45-
# resize image by 3 times for visualization
46-
# image = cv2.resize(image, (image.shape[1] * 3, image.shape[0] * 3))
47-
# cv2.imshow(prediction_text, image)
48-
# cv2.waitKey(0)
49-
# cv2.destroyAllWindows()
50-
5144
accum_cer.append(cer)
5245

5346
print(f"Average CER: {np.average(accum_cer)}")

Tutorials/02_captcha_to_text/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def download_and_unzip(url, extract_to='Datasets'):
5353
],
5454
)
5555

56-
train_data_provider, val_data_provider = data_provider.split()
56+
train_data_provider, val_data_provider = data_provider.split(split = 0.9)
5757

5858
train_data_provider.augmentors = [RandomBrightness(), RandomRotate(), RandomErodeDilate()]
5959

mltu/dataProvider.py

Lines changed: 2 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
11
import os
22
import copy
33
import typing
4-
# import librosa
54
import numpy as np
65
import pandas as pd
76
from tqdm import tqdm
87
import tensorflow as tf
9-
# from scipy import signal
10-
# from scipy.io import wavfile
11-
12-
from tensorflow.keras.preprocessing.sequence import pad_sequences
138

149
import logging
1510
logging.basicConfig(format='%(asctime)s %(levelname)s %(name)s: %(message)s')
@@ -151,6 +146,7 @@ def __getitem__(self, index: int):
151146
for preprocessor in self._data_preprocessors:
152147
data, annotation = preprocessor(data, annotation)
153148

149+
# If data is None, remove it from the dataset
154150
if data is None:
155151
self._dataset.remove(dataset_batch[index])
156152
continue
@@ -168,89 +164,4 @@ def __getitem__(self, index: int):
168164
for transformer in self._transformers:
169165
batch_data, batch_annotations = zip(*[transformer(data, annotation) for data, annotation in zip(batch_data, batch_annotations)])
170166

171-
return np.array(batch_data), np.array(batch_annotations)
172-
173-
class SoundDataProvider(DataProvider):
174-
def __init__(
175-
self,
176-
vocab: typing.List[str] = None,
177-
*args,
178-
**kwargs
179-
) -> None:
180-
# Inherit all arguments from parent class
181-
# super().__init__(dataset)
182-
# TensorFlowDataProvider.__init__(self, *args, **kwargs)
183-
super().__init__(*args, **kwargs)
184-
self.vocab = vocab
185-
186-
# Mapping characters to integers
187-
self.char_to_num = tf.keras.layers.StringLookup(vocabulary=self.vocab, oov_token="")
188-
# Mapping integers back to original characters
189-
self.num_to_char = tf.keras.layers.StringLookup(
190-
vocabulary=self.char_to_num.get_vocabulary(), oov_token="", invert=True
191-
)
192-
193-
# An integer scalar Tensor. The window length in samples.
194-
self.frame_length = 256
195-
# An integer scalar Tensor. The number of samples to step.
196-
self.frame_step = 160
197-
# An integer scalar Tensor. The size of the FFT to apply.
198-
# If not provided, uses the smallest power of 2 enclosing frame_length.
199-
self.fft_length = 384
200-
201-
def __getitem__(self, index: int):
202-
""" Returns a batch of data by index"""
203-
batch_annotations = self.get_batch_annotations(index)
204-
205-
data, labels = [], []
206-
# bzz =[]
207-
for file_path, label in batch_annotations:
208-
209-
# x, sr = librosa.load(file_path, sr=44100)
210-
# X = librosa.stft(x)
211-
# Xdb = librosa.amplitude_to_db(abs(X))
212-
# bzz.append(Xdb)
213-
214-
# sample_rate, samples = wavfile.read(file_path)
215-
# frequencies, times, _spectrogram = signal.spectrogram(samples, sample_rate)
216-
217-
# 1. Read wav file
218-
file = tf.io.read_file(file_path)
219-
# 2. Decode the wav file
220-
audio, _ = tf.audio.decode_wav(file)
221-
audio = tf.squeeze(audio, axis=-1)
222-
# 3. Change type to float
223-
audio = tf.cast(audio, tf.float32)
224-
# 4. Get the spectrogram
225-
spectrogram = tf.signal.stft(audio, frame_length=self.frame_length, frame_step=self.frame_step, fft_length=self.fft_length)
226-
# 5. We only need the magnitude, which can be derived by applying tf.abs
227-
spectrogram = tf.abs(spectrogram)
228-
spectrogram = tf.math.pow(spectrogram, 0.5)
229-
# 6. normalisation
230-
means = tf.math.reduce_mean(spectrogram, 1, keepdims=True)
231-
stddevs = tf.math.reduce_std(spectrogram, 1, keepdims=True)
232-
spectrogram = (spectrogram - means) / (stddevs + 1e-10)
233-
###########################################
234-
## Process the label
235-
##########################################
236-
# 7. Convert label to Lower case
237-
label = tf.strings.lower(label)
238-
# 8. Split the label
239-
label = tf.strings.unicode_split(label, input_encoding="UTF-8")
240-
# 9. Map the characters in label to numbers
241-
label = self.char_to_num(label)
242-
# 10. Return a dict as our model is expecting two inputs
243-
244-
# final_labels = pad_sequences([label], maxlen=len(label), padding='post', value=len(self.vocab))[0]
245-
246-
data.append(spectrogram.numpy())
247-
labels.append(label.numpy())
248-
249-
padded_data = pad_sequences(data, maxlen=max([len(d) for d in data]), padding='post', value=0, dtype='float32')
250-
padded_labels = pad_sequences(labels, maxlen=max([len(l) for l in labels]), padding='post', value=len(self.vocab))
251-
252-
if self._transformers:
253-
for transformer in self._transformers:
254-
padded_data, padded_labels = zip(*[transformer(data, label) for data, label in zip(padded_data, padded_labels)])
255-
256-
return np.array(padded_data), np.array(padded_labels)
167+
return np.array(batch_data), np.array(batch_annotations)

0 commit comments

Comments
 (0)