Cleaning unnecessary code

pythonlessons · pythonlessons · commit 94bcb4fd5686 · 2022-12-22T18:00:48.000+02:00
diff --git a/Tutorials/01_image_to_word/train.py b/Tutorials/01_image_to_word/train.py
@@ -1,6 +1,3 @@
-# https://github.com/rajesh-bhat/spark-ai-summit-2020-text-extraction
-# https://github.com/tensorflow/benchmarks/blob/master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
-# https://www.robots.ox.ac.uk/~vgg/data/text/#sec-chars
 import stow
 from tqdm import tqdm
 import tensorflow as tf
diff --git a/Tutorials/02_captcha_to_text/inferenceModel.py b/Tutorials/02_captcha_to_text/inferenceModel.py
@@ -21,7 +21,6 @@ def predict(self, image: np.ndarray):
 
         return text
 
-
 if __name__ == "__main__":
     import pandas as pd
     from tqdm import tqdm
@@ -42,12 +41,6 @@ def predict(self, image: np.ndarray):
         cer = get_cer(prediction_text, label)
         print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")
 
-        # resize image by 3 times for visualization
-        # image = cv2.resize(image, (image.shape[1] * 3, image.shape[0] * 3))
-        # cv2.imshow(prediction_text, image)
-        # cv2.waitKey(0)
-        # cv2.destroyAllWindows()
-
         accum_cer.append(cer)
 
     print(f"Average CER: {np.average(accum_cer)}")
diff --git a/Tutorials/02_captcha_to_text/train.py b/Tutorials/02_captcha_to_text/train.py
@@ -53,7 +53,7 @@ def download_and_unzip(url, extract_to='Datasets'):
         ],
 )
 
-train_data_provider, val_data_provider = data_provider.split()
+train_data_provider, val_data_provider = data_provider.split(split = 0.9)
 
 train_data_provider.augmentors = [RandomBrightness(), RandomRotate(), RandomErodeDilate()]
 
diff --git a/mltu/dataProvider.py b/mltu/dataProvider.py
@@ -1,15 +1,10 @@
 import os
 import copy
 import typing
-# import librosa
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
 import tensorflow as tf
-# from scipy import signal
-# from scipy.io import wavfile
-
-from tensorflow.keras.preprocessing.sequence import pad_sequences
 
 import logging
 logging.basicConfig(format='%(asctime)s %(levelname)s %(name)s: %(message)s')
@@ -151,6 +146,7 @@ def __getitem__(self, index: int):
             for preprocessor in self._data_preprocessors:
                 data, annotation = preprocessor(data, annotation)
             
+            # If data is None, remove it from the dataset
             if data is None:
                 self._dataset.remove(dataset_batch[index])
                 continue
@@ -168,89 +164,4 @@ def __getitem__(self, index: int):
             for transformer in self._transformers:
                 batch_data, batch_annotations = zip(*[transformer(data, annotation) for data, annotation in zip(batch_data, batch_annotations)])
 
-        return np.array(batch_data), np.array(batch_annotations)
-
-class SoundDataProvider(DataProvider):
-    def __init__(
-        self, 
-        vocab: typing.List[str] = None,
-        *args,
-        **kwargs
-        ) -> None:
-        # Intherit all arguments from parent class
-        # super().__init__(dataset)
-        # TensorFlowDataProvider.__init__(self, *args, **kwargs)
-        super().__init__(*args, **kwargs)
-        self.vocab = vocab
-
-        # Mapping characters to integers
-        self.char_to_num = tf.keras.layers.StringLookup(vocabulary=self.vocab, oov_token="")
-        # Mapping integers back to original characters
-        self.num_to_char = tf.keras.layers.StringLookup(
-            vocabulary=self.char_to_num.get_vocabulary(), oov_token="", invert=True
-        )
-
-        # An integer scalar Tensor. The window length in samples.
-        self.frame_length = 256
-        # An integer scalar Tensor. The number of samples to step.
-        self.frame_step = 160
-        # An integer scalar Tensor. The size of the FFT to apply.
-        # If not provided, uses the smallest power of 2 enclosing frame_length.
-        self.fft_length = 384
-
-    def __getitem__(self, index: int):
-        """ Returns a batch of data by index"""
-        batch_annotations = self.get_batch_annotations(index)
-
-        data, labels = [], []
-        # bzz =[]
-        for file_path, label in batch_annotations:
-
-            # x, sr = librosa.load(file_path, sr=44100)
-            # X = librosa.stft(x)
-            # Xdb = librosa.amplitude_to_db(abs(X))
-            # bzz.append(Xdb)
-
-            # sample_rate, samples = wavfile.read(file_path)
-            # frequencies, times, _spectrogram = signal.spectrogram(samples, sample_rate)
-
-            # 1. Read wav file
-            file = tf.io.read_file(file_path)
-            # 2. Decode the wav file
-            audio, _ = tf.audio.decode_wav(file)
-            audio = tf.squeeze(audio, axis=-1)
-            # 3. Change type to float
-            audio = tf.cast(audio, tf.float32)
-            # 4. Get the spectrogram
-            spectrogram = tf.signal.stft(audio, frame_length=self.frame_length, frame_step=self.frame_step, fft_length=self.fft_length)
-            # 5. We only need the magnitude, which can be derived by applying tf.abs
-            spectrogram = tf.abs(spectrogram)
-            spectrogram = tf.math.pow(spectrogram, 0.5)
-            # 6. normalisation
-            means = tf.math.reduce_mean(spectrogram, 1, keepdims=True)
-            stddevs = tf.math.reduce_std(spectrogram, 1, keepdims=True)
-            spectrogram = (spectrogram - means) / (stddevs + 1e-10)
-            ###########################################
-            ##  Process the label
-            ##########################################
-            # 7. Convert label to Lower case
-            label = tf.strings.lower(label)
-            # 8. Split the label
-            label = tf.strings.unicode_split(label, input_encoding="UTF-8")
-            # 9. Map the characters in label to numbers
-            label = self.char_to_num(label)
-            # 10. Return a dict as our model is expecting two inputs
-
-            # final_labels = pad_sequences([label], maxlen=len(label), padding='post', value=len(self.vocab))[0]
-
-            data.append(spectrogram.numpy())
-            labels.append(label.numpy())
-
-        padded_data = pad_sequences(data, maxlen=max([len(d) for d in data]), padding='post', value=0, dtype='float32')
-        padded_labels = pad_sequences(labels, maxlen=max([len(l) for l in labels]), padding='post', value=len(self.vocab))
-
-        if self._transformers:
-            for transformer in self._transformers:
-                padded_data, padded_labels = zip(*[transformer(data, label) for data, label in zip(padded_data, padded_labels)])
-
-        return np.array(padded_data), np.array(padded_labels)
+        return np.array(batch_data), np.array(batch_annotations)

Original file line number	Diff line number	Diff line change
`@@ -53,7 +53,7 @@ def download_and_unzip(url, extract_to='Datasets'):`
`53`	`53`	`],`
`54`	`54`	`)`
`55`	`55`
`56`		`-train_data_provider, val_data_provider = data_provider.split()`
	`56`	`+train_data_provider, val_data_provider = data_provider.split(split = 0.9)`
`57`	`57`
`58`	`58`	`train_data_provider.augmentors = [RandomBrightness(), RandomRotate(), RandomErodeDilate()]`
`59`	`59`