diff --git a/beginner_source/autoencoders_tutorial.py b/beginner_source/autoencoders_tutorial.py new file mode 100644 index 00000000000..092e3cd1a25 --- /dev/null +++ b/beginner_source/autoencoders_tutorial.py @@ -0,0 +1,700 @@ +""" +Autoencoder: A Deep Dive +========================= + +Introduction +~~~~~~~~~~~~ + +Autoencoder represent a class of artificial neural networks +utilized for unsupervised learning tasks. They are engineered +to learn efficient data encodings by mapping input data into +a lower-dimensional latent space, subsequently reconstructing +the original data from this latent representation. This +methodology compels the autoencoder to encapsulate the most +salient features of the input data, thereby enhancing the +efficiency and effectiveness of data compression and feature +extraction. + +Architecture of an Autoencoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A typical autoencoder consists of two main components: + +- **Encoder:** This part of the network maps the input data to a latent + space representation. +- **Decoder:** This part reconstructs the original data from the latent + space representation. + +The goal of training is to minimize the reconstruction error between the +input and the reconstructed output. + +Types of Autoencoder +~~~~~~~~~~~~~~~~~~~~~ + +There are several variations of autoencoder: + +- **Denoising Autoencoder:** These are trained on corrupted + input data, learning to reconstruct the original clean data. + +- **Variational Autoencoder:** These introduce + probabilistic elements into the encoding process, allowing + for generating new data samples. + +- **Convolutional Autoencoder (CAE):** These use convolutional + layers, making them suitable for image data. + +Applications of Autoencoder +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Autoencoder have a wide range of applications: + +- **Dimensionality Reduction:** By projecting data into a + lower-dimensional space, autoencoder can be used for visualization + and feature extraction. +- **Image Denoising:** Denoising autoencoder can effectively remove + noise from images. +- **Anomaly Detection:** Autoencoder can be used to identify unusual + data points by measuring reconstruction errors. +- **Image Generation:** Variational Autoencoder can generate new, realistic images based + on the learned latent space distribution. +- **Data Compression:** Undercomplete autoencoder can be used for data + compression. + +PyTorch Implementation +~~~~~~~~~~~~~~~~~~~~~~ + +Let’s implement a basic autoencoder using PyTorch for image compression: + +""" + +import torch +import torchvision + +import numpy as np +import torch.nn as nn +import torch.nn.functional as F + +import matplotlib.pyplot as plt + +from torchvision import transforms +from IPython.display import clear_output + + +###################################################################### +# Define the needed Functions. +# + +def make_dataloader(data_, batch_size: int): + """Helper function to convert datasets to batches.""" + batch_size = 32 + + # Make the Data loader Object + train_loader = torch.utils.data.DataLoader( + data_, batch_size=batch_size, shuffle=True, num_workers=2 + ) + + return train_loader + + +def make_transforms(): + """Helper function to make the transforms for datasets.""" + transform = transforms.Compose( + [ + transforms.ToTensor(), + ] + ) + return transform + + +def load_data_general(data_name: str): + """Helper function to load the data.""" + transform_ = make_transforms() + + if data_name == "mnist": + data_ = torchvision.datasets.FashionMNIST( + root="./data", train=True, download=True, transform=transform_ + ) + elif data_name == "cifar": + data_ = torchvision.datasets.CIFAR10( + root="./data", train=True, download=True, transform=transform_ + ) + + return data_ + + +def load_batch_data(dataset_name: str): + # Load data + train_data = load_data_general(dataset_name) + + # Make batches of data + data_loader = make_dataloader(data_=train_data, batch_size=32) + + return data_loader + + +def load_mnist_data(): + """Load the MNIST dataset and covert it to batches.""" + + return load_batch_data("mnist") + + +def load_cifar_data(): + """Load the CIFAR10 dataset and covert it to batches.""" + + return load_batch_data("cifar") + +def make_model(model_object, lr_rate=0.001, compress_=None): + """Make all of the needed objects for training. + + Args: + model_object: + The class which we want to derive the model from. + lr_rate: + elarning rate for the optimizer + compress_: + the number of neurons at the heart of autoencoder which defines + how much we are going to compress the data. We use this with linear + autoencoder. + + Returns: + A tuple cotanining the initiated model, optimizer and loss function. + """ + if not compress_: + model_ = model_object() + else: + model_ = model_object(compress_) + optimizer_ = torch.optim.Adam(model_.parameters(), lr=lr_rate) + loss_ = nn.MSELoss() + + return model_, optimizer_, loss_ + + +def test_model(loader_obj, model_, linear=True) -> None: + """Test the output of the autoencoder model by showing the images. + + Args: + loader_obj: + The object of the loader for data batches. + model_: + The model which we want to test the output. + linear: + If te model is linear or CNN. + """ + batch_iter = iter(loader_obj) + batch_images = next(batch_iter) + tmp_image = batch_images[0][0, 0, :, :] + plt.imshow(tmp_image) + plt.title("Original Image") + plt.show() + + plt.figure() + if linear: + model_input = tmp_image.reshape(28 * 28) + else: + model_input = tmp_image.reshape(1, 1, 28, 28) + + model_.eval() + with torch.inference_mode(): + output = model_(model_input) + plt.imshow(output.detach().numpy().reshape(28, 28)) + plt.title("Model's Regenerated Picture") + plt.show() + + return + + +def train_model( + model_obj: nn.Module, + optimizer_obj, + loss_obj, + loader_obj, + batch_s: int, + epoch_num: int = 1, + model_linear=True, +) -> nn.Module: + """Train the input model with optimizer and loss function.""" + train_loss = [] + + for epoch in range(epoch_num): + for i, data_ in enumerate(loader_obj, 0): + batches, targets = data_ + if model_linear: + batches = batches.reshape([batch_s, 28 * 28]) + + # zero the parameter gradients + optimizer_obj.zero_grad() + + # Find the output of the Neural Net + # Forward Pass + logits = model_obj(batches) + + # Calculate the loss + loss = loss_obj(logits, batches) + + # Update the neural net and gradients + # Backward Propagation + loss.backward() + optimizer_obj.step() + + # print(f"{loss.item():0.5f}") + # Append the loss of training + train_loss.append(loss.item()) + + plt.plot(train_loss) + plt.title("Training loss") + plt.show() + + return model_obj + + +def add_noise(img_, noise_int: float) -> torch.Tensor: + """Add noise to the given image. + + Args: + img_: + The given image. + noise_int: + The intensity of the noise, varies between 0 and 1. + + Returns: + A tensor of the noisy image. + """ + noise = np.random.normal(loc=0, scale=1, size=img_.shape) + + # noise overlaid over image + noisy = np.clip((img_.numpy() + noise * noise_int), 0, 1) + noisy_tensor = torch.tensor(noisy, dtype=torch.float).reshape(1, 1, 28, 28) + + return noisy_tensor + + +def noisy_test( + loader_obj, model_: nn.Module, linear: bool = True, noise_intensity: float = 0.2 +): + """Test the model by adding noise to the image.""" + batch_iter = iter(loader_obj) + batch_images = next(batch_iter) + tmp_image = batch_images[0][0, 0, :, :] + plt.imshow(tmp_image) + plt.title("Original Image") + plt.show() + + noisy_img = add_noise(tmp_image, noise_intensity) + plt.figure() + plt.imshow(noisy_img.reshape(28, 28).numpy()) + plt.title("Noisy Image") + plt.show() + + plt.figure() + if linear: + model_input = noisy_img.reshape(28 * 28) + else: + model_input = noisy_img.reshape(1, 1, 28, 28) + + model_.eval() + with torch.inference_mode(): + output = model_(model_input) + plt.imshow(output.detach().numpy().reshape(28, 28)) + plt.title("Model's Regenerated Image") + plt.show() + + return + + +def image_show(img_: torch.tensor, img_title: str): + """Convert the batches to grids and show image.""" + img_ = torchvision.utils.make_grid(img_) + npimg = img_.numpy() + plt.imshow(np.transpose(npimg, (1, 2, 0))) + plt.title(img_title) + plt.show() + + return + + +def test_cifar(cifar_model, data_loader_): + """Test the CIFAR model""" + # get some random training images + dataiter = iter(data_loader_) + images, labels = next(dataiter) + + # show images by converting batches to grids + image_show(images, "Original Image") + + cifar_model.eval() + with torch.inference_mode(): + out_batch = cifar_model(images) + image_show(out_batch, "Model's Regenerated Image") + + return + + + +###################################################################### +# Load Fashion MNIST Dataset +# -------------------------- +# +# **Breakdown:** +# +# 1. **Import necessary libraries:** +# +# - ``torchvision.transforms``: For image transformations. +# - ``torchvision.datasets``: For loading the MNIST dataset. +# +# 2. **Define image transformations:** +# +# - ``transforms.ToTensor()``: Converts PIL images to PyTorch tensors. +# - ``transforms.Normalize()``: Normalizes tensor images with mean and +# standard deviation of 0.5. +# +# 3. **Load training data:** +# +# - ``torchvision.datasets.FashionMNIST``: Loads the Fashion MNIST +# training dataset. +# - ``root``: Specifies the data directory. +# - ``train``: Set to ``True`` for training data. +# - ``download``: Downloads the dataset if not present. +# - ``transform``: Applies the defined transformations to the images. +# +# 4. **Load testing data:** +# +# - Similar to loading training data, but with ``train=False`` to load +# the test set. +# + +batch_size = 32 + +# This gives us the loader object which is iterable and also has batches of data +train_loader = load_mnist_data() + +clear_output() + + +###################################################################### +# Let’s explore a little about the size of data and also what is included +# in it. +# + +for i, data_ in enumerate(train_loader): + print(i, data_[0].shape) + if i==10: + break + +for i, data_ in enumerate(train_loader): + plt.imshow(data_[0][i, :, :].view(28, 28)) + plt.show() + if i==3: + break + + +###################################################################### +# Autoencoder Definition +# ====================== +# + + +###################################################################### +# Define Model +# ============ +# +# Here we define our model which us based on Autoencoder class and use the +# optimizer based on ``Adam Optimizer``. +# +# First, We start with a simple Model which only uses Linear layers with +# Leaky Relu activations. +# + +class AutoencoderLinear(nn.Module): + def __init__(self): + super().__init__() + self.linear_en_1 = nn.Linear(in_features=28*28, out_features=196) + self.linear_en_2 = nn.Linear(in_features=196, out_features=98) + self.linear_de_1 = nn.Linear(in_features=98, out_features=196) + self.linear_de_2 = nn.Linear(in_features=196, out_features=28*28) + + def forward(self, x): + encode_1 = F.leaky_relu(self.linear_en_1(x)) + encode_2 = F.leaky_relu(self.linear_en_2(encode_1)) + decode_1 = F.leaky_relu(self.linear_de_1(encode_2)) + decode_2 = F.sigmoid(self.linear_de_2(decode_1)) + return decode_2 + +model_aal, optimizer, loss_fn = make_model(AutoencoderLinear) + + +###################################################################### +# Is our model even working??? +# + +test_model(train_loader, model_aal) + + +###################################################################### +# So we are just getting noise, Let’s see what happens after a brief +# training. +# + +model_aal = train_model(model_aal, optimizer, loss_fn, train_loader, batch_size, epoch_num=10) + + +###################################################################### +# Let’s test that how much our model has learned to implicate the exact +# input by seeing the real images +# + +test_model(train_loader, model_aal) + + +###################################################################### +# We can See that After 10 epochs of training our model is learning the +# general shape of the given input. so we are on the right track. +# +# Let’s make the linear model a bit dynamic. We add two linear layers +# which we could adjust the size of compression. +# + +class AutoencoderLinearA(nn.Module): + def __init__(self, compress_nodes): + super().__init__() + self.linear_en_1 = nn.Linear(in_features=28*28, out_features=196) + self.linear_en_2 = nn.Linear(in_features=196, out_features=98) + self.linear_de_1 = nn.Linear(in_features=98, out_features=196) + self.linear_de_2 = nn.Linear(in_features=196, out_features=28*28) + self.linear_en_c = nn.Linear(in_features=98, out_features=compress_nodes) + self.linear_de_c = nn.Linear(in_features=compress_nodes, out_features=98) + + def forward(self, x): + encode_1 = F.leaky_relu(self.linear_en_1(x)) + encode_2 = F.leaky_relu(self.linear_en_2(encode_1)) + encode_c = self.linear_en_c(encode_2) + decode_c = self.linear_de_c(encode_c) + decode_1 = F.leaky_relu(self.linear_de_1(decode_c)) + decode_2 = F.sigmoid(self.linear_de_2(decode_1)) + return decode_2 + +model_aala, optimizer, loss_fn = make_model(AutoencoderLinearA, compress_=10) +model_aala = train_model(model_aala, optimizer, loss_fn, train_loader, batch_size, epoch_num=10) + + +###################################################################### +# With a compression level which we compress all the 784 pixels to 10 +# nodes and then rescale them we still learning the general shape of the +# item after 10 epochs. +# +# We might get better results if we try and use higher epochs, These +# results are just for 10 epochs. (Although the chart is showing that the +# training loss might not increase a lot and we might need to make the +# model more complicated to decrease the error) +# +# Although we can see that when we decrease the compression nodes we are +# loosing some data which increases the training error, as you can compare +# the charts before. +# + +test_model(model_=model_aala, loader_obj=train_loader) + + +###################################################################### +# Autoencoder with CNN +# ==================== +# +# This Python code defines a convolutional autoencoder class using +# PyTorch. The autoencoder consists of an encoder and a decoder network. +# +# **Encoder:** \* Takes a 1-Channel image as input. \* Applies a series of +# convolutional layers with LeakyReLU activations to extract features. \* +# Uses a flattening layer to convert the feature maps into a linear +# vector. \* Finally, projects the vector into a latent space +# representation. +# +# **Decoder:** \* Takes the latent space representation as input. \* +# Projects it back to the original feature map size using a linear +# and unflatten layer. \* Applies a series of transposed convolutional layers +# with LeakyReLU activations to reconstruct the image. \* Uses an +# activation function to output the reconstructed image with pixel values +# between 0 and 1. +# +# **Forward Pass:** \* Encodes the input image using the encoder. \* +# Decodes the encoded representation using the decoder. \* Returns the +# reconstructed image. +# + +# Build the Autoencoder with CNN using the sequential method from pytorch +class AutoencoderCNN(nn.Module): + def __init__(self): + super().__init__() + self.encoder = nn.Sequential( + nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(5,5), stride=(1,1)), + nn.LeakyReLU(), + nn.Conv2d(in_channels=16, out_channels=64, kernel_size=(5,5), stride=(1,1)), + nn.LeakyReLU(), + nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(2,2), stride=(2,2)), + nn.LeakyReLU(), + nn.Flatten(), + nn.Linear(in_features=128*10*10, out_features=144) + ) + self.decoder = nn.Sequential( + nn.Linear(in_features=144, out_features=128*10*10), + nn.Unflatten(1, (128, 10, 10)), + nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=(2,2), stride=(2,2)), + nn.LeakyReLU(), + nn.ConvTranspose2d(in_channels=64, out_channels=16, kernel_size=(5,5), stride=(1,1)), + nn.LeakyReLU(), + nn.ConvTranspose2d(in_channels=16, out_channels=1, kernel_size=(5,5), stride=(1,1)), + nn.Sigmoid() + ) + + def forward(self, x): + encoded = self.encoder(x) + decoded = self.decoder(encoded) + return decoded + +model_cnn, optimizer, loss_fn = make_model(AutoencoderCNN) +model_cnn = train_model(model_cnn, optimizer, loss_fn, train_loader, batch_size, epoch_num=5, model_linear=False) + +test_model(train_loader, model_cnn, linear=False) + + +###################################################################### +# Looks like we have achieved a better result using a CNN Autoencoder with +# just only 5 epochs but a longer training time. (10 minutes instead of 30 +# seconds training time) +# +# Let’s take a look at the output of the ``code layer`` which compresses +# the data, and convert it to a picture to see if there is anything +# meaningful in there. +# + +batch_iter = iter(train_loader) +batch_images = next(batch_iter) +tmp_image = batch_images[0][0, 0, :, :] +enc_output = model_cnn.encoder(tmp_image.reshape(1, 1, 28, 28)) + +# We have 144 Nodes so we can derive a 12*12 picture from it. +plt.imshow(enc_output.detach().numpy().reshape(12, 12)) +plt.title("Model's Encoder Output") +plt.show() + + +###################################################################### +# Autoencoder for Data Noise Reduction +# ------------------------------------- +# +# Autoencoder have emerged as a powerful tool for mitigating noise in +# various data modalities. By training a neural network to reconstruct +# clean data from noisy inputs, these models effectively learn to filter +# out unwanted disturbances. +# +# A key advantage of autoencoder lies in their ability to capture +# complex, non-linear relationships within data. This enables them to +# effectively remove noise while preserving essential features. Moreover, +# autoencoder are unsupervised learning models, requiring only unlabeled +# data for training, making them versatile for a wide range of +# applications. +# +# By effectively removing noise, autoencoder can significantly enhance +# the performance of downstream machine learning models, leading to +# improved accuracy and robustness. +# +# Let's introduce some noise to the image and evaluate how our model +# performs in reconstructing the output without noise. +# + +noisy_test(train_loader, model_cnn, linear=False, noise_intensity=0.3) + + +###################################################################### +# We have added a lot of noise to our input data and our model was able to +# reduce many of them and find the general shape of our original image. +# + + +###################################################################### +# CIFAR 10 +# ======== +# +# We will try to use the autoencoder with CIFAR10 dataset. This dataset +# consists of color images with 3 channels and 32*32 size. +# +# Since the images in this dataset has more variety and also has colors in +# them we need to use a bigger model to be able to distinguish between +# pattern and also reproduce the given image with a low loss. +# + +# Load data and make it into chunks +cifar_loader = load_cifar_data() + + +###################################################################### +# Let’s check the size of chunks +# + +cifar_loader.dataset.data.shape + + +###################################################################### +# A quick peek at the images. +# + +# get some random training images +dataiter = iter(cifar_loader) +images, labels = next(dataiter) + +# show images by converting batches to grids +image_show(images, "Original image") + +# We use a similar architecture as before just tweaking some numbers for a bigger model +# since these pictures has 3 channels and we need to compress more data in our model +# We also add some padding to take into account the information that is stored on the edges of the pictures. +class AutoencoderCNNCIF(nn.Module): + def __init__(self): + super().__init__() + self.encoder = nn.Sequential( + nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(4,4), stride=(1,1), padding=2), + nn.LeakyReLU(), + nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(5,5), stride=(2,2), padding=2), + nn.LeakyReLU(), + nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(5,5), stride=(1,1)), + nn.LeakyReLU(), + nn.Conv2d(in_channels=128, out_channels=512, kernel_size=(3,3), stride=(2,2)), + nn.LeakyReLU(), + nn.Flatten(), + nn.Linear(in_features=512*6*6, out_features=100) + ) + self.decoder = nn.Sequential( + nn.Linear(in_features=100, out_features=512*6*6), + nn.Unflatten(1, (512, 6, 6)), + nn.ConvTranspose2d(in_channels=512, out_channels=128, kernel_size=(3,3), stride=(2,2)), + nn.LeakyReLU(), + nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=(5,5), stride=(1,1)), + nn.LeakyReLU(), + nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=(5,5), stride=(2,2), padding=2), + nn.LeakyReLU(), + nn.ConvTranspose2d(in_channels=32, out_channels=3, kernel_size=(4,4), stride=(1,1), padding=2), + nn.Sigmoid() + ) + + def forward(self, x): + encoded = self.encoder(x) + decoded = self.decoder(encoded) + return decoded + +model_cifar, optimizer_cifar, loss_cifar = make_model(AutoencoderCNNCIF, .001) +model_cifar = train_model(model_cifar, optimizer_cifar, loss_cifar, cifar_loader, 32, 3, False) + +# Test the output model by feeding random batches to it and get the output +test_cifar(model_cifar, cifar_loader) + + +###################################################################### +# Our CNN model has successfully reconstructed many details +# of the images, though the outputs remain somewhat blurry. +# +# We should consider adding additional layers to the model +# to enhance its ability to detect and preserve patterns in +# the data during compression. +# +# Another potential cause of the blurry images is the size +# of the "code layer." If it is too small for this type of +# data, it may lose crucial details, making it difficult to +# recover specific information during reconstruction. \ No newline at end of file diff --git a/index.rst b/index.rst index 91517834fd8..7b38d6eba32 100644 --- a/index.rst +++ b/index.rst @@ -159,6 +159,13 @@ Welcome to PyTorch Tutorials :link: advanced/usb_semisup_learn.html :tags: Image/Video +.. customcarditem:: + :header: Autoencoders with PyTorch + :card_description: Learn how to train Autoencoders for images using PyTorch. + :image: _static/img/thumbnails/cropped/torch-nn.png + :link: beginner/autoencoders_tutorial.html + :tags: Image/Video + .. Audio .. customcarditem:: @@ -976,6 +983,7 @@ Additional Resources beginner/deep_learning_60min_blitz beginner/pytorch_with_examples beginner/nn_tutorial + beginner/autoencoders_tutorial intermediate/tensorboard_tutorial intermediate/pinmem_nonblock