# Source: guided-infant-generation/src/models/mono/G2.py (vrai-group/guided-infant-generation, master branch)

import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam

from models.Model_template import Model_Template

class G2(Model_Template):
    """Generator G2 for the "mono" architecture.

    Builds a convolutional encoder / bridge / decoder network with two long
    skip connections (concatenation), and provides the generator losses and
    the SSIM-based metrics used with it.

    NOTE(review): ``self.model`` is not assigned in this class; presumably
    ``Model_Template.__init__`` creates it from ``_build_model()`` (and uses
    ``_optimizer()``) -- confirm against the base class.
    """

    def __init__(self):
        """Set the G2 hyper-parameters, then run the base-class initializer."""
        self.architecture = "mono"
        # [height, width, channels]; prediction() concatenates its two inputs
        # on the channel axis to obtain the 2 input channels.
        self.input_shape = [96, 128, 2]
        self.output_channels = 1
        # Base number of convolution filters; the other widths are multiples.
        self.conv_hidden_num = 128
        # Not read anywhere in this class; kept for the base class / callers.
        self.repeat_num = int(np.log2(self.input_shape[0])) - 2
        self.activation_fn = 'relu'
        self.data_format = 'channels_last'
        self.lr_initial_G2 = 2e-5
        # Attributes are assigned before the base initializer runs.
        # NOTE(review): presumably the base class reads them -- confirm.
        super().__init__()

    def _conv_layer(self, tensor, filters, kernel_size, strides=1, padding='same', activate=True):
        """Create one Conv2D layer with this model's defaults and apply it.

        :param tensor: input tensor of the layer
        :param filters: number of output filters
        :param kernel_size: convolution kernel size
        :param strides: convolution stride (default 1)
        :param padding: Keras padding mode (default 'same')
        :param activate: use ``self.activation_fn`` when True, no activation
            (linear output) when False
        :return: the output tensor of the new layer
        """
        layer = Conv2D(
            filters,
            kernel_size,
            strides,
            padding=padding,
            activation=self.activation_fn if activate else None,
            data_format=self.data_format,
        )
        return layer(tensor)

    def _build_model(self):
        """Build and return the G2 Keras model.

        Layers are created in a fixed order (the same order as the layers are
        wired) so that Keras' automatically generated layer names stay stable.

        :return: ``keras.Model`` mapping ``self.input_shape`` inputs to a
            ``self.output_channels``-channel output with no final activation
        """
        base = self.conv_hidden_num

        # Encoder
        inputs = Input(shape=self.input_shape)

        conv1 = inputs
        for _ in range(3):
            conv1 = self._conv_layer(conv1, base, 3)

        # Strided 2x2 convolution used in place of a pooling layer.
        pool1 = self._conv_layer(conv1, base * 2, 2, strides=2)
        conv2 = pool1
        for _ in range(2):
            conv2 = self._conv_layer(conv2, base * 2, 3)  # 256 with base=128

        # Bridge
        # Keras' default 'valid' padding is kept here as in the original
        # layer definition; for even spatial sizes a 2x2 kernel with stride 2
        # yields the same output size as 'same'.
        pool3 = self._conv_layer(conv2, base * 3, 2, strides=2, padding='valid')  # pool
        conv3 = pool3
        for _ in range(2):
            conv3 = self._conv_layer(conv3, base * 3, 3)  # 384 with base=128
        up4 = UpSampling2D(size=(2, 2), data_format=self.data_format, interpolation="nearest")(conv3)
        up4 = self._conv_layer(up4, base, 2)  # 128 with base=128

        # Decoder
        # Long skip connection: base + 2*base = 3*base channels (128+256=384).
        merge4 = Concatenate(axis=-1)([up4, conv2])
        conv4 = merge4
        for _ in range(2):
            conv4 = self._conv_layer(conv4, base * 3, 3)

        up5 = UpSampling2D(size=(2, 2), data_format=self.data_format, interpolation="nearest")(conv4)
        up5 = self._conv_layer(up5, base, 2)
        # Long skip connection: base + base = 2*base channels (128+128=256).
        merge5 = Concatenate(axis=-1)([up5, conv1])
        conv5 = merge5
        for _ in range(2):
            conv5 = self._conv_layer(conv5, base * 2, 3)

        # 1x1 convolution without activation: linear output.
        outputs = self._conv_layer(conv5, self.output_channels, 1, activate=False)

        return keras.Model(inputs, outputs)

    def prediction(self, I_PT1, Ic):
        """Run G2 on the channel-wise concatenation of ``I_PT1`` and ``Ic``.

        :param I_PT1: first input tensor
        :param Ic: second input tensor, concatenated after ``I_PT1`` on the
            last axis
        :return: model output cast to float16
        """
        input_G2 = tf.concat([I_PT1, Ic], axis=-1)  # [batch, 96, 128, 2]
        output_G2 = self.model(input_G2)  # [batch, 96, 128, 1] dtype=float32
        return tf.cast(output_G2, dtype=tf.float16)

    def _optimizer(self):
        """Return an Adam optimizer with ``lr_initial_G2`` and ``beta_1=0.5``."""
        return Adam(learning_rate=self.lr_initial_G2, beta_1=0.5)

    # Loss
    def PoseMaskloss(self, I_PT2, It, Mt):
        """Return the L1 loss plus the mask-weighted L1 loss.

        All inputs are cast to float32 first. The result is
        ``mean(|I_PT2 - It|) + mean(|I_PT2 - It| * Mt)``.

        :param I_PT2: generated image
        :param It: target image
        :param Mt: mask multiplied element-wise with the absolute error
        :return: scalar float32 loss
        """
        It = tf.cast(It, dtype=tf.float32)
        I_PT2 = tf.cast(I_PT2, dtype=tf.float32)
        Mt = tf.cast(Mt, dtype=tf.float32)

        # The absolute error is shared by both terms; compute it once.
        abs_error = tf.abs(I_PT2 - It)
        l1_term = tf.reduce_mean(abs_error)  # L1 loss
        masked_l1_term = tf.reduce_mean(abs_error * Mt)
        return l1_term + masked_l1_term

    def adv_loss(self, D_neg_refined_result, I_PT2, It, Mt):
        """Return the generator loss: adversarial term + 10 * PoseMaskloss.

        :param D_neg_refined_result: discriminator logits for the generated
            images
        :param I_PT2: generated image (forwarded to ``PoseMaskloss``)
        :param It: target image (forwarded to ``PoseMaskloss``)
        :param Mt: mask (forwarded to ``PoseMaskloss``)
        :return: scalar float32 loss
        """
        # Cast the logits to float32 *before* the cross-entropy and the mean,
        # so the reduction is never carried out in a lower-precision dtype.
        logits = tf.cast(D_neg_refined_result, dtype=tf.float32)

        # Loss to fool the discriminator by producing an increasingly
        # realistic image: generated samples are labelled as real (ones).
        gen_cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=tf.ones_like(logits)))

        poseMaskLoss = self.PoseMaskloss(I_PT2, It, Mt)

        return gen_cost + poseMaskLoss * 10

    # Metrics
    def _unprocess_to_uint16(self, image, mean, unprocess_function):
        """Reshape ``image`` to a 4-D batch, unprocess it and cast to uint16.

        The target shape is ``[-1, height, width, output_channels]`` taken
        from ``self.input_shape`` / ``self.output_channels``
        (``[-1, 96, 128, 1]`` with the defaults set in ``__init__``).

        NOTE(review): ``unprocess_function(image, mean)`` is opaque here;
        presumably it maps normalized values back to the original pixel
        range -- confirm that its output is non-negative before the uint16
        cast.
        """
        height, width = self.input_shape[0], self.input_shape[1]
        image = tf.reshape(image, [-1, height, width, self.output_channels])
        return tf.cast(unprocess_function(image, mean), dtype=tf.uint16)

    def ssim(self, I_PT2, It, mean_0, mean_1, unprocess_function):
        """Return the mean SSIM between the generated and the target images.

        :param I_PT2: generated image, unprocessed with ``mean_0``
        :param It: target image, unprocessed with ``mean_1``
        :param mean_0: second argument of ``unprocess_function`` for ``I_PT2``
        :param mean_1: second argument of ``unprocess_function`` for ``It``
        :param unprocess_function: callable ``(image, mean) -> image``
        :return: scalar mean of ``tf.image.ssim`` over the batch
        """
        It = self._unprocess_to_uint16(It, mean_1, unprocess_function)
        I_PT2 = self._unprocess_to_uint16(I_PT2, mean_0, unprocess_function)

        # Dynamic range is taken from the target batch itself.
        dynamic_range = tf.reduce_max(It) - tf.reduce_min(It)
        result = tf.image.ssim(I_PT2, It, max_val=dynamic_range)
        return tf.reduce_mean(result)

    def mask_ssim(self, I_PT2, It, Mt, mean_0, mean_1, unprocess_function):
        """Return the mean SSIM computed on the mask-multiplied images.

        :param I_PT2: generated image, unprocessed with ``mean_0``
        :param It: target image, unprocessed with ``mean_1``
        :param Mt: mask, reshaped to one channel, cast to uint16 and
            multiplied with both images
        :param mean_0: second argument of ``unprocess_function`` for ``I_PT2``
        :param mean_1: second argument of ``unprocess_function`` for ``It``
        :param unprocess_function: callable ``(image, mean) -> image``
        :return: scalar mean of ``tf.image.ssim`` over the batch
        """
        It = self._unprocess_to_uint16(It, mean_1, unprocess_function)
        I_PT2 = self._unprocess_to_uint16(I_PT2, mean_0, unprocess_function)

        height, width = self.input_shape[0], self.input_shape[1]
        Mt = tf.cast(tf.reshape(Mt, [-1, height, width, 1]), dtype=tf.uint16)

        masked_target = Mt * It
        masked_generated = Mt * I_PT2

        # Dynamic range is taken from the *unmasked* target batch.
        dynamic_range = tf.reduce_max(It) - tf.reduce_min(It)
        result = tf.image.ssim(masked_target, masked_generated, max_val=dynamic_range)
        return tf.reduce_mean(result)