Skip to content

Excuse me, how can we extend the model to predict three-dimensional vectors such as (x, y, z)? #8

@HankerSia

Description

@HankerSia

Hello!
I need to predict a 3D trajectory. I have tried to modify the code, but the model cannot run normally: the last dimension of the output contains many NaN values. Can you give me some suggestions? Thank you very much!
The code I modified is as follows:
`def _build_graph(self):
'''Build the graph: 3-D placeholders, embedding + LSTM, the 7-parameter Gaussian output layer, the NLL cost with L2 regularisation, and the clipped RMSProp train op.'''

    cell = rnn_cell.BasicLSTMCell(self.num_units, state_is_tuple=True)

    # (batch, time, 3) tensors of (x, y, z) positions.
    self.input_data = tf.placeholder(tf.float32, [None, self.sequence_length, 3])
    self.target_data = tf.placeholder(tf.float32, [None, self.sequence_length, 3])

    # Non-trainable so the LR can be reassigned during training.
    self.lr = tf.Variable(self.learning_rate, trainable=False, name="learning_rate")
    self.initial_state = cell.zero_state(batch_size=self.batch_size, dtype=tf.float32)
    
    # input dimensionality is the (x, y, z) position at every step
    # the output comprises three means, three std devs and 1 corr -> 7 values.
    # NOTE(review): a single corr cannot represent the three pairwise
    # correlations (rho_xy, rho_xz, rho_yz) of a general 3-D Gaussian.
    embedding_w, embedding_b, output_w, output_b = self.build_embeddings(input_dim=3, output_dim=7)

    # Prepare inputs: split (batch, T, 3) into T tensors of shape (batch, 3).
    inputs = tf.split(self.input_data, self.sequence_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    # the actual LSTM model
    embedded_inputs = self.embed_inputs(inputs, embedding_w, embedding_b)
    outputs, last_state = self.lstm_advance(embedded_inputs, cell)
    final_output = self.final_layer(outputs, output_w, output_b)# shape=(batch*T, 7) -- was (400, 7) for the author's settings
    
    self.final_state = last_state
    # reshape target data so that it aligns with predictions
    flat_target_data = tf.reshape(self.target_data, [-1, 3])
    # Extract the x-, y- and z-coordinates from the target data
    [x_data, y_data, z_data] = tf.split(flat_target_data, num_or_size_splits = 3, axis = 1)

    # Extract coef from output of the linear output layer
    [o_mux, o_muy, o_muz, o_sx, o_sy, o_sz, o_corr] = self.get_coef(final_output)
    self.mux = o_mux
    self.muy = o_muy
    self.muz = o_muz
    self.sx = o_sx
    self.sy = o_sy
    self.sz = o_sz
    self.corr = o_corr
    # o_corr = tf.Print(o_corr, [o_corr, o_corr.shape], message='Debug o_corr:', summarize=400)
    if self.mode != tf.contrib.learn.ModeKeys.INFER:
        with tf.name_scope("Optimization"):
            # Negative log-likelihood of the targets under the predicted 3-D
            # Gaussian.  NOTE(review): the reported NaNs most likely enter here
            # via an invalid density / overflowing exp() upstream -- confirm.
            lossfunc = self.get_lossfunc_3d(o_mux, o_muy, o_muz, o_sx, o_sy, o_sz, o_corr, x_data, y_data, z_data)
            # lossfunc = self.get_lossfunc(o_mux, o_muy, o_sx, o_sy, o_corr, x_data, y_data)
            self.cost = tf.div(lossfunc, (self.batch_size * self.sequence_length))
            trainable_params = tf.trainable_variables()

            # apply L2 regularisation
            # NOTE(review): 0.05 over *all* trainables (weights and biases) is a
            # fairly strong penalty; worth checking whether it destabilises training.
            l2 = 0.05 * sum(tf.nn.l2_loss(t_param) for t_param in trainable_params)
            self.cost = self.cost + l2
            # self.cost = lossfunc
            tf.summary.scalar('cost', self.cost)

            # Clip by global norm to guard against exploding gradients.
            self.gradients = tf.gradients(self.cost, trainable_params)
            grads, _ = tf.clip_by_global_norm(self.gradients, self.grad_clip)

            # Adam might also do a good job as in Graves (2013)
            optimizer = tf.train.RMSPropOptimizer(self.lr)
            # Train operator
            self.train_op = optimizer.apply_gradients(zip(grads, trainable_params))

    self.init = tf.global_variables_initializer()`

The helper functions are as follows:

`def get_coef(self, output):
# eq 20 -> 22 of Graves (2013)
z = output
z_mux, z_muy, z_muz, z_sx, z_sy, z_sz, z_corr = tf.split(z, 7, 1)
z_corr = tf.Print(z_corr, [z_corr, z_corr.shape], message='Debug z_corr:', summarize=400)
# The output must be exponentiated for the std devs
z_sx = tf.exp(z_sx)
z_sy = tf.exp(z_sy)
z_sz = tf.exp(z_sz)
# Tanh applied to keep it in the range [-1, 1]
z_corr = tf.tanh(z_corr)

    return [z_mux, z_muy, z_muz, z_sx, z_sy, z_sz, z_corr]

def get_lossfunc_3d(self, z_mux, z_muy, z_muz, z_sx, z_sy, z_sz, z_corr, x_data, y_data, z_data):
    '''Summed negative log-likelihood of the targets under the predicted 3-D Gaussian.'''
    # Density of every target point under the predicted distribution
    # (eq. 24-25 of Graves, 2013, extended to three dimensions).
    pdf = distributions.tf_3d_normal(self.g, x_data, y_data, z_data,
                                     z_mux, z_muy, z_muz,
                                     z_sx, z_sy, z_sz, z_corr)
    # Floor the density before the log so log(0) cannot yield -inf
    # (numerical-stability trick as in Vemula, 2018).
    eps = 1e-20
    nll = -tf.log(tf.maximum(pdf, eps))
    return tf.reduce_sum(nll)

def tf_3d_normal(g, x, y, z, mux, muy, muz, sx, sy, sz, rho):
    '''Trivariate Gaussian density with one shared pairwise correlation.

    Fixes the original formula, which extended the bivariate density (eq. 24-25
    of Graves, 2013) incorrectly: its cross term rho*(dx*dy*dz)/(sx*sy*sz) is
    degree-3 (a quadratic form must be degree-2) and it kept the 2-D normaliser
    2*pi*s*sqrt(1-rho^2), so the result was not a density and readily went
    negative or NaN.  Here the covariance is modelled with the equicorrelation
    matrix R = (1-rho)*I + rho*J, which has the closed forms
        R^{-1} = (I - rho/(1+2*rho) * J) / (1-rho),   det R = (1-rho)^2 (1+2*rho).

    Args:
        g: the tf.Graph to build the ops in.
        x, y, z: target coordinates.
        mux, muy, muz: predicted means.
        sx, sy, sz: predicted std devs (positive).
        rho: shared pairwise correlation in (-1, 1); clipped to the
             positive-definite range (-1/2, 1) below.

    Returns:
        Tensor of per-point density values.
    '''
    with g.as_default():
        # Standardised residuals u_i = (v_i - mu_i) / s_i.
        ux = tf.divide(tf.subtract(x, mux), sx)
        uy = tf.divide(tf.subtract(y, muy), sy)
        uz = tf.divide(tf.subtract(z, muz), sz)

        # R is positive definite only for rho in (-1/2, 1); tanh upstream
        # yields (-1, 1), so clip to keep sqrt() and the divisions finite.
        eps = 1e-6
        rho = tf.clip_by_value(rho, -0.5 + eps, 1.0 - eps)

        # Quadratic form u^T R^{-1} u via the closed-form inverse above.
        sum_sq = tf.square(ux) + tf.square(uy) + tf.square(uz)
        sum_u = ux + uy + uz
        quad = tf.divide(sum_sq - tf.divide(tf.multiply(rho, tf.square(sum_u)),
                                            1.0 + 2.0 * rho),
                         1.0 - rho)

        # 3-D normalising constant: (2*pi)^(3/2) * sx*sy*sz * sqrt(det R).
        det_r = tf.square(1.0 - rho) * (1.0 + 2.0 * rho)
        sxsysz = tf.multiply(tf.multiply(sx, sy), sz)
        denominator = ((2.0 * np.pi) ** 1.5) * tf.multiply(sxsysz, tf.sqrt(det_r))

        return tf.divide(tf.exp(-0.5 * quad), denominator)

`
I tried to debug the model using tf.Print. I found that "final_output" contains many NaN values, which cause the get_lossfunc_3d function to compute erroneous results.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions