From 8777f8a4f4e7ec41f365f99973bdf65f77ceafb3 Mon Sep 17 00:00:00 2001
From: Fju
Date: Sun, 21 Oct 2018 18:50:18 +0200
Subject: [PATCH] Comments in `yolo_tiny_net.py`

I added comments that explain how the loss is calculated in the
`yolo_tiny_net.py` file. A `.gitignore` was added to avoid committing model
checkpoints or `*.pyc` files.
---
 .gitignore                |   2 +
 yolo/net/yolo_tiny_net.py | 494 +++++++++++++++++++-------------------
 2 files changed, 249 insertions(+), 247 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..65e6ca4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+models/
+*.pyc

diff --git a/yolo/net/yolo_tiny_net.py b/yolo/net/yolo_tiny_net.py
index 48dfa84..5323f05 100644
--- a/yolo/net/yolo_tiny_net.py
+++ b/yolo/net/yolo_tiny_net.py
@@ -4,305 +4,305 @@
 import tensorflow as tf
 import numpy as np
-import re
+#import re

 from yolo.net.net import Net

 class YoloTinyNet(Net):

-  def __init__(self, common_params, net_params, test=False):
-    """
-    common params: a params dict
-    net_params   : a params dict
-    """
-    super(YoloTinyNet, self).__init__(common_params, net_params)
-    #process params
-    self.image_size = int(common_params['image_size'])
-    self.num_classes = int(common_params['num_classes'])
-    self.cell_size = int(net_params['cell_size'])
-    self.boxes_per_cell = int(net_params['boxes_per_cell'])
-    self.batch_size = int(common_params['batch_size'])
-    self.weight_decay = float(net_params['weight_decay'])
-
-    if not test:
-      self.object_scale = float(net_params['object_scale'])
-      self.noobject_scale = float(net_params['noobject_scale'])
-      self.class_scale = float(net_params['class_scale'])
-      self.coord_scale = float(net_params['coord_scale'])
+  def __init__(self, common_params, net_params, test=False):
+    """
+    common_params: a params dict
+    net_params: a params dict
+    """
+    super(YoloTinyNet, self).__init__(common_params, net_params)
+    # process params
+    self.image_size = int(common_params['image_size'])
+    self.num_classes = int(common_params['num_classes'])
+    self.cell_size = int(net_params['cell_size'])
+    self.boxes_per_cell = int(net_params['boxes_per_cell'])
+    self.batch_size = int(common_params['batch_size'])
+    self.weight_decay = float(net_params['weight_decay'])
+
+    if not test:
+      self.object_scale = float(net_params['object_scale'])
+      self.noobject_scale = float(net_params['noobject_scale'])
+      self.class_scale = float(net_params['class_scale'])
+      self.coord_scale = float(net_params['coord_scale'])

-  def inference(self, images):
-    """Build the yolo model
-
-    Args:
-      images: 4-D tensor [batch_size, image_height, image_width, channels]
-    Returns:
-      predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell]
-    """
-    conv_num = 1
+  def inference(self, images):
+    """Build the yolo model
+    Args:
+      images: 4-D tensor [batch_size, image_height, image_width, channels]
+    Returns:
+      predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + boxes_per_cell * 5]
+                ==> (classes..., box confidences..., then x_center, y_center, w, h per box)
+    """
+    conv_num = 1

-    temp_conv = self.conv2d('conv' + str(conv_num), images, [3, 3, 3, 16], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), images, [3, 3, 3, 16], stride=1)
+    conv_num += 1

-    temp_pool = self.max_pool(temp_conv, [2, 2], 2)
+    temp_pool = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 16, 32], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 16, 32], stride=1)
+    conv_num += 1
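+
+    # Note: assuming the default config (image_size = 448, cell_size = 7), every
+    # max_pool layer in this stack halves the spatial resolution, so the six pools
+    # shrink the 448x448 input to 448 / 2**6 = 7x7, one position per grid cell.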

-    temp_pool = self.max_pool(temp_conv, [2, 2], 2)
+    temp_pool = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 32, 64], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 32, 64], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 64, 128], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 64, 128], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 128, 256], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 128, 256], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 256, 512], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 256, 512], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 512, 1024], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 512, 1024], stride=1)
+    conv_num += 1

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
+    conv_num += 1

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
+    conv_num += 1

-    temp_conv = tf.transpose(temp_conv, (0, 3, 1, 2))
+    temp_conv = tf.transpose(temp_conv, (0, 3, 1, 2))

-    #Fully connected layer
-    local1 = self.local('local1', temp_conv, self.cell_size * self.cell_size * 1024, 256)
+    # fully connected layers
+    local1 = self.local('local1', temp_conv, self.cell_size * self.cell_size * 1024, 256)

-    local2 = self.local('local2', local1, 256, 4096)
+    local2 = self.local('local2', local1, 256, 4096)

-    local3 = self.local('local3', local2, 4096, self.cell_size * self.cell_size * (self.num_classes + self.boxes_per_cell * 5), leaky=False, pretrain=False, train=True)
+    local3 = self.local('local3', local2, 4096, self.cell_size * self.cell_size * (self.num_classes + self.boxes_per_cell * 5), leaky=False, pretrain=False, train=True)

-    n1 = self.cell_size * self.cell_size * self.num_classes
-
-    n2 = n1 + self.cell_size * self.cell_size * self.boxes_per_cell
-
-    class_probs = tf.reshape(local3[:, 0:n1], (-1, self.cell_size, self.cell_size, self.num_classes))
-    scales = tf.reshape(local3[:, n1:n2], (-1, self.cell_size, self.cell_size, self.boxes_per_cell))
-    boxes = tf.reshape(local3[:, n2:], (-1, self.cell_size, self.cell_size, self.boxes_per_cell * 4))
-
-    local3 = tf.concat([class_probs, scales, boxes], 3)
-
-    predicts = local3
-
-    return predicts
+    n1 = self.cell_size * self.cell_size * self.num_classes
+    n2 = n1 + self.cell_size * self.cell_size * self.boxes_per_cell
+
+    class_probs = tf.reshape(local3[:, 0:n1], (-1, self.cell_size, self.cell_size, self.num_classes))
+    scales = tf.reshape(local3[:, n1:n2], (-1, self.cell_size, self.cell_size, self.boxes_per_cell))
+    boxes = tf.reshape(local3[:, n2:], (-1, self.cell_size, self.cell_size, self.boxes_per_cell * 4))
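+
+    # Note: assuming the default config (cell_size = 7, num_classes = 20, boxes_per_cell = 2),
+    # local3 emits 7 * 7 * (20 + 2 * 5) = 1470 values per image, sliced above into
+    # n1 = 7 * 7 * 20 = 980 class scores, n2 - n1 = 7 * 7 * 2 = 98 box confidences
+    # and 7 * 7 * 2 * 4 = 392 box coordinates.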
+
+    local3 = tf.concat([class_probs, scales, boxes], 3)
+
+    predicts = local3
+
+    return predicts

-  def iou(self, boxes1, boxes2):
-    """calculate ious
-    Args:
-      boxes1: 4-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4] ====> (x_center, y_center, w, h)
-      boxes2: 1-D tensor [4] ===> (x_center, y_center, w, h)
-    Return:
-      iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    """
-    boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] - boxes1[:, :, :, 3] / 2,
-                       boxes1[:, :, :, 0] + boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] + boxes1[:, :, :, 3] / 2])
-    boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])
-    boxes2 = tf.stack([boxes2[0] - boxes2[2] / 2, boxes2[1] - boxes2[3] / 2,
-                       boxes2[0] + boxes2[2] / 2, boxes2[1] + boxes2[3] / 2])
-
-    #calculate the left up point
-    lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2])
-    rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])
-
-    #intersection
-    intersection = rd - lu
-
-    inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1]
-
-    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)
-
-    inter_square = mask * inter_square
-
-    #calculate the boxs1 square and boxs2 square
-    square1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * (boxes1[:, :, :, 3] - boxes1[:, :, :, 1])
-    square2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1])
-
-    return inter_square/(square1 + square2 - inter_square + 1e-6)
+  def iou(self, p_boxes, t_box):
+    """ calculate ious (iou: intersection over union)
+    Args:
+      p_boxes: predicted boxes, 4-D tensor [cell_size, cell_size, boxes_per_cell, (x_center, y_center, w, h)]
+      t_box: true box, 1-D tensor [(x_center, y_center, w, h)]
+    Return:
+      iou: intersection over union, 3-D tensor [cell_size, cell_size, boxes_per_cell]
+    """
+    # convert boxes from [center_x, center_y, width, height] to [left, top, right, bottom]
+    p_boxes = tf.stack([p_boxes[:, :, :, 0] - p_boxes[:, :, :, 2] / 2, p_boxes[:, :, :, 1] - p_boxes[:, :, :, 3] / 2,
+                        p_boxes[:, :, :, 0] + p_boxes[:, :, :, 2] / 2, p_boxes[:, :, :, 1] + p_boxes[:, :, :, 3] / 2])
+    # transpose boxes tensor so that the box coordinates become the last dimension
+    p_boxes = tf.transpose(p_boxes, [1, 2, 3, 0])
+
+    # convert true box from [center_x, center_y, width, height] to [left, top, right, bottom]
+    t_box = tf.stack([t_box[0] - t_box[2] / 2, t_box[1] - t_box[3] / 2,
+                      t_box[0] + t_box[2] / 2, t_box[1] + t_box[3] / 2])
+
+    # find top-left and bottom-right point of the intersection (max/min of true box and predicted box)
+    top_left = tf.maximum(p_boxes[:, :, :, 0:2], t_box[0:2])    # [:, :, :, 2] ==> (left, top)
+    bottom_right = tf.minimum(p_boxes[:, :, :, 2:], t_box[2:])  # [:, :, :, 2] ==> (right, bottom)
+    # intersection [:, :, :, 2] ==> (right - left, bottom - top)
+    intersection = bottom_right - top_left
+
+    # calculate area of the intersection rectangle (A = a * b)
+    inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1]
+
+    # prevent negative areas where the boxes do not overlap at all
+    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)
+    inter_square = mask * inter_square
+
+    # calculate the areas of both rectangles (A = a * b)
+    square1 = (p_boxes[:, :, :, 2] - p_boxes[:, :, :, 0]) * (p_boxes[:, :, :, 3] - p_boxes[:, :, :, 1])
+    square2 = (t_box[2] - t_box[0]) * (t_box[3] - t_box[1])
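+
+    # Worked example (illustrative values, not from the code): a predicted box
+    # (x, y, w, h) = (3, 3, 2, 2) has corners (2, 2, 4, 4); a true box (2, 2, 2, 2)
+    # has corners (1, 1, 3, 3). Then top_left = (2, 2), bottom_right = (3, 3),
+    # inter_square = 1, square1 = square2 = 4, and the iou is 1 / (4 + 4 - 1) ~= 0.14.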
+
+    # IoU: Intersection over Union (see https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/)
+    # the union is equal to the sum of both areas minus the intersection
+    return inter_square / (square1 + square2 - inter_square + 1e-6)  # add tiny fraction to prevent division by 0

-  def cond1(self, num, object_num, loss, predict, label, nilboy):
-    """
-    if num < object_num
-    """
-    return num < object_num
+  def loss_loop_cond(self, cnt, object_count, losses, prediction, labels):
+    # keep looping while the object index is smaller than the object count of the current sample
+    return cnt < object_count

-  def body1(self, num, object_num, loss, predict, labels, nilboy):
-    """
-    calculate loss
-    Args:
-      predict: 3-D tensor [cell_size, cell_size, 5 * boxes_per_cell]
-      labels : [max_objects, 5] (x_center, y_center, w, h, class)
-    """
-    label = labels[num:num+1, :]
-    label = tf.reshape(label, [-1])
+  def loss_loop_body(self, cnt, object_count, losses, prediction, labels):
+    """ calculate the loss contribution of a single object
+    Args:
+      prediction: 3-D tensor [cell_size, cell_size, num_classes + boxes_per_cell * 5]
+      labels: 2-D tensor [max_objects, (x_center, y_center, w, h, class)]
+    """
+    # get current label; it contains the center point (x, y), the size (w, h) and the class index
+    label = labels[cnt:cnt+1, :]
+    label = tf.reshape(label, [-1])

-    #calculate objects tensor [CELL_SIZE, CELL_SIZE]
-    min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
-    max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)
-
-    min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
-    max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)
-
-    min_x = tf.floor(min_x)
-    min_y = tf.floor(min_y)
-
-    max_x = tf.ceil(max_x)
-    max_y = tf.ceil(max_y)
-
-    temp = tf.cast(tf.stack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
-    objects = tf.ones(temp, tf.float32)
-
-    temp = tf.cast(tf.stack([min_y, self.cell_size - max_y, min_x, self.cell_size - max_x]), tf.int32)
-    temp = tf.reshape(temp, (2, 2))
-    objects = tf.pad(objects, temp, "CONSTANT")
+    # store the edges of the true box, measured in cells rather than pixels
+    t_left = tf.floor((label[0] - label[2] / 2) / (self.image_size / self.cell_size))
+    t_right = tf.ceil((label[0] + label[2] / 2) / (self.image_size / self.cell_size))
+    t_top = tf.floor((label[1] - label[3] / 2) / (self.image_size / self.cell_size))
+    t_bottom = tf.ceil((label[1] + label[3] / 2) / (self.image_size / self.cell_size))

+    #t_bottom = tf.Print(t_bottom, [t_left, t_top, t_right, t_bottom], 'coordinates: ')
+    # generate tensor of size [cell_size, cell_size] filled with zeros except the region where the object is:
+    # 1. 1-D tensor containing width and height of the true box (width = t_right - t_left, height = t_bottom - t_top)
+    temp = tf.cast(tf.stack([t_bottom - t_top, t_right - t_left]), dtype=tf.int32)
+    # 2. 2-D tensor of size [height, width] filled with 1.0
+    t_region_mask = tf.ones(temp, tf.float32)
+    # 3. 2-D tensor of size [2, 2] that contains the paddings ((pad_top, pad_bottom), (pad_left, pad_right))
+    temp = tf.reshape(tf.cast(tf.stack([t_top, self.cell_size - t_bottom, t_left, self.cell_size - t_right]), tf.int32), (2, 2))
+    # 4. apply padding; the resulting 2-D tensor has size [cell_size, cell_size], the padded area is filled with zeros
+    t_region_mask = tf.pad(t_region_mask, temp, "CONSTANT")
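+
+    # Worked example (illustrative values, not from the code): with cell_size = 4 and an
+    # object spanning t_top = 1, t_bottom = 3, t_left = 0, t_right = 2, tf.ones([2, 2])
+    # is padded with ((1, 1), (0, 2)) and yields
+    #   [[0, 0, 0, 0],
+    #    [1, 1, 0, 0],
+    #    [1, 1, 0, 0],
+    #    [0, 0, 0, 0]]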

-    #calculate objects tensor [CELL_SIZE, CELL_SIZE]
-    #calculate responsible tensor [CELL_SIZE, CELL_SIZE]
-    center_x = label[0] / (self.image_size / self.cell_size)
-    center_x = tf.floor(center_x)
-
-    center_y = label[1] / (self.image_size / self.cell_size)
-    center_y = tf.floor(center_y)
-
-    response = tf.ones([1, 1], tf.float32)
-
-    temp = tf.cast(tf.stack([center_y, self.cell_size - center_y - 1, center_x, self.cell_size - center_x - 1]), tf.int32)
-    temp = tf.reshape(temp, (2, 2))
-    response = tf.pad(response, temp, "CONSTANT")
-    #objects = response
+    # store the cell coordinates of the true box's center
+    t_center_x, t_center_y = tf.floor(label[0] / (self.image_size / self.cell_size)), tf.floor(label[1] / (self.image_size / self.cell_size))
+
+    # generate tensor of size [cell_size, cell_size] filled with zeros except the cell that contains the center of the true object:
+    # 1. 2-D tensor of size [1, 1]
+    t_center_mask = tf.ones([1, 1], tf.float32)
+    # 2. 2-D tensor of size [2, 2] that contains the paddings ((pad_top, pad_bottom), (pad_left, pad_right))
+    temp = tf.reshape(tf.cast(tf.stack([t_center_y, self.cell_size - t_center_y - 1, t_center_x, self.cell_size - t_center_x - 1]), tf.int32), (2, 2))
+    # 3. apply padding; the resulting 2-D tensor has size [cell_size, cell_size], the padded area is filled with zeros
+    t_center_mask = tf.pad(t_center_mask, temp, "CONSTANT")

-    #calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]
-
-    predict_boxes = tf.reshape(predict_boxes, [self.cell_size, self.cell_size, self.boxes_per_cell, 4])
-
-    predict_boxes = predict_boxes * [self.image_size / self.cell_size, self.image_size / self.cell_size, self.image_size, self.image_size]
-
-    base_boxes = np.zeros([self.cell_size, self.cell_size, 4])
-
-    for y in range(self.cell_size):
-      for x in range(self.cell_size):
-        #nilboy
-        base_boxes[y, x, :] = [self.image_size / self.cell_size * x, self.image_size / self.cell_size * y, 0, 0]
-    base_boxes = np.tile(np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]), [1, 1, self.boxes_per_cell, 1])
-
-    predict_boxes = base_boxes + predict_boxes
+    # extract predicted boxes, 4-D tensor of size [cell_size, cell_size, boxes_per_cell, 4] ==> (center_x, center_y, width, height)
+    predict_boxes = tf.reshape(prediction[:, :, self.num_classes + self.boxes_per_cell:], [self.cell_size, self.cell_size, self.boxes_per_cell, 4])
+
+    # map relative predicted values to absolute image coordinates and sizes
+    predict_boxes = predict_boxes * [self.image_size / self.cell_size, self.image_size / self.cell_size, self.image_size, self.image_size]
+
+    # create 3-D numpy array for shifting the relative center points by the cell offset
+    box_offsets = np.zeros([self.cell_size, self.cell_size, 4])
+    for y in range(self.cell_size):
+      for x in range(self.cell_size):
+        # apply cell specific offset (no width or height offset)
+        box_offsets[y, x, :] = [self.image_size / self.cell_size * x, self.image_size / self.cell_size * y, 0, 0]
+
+    # repeat the pattern boxes_per_cell times along a new 3rd dimension
+    box_offsets = np.tile(np.resize(box_offsets, [self.cell_size, self.cell_size, 1, 4]), [1, 1, self.boxes_per_cell, 1])
+
+    # apply bounding box offsets
+    predict_boxes = box_offsets + predict_boxes
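+
+    # Note: assuming the default config (image_size = 448, cell_size = 7), one cell covers
+    # 448 / 7 = 64 pixels, so box_offsets[y, x] = [64 * x, 64 * y, 0, 0]; e.g. the cell in
+    # row 2, column 3 shifts its predicted centers by (192, 128) pixels and leaves w, h untouched.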

-    iou_predict_truth = self.iou(predict_boxes, label[0:4])
-    #calculate C [cell_size, cell_size, boxes_per_cell]
-    C = iou_predict_truth * tf.reshape(response, [self.cell_size, self.cell_size, 1])
-
-    #calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    I = iou_predict_truth * tf.reshape(response, (self.cell_size, self.cell_size, 1))
-
-    max_I = tf.reduce_max(I, 2, keep_dims=True)
-
-    I = tf.cast((I >= max_I), tf.float32) * tf.reshape(response, (self.cell_size, self.cell_size, 1))
-
-    #calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    no_I = tf.ones_like(I, dtype=tf.float32) - I
-
-    p_C = predict[:, :, self.num_classes:self.num_classes + self.boxes_per_cell]
+    # compute IoU scores as a 3-D tensor of size [cell_size, cell_size, boxes_per_cell]
+    iou_predict_truth = self.iou(predict_boxes, label[0:4])
+
+    # mask the IoU scores with the center mask; this is the target confidence
+    t_confidence = iou_predict_truth * tf.reshape(t_center_mask, [self.cell_size, self.cell_size, 1])
+
+    # mask the IoU scores with the center mask to find the responsible box
+    masked_iou = iou_predict_truth * tf.reshape(t_center_mask, (self.cell_size, self.cell_size, 1))
+
+    # the last dimension is reduced to its highest element, the output tensor has size [cell_size, cell_size, 1]
+    max_iou = tf.reduce_max(masked_iou, 2, keepdims=True)
+
+    # keep only the box(es) with the highest IoU in the responsible cell
+    masked_iou = tf.cast((masked_iou >= max_iou), tf.float32) * tf.reshape(t_center_mask, (self.cell_size, self.cell_size, 1))
+
+    # invert masked_iou tensor (0 -> 1, 1 -> 0)
+    masked_iou_inv = tf.ones_like(masked_iou, dtype=tf.float32) - masked_iou
+
+    # get the predicted confidence scores, 3-D tensor of size [cell_size, cell_size, boxes_per_cell]
+    p_confidence = prediction[:, :, self.num_classes:self.num_classes + self.boxes_per_cell]

-    #calculate truth x,y,sqrt_w,sqrt_h 0-D
-    x = label[0]
-    y = label[1]
-
-    sqrt_w = tf.sqrt(tf.abs(label[2]))
-    sqrt_h = tf.sqrt(tf.abs(label[3]))
-    #sqrt_w = tf.abs(label[2])
-    #sqrt_h = tf.abs(label[3])
-
-    #calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    p_x = predict_boxes[:, :, :, 0]
-    p_y = predict_boxes[:, :, :, 1]
-
-    #p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
-    #p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
-    #p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
-    #p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
-    #p_sqrt_w = predict_boxes[:, :, :, 2]
-    #p_sqrt_h = predict_boxes[:, :, :, 3]
-    p_sqrt_w = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
-    p_sqrt_h = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))
+    # store true bounding box coordinates in 0-D tensors
+    t_x = label[0]
+    t_y = label[1]
+    t_sqrt_w = tf.sqrt(tf.abs(label[2]))
+    t_sqrt_h = tf.sqrt(tf.abs(label[3]))
+
+    # get predicted bounding box coordinates in 3-D tensors of size [cell_size, cell_size, boxes_per_cell]
+    p_x = predict_boxes[:, :, :, 0]
+    p_y = predict_boxes[:, :, :, 1]
+    # clamp the predicted sizes to [0, image_size] before taking the square root
+    p_sqrt_w = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
+    p_sqrt_h = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))
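+
+    # Note: comparing square roots instead of raw sizes follows the YOLO paper; the same
+    # absolute error costs more on a small box than on a large one, e.g.
+    # |sqrt(16) - sqrt(14)| ~= 0.26 while |sqrt(196) - sqrt(194)| ~= 0.07.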

-    #calculate truth p 1-D tensor [NUM_CLASSES]
-    P = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32)
-
-    #calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
-    p_P = predict[:, :, 0:self.num_classes]
-
-    #class_loss
-    class_loss = tf.nn.l2_loss(tf.reshape(objects, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale
-    #class_loss = tf.nn.l2_loss(tf.reshape(response, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale
-
-    #object_loss
-    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale
-    #object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * self.object_scale
-
-    #noobject_loss
-    #noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * self.noobject_scale
-    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale
-
-    #coord_loss
-    coord_loss = (tf.nn.l2_loss(I * (p_x - x)/(self.image_size/self.cell_size)) +
-                  tf.nn.l2_loss(I * (p_y - y)/(self.image_size/self.cell_size)) +
-                  tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w))/ self.image_size +
-                  tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h))/self.image_size) * self.coord_scale
-
-    nilboy = I
-
-    return num + 1, object_num, [loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss, loss[3] + coord_loss], predict, labels, nilboy
+    # create 1-D tensor of size [num_classes] filled with zeros except a single 1 at the position of the true class
+    t_class = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32)
+    # broadcast it against the [cell_size, cell_size, 1] region mask, so only cells inside the object region keep class targets
+    t_class = t_class * tf.reshape(t_region_mask, (self.cell_size, self.cell_size, 1))
+
+    # get the predicted class tensor of size [cell_size, cell_size, num_classes], masked the same way
+    p_class = prediction[:, :, 0:self.num_classes]
+    p_class = p_class * tf.reshape(t_region_mask, (self.cell_size, self.cell_size, 1))
+
+    # compute L2-loss of predicting the true class; only cells inside the object region count
+    class_loss = tf.nn.l2_loss(tf.reshape(t_region_mask, (self.cell_size, self.cell_size, 1)) * (p_class - t_class)) * self.class_scale
+
+    # compute L2-loss of detecting objects correctly
+    object_loss = tf.nn.l2_loss(masked_iou * (p_confidence - t_confidence)) * self.object_scale
+
+    # compute L2-loss of detecting empty cells with no objects
+    noobject_loss = tf.nn.l2_loss(masked_iou_inv * (p_confidence)) * self.noobject_scale
+
+    # compute L2-loss of finding the right coordinates
+    coord_loss = (tf.nn.l2_loss(masked_iou * (p_x - t_x) / (self.image_size / self.cell_size)) +
+                  tf.nn.l2_loss(masked_iou * (p_y - t_y) / (self.image_size / self.cell_size)) +
+                  tf.nn.l2_loss(masked_iou * (p_sqrt_w - t_sqrt_w)) / self.image_size +
+                  tf.nn.l2_loss(masked_iou * (p_sqrt_h - t_sqrt_h)) / self.image_size) * self.coord_scale
+
+    return cnt + 1, object_count, [losses[0] + class_loss, losses[1] + object_loss, losses[2] + noobject_loss, losses[3] + coord_loss], prediction, labels

-  def loss(self, predicts, labels, objects_num):
-    """Add Loss to all the trainable variables
-
-    Args:
-      predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell]
-      ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell)
-      labels : 3-D tensor of [batch_size, max_objects, 5]
-      objects_num: 1-D tensor [batch_size]
-    """
-    class_loss = tf.constant(0, tf.float32)
-    object_loss = tf.constant(0, tf.float32)
-    noobject_loss = tf.constant(0, tf.float32)
-    coord_loss = tf.constant(0, tf.float32)
-    loss = [0, 0, 0, 0]
-    for i in range(self.batch_size):
-      predict = predicts[i, :, :, :]
-      label = labels[i, :, :]
-      object_num = objects_num[i]
-      nilboy = tf.ones([7,7,2])
-      tuple_results = tf.while_loop(self.cond1, self.body1, [tf.constant(0), object_num, [class_loss, object_loss, noobject_loss, coord_loss], predict, label, nilboy])
-      for j in range(4):
-        loss[j] = loss[j] + tuple_results[2][j]
-      nilboy = tuple_results[5]
-
-    tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3])/self.batch_size)
-
-    tf.summary.scalar('class_loss', loss[0]/self.batch_size)
-    tf.summary.scalar('object_loss', loss[1]/self.batch_size)
-    tf.summary.scalar('noobject_loss', loss[2]/self.batch_size)
-    tf.summary.scalar('coord_loss', loss[3]/self.batch_size)
-    tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3])/self.batch_size )
-
-    return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy
+  def loss(self, predictions, labels, object_counts):
+    """ add loss to all the trainable variables
+    Args:
+      predictions: 4-D tensor [batch_size, cell_size, cell_size, num_classes + boxes_per_cell * 5]
+      labels: 3-D tensor [batch_size, max_objects, (x_center, y_center, w, h, class)]
+      object_counts: 1-D tensor [batch_size]
+    """
+    # loss variables
+    class_loss = tf.constant(0, tf.float32)
+    object_loss = tf.constant(0, tf.float32)
+    noobject_loss = tf.constant(0, tf.float32)
+    coord_loss = tf.constant(0, tf.float32)
+    losses = [0, 0, 0, 0]
+
+    # iterate through the whole training batch
+    for i in range(self.batch_size):
+      # get current prediction tensor of size [cell_size, cell_size, num_classes + 5 * boxes_per_cell]
+      current_prediction = predictions[i, :, :, :]
+      # get current label tensor of size [max_objects, 5]
+      current_label = labels[i, :, :]
+      # number of true objects in the current sample
+      object_cnt = object_counts[i]
+      # accumulate the loss over all objects of the current sample (`object_cnt` iterations)
+      results = tf.while_loop(self.loss_loop_cond, self.loss_loop_body, [tf.constant(0), object_cnt, [class_loss, object_loss, noobject_loss, coord_loss], current_prediction, current_label])
+      for j in range(4):
+        # add the computed losses to the `losses` array
+        losses[j] = losses[j] + results[2][j]
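+      # Note: tf.while_loop threads the loop variables (cnt, object_count, losses,
+      # prediction, labels) through loss_loop_body until loss_loop_cond returns False;
+      # results[2] therefore holds the four accumulated loss terms for this sample.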
+
+    losses_sum = losses[0] + losses[1] + losses[2] + losses[3]
+
+    tf.add_to_collection('losses', losses_sum / self.batch_size)
+
+    tf.summary.scalar('class_loss', losses[0] / self.batch_size)
+    tf.summary.scalar('object_loss', losses[1] / self.batch_size)
+    tf.summary.scalar('noobject_loss', losses[2] / self.batch_size)
+    tf.summary.scalar('coord_loss', losses[3] / self.batch_size)
+    tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - losses_sum / self.batch_size)
+
+    return tf.add_n(tf.get_collection('losses'), name='total_loss')