From 8777f8a4f4e7ec41f365f99973bdf65f77ceafb3 Mon Sep 17 00:00:00 2001
From: Fju
Date: Sun, 21 Oct 2018 18:50:18 +0200
Subject: [PATCH] Comments in `yolo_tiny_net.py`

I added comments that explain how the loss is calculated in the
`yolo_tiny_net.py` file. A `.gitignore` was added to avoid committing model
checkpoints or `*.pyc` files.
---
 .gitignore                |   2 +
 yolo/net/yolo_tiny_net.py | 494 +++++++++++++++++++-------------------
 2 files changed, 249 insertions(+), 247 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..65e6ca4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+models/
+*.pyc

diff --git a/yolo/net/yolo_tiny_net.py b/yolo/net/yolo_tiny_net.py
index 48dfa84..5323f05 100644
--- a/yolo/net/yolo_tiny_net.py
+++ b/yolo/net/yolo_tiny_net.py
@@ -4,305 +4,305 @@
 import tensorflow as tf
 import numpy as np
-import re
+#import re

 from yolo.net.net import Net

 class YoloTinyNet(Net):

-  def __init__(self, common_params, net_params, test=False):
-    """
-    common params: a params dict
-    net_params   : a params dict
-    """
-    super(YoloTinyNet, self).__init__(common_params, net_params)
-    #process params
-    self.image_size = int(common_params['image_size'])
-    self.num_classes = int(common_params['num_classes'])
-    self.cell_size = int(net_params['cell_size'])
-    self.boxes_per_cell = int(net_params['boxes_per_cell'])
-    self.batch_size = int(common_params['batch_size'])
-    self.weight_decay = float(net_params['weight_decay'])
-
-    if not test:
-      self.object_scale = float(net_params['object_scale'])
-      self.noobject_scale = float(net_params['noobject_scale'])
-      self.class_scale = float(net_params['class_scale'])
-      self.coord_scale = float(net_params['coord_scale'])
+  def __init__(self, common_params, net_params, test=False):
+    """
+    common_params: a params dict
+    net_params: a params dict
+    """
+    super(YoloTinyNet, self).__init__(common_params, net_params)
+    # process params
+    self.image_size = int(common_params['image_size'])
+    self.num_classes = int(common_params['num_classes'])
+    self.cell_size = int(net_params['cell_size'])
+    self.boxes_per_cell = int(net_params['boxes_per_cell'])
+    self.batch_size = int(common_params['batch_size'])
+    self.weight_decay = float(net_params['weight_decay'])
+
+    if not test:
+      self.object_scale = float(net_params['object_scale'])
+      self.noobject_scale = float(net_params['noobject_scale'])
+      self.class_scale = float(net_params['class_scale'])
+      self.coord_scale = float(net_params['coord_scale'])

-  def inference(self, images):
-    """Build the yolo model
-
-    Args:
-      images: 4-D tensor [batch_size, image_height, image_width, channels]
-    Returns:
-      predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell]
-    """
-    conv_num = 1
+  def inference(self, images):
+    """Build the yolo model
+    Args:
+      images: 4-D tensor [batch_size, image_height, image_width, channels]
+    Returns:
+      predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + boxes_per_cell * 5]
+                ==> (classes..., box confidences..., then x_center, y_center, w, h per box)
+    """
+    conv_num = 1

-    temp_conv = self.conv2d('conv' + str(conv_num), images, [3, 3, 3, 16], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), images, [3, 3, 3, 16], stride=1)
+    conv_num += 1

-    temp_pool = self.max_pool(temp_conv, [2, 2], 2)
+    temp_pool = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 16, 32], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 16, 32], stride=1)
+    conv_num += 1
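+
+    # Note: assuming the default config (image_size = 448, cell_size = 7), every
+    # max_pool layer in this stack halves the spatial resolution, so the six pools
+    # shrink the 448x448 input to 448 / 2**6 = 7x7, one position per grid cell.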

-    temp_pool = self.max_pool(temp_conv, [2, 2], 2)
+    temp_pool = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 32, 64], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_pool, [3, 3, 32, 64], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 64, 128], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 64, 128], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 128, 256], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 128, 256], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 256, 512], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 256, 512], stride=1)
+    conv_num += 1

-    temp_conv = self.max_pool(temp_conv, [2, 2], 2)
+    temp_conv = self.max_pool(temp_conv, [2, 2], 2)

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 512, 1024], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 512, 1024], stride=1)
+    conv_num += 1

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
+    conv_num += 1

-    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
-    conv_num += 1
+    temp_conv = self.conv2d('conv' + str(conv_num), temp_conv, [3, 3, 1024, 1024], stride=1)
+    conv_num += 1

-    temp_conv = tf.transpose(temp_conv, (0, 3, 1, 2))
+    temp_conv = tf.transpose(temp_conv, (0, 3, 1, 2))

-    #Fully connected layer
-    local1 = self.local('local1', temp_conv, self.cell_size * self.cell_size * 1024, 256)
+    # fully connected layers
+    local1 = self.local('local1', temp_conv, self.cell_size * self.cell_size * 1024, 256)

-    local2 = self.local('local2', local1, 256, 4096)
+    local2 = self.local('local2', local1, 256, 4096)

-    local3 = self.local('local3', local2, 4096, self.cell_size * self.cell_size * (self.num_classes + self.boxes_per_cell * 5), leaky=False, pretrain=False, train=True)
+    local3 = self.local('local3', local2, 4096, self.cell_size * self.cell_size * (self.num_classes + self.boxes_per_cell * 5), leaky=False, pretrain=False, train=True)

-    n1 = self.cell_size * self.cell_size * self.num_classes
-
-    n2 = n1 + self.cell_size * self.cell_size * self.boxes_per_cell
-
-    class_probs = tf.reshape(local3[:, 0:n1], (-1, self.cell_size, self.cell_size, self.num_classes))
-    scales = tf.reshape(local3[:, n1:n2], (-1, self.cell_size, self.cell_size, self.boxes_per_cell))
-    boxes = tf.reshape(local3[:, n2:], (-1, self.cell_size, self.cell_size, self.boxes_per_cell * 4))
-
-    local3 = tf.concat([class_probs, scales, boxes], 3)
-
-    predicts = local3
-
-    return predicts
+    n1 = self.cell_size * self.cell_size * self.num_classes
+    n2 = n1 + self.cell_size * self.cell_size * self.boxes_per_cell
+
+    class_probs = tf.reshape(local3[:, 0:n1], (-1, self.cell_size, self.cell_size, self.num_classes))
+    scales = tf.reshape(local3[:, n1:n2], (-1, self.cell_size, self.cell_size, self.boxes_per_cell))
+    boxes = tf.reshape(local3[:, n2:], (-1, self.cell_size, self.cell_size, self.boxes_per_cell * 4))
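+
+    # Note: assuming the default config (cell_size = 7, num_classes = 20, boxes_per_cell = 2),
+    # local3 emits 7 * 7 * (20 + 2 * 5) = 1470 values per image, sliced above into
+    # n1 = 7 * 7 * 20 = 980 class scores, n2 - n1 = 7 * 7 * 2 = 98 box confidences
+    # and 7 * 7 * 2 * 4 = 392 box coordinates.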
+
+    local3 = tf.concat([class_probs, scales, boxes], 3)
+
+    predicts = local3
+
+    return predicts

-  def iou(self, boxes1, boxes2):
-    """calculate ious
-    Args:
-      boxes1: 4-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL, 4] ====> (x_center, y_center, w, h)
-      boxes2: 1-D tensor [4] ===> (x_center, y_center, w, h)
-    Return:
-      iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    """
-    boxes1 = tf.stack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] - boxes1[:, :, :, 3] / 2,
-                       boxes1[:, :, :, 0] + boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] + boxes1[:, :, :, 3] / 2])
-    boxes1 = tf.transpose(boxes1, [1, 2, 3, 0])
-    boxes2 = tf.stack([boxes2[0] - boxes2[2] / 2, boxes2[1] - boxes2[3] / 2,
-                       boxes2[0] + boxes2[2] / 2, boxes2[1] + boxes2[3] / 2])
-
-    #calculate the left up point
-    lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2])
-    rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:])
-
-    #intersection
-    intersection = rd - lu
-
-    inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1]
-
-    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)
-
-    inter_square = mask * inter_square
-
-    #calculate the boxs1 square and boxs2 square
-    square1 = (boxes1[:, :, :, 2] - boxes1[:, :, :, 0]) * (boxes1[:, :, :, 3] - boxes1[:, :, :, 1])
-    square2 = (boxes2[2] - boxes2[0]) * (boxes2[3] - boxes2[1])
-
-    return inter_square/(square1 + square2 - inter_square + 1e-6)
+  def iou(self, p_boxes, t_box):
+    """ calculate ious (iou: intersection over union)
+    Args:
+      p_boxes: predicted boxes, 4-D tensor [cell_size, cell_size, boxes_per_cell, (x_center, y_center, w, h)]
+      t_box: true box, 1-D tensor [(x_center, y_center, w, h)]
+    Return:
+      iou: intersection over union, 3-D tensor [cell_size, cell_size, boxes_per_cell]
+    """
+    # convert boxes from [center_x, center_y, width, height] to [left, top, right, bottom]
+    p_boxes = tf.stack([p_boxes[:, :, :, 0] - p_boxes[:, :, :, 2] / 2, p_boxes[:, :, :, 1] - p_boxes[:, :, :, 3] / 2,
+                        p_boxes[:, :, :, 0] + p_boxes[:, :, :, 2] / 2, p_boxes[:, :, :, 1] + p_boxes[:, :, :, 3] / 2])
+    # transpose boxes tensor so that the box coordinates become the last dimension
+    p_boxes = tf.transpose(p_boxes, [1, 2, 3, 0])
+
+    # convert true box from [center_x, center_y, width, height] to [left, top, right, bottom]
+    t_box = tf.stack([t_box[0] - t_box[2] / 2, t_box[1] - t_box[3] / 2,
+                      t_box[0] + t_box[2] / 2, t_box[1] + t_box[3] / 2])
+
+    # find top-left and bottom-right point of the intersection (max/min of true box and predicted box)
+    top_left = tf.maximum(p_boxes[:, :, :, 0:2], t_box[0:2])    # [:, :, :, 2] ==> (left, top)
+    bottom_right = tf.minimum(p_boxes[:, :, :, 2:], t_box[2:])  # [:, :, :, 2] ==> (right, bottom)
+    # intersection [:, :, :, 2] ==> (right - left, bottom - top)
+    intersection = bottom_right - top_left
+
+    # calculate area of the intersection rectangle (A = a * b)
+    inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1]
+
+    # prevent negative areas where the boxes do not overlap at all
+    mask = tf.cast(intersection[:, :, :, 0] > 0, tf.float32) * tf.cast(intersection[:, :, :, 1] > 0, tf.float32)
+    inter_square = mask * inter_square
+
+    # calculate the areas of both rectangles (A = a * b)
+    square1 = (p_boxes[:, :, :, 2] - p_boxes[:, :, :, 0]) * (p_boxes[:, :, :, 3] - p_boxes[:, :, :, 1])
+    square2 = (t_box[2] - t_box[0]) * (t_box[3] - t_box[1])
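+
+    # Worked example (illustrative values, not from the code): a predicted box
+    # (x, y, w, h) = (3, 3, 2, 2) has corners (2, 2, 4, 4); a true box (2, 2, 2, 2)
+    # has corners (1, 1, 3, 3). Then top_left = (2, 2), bottom_right = (3, 3),
+    # inter_square = 1, square1 = square2 = 4, and the iou is 1 / (4 + 4 - 1) ~= 0.14.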
+
+    # IoU: Intersection over Union (see https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/)
+    # the union is equal to the sum of both areas minus the intersection
+    return inter_square / (square1 + square2 - inter_square + 1e-6)  # add tiny fraction to prevent division by 0

-  def cond1(self, num, object_num, loss, predict, label, nilboy):
-    """
-    if num < object_num
-    """
-    return num < object_num
+  def loss_loop_cond(self, cnt, object_count, losses, prediction, labels):
+    # keep looping while the object index is smaller than the object count of the current sample
+    return cnt < object_count

-  def body1(self, num, object_num, loss, predict, labels, nilboy):
-    """
-    calculate loss
-    Args:
-      predict: 3-D tensor [cell_size, cell_size, 5 * boxes_per_cell]
-      labels : [max_objects, 5] (x_center, y_center, w, h, class)
-    """
-    label = labels[num:num+1, :]
-    label = tf.reshape(label, [-1])
+  def loss_loop_body(self, cnt, object_count, losses, prediction, labels):
+    """ calculate the loss contribution of a single object
+    Args:
+      prediction: 3-D tensor [cell_size, cell_size, num_classes + boxes_per_cell * 5]
+      labels: 2-D tensor [max_objects, (x_center, y_center, w, h, class)]
+    """
+    # get current label; it contains the center point (x, y), the size (w, h) and the class index
+    label = labels[cnt:cnt+1, :]
+    label = tf.reshape(label, [-1])

-    #calculate objects tensor [CELL_SIZE, CELL_SIZE]
-    min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
-    max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)
-
-    min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
-    max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)
-
-    min_x = tf.floor(min_x)
-    min_y = tf.floor(min_y)
-
-    max_x = tf.ceil(max_x)
-    max_y = tf.ceil(max_y)
-
-    temp = tf.cast(tf.stack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
-    objects = tf.ones(temp, tf.float32)
-
-    temp = tf.cast(tf.stack([min_y, self.cell_size - max_y, min_x, self.cell_size - max_x]), tf.int32)
-    temp = tf.reshape(temp, (2, 2))
-    objects = tf.pad(objects, temp, "CONSTANT")
+    # store the edges of the true box, measured in cells rather than pixels
+    t_left = tf.floor((label[0] - label[2] / 2) / (self.image_size / self.cell_size))
+    t_right = tf.ceil((label[0] + label[2] / 2) / (self.image_size / self.cell_size))
+    t_top = tf.floor((label[1] - label[3] / 2) / (self.image_size / self.cell_size))
+    t_bottom = tf.ceil((label[1] + label[3] / 2) / (self.image_size / self.cell_size))

+    #t_bottom = tf.Print(t_bottom, [t_left, t_top, t_right, t_bottom], 'coordinates: ')
+    # generate tensor of size [cell_size, cell_size] filled with zeros except the region where the object is:
+    # 1. 1-D tensor containing width and height of the true box (width = t_right - t_left, height = t_bottom - t_top)
+    temp = tf.cast(tf.stack([t_bottom - t_top, t_right - t_left]), dtype=tf.int32)
+    # 2. 2-D tensor of size [height, width] filled with 1.0
+    t_region_mask = tf.ones(temp, tf.float32)
+    # 3. 2-D tensor of size [2, 2] that contains the paddings ((pad_top, pad_bottom), (pad_left, pad_right))
+    temp = tf.reshape(tf.cast(tf.stack([t_top, self.cell_size - t_bottom, t_left, self.cell_size - t_right]), tf.int32), (2, 2))
+    # 4. apply padding; the resulting 2-D tensor has size [cell_size, cell_size], the padded area is filled with zeros
+    t_region_mask = tf.pad(t_region_mask, temp, "CONSTANT")
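+
+    # Worked example (illustrative values, not from the code): with cell_size = 4 and an
+    # object spanning t_top = 1, t_bottom = 3, t_left = 0, t_right = 2, tf.ones([2, 2])
+    # is padded with ((1, 1), (0, 2)) and yields
+    #   [[0, 0, 0, 0],
+    #    [1, 1, 0, 0],
+    #    [1, 1, 0, 0],
+    #    [0, 0, 0, 0]]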

-    #calculate objects tensor [CELL_SIZE, CELL_SIZE]
-    #calculate responsible tensor [CELL_SIZE, CELL_SIZE]
-    center_x = label[0] / (self.image_size / self.cell_size)
-    center_x = tf.floor(center_x)
-
-    center_y = label[1] / (self.image_size / self.cell_size)
-    center_y = tf.floor(center_y)
-
-    response = tf.ones([1, 1], tf.float32)
-
-    temp = tf.cast(tf.stack([center_y, self.cell_size - center_y - 1, center_x, self.cell_size - center_x - 1]), tf.int32)
-    temp = tf.reshape(temp, (2, 2))
-    response = tf.pad(response, temp, "CONSTANT")
-    #objects = response
+    # store the cell coordinates of the true box's center
+    t_center_x, t_center_y = tf.floor(label[0] / (self.image_size / self.cell_size)), tf.floor(label[1] / (self.image_size / self.cell_size))
+
+    # generate tensor of size [cell_size, cell_size] filled with zeros except the cell that contains the center of the true object:
+    # 1. 2-D tensor of size [1, 1]
+    t_center_mask = tf.ones([1, 1], tf.float32)
+    # 2. 2-D tensor of size [2, 2] that contains the paddings ((pad_top, pad_bottom), (pad_left, pad_right))
+    temp = tf.reshape(tf.cast(tf.stack([t_center_y, self.cell_size - t_center_y - 1, t_center_x, self.cell_size - t_center_x - 1]), tf.int32), (2, 2))
+    # 3. apply padding; the resulting 2-D tensor has size [cell_size, cell_size], the padded area is filled with zeros
+    t_center_mask = tf.pad(t_center_mask, temp, "CONSTANT")

-    #calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]
-
-    predict_boxes = tf.reshape(predict_boxes, [self.cell_size, self.cell_size, self.boxes_per_cell, 4])
-
-    predict_boxes = predict_boxes * [self.image_size / self.cell_size, self.image_size / self.cell_size, self.image_size, self.image_size]
-
-    base_boxes = np.zeros([self.cell_size, self.cell_size, 4])
-
-    for y in range(self.cell_size):
-      for x in range(self.cell_size):
-        #nilboy
-        base_boxes[y, x, :] = [self.image_size / self.cell_size * x, self.image_size / self.cell_size * y, 0, 0]
-    base_boxes = np.tile(np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]), [1, 1, self.boxes_per_cell, 1])
-
-    predict_boxes = base_boxes + predict_boxes
+    # extract predicted boxes, 4-D tensor of size [cell_size, cell_size, boxes_per_cell, 4] ==> (center_x, center_y, width, height)
+    predict_boxes = tf.reshape(prediction[:, :, self.num_classes + self.boxes_per_cell:], [self.cell_size, self.cell_size, self.boxes_per_cell, 4])
+
+    # map relative predicted values to absolute image coordinates and sizes
+    predict_boxes = predict_boxes * [self.image_size / self.cell_size, self.image_size / self.cell_size, self.image_size, self.image_size]
+
+    # create 3-D numpy array for shifting the relative center points by the cell offset
+    box_offsets = np.zeros([self.cell_size, self.cell_size, 4])
+    for y in range(self.cell_size):
+      for x in range(self.cell_size):
+        # apply cell specific offset (no width or height offset)
+        box_offsets[y, x, :] = [self.image_size / self.cell_size * x, self.image_size / self.cell_size * y, 0, 0]
+
+    # repeat the pattern boxes_per_cell times along a new 3rd dimension
+    box_offsets = np.tile(np.resize(box_offsets, [self.cell_size, self.cell_size, 1, 4]), [1, 1, self.boxes_per_cell, 1])
+
+    # apply bounding box offsets
+    predict_boxes = box_offsets + predict_boxes
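+
+    # Note: assuming the default config (image_size = 448, cell_size = 7), one cell covers
+    # 448 / 7 = 64 pixels, so box_offsets[y, x] = [64 * x, 64 * y, 0, 0]; e.g. the cell in
+    # row 2, column 3 shifts its predicted centers by (192, 128) pixels and leaves w, h untouched.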

-    iou_predict_truth = self.iou(predict_boxes, label[0:4])
-    #calculate C [cell_size, cell_size, boxes_per_cell]
-    C = iou_predict_truth * tf.reshape(response, [self.cell_size, self.cell_size, 1])
-
-    #calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    I = iou_predict_truth * tf.reshape(response, (self.cell_size, self.cell_size, 1))
-
-    max_I = tf.reduce_max(I, 2, keep_dims=True)
-
-    I = tf.cast((I >= max_I), tf.float32) * tf.reshape(response, (self.cell_size, self.cell_size, 1))
-
-    #calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    no_I = tf.ones_like(I, dtype=tf.float32) - I
-
-    p_C = predict[:, :, self.num_classes:self.num_classes + self.boxes_per_cell]
+    # compute IoU scores as a 3-D tensor of size [cell_size, cell_size, boxes_per_cell]
+    iou_predict_truth = self.iou(predict_boxes, label[0:4])
+
+    # mask the IoU scores with the center mask; this is the target confidence
+    t_confidence = iou_predict_truth * tf.reshape(t_center_mask, [self.cell_size, self.cell_size, 1])
+
+    # mask the IoU scores with the center mask to find the responsible box
+    masked_iou = iou_predict_truth * tf.reshape(t_center_mask, (self.cell_size, self.cell_size, 1))
+
+    # the last dimension is reduced to its highest element, the output tensor has size [cell_size, cell_size, 1]
+    max_iou = tf.reduce_max(masked_iou, 2, keepdims=True)
+
+    # keep only the box(es) with the highest IoU in the responsible cell
+    masked_iou = tf.cast((masked_iou >= max_iou), tf.float32) * tf.reshape(t_center_mask, (self.cell_size, self.cell_size, 1))
+
+    # invert masked_iou tensor (0 -> 1, 1 -> 0)
+    masked_iou_inv = tf.ones_like(masked_iou, dtype=tf.float32) - masked_iou
+
+    # get the predicted confidence scores, 3-D tensor of size [cell_size, cell_size, boxes_per_cell]
+    p_confidence = prediction[:, :, self.num_classes:self.num_classes + self.boxes_per_cell]

-    #calculate truth x,y,sqrt_w,sqrt_h 0-D
-    x = label[0]
-    y = label[1]
-
-    sqrt_w = tf.sqrt(tf.abs(label[2]))
-    sqrt_h = tf.sqrt(tf.abs(label[3]))
-    #sqrt_w = tf.abs(label[2])
-    #sqrt_h = tf.abs(label[3])
-
-    #calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
-    p_x = predict_boxes[:, :, :, 0]
-    p_y = predict_boxes[:, :, :, 1]
-
-    #p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
-    #p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
-    #p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
-    #p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
-    #p_sqrt_w = predict_boxes[:, :, :, 2]
-    #p_sqrt_h = predict_boxes[:, :, :, 3]
-    p_sqrt_w = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
-    p_sqrt_h = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))
+    # store true bounding box coordinates in 0-D tensors
+    t_x = label[0]
+    t_y = label[1]
+    t_sqrt_w = tf.sqrt(tf.abs(label[2]))
+    t_sqrt_h = tf.sqrt(tf.abs(label[3]))
+
+    # get predicted bounding box coordinates in 3-D tensors of size [cell_size, cell_size, boxes_per_cell]
+    p_x = predict_boxes[:, :, :, 0]
+    p_y = predict_boxes[:, :, :, 1]
+    # clamp the predicted sizes to [0, image_size] before taking the square root
+    p_sqrt_w = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2])))
+    p_sqrt_h = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3])))
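+
+    # Note: comparing square roots instead of raw sizes follows the YOLO paper; the same
+    # absolute error costs more on a small box than on a large one, e.g.
+    # |sqrt(16) - sqrt(14)| ~= 0.26 while |sqrt(196) - sqrt(194)| ~= 0.07.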

-    #calculate truth p 1-D tensor [NUM_CLASSES]
-    P = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32)
-
-    #calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
-    p_P = predict[:, :, 0:self.num_classes]
-
-    #class_loss
-    class_loss = tf.nn.l2_loss(tf.reshape(objects, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale
-    #class_loss = tf.nn.l2_loss(tf.reshape(response, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale
-
-    #object_loss
-    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale
-    #object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * self.object_scale
-
-    #noobject_loss
-    #noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * self.noobject_scale
-    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale
-
-    #coord_loss
-    coord_loss = (tf.nn.l2_loss(I * (p_x - x)/(self.image_size/self.cell_size)) +
-                  tf.nn.l2_loss(I * (p_y - y)/(self.image_size/self.cell_size)) +
-                  tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w))/ self.image_size +
-                  tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h))/self.image_size) * self.coord_scale
-
-    nilboy = I
-
-    return num + 1, object_num, [loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss, loss[3] + coord_loss], predict, labels, nilboy
+    # create 1-D tensor of size [num_classes] filled with zeros except a single 1 at the position of the true class
+    t_class = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32)
+    # broadcast it against the [cell_size, cell_size, 1] region mask, so only cells inside the object region keep class targets
+    t_class = t_class * tf.reshape(t_region_mask, (self.cell_size, self.cell_size, 1))
+
+    # get the predicted class tensor of size [cell_size, cell_size, num_classes], masked the same way
+    p_class = prediction[:, :, 0:self.num_classes]
+    p_class = p_class * tf.reshape(t_region_mask, (self.cell_size, self.cell_size, 1))
+
+    # compute L2-loss of predicting the true class; only cells inside the object region count
+    class_loss = tf.nn.l2_loss(tf.reshape(t_region_mask, (self.cell_size, self.cell_size, 1)) * (p_class - t_class)) * self.class_scale
+
+    # compute L2-loss of detecting objects correctly
+    object_loss = tf.nn.l2_loss(masked_iou * (p_confidence - t_confidence)) * self.object_scale
+
+    # compute L2-loss of detecting empty cells with no objects
+    noobject_loss = tf.nn.l2_loss(masked_iou_inv * (p_confidence)) * self.noobject_scale
+
+    # compute L2-loss of finding the right coordinates
+    coord_loss = (tf.nn.l2_loss(masked_iou * (p_x - t_x) / (self.image_size / self.cell_size)) +
+                  tf.nn.l2_loss(masked_iou * (p_y - t_y) / (self.image_size / self.cell_size)) +
+                  tf.nn.l2_loss(masked_iou * (p_sqrt_w - t_sqrt_w)) / self.image_size +
+                  tf.nn.l2_loss(masked_iou * (p_sqrt_h - t_sqrt_h)) / self.image_size) * self.coord_scale
+
+    return cnt + 1, object_count, [losses[0] + class_loss, losses[1] + object_loss, losses[2] + noobject_loss, losses[3] + coord_loss], prediction, labels

-  def loss(self, predicts, labels, objects_num):
-    """Add Loss to all the trainable variables
-
-    Args:
-      predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell]
-      ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell)
-      labels : 3-D tensor of [batch_size, max_objects, 5]
-      objects_num: 1-D tensor [batch_size]
-    """
-    class_loss = tf.constant(0, tf.float32)
-    object_loss = tf.constant(0, tf.float32)
-    noobject_loss = tf.constant(0, tf.float32)
-    coord_loss = tf.constant(0, tf.float32)
-    loss = [0, 0, 0, 0]
-    for i in range(self.batch_size):
-      predict = predicts[i, :, :, :]
-      label = labels[i, :, :]
-      object_num = objects_num[i]
-      nilboy = tf.ones([7,7,2])
-      tuple_results = tf.while_loop(self.cond1, self.body1, [tf.constant(0), object_num, [class_loss, object_loss, noobject_loss, coord_loss], predict, label, nilboy])
-      for j in range(4):
-        loss[j] = loss[j] + tuple_results[2][j]
-      nilboy = tuple_results[5]
-
-    tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3])/self.batch_size)
-
-    tf.summary.scalar('class_loss', loss[0]/self.batch_size)
-    tf.summary.scalar('object_loss', loss[1]/self.batch_size)
-    tf.summary.scalar('noobject_loss', loss[2]/self.batch_size)
-    tf.summary.scalar('coord_loss', loss[3]/self.batch_size)
-    tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3])/self.batch_size )
-
-    return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy
+  def loss(self, predictions, labels, object_counts):
+    """ add loss to all the trainable variables
+    Args:
+      predictions: 4-D tensor [batch_size, cell_size, cell_size, num_classes + boxes_per_cell * 5]
+      labels: 3-D tensor [batch_size, max_objects, (x_center, y_center, w, h, class)]
+      object_counts: 1-D tensor [batch_size]
+    """
+    # loss variables
+    class_loss = tf.constant(0, tf.float32)
+    object_loss = tf.constant(0, tf.float32)
+    noobject_loss = tf.constant(0, tf.float32)
+    coord_loss = tf.constant(0, tf.float32)
+    losses = [0, 0, 0, 0]
+
+    # iterate through the whole training batch
+    for i in range(self.batch_size):
+      # get current prediction tensor of size [cell_size, cell_size, num_classes + 5 * boxes_per_cell]
+      current_prediction = predictions[i, :, :, :]
+      # get current label tensor of size [max_objects, 5]
+      current_label = labels[i, :, :]
+      # number of true objects in the current sample
+      object_cnt = object_counts[i]
+      # accumulate the loss over all objects of the current sample (`object_cnt` iterations)
+      results = tf.while_loop(self.loss_loop_cond, self.loss_loop_body, [tf.constant(0), object_cnt, [class_loss, object_loss, noobject_loss, coord_loss], current_prediction, current_label])
+      for j in range(4):
+        # add the computed losses to the `losses` array
+        losses[j] = losses[j] + results[2][j]
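+      # Note: tf.while_loop threads the loop variables (cnt, object_count, losses,
+      # prediction, labels) through loss_loop_body until loss_loop_cond returns False;
+      # results[2] therefore holds the four accumulated loss terms for this sample.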
+
+    losses_sum = losses[0] + losses[1] + losses[2] + losses[3]
+
+    tf.add_to_collection('losses', losses_sum / self.batch_size)
+
+    tf.summary.scalar('class_loss', losses[0] / self.batch_size)
+    tf.summary.scalar('object_loss', losses[1] / self.batch_size)
+    tf.summary.scalar('noobject_loss', losses[2] / self.batch_size)
+    tf.summary.scalar('coord_loss', losses[3] / self.batch_size)
+    tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - losses_sum / self.batch_size)
+
+    return tf.add_n(tf.get_collection('losses'), name='total_loss')