67 changes: 45 additions & 22 deletions sheeping/sheep_localizer.py
@@ -40,45 +40,46 @@ def score_threshold(self, value):
self.model.score_thresh = value

def build_model(self):
# TODO: determine the correct model type
model = "correct_model"
if self.model_type == 'ssd300':
model = SSD300(n_fg_class=1)
elif self.model_type == 'ssd512':
model = SSD512(n_fg_class=1)
else:
raise NotImplementedError("Sheep Localizer is not prepared to work with model {}".format(self.model_type))

model.score_thresh = self._score_threshold

# TODO: transfer to GPU if necessary
if self.gpu_id >= 0:
chainer.backends.cuda.get_device_from_id(self.gpu_id).use()
model.to_gpu()

# TODO: load weights
with np.load(self.model_file) as f:
chainer.serializers.NpzDeserializer(f).load(model)

self.initialized = True
self.model = model

def resize(self, image, is_array=True):
if is_array:
image = Image.fromarray(image)

# TODO: calculate corresponding scaling factor for x and y (used for scaling back to original size)
scale_x = 1.0
scale_y = 1.0

# TODO: resize to self.input_size (hint: pay attention to resize algorithm)

scale_x = image.size[0] / self.input_size[0]
scale_y = image.size[1] / self.input_size[1]
image = image.resize(self.input_size, Image.BICUBIC)
image = np.asarray(image)
return image, (scale_x, scale_y)

def preprocess(self, image, make_copy=True):
if make_copy:
image = image.copy()
# TODO: reorder channels to the CHW format
# TODO: convert to the correct dtype (float32)
# TODO: subtract mean (hint: mean is saved in this class)
image = image.transpose(2, 0, 1)
image = image.astype(np.float32)
image -= self.mean
return image

def localize(self, processed_image):
if not self.initialized:
self.build_model()
# TODO: get bounding boxes and scores
bboxes = [[0, 0, 100, 100]]
scores = [[0.9]]
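# predict() returns per-image lists of bounding boxes, labels and scores;
# the labels are discarded because this localizer has only one foreground class (sheep)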
bboxes, _, scores = self.model.predict([processed_image])

return bboxes[0], scores[0]

@@ -88,10 +89,32 @@ def visualize_results(self, image, bboxes, scores, scaling=(1, 1)):
if len(bbox) != 4:
continue

# TODO: scale bounding box with scale factor (see resize function)
# HINT: the y axis comes first in bounding boxes, order is [top(y), left(x), bottom(y), right(x)]

# TODO: visualize the found item with a rectangle and render the score as text

# scale bounding box with scale factor
bbox = [bbox[0] * scaling[1], bbox[1] * scaling[0], bbox[2] * scaling[1], bbox[3] * scaling[0]]
bbox = list(map(lambda x: int(round(x)), bbox))

width = bbox[3] - bbox[1]
height = bbox[2] - bbox[0]

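# scale the line thickness with the image size so the box stays visible at any resolution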
thickness = self.thickness_base + round(max(image.shape) * self.thickness_scale)
cv2.rectangle(image, (bbox[1], bbox[0]), (bbox[1] + width, bbox[0] + height), self.color, thickness)

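# render the score as white text on a filled box anchored to the top-right corner of the bounding box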
font_scaling = self.font_size_base + round(max(image.shape) * self.font_scale)
text_thickness = round(self.font_thickness_factor * thickness)
score_text = format(float(score), ".2f")
text_size = cv2.getTextSize(score_text, self.font, font_scaling, text_thickness)[0]
text_start = bbox[1] + width - text_size[0], bbox[0]
text_end = bbox[1] + width, bbox[0] - text_size[1]
cv2.rectangle(image, text_start, text_end, self.color, -1)
cv2.putText(
image,
score_text,
text_start,
self.font,
font_scaling,
(255, 255, 255),
bottomLeftOrigin=False,
thickness=text_thickness
)
return image
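
A minimal usage sketch of the completed pipeline (not part of the diff). The class name SheepLocalizer and its constructor arguments are assumptions inferred from the attributes the diff references (model_file, model_type, gpu_id):

# Hypothetical end-to-end usage; the constructor signature is assumed, not taken from the diff.
import cv2
from sheeping.sheep_localizer import SheepLocalizer

localizer = SheepLocalizer(model_file="trained_model.npz", model_type="ssd512", gpu_id=-1)

image = cv2.cvtColor(cv2.imread("sheep.jpg"), cv2.COLOR_BGR2RGB)  # HWC, RGB
resized, scaling = localizer.resize(image)                        # resize to the network input size
processed = localizer.preprocess(resized)                         # CHW, float32, mean-subtracted
bboxes, scores = localizer.localize(processed)                    # run the SSD detector
annotated = localizer.visualize_results(image, bboxes, scores, scaling)
cv2.imwrite("annotated.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))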