
Commit 2f1755c

minor cleanup and docstring fixes
Signed-off-by: Farhan Ahmed <[email protected]>
1 parent 707ed46 commit 2f1755c

2 files changed: +55 -59 lines changed

art/estimators/object_detection/pytorch_yolo.py

Lines changed: 48 additions & 59 deletions
@@ -58,10 +58,10 @@ def translate_predictions_xcycwh_to_x1y1x2y2(
     for y_pred in y_pred_xcycwh:
         boxes = torch.vstack(
             [
-                torch.maximum((y_pred[:, 0] - y_pred[:, 2] / 2), torch.tensor(0).to(device)),
-                torch.maximum((y_pred[:, 1] - y_pred[:, 3] / 2), torch.tensor(0).to(device)),
-                torch.minimum((y_pred[:, 0] + y_pred[:, 2] / 2), torch.tensor(input_height).to(device)),
-                torch.minimum((y_pred[:, 1] + y_pred[:, 3] / 2), torch.tensor(input_width).to(device)),
+                torch.maximum((y_pred[:, 0] - y_pred[:, 2] / 2), torch.tensor(0, device=device)),
+                torch.maximum((y_pred[:, 1] - y_pred[:, 3] / 2), torch.tensor(0, device=device)),
+                torch.minimum((y_pred[:, 0] + y_pred[:, 2] / 2), torch.tensor(input_height, device=device)),
+                torch.minimum((y_pred[:, 1] + y_pred[:, 3] / 2), torch.tensor(input_width, device=device)),
             ]
         ).permute((1, 0))
         labels = torch.argmax(y_pred[:, 5:], dim=1, keepdim=False)
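The only functional change in this hunk is where the clamp bounds are allocated: `torch.tensor(0, device=device)` creates the scalar directly on the target device instead of building it on CPU and copying it over with `.to(device)`. A minimal sketch of the same clipping idiom on toy values (the 416x416 input size and the fake prediction row are assumptions for illustration; the row layout follows the `y_pred[:, 5:]` class slice in the context line above):

    import torch

    # Toy illustration: allocate the scalar bounds directly on the target device
    # rather than creating them on CPU and moving them afterwards.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_height = input_width = 416

    # One fake prediction row in an assumed (x_c, y_c, w, h, confidence, class scores...) layout.
    y_pred = torch.tensor([[10.0, 400.0, 40.0, 60.0, 0.9, 0.1, 0.8]], device=device)

    x1 = torch.maximum(y_pred[:, 0] - y_pred[:, 2] / 2, torch.tensor(0, device=device))
    y1 = torch.maximum(y_pred[:, 1] - y_pred[:, 3] / 2, torch.tensor(0, device=device))
    x2 = torch.minimum(y_pred[:, 0] + y_pred[:, 2] / 2, torch.tensor(input_height, device=device))
    y2 = torch.minimum(y_pred[:, 1] + y_pred[:, 3] / 2, torch.tensor(input_width, device=device))

    boxes = torch.vstack([x1, y1, x2, y2]).permute((1, 0))
    print(boxes)  # [[0., 370., 30., 416.]] once clipped to the image bounds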
@@ -106,10 +106,10 @@ def translate_labels_x1y1x2y2_to_xcycwh(
         labels[:, 3:6:2] /= input_height
 
         # convert from x1y1x2y2 to xcycwh
-        labels[:, 4] = labels[:, 4] - labels[:, 2]
-        labels[:, 5] = labels[:, 5] - labels[:, 3]
-        labels[:, 2] = labels[:, 2] + labels[:, 4] / 2
-        labels[:, 3] = labels[:, 3] + labels[:, 5] / 2
+        labels[:, 4] -= labels[:, 2]
+        labels[:, 5] -= labels[:, 3]
+        labels[:, 2] += labels[:, 4] / 2
+        labels[:, 3] += labels[:, 5] / 2
         labels_xcycwh_list.append(labels)
 
     labels_xcycwh = torch.vstack(labels_xcycwh_list)
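The switch to augmented assignments is cosmetic, but the order of the four updates is what makes the in-place conversion work: widths and heights are derived first, then the corners are shifted to centres. A short worked example on a hypothetical normalised target row (columns 2 to 5 are assumed to hold x1, y1, x2, y2, consistent with the `labels[:, 3:6:2] /= input_height` context line above):

    import torch

    # Hypothetical target row; only columns 2..5 (x1, y1, x2, y2) matter here.
    labels = torch.tensor([[0.0, 1.0, 0.2, 0.3, 0.6, 0.9]])

    labels[:, 4] -= labels[:, 2]      # w  = x2 - x1  -> 0.4
    labels[:, 5] -= labels[:, 3]      # h  = y2 - y1  -> 0.6
    labels[:, 2] += labels[:, 4] / 2  # xc = x1 + w/2 -> 0.4
    labels[:, 3] += labels[:, 5] / 2  # yc = y1 + h/2 -> 0.6

    print(labels)  # [[0.0, 1.0, 0.4, 0.6, 0.4, 0.6]]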
@@ -148,13 +148,13 @@ def __init__(
         Initialization.
 
         :param model: Object detection model wrapped as demonstrated in examples/get_started_yolo.py.
-                      The output of the model is `List[Dict[Tensor]]`, one for each input
-                      image. The fields of the Dict are as follows:
+                      The output of the model is `List[Dict[str, torch.Tensor]]`, one for each input image.
+                      The fields of the Dict are as follows:
 
-                      - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values
-                        between 0 and H and 0 and W
-                      - labels (Int64Tensor[N]): the predicted labels for each image
-                      - scores (Tensor[N]): the scores or each prediction
+                      - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and
+                        0 <= y1 < y2 <= H.
+                      - labels [N]: the labels for each image
+                      - scores [N]: the scores of each prediction.
         :param input_shape: The shape of one input sample.
         :param optimizer: The optimizer for training the classifier.
         :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
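For reference, the `List[Dict[str, torch.Tensor]]` structure the updated docstring describes would look roughly like this hand-built sketch (the values are made up, not the output of a real YOLO model):

    import torch

    # One dict per input image; boxes are (x1, y1, x2, y2) in pixel coordinates.
    predictions = [
        {
            "boxes": torch.tensor([[24.0, 18.0, 220.0, 310.0],
                                   [100.0, 50.0, 180.0, 140.0]]),  # float tensor [N, 4]
            "labels": torch.tensor([16, 2]),                       # int64 tensor [N]
            "scores": torch.tensor([0.91, 0.64]),                  # tensor [N]
        },
    ]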
@@ -274,44 +274,37 @@ def _preprocess_and_convert_inputs(
         :param y: Target values of format `List[Dict[str, Union[np.ndarray, torch.Tensor]]]`, one for each input image.
                   The fields of the Dict are as follows:
 
-                  - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values
-                    between 0 and H and 0 and W
-                  - labels (Int64Tensor[N]): the predicted labels for each image
-                  - scores (Tensor[N]): the scores or each prediction.
+                  - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                  - labels [N]: the labels for each image
+                  - scores [N]: the scores of each prediction.
         :param fit: `True` if the function is call before fit/training and `False` if the function is called before a
                     predict operation.
         :param no_grad: `True` if no gradients required.
         :return: Preprocessed inputs `(x, y)` as tensors.
         """
         import torch
-        import torchvision
 
         if self.clip_values is not None:
             norm_factor = self.clip_values[1]
         else:
             norm_factor = 1.0
 
-        transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
-
         if self.all_framework_preprocessing:
             if isinstance(x, np.ndarray):
                 # Convert samples into tensor
-                if self.channels_first:
-                    x_tensor = torch.from_numpy(x / norm_factor).to(self.device)
-                else:
-                    x_tensor = torch.stack([transform(x_i / norm_factor).to(self.device) for x_i in x])
+                x_tensor = torch.from_numpy(x / norm_factor).to(self.device)
             else:
-                if self.channels_first:
-                    x_tensor = x.to(self.device)
-                else:
-                    x_tensor = torch.permute(x, (0, 3, 1, 2)).to(self.device)
+                x_tensor = (x / norm_factor).to(self.device)
+
+            if not self.channels_first:
+                x_tensor = torch.permute(x_tensor, (0, 3, 1, 2))
 
             # Convert targets into tensor
             if y is not None and isinstance(y[0]["boxes"], np.ndarray):
                 y_tensor = []
                 for y_i in y:
                     y_t = {
-                        "boxes": torch.from_numpy(y_i["boxes"]).to(device=self.device, dtype=torch.float),
+                        "boxes": torch.from_numpy(y_i["boxes"]).to(device=self.device, dtype=torch.float32),
                         "labels": torch.from_numpy(y_i["labels"]).to(device=self.device, dtype=torch.int64),
                     }
                     if "masks" in y_i:
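The refactor in this hunk drops the per-image `torchvision.transforms.ToTensor()` loop in favour of one `from_numpy` (or one division for tensor inputs) followed by a single `torch.permute` when the data is channels-last. A minimal sketch of that NHWC to NCHW conversion (the batch shape and `norm_factor` value are assumptions for illustration):

    import numpy as np
    import torch

    norm_factor = 255.0  # e.g. clip_values[1]; falls back to 1.0 when clip_values is None

    # Assumed NHWC uint8 batch: 2 images of 416 x 416 with 3 channels.
    x = np.random.randint(0, 256, size=(2, 416, 416, 3), dtype=np.uint8)

    x_tensor = torch.from_numpy(x / norm_factor)      # scaled to [0, 1], still NHWC
    x_tensor = torch.permute(x_tensor, (0, 3, 1, 2))  # NHWC -> NCHW

    print(x_tensor.shape)  # torch.Size([2, 3, 416, 416])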
@@ -332,10 +325,10 @@ def _preprocess_and_convert_inputs(
             x_preprocessed, y_preprocessed = self._apply_preprocessing(x, y=y, fit=fit, no_grad=no_grad)
 
             # Convert samples into tensor
-            if self.channels_first:
-                x_preprocessed = torch.from_numpy(x_preprocessed / norm_factor).to(self.device)
-            else:
-                x_preprocessed = torch.stack([transform(x_i / norm_factor).to(self.device) for x_i in x_preprocessed])
+            x_preprocessed = torch.from_numpy(x_preprocessed / norm_factor).to(self.device)
+
+            if not self.channels_first:
+                x_preprocessed = torch.permute(x_preprocessed, (0, 3, 1, 2))
 
             # Set gradients
             if not no_grad:
@@ -346,7 +339,7 @@ def _preprocess_and_convert_inputs(
                 y_preprocessed_tensor = []
                 for y_i in y_preprocessed:
                     y_preprocessed_t = {
-                        "boxes": torch.from_numpy(y_i["boxes"]).to(device=self.device, dtype=torch.float),
+                        "boxes": torch.from_numpy(y_i["boxes"]).to(device=self.device, dtype=torch.float32),
                         "labels": torch.from_numpy(y_i["labels"]).to(device=self.device, dtype=torch.int64),
                     }
                     if "masks" in y_i:
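The `torch.float` to `torch.float32` change in both target-conversion blocks is purely cosmetic, since `torch.float` is an alias of `torch.float32`; spelling it out only makes the intended precision explicit. A quick check:

    import numpy as np
    import torch

    assert torch.float is torch.float32  # same dtype object, so behaviour is unchanged

    boxes = torch.from_numpy(np.zeros((2, 4))).to(dtype=torch.float32)
    assert boxes.dtype == torch.float32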
@@ -371,9 +364,9 @@ def _get_losses(
         :param y: Target values of format `List[Dict[str, Union[np.ndarray, torch.Tensor]]]`, one for each input image.
                   The fields of the Dict are as follows:
 
-                  - boxes (FloatTensor[N, 4]): the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and
-                    0 <= y1 < y2 <= H.
-                  - labels (Int64Tensor[N]): the labels for each image
+                  - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                  - labels [N]: the labels for each image
+                  - scores [N]: the scores of each prediction.
         :return: Loss gradients of the same shape as `x`.
         """
         self._model.train()
@@ -407,10 +400,9 @@ def loss_gradient( # pylint: disable=W0613
         :param y: Target values of format `List[Dict[str, Union[np.ndarray, torch.Tensor]]]`, one for each input image.
                   The fields of the Dict are as follows:
 
-                  - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values
-                    between 0 and H and 0 and W
-                  - labels (Int64Tensor[N]): the predicted labels for each image
-                  - scores (Tensor[N]): the scores or each prediction.
+                  - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                  - labels [N]: the labels for each image
+                  - scores [N]: the scores of each prediction.
         :return: Loss gradients of the same shape as `x`.
         """
         import torch
@@ -461,12 +453,12 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[s
 
         :param x: Samples of shape NCHW or NHWC.
         :param batch_size: Batch size.
-        :param y: Target values of format `List[Dict[str, Union[np.ndarray, torch.Tensor]]]`, one for each input image.
-                  The fields of the Dict are as follows:
+        :return: Predictions of format `List[Dict[str, np.ndarray]]`, one for each input image. The fields of the Dict
+                 are as follows:
 
-                  - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
-                  - labels [N]: the labels for each image
-                  - scores [N]: the scores or each prediction.
+                 - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                 - labels [N]: the labels for each image
+                 - scores [N]: the scores of each prediction.
         """
         import torch
 
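With the corrected docstring, `predict` is documented to return plain NumPy arrays rather than tensors. A consumption sketch of that `List[Dict[str, np.ndarray]]` format (the values are fabricated; in practice they would come from `detector.predict(x, batch_size=...)` on a constructed `PyTorchYolo` instance):

    import numpy as np

    # Stand-in for the documented return value of predict().
    predictions = [
        {
            "boxes": np.array([[24.0, 18.0, 220.0, 310.0], [5.0, 5.0, 60.0, 90.0]]),
            "labels": np.array([16, 2]),
            "scores": np.array([0.91, 0.12]),
        },
    ]

    for i, pred in enumerate(predictions):
        keep = pred["scores"] > 0.5  # drop low-confidence detections
        print(i, pred["boxes"][keep], pred["labels"][keep])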
@@ -528,10 +520,9 @@ def fit( # pylint: disable=W0221
         :param y: Target values of format `List[Dict[str, Union[np.ndarray, torch.Tensor]]]`, one for each input image.
                   The fields of the Dict are as follows:
 
-                  - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values
-                    between 0 and H and 0 and W
-                  - labels (Int64Tensor[N]): the predicted labels for each image
-                  - scores (Tensor[N]): the scores or each prediction.
+                  - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                  - labels [N]: the labels for each image
+                  - scores [N]: the scores of each prediction.
         :param batch_size: Size of batches.
         :param nb_epochs: Number of epochs to use for training.
         :param drop_last: Set to ``True`` to drop the last incomplete batch, if the dataset size is not divisible by
@@ -612,10 +603,9 @@ def compute_losses(
         :param y: Target values of format `List[Dict[str, Union[np.ndarray, torch.Tensor]]]`, one for each input image.
                   The fields of the Dict are as follows:
 
-                  - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values
-                    between 0 and H and 0 and W
-                  - labels (Int64Tensor[N]): the predicted labels for each image
-                  - scores (Tensor[N]): the scores or each prediction.
+                  - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                  - labels [N]: the labels for each image
+                  - scores [N]: the scores of each prediction.
         :return: Dictionary of loss components.
         """
         loss_components, _ = self._get_losses(x=x, y=y)
@@ -634,10 +624,9 @@ def compute_loss( # type: ignore
         :param y: Target values of format `List[Dict[str, Union[np.ndarray, torch.Tensor]]]`, one for each input image.
                   The fields of the Dict are as follows:
 
-                  - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values
-                    between 0 and H and 0 and W
-                  - labels (Int64Tensor[N]): the predicted labels for each image
-                  - scores (Tensor[N]): the scores or each prediction.
+                  - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                  - labels [N]: the labels for each image
+                  - scores [N]: the scores of each prediction.
         :return: Loss.
         """
         import torch

docs/modules/estimators/object_detection.rst

Lines changed: 7 additions & 0 deletions
@@ -23,6 +23,13 @@ Object Detector PyTorch Faster-RCNN
    :special-members: __init__
    :inherited-members:
 
+Object Detector PyTorch YOLO
+----------------------------
+.. autoclass:: PyTorchYolo
+   :members:
+   :special-members: __init__
+   :inherited-members:
+
 Object Detector TensorFlow Faster-RCNN
 --------------------------------------
 .. autoclass:: TensorFlowFasterRCNN
