Updates for review comments

Beat Buesser · Beat Buesser · commit 61d1601f4b56 · 2021-05-19T00:36:44.000+01:00
Signed-off-by: Beat Buesser <beat.buesser@ie.ibm.com>
diff --git a/art/estimators/object_detection/tensorflow_faster_rcnn.py b/art/estimators/object_detection/tensorflow_faster_rcnn.py
@@ -332,9 +332,11 @@ def loss_gradient(  # pylint: disable=W0221
         :param y: Targets of format `List[Dict[str, np.ndarray]]`, one for each input image. The fields of the Dict are
                   as follows:
 
-                 - boxes [N, 4]: the boxes in [y1, x1, y2, x2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
-                                 Can be changed to PyTorch format with `standardise_output=True`.
-                 - labels [N]: the labels for each image
+                 - boxes [N, 4]: the boxes in [y1, x1, y2, x2] format in scale [0, 1] (`standardise_output=False`)
+                                 or in [x1, y1, x2, y2] format in image scale (`standardise_output=True`),
+                                 with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                 - labels [N]: the labels for each image in TensorFlow (`standardise_output=False`) or PyTorch
+                               (`standardise_output=True`) format
                  - scores [N]: the scores or each prediction.
 
         :param standardise_output: True if `y` is provided in standardised PyTorch format. Box coordinates will be
@@ -401,14 +403,14 @@ def predict(  # pylint: disable=W0221
                                    scaled from [0, 1] to image dimensions, label index will be increased by 1 to adhere
                                    to COCO categories and the boxes will be changed to [x1, y1, x2, y2] format, with
                                    0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
-        :return: A dictionary containing the following fields:
 
         :return: Predictions of format `List[Dict[str, np.ndarray]]`, one for each input image. The
                  fields of the Dict are as follows:
 
                  - boxes [N, 4]: the boxes in [y1, x1, y2, x2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
                                  Can be changed to PyTorch format with `standardise_output=True`.
-                 - labels [N]: the labels for each image
+                 - labels [N]: the labels for each image in TensorFlow format. Can be changed to PyTorch format with
+                               `standardise_output=True`.
                  - scores [N]: the scores or each prediction.
         """
         # Only do prediction if is_training is False
diff --git a/art/estimators/object_detection/utils.py b/art/estimators/object_detection/utils.py
@@ -71,7 +71,7 @@ def convert_pt_to_tf(y: List[Dict[str, np.ndarray]], height: int, width: int) ->
     :return: Target values of format `List[Dict[Tensor]]`, one for each input image. The fields of the Dict are as
              follows:
 
-             - boxes (FloatTensor[N, 4]): the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and
+             - boxes (FloatTensor[N, 4]): the boxes in [y1, x1, y2, x2] format, with 0 <= x1 < x2 <= W and
                                           0 <= y1 < y2 <= H in scale [0, 1].
              - labels (Int64Tensor[N]): the labels for each image
              - scores (Tensor[N]): the scores or each prediction.