Unity-Technologies
diff --git a/‎com.unity.perception/CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎com.unity.perception/CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎com.unity.perception/Documentation~/PerceptionCamera.md‎
Lines changed: 79 additions & 0 deletions b/‎com.unity.perception/Documentation~/PerceptionCamera.md‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎com.unity.perception/Documentation~/Schema/Synthetic_Dataset_Schema.md‎
Lines changed: 106 additions & 40 deletions b/‎com.unity.perception/Documentation~/Schema/Synthetic_Dataset_Schema.md‎
Lines changed: 106 additions & 40 deletions
diff --git a/‎com.unity.perception/Documentation~/images/keypoint_template_header.png‎
29.7 KB b/‎com.unity.perception/Documentation~/images/keypoint_template_header.png‎
29.7 KB
diff --git a/‎com.unity.perception/Documentation~/images/keypoint_template_keypoints.png‎
49.7 KB b/‎com.unity.perception/Documentation~/images/keypoint_template_keypoints.png‎
49.7 KB
diff --git a/‎com.unity.perception/Documentation~/images/keypoint_template_skeleton.png‎
22.3 KB b/‎com.unity.perception/Documentation~/images/keypoint_template_skeleton.png‎
22.3 KB
diff --git a/‎com.unity.perception/Editor/Randomization/Editors/RandomizerTagEditor.cs‎
Lines changed: 32 additions & 0 deletions b/‎com.unity.perception/Editor/Randomization/Editors/RandomizerTagEditor.cs‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎com.unity.perception/Editor/Randomization/Editors/RandomizerTagEditor.cs.meta‎
Lines changed: 3 additions & 0 deletions b/‎com.unity.perception/Editor/Randomization/Editors/RandomizerTagEditor.cs.meta‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎com.unity.perception/Runtime/GroundTruth/Labelers/AnimationPoseLabel.cs‎
Lines changed: 61 additions & 0 deletions b/‎com.unity.perception/Runtime/GroundTruth/Labelers/AnimationPoseLabel.cs‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎com.unity.perception/Runtime/GroundTruth/Labelers/AnimationPoseLabel.cs.meta‎
Lines changed: 3 additions & 0 deletions b/‎com.unity.perception/Runtime/GroundTruth/Labelers/AnimationPoseLabel.cs.meta‎
Lines changed: 3 additions & 0 deletions
@@ -13,6 +13,10 @@ Before upgrading a project to this version of the Perception package, make sure
 
 ### Added
 
+Added keypoint ground truth labeling
+
+Added animation randomization
+
 Added ScenarioConstants base class for all scenario constants objects
 
 Added ScenarioBase.SerializeToConfigFile()
 
@@ -77,6 +77,85 @@ _Example rendered object info for a single object_
 
 The RenderedObjectInfoLabeler records a list of all objects visible in the Camera image, including its instance ID, resolved label ID and visible pixels. If Unity cannot resolve objects to a label in the IdLabelConfig, it does not record these objects.
 
+### KeypointLabeler
+
+The keypoint labeler captures keypoints of a labeled gameobject. The typical use of this labeler is capturing human pose
+estimation data. The labeler uses a [keypoint template](#keypoint-template) which defines the keypoints to capture for the
+model and the skeletal connections between those keypoints. The positions of the keypoints are recorded in pixel coordinates
+and saved to the captures json file.
+
+```
+keypoints {
+  label_id:      <int>   -- Integer identifier of the label
+  instance_id:   <str>   -- UUID of the instance.
+  template_guid: <str>   -- UUID of the keypoint template
+  pose:          <str>   -- Pose ground truth information
+  keypoints [            -- Array of keypoint data, one entry for each keypoint defined in associated template file.
+    {
+      index:     <int>   -- Index of keypoint in template
+      x:         <float> -- X pixel coordinate of keypoint
+      y:         <float> -- Y pixel coordinate of keypoint
+      state:     <int>   -- 0: keypoint does not exist, 2: keypoint exists
+    }, ...
+  ]
+}
+```
+
+#### Keypoint Template
+
+Keypoint templates are used to define the keypoints and skeletal connections captured by the KeypointLabeler. The keypoint
+template takes advantage of Unity's humanoid animation rig, and allows the user to automatically associate template keypoints
+to animation rig joints. Additionally, the user can choose to ignore the rigged points, or add points not defined in the rig.
+A COCO keypoint template is included in the Perception package.
+
+##### Editor
+
+The keypoint template editor allows the user to create/modify a keypoint template. The editor consists of the header information,
+the keypoint array, and the skeleton array.
+
+![Header section of the keypoint template](images/keypoint_template_header.png)
+<br/>_Header section of the keypoint template_
+
+In the header section, a user can change the name of the template and supply textures that they would like to use for the keypoint
+visualization.
+
+![The keypoint section of the keypoint template](images/keypoint_template_keypoints.png)
+<br/>_Keypoint section of the keypoint template_
+
+The keypoint section allows the user to create/edit keypoints and associate them with Unity animation rig points. Each keypoint record
+has 4 fields: label (the name of the keypoint), Associate to Rig (a boolean value which, if true, automatically maps the keypoint to
+the gameobject defined by the rig), Rig Label (only needed if Associate To Rig is true, defines which rig component to associate with
+the keypoint), and Color (RGB color value of the keypoint in the visualization).
+
+![Skeleton section of the keypoint template](images/keypoint_template_skeleton.png)
+<br/>_Skeleton section of the keypoint template_
+
+The skeleton section allows the user to create connections between joints, basically defining the skeleton of a labeled object.
+
+##### Format
+```
+annotation_definition.spec {
+  template_id:       <str>           -- The UUID of the template
+  template_name:     <str>           -- Human readable name of the template
+  key_points [                       -- Array of joints defined in this template
+    {
+      label:         <str>           -- The label of the joint
+      index:         <int>           -- The index of the joint
+    }, ...
+  ]
+  skeleton [                         -- Array of skeletal connections (which joints have connections between one another) defined in this template
+    {
+      joint1:        <int>           -- The first joint of the connection
+      joint2:        <int>           -- The second joint of the connection
+    }, ...
+  ]
+}
+```
+
+#### Animation Pose Label
+
+This file is used to map timestamps within an animation clip to pose labels.
+
 ## Limitations
 
 Ground truth is not compatible with all rendering features, especially those that modify the visibility or shape of objects in the frame.
 
@@ -172,21 +172,21 @@ A grayscale PNG file that stores integer values (label pixel_value in [annotatio
 
 #### capture.annotation.values
 
-<!-- Not yet implemented annotations
-##### instance segmentation - polygon
+##### instance segmentation - color image
 
-A json object that stores collections of polygons. Each polygon record maps a tuple of (instance, label) to a list of 
-K pixel coordinates that forms a polygon. This object can be directly stored in annotation.values 
+A color png file that stores instance ids as a color value per pixel. The png files are located in the "filename" location.
 
 ```
-semantic_segmentation_polygon {
-  label_id:     <int> -- Integer identifier of the label
-  label_name:   <str> -- String identifier of the label
-  instance_id:  <str> -- UUID of the instance.
-  polygon:      [<int, int>,...] -- List of points in pixel coordinates of the outer edge. Connecting these points in order should create a polygon that identifies the object. 
+instance_segmentation {
+  instance_id:  <int> -- The instance ID of the labeled object
+  color {             -- The pixel color that correlates with the instance ID
+    r:          <int> -- The red value of the pixel between 0 and 255
+    g:          <int> -- The green value of the pixel between 0 and 255
+    b:          <int> -- The blue value of the pixel between 0 and 255
+    a:          <int> -- The alpha value of the pixel between 0 and 255
+  } 
 }
 ```
--->
 
 ##### 2D bounding box
 
@@ -196,36 +196,77 @@ We follow the OpenCV 2D coordinate [system](https://github.com/vvvv/VL.OpenCV/wi
 
 ```
 bounding_box_2d {
-  label_id:     <int> -- Integer identifier of the label
-  label_name:   <str> -- String identifier of the label
-  instance_id:  <str> -- UUID of the instance.
+  label_id:     <int>   -- Integer identifier of the label
+  label_name:   <str>   -- String identifier of the label
+  instance_id:  <str>   -- UUID of the instance.
   x:            <float> -- x coordinate of the upper left corner.
   y:            <float> -- y coordinate of the upper left corner. 
   width:        <float> -- number of pixels in the x direction
   height:       <float> -- number of pixels in the y direction
 }
 ```
-<!-- Not yet implemented annotations
 
 ##### 3D bounding box
 
-A json file that stored collections of 3D bounding boxes. 
-Each bounding box record maps a tuple of (instance, label) to translation, size and rotation that draws a 3D bounding box, as well as velocity and acceleration (optional) of the 3D bounding box. 
-All location data is given with respect to the **sensor coordinate system**.
+3D bounding box information. Unlike the 2D bounding box, 3D bounding box coordinates are captured in the **sensor coordinate system**. 
+Each bounding box record maps a tuple of (instance, label) to translation, size and rotation that draws a 3D bounding box, as well as velocity and acceleration (optional) of the 3D bounding box.
 
 ```
 bounding_box_3d {
-  label_id:     <int> -- Integer identifier of the label
-  label_name:   <str> -- String identifier of the label
-  instance_id:  <str> -- UUID of the instance.
-  translation:  <float, float, float> -- 3d bounding box's center location in meters as center_x, center_y, center_z with respect to global coordinate system.
-  size:         <float, float, float> -- 3d bounding box size in meters as width, length, height.
-  rotation:     <float, float, float, float> -- 3d bounding box orientation as quaternion: w, x, y, z.
-  velocity:     <float, float, float>  -- 3d bounding box velocity in meters per second as v_x, v_y, v_z.
-  acceleration: <float, float, float> [optional] -- 3d bounding box acceleration in meters per second^2 as a_x, a_y, a_z.
+  label_id:      <int>   -- Integer identifier of the label
+  label_name:    <str>   -- String identifier of the label
+  instance_id:   <str>   -- UUID of the instance.
+  translation {          -- 3d bounding box's center location in meters with respect to global coordinate system.
+    x:           <float> -- The x coordinate
+    y:           <float> -- The y coordinate
+    z:           <float> -- The z coordinate
+  }
+  size {                 -- 3d bounding box size in meters
+    x:           <float> -- The x coordinate
+    y:           <float> -- The y coordinate
+    z:           <float> -- The z coordinate
+  }          
+  rotation {             -- 3d bounding box orientation as quaternion: w, x, y, z.
+    x:           <float> -- The x coordinate
+    y:           <float> -- The y coordinate
+    z:           <float> -- The z coordinate
+    w:           <float> -- The w coordinate
+  }     
+  velocity {             -- [Optional] 3d bounding box velocity in meters per second.
+    x:           <float> -- The x coordinate
+    y:           <float> -- The y coordinate
+    z:           <float> -- The z coordinate
+  } 
+  acceleration {         -- [Optional] 3d bounding box acceleration in meters per second^2.
+    x:           <float> -- The x coordinate
+    y:           <float> -- The y coordinate
+    z:           <float> -- The z coordinate
+  } 
+}
+```
+##### Keypoints
+
+Keypoint data, commonly used for human pose estimation. A keypoint capture is associated with a template that defines the keypoints (see annotation.definition file).
+Each keypoint record maps a tuple of (instance, label) to template, pose, and an array of keypoints. A keypoint will exist in this record for each keypoint defined in the template file.
+If a given keypoint doesn't exist in the labeled gameobject, then that keypoint will have a state value of 0; if it does exist then it will have a state value of 2.
+```
+keypoints {
+  label_id:      <int>   -- Integer identifier of the label
+  instance_id:   <str>   -- UUID of the instance.
+  template_guid: <str>   -- UUID of the keypoint template
+  pose:          <str>   -- Pose ground truth information
+  keypoints [            -- Array of keypoint data, one entry for each keypoint defined in associated template file.
+    {
+      index:     <int>   -- Index of keypoint in template
+      x:         <float> -- X pixel coordinate of keypoint
+      y:         <float> -- Y pixel coordinate of keypoint
+      state:     <int>   -- 0: keypoint does not exist, 2 keypoint exists
+    }, ...
+  ]
 }
 ```
 
+<!-- Not yet implemented annotations
 
 #### instances (V2, WIP)
 
@@ -303,27 +344,52 @@ Each record describes a particular type of annotation and contains an annotation
 Typically, the `spec` key describes all labels_id and label_name used by the annotation. 
 Some special cases like semantic segmentation might assign additional values (e.g. pixel value) to record the mapping between label_id/label_name and pixel color in the annotated PNG files.
 
+##### annotation definition header
 ```
 annotation_definition {
-  id:           <int> -- Integer identifier of the annotation definition.
-  name:         <str> -- Human readable annotation spec name (e.g. sementic_segmentation, instance_segmentation, etc.) 
-  description:  <str, optional> -- Description of this annotation specifications.
-  format:       <str> -- The format of the annotation files. (e.g. png, json, etc.)
-  spec:         [<obj>...] -- Format-specific specification for the annotation values (ex. label-value mappings for semantic segmentation images)
+  id:                <int>           -- Integer identifier of the annotation definition.
+  name:              <str>           -- Human readable annotation spec name (e.g. semantic_segmentation, instance_segmentation, etc.) 
+  description:       <str>           -- [Optional] Description of this annotation specifications.
+  format:            <str>           -- The format of the annotation files. (e.g. png, json, etc.)
+  spec:              [<obj>...]      -- Format-specific specification for the annotation values (ex. label-value mappings for semantic segmentation images)
 }
-
-# semantic segmentation
+```
+##### semantic segmentation
+Annotation spec for the [semantic segmentation labeler](#semantic-segmentation---grayscale-image)
+```
 annotation_definition.spec {        
-  label_id:          <int> -- Integer identifier of the label
-  label_name:        <str> -- String identifier of the label
-  pixel_value:       <int> -- Grayscale pixel value
-  color_pixel_value: <int, int, int> [optional] -- Color pixel value
+  label_id:          <int>           -- Integer identifier of the label
+  label_name:        <str>           -- String identifier of the label
+  pixel_value:       <int>           -- Grayscale pixel value
+  color_pixel_value: <int, int, int> -- [Optional] Color pixel value
 }
-
-# label enumeration spec, used for annotations like bounding box 2d. This might be a subset of all labels used in simulation.
+```
+##### label enumeration spec
+This spec is used for annotations like [bounding box 2d](#2d-bounding-box). This might be a subset of all labels used in simulation.
+```
 annotation_definition.spec {
-  label_id:    <int> -- Integer identifier of the label
-  label_name:  <str> -- String identifier of the label
+  label_id:          <int>           -- Integer identifier of the label
+  label_name:        <str>           -- String identifier of the label
+}
+```
+##### keypoint template
+Keypoint templates are used to define the keypoints and skeletal connections captured by the [keypoint labeler](#keypoints).
+```
+annotation_definition.spec {
+  template_id:       <str>           -- The UUID of the template
+  template_name:     <str>           -- Human readable name of the template
+  key_points [                       -- Array of joints defined in this template
+    {
+      label:         <str>           -- The label of the joint
+      index:         <int>           -- The index of the joint
+    }, ...
+  ]
+  skeleton [                         -- Array of skeletal connections (which joints have connections between one another) defined in this template
+    {
+      joint1:        <int>           -- The first joint of the connection
+      joint2:        <int>           -- The second joint of the connection
+    }, ...
+  ]
 }
 ```
 
 
@@ -0,0 +1,32 @@
+using UnityEditor;
+using UnityEditor.UIElements;
+using UnityEngine.Experimental.Perception.Randomization.Randomizers;
+using UnityEngine.UIElements;
+
+namespace UnityEngine.Experimental.Perception.Randomization.Editor
+{
+    /// <summary>
+    /// Custom inspector for <see cref="RandomizerTag"/> components. It builds a UIElements inspector containing
+    /// one bound property field for each visible serialized property, leaving out the script reference field.
+    /// </summary>
+    [CustomEditor(typeof(RandomizerTag), true)]
+    public class RandomizerTagEditor : UnityEditor.Editor
+    {
+        /// <summary>
+        /// Creates the inspector's root element and fills it with the tag's property fields.
+        /// </summary>
+        /// <returns>The root element holding the generated property fields</returns>
+        public override VisualElement CreateInspectorGUI()
+        {
+            var container = new VisualElement();
+            CreatePropertyFields(container);
+            return container;
+        }
+
+        /// <summary>
+        /// Walks the visible top-level properties of the serialized object and adds a bound
+        /// <see cref="PropertyField"/> for each of them to the passed in element.
+        /// </summary>
+        /// <param name="container">The element that receives the property fields</param>
+        void CreatePropertyFields(VisualElement container)
+        {
+            var property = serializedObject.GetIterator();
+
+            // Step into the object to land on its first visible property.
+            property.NextVisible(true);
+
+            while (true)
+            {
+                // The script reference is not shown in this inspector.
+                if (property.name != "m_Script")
+                {
+                    // The iterator is advanced below, so each field is given its own copy of the property.
+                    var field = new PropertyField(property.Copy());
+                    field.Bind(serializedObject);
+                    container.Add(field);
+                }
+
+                // Advance to the next sibling without descending into children.
+                if (!property.NextVisible(false))
+                    break;
+            }
+        }
+    }
+}
@@ -0,0 +1,61 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace UnityEngine.Perception.GroundTruth
+{
+    /// <summary>
+    /// Record that maps a pose label to the point within an animation clip at which that pose starts.
+    /// Records are consumed in list order by <see cref="AnimationPoseLabel.GetPoseAtTime"/>.
+    /// </summary>
+    [Serializable]
+    public class PoseTimestampRecord
+    {
+        /// <summary>
+        /// The point within the clip at which the pose starts, expressed as a fraction of the clip
+        /// from 0 (beginning) to 1 (end)
+        /// </summary>
+        [Tooltip("The percentage within the clip that the pose starts, a value from 0 (beginning) to 1 (end)")]
+        public float startOffsetPercent;
+        /// <summary>
+        /// The label to use for any captures that fall inside of the time period covered by this record
+        /// </summary>
+        public string poseLabel;
+    }
+
+    /// <summary>
+    /// The animation pose label is a mapping file that maps time ranges in an animation clip to ground truth pose
+    /// labels. Each timestamp record is defined by a pose label and the normalized offset (0 to 1) within the clip at
+    /// which that pose starts. The timestamp records are order dependent: a record's label is used from its start
+    /// offset up to the start offset of the next record. The first record is used from the beginning of the clip
+    /// (its own start offset is not consulted), and the final record is used for any time left over in the clip.
+    /// </summary>
+    [CreateAssetMenu(fileName = "AnimationPoseTimestamp", menuName = "Perception/Animation Pose Timestamps")]
+    public class AnimationPoseLabel : ScriptableObject
+    {
+        /// <summary>
+        /// The animation clip used for all of the timestamps
+        /// </summary>
+        public AnimationClip animationClip;
+        /// <summary>
+        /// The list of timestamps, order dependent
+        /// </summary>
+        public List<PoseTimestampRecord> timestamps;
+
+        /// <summary>
+        /// Retrieves the pose label for the clip at the passed in normalized time.
+        /// </summary>
+        /// <param name="time">The normalized time within the clip, from 0 (beginning) to 1 (end)</param>
+        /// <returns>
+        /// The pose label of the record covering the passed in time, or "unset" if the time is outside of the
+        /// 0 to 1 range or if no timestamp records have been defined
+        /// </returns>
+        public string GetPoseAtTime(float time)
+        {
+            if (time < 0 || time > 1) return "unset";
+
+            // An asset without any timestamp records has no pose to report; indexing into it would throw.
+            if (timestamps == null || timestamps.Count == 0) return "unset";
+
+            // Find the first record after the initial one that starts later than the requested time. The record
+            // just before it is the one that covers the requested time. If no record starts later, the loop runs
+            // off the end of the list and the final record is used.
+            var i = 1;
+            for (; i < timestamps.Count; i++)
+            {
+                if (timestamps[i].startOffsetPercent > time) break;
+            }
+
+            return timestamps[i - 1].poseLabel;
+        }
+    }
+}