added new reward function - not working

AIWintermuteAI · AIWintermuteAI · commit 6a00b2a3513f · 2023-08-23T21:30:08.000+02:00
diff --git a/isaacgymenvs/cfg/task/Atlas.yaml b/isaacgymenvs/cfg/task/Atlas.yaml
@@ -1,19 +1,31 @@
 # used to create the object
 name: Atlas
 
-physics_engine: ${..physics_engine}
+physics_engine: 'physx'
 
 env:
-  numEnvs: ${resolve_default:16,${...num_envs}}
+  numEnvs: ${resolve_default:512,${...num_envs}}
+  numObservations: 242
+  numActions: 30
   envSpacing: 4.  # [m]
+  enableDebugVis: False
 
-  clipObservations: 5.0
-  clipActions: 1.0
-
-  plane:
+  terrain:
+    terrainType: plane # none, plane, or trimesh
     staticFriction: 1.0  # [-]
     dynamicFriction: 1.0  # [-]
     restitution: 0.        # [-]
+    # rough terrain only:
+    curriculum: true
+    maxInitMapLevel: 0
+    mapLength: 8.
+    mapWidth: 8.
+    numLevels: 10
+    numTerrains: 20
+    # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
+    terrainProportions: [0.1, 0.1, 0.35, 0.25, 0.2]
+    # tri mesh only:
+    slopeTreshold: 0.5
 
   baseInitState:
     pos: [0.0, 0.0, 0.95] # x,y,z [m]
@@ -22,6 +34,7 @@ env:
     vAngular: [0.0, 0.0, 0.0]  # x,y,z [rad/s]
 
   randomCommandVelocityRanges:
+    # train
     linear_x: [-1., 1.] # min max [m/s]
     linear_y: [0., 0.]   # min max [m/s]
     yaw: [-1.57, 1.57]          # min max [rad/s]
@@ -30,47 +43,85 @@ env:
     # PD Drive parameters:
     stiffness: 85.0  # [N*m/rad]
     damping: 4.0     # [N*m*s/rad]
-    actionScale: 3.14
-    controlFrequencyInv: 1 # 60 Hz
+    # action scale: target angle = actionScale * action + defaultAngle
+    actionScale: 0.5
+    # decimation: Number of control action updates @ sim DT per policy DT
+    decimation: 4
 
   defaultJointAngles:  # = target angles when action = 0.0
-    left-back-shoulder-joint: 0.5      # [rad]
-    right-back-shoulder-joint: -0.5    # [rad]
-    left-front-shoulder-joint: -0.2    # [rad]
-    right-front-shoulder-joint: 0.2    # [rad]
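+    LF_HAA: 0.03    # [rad]  NOTE(review): 12 defaults listed in this section vs numActions: 30 - confirm every DOF has a default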
+    LH_HAA: 0.03    # [rad]
+    RF_HAA: -0.03   # [rad]
+    RH_HAA: -0.03   # [rad]
+
+    LF_HFE: 0.4     # [rad]
+    LH_HFE: -0.4    # [rad]
+    RF_HFE: 0.4     # [rad]
+    RH_HFE: -0.4    # [rad]
 
-    left-back-knee-joint: -0.8         # [rad]
-    right-back-knee-joint: 0.8         # [rad]
-    left-front-knee-joint: -0.5        # [rad]
-    right-front-knee-joint: 0.5        # [rad]
+    LF_KFE: -0.8    # [rad]
+    LH_KFE: 0.8     # [rad]
+    RF_KFE: -0.8    # [rad]
+    RH_KFE: 0.8     # [rad]
 
   urdfAsset:
-    collapseFixedJoints: True
-    fixBaseLink: False
+    file: "urdf/atlas/urdf/atlas_v4_with_multisense.urdf"
+    footName: foot # SHANK if collapsing fixed joint, FOOT otherwise - NOTE(review): hint does not match the value used here; confirm link name against the Atlas URDF
+    kneeName: glut
+    collapseFixedJoints: true
+    fixBaseLink: false
     defaultDofDriveMode: 1 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort)
 
   learn:
+    allowKneeContacts: true
     # rewards
+    terminalReward: 0.0
     linearVelocityXYRewardScale: 1.0
+    linearVelocityZRewardScale: -4.0
+    angularVelocityXYRewardScale: -0.05
     angularVelocityZRewardScale: 0.5
-    torqueRewardScale: -0.00002
-    headingScale: 0.25
-    upScale: 0.1
+    orientationRewardScale: -0. #-1.
+    torqueRewardScale: -0.00002 # -0.000025
+    jointAccRewardScale: -0.0005 # -0.0025
+    baseHeightRewardScale: -0.0 #5
+    feetAirTimeRewardScale:  1.0
+    kneeCollisionRewardScale: -0.25
+    feetStumbleRewardScale: -0. #-2.0
+    actionRateRewardScale: -0.01
+    # cosmetics
+    hipRewardScale: -0. #25
 
     # normalization
     linearVelocityScale: 2.0
     angularVelocityScale: 0.25
     dofPositionScale: 1.0
     dofVelocityScale: 0.05
+    heightMeasurementScale: 5.0
+
+    # noise
+    addNoise: false
+    noiseLevel: 1.0 # scales other values
+    dofPositionNoise: 0.01
+    dofVelocityNoise: 1.5
+    linearVelocityNoise: 0.1
+    angularVelocityNoise: 0.2
+    gravityNoise: 0.05
+    heightMeasurementNoise: 0.06
+
+    # randomization
+    randomizeFriction: false
+    frictionRange: [0.5, 1.25]
+    pushRobots: false
+    pushInterval_s: 15
 
     # episode length in seconds
-    episodeLength_s: 50
+    episodeLength_s: 20
 
   # viewer cam:
   viewer:
     refEnv: 0
-    pos: [0, 0, 4]  # [m]
-    lookat: [1., 1, 3.3]  # [m]
+    pos: [0, 0, 10]  # [m]
+    lookat: [1., 1, 9]  # [m]
 
   # set to True if you use camera sensors in the environment
   enableCameraSensors: False
@@ -94,74 +145,7 @@ sim:
     default_buffer_size_multiplier: 5.0
     max_gpu_contact_pairs: 8388608 # 8*1024*1024
     num_subscenes: ${....num_subscenes}
-    contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (default - all contacts)
+    contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)
 
 task:
   randomize: False
-  randomization_params:
-    frequency: 600   # Define how many environment steps between generating new randomizations
-    observations:
-      range: [0, .002] # range for the white noise
-      operation: "additive"
-      distribution: "gaussian"
-    actions:
-      range: [0., .02]
-      operation: "additive"
-      distribution: "gaussian"
-    sim_params:
-      gravity:
-        range: [0, 0.4]
-        operation: "additive"
-        distribution: "gaussian"
-        schedule: "linear"  # "linear" will linearly interpolate between no rand and max rand
-        schedule_steps: 3000
-    actor_params:
-      anymal:
-        color: True
-        rigid_body_properties:
-          mass:
-            range: [0.5, 1.5]
-            operation: "scaling"
-            distribution: "uniform"
-            setup_only: True # Property will only be randomized once before simulation is started. See Domain Randomization Documentation for more info.
-            schedule: "linear"  # "linear" will linearly interpolate between no rand and max rand
-            schedule_steps: 3000
-        rigid_shape_properties:
-          friction:
-            num_buckets: 500
-            range: [0.7, 1.3]
-            operation: "scaling"
-            distribution: "uniform"
-            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
-            schedule_steps: 3000
-          restitution:
-            range: [0., 0.7]
-            operation: "scaling"
-            distribution: "uniform"
-            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
-            schedule_steps: 3000
-        dof_properties:
-          damping:
-            range: [0.5, 1.5]
-            operation: "scaling"
-            distribution: "uniform"
-            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
-            schedule_steps: 3000
-          stiffness:
-            range: [0.5, 1.5]
-            operation: "scaling"
-            distribution: "uniform"
-            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
-            schedule_steps: 3000
-          lower:
-            range: [0, 0.01]
-            operation: "additive"
-            distribution: "gaussian"
-            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
-            schedule_steps: 3000
-          upper:
-            range: [0, 0.01]
-            operation: "additive"
-            distribution: "gaussian"
-            schedule: "linear"  # "linear" will scale the current random sample by `min(current num steps, schedule_steps) / schedule_steps`
-            schedule_steps: 3000
diff --git a/isaacgymenvs/cfg/train/AtlasPPO.yaml b/isaacgymenvs/cfg/train/AtlasPPO.yaml
@@ -67,7 +67,7 @@ params:
     truncate_grads: True
     grad_norm: 1.
     horizon_length: 24
-    minibatch_size: 16
+    minibatch_size: 512
     mini_epochs: 5
     critic_coef: 2
     clip_value: True
diff --git a/isaacgymenvs/tasks/atlas.py b/isaacgymenvs/tasks/atlas.py