
Commit 8676010 (1 parent: 5b2d164)

temp commit
Atlas imported and runs

File tree: 7 files changed, 990 additions and 31 deletions.

isaacgymenvs/cfg/task/Atlas.yaml (5 additions, 5 deletions)

@@ -4,7 +4,7 @@ name: Atlas
 physics_engine: ${..physics_engine}
 
 env:
-  numEnvs: ${resolve_default:1024,${...num_envs}}
+  numEnvs: ${resolve_default:16,${...num_envs}}
   envSpacing: 4.  # [m]
 
   clipObservations: 5.0
@@ -16,14 +16,14 @@ env:
   restitution: 0.  # [-]
 
   baseInitState:
-    pos: [0.0, 0.0, 1.05]  # x,y,z [m]
+    pos: [0.0, 0.0, 0.95]  # x,y,z [m]
     rot: [0.0, 0.0, 0.0, 1.0]  # x,y,z,w [quat]
     vLinear: [0.0, 0.0, 0.0]  # x,y,z [m/s]
     vAngular: [0.0, 0.0, 0.0]  # x,y,z [rad/s]
 
   randomCommandVelocityRanges:
-    linear_x: [0., 0.]  # min max [m/s]
-    linear_y: [-2., 2.]  # min max [m/s]
+    linear_x: [-1., 1.]  # min max [m/s]
+    linear_y: [0., 0.]  # min max [m/s]
     yaw: [-1.57, 1.57]  # min max [rad/s]
 
   control:
@@ -46,7 +46,7 @@ env:
 
   urdfAsset:
     collapseFixedJoints: True
-    fixBaseLink: True
+    fixBaseLink: False
     defaultDofDriveMode: 1  # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort)
 
   learn:
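These are bring-up settings: the default environment count drops from 1024 to 16, the base link is freed from its fixture, the spawn height is lowered so the freed robot starts near the ground, and the commanded velocity moves from the lateral (y) axis to the forward (x) axis. The ${resolve_default:...} expressions are OmegaConf resolvers; a minimal sketch of the mechanism, assuming the resolver is registered the way IsaacGymEnvs' launch script registers it:

from omegaconf import OmegaConf

# Fall back to the default when the command-line value is unset (empty string).
OmegaConf.register_new_resolver(
    "resolve_default", lambda default, arg: default if arg == "" else arg
)

cfg = OmegaConf.create({
    "num_envs": "",  # no num_envs=... override given on the command line
    "task": {"env": {"numEnvs": "${resolve_default:16,${...num_envs}}"}},
})
print(cfg.task.env.numEnvs)  # -> 16; a num_envs=64 override would win instead

So python train.py task=Atlas runs 16 environments by default, while num_envs=... on the command line still overrides it.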
New file (151 additions, 0 deletions). The path is not shown in this view; from the name field below it is presumably isaacgymenvs/cfg/task/AtlasTerrain.yaml.

@@ -0,0 +1,151 @@
+# used to create the object
+name: AtlasTerrain
+
+physics_engine: 'physx'
+
+env:
+  numEnvs: ${resolve_default:16,${...num_envs}}
+  numObservations: 242
+  numActions: 30
+  envSpacing: 4.  # [m]
+  enableDebugVis: False
+
+  terrain:
+    terrainType: plane  # none, plane, or trimesh
+    staticFriction: 1.0  # [-]
+    dynamicFriction: 1.0  # [-]
+    restitution: 0.  # [-]
+    # rough terrain only:
+    curriculum: true
+    maxInitMapLevel: 0
+    mapLength: 8.
+    mapWidth: 8.
+    numLevels: 10
+    numTerrains: 20
+    # terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
+    terrainProportions: [0.1, 0.1, 0.35, 0.25, 0.2]
+    # tri mesh only:
+    slopeTreshold: 0.5
+
+  baseInitState:
+    pos: [0.0, 0.0, 0.95]  # x,y,z [m]
+    rot: [0.0, 0.0, 0.0, 1.0]  # x,y,z,w [quat]
+    vLinear: [0.0, 0.0, 0.0]  # x,y,z [m/s]
+    vAngular: [0.0, 0.0, 0.0]  # x,y,z [rad/s]
+
+  randomCommandVelocityRanges:
+    # train
+    linear_x: [0., 0.]  # min max [m/s]
+    linear_y: [-1., 1.]  # min max [m/s]
+    yaw: [-3.14, 3.14]  # min max [rad/s]
+
+  control:
+    # PD Drive parameters:
+    stiffness: 85.0  # [N*m/rad]
+    damping: 4.0  # [N*m*s/rad]
+    # action scale: target angle = actionScale * action + defaultAngle
+    actionScale: 3.14
+    # decimation: Number of control action updates @ sim DT per policy DT
+    decimation: 1
+
+  defaultJointAngles:  # = target angles when action = 0.0
+    LF_HAA: 0.03  # [rad]
+    LH_HAA: 0.03  # [rad]
+    RF_HAA: -0.03  # [rad]
+    RH_HAA: -0.03  # [rad]
+
+    LF_HFE: 0.4  # [rad]
+    LH_HFE: -0.4  # [rad]
+    RF_HFE: 0.4  # [rad]
+    RH_HFE: -0.4  # [rad]
+
+    LF_KFE: -0.8  # [rad]
+    LH_KFE: 0.8  # [rad]
+    RF_KFE: -0.8  # [rad]
+    RH_KFE: 0.8  # [rad]
+
+  urdfAsset:
+    file: "urdf/atlas/urdf/atlas_v4_with_multisense.urdf"
+    footName: foot  # SHANK if collapsing fixed joint, FOOT otherwise
+    kneeName: glut
+    collapseFixedJoints: true
+    fixBaseLink: false
+    defaultDofDriveMode: 1  # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort)
+
+  learn:
+    allowKneeContacts: true
+    # rewards
+    terminalReward: 0.0
+    linearVelocityXYRewardScale: 1.0
+    linearVelocityZRewardScale: -4.0
+    angularVelocityXYRewardScale: -0.05
+    angularVelocityZRewardScale: 0.5
+    orientationRewardScale: -0.  #-1.
+    torqueRewardScale: -0.00002  # -0.000025
+    jointAccRewardScale: -0.0005  # -0.0025
+    baseHeightRewardScale: -0.0  #5
+    feetAirTimeRewardScale: 1.0
+    kneeCollisionRewardScale: -0.25
+    feetStumbleRewardScale: -0.  #-2.0
+    actionRateRewardScale: -0.01
+    # cosmetics
+    hipRewardScale: -0.  #25
+
+    # normalization
+    linearVelocityScale: 2.0
+    angularVelocityScale: 0.25
+    dofPositionScale: 1.0
+    dofVelocityScale: 0.05
+    heightMeasurementScale: 5.0
+
+    # noise
+    addNoise: false
+    noiseLevel: 1.0  # scales other values
+    dofPositionNoise: 0.01
+    dofVelocityNoise: 1.5
+    linearVelocityNoise: 0.1
+    angularVelocityNoise: 0.2
+    gravityNoise: 0.05
+    heightMeasurementNoise: 0.06
+
+    # randomization
+    randomizeFriction: false
+    frictionRange: [0.5, 1.25]
+    pushRobots: false
+    pushInterval_s: 15
+
+    # episode length in seconds
+    episodeLength_s: 20
+
+  # viewer cam:
+  viewer:
+    refEnv: 0
+    pos: [0, 0, 10]  # [m]
+    lookat: [1., 1, 9]  # [m]
+
+  # set to True if you use camera sensors in the environment
+  enableCameraSensors: False
+
+sim:
+  dt: 0.02
+  substeps: 2
+  up_axis: "z"
+  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
+  gravity: [0.0, 0.0, -9.81]
+  physx:
+    num_threads: ${....num_threads}
+    solver_type: ${....solver_type}
+    use_gpu: ${contains:"cuda",${....sim_device}}  # set to False to run on CPU
+    num_position_iterations: 4
+    num_velocity_iterations: 1
+    contact_offset: 0.02
+    rest_offset: 0.0
+    bounce_threshold_velocity: 0.2
+    max_depenetration_velocity: 100.0
+    default_buffer_size_multiplier: 5.0
+    max_gpu_contact_pairs: 8388608  # 8*1024*1024
+    num_subscenes: ${....num_subscenes}
+    contact_collection: 1  # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)
+
+task:
+  randomize: False
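The control block's comment fixes the action mapping as target angle = actionScale * action + defaultAngle, and with defaultDofDriveMode: 1 the joints are position-driven, so PhysX applies the stiffness and damping gains internally rather than the task computing torques itself. A sketch of the effective PD law those three parameters encode (illustrative only; the function name is hypothetical, not code from this commit):

def effective_pd_torque(actions, dof_pos, dof_vel, default_dof_pos,
                        kp=85.0, kd=4.0, action_scale=3.14):
    """Elementwise PD law; works on floats or torch tensors alike."""
    # target angle = actionScale * action + defaultAngle (comment in the config above)
    targets = action_scale * actions + default_dof_pos
    # stiffness pulls each joint toward its target; damping resists joint velocity
    return kp * (targets - dof_pos) - kd * dof_vel

With actionScale: 3.14, a full-scale action commands roughly +/-180 degrees away from the default pose, so the policy can reach most of each joint's range.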

isaacgymenvs/cfg/train/AtlasPPO.yaml (mode changed from 100755 to 100644; 51 additions, 21 deletions)

@@ -1,51 +1,81 @@
 params:
-
   seed: ${...seed}
 
   algo:
-    name: sac
+    name: a2c_continuous
 
   model:
-    name: soft_actor_critic
+    name: continuous_a2c_logstd
 
   network:
-    name: soft_actor_critic
+    name: actor_critic
     separate: True
+
     space:
       continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0.  # std = 1.
+        fixed_sigma: True
+
     mlp:
-      units: [256, 128, 64]
+      units: [512] #, 256, 128]
       activation: elu
+      d2rl: False
 
       initializer:
         name: default
-    log_std_bounds: [-5, 2]
+      regularizer:
+        name: None
+    rnn:
+      name: lstm
+      units: 256 #128
+      layers: 1
+      before_mlp: False #True
+      concat_input: True
+      layer_norm: False
+
 
   load_checkpoint: ${if:${...checkpoint},True,False}  # flag which sets whether to load the checkpoint
   load_path: ${...checkpoint}  # path to the checkpoint to load
 
   config:
-    name: ${resolve_default:Bittle,${....experiment}}
+    name: ${resolve_default:Atlas,${....experiment}}
     full_experiment_name: ${.name}
     env_name: rlgpu
+    ppo: True
+    mixed_precision: True
     normalize_input: True
+    normalize_value: True
+    normalize_advantage: True
+    value_bootstrap: True
+    clip_actions: False
+    num_actors: ${....task.env.numEnvs}
     reward_shaper:
       scale_value: 1.0
-    num_steps_per_episode: 8
     gamma: 0.99
-    init_alpha: 1.0
-    alpha_lr: 0.005
-    actor_lr: 0.0005
-    critic_lr: 0.0005
-    critic_tau: 0.005
-    batch_size: 8192
-    learnable_temperature: true
-    num_seed_steps: 5
-    num_warmup_steps: 10
-    replay_buffer_size: 1000000
-    num_actors: ${....task.env.numEnvs}
+    tau: 0.95
+    e_clip: 0.2
+    entropy_coef: 0.001
+    learning_rate: 3.e-4  # overwritten by adaptive lr_schedule
+    lr_schedule: adaptive
+    kl_threshold: 0.008  # target kl for adaptive lr
+    truncate_grads: True
+    grad_norm: 1.
+    horizon_length: 24
+    minibatch_size: 16
+    mini_epochs: 5
+    critic_coef: 2
+    clip_value: True
+    seq_len: 4  # only for rnn
+    bounds_loss_coef: 0.
 
     max_epochs: ${resolve_default:1000,${....max_iterations}}
     save_best_after: 200
-    save_frequency: 200
-    print_stats: True
+    score_to_win: 20000
+    save_frequency: 50
+    print_stats: True
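This file retargets the config from Bittle to Atlas and switches the trainer from SAC to rl_games' PPO (a2c_continuous) with a separate actor and critic, a single 512-unit MLP layer, and a 256-unit LSTM. PPO-with-RNN training in rl_games expects two divisibility constraints that are worth checking against these numbers; a quick sanity check, assuming num_actors resolves to the 16 environments set in Atlas.yaml:

num_actors = 16        # ${....task.env.numEnvs}, with numEnvs: 16 above
horizon_length = 24
minibatch_size = 16
seq_len = 4

batch_size = num_actors * horizon_length   # 384 transitions per PPO update
assert batch_size % minibatch_size == 0    # minibatches must tile the batch
assert horizon_length % seq_len == 0       # LSTM sequences must tile the horizon
print(batch_size // minibatch_size, "minibatches per mini-epoch")  # 24

At 16 environments the batch is tiny for PPO; that is consistent with the commit message calling this a temp commit for getting Atlas to run.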
New file (81 additions, 0 deletions). The path is not shown in this view; from the experiment name below it is presumably isaacgymenvs/cfg/train/AtlasTerrainPPO.yaml.

@@ -0,0 +1,81 @@
+params:
+  seed: ${...seed}
+
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: True
+
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0.  # std = 1.
+        fixed_sigma: True
+
+    mlp:
+      units: [512] #, 256, 128]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+    rnn:
+      name: lstm
+      units: 256 #128
+      layers: 1
+      before_mlp: False #True
+      concat_input: True
+      layer_norm: False
+
+
+  load_checkpoint: ${if:${...checkpoint},True,False}  # flag which sets whether to load the checkpoint
+  load_path: ${...checkpoint}  # path to the checkpoint to load
+
+  config:
+    name: ${resolve_default:AtlasTerrain,${....experiment}}
+    full_experiment_name: ${.name}
+    env_name: rlgpu
+    ppo: True
+    mixed_precision: True
+    normalize_input: True
+    normalize_value: True
+    normalize_advantage: True
+    value_bootstrap: True
+    clip_actions: False
+    num_actors: ${....task.env.numEnvs}
+    reward_shaper:
+      scale_value: 1.0
+    gamma: 0.99
+    tau: 0.95
+    e_clip: 0.2
+    entropy_coef: 0.001
+    learning_rate: 3.e-4  # overwritten by adaptive lr_schedule
+    lr_schedule: adaptive
+    kl_threshold: 0.008  # target kl for adaptive lr
+    truncate_grads: True
+    grad_norm: 1.
+    horizon_length: 24
+    minibatch_size: 16
+    mini_epochs: 5
+    critic_coef: 2
+    clip_value: True
+    seq_len: 4  # only for rnn
+    bounds_loss_coef: 0.
+
+    max_epochs: ${resolve_default:1000,${....max_iterations}}
+    save_best_after: 200
+    score_to_win: 20000
+    save_frequency: 50
+    print_stats: True
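With the task and train configs in place, the terrain variant should launch through the same Hydra entry point, e.g. python train.py task=AtlasTerrain. IsaacGymEnvs resolves the default train config name as the task name plus PPO, so for that default to work this file would need to live at cfg/train/AtlasTerrainPPO.yaml; that placement is inferred from the experiment name, since the diff view does not show the path.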

isaacgymenvs/tasks/__init__.py (2 additions, 0 deletions)

@@ -31,6 +31,7 @@
 from .anymal import Anymal
 from .anymal_terrain import AnymalTerrain
 from .atlas import Atlas
+from .atlas_terrain import AtlasTerrain
 from .ball_balance import BallBalance
 from .bittle import Bittle
 from .bittle_terrain import BittleTerrain
@@ -57,6 +58,7 @@
     "Anymal": Anymal,
     "AnymalTerrain": AnymalTerrain,
     "Atlas": Atlas,
+    "AtlasTerrain": AtlasTerrain,
     "BallBalance": BallBalance,
     "Bittle": Bittle,
     "BittleTerrain": BittleTerrain,
