diff --git a/CMakeLists.txt b/CMakeLists.txt index 8239b4118..82445f9fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,7 +56,7 @@ set(MUJOCO_BUILD_TESTS OFF) set(MUJOCO_TEST_PYTHON_UTIL OFF) set(MUJOCO_MPC_MUJOCO_GIT_TAG - 088079eff0450e32b98ee743141780ed68307506 + 3.1.4 CACHE STRING "Git revision for MuJoCo." ) diff --git a/README.md b/README.md index bf07fd4dc..c3cdb7b4d 100644 --- a/README.md +++ b/README.md @@ -58,12 +58,6 @@ Quadruped task: [![Quadruped](http://img.youtube.com/vi/esLuwaWz4oE/hqdefault.jpg)](https://www.youtube.com/watch?v=esLuwaWz4oE) - -Bimanual manipulation: - -[![Bimanual](http://img.youtube.com/vi/aCNCKVThKIE/hqdefault.jpg)](https://www.youtube.com/watch?v=aCNCKVThKIE) - - Rubik's cube 10-move unscramble: [![Unscramble](http://img.youtube.com/vi/ZRRvVWV-Muk/hqdefault.jpg)](https://www.youtube.com/watch?v=ZRRvVWV-Muk) diff --git a/cmake/MpcOptions.cmake b/cmake/MpcOptions.cmake index 0fe3e1bf7..ddf4ce7e8 100644 --- a/cmake/MpcOptions.cmake +++ b/cmake/MpcOptions.cmake @@ -103,10 +103,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang endif() endif() -if(NOT CMAKE_INTERPROCEDURAL_OPTIMIZATION AND (CMAKE_BUILD_TYPE AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug")) - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) -endif() - include(MujocoHarden) set(EXTRA_COMPILE_OPTIONS ${EXTRA_COMPILE_OPTIONS} ${MUJOCO_HARDEN_COMPILE_OPTIONS}) set(EXTRA_LINK_OPTIONS ${EXTRA_LINK_OPTIONS} ${MUJOCO_HARDEN_LINK_OPTIONS}) diff --git a/mjpc/CMakeLists.txt b/mjpc/CMakeLists.txt index f8fb30c3e..f7d1d11db 100644 --- a/mjpc/CMakeLists.txt +++ b/mjpc/CMakeLists.txt @@ -82,6 +82,15 @@ add_library( tasks/swimmer/swimmer.h tasks/walker/walker.cc tasks/walker/walker.h + tasks/humanoid_bench/utility/dm_control_utils_rewards.cpp + tasks/humanoid_bench/walk/walk.cc + tasks/humanoid_bench/walk/walk.h + tasks/humanoid_bench/walk_reward.cc + tasks/humanoid_bench/walk_reward.h + tasks/humanoid_bench/stand/stand.cc + tasks/humanoid_bench/stand/stand.h + tasks/humanoid_bench/push/push.cc + tasks/humanoid_bench/push/push.h planners/planner.cc planners/planner.h planners/policy.h diff --git a/mjpc/agent.cc b/mjpc/agent.cc index 5cb316fa7..f1072e721 100644 --- a/mjpc/agent.cc +++ b/mjpc/agent.cc @@ -451,8 +451,8 @@ int Agent::SetModeByName(std::string_view name) { void Agent::ModifyScene(mjvScene* scn) { // if acting is off make all geom colors grayscale if (!action_enabled) { - int cube = mj_name2id(model_, mjOBJ_MATERIAL, "cube"); - int graycube = mj_name2id(model_, mjOBJ_MATERIAL, "graycube"); + int cube = mj_name2id(model_, mjOBJ_TEXTURE, "cube"); + int graycube = mj_name2id(model_, mjOBJ_TEXTURE, "graycube"); for (int i = 0; i < scn->ngeom; i++) { mjvGeom* g = scn->geoms + i; // skip static and decor geoms @@ -463,8 +463,8 @@ void Agent::ModifyScene(mjvScene* scn) { double rgb_average = (g->rgba[0] + g->rgba[1] + g->rgba[2]) / 3; g->rgba[0] = g->rgba[1] = g->rgba[2] = rgb_average; // specifically for the hand task, make grayscale cube. - if (cube > -1 && graycube > -1 && g->matid == cube) { - g->matid = graycube; + if (cube > -1 && graycube > -1 && g->texid == cube) { + g->texid = graycube; } } } @@ -508,10 +508,14 @@ void Agent::ModifyScene(mjvScene* scn) { color); // make geometry - mjv_connector( + mjv_makeConnector( &scn->geoms[scn->ngeom], mjGEOM_CAPSULE, width, - winner->trace.data() + 3 * num_trace * i + 3 * j, - winner->trace.data() + 3 * num_trace * (i + 1) + 3 * j); + winner->trace[3 * num_trace * i + 3 * j], + winner->trace[3 * num_trace * i + 1 + 3 * j], + winner->trace[3 * num_trace * i + 2 + 3 * j], + winner->trace[3 * num_trace * (i + 1) + 3 * j], + winner->trace[3 * num_trace * (i + 1) + 1 + 3 * j], + winner->trace[3 * num_trace * (i + 1) + 2 + 3 * j]); // increment number of geometries scn->ngeom += 1; } diff --git a/mjpc/grpc/agent_service.cc b/mjpc/grpc/agent_service.cc index eecce9101..2c8385764 100644 --- a/mjpc/grpc/agent_service.cc +++ b/mjpc/grpc/agent_service.cc @@ -35,12 +35,12 @@ using ::agent::GetAllModesRequest; using ::agent::GetAllModesResponse; using ::agent::GetBestTrajectoryRequest; using ::agent::GetBestTrajectoryResponse; +using ::agent::GetResidualsRequest; +using ::agent::GetResidualsResponse; using ::agent::GetCostValuesAndWeightsRequest; using ::agent::GetCostValuesAndWeightsResponse; using ::agent::GetModeRequest; using ::agent::GetModeResponse; -using ::agent::GetResidualsRequest; -using ::agent::GetResidualsResponse; using ::agent::GetStateRequest; using ::agent::GetStateResponse; using ::agent::GetTaskParametersRequest; @@ -115,7 +115,8 @@ grpc::Status AgentService::Init(grpc::ServerContext* context, << "Multiple instances of AgentService detected."; agent_model = agent_.GetModel(); // copy the model before agent model's timestep and integrator are updated - CHECK_EQ(model, nullptr) << "Multiple instances of AgentService detected."; + CHECK_EQ(model, nullptr) + << "Multiple instances of AgentService detected."; model = mj_copyModel(nullptr, agent_model); data_ = mj_makeData(model); rollout_data_.reset(mj_makeData(model)); @@ -189,14 +190,14 @@ grpc::Status AgentService::GetAction(grpc::ServerContext* context, return out; } -grpc::Status AgentService::GetResiduals(grpc::ServerContext* context, - const GetResidualsRequest* request, - GetResidualsResponse* response) { +grpc::Status AgentService::GetResiduals( + grpc::ServerContext* context, const GetResidualsRequest* request, + GetResidualsResponse* response) { if (!Initialized()) { return {grpc::StatusCode::FAILED_PRECONDITION, "Init not called."}; } - return grpc_agent_util::GetResiduals(request, &agent_, model, data_, - response); + return grpc_agent_util::GetResiduals(request, &agent_, model, + data_, response); } grpc::Status AgentService::GetCostValuesAndWeights( @@ -274,9 +275,9 @@ grpc::Status AgentService::GetTaskParameters( return grpc_agent_util::GetTaskParameters(request, &agent_, response); } -grpc::Status AgentService::SetCostWeights(grpc::ServerContext* context, - const SetCostWeightsRequest* request, - SetCostWeightsResponse* response) { +grpc::Status AgentService::SetCostWeights( + grpc::ServerContext* context, const SetCostWeightsRequest* request, + SetCostWeightsResponse* response) { if (!Initialized()) { return {grpc::StatusCode::FAILED_PRECONDITION, "Init not called."}; } @@ -349,9 +350,10 @@ grpc::Status AgentService::GetBestTrajectory( return grpc::Status::OK; } -grpc::Status AgentService::SetAnything(grpc::ServerContext* context, - const SetAnythingRequest* request, - SetAnythingResponse* response) { + +grpc::Status AgentService::SetAnything( + grpc::ServerContext* context, const SetAnythingRequest* request, + SetAnythingResponse* response) { if (!Initialized()) { return {grpc::StatusCode::FAILED_PRECONDITION, "Init not called."}; } diff --git a/mjpc/grpc/direct_service.cc b/mjpc/grpc/direct_service.cc index 8f5545313..1f24621ef 100644 --- a/mjpc/grpc/direct_service.cc +++ b/mjpc/grpc/direct_service.cc @@ -74,7 +74,9 @@ grpc::Status DirectService::Init(grpc::ServerContext* context, // mjVFS structs need to be allocated on the heap, because it's ~2MB auto vfs = std::make_unique(); mj_defaultVFS(vfs.get()); - mj_addBufferVFS(vfs.get(), file, mjb.data(), mjb.size()); + mj_makeEmptyFileVFS(vfs.get(), file, mjb.size()); + int file_idx = mj_findFileVFS(vfs.get(), file); + memcpy(vfs->filedata[file_idx], mjb.data(), mjb.size()); tmp_model = {mj_loadModel(file, vfs.get()), mj_deleteModel}; mj_deleteFileVFS(vfs.get(), file); } else if (request->has_model() && request->model().has_xml()) { @@ -86,7 +88,9 @@ grpc::Status DirectService::Init(grpc::ServerContext* context, // mjVFS structs need to be allocated on the heap, because it's ~2MB auto vfs = std::make_unique(); mj_defaultVFS(vfs.get()); - mj_addBufferVFS(vfs.get(), file, model_xml.data(), model_xml.size()); + mj_makeEmptyFileVFS(vfs.get(), file, model_xml.size()); + int file_idx = mj_findFileVFS(vfs.get(), file); + memcpy(vfs->filedata[file_idx], model_xml.data(), model_xml.size()); tmp_model = {mj_loadXML(file, vfs.get(), load_error, sizeof(load_error)), mj_deleteModel}; mj_deleteFileVFS(vfs.get(), file); diff --git a/mjpc/grpc/filter_service.cc b/mjpc/grpc/filter_service.cc index b1d8165d9..f1be0f336 100644 --- a/mjpc/grpc/filter_service.cc +++ b/mjpc/grpc/filter_service.cc @@ -14,6 +14,7 @@ #include "mjpc/grpc/filter_service.h" +#include #include #include #include @@ -69,7 +70,9 @@ grpc::Status FilterService::Init(grpc::ServerContext* context, // mjVFS structs need to be allocated on the heap, because it's ~2MB auto vfs = std::make_unique(); mj_defaultVFS(vfs.get()); - mj_addBufferVFS(vfs.get(), file, mjb.data(), mjb.size()); + mj_makeEmptyFileVFS(vfs.get(), file, mjb.size()); + int file_idx = mj_findFileVFS(vfs.get(), file); + memcpy(vfs->filedata[file_idx], mjb.data(), mjb.size()); tmp_model = {mj_loadModel(file, vfs.get()), mj_deleteModel}; mj_deleteFileVFS(vfs.get(), file); } else if (request->has_model() && request->model().has_xml()) { @@ -81,7 +84,9 @@ grpc::Status FilterService::Init(grpc::ServerContext* context, // mjVFS structs need to be allocated on the heap, because it's ~2MB auto vfs = std::make_unique(); mj_defaultVFS(vfs.get()); - mj_addBufferVFS(vfs.get(), file, model_xml.data(), model_xml.size()); + mj_makeEmptyFileVFS(vfs.get(), file, model_xml.size()); + int file_idx = mj_findFileVFS(vfs.get(), file); + memcpy(vfs->filedata[file_idx], model_xml.data(), model_xml.size()); tmp_model = {mj_loadXML(file, vfs.get(), load_error, sizeof(load_error)), mj_deleteModel}; mj_deleteFileVFS(vfs.get(), file); diff --git a/mjpc/grpc/grpc_agent_util.cc b/mjpc/grpc/grpc_agent_util.cc index 29cc2f65a..1c5ae3c87 100644 --- a/mjpc/grpc/grpc_agent_util.cc +++ b/mjpc/grpc/grpc_agent_util.cc @@ -14,6 +14,7 @@ #include "mjpc/grpc/grpc_agent_util.h" +#include #include #include #include @@ -520,7 +521,9 @@ mjpc::UniqueMjModel LoadModelFromString(std::string_view xml, char* error, // mjVFS structs need to be allocated on the heap, because it's ~2MB auto vfs = std::make_unique(); mj_defaultVFS(vfs.get()); - mj_addBufferVFS(vfs.get(), file, xml.data(), xml.size()); + mj_makeEmptyFileVFS(vfs.get(), file, xml.size()); + int file_idx = mj_findFileVFS(vfs.get(), file); + std::memcpy(vfs->filedata[file_idx], xml.data(), xml.size()); mjpc::UniqueMjModel m = {mj_loadXML(file, vfs.get(), error, error_size), mj_deleteModel}; mj_deleteFileVFS(vfs.get(), file); @@ -532,7 +535,9 @@ mjpc::UniqueMjModel LoadModelFromBytes(std::string_view mjb) { // mjVFS structs need to be allocated on the heap, because it's ~2MB auto vfs = std::make_unique(); mj_defaultVFS(vfs.get()); - mj_addBufferVFS(vfs.get(), file, mjb.data(), mjb.size()); + mj_makeEmptyFileVFS(vfs.get(), file, mjb.size()); + int file_idx = mj_findFileVFS(vfs.get(), file); + memcpy(vfs->filedata[file_idx], mjb.data(), mjb.size()); mjpc::UniqueMjModel m = {mj_loadModel(file, vfs.get()), mj_deleteModel}; mj_deleteFileVFS(vfs.get(), file); return m; diff --git a/mjpc/planners/cross_entropy/planner.cc b/mjpc/planners/cross_entropy/planner.cc index 76e222ebd..2d6a34557 100644 --- a/mjpc/planners/cross_entropy/planner.cc +++ b/mjpc/planners/cross_entropy/planner.cc @@ -465,10 +465,14 @@ void CrossEntropyPlanner::Traces(mjvScene* scn) { // elite index int idx = trajectory_order[k]; // make geometry - mjv_connector( + mjv_makeConnector( &scn->geoms[scn->ngeom], mjGEOM_LINE, width, - trajectory[idx].trace.data() + 3*task->num_trace * i + 3 * j, - trajectory[idx].trace.data() + 3*task->num_trace * (i + 1) + 3 * j); + trajectory[idx].trace[3 * task->num_trace * i + 3 * j], + trajectory[idx].trace[3 * task->num_trace * i + 1 + 3 * j], + trajectory[idx].trace[3 * task->num_trace * i + 2 + 3 * j], + trajectory[idx].trace[3 * task->num_trace * (i + 1) + 3 * j], + trajectory[idx].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j], + trajectory[idx].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]); // increment number of geometries scn->ngeom += 1; diff --git a/mjpc/planners/gradient/planner.cc b/mjpc/planners/gradient/planner.cc index 3a192fad2..ba36ea152 100644 --- a/mjpc/planners/gradient/planner.cc +++ b/mjpc/planners/gradient/planner.cc @@ -448,10 +448,14 @@ void GradientPlanner::Traces(mjvScene* scn) { color); // make geometry - mjv_connector( + mjv_makeConnector( &scn->geoms[scn->ngeom], mjGEOM_LINE, width, - trajectory[k].trace.data() + 3 * task->num_trace * i + 3 * j, - trajectory[k].trace.data() + 3 * task->num_trace * (i + 1) + 3 * j); + trajectory[k].trace[3 * task->num_trace * i + 3 * j], + trajectory[k].trace[3 * task->num_trace * i + 1 + 3 * j], + trajectory[k].trace[3 * task->num_trace * i + 2 + 3 * j], + trajectory[k].trace[3 * task->num_trace * (i + 1) + 3 * j], + trajectory[k].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j], + trajectory[k].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]); // increment number of geometries scn->ngeom += 1; diff --git a/mjpc/planners/sample_gradient/planner.cc b/mjpc/planners/sample_gradient/planner.cc index 8368f01c3..a12b557da 100644 --- a/mjpc/planners/sample_gradient/planner.cc +++ b/mjpc/planners/sample_gradient/planner.cc @@ -542,10 +542,14 @@ void SampleGradientPlanner::Traces(mjvScene* scn) { idx < num_noisy ? white : orange); // make geometry - mjv_connector( + mjv_makeConnector( &scn->geoms[scn->ngeom], mjGEOM_LINE, width, - trajectory[k].trace.data() + 3 * task->num_trace * i + 3 * j, - trajectory[k].trace.data() + 3 * task->num_trace * (i + 1) + 3 * j); + trajectory[idx].trace[3 * task->num_trace * i + 3 * j], + trajectory[idx].trace[3 * task->num_trace * i + 1 + 3 * j], + trajectory[idx].trace[3 * task->num_trace * i + 2 + 3 * j], + trajectory[idx].trace[3 * task->num_trace * (i + 1) + 3 * j], + trajectory[idx].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j], + trajectory[idx].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]); // increment number of geometries scn->ngeom += 1; diff --git a/mjpc/planners/sampling/planner.cc b/mjpc/planners/sampling/planner.cc index 10b5baba0..8ac2646d6 100644 --- a/mjpc/planners/sampling/planner.cc +++ b/mjpc/planners/sampling/planner.cc @@ -16,7 +16,6 @@ #include #include -#include #include #include @@ -113,11 +112,8 @@ void SamplingPlanner::Reset(int horizon, time = 0.0; // policy parameters - { - const std::unique_lock lock(mtx_); - policy.Reset(horizon, initial_repeated_action); - previous_policy.Reset(horizon, initial_repeated_action); - } + policy.Reset(horizon, initial_repeated_action); + previous_policy.Reset(horizon, initial_repeated_action); // scratch plan_scratch.Clear(); @@ -154,9 +150,6 @@ void SamplingPlanner::SetState(const State& state) { int SamplingPlanner::OptimizePolicyCandidates(int ncandidates, int horizon, ThreadPool& pool) { - // resample nominal policy to current time - this->UpdateNominalPolicy(horizon); - // if num_trajectory_ has changed, use it in this new iteration. // num_trajectory_ might change while this function runs. Keep it constant // for the duration of this function. @@ -195,6 +188,9 @@ int SamplingPlanner::OptimizePolicyCandidates(int ncandidates, int horizon, // optimize nominal policy using random sampling void SamplingPlanner::OptimizePolicy(int horizon, ThreadPool& pool) { + // resample nominal policy to current time + this->UpdateNominalPolicy(horizon); + OptimizePolicyCandidates(1, horizon, pool); // ----- update policy ----- // @@ -270,19 +266,10 @@ void SamplingPlanner::UpdateNominalPolicy(int horizon) { time_shift = time_horizon; } - const std::unique_lock lock(mtx_); - - // special case for when simulation time is reset (which doesn't cause - // Planner::Reset) - if (policy.plan.Size() && policy.plan.begin()->time() > nominal_time) { - // time went backwards. keep the nominal plan, but start at the new time - policy.plan.ShiftTime(nominal_time); - previous_policy.plan.ShiftTime(nominal_time); - } - + const std::shared_lock lock(mtx_); policy.plan.DiscardBefore(nominal_time); if (policy.plan.Size() == 0) { - policy.plan.AddNode(nominal_time); + policy.plan.AddNode(time); } while (policy.plan.Size() < num_spline_points) { // duplicate the last node, with a time further in the future. @@ -316,7 +303,7 @@ void SamplingPlanner::UpdateNominalPolicy(int horizon) { // copy scratch into plan { - const std::unique_lock lock(mtx_); + const std::shared_lock lock(mtx_); policy.plan = plan_scratch; } } @@ -430,10 +417,14 @@ void SamplingPlanner::Traces(mjvScene* scn) { color); // make geometry - mjv_connector( + mjv_makeConnector( &scn->geoms[scn->ngeom], mjGEOM_LINE, width, - trajectory[k].trace.data() + 3 * task->num_trace * i + 3 * j, - trajectory[k].trace.data() + 3 * task->num_trace * (i + 1) + 3 * j); + trajectory[k].trace[3 * task->num_trace * i + 3 * j], + trajectory[k].trace[3 * task->num_trace * i + 1 + 3 * j], + trajectory[k].trace[3 * task->num_trace * i + 2 + 3 * j], + trajectory[k].trace[3 * task->num_trace * (i + 1) + 3 * j], + trajectory[k].trace[3 * task->num_trace * (i + 1) + 1 + 3 * j], + trajectory[k].trace[3 * task->num_trace * (i + 1) + 2 + 3 * j]); // increment number of geometries scn->ngeom += 1; @@ -536,7 +527,7 @@ void SamplingPlanner::CopyCandidateToPolicy(int candidate) { winner = trajectory_order[candidate]; { - const std::unique_lock lock(mtx_); + const std::shared_lock lock(mtx_); previous_policy = policy; policy = candidate_policy[winner]; } diff --git a/mjpc/simulate.cc b/mjpc/simulate.cc index 6af568413..6bbd08a20 100644 --- a/mjpc/simulate.cc +++ b/mjpc/simulate.cc @@ -617,8 +617,8 @@ void MakePhysicsSection(mj::Simulate* sim, int oldstate) { {mjITEM_EDITNUM, "LS Tol", 2, &(opt->ls_tolerance), "1 0 0.1"}, {mjITEM_EDITINT, "Noslip Iter", 2, &(opt->noslip_iterations), "1 0 1000"}, {mjITEM_EDITNUM, "Noslip Tol", 2, &(opt->noslip_tolerance), "1 0 1"}, - {mjITEM_EDITINT, "CCD Iter", 2, &(opt->ccd_iterations), "1 0 1000"}, - {mjITEM_EDITNUM, "CCD Tol", 2, &(opt->ccd_tolerance), "1 0 1"}, + {mjITEM_EDITINT, "MPR Iter", 2, &(opt->mpr_iterations), "1 0 1000"}, + {mjITEM_EDITNUM, "MPR Tol", 2, &(opt->mpr_tolerance), "1 0 1"}, {mjITEM_EDITNUM, "API Rate", 2, &(opt->apirate), "1 0 1000"}, {mjITEM_EDITINT, "SDF Iter", 2, &(opt->sdf_iterations), "1 1 20"}, {mjITEM_EDITINT, "SDF Init", 2, &(opt->sdf_initpoints), "1 1 100"}, diff --git a/mjpc/spline/spline.cc b/mjpc/spline/spline.cc index bd2bf53dd..c36b3d9c9 100644 --- a/mjpc/spline/spline.cc +++ b/mjpc/spline/spline.cc @@ -187,16 +187,6 @@ int TimeSpline::DiscardBefore(double time) { return nodes_to_remove; } -void TimeSpline::ShiftTime(double start_time) { - if (times_.empty()) { - return; - } - double shift = start_time - times_[0]; - for (int i = 0; i < times_.size(); i++) { - times_[i] += shift; - } -} - void TimeSpline::Clear() { times_.clear(); values_begin_ = 0; diff --git a/mjpc/spline/spline.h b/mjpc/spline/spline.h index bacfe32b6..2a8a875eb 100644 --- a/mjpc/spline/spline.h +++ b/mjpc/spline/spline.h @@ -242,11 +242,6 @@ class TimeSpline { // Returns the number of nodes removed. int DiscardBefore(double time); - // Keeps all existing nodes, but shifts the time of the first node to be - // `start_time`, and all other times are shifted accordingly. No resampling - // is performed. - void ShiftTime(double start_time); - // Removes all existing nodes. void Clear(); diff --git a/mjpc/tasks/CMakeLists.txt b/mjpc/tasks/CMakeLists.txt index c8e345d78..5e8675a53 100644 --- a/mjpc/tasks/CMakeLists.txt +++ b/mjpc/tasks/CMakeLists.txt @@ -199,6 +199,28 @@ add_custom_target( COMMAND ${Python_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/manipulation/merge_panda_robotiq.py ${CMAKE_CURRENT_BINARY_DIR}/manipulation/panda_robotiq.xml + + # Unitree H1 + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${menagerie_SOURCE_DIR}/unitree_h1/assets + ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/h1 + COMMAND ${CMAKE_COMMAND} -E copy + ${menagerie_SOURCE_DIR}/unitree_h1/h1.xml + ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/h1/h1.xml + COMMAND patch -o ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/h1/h1_modified.xml + ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/h1/h1.xml + <${CMAKE_CURRENT_SOURCE_DIR}/humanoid_bench/h1.xml.patch + + # Unitree G1 + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${menagerie_SOURCE_DIR}/unitree_g1/assets + ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/g1 + COMMAND ${CMAKE_COMMAND} -E copy + ${menagerie_SOURCE_DIR}/unitree_g1/g1.xml + ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/g1/g1.xml + COMMAND patch -o ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/g1/g1_modified.xml + ${CMAKE_CURRENT_BINARY_DIR}/humanoid_bench/g1/g1.xml + <${CMAKE_CURRENT_SOURCE_DIR}/humanoid_bench/g1.xml.patch COMMENT "Copying Menagerie and dm_control assets into binary directory") add_custom_target(copy_resources ALL diff --git a/mjpc/tasks/bimanual/aloha.patch b/mjpc/tasks/bimanual/aloha.patch index 496d966f8..22d80a2a2 100644 --- a/mjpc/tasks/bimanual/aloha.patch +++ b/mjpc/tasks/bimanual/aloha.patch @@ -73,11 +73,10 @@ -@@ -283,6 +283,5 @@ +@@ -283,5 +283,5 @@ - + -- diff --git a/mjpc/tasks/bimanual/handover/handover.cc b/mjpc/tasks/bimanual/handover/handover.cc index ca7bcec56..524a0017a 100644 --- a/mjpc/tasks/bimanual/handover/handover.cc +++ b/mjpc/tasks/bimanual/handover/handover.cc @@ -14,7 +14,8 @@ #include "mjpc/tasks/bimanual/handover/handover.h" -#include +#include +#include #include #include @@ -67,11 +68,9 @@ void Handover::ResidualFn::Residual(const mjModel* model, const mjData* data, name[7] = finger_name[segment][1]; int finger_sensor_id = mj_name2id(model, mjOBJ_SENSOR, name); finger[segment] = model->sensor_objid[finger_sensor_id]; - assert(finger[segment] > 0); } int target_sensor_id = mj_name2id(model, mjOBJ_SENSOR, "box"); int object_id = model->sensor_objid[target_sensor_id]; - assert(object_id > 0); // loop over contacts, add up (and maybe flip) relevant normals int ncon = data->ncon; @@ -143,11 +142,8 @@ void Handover::TransitionLocked(mjModel* model, mjData* data) { last_solve_time = data->time; } - int target_id = mj_name2id(model, mjOBJ_GEOM, "target"); - assert(target_id > 0); - // reset target on success - if (data->time > 0 && dist < model->geom_size[target_id * 3]) { + if (data->time > 0 && dist < .01) { absl::BitGen gen_; // move target diff --git a/mjpc/tasks/bimanual/handover/task.xml b/mjpc/tasks/bimanual/handover/task.xml index ea6b4a577..2416275ed 100644 --- a/mjpc/tasks/bimanual/handover/task.xml +++ b/mjpc/tasks/bimanual/handover/task.xml @@ -41,8 +41,8 @@ - - + + @@ -74,7 +74,7 @@ - + diff --git a/mjpc/tasks/humanoid/interact/contact_keyframe.h b/mjpc/tasks/humanoid/interact/contact_keyframe.h index 53090f05f..1171bbc1a 100644 --- a/mjpc/tasks/humanoid/interact/contact_keyframe.h +++ b/mjpc/tasks/humanoid/interact/contact_keyframe.h @@ -15,12 +15,12 @@ #ifndef MJPC_TASKS_HUMANOID_INTERACT_CONTACT_KEYFRAME_H_ #define MJPC_TASKS_HUMANOID_INTERACT_CONTACT_KEYFRAME_H_ +#include + #include #include #include -#include - namespace mjpc::humanoid { // ---------- Constants ----------------- // diff --git a/mjpc/tasks/humanoid/interact/interact.cc b/mjpc/tasks/humanoid/interact/interact.cc index a64b95854..6bb360979 100644 --- a/mjpc/tasks/humanoid/interact/interact.cc +++ b/mjpc/tasks/humanoid/interact/interact.cc @@ -14,10 +14,8 @@ #include "mjpc/tasks/humanoid/interact/interact.h" -#include - #include -#include "mjpc/tasks/humanoid/interact/contact_keyframe.h" + #include "mjpc/utilities.h" namespace mjpc::humanoid { diff --git a/mjpc/tasks/humanoid/interact/scenes/armchair.xml b/mjpc/tasks/humanoid/interact/scenes/armchair.xml index b0b0f6723..08686b865 100644 --- a/mjpc/tasks/humanoid/interact/scenes/armchair.xml +++ b/mjpc/tasks/humanoid/interact/scenes/armchair.xml @@ -26,7 +26,7 @@ arms : 6 chair : 7 --> - - + diff --git a/mjpc/tasks/humanoid/interact/scenes/bed.xml b/mjpc/tasks/humanoid/interact/scenes/bed.xml index a85fd14ae..6f3c87208 100644 --- a/mjpc/tasks/humanoid/interact/scenes/bed.xml +++ b/mjpc/tasks/humanoid/interact/scenes/bed.xml @@ -24,7 +24,7 @@ arms : 6 chair : 7 --> - diff --git a/mjpc/tasks/humanoid/interact/scenes/dining_chair.xml b/mjpc/tasks/humanoid/interact/scenes/dining_chair.xml index 62c9af5ee..331fbbd71 100644 --- a/mjpc/tasks/humanoid/interact/scenes/dining_chair.xml +++ b/mjpc/tasks/humanoid/interact/scenes/dining_chair.xml @@ -27,7 +27,7 @@ arms : 6 chair : 7 --> - + 0.00636"/> diff --git a/mjpc/tasks/humanoid/interact/scenes/stool.xml b/mjpc/tasks/humanoid/interact/scenes/stool.xml index 8302030be..757069c2c 100644 --- a/mjpc/tasks/humanoid/interact/scenes/stool.xml +++ b/mjpc/tasks/humanoid/interact/scenes/stool.xml @@ -10,34 +10,34 @@ - diff --git a/mjpc/tasks/humanoid/interact/task.xml b/mjpc/tasks/humanoid/interact/task.xml index 5db8ddcc5..e3b32fa6a 100644 --- a/mjpc/tasks/humanoid/interact/task.xml +++ b/mjpc/tasks/humanoid/interact/task.xml @@ -13,7 +13,7 @@ - + @@ -36,7 +36,7 @@ - + @@ -50,18 +50,18 @@ - + - + - + diff --git a/mjpc/tasks/humanoid_bench/Readme.md b/mjpc/tasks/humanoid_bench/Readme.md new file mode 100644 index 000000000..3e9473c0c --- /dev/null +++ b/mjpc/tasks/humanoid_bench/Readme.md @@ -0,0 +1,16 @@ +# HumanoidBench in MuJoCo-MPC + +This directory contains the re-implementation of some of the tasks of the HumanoidBench benchmark in MuJoCo-MPC. The original implementation is in [this repository](https://github.com/carlosferrazza/humanoid-bench). + +## Reward to Residuals +MuJoCo-MPC uses residuals with multiple dimensions instead of a single reward. +The residuals should be 'close to zero' to indicate a good performance. So in each task, the first step is to compute the reward the same way it is done in the original implementation. +Then, the first dimension of the residual is set to x - reward, where x is the maximum reward that can be achieved in the task. + +## Additional Residuals +In addition to the reward residual, we also add additional residuals. We found them to be helpful to solve the task. +To get the 'vanilla' version of the task, you can set the additional residuals weights to zero, using the sliders in the GUI. + +## Robots +In the original implementation, they use a position controlled H1 robot from unitree. +In addition, in some of the tasks we also include the G1 robot from unitree. This robot is torque controlled. diff --git a/mjpc/tasks/humanoid_bench/assets/dark-wood.png b/mjpc/tasks/humanoid_bench/assets/dark-wood.png new file mode 100644 index 000000000..69e773595 Binary files /dev/null and b/mjpc/tasks/humanoid_bench/assets/dark-wood.png differ diff --git a/mjpc/tasks/humanoid_bench/assets/floor.xml b/mjpc/tasks/humanoid_bench/assets/floor.xml new file mode 100644 index 000000000..60078c107 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/assets/floor.xml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/mjpc/tasks/humanoid_bench/assets/kitchen-wood.png b/mjpc/tasks/humanoid_bench/assets/kitchen-wood.png new file mode 100644 index 000000000..8d2b69b49 Binary files /dev/null and b/mjpc/tasks/humanoid_bench/assets/kitchen-wood.png differ diff --git a/mjpc/tasks/humanoid_bench/assets/plywood-4k.png b/mjpc/tasks/humanoid_bench/assets/plywood-4k.png new file mode 100644 index 000000000..d2c1013cb Binary files /dev/null and b/mjpc/tasks/humanoid_bench/assets/plywood-4k.png differ diff --git a/mjpc/tasks/humanoid_bench/g1.xml.patch b/mjpc/tasks/humanoid_bench/g1.xml.patch new file mode 100644 index 000000000..f2354b4de --- /dev/null +++ b/mjpc/tasks/humanoid_bench/g1.xml.patch @@ -0,0 +1,22 @@ +diff --git a/g1_modified.xml b/g1_modified.xml +--- a/g1_modified.xml ++++ b/g1_modified.xml +@@ -1,4 +1,4 @@ +- ++ + + + +@@ -409,12 +409,6 @@ + + + +- +- +- +- +- +- + + + ++ + + + + + + ++ + + +- +- +- +- ++ + + + +@@ -24,7 +22,31 @@ + + + ++ ++ ++ + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +@@ -59,39 +81,42 @@ + + + ++ diaginertia="0.0490211 0.0445821 0.00824619"/> + + + + ++ diaginertia="0.00304494 0.00296885 0.00189201"/> + + + + + ++ diaginertia="0.00243264 0.00225325 0.00205492"/> + + + + +- ++ + + + + + + +- ++ + + + + + +- ++ + + + +@@ -104,34 +129,37 @@ + + + ++ diaginertia="0.00304494 0.00296885 0.00189201"/> + + + + + ++ diaginertia="0.00243264 0.00225325 0.00205492"/> + + + + +- ++ + + + + + + +- ++ + + + + + +- ++ + + + +@@ -143,8 +171,9 @@ + + + +- ++ + + + +@@ -152,59 +181,72 @@ + + + ++ + + +- ++ + + + +- ++ + + +- ++ + +- ++ + + + + +- ++ + + + + ++ + + + + + +- ++ + + + +- ++ + + +- ++ + +- ++ + + + + +- ++ + + + + ++ + + + +@@ -219,24 +261,24 @@ + + + +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + \ No newline at end of file diff --git a/mjpc/tasks/humanoid_bench/push/Push_G1.xml b/mjpc/tasks/humanoid_bench/push/Push_G1.xml new file mode 100644 index 000000000..7f81945db --- /dev/null +++ b/mjpc/tasks/humanoid_bench/push/Push_G1.xml @@ -0,0 +1,91 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mjpc/tasks/humanoid_bench/push/Push_H1.xml b/mjpc/tasks/humanoid_bench/push/Push_H1.xml new file mode 100644 index 000000000..a46d898a5 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/push/Push_H1.xml @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mjpc/tasks/humanoid_bench/push/push.cc b/mjpc/tasks/humanoid_bench/push/push.cc new file mode 100644 index 000000000..5210db140 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/push/push.cc @@ -0,0 +1,253 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mjpc/tasks/humanoid_bench/push/push.h" + +#include +#include +#include + +#include "mujoco/mujoco.h" + +namespace mjpc { +// ------------------ Residuals for humanoid stand task ------------ +// Number of residuals: +// Residual(0): humanoid_bench reward +// Residual(1): Height: head feet vertical error +// Residual(2): CoM Velocity +// Residual(3): joint velocity +// Residual(4): balance +// Residual(5): upright +// Residual(6): position +// Residual(7): posture +// Residual(8): velocity +// Residual(9): control +// Residual(10): box goal distance +// Residual(11): left hand distance +// Residual(12): right hand distance +// Number of parameters: +// Parameter(0): head height goal +// ---------------------------------------------------------------- +void push::ResidualFn::Residual(const mjModel *model, const mjData *data, + double *residual) const { + double const height_goal = parameters_[0]; + + int counter = 0; + + //------------- Reward for the push task as in humanoid_bench --------------// + double const hand_dist_penalty = 0.1; + double const target_dist_penalty = 1.0; + double const success = 1000; + + // ----- object position ----- // + double const *object_pos = SensorByName(model, data, "object_pos"); + double goal_dist = mju_dist3(object_pos, task_->target_position_.data()); + + double penalty_dist = target_dist_penalty * goal_dist; + double reward_success = (goal_dist < 0.05) ? success : 0; + + // ----- hand position ----- // + double hand_dist = + mju_dist3(SensorByName(model, data, "left_hand_pos"), object_pos); + double penalty_hand = hand_dist_penalty * hand_dist; + + // ----- reward ----- // + double reward = -penalty_hand - penalty_dist + reward_success; + + //--------------- End of reward calculation -----------------// + + residual[counter++] = success - reward; + + // -------------- Below are additional residuals -------------- // + + // ----- Height: head feet vertical error ----- // + + // feet sensor positions + double *foot_right_pos = SensorByName(model, data, "foot_right_pos"); + double *foot_left_pos = SensorByName(model, data, "foot_left_pos"); + + double *head_position = SensorByName(model, data, "head_position"); + double head_feet_error = + head_position[2] - 0.5 * (foot_right_pos[2] + foot_left_pos[2]); + residual[counter++] = head_feet_error - height_goal; + + // ----- Balance: CoM-feet xy error ----- // + + // capture point + double *com_velocity = SensorByName(model, data, "torso_subtreelinvel"); + + // ----- COM xy velocity should be 0 ----- // + mju_copy(&residual[counter], com_velocity, 2); + counter += 2; + + // ----- joint velocity ----- // + mju_copy(residual + counter, data->qvel + 6, model->nu); + counter += model->nu; + + // ----- torso height ----- // + double torso_height = SensorByName(model, data, "torso_position")[2]; + + // ----- balance ----- // + // capture point + double *subcom = SensorByName(model, data, "torso_subcom"); + double *subcomvel = SensorByName(model, data, "torso_subcomvel"); + + double capture_point[3]; + mju_addScl(capture_point, subcom, subcomvel, 0.3, 3); + capture_point[2] = 1.0e-3; + + // project onto line segment + + double axis[3]; + double center[3]; + double vec[3]; + double pcp[3]; + mju_sub3(axis, foot_right_pos, foot_left_pos); + axis[2] = 1.0e-3; + double length = 0.5 * mju_normalize3(axis) - 0.05; + mju_add3(center, foot_right_pos, foot_left_pos); + mju_scl3(center, center, 0.5); + mju_sub3(vec, capture_point, center); + + // project onto axis + double t = mju_dot3(vec, axis); + + // clamp + t = mju_max(-length, mju_min(length, t)); + mju_scl3(vec, axis, t); + mju_add3(pcp, vec, center); + pcp[2] = 1.0e-3; + + // is standing + double standing = + torso_height / mju_sqrt(torso_height * torso_height + 0.45 * 0.45) - 0.4; + + mju_sub(&residual[counter], capture_point, pcp, 2); + mju_scl(&residual[counter], &residual[counter], standing, 2); + + counter += 2; + + // ----- upright ----- // + double *torso_up = SensorByName(model, data, "torso_up"); + double *pelvis_up = SensorByName(model, data, "pelvis_up"); + double *foot_right_up = SensorByName(model, data, "foot_right_up"); + double *foot_left_up = SensorByName(model, data, "foot_left_up"); + + double z_ref[3] = {0.0, 0.0, 1.0}; + + // torso + residual[counter++] = torso_up[2] - 1.0; + + // pelvis + residual[counter++] = 0.3 * (pelvis_up[2] - 1.0); + + // right foot + mju_sub3(&residual[counter], foot_right_up, z_ref); + mju_scl3(&residual[counter], &residual[counter], 0.1 * standing); + counter += 3; + + mju_sub3(&residual[counter], foot_left_up, z_ref); + mju_scl3(&residual[counter], &residual[counter], 0.1 * standing); + counter += 3; + + // ----- keep initial position -----// + mju_sub(&residual[counter], data->qpos, model->key_qpos, 7); + counter += 7; + + // ----- posture ----- // + mju_sub(&residual[counter], data->qpos + 7, model->key_qpos + 7, model->nu); + counter += model->nu; + + // com vel + double *waist_lower_subcomvel = + SensorByName(model, data, "waist_lower_subcomvel"); + double *torso_velocity = SensorByName(model, data, "torso_velocity"); + double com_vel[2]; + mju_add(com_vel, waist_lower_subcomvel, torso_velocity, 2); + mju_scl(com_vel, com_vel, 0.5, 2); + + // ----- move feet ----- // + double *foot_right_vel = SensorByName(model, data, "foot_right_vel"); + double *foot_left_vel = SensorByName(model, data, "foot_left_vel"); + double move_feet[2]; + mju_copy(move_feet, com_vel, 2); + mju_addToScl(move_feet, foot_right_vel, -0.5, 2); + mju_addToScl(move_feet, foot_left_vel, -0.5, 2); + + mju_copy(&residual[counter], move_feet, 2); + mju_scl(&residual[counter], &residual[counter], standing, 2); + counter += 2; + + // ----- control ----- // + mju_sub(&residual[counter], data->ctrl, model->key_qpos + 7, + model->nu); // because of pos control + counter += model->nu; + + // ------ box position ------ // + mju_sub3(&residual[counter], object_pos, task_->target_position_.data()); + mju_scl3(&residual[counter], &residual[counter], standing); + counter += 3; + + // ----- distance between hands and box ----- // + mju_sub3(&residual[counter], SensorByName(model, data, "left_hand_pos"), + object_pos); + counter += 3; + mju_sub3(&residual[counter], SensorByName(model, data, "right_hand_pos"), + object_pos); + counter += 3; + + // sensor dim sanity check + // TODO: use this pattern everywhere and make this a utility function + int user_sensor_dim = 0; + for (int i = 0; i < model->nsensor; i++) { + if (model->sensor_type[i] == mjSENS_USER) { + user_sensor_dim += model->sensor_dim[i]; + } + } + if (user_sensor_dim != counter) { + mju_error_i( + "mismatch between total user-sensor dimension " + "and actual length of residual %d", + counter); + } +} + +// -------- Transition for humanoid_bench push task -------- // +// ------------------------------------------------------------ // +void push::TransitionLocked(mjModel *model, mjData *data) { + double *object_pos = SensorByName(model, data, "object_pos"); + double goal_dist = mju_dist3(object_pos, target_position_.data()); + if (goal_dist < 0.05) { // consider task as solved + // set random target position + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis_x(0.7, 1.0); + std::uniform_real_distribution<> dis_y(-0.5, 0.5); + target_position_ = {dis_x(gen), dis_y(gen), 1.0}; + printf("New target position: %f, %f\n", target_position_[0], + target_position_[1]); + } + mju_copy3(data->mocap_pos, target_position_.data()); +} + +void push::ResetLocked(const mjModel *model) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis_x(0.7, 1.0); + std::uniform_real_distribution<> dis_y(-0.5, 0.5); + target_position_ = {dis_x(gen), dis_y(gen), 1.0}; + printf("New target position: %f, %f\n", target_position_[0], + target_position_[1]); +} +} // namespace mjpc diff --git a/mjpc/tasks/humanoid_bench/push/push.h b/mjpc/tasks/humanoid_bench/push/push.h new file mode 100644 index 000000000..fb058066e --- /dev/null +++ b/mjpc/tasks/humanoid_bench/push/push.h @@ -0,0 +1,95 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MJPC_TASKS_HUMANOID_BENCH_PUSH_PUSH_H_ +#define MJPC_TASKS_HUMANOID_BENCH_PUSH_PUSH_H_ + +#include +#include +#include + +#include "mjpc/task.h" +#include "mjpc/utilities.h" +#include "mujoco/mujoco.h" + +namespace mjpc { +class push : public Task { + public: + std::string Name() const override = 0; + + std::string XmlPath() const override = 0; + + class ResidualFn : public mjpc::BaseResidualFn { + public: + explicit ResidualFn(const push *task) + : mjpc::BaseResidualFn(task), task_(const_cast(task)) {} + + void Residual(const mjModel *model, const mjData *data, + double *residual) const override; + + private: + push *task_; + }; + + push() : residual_(this) { + target_position_ = {0.85, 0.0, 1.0}; + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis_x(0.7, 1.0); + std::uniform_real_distribution<> dis_y(-0.5, 0.5); + target_position_ = {dis_x(gen), dis_y(gen), 1.0}; + } + + void TransitionLocked(mjModel *model, mjData *data) override; + + void ResetLocked(const mjModel *model) override; + + protected: + std::unique_ptr ResidualLocked() const + + override { + return std::make_unique(this); + } + + ResidualFn *InternalResidual() + + override { + return &residual_; + } + + private: + ResidualFn residual_; + std::array target_position_; +}; + +class Push_H1 : public push { + public: + std::string Name() const override { return "Push H1"; } + + std::string XmlPath() const override { + return GetModelPath("humanoid_bench/push/Push_H1.xml"); + } +}; + +class G1_push : public push { + public: + std::string Name() const override { return "Push G1"; } + + std::string XmlPath() const override { + return GetModelPath("humanoid_bench/push/Push_G1.xml"); + } +}; +} // namespace mjpc + +#endif // MJPC_TASKS_HUMANOID_BENCH_PUSH_PUSH_H_ diff --git a/mjpc/tasks/humanoid_bench/push/push.xml b/mjpc/tasks/humanoid_bench/push/push.xml new file mode 100644 index 000000000..396057344 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/push/push.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mjpc/tasks/humanoid_bench/stand/Stand_G1.xml b/mjpc/tasks/humanoid_bench/stand/Stand_G1.xml new file mode 100644 index 000000000..9d864d809 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/stand/Stand_G1.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/mjpc/tasks/humanoid_bench/stand/Stand_H1.xml b/mjpc/tasks/humanoid_bench/stand/Stand_H1.xml new file mode 100644 index 000000000..87bb6c23d --- /dev/null +++ b/mjpc/tasks/humanoid_bench/stand/Stand_H1.xml @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mjpc/tasks/humanoid_bench/stand/stand.cc b/mjpc/tasks/humanoid_bench/stand/stand.cc new file mode 100644 index 000000000..84b7026ce --- /dev/null +++ b/mjpc/tasks/humanoid_bench/stand/stand.cc @@ -0,0 +1,179 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mjpc/tasks/humanoid_bench/stand/stand.h" + +#include "mjpc/tasks/humanoid_bench/walk_reward.h" +#include "mujoco/mujoco.h" + +namespace mjpc { +// ------------------ Residuals for humanoid stand task ------------ +// Number of residuals: +// Residual(0): 1 - humanoid_bench reward +// Residual(1): Height: head feet vertical error +// Residual(2): Balance: CoM Velocity +// Residual(3): joint velocity +// Residual(4): balance +// Residual(5): upright +// Residual(6): posture +// Residual(7): velocity +// Residual(8): control +// Number of parameters: +// Parameter(0): head height goal +// ---------------------------------------------------------------- +void Stand::ResidualFn::Residual(const mjModel *model, const mjData *data, + double *residual) const { + double const height_goal = parameters_[0]; + + int counter = 0; + + // ----- humanoid_bench reward ----- // + residual[counter++] = 1.0 - walk_reward(model, data, 0.0, height_goal); + + // ----- Height: head feet vertical error ----- // + + // feet sensor positions + double *foot_right_pos = SensorByName(model, data, "foot_right_pos"); + double *foot_left_pos = SensorByName(model, data, "foot_left_pos"); + + double *head_position = SensorByName(model, data, "head_position"); + double head_feet_error = + head_position[2] - 0.5 * (foot_right_pos[2] + foot_left_pos[2]); + residual[counter++] = head_feet_error - height_goal; + + // ----- Balance: CoM-feet xy error ----- // + + // capture point + double *com_velocity = SensorByName(model, data, "torso_subtreelinvel"); + + // ----- COM xy velocity should be 0 ----- // + mju_copy(&residual[counter], com_velocity, 2); + counter += 2; + + // ----- joint velocity ----- // + mju_copy(residual + counter, data->qvel + 6, model->nv - 6); + counter += model->nv - 6; + + // ----- torso height ----- // + double torso_height = SensorByName(model, data, "torso_position")[2]; + + // ----- balance ----- // + // capture point + double *subcom = SensorByName(model, data, "torso_subcom"); + double *subcomvel = SensorByName(model, data, "torso_subcomvel"); + + double capture_point[3]; + mju_addScl(capture_point, subcom, subcomvel, 0.3, 3); + capture_point[2] = 1.0e-3; + + // project onto line segment + + double axis[3]; + double center[3]; + double vec[3]; + double pcp[3]; + mju_sub3(axis, foot_right_pos, foot_left_pos); + axis[2] = 1.0e-3; + double length = 0.5 * mju_normalize3(axis) - 0.05; + mju_add3(center, foot_right_pos, foot_left_pos); + mju_scl3(center, center, 0.5); + mju_sub3(vec, capture_point, center); + + // project onto axis + double t = mju_dot3(vec, axis); + + // clamp + t = mju_max(-length, mju_min(length, t)); + mju_scl3(vec, axis, t); + mju_add3(pcp, vec, center); + pcp[2] = 1.0e-3; + + // is standing + double standing = + torso_height / mju_sqrt(torso_height * torso_height + 0.45 * 0.45) - 0.4; + + mju_sub(&residual[counter], capture_point, pcp, 2); + mju_scl(&residual[counter], &residual[counter], standing, 2); + + counter += 2; + + // ----- upright ----- // + double *torso_up = SensorByName(model, data, "torso_up"); + double *pelvis_up = SensorByName(model, data, "pelvis_up"); + double *foot_right_up = SensorByName(model, data, "foot_right_up"); + double *foot_left_up = SensorByName(model, data, "foot_left_up"); + + double z_ref[3] = {0.0, 0.0, 1.0}; + + // torso + residual[counter++] = torso_up[2] - 1.0; + + // pelvis + residual[counter++] = 0.3 * (pelvis_up[2] - 1.0); + + // right foot + mju_sub3(&residual[counter], foot_right_up, z_ref); + mju_scl3(&residual[counter], &residual[counter], 0.1 * standing); + counter += 3; + + mju_sub3(&residual[counter], foot_left_up, z_ref); + mju_scl3(&residual[counter], &residual[counter], 0.1 * standing); + counter += 3; + + // ----- posture ----- // + mju_sub(&residual[counter], data->qpos + 7, model->key_qpos + 7, + model->nq - 7); + counter += model->nq - 7; + + // com vel + double *waist_lower_subcomvel = + SensorByName(model, data, "waist_lower_subcomvel"); + double *torso_velocity = SensorByName(model, data, "torso_velocity"); + double com_vel[2]; + mju_add(com_vel, waist_lower_subcomvel, torso_velocity, 2); + mju_scl(com_vel, com_vel, 0.5, 2); + + // ----- move feet ----- // + double *foot_right_vel = SensorByName(model, data, "foot_right_vel"); + double *foot_left_vel = SensorByName(model, data, "foot_left_vel"); + double move_feet[2]; + mju_copy(move_feet, com_vel, 2); + mju_addToScl(move_feet, foot_right_vel, -0.5, 2); + mju_addToScl(move_feet, foot_left_vel, -0.5, 2); + + mju_copy(&residual[counter], move_feet, 2); + mju_scl(&residual[counter], &residual[counter], standing, 2); + counter += 2; + + // ----- control ----- // + mju_sub(&residual[counter], data->ctrl, model->key_qpos + 7, + model->nq - 7); // because of pos control + counter += model->nq - 7; + + // sensor dim sanity check + // TODO: use this pattern everywhere and make this a utility function + int user_sensor_dim = 0; + for (int i = 0; i < model->nsensor; i++) { + if (model->sensor_type[i] == mjSENS_USER) { + user_sensor_dim += model->sensor_dim[i]; + } + } + if (user_sensor_dim != counter) { + mju_error_i( + "mismatch between total user-sensor dimension " + "and actual length of residual %d", + counter); + } +} +} // namespace mjpc diff --git a/mjpc/tasks/humanoid_bench/stand/stand.h b/mjpc/tasks/humanoid_bench/stand/stand.h new file mode 100644 index 000000000..46e0e1d39 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/stand/stand.h @@ -0,0 +1,71 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MJPC_TASKS_HUMANOID_BENCH_STAND_STAND_H_ +#define MJPC_TASKS_HUMANOID_BENCH_STAND_STAND_H_ + +#include +#include + +#include "mjpc/task.h" +#include "mjpc/utilities.h" +#include "mujoco/mujoco.h" + +namespace mjpc { +class Stand : public Task { + public: + std::string Name() const override = 0; + + std::string XmlPath() const override = 0; + + class ResidualFn : public mjpc::BaseResidualFn { + public: + explicit ResidualFn(const Stand *task) : mjpc::BaseResidualFn(task) {} + + void Residual(const mjModel *model, const mjData *data, + double *residual) const override; + }; + + Stand() : residual_(this) {} + + protected: + std::unique_ptr ResidualLocked() const override { + return std::make_unique(this); + } + + ResidualFn *InternalResidual() override { return &residual_; } + + private: + ResidualFn residual_; +}; + +class Stand_H1 : public Stand { + public: + std::string Name() const override { return "Stand H1"; } + + std::string XmlPath() const override { + return GetModelPath("humanoid_bench/stand/Stand_H1.xml"); + } +}; + +class Stand_G1 : public Stand { + public: + std::string Name() const override { return "Stand G1"; } + + std::string XmlPath() const override { + return GetModelPath("humanoid_bench/stand/Stand_G1.xml"); + } +}; +} // namespace mjpc +#endif // MJPC_TASKS_HUMANOID_BENCH_STAND_STAND_H_ diff --git a/mjpc/tasks/humanoid_bench/utility/dm_control_utils_rewards.cpp b/mjpc/tasks/humanoid_bench/utility/dm_control_utils_rewards.cpp new file mode 100644 index 000000000..dec4aa17e --- /dev/null +++ b/mjpc/tasks/humanoid_bench/utility/dm_control_utils_rewards.cpp @@ -0,0 +1,81 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dm_control_utils_rewards.h" + +#include +#include + +// +// this file contains the reimplementation of some of the utility functions from +// dm_control the original implementation is from Google DeepMind and can be +// found here: +// https://github.com/google-deepmind/dm_control/tree/main/dm_control/utils/rewards.py +// + +double sigmoid(double x, double value_at_1, std::string sigmoid_type) { + if (sigmoid_type == "cosine" || sigmoid_type == "linear" || + sigmoid_type == "quadratic") { + if (!(0 <= value_at_1 && value_at_1 <= 1)) { + throw std::invalid_argument("Value at 1 must be in [0, 1]."); + } + } else { + if (!(0 < value_at_1 && value_at_1 < 1)) { + throw std::invalid_argument("Value at 1 must be in (0, 1)."); + } + } + if (sigmoid_type == "gaussian") { + double scale = std::sqrt(-2 * std::log(value_at_1)); + return std::exp(-0.5 * std::pow(x * scale, 2)); + } else if (sigmoid_type == "linear") { + double scale = 1.0 - value_at_1; + double scaled_x = x * scale; + return std::abs(scaled_x) < 1.0 ? 1.0 - scaled_x : 0.0; + } else if (sigmoid_type == "quadratic") { + double scale = std::sqrt(1 - value_at_1); + double scaled_x = x * scale; + return std::abs(scaled_x) < 1.0 ? 1.0 - scaled_x * scaled_x : 0.0; + } else { + // in the python implementation there are some more sigmoid types, but they + // are currently not used + throw std::invalid_argument("Unknown sigmoid type."); + } +} + +double tolerance(double x, std::pair bounds, double margin, + std::string sigmoid_type, double value_at_margin) { + double lower = bounds.first; + double upper = bounds.second; + + if (lower > upper) { + throw std::invalid_argument("Lower bound must be <= than upper bound."); + } + if (margin < 0) { + throw std::invalid_argument("Margin must be >= 0."); + } + bool in_bounds = lower <= x && x <= upper; + double value; + if (margin == 0) { + value = in_bounds ? 1.0 : 0.0; + } else { + double distance = + std::min(std::abs(x - lower), std::abs(x - upper)) / margin; + if (in_bounds) { + value = 1.0; + } else { + value = sigmoid(distance, value_at_margin, sigmoid_type); + } + } + return value; +} diff --git a/mjpc/tasks/humanoid_bench/utility/dm_control_utils_rewards.h b/mjpc/tasks/humanoid_bench/utility/dm_control_utils_rewards.h new file mode 100644 index 000000000..087a1759a --- /dev/null +++ b/mjpc/tasks/humanoid_bench/utility/dm_control_utils_rewards.h @@ -0,0 +1,34 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MUJOCO_MPC_DM_CONTROL_UTILS_REWARDS_H +#define MUJOCO_MPC_DM_CONTROL_UTILS_REWARDS_H + +#include +#include + +// +// this file contains the reimplementation of some of the utility functions from +// dm_control the original implementation is from Google DeepMind and can be +// found here: +// https://github.com/google-deepmind/dm_control/tree/main/dm_control/utils/rewards.py +// + +double sigmoid(double x, double value_at_1, std::string sigmoid_type); + +double tolerance(double x, std::pair bounds = {0.0, 0.0}, + double margin = 0.0, std::string sigmoid_type = "gaussian", + double value_at_margin = 0.1); + +#endif // MUJOCO_MPC_DM_CONTROL_UTILS_REWARDS_H diff --git a/mjpc/tasks/humanoid_bench/walk/Walk_G1.xml b/mjpc/tasks/humanoid_bench/walk/Walk_G1.xml new file mode 100644 index 000000000..14aa2bc99 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/walk/Walk_G1.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mjpc/tasks/humanoid_bench/walk/Walk_H1.xml b/mjpc/tasks/humanoid_bench/walk/Walk_H1.xml new file mode 100644 index 000000000..59dbc4818 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/walk/Walk_H1.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mjpc/tasks/humanoid_bench/walk/walk.cc b/mjpc/tasks/humanoid_bench/walk/walk.cc new file mode 100644 index 000000000..0b694d429 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/walk/walk.cc @@ -0,0 +1,198 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mjpc/tasks/humanoid_bench/walk/walk.h" + +#include +#include +#include + +#include "mjpc/tasks/humanoid_bench/walk_reward.h" +#include "mujoco/mujoco.h" + +namespace mjpc { +// ------------------ Residuals for humanoid walk task ------------ +// Number of residuals: +// Residual(0): 1 - humanoid_bench reward +// Residual(1): torso height +// Residual(2): pelvis-feet alignment +// Residual(3): balance +// Residual(4): upright +// Residual(5): posture +// Residual(6): face direction +// Residual(7): walk +// Residual(8): velocity +// Residual(9): control +// Number of parameters: +// Parameter(0): torso height goal +// Parameter(1): head height goal +// Parameter(2): speed goal +// Parameter(3): direction goal +// ---------------------------------------------------------------- +void Walk::ResidualFn::Residual(const mjModel *model, const mjData *data, + double *residual) const { + double const torso_height_goal = parameters_[0]; + double const head_height_goal = parameters_[1]; + double const speed_goal = parameters_[2]; + double const direction_goal = parameters_[3]; + + int counter = 0; + + // ----- humanoid_bench reward ----- // + residual[counter++] = + 1.0 - walk_reward(model, data, speed_goal, head_height_goal); + + // ----- torso height ----- // + double torso_height = SensorByName(model, data, "torso_position")[2]; + residual[counter++] = torso_height - torso_height_goal; + + // ----- pelvis / feet ----- // + double *foot_right = SensorByName(model, data, "foot_right"); + double *foot_left = SensorByName(model, data, "foot_left"); + double pelvis_height = SensorByName(model, data, "pelvis_position")[2]; + residual[counter++] = + 0.5 * (foot_left[2] + foot_right[2]) - pelvis_height - 0.2; + + // ----- balance ----- // + // capture point + double *subcom = SensorByName(model, data, "torso_subcom"); + double *subcomvel = SensorByName(model, data, "torso_subcomvel"); + + double capture_point[3]; + mju_addScl(capture_point, subcom, subcomvel, 0.3, 3); + capture_point[2] = 1.0e-3; + + // project onto line segment + + double axis[3]; + double center[3]; + double vec[3]; + double pcp[3]; + mju_sub3(axis, foot_right, foot_left); + axis[2] = 1.0e-3; + double length = 0.5 * mju_normalize3(axis) - 0.05; + mju_add3(center, foot_right, foot_left); + mju_scl3(center, center, 0.5); + mju_sub3(vec, capture_point, center); + + // project onto axis + double t = mju_dot3(vec, axis); + + // clamp + t = mju_max(-length, mju_min(length, t)); + mju_scl3(vec, axis, t); + mju_add3(pcp, vec, center); + pcp[2] = 1.0e-3; + + // is standing + double standing = + torso_height / mju_sqrt(torso_height * torso_height + 0.45 * 0.45) - 0.4; + + standing = std::max(0.0, standing); + + mju_sub(&residual[counter], capture_point, pcp, 2); + mju_scl(&residual[counter], &residual[counter], standing, 2); + + counter += 2; + + // ----- upright ----- // + double *torso_up = SensorByName(model, data, "torso_up"); + double *pelvis_up = SensorByName(model, data, "pelvis_up"); + double *foot_right_up = SensorByName(model, data, "foot_right_up"); + double *foot_left_up = SensorByName(model, data, "foot_left_up"); + double z_ref[3] = {0.0, 0.0, 1.0}; + + // torso + residual[counter++] = torso_up[2] - 1.0; + + // pelvis + residual[counter++] = 0.3 * (pelvis_up[2] - 1.0); + + // right foot + mju_sub3(&residual[counter], foot_right_up, z_ref); + mju_scl3(&residual[counter], &residual[counter], 0.1 * standing); + counter += 3; + + mju_sub3(&residual[counter], foot_left_up, z_ref); + mju_scl3(&residual[counter], &residual[counter], 0.1 * standing); + counter += 3; + + // ----- posture ----- // + mju_sub(&residual[counter], data->qpos + 7, model->key_qpos + 7, model->nu); + counter += model->nu; + + // ----- Walk ----- // + double *torso_forward = SensorByName(model, data, "torso_forward"); + double *pelvis_forward = SensorByName(model, data, "pelvis_forward"); + double *foot_right_forward = SensorByName(model, data, "foot_right_forward"); + double *foot_left_forward = SensorByName(model, data, "foot_left_forward"); + + double forward[2]; + mju_copy(forward, torso_forward, 2); + mju_addTo(forward, pelvis_forward, 2); + mju_addTo(forward, foot_right_forward, 2); + mju_addTo(forward, foot_left_forward, 2); + mju_normalize(forward, 2); + + // Face in right-direction + double direction_goal_radiant = direction_goal * M_PI / 180; + double face_x[2] = {cos(direction_goal_radiant), sin(direction_goal_radiant)}; + mju_sub(&residual[counter], forward, face_x, 2); + mju_scl(&residual[counter], &residual[counter], standing, 2); + counter += 2; + + // com vel + double *waist_lower_subcomvel = + SensorByName(model, data, "waist_lower_subcomvel"); + double *torso_velocity = SensorByName(model, data, "torso_velocity"); + double com_vel[2]; + mju_add(com_vel, waist_lower_subcomvel, torso_velocity, 2); + mju_scl(com_vel, com_vel, 0.5, 2); + + // Walk forward + residual[counter++] = standing * (mju_dot(com_vel, forward, 2) - speed_goal); + + // ----- move feet ----- // + double *foot_right_vel = SensorByName(model, data, "foot_right_velocity"); + double *foot_left_vel = SensorByName(model, data, "foot_left_velocity"); + double move_feet[2]; + mju_copy(move_feet, com_vel, 2); + mju_addToScl(move_feet, foot_right_vel, -0.5, 2); + mju_addToScl(move_feet, foot_left_vel, -0.5, 2); + + mju_copy(&residual[counter], move_feet, 2); + mju_scl(&residual[counter], &residual[counter], standing, 2); + counter += 2; + + // ----- control ----- // + mju_sub(&residual[counter], data->ctrl, model->key_qpos + 7, + model->nu); // because of pos control + counter += model->nu; + + // sensor dim sanity check + // TODO: use this pattern everywhere and make this a utility function + int user_sensor_dim = 0; + for (int i = 0; i < model->nsensor; i++) { + if (model->sensor_type[i] == mjSENS_USER) { + user_sensor_dim += model->sensor_dim[i]; + } + } + if (user_sensor_dim != counter) { + mju_error_i( + "mismatch between total user-sensor dimension " + "and actual length of residual %d", + counter); + } +} +} // namespace mjpc diff --git a/mjpc/tasks/humanoid_bench/walk/walk.h b/mjpc/tasks/humanoid_bench/walk/walk.h new file mode 100644 index 000000000..d9dc37966 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/walk/walk.h @@ -0,0 +1,72 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MJPC_TASKS_HUMANOID_BENCH_WALK_WALK_H_ +#define MJPC_TASKS_HUMANOID_BENCH_WALK_WALK_H_ + +#include +#include + +#include "mjpc/task.h" +#include "mjpc/utilities.h" +#include "mujoco/mujoco.h" + +namespace mjpc { +class Walk : public Task { + public: + std::string Name() const override = 0; + + std::string XmlPath() const override = 0; + + class ResidualFn : public mjpc::BaseResidualFn { + public: + explicit ResidualFn(const Walk *task) : mjpc::BaseResidualFn(task) {} + + void Residual(const mjModel *model, const mjData *data, + double *residual) const override; + }; + + Walk() : residual_(this) {} + + protected: + std::unique_ptr ResidualLocked() const override { + return std::make_unique(this); + } + + ResidualFn *InternalResidual() override { return &residual_; } + + private: + ResidualFn residual_; +}; + +class Walk_H1 : public Walk { + public: + std::string Name() const override { return "Walk H1"; } + + std::string XmlPath() const override { + return GetModelPath("humanoid_bench/Walk/Walk_H1.xml"); + } +}; + +class Walk_G1 : public Walk { + public: + std::string Name() const override { return "Walk G1"; } + + std::string XmlPath() const override { + return GetModelPath("humanoid_bench/Walk/Walk_G1.xml"); + } +}; +} // namespace mjpc + +#endif // MJPC_TASKS_HUMANOID_BENCH_WALK_WALK_H_ diff --git a/mjpc/tasks/humanoid_bench/walk_reward.cc b/mjpc/tasks/humanoid_bench/walk_reward.cc new file mode 100644 index 000000000..ddebc983c --- /dev/null +++ b/mjpc/tasks/humanoid_bench/walk_reward.cc @@ -0,0 +1,78 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mjpc/tasks/humanoid_bench/walk_reward.h" + +#include +#include + +#include "mjpc/tasks/humanoid_bench/utility/dm_control_utils_rewards.h" +#include "mjpc/utilities.h" +#include "mujoco/mujoco.h" + +namespace mjpc { +// this is a one to one re-implementation of the reward as it is done in +// the humanoid_bench paper // +// it is used for the Stand, Walk, and Run task, with respective values +// for walk_speed // +double walk_reward(const mjModel *model, const mjData *data, + const double walk_speed, const double stand_height) { + // initialize reward + double reward = 1.0; + + // ----- standing ----- // + double head_height = SensorByName(model, data, "head_height")[2]; + double standing = + tolerance(head_height, {stand_height, INFINITY}, stand_height / 4); + + reward *= standing; + + // ----- torso upright ----- // + double torso_upright = SensorByName(model, data, "torso_upright")[2]; + double upright = + tolerance(torso_upright, {0.9, INFINITY}, 1.9, "linear", 0.0); + + reward *= upright; + + // ----- small control ----- // + double small_control = 0.0; + for (int i = 0; i < model->nu; i++) { + small_control += tolerance(data->ctrl[i], {0.0, 0.0}, 10.0, "quadratic"); + } + small_control /= model->nu; // average over all controls + small_control = (4 + small_control) / 5; + + reward *= small_control; + + // ----- move speed ----- // + if (walk_speed == 0.0) { + double horizontal_velocity_x = + SensorByName(model, data, "center_of_mass_velocity")[0]; + double horizontal_velocity_y = + SensorByName(model, data, "center_of_mass_velocity")[1]; + double dont_move = (tolerance(horizontal_velocity_x, {0.0, 0.0}, 2) + + tolerance(horizontal_velocity_y, {0.0, 0.0}, 2)) / + 2; + reward *= dont_move; + } else { + double com_velocity = + SensorByName(model, data, "center_of_mass_velocity")[0]; + double move = tolerance(com_velocity, {walk_speed, INFINITY}, + std::abs(walk_speed), "linear", 0.0); + move = (5 * move + 1) / 6; + reward *= move; + } + return reward; +} +} // namespace mjpc diff --git a/mjpc/tasks/humanoid_bench/walk_reward.h b/mjpc/tasks/humanoid_bench/walk_reward.h new file mode 100644 index 000000000..528822289 --- /dev/null +++ b/mjpc/tasks/humanoid_bench/walk_reward.h @@ -0,0 +1,24 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MJPC_TASKS_HUMANOID_BENCH_WALK_REWARD_H_ +#define MJPC_TASKS_HUMANOID_BENCH_WALK_REWARD_H_ + +#include "mujoco/mujoco.h" + +namespace mjpc { +double walk_reward(const mjModel *model, const mjData *data, double walk_speed, + double stand_height); +} // namespace mjpc +#endif // MJPC_TASKS_HUMANOID_BENCH_WALK_REWARD_H_ diff --git a/mjpc/tasks/quadruped/a1.xml.patch b/mjpc/tasks/quadruped/a1.xml.patch index 1a52d064f..ebbbd213e 100644 --- a/mjpc/tasks/quadruped/a1.xml.patch +++ b/mjpc/tasks/quadruped/a1.xml.patch @@ -236,4 +236,4 @@ diff --git a/a1_modified.xml b/a1_modified.xml - - - + \ No newline at end of file diff --git a/mjpc/tasks/tasks.cc b/mjpc/tasks/tasks.cc index 5a1600939..ef8127bff 100644 --- a/mjpc/tasks/tasks.cc +++ b/mjpc/tasks/tasks.cc @@ -21,11 +21,9 @@ #include "mjpc/tasks/acrobot/acrobot.h" #include "mjpc/tasks/allegro/allegro.h" #include "mjpc/tasks/bimanual/handover/handover.h" -#include "mjpc/tasks/bimanual/insert/insert.h" #include "mjpc/tasks/bimanual/reorient/reorient.h" #include "mjpc/tasks/cartpole/cartpole.h" #include "mjpc/tasks/fingers/fingers.h" -#include "mjpc/tasks/humanoid/interact/interact.h" #include "mjpc/tasks/humanoid/stand/stand.h" #include "mjpc/tasks/humanoid/tracking/tracking.h" #include "mjpc/tasks/humanoid/walk/walk.h" @@ -40,19 +38,32 @@ #include "mjpc/tasks/shadow_reorient/hand.h" #include "mjpc/tasks/swimmer/swimmer.h" #include "mjpc/tasks/walker/walker.h" +// Humanoid Bench Tasks +#include "mjpc/tasks/humanoid_bench/push/push.h" +#include "mjpc/tasks/humanoid_bench/stand/stand.h" +#include "mjpc/tasks/humanoid_bench/walk/walk.h" namespace mjpc { std::vector> GetTasks() { return { + // Humanoid Bench Walk Task + std::make_shared(), + std::make_shared(), + + // Humanoid Bench Stand Task + std::make_shared(), + std::make_shared(), + + // Humanoid Bench Push Task + std::make_shared(), + std::make_shared(), std::make_shared(), std::make_shared(), - std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), - std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), diff --git a/mjpc/test/spline/spline_test.cc b/mjpc/test/spline/spline_test.cc index 9feed369d..866245def 100644 --- a/mjpc/test/spline/spline_test.cc +++ b/mjpc/test/spline/spline_test.cc @@ -158,26 +158,6 @@ TEST(TimeSplineTest, Cubic) { } } -TEST(TimeSplineAllInterpolationsTest, ShiftTime) { - TimeSpline spline(/*dim=*/2); - spline.SetInterpolation(mjpc::spline::kLinearSpline); - - spline.AddNode(1.0, {1.0, 2.0}); - spline.AddNode(2.0, {2.0, 3.0}); - spline.AddNode(3.0, {3.0, 4.0}); - spline.AddNode(4.0, {4.0, 5.0}); - EXPECT_EQ(spline.Size(), 4); - - EXPECT_THAT(spline.Sample(1.0), ElementsAre(1.0, 2.0)); - EXPECT_THAT(spline.Sample(1.5), ElementsAre(1.5, 2.5)); - - // Shift the spline so that the first node is at 1.5. - spline.ShiftTime(1.5); - EXPECT_EQ(spline.Size(), 4); - EXPECT_THAT(spline.Sample(1.5), ElementsAre(1.0, 2.0)); - EXPECT_THAT(spline.Sample(2.0), ElementsAre(1.5, 2.5)); -} - TEST_P(TimeSplineAllInterpolationsTest, DiscardBefore) { const TimeSplineTestCase& test_case = GetParam(); TimeSpline spline(/*dim=*/2); diff --git a/mjpc/testspeed.cc b/mjpc/testspeed.cc index 571ac1d23..0eebc87df 100644 --- a/mjpc/testspeed.cc +++ b/mjpc/testspeed.cc @@ -44,10 +44,10 @@ void residual_callback(const mjModel* model, mjData* data, int stage) { double SynchronousPlanningCost(std::string task_name, int planner_thread_count, int steps_per_planning_iteration, double total_time) { - std::cout << "Test MJPC Speed: " << task_name << "\n"; + std::cout << "Test MJPC Speed\n"; std::cout << " MuJoCo version " << mj_versionString() << "\n"; if (mjVERSION_HEADER != mj_version()) { - mju_error("Headers and library have different versions"); + mju_error("Headers and library have Different versions"); } std::cout << " Hardware threads: " << NumAvailableHardwareThreads() << "\n"; @@ -60,20 +60,20 @@ double SynchronousPlanningCost(std::string task_name, int planner_thread_count, std::cerr << agent.GetTaskNames(); return -1; } - Agent::LoadModelResult load_model = agent.LoadModel(); - mjModel* model = load_model.model.get(); + auto load_model = agent.LoadModel(); + mjModel* model = load_model.model.release(); if (!model) { std::cerr << load_model.error << "\n"; return -1; } mjData* data = mj_makeData(model); + mj_forward(model, data); int home_id = mj_name2id(model, mjOBJ_KEY, "home"); if (home_id >= 0) { std::cout << "home_id: " << home_id << "\n"; mj_resetDataKeyframe(model, data, home_id); } - mj_forward(model, data); // the planner and its initial configuration is set in the XML agent.estimator_enabled = false; @@ -123,6 +123,7 @@ double SynchronousPlanningCost(std::string task_name, int planner_thread_count, << total_cost / total_steps << "\n"; mj_deleteData(data); + mj_deleteModel(model); mjcb_sensor = nullptr; return total_cost; } diff --git a/mjpc/utilities.cc b/mjpc/utilities.cc index 9d4585a8d..ae51881ec 100644 --- a/mjpc/utilities.cc +++ b/mjpc/utilities.cc @@ -751,7 +751,8 @@ void AddConnector(mjvScene* scene, mjtGeom type, mjtNum width, mjv_initGeom(&scene->geoms[scene->ngeom], type, /*size=*/nullptr, /*pos=*/nullptr, /*mat=*/nullptr, rgba); scene->geoms[scene->ngeom].category = mjCAT_DECOR; - mjv_connector(&scene->geoms[scene->ngeom], type, width, from, to); + mjv_makeConnector(&scene->geoms[scene->ngeom], type, width, from[0], from[1], + from[2], to[0], to[1], to[2]); // increment ngeom scene->ngeom += 1; diff --git a/python/mujoco_mpc/demos/agent/cartpole.py b/python/mujoco_mpc/demos/agent/cartpole.py index 79926fb0f..25c0da9a7 100644 --- a/python/mujoco_mpc/demos/agent/cartpole.py +++ b/python/mujoco_mpc/demos/agent/cartpole.py @@ -16,12 +16,12 @@ import matplotlib.pyplot as plt import mediapy as media import mujoco -# set current directory: mujoco_mpc/python/mujoco_mpc -from mujoco_mpc import agent as agent_lib import numpy as np - import pathlib +# set current directory: mujoco_mpc/python/mujoco_mpc +from mujoco_mpc import agent as agent_lib + # %matplotlib inline # %%