From 133ccf7113285f13c11f786e7f232c520996b115 Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Mon, 7 Apr 2025 14:11:49 +0800
Subject: [PATCH 1/2] Refactor[MeshLoader]: Polymorphic mesh model
 loader/parser & Make FoundationPose output poses in mesh frame

Signed-off-by: zz990099 <771647586@qq.com>
---
 detection_6d_foundationpose/CMakeLists.txt    |   3 +-
 .../foundationpose.hpp                        | 161 ++++------
 .../mesh_loader.hpp                           |  95 ++++++
 .../src/foundationpose.cpp                    | 259 ++++-----------
 .../src/foundationpose_decoder.cpp            |  54 ----
 .../src/foundationpose_decoder.hpp            |  32 --
 .../src/foundationpose_render.cpp             |  47 ++-
 .../src/foundationpose_render.hpp             |  19 +-
 .../src/foundationpose_utils.cpp              | 296 -----------------
 .../src/foundationpose_utils.hpp              | 104 ------
 .../src/mesh_loader/assimp_mesh_loader.cpp    | 297 ++++++++++++++++++
 simple_tests/src/test_foundationpose.cpp      |  36 ++-
 12 files changed, 566 insertions(+), 837 deletions(-)
 create mode 100644 detection_6d_foundationpose/include/detection_6d_foundationpose/mesh_loader.hpp
 delete mode 100644 detection_6d_foundationpose/src/foundationpose_decoder.cpp
 delete mode 100644 detection_6d_foundationpose/src/foundationpose_decoder.hpp
 delete mode 100644 detection_6d_foundationpose/src/foundationpose_utils.cpp
 create mode 100644 detection_6d_foundationpose/src/mesh_loader/assimp_mesh_loader.cpp

diff --git a/detection_6d_foundationpose/CMakeLists.txt b/detection_6d_foundationpose/CMakeLists.txt
index 5290dd6..e804cb1 100644
--- a/detection_6d_foundationpose/CMakeLists.txt
+++ b/detection_6d_foundationpose/CMakeLists.txt
@@ -39,11 +39,10 @@ set(source_file src/nvdiffrast/common/cudaraster/impl/Buffer.cpp
                 src/foundationpose_render.cu
                 src/foundationpose_sampling.cpp
                 src/foundationpose_sampling.cu
-                src/foundationpose_utils.cpp
                 src/foundationpose_utils.cu
-                src/foundationpose_decoder.cpp
                 src/foundationpose_decoder.cu
                 src/foundationpose.cpp
+                src/mesh_loader/assimp_mesh_loader.cpp
 )
 
 include_directories(
diff --git a/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp b/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
index 8c8c4a1..c10726b 100644
--- a/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
+++ b/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
@@ -1,134 +1,103 @@
-#ifndef __FOUNDATIONPOSE_H
-#define __FOUNDATIONPOSE_H
+#pragma once
 
 #include <opencv2/core.hpp>
 #include <Eigen/Dense>
 
+#include "detection_6d_foundationpose/mesh_loader.hpp"
 #include "deploy_core/base_infer_core.h"
 
 namespace detection_6d {
 
+/**
+ * @brief Abstract base class for 6-DoF object detection models
+ *
+ * @note Implementations should handle both object registration and tracking
+ */
 class Base6DofDetectionModel {
 public:
   /**
-   * @brief 实现的接口应支持动态输入尺寸，并检查rgb/depth/mask尺寸是否一致.
+   * @brief Register object instance with initial pose estimation
    *
-   * @note
-   * 相机内参与原始图像的尺寸是绑定的，不可在外部直接对图像进行resize操作，若需要进行resize，应对内参intrinsic同样处理
+   * @note Implementation must:
+   *       - Verify consistent RGB/depth/mask dimensions
+   *       - Maintain binding between camera intrinsics and original image size
+   *         (resizing requires corresponding intrinsic adjustment)
    *
-   * @param rgb rgb图像，必须是`rgb`格式，从opencv-imread读取的图像默认是bgr格式，需经过转换
-   * @param depth 获取的深度图像，cv::Mat数据格式为CV_32F1
-   * @param mask 目标物的mask图像，cv::Mat数据格式为CV_8UC1，positive的像素值大于0即可
-   * @param target_name 目标物的名称，与构建时提供的name->mesh_path映射一致
-   * @param out_pose 输出位姿
-   * @return true
-   * @return false
+   * @param rgb Input RGB image (must be in RGB format, convert if using opencv-imread's default
+   * BGR)
+   * @param depth Depth image (CV_32FC1 format)
+   * @param mask Object mask (CV_8UC1 format, positive pixels > 0)
+   * @param target_name Object category name (must match construction mapping)
+   * @param out_pose_in_mesh Output pose in mesh coordinate frame
+   * @return true Registration successful
+   * @return false Registration failed
    */
   virtual bool Register(const cv::Mat     &rgb,
                         const cv::Mat     &depth,
                         const cv::Mat     &mask,
                         const std::string &target_name,
-                        Eigen::Matrix4f   &out_pose) = 0;
+                        Eigen::Matrix4f   &out_pose_in_mesh) = 0;
+
   /**
-   * @brief
-   * 从第二帧开始的跟踪过程，是`Register`的轻量化版本，精度稍低但推理效率非常高，调用前必须先调用`Register`
+   * @brief Track object pose from subsequent frames (lightweight version of Register)
+   *
+   * @note Requires prior successful Register call
+   *       - Lower accuracy but higher efficiency than Register
+   *       - Accepts pose hypotheses from external sources
    *
-   * @param rgb rgb图像，必须是`rgb`格式，从opencv-imread读取的图像默认是bgr格式，需经过转换
-   * @param depth 获取的深度图像，cv::Mat数据格式为CV_32F1
-   * @param target_name 目标物的名称，与构建时提供的name->mesh_path映射一致
-   * @param out_pose 输出位姿
-   * @return true
-   * @return false
+   * @param rgb Input RGB image (must be in RGB format)
+   * @param depth Depth image (CV_32FC1 format)
+   * @param hyp_pose_in_mesh Hypothesis pose in mesh frame (from Register or other sources)
+   * @param target_name Object category name (must match construction mapping)
+   * @param out_pose_in_mesh Output pose in mesh coordinate frame
+   * @return true Tracking successful
+   * @return false Tracking failed
    */
-  virtual bool Track(const cv::Mat     &rgb,
-                     const cv::Mat     &depth,
-                     const std::string &target_name,
-                     Eigen::Matrix4f   &out_pose) = 0;
+  virtual bool Track(const cv::Mat         &rgb,
+                     const cv::Mat         &depth,
+                     const Eigen::Matrix4f &hyp_pose_in_mesh,
+                     const std::string     &target_name,
+                     Eigen::Matrix4f       &out_pose_in_mesh) = 0;
 
   /**
-   * @brief 获取某个输入mesh目标的三维尺寸(辅助功能，用户也可自己计算)
-   *
-   * @return Eigen::Vector3f
+   * @brief Virtual destructor for proper resource cleanup
    */
-  virtual Eigen::Vector3f GetObjectDimension(const std::string &target_name) const
-  {
-    throw std::runtime_error("[Base6DofDetectionModel] GetOjbectDimension NOT Implemented yet!!!");
-  };
-
   virtual ~Base6DofDetectionModel() = default;
 
 protected:
+  /**
+   * @brief Construct a new base 6-DoF detection model
+   * @note Protected constructor for abstract base class
+   */
   Base6DofDetectionModel() = default;
 };
 
 /**
- * @brief 创建一个`FoundationPose`实例
+ * @brief Factory function for creating FoundationPose model instances
  *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param mesh_file_path 使用的三维模型`mesh_file`路径
- * @param texture_file_path 使用的三维模型外观特征图像路径
- * @param intrinsic_in_vec 相机内参矩阵，std::vector<float>形式，`row_major`格式
- * @return std::shared_ptr<Base6DofDetectionModel>
- */
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string                             &target_name,
-    const std::string                             &mesh_file_path,
-    const std::string                             &texture_file_path,
-    const std::vector<float>                      &intrinsic_in_vec);
-
-/**
- * @brief 创建一个`FoundationPose`实例
+ * @param refiner_core Refinement inference core handle
+ * @param scorer_core Scoring inference core handle
+ * @param mesh_loaders Registered 3D mesh loaders
+ * @param intrinsic_in_mat Camera intrinsic matrix (3x3, Eigen::Matrix3f)
+ * @param max_input_image_height Maximum allowed input image height (pixels)
+ * @param max_input_image_width Maximum allowed input image width (pixels)
+ * @return std::shared_ptr<Base6DofDetectionModel> Initialized model instance
  *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param mesh_file_path 使用的三维模型`mesh_file`路径
- * @param texture_file_path 使用的三维模型外观特征图像路径
- * @param intrinsic_in_mat 相机内参矩阵，Eigen::Matrix3f格式
- * @return std::shared_ptr<Base6DofDetectionModel>
- */
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string                             &target_name,
-    const std::string                             &mesh_file_path,
-    const std::string                             &texture_file_path,
-    const Eigen::Matrix3f                         &intrinsic_in_mat);
-
-/**
- * @brief 创建一个`FoundationPose`实例
+ * @note Creates an implementation of the 6-DoF detection pipeline using:
+ *       - Pose refinement neural network
+ *       - Pose scoring network
+ *       - Pre-registered 3D mesh models
+ *       - Camera calibration parameters
+ *       - Input size constraints for memory optimization
  *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
- *             键值[name]在后续检测过程中用于辨别特定种类目标，**保持一致**
- * @param intrinsic_in_vec 相机内参矩阵，std::vector<float>形式，`row_major`格式
- * @return std::shared_ptr<Base6DofDetectionModel>
+ * @warning Input images exceeding max dimensions will cause failure in processing.
  */
 std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-    const std::vector<float>                                                   &intrinsic_in_vec);
-
-/**
- * @brief 创建一个`FoundationPose`实例
- *
- * @param refiner_core refiner推理核心
- * @param scorer_core scorer推理核心
- * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
- *               键值[name]在后续检测过程中用于辨别特定种类目标，**保持一致**
- * @param intrinsic_in_mat 相机内参矩阵，Eigen::Matrix3f格式
- * @return std::shared_ptr<Base6DofDetectionModel>
- */
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-    const Eigen::Matrix3f                                                      &intrinsic_in_mat);
+    std::shared_ptr<inference_core::BaseInferCore>      refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>      scorer_core,
+    const std::vector<std::shared_ptr<BaseMeshLoader>> &mesh_loaders,
+    const Eigen::Matrix3f                              &intrinsic_in_mat,
+    const int                                           max_input_image_height = 1080,
+    const int                                           max_input_image_width  = 1920);
 
 } // namespace detection_6d
-
-#endif
diff --git a/detection_6d_foundationpose/include/detection_6d_foundationpose/mesh_loader.hpp b/detection_6d_foundationpose/include/detection_6d_foundationpose/mesh_loader.hpp
new file mode 100644
index 0000000..91d8ad2
--- /dev/null
+++ b/detection_6d_foundationpose/include/detection_6d_foundationpose/mesh_loader.hpp
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <vector>
+#include <Eigen/Dense>
+#include <opencv2/core.hpp>
+
+namespace detection_6d {
+
+/**
+ * @brief Abstract base class for mesh loading and data access interfaces
+ *
+ * @note Implementations should handle different mesh formats while maintaining
+ *       consistent vertex/face data organization
+ */
+class BaseMeshLoader {
+public:
+  virtual ~BaseMeshLoader() = default;
+
+  using Vector3ui = Eigen::Matrix<uint32_t, 3, 1>;
+
+  /**
+   * @brief Get identifier name for the loaded mesh
+   * @return Mesh name string
+   */
+  virtual std::string GetName() const noexcept = 0;
+
+  /** @brief Get diameter of the mesh's bounding sphere */
+  virtual float GetMeshDiameter() const noexcept = 0;
+
+  /** @brief Get number of vertices in the mesh */
+  virtual size_t GetMeshNumVertices() const noexcept = 0;
+
+  /** @brief Get number of triangular faces in the mesh */
+  virtual size_t GetMeshNumFaces() const noexcept = 0;
+
+  /** @brief Access array of vertex positions (3D coordinates) */
+  virtual const std::vector<Eigen::Vector3f> &GetMeshVertices() const noexcept = 0;
+
+  /** @brief Access array of vertex normals */
+  virtual const std::vector<Eigen::Vector3f> &GetMeshVertexNormals() const noexcept = 0;
+
+  /** @brief Access array of texture coordinates (UV mapping) */
+  virtual const std::vector<Eigen::Vector3f> &GetMeshTextureCoords() const noexcept = 0;
+
+  /** @brief Access array of triangular face indices */
+  virtual const std::vector<Vector3ui> &GetMeshTriangleFaces() const noexcept = 0;
+
+  /** @brief Get centroid position of the mesh model */
+  virtual const Eigen::Vector3f &GetMeshModelCenter() const noexcept = 0;
+
+  /**
+   * @brief Get orientation bounds transformation matrix
+   * @return 4x4 matrix containing axis-aligned bounding box orientation
+   */
+  virtual const Eigen::Matrix4f &GetOrientBounds() const noexcept = 0;
+
+  /** @brief Get dimensional measurements of the object (width/height/depth) */
+  virtual const Eigen::Vector3f &GetObjectDimension() const noexcept = 0;
+
+  /** @brief Access texture map image for the mesh */
+  virtual const cv::Mat &GetTextureMap() const noexcept = 0;
+};
+
+/**
+ * @brief Convert pose from mesh coordinate frame to bounding box frame
+ *
+ * @param pose_in_mesh Input pose in mesh coordinate system
+ * @param mesh_loader Mesh loader containing transformation parameters
+ * @return Transformed pose in bounding box coordinate system
+ *
+ * @note Transformation formula:
+ *       T_bbox = T_mesh * T_center * T_orient
+ *       Where T_center translates to mesh center, T_orient aligns with bounds
+ */
+inline Eigen::Matrix4f ConvertPoseMesh2BBox(const Eigen::Matrix4f                 &pose_in_mesh,
+                                            const std::shared_ptr<BaseMeshLoader> &mesh_loader)
+{
+  Eigen::Matrix4f tf_to_center   = Eigen::Matrix4f::Identity();
+  tf_to_center.block<3, 1>(0, 3) = -mesh_loader->GetMeshModelCenter();
+  return pose_in_mesh * tf_to_center * mesh_loader->GetOrientBounds();
+}
+
+/**
+ * @brief Factory function for creating Assimp-based mesh loader
+ *
+ * @param name Identifier for the mesh
+ * @param mesh_file_path Path to mesh file (supports .obj/.ply/.stl etc.)
+ * @return Shared pointer to initialized mesh loader instance
+ *
+ * @note Throws std::runtime_error if mesh loading fails
+ */
+std::shared_ptr<BaseMeshLoader> CreateAssimpMeshLoader(const std::string &name,
+                                                       const std::string &mesh_file_path);
+
+} // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose.cpp b/detection_6d_foundationpose/src/foundationpose.cpp
index 7fd3b22..268be2e 100644
--- a/detection_6d_foundationpose/src/foundationpose.cpp
+++ b/detection_6d_foundationpose/src/foundationpose.cpp
@@ -1,53 +1,22 @@
 #include "detection_6d_foundationpose/foundationpose.hpp"
+#include "detection_6d_foundationpose/mesh_loader.hpp"
 
 #include "foundationpose_render.hpp"
 #include "foundationpose_sampling.hpp"
 #include "foundationpose_utils.hpp"
-#include "foundationpose_decoder.hpp"
+#include "foundationpose_decoder.cu.hpp"
 #include "foundationpose_utils.cu.hpp"
 
-#define MAX_INPUT_IMAGE_HEIGHT 1080
-#define MAX_INPUT_IMAGE_WIDTH 1920
-
 namespace detection_6d {
 
 class FoundationPose : public Base6DofDetectionModel {
 public:
-  /**
-   * @brief 使用单个目标的mesh构建一个FoundationPose实例
-   *
-   * @param refiner_core refiner的推理核心
-   * @param scorer_core scorer的推理核心
-   * @param mesh_file_path 目标的mesh文件路径
-   * @param texture_file_path 目标的外观特征图像路径
-   * @param intrinsic 相机内参
-   * @param input_image_H 输入图像高度，默认480
-   * @param input_image_W 输入图像宽度，默认640
-   * @param crop_window_H 模型的输入图像高度，默认160
-   * @param crop_window_W 模型的输入图像宽度，默认160
-   * @param min_depth 有效深度最小值
-   * @param max_depth 有效深度最大值
-   */
-  FoundationPose(std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-                 std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-                 const std::string                             &target_name,
-                 const std::string                             &mesh_file_path,
-                 const std::string                             &texture_file_path,
-                 const Eigen::Matrix3f                         &intrinsic,
-                 const int   input_image_H = MAX_INPUT_IMAGE_HEIGHT,
-                 const int   input_image_W = MAX_INPUT_IMAGE_WIDTH,
-                 const int   crop_window_H = 160,
-                 const int   crop_window_W = 160,
-                 const float min_depth     = 0.1,
-                 const float max_depth     = 4.0);
-
   /**
    * @brief 使用多个目标的mesh构建一个FoundationPose实例
    *
    * @param refiner_core refiner的推理核心
    * @param scorer_core scorer的推理核心
-   * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
-   *                键值[name]在后续检测过程中用于辨别特定种类目标，**保持一致**
+   * @param mesh_loaders 注册的三维模型
    * @param intrinsic 相机内参
    * @param input_image_H 输入图像高度，默认480
    * @param input_image_W 输入图像宽度，默认640
@@ -56,29 +25,28 @@ class FoundationPose : public Base6DofDetectionModel {
    * @param min_depth 有效深度最小值
    * @param max_depth 有效深度最大值
    */
-  FoundationPose(std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-                 std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-                 const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-                 const Eigen::Matrix3f &intrinsic,
-                 const int              max_input_image_H = MAX_INPUT_IMAGE_HEIGHT,
-                 const int              max_input_image_W = MAX_INPUT_IMAGE_WIDTH,
-                 const int              crop_window_H     = 160,
-                 const int              crop_window_W     = 160,
-                 const float            min_depth         = 0.1,
-                 const float            max_depth         = 4.0);
+  FoundationPose(std::shared_ptr<inference_core::BaseInferCore>      refiner_core,
+                 std::shared_ptr<inference_core::BaseInferCore>      scorer_core,
+                 const std::vector<std::shared_ptr<BaseMeshLoader>> &mesh_loaders,
+                 const Eigen::Matrix3f                              &intrinsic,
+                 const int                                           max_input_image_H = 1080,
+                 const int                                           max_input_image_W = 1920,
+                 const int                                           crop_window_H     = 160,
+                 const int                                           crop_window_W     = 160,
+                 const float                                         min_depth         = 0.1,
+                 const float                                         max_depth         = 4.0);
 
   bool Register(const cv::Mat     &rgb,
                 const cv::Mat     &depth,
                 const cv::Mat     &mask,
                 const std::string &target_name,
-                Eigen::Matrix4f   &out_pose) override;
-
-  bool Track(const cv::Mat     &rgb,
-             const cv::Mat     &depth,
-             const std::string &target_name,
-             Eigen::Matrix4f   &out_pose) override;
+                Eigen::Matrix4f   &out_pose_in_mesh) override;
 
-  Eigen::Vector3f GetObjectDimension(const std::string &target_name) const override;
+  bool Track(const cv::Mat         &rgb,
+             const cv::Mat         &depth,
+             const Eigen::Matrix4f &hyp_pose_in_mesh,
+             const std::string     &target_name,
+             Eigen::Matrix4f       &out_pose_in_mesh) override;
 
 private:
   bool CheckInputArguments(const cv::Mat     &rgb,
@@ -127,50 +95,21 @@ class FoundationPose : public Base6DofDetectionModel {
 
 private:
   // 内部各个模块
-  std::unordered_map<std::string, std::shared_ptr<TexturedMeshLoader>>     map_name2loaders_;
+  std::unordered_map<std::string, std::shared_ptr<BaseMeshLoader>>         map_name2loaders_;
   std::unordered_map<std::string, std::shared_ptr<FoundationPoseRenderer>> map_name2renderer_;
   std::shared_ptr<FoundationPoseSampler>                                   hyp_poses_sampler_;
-  std::shared_ptr<FoundationPoseDecoder>                                   out_pose_decoder_;
-
-  // 维护一个Track用的prev_pose
-  std::unordered_map<std::string, Eigen::Matrix4f> map_name2prev_pose_;
 };
 
-FoundationPose::FoundationPose(std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-                               std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-                               const std::string                             &target_name,
-                               const std::string                             &mesh_file_path,
-                               const std::string                             &texture_file_path,
-                               const Eigen::Matrix3f                         &intrinsic,
-                               const int                                      input_image_H,
-                               const int                                      input_image_W,
-                               const int                                      crop_window_H,
-                               const int                                      crop_window_W,
-                               const float                                    min_depth,
-                               const float                                    max_depth)
-    : FoundationPose(refiner_core,
-                     scorer_core,
-                     {{target_name, {mesh_file_path, texture_file_path}}},
-                     intrinsic,
-                     input_image_H,
-                     input_image_W,
-                     crop_window_H,
-                     crop_window_W,
-                     min_depth,
-                     max_depth)
-{}
-
-FoundationPose::FoundationPose(
-    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-    const Eigen::Matrix3f                                                      &intrinsic,
-    const int                                                                   max_input_image_H,
-    const int                                                                   max_input_image_W,
-    const int                                                                   crop_window_H,
-    const int                                                                   crop_window_W,
-    const float                                                                 min_depth,
-    const float                                                                 max_depth)
+FoundationPose::FoundationPose(std::shared_ptr<inference_core::BaseInferCore>      refiner_core,
+                               std::shared_ptr<inference_core::BaseInferCore>      scorer_core,
+                               const std::vector<std::shared_ptr<BaseMeshLoader>> &mesh_loaders,
+                               const Eigen::Matrix3f                              &intrinsic,
+                               const int   max_input_image_H,
+                               const int   max_input_image_W,
+                               const int   crop_window_H,
+                               const int   crop_window_W,
+                               const float min_depth,
+                               const float max_depth)
     : refiner_core_(refiner_core),
       scorer_core_(scorer_core),
       intrinsic_(intrinsic),
@@ -209,24 +148,17 @@ FoundationPose::FoundationPose(
   }
 
   // preload modules
-  for (const auto &p_name_paths : meshes)
+  for (const auto &mesh_loader : mesh_loaders)
   {
-    const std::string &target_name      = p_name_paths.first;
-    const std::string &mesh_file_path   = p_name_paths.second.first;
-    const std::string &texutre_img_path = p_name_paths.second.second;
-    LOG(INFO) << "[FoundationPose] Got target_name : " << target_name
-              << ", mesh_file_path: " << mesh_file_path
-              << ", texture_img_path: " << texutre_img_path;
-    auto mesh_loader = std::make_shared<TexturedMeshLoader>(mesh_file_path, texutre_img_path);
-    map_name2loaders_[p_name_paths.first]  = mesh_loader;
-    map_name2renderer_[p_name_paths.first] = std::make_shared<FoundationPoseRenderer>(
+    const std::string &target_name = mesh_loader->GetName();
+    LOG(INFO) << "[FoundationPose] Got target_name : " << target_name;
+    map_name2loaders_[target_name]  = mesh_loader;
+    map_name2renderer_[target_name] = std::make_shared<FoundationPoseRenderer>(
         mesh_loader, intrinsic_, score_mode_poses_num_, refine_mode_crop_ratio);
   }
 
   hyp_poses_sampler_ = std::make_shared<FoundationPoseSampler>(
       max_input_image_H_, max_input_image_W_, min_depth, max_depth, intrinsic_);
-
-  out_pose_decoder_ = std::make_shared<FoundationPoseDecoder>(score_mode_poses_num_);
 }
 
 bool FoundationPose::CheckInputArguments(const cv::Mat     &rgb,
@@ -245,6 +177,9 @@ bool FoundationPose::CheckInputArguments(const cv::Mat     &rgb,
     return false;
   }
 
+  CHECK_STATE(r_rows <= max_input_image_H_ && r_cols <= max_input_image_W_,
+              "[FoundationPose] Got rgb/depth/mask with unexpected size !");
+
   CHECK_STATE(map_name2loaders_.find(target_name) != map_name2loaders_.end(),
               "[FoundationPose] Register Got Invalid `target_name` \
                               which was not provided to FoundationPose instance!!!");
@@ -256,7 +191,7 @@ bool FoundationPose::Register(const cv::Mat     &rgb,
                               const cv::Mat     &depth,
                               const cv::Mat     &mask,
                               const std::string &target_name,
-                              Eigen::Matrix4f   &out_pose)
+                              Eigen::Matrix4f   &out_pose_in_mesh)
 {
   CHECK_STATE(CheckInputArguments(rgb, depth, mask, target_name),
               "[FoundationPose] `Register` Got invalid arguments!!!");
@@ -290,29 +225,25 @@ bool FoundationPose::Register(const cv::Mat     &rgb,
   MESSURE_DURATION_AND_CHECK_STATE(ScorePostProcess(package),
                                    "[FoundationPose] SyncDetect Failed to execute PostProcess!!!");
 
-  out_pose = std::move(package->actual_pose);
+  out_pose_in_mesh = std::move(package->actual_pose);
 
   return true;
 }
 
-bool FoundationPose::Track(const cv::Mat     &rgb,
-                           const cv::Mat     &depth,
-                           const std::string &target_name,
-                           Eigen::Matrix4f   &out_pose)
+bool FoundationPose::Track(const cv::Mat         &rgb,
+                           const cv::Mat         &depth,
+                           const Eigen::Matrix4f &hyp_pose_in_mesh,
+                           const std::string     &target_name,
+                           Eigen::Matrix4f       &out_pose_in_mesh)
 {
   CHECK_STATE(CheckInputArguments(rgb, depth, cv::Mat(), target_name),
               "[FoundationPose] `Track` Got invalid arguments!!!");
 
-  CHECK_STATE(map_name2prev_pose_.find(target_name) != map_name2prev_pose_.end(),
-              "[FoundationPose] Track target: " + target_name +
-                  " is NOT registered yet!!!\
-            Call `Register` first before calling `Track`!!!");
-
   auto package           = std::make_shared<FoundationPosePipelinePackage>();
   package->rgb_on_host   = rgb;
   package->depth_on_host = depth;
   package->target_name   = target_name;
-  package->hyp_poses     = {map_name2prev_pose_[target_name]};
+  package->hyp_poses     = {hyp_pose_in_mesh};
   // 将数据传输至device端，并生成xyz_map数据
   MESSURE_DURATION_AND_CHECK_STATE(UploadDataToDevice(rgb, depth, cv::Mat(), package),
                                    "[FoundationPose] Track Failed to upload data!!!");
@@ -327,24 +258,11 @@ bool FoundationPose::Track(const cv::Mat     &rgb,
   MESSURE_DURATION_AND_CHECK_STATE(TrackPostProcess(package),
                                    "[Foundation] Track Failed to execute `TrackPostProcess`!!!");
 
-  out_pose = std::move(package->actual_pose);
+  out_pose_in_mesh = std::move(package->actual_pose);
 
   return true;
 }
 
-Eigen::Vector3f FoundationPose::GetObjectDimension(const std::string &target_name) const
-{
-  if (map_name2loaders_.find(target_name) == map_name2loaders_.end())
-  {
-    LOG(ERROR) << "[FoundationPose] GetObjectDimension Got invalid `target_name`:" << target_name
-               << ", whose mesh_file mapping was not provided to FoundationPose instance!!!";
-    throw std::runtime_error("[FoundationPose] GetObjectDimension Got invalid `target_name`: " +
-                             target_name);
-  }
-
-  return map_name2loaders_.at(target_name)->GetObjectDimension();
-}
-
 bool FoundationPose::UploadDataToDevice(
     const cv::Mat                                        &rgb,
     const cv::Mat                                        &depth,
@@ -513,19 +431,10 @@ bool FoundationPose::ScorePostProcess(std::shared_ptr<async_pipeline::IPipelineP
 
   const auto &refine_poses = package->refine_poses;
   const int   poses_num    = refine_poses.size();
-  // 输入到decoder进行解码(找到得分最高的位姿，并通过包络盒转换到正确的位姿)
-  // 获取对应的mesh_loader
-  const auto &mesh_loader = map_name2loaders_[package->target_name];
 
   // 获取置信度最大的refined_pose
-  int max_score_index = out_pose_decoder_->GetMaxScoreIndex(score_ptr);
-  // 记录
-  map_name2prev_pose_[package->target_name] = refine_poses[max_score_index];
-
-  // 转换到包络盒坐标系
-  Eigen::Matrix4f actual_pose;
-  out_pose_decoder_->DecodeWithMaxScore(max_score_index, refine_poses, actual_pose, mesh_loader);
-  package->actual_pose = actual_pose;
+  int max_score_index  = getMaxScoreIndex(nullptr, reinterpret_cast<float *>(score_ptr), poses_num);
+  package->actual_pose = refine_poses[max_score_index];
 
   return true;
 }
@@ -577,73 +486,21 @@ bool FoundationPose::TrackPostProcess(std::shared_ptr<async_pipeline::IPipelineP
     refine_poses[i].block<3, 3>(0, 0) = result_3x3;
   }
 
-  // 记录
-  map_name2prev_pose_[package->target_name] = refine_poses[0];
-
-  // 获取对应的mesh_loader
-  out_pose_decoder_->DecodeInRefine(refine_poses[0], package->actual_pose, mesh_loader);
+  package->actual_pose = refine_poses[0];
 
   return true;
 }
 
 std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string                             &target_name,
-    const std::string                             &mesh_file_path,
-    const std::string                             &texture_file_path,
-    const std::vector<float>                      &intrinsic_in_vec)
-{
-  // 重构内参矩阵，`row_major` -> `col_major`
-  Eigen::Matrix3f intrinsic;
-  for (int r = 0; r < 3; ++r)
-  {
-    for (int c = 0; c < 3; ++c)
-    {
-      intrinsic(r, c) = intrinsic_in_vec[r * 3 + c];
-    }
-  }
-  return std::make_shared<FoundationPose>(refiner_core, scorer_core, target_name, mesh_file_path,
-                                          texture_file_path, intrinsic);
-}
-
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string                             &target_name,
-    const std::string                             &mesh_file_path,
-    const std::string                             &texture_file_path,
-    const Eigen::Matrix3f                         &intrinsic_in_mat)
-{
-  return std::make_shared<FoundationPose>(refiner_core, scorer_core, target_name, mesh_file_path,
-                                          texture_file_path, intrinsic_in_mat);
-}
-
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-    const std::vector<float>                                                   &intrinsic_in_vec)
-{
-  // 重构内参矩阵，`row_major` -> `col_major`
-  Eigen::Matrix3f intrinsic;
-  for (int r = 0; r < 3; ++r)
-  {
-    for (int c = 0; c < 3; ++c)
-    {
-      intrinsic(r, c) = intrinsic_in_vec[r * 3 + c];
-    }
-  }
-  return std::make_shared<FoundationPose>(refiner_core, scorer_core, meshes, intrinsic);
-}
-
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
-    const Eigen::Matrix3f                                                      &intrinsic_in_mat)
+    std::shared_ptr<inference_core::BaseInferCore>      refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>      scorer_core,
+    const std::vector<std::shared_ptr<BaseMeshLoader>> &mesh_loaders,
+    const Eigen::Matrix3f                              &intrinsic_in_mat,
+    const int                                           max_input_image_height,
+    const int                                           max_input_image_width)
 {
-  return std::make_shared<FoundationPose>(refiner_core, scorer_core, meshes, intrinsic_in_mat);
+  return std::make_shared<FoundationPose>(refiner_core, scorer_core, mesh_loaders, intrinsic_in_mat,
+                                          max_input_image_height, max_input_image_width);
 }
 
 } // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose_decoder.cpp b/detection_6d_foundationpose/src/foundationpose_decoder.cpp
deleted file mode 100644
index 3136dab..0000000
--- a/detection_6d_foundationpose/src/foundationpose_decoder.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-#include "foundationpose_decoder.hpp"
-
-#include "foundationpose_decoder.cu.hpp"
-
-namespace detection_6d {
-
-FoundationPoseDecoder::FoundationPoseDecoder(const int input_poses_num)
-    : input_poses_num_(input_poses_num)
-{
-  if (cudaStreamCreate(&cuda_stream_) != cudaSuccess)
-  {
-    throw std::runtime_error("[FoundationPoseDecoder] Failed to create cuda stream!!!");
-  }
-}
-
-FoundationPoseDecoder::~FoundationPoseDecoder()
-{
-  if (cudaStreamDestroy(cuda_stream_) != cudaSuccess)
-  {
-    LOG(WARNING) << "[FoundationPoseDecoder] Failed to destroy cuda stream!!!";
-  }
-}
-
-int FoundationPoseDecoder::GetMaxScoreIndex(void *scores_on_device) noexcept
-{
-  return getMaxScoreIndex(cuda_stream_, reinterpret_cast<float *>(scores_on_device),
-                          input_poses_num_);
-}
-
-bool FoundationPoseDecoder::DecodeWithMaxScore(int                                 max_score_index,
-                                               const std::vector<Eigen::Matrix4f> &refined_poses,
-                                               Eigen::Matrix4f                    &out_pose,
-                                               std::shared_ptr<TexturedMeshLoader> mesh_loader)
-{
-  const auto     &best_pose_matrix = refined_poses[max_score_index];
-  Eigen::Matrix4f tf_to_center     = Eigen::Matrix4f::Identity();
-  tf_to_center.block<3, 1>(0, 3)   = -mesh_loader->GetMeshModelCenter();
-  out_pose                         = best_pose_matrix * tf_to_center;
-  out_pose                         = out_pose * mesh_loader->GetOrientBounds();
-  return true;
-}
-
-bool FoundationPoseDecoder::DecodeInRefine(const Eigen::Matrix4f              &refined_pose,
-                                           Eigen::Matrix4f                    &out_pose,
-                                           std::shared_ptr<TexturedMeshLoader> mesh_loader)
-{
-  Eigen::Matrix4f tf_to_center   = Eigen::Matrix4f::Identity();
-  tf_to_center.block<3, 1>(0, 3) = -mesh_loader->GetMeshModelCenter();
-  out_pose                       = refined_pose * tf_to_center;
-  out_pose                       = out_pose * mesh_loader->GetOrientBounds();
-  return true;
-}
-
-} // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose_decoder.hpp b/detection_6d_foundationpose/src/foundationpose_decoder.hpp
deleted file mode 100644
index be3ece8..0000000
--- a/detection_6d_foundationpose/src/foundationpose_decoder.hpp
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef __DETECTION_6D_FOUNDATIONPOSE_DECODER_H
-#define __DETECTION_6D_FOUNDATIONPOSE_DECODER_H
-
-#include "foundationpose_utils.hpp"
-
-namespace detection_6d {
-
-class FoundationPoseDecoder {
-public:
-  FoundationPoseDecoder(const int input_poses_num);
-
-  bool DecodeWithMaxScore(int                                 max_score_index,
-                          const std::vector<Eigen::Matrix4f> &refined_poses,
-                          Eigen::Matrix4f                    &out_pose,
-                          std::shared_ptr<TexturedMeshLoader> mesh_loader);
-
-  bool DecodeInRefine(const Eigen::Matrix4f              &refined_pose,
-                      Eigen::Matrix4f                    &out_pose,
-                      std::shared_ptr<TexturedMeshLoader> mesh_loader);
-
-  int GetMaxScoreIndex(void *scores_on_device) noexcept;
-
-  ~FoundationPoseDecoder();
-
-private:
-  const int    input_poses_num_;
-  cudaStream_t cuda_stream_;
-};
-
-} // namespace detection_6d
-
-#endif
diff --git a/detection_6d_foundationpose/src/foundationpose_render.cpp b/detection_6d_foundationpose/src/foundationpose_render.cpp
index 7385489..5742ca8 100644
--- a/detection_6d_foundationpose/src/foundationpose_render.cpp
+++ b/detection_6d_foundationpose/src/foundationpose_render.cpp
@@ -217,14 +217,14 @@ void WrapFloatPtrToNHWCTensor(
   output_tensor = nvcv::TensorWrapData(output_data);
 }
 
-FoundationPoseRenderer::FoundationPoseRenderer(std::shared_ptr<TexturedMeshLoader> mesh_loader,
-                                               const Eigen::Matrix3f              &intrinsic,
-                                               const int                           input_poses_num,
-                                               const float                         crop_ratio,
-                                               const int                           crop_window_H,
-                                               const int                           crop_window_W,
-                                               const float                         min_depth,
-                                               const float                         max_depth)
+FoundationPoseRenderer::FoundationPoseRenderer(std::shared_ptr<BaseMeshLoader> mesh_loader,
+                                               const Eigen::Matrix3f          &intrinsic,
+                                               const int                       input_poses_num,
+                                               const float                     crop_ratio,
+                                               const int                       crop_window_H,
+                                               const int                       crop_window_W,
+                                               const float                     min_depth,
+                                               const float                     max_depth)
     : mesh_loader_(mesh_loader),
       intrinsic_(intrinsic),
       input_poses_num_(input_poses_num),
@@ -386,7 +386,7 @@ bool FoundationPoseRenderer::LoadTexturedMesh()
   const auto &mesh_vertices       = mesh_loader_->GetMeshVertices();
   const auto &mesh_vertex_normals = mesh_loader_->GetMeshVertexNormals();
   const auto &mesh_texcoords      = mesh_loader_->GetMeshTextureCoords();
-  const auto &mesh_faces          = mesh_loader_->GetMeshFaces();
+  const auto &mesh_faces          = mesh_loader_->GetMeshTriangleFaces();
   const auto &rgb_texture_map     = mesh_loader_->GetTextureMap();
   mesh_diameter_                  = mesh_loader_->GetMeshDiameter();
 
@@ -395,34 +395,25 @@ bool FoundationPoseRenderer::LoadTexturedMesh()
   // Walk through each of the mesh's vertices
   for (unsigned int v = 0; v < mesh_vertices.size(); v++)
   {
-    vertices_.push_back(mesh_vertices[v].x - mesh_model_center[0]);
-    vertices_.push_back(mesh_vertices[v].y - mesh_model_center[1]);
-    vertices_.push_back(mesh_vertices[v].z - mesh_model_center[2]);
+    vertices_.push_back(mesh_vertices[v][0] - mesh_model_center[0]);
+    vertices_.push_back(mesh_vertices[v][1] - mesh_model_center[1]);
+    vertices_.push_back(mesh_vertices[v][2] - mesh_model_center[2]);
 
-    vertex_normals.push_back(mesh_vertex_normals[v].x);
-    vertex_normals.push_back(mesh_vertex_normals[v].y);
-    vertex_normals.push_back(mesh_vertex_normals[v].z);
+    vertex_normals.push_back(mesh_vertex_normals[v][0]);
+    vertex_normals.push_back(mesh_vertex_normals[v][1]);
+    vertex_normals.push_back(mesh_vertex_normals[v][2]);
 
     // Check if the mesh has texture coordinates
-    if (mesh_texcoords.size() >= 1)
-    {
-      texcoords_.push_back(mesh_texcoords[0][v].x);
-      texcoords_.push_back(1 - mesh_texcoords[0][v].y);
-    }
+    texcoords_.push_back(mesh_texcoords[v][0]);
+    texcoords_.push_back(1 - mesh_texcoords[v][1]);
   }
 
   // Walk through each of the mesh's faces (a face is a mesh its triangle)
   for (unsigned int f = 0; f < mesh_faces.size(); f++)
   {
-    const aiFace &face = mesh_faces[f];
-
-    // We assume the face is a triangle due to aiProcess_Triangulate
-    CHECK_STATE(face.mNumIndices == 3, "Only triangle is supported, but the object face has " +
-                                           std::to_string(face.mNumIndices) + " vertices. ");
-
-    for (unsigned int i = 0; i < face.mNumIndices; i++)
+    for (unsigned int i = 0; i < kTriangleVertices; i++)
     {
-      mesh_faces_.push_back(face.mIndices[i]);
+      mesh_faces_.push_back(mesh_faces[f][i]);
     }
   }
 
diff --git a/detection_6d_foundationpose/src/foundationpose_render.hpp b/detection_6d_foundationpose/src/foundationpose_render.hpp
index d1c3a9f..4c0ebfa 100644
--- a/detection_6d_foundationpose/src/foundationpose_render.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_render.hpp
@@ -10,6 +10,7 @@
 
 #include "nvdiffrast/common/cudaraster/CudaRaster.hpp"
 #include "foundationpose_utils.hpp"
+#include "detection_6d_foundationpose/mesh_loader.hpp"
 
 namespace detection_6d {
 
@@ -17,14 +18,14 @@ typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> Ro
 
 class FoundationPoseRenderer {
 public:
-  FoundationPoseRenderer(std::shared_ptr<TexturedMeshLoader> mesh_loader,
-                         const Eigen::Matrix3f              &intrinsic,
-                         const int                           input_poses_num,
-                         const float                         crop_ratio    = 1.2,
-                         const int                           crop_window_H = 160,
-                         const int                           crop_window_W = 160,
-                         const float                         min_depth     = 0.1,
-                         const float                         max_depth     = 4.0);
+  FoundationPoseRenderer(std::shared_ptr<BaseMeshLoader> mesh_loader,
+                         const Eigen::Matrix3f          &intrinsic,
+                         const int                       input_poses_num,
+                         const float                     crop_ratio    = 1.2,
+                         const int                       crop_window_H = 160,
+                         const int                       crop_window_W = 160,
+                         const float                     min_depth     = 0.1,
+                         const float                     max_depth     = 4.0);
 
   bool RenderAndTransform(const std::vector<Eigen::Matrix4f> &_poses,
                           void                               *rgb_on_device,
@@ -101,7 +102,7 @@ class FoundationPoseRenderer {
   const float max_depth_;
 
   // mesh
-  std::shared_ptr<TexturedMeshLoader>                                   mesh_loader_;
+  std::shared_ptr<BaseMeshLoader>                                       mesh_loader_;
   std::vector<float>                                                    vertices_;
   std::vector<float>                                                    texcoords_;
   std::vector<int32_t>                                                  mesh_faces_;
diff --git a/detection_6d_foundationpose/src/foundationpose_utils.cpp b/detection_6d_foundationpose/src/foundationpose_utils.cpp
deleted file mode 100644
index ae665bf..0000000
--- a/detection_6d_foundationpose/src/foundationpose_utils.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-// SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-// Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-#include "foundationpose_utils.hpp"
-
-#include <opencv2/imgcodecs.hpp>
-#include <opencv2/imgproc.hpp>
-#include <vector>
-
-#include "cuda.h"
-#include "cuda_runtime.h"
-
-namespace detection_6d {
-
-std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh *mesh)
-{
-  Eigen::Vector3f min_vertex = {0, 0, 0};
-  Eigen::Vector3f max_vertex = {0, 0, 0};
-
-  if (mesh->mNumVertices == 0)
-  {
-    return std::pair{min_vertex, max_vertex};
-  }
-
-  min_vertex << mesh->mVertices[0].x, mesh->mVertices[0].y, mesh->mVertices[0].z;
-  max_vertex << mesh->mVertices[0].x, mesh->mVertices[0].y, mesh->mVertices[0].z;
-
-  // Iterate over all vertices to find the bounding box
-  for (size_t v = 0; v < mesh->mNumVertices; v++)
-  {
-    float vx = mesh->mVertices[v].x;
-    float vy = mesh->mVertices[v].y;
-    float vz = mesh->mVertices[v].z;
-
-    min_vertex[0] = std::min(min_vertex[0], vx);
-    min_vertex[1] = std::min(min_vertex[1], vy);
-    min_vertex[2] = std::min(min_vertex[2], vz);
-
-    max_vertex[0] = std::max(max_vertex[0], vx);
-    max_vertex[1] = std::max(max_vertex[1], vy);
-    max_vertex[2] = std::max(max_vertex[2], vz);
-  }
-  return std::pair{min_vertex, max_vertex};
-}
-
-float CalcMeshDiameter(const aiMesh *mesh)
-{
-  float max_dist = 0.0;
-  for (unsigned int i = 0; i < mesh->mNumVertices; ++i)
-  {
-    for (unsigned int j = i + 1; j < mesh->mNumVertices; ++j)
-    {
-      aiVector3D diff = mesh->mVertices[i] - mesh->mVertices[j];
-      float      dist = diff.Length();
-      max_dist        = std::max(max_dist, dist);
-    }
-  }
-  return max_dist;
-}
-
-void ComputeOBB(const aiMesh    *mesh,
-                Eigen::Matrix4f &out_orient_bbox,
-                Eigen::Vector3f &out_dimension)
-{
-  std::vector<Eigen::Vector3f> vertices;
-  for (unsigned int i = 0; i < mesh->mNumVertices; ++i)
-  {
-    vertices.emplace_back(mesh->mVertices[i].x, mesh->mVertices[i].y, mesh->mVertices[i].z);
-  }
-
-  // 计算质心
-  Eigen::Vector3f mean = Eigen::Vector3f::Zero();
-  for (const auto &v : vertices)
-  {
-    mean += v;
-  }
-  mean /= vertices.size();
-
-  // 计算协方差矩阵
-  Eigen::Matrix3f cov = Eigen::Matrix3f::Zero();
-  for (const auto &v : vertices)
-  {
-    Eigen::Vector3f diff = v - mean;
-    cov += diff * diff.transpose();
-  }
-  cov /= vertices.size();
-
-  // 特征分解
-  Eigen::SelfAdjointEigenSolver<Eigen::Matrix3f> solver(cov);
-  Eigen::Matrix3f                                rotation = solver.eigenvectors();
-  Eigen::Vector3f                                extents  = solver.eigenvalues().cwiseSqrt();
-  // 生成变换矩阵
-  Eigen::Matrix4f transform   = Eigen::Matrix4f::Identity();
-  transform.block<3, 3>(0, 0) = rotation;
-  transform.block<3, 1>(0, 3) = mean;
-
-  Eigen::MatrixXf transformed(vertices.size(), 3);
-  for (int i = 0; i < vertices.size(); ++i)
-  {
-    Eigen::Vector3f proj = rotation.transpose() * vertices[i];
-    transformed(i, 0)    = proj(0);
-    transformed(i, 1)    = proj(1);
-    transformed(i, 2)    = proj(2);
-  }
-
-  Eigen::Vector3f minBound = transformed.colwise().minCoeff();
-  Eigen::Vector3f maxBound = transformed.colwise().maxCoeff();
-
-  Eigen::Vector3f dimension = maxBound - minBound;
-
-  out_orient_bbox = transform;
-  out_dimension   = dimension;
-}
-
-TexturedMeshLoader::TexturedMeshLoader(const std::string &mesh_file_path,
-                                       const std::string &textured_file_path)
-{
-  // 1. load textured mesh file using assimp
-  if (mesh_file_path.empty() || textured_file_path.empty())
-  {
-    throw std::invalid_argument("[TexturedMeshLoader] Invalid textured mesh file path: " +
-                                mesh_file_path + "\t" + textured_file_path);
-  }
-  LOG(INFO) << "Loading mesh file: " << mesh_file_path;
-  Assimp::Importer importer;
-  const aiScene   *scene =
-      importer.ReadFile(mesh_file_path, aiProcess_Triangulate | aiProcess_JoinIdenticalVertices |
-                                            aiProcess_SortByPType);
-  if (scene == nullptr)
-  {
-    throw std::runtime_error("[TexturedMeshLoader] Failed to read mesh file: " + mesh_file_path);
-  }
-
-  const aiMesh *mesh = scene->mMeshes[0];
-  mesh_diamter_      = CalcMeshDiameter(mesh);
-  ComputeOBB(mesh, obb_, dim_);
-
-  auto min_max_vertex = FindMinMaxVertex(mesh);
-  mesh_center_        = (min_max_vertex.second + min_max_vertex.first) / 2.0;
-
-  // Walk through each of the mesh's vertices
-  for (unsigned int v = 0; v < mesh->mNumVertices; v++)
-  {
-    vertices_.push_back(mesh->mVertices[v]);
-    vertex_normals_.push_back(mesh->mNormals[v]);
-  }
-  for (unsigned int i = 0; i < AI_MAX_NUMBER_OF_TEXTURECOORDS; ++i)
-  {
-    if (mesh->mTextureCoords[i] != nullptr)
-    {
-      std::vector<aiVector3D> tex_coords_vec;
-      tex_coords_vec.reserve(mesh->mNumVertices);
-      for (int v = 0; v < mesh->mNumVertices; ++v)
-      {
-        tex_coords_vec[v] = mesh->mTextureCoords[i][v];
-      }
-      texcoords_.push_back(std::move(tex_coords_vec));
-    }
-  }
-
-  for (unsigned int f = 0; f < mesh->mNumFaces; ++f)
-  {
-    faces_.push_back(mesh->mFaces[f]);
-  }
-
-  LOG(INFO) << "Loading textured map file: " << textured_file_path;
-  texture_map_ = cv::imread(textured_file_path);
-  if (texture_map_.empty())
-  {
-    // throw std::runtime_error("[TexturedMeshLoader] Failed to read textured image: "
-    //                         + textured_file_path);
-    texture_map_ = cv::Mat(2, 2, CV_8UC3, {100, 100, 100});
-  }
-  cv::cvtColor(texture_map_, texture_map_, cv::COLOR_BGR2RGB);
-
-  LOG(INFO) << "Successfully Loaded textured mesh file!!!";
-  LOG(INFO) << "Mesh has vertices_num: " << vertices_.size() << ", diameter: " << mesh_diamter_
-            << ", faces_num: " << faces_.size() << ", center: " << mesh_center_;
-}
-
-/**
- * @brief 获取mesh模型的半径
- *
- * @return float
- */
-float TexturedMeshLoader::GetMeshDiameter() const noexcept
-{
-  return mesh_diamter_;
-}
-
-/**
- * @brief 获取mesh模型的顶点数量
- *
- * @return size_t
- */
-size_t TexturedMeshLoader::GetMeshNumVertices() const noexcept
-{
-  return vertices_.size();
-}
-
-/**
- * @brief 获取mesh模型的顶点数据指针
- *
- * @return const std::vector<aiVector3D> &
- */
-const std::vector<aiVector3D> &TexturedMeshLoader::GetMeshVertices() const noexcept
-{
-  return vertices_;
-}
-
-/**
- * @brief 获取mesh模型顶点的法向量
- *
- * @return const std::vector<aiVector3D> &
- */
-const std::vector<aiVector3D> &TexturedMeshLoader::GetMeshVertexNormals() const noexcept
-{
-  return vertex_normals_;
-}
-
-/**
- * @brief 获取mesh模型的外观坐标系
- *
- * @return const std::vector<aiVector3D> &
- */
-const std::vector<std::vector<aiVector3D>> &TexturedMeshLoader::GetMeshTextureCoords()
-    const noexcept
-{
-  return texcoords_;
-}
-
-/**
- * @brief 获取mesh模型的faces
- *
- * @return const std::vector<aiFace> &
- */
-const std::vector<aiFace> &TexturedMeshLoader::GetMeshFaces() const noexcept
-{
-  return faces_;
-}
-
-/**
- * @brief 获取mesh模型的三维中心
- *
- * @return const std::vector<Eigen::Vector3f>&
- */
-const Eigen::Vector3f &TexturedMeshLoader::GetMeshModelCenter() const noexcept
-{
-  return mesh_center_;
-}
-
-/**
- * @brief 获取mesh包围盒转换矩阵
- *
- * @return const Eigen::Matrix4f&
- */
-const Eigen::Matrix4f &TexturedMeshLoader::GetOrientBounds() const noexcept
-{
-  return obb_;
-}
-
-/**
- * @brief 获取cv::Mat格式的外观图
- *
- * @return const cv::Mat&
- */
-const cv::Mat &TexturedMeshLoader::GetTextureMap() const noexcept
-{
-  return texture_map_;
-}
-
-/**
- * @brief 获取物体最小包络盒的尺寸
- *
- * @return const Eigen::Vector3f
- */
-const Eigen::Vector3f TexturedMeshLoader::GetObjectDimension() const noexcept
-{
-  return dim_;
-}
-
-} // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose_utils.hpp b/detection_6d_foundationpose/src/foundationpose_utils.hpp
index 3761f8a..7b1b1b7 100644
--- a/detection_6d_foundationpose/src/foundationpose_utils.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_utils.hpp
@@ -34,100 +34,6 @@
 
 namespace detection_6d {
 
-class TexturedMeshLoader {
-public:
-  /**
-   * @brief 创建TexturedMeshLoader实例，并加载mesh模型以及其外观图
-   *
-   * @param mesh_file_path 应当以`.obj`结尾
-   * @param textured_file_path 应当以`.png`结尾
-   *
-   * @throw 如果输入路径格式不正确，抛出`std::invalid_arguments`异常
-   */
-  TexturedMeshLoader(const std::string &mesh_file_path, const std::string &textured_file_path);
-
-  /**
-   * @brief 获取mesh模型的半径
-   *
-   * @return float
-   */
-  float GetMeshDiameter() const noexcept;
-
-  /**
-   * @brief 获取mesh模型的顶点数量
-   *
-   * @return size_t
-   */
-  size_t GetMeshNumVertices() const noexcept;
-
-  /**
-   * @brief 获取mesh模型的顶点数据指针
-   *
-   * @return const std::vector<aiVector3D> &
-   */
-  const std::vector<aiVector3D> &GetMeshVertices() const noexcept;
-
-  /**
-   * @brief 获取mesh模型顶点的法向量
-   *
-   * @return const std::vector<aiVector3D> &
-   */
-  const std::vector<aiVector3D> &GetMeshVertexNormals() const noexcept;
-
-  /**
-   * @brief 获取mesh模型的外观坐标系
-   *
-   * @return const std::vector<aiVector3D> &
-   */
-  const std::vector<std::vector<aiVector3D>> &GetMeshTextureCoords() const noexcept;
-
-  /**
-   * @brief 获取mesh模型的faces
-   *
-   * @return const std::vector<aiFace> &
-   */
-  const std::vector<aiFace> &GetMeshFaces() const noexcept;
-
-  /**
-   * @brief 获取mesh模型的三维中心
-   *
-   * @return const std::vector<Eigen::Vector3f>&
-   */
-  const Eigen::Vector3f &GetMeshModelCenter() const noexcept;
-
-  /**
-   * @brief 获取mesh包围盒转换矩阵
-   *
-   * @return const Eigen::Matrix4f&
-   */
-  const Eigen::Matrix4f &GetOrientBounds() const noexcept;
-
-  /**
-   * @brief 获取cv::Mat格式的外观图
-   *
-   * @return const cv::Mat&
-   */
-  const cv::Mat &GetTextureMap() const noexcept;
-
-  /**
-   * @brief 获取物体最小包络盒的尺寸
-   *
-   * @return const Eigen::Vector3f
-   */
-  const Eigen::Vector3f GetObjectDimension() const noexcept;
-
-private:
-  float                                mesh_diamter_;
-  Eigen::Vector3f                      mesh_center_;
-  std::vector<aiVector3D>              vertices_;
-  std::vector<aiVector3D>              vertex_normals_;
-  std::vector<std::vector<aiVector3D>> texcoords_;
-  std::vector<aiFace>                  faces_;
-  Eigen::Matrix4f                      obb_;
-  Eigen::Vector3f                      dim_;
-  cv::Mat                              texture_map_;
-};
-
 struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage {
   // 输入host端rgb图像
   cv::Mat rgb_on_host;
@@ -199,10 +105,6 @@ struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage {
     }                                  \
   }
 
-// static auto func_cuda_memory_release = [](float* p) {
-//   CHECK_CUDA(cudaFree(p), "Release cuda memory ptr FAILED!!!");
-// };
-
 template <typename T>
 class CudaMemoryDeleter {
 public:
@@ -216,12 +118,6 @@ class CudaMemoryDeleter {
   }
 };
 
-// Finds the minimum and maximum vertex from the mesh loaded by assimp
-std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh *mesh);
-
-// Calculates the diameter of the mesh loaded by assimp
-float CalcMeshDiameter(const aiMesh *mesh);
-
 } // namespace detection_6d
 
 #endif // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_UTILS_HPP_
diff --git a/detection_6d_foundationpose/src/mesh_loader/assimp_mesh_loader.cpp b/detection_6d_foundationpose/src/mesh_loader/assimp_mesh_loader.cpp
new file mode 100644
index 0000000..8f9e86b
--- /dev/null
+++ b/detection_6d_foundationpose/src/mesh_loader/assimp_mesh_loader.cpp
@@ -0,0 +1,297 @@
+#include "detection_6d_foundationpose/mesh_loader.hpp"
+
+#include <assimp/postprocess.h>
+#include <assimp/scene.h>
+#include <assimp/Importer.hpp>
+#include <filesystem>
+
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <glog/logging.h>
+#include <glog/log_severity.h>
+
+namespace detection_6d {
+
+static std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh *mesh)
+{
+  Eigen::Vector3f min_vertex = {0, 0, 0};
+  Eigen::Vector3f max_vertex = {0, 0, 0};
+
+  if (mesh->mNumVertices == 0)
+  {
+    return std::pair{min_vertex, max_vertex};
+  }
+
+  min_vertex << mesh->mVertices[0].x, mesh->mVertices[0].y, mesh->mVertices[0].z;
+  max_vertex << mesh->mVertices[0].x, mesh->mVertices[0].y, mesh->mVertices[0].z;
+
+  // Iterate over all vertices to find the bounding box
+  for (size_t v = 0; v < mesh->mNumVertices; v++)
+  {
+    float vx = mesh->mVertices[v].x;
+    float vy = mesh->mVertices[v].y;
+    float vz = mesh->mVertices[v].z;
+
+    min_vertex[0] = std::min(min_vertex[0], vx);
+    min_vertex[1] = std::min(min_vertex[1], vy);
+    min_vertex[2] = std::min(min_vertex[2], vz);
+
+    max_vertex[0] = std::max(max_vertex[0], vx);
+    max_vertex[1] = std::max(max_vertex[1], vy);
+    max_vertex[2] = std::max(max_vertex[2], vz);
+  }
+  return std::pair{min_vertex, max_vertex};
+}
+
+static float CalcMeshDiameter(const aiMesh *mesh)
+{
+  float max_dist = 0.0;
+  for (unsigned int i = 0; i < mesh->mNumVertices; ++i)
+  {
+    for (unsigned int j = i + 1; j < mesh->mNumVertices; ++j)
+    {
+      aiVector3D diff = mesh->mVertices[i] - mesh->mVertices[j];
+      float      dist = diff.Length();
+      max_dist        = std::max(max_dist, dist);
+    }
+  }
+  return max_dist;
+}
+
+static void ComputeOBB(const aiMesh    *mesh,
+                       Eigen::Matrix4f &out_orient_bbox,
+                       Eigen::Vector3f &out_dimension)
+{
+  std::vector<Eigen::Vector3f> vertices;
+  for (unsigned int i = 0; i < mesh->mNumVertices; ++i)
+  {
+    vertices.emplace_back(mesh->mVertices[i].x, mesh->mVertices[i].y, mesh->mVertices[i].z);
+  }
+
+  // 计算质心
+  Eigen::Vector3f mean = Eigen::Vector3f::Zero();
+  for (const auto &v : vertices)
+  {
+    mean += v;
+  }
+  mean /= vertices.size();
+
+  // 计算协方差矩阵
+  Eigen::Matrix3f cov = Eigen::Matrix3f::Zero();
+  for (const auto &v : vertices)
+  {
+    Eigen::Vector3f diff = v - mean;
+    cov += diff * diff.transpose();
+  }
+  cov /= vertices.size();
+
+  // 特征分解
+  Eigen::SelfAdjointEigenSolver<Eigen::Matrix3f> solver(cov);
+  Eigen::Matrix3f                                rotation = solver.eigenvectors();
+  Eigen::Vector3f                                extents  = solver.eigenvalues().cwiseSqrt();
+  // 生成变换矩阵
+  Eigen::Matrix4f transform   = Eigen::Matrix4f::Identity();
+  transform.block<3, 3>(0, 0) = rotation;
+  transform.block<3, 1>(0, 3) = mean;
+
+  Eigen::MatrixXf transformed(vertices.size(), 3);
+  for (int i = 0; i < vertices.size(); ++i)
+  {
+    Eigen::Vector3f proj = rotation.transpose() * vertices[i];
+    transformed(i, 0)    = proj(0);
+    transformed(i, 1)    = proj(1);
+    transformed(i, 2)    = proj(2);
+  }
+
+  Eigen::Vector3f minBound = transformed.colwise().minCoeff();
+  Eigen::Vector3f maxBound = transformed.colwise().maxCoeff();
+
+  Eigen::Vector3f dimension = maxBound - minBound;
+
+  out_orient_bbox = transform;
+  out_dimension   = dimension;
+}
+
+class AssimpMeshLoader : public BaseMeshLoader {
+public:
+  AssimpMeshLoader(const std::string &name, const std::string &mesh_file_path);
+
+  ~AssimpMeshLoader() = default;
+
+  std::string GetName() const noexcept override;
+
+  float GetMeshDiameter() const noexcept override;
+
+  size_t GetMeshNumVertices() const noexcept override;
+
+  size_t GetMeshNumFaces() const noexcept override;
+
+  const std::vector<Eigen::Vector3f> &GetMeshVertices() const noexcept override;
+
+  const std::vector<Eigen::Vector3f> &GetMeshVertexNormals() const noexcept override;
+
+  const std::vector<Eigen::Vector3f> &GetMeshTextureCoords() const noexcept override;
+
+  const std::vector<Vector3ui> &GetMeshTriangleFaces() const noexcept override;
+
+  const Eigen::Vector3f &GetMeshModelCenter() const noexcept override;
+
+  const Eigen::Matrix4f &GetOrientBounds() const noexcept override;
+
+  const Eigen::Vector3f &GetObjectDimension() const noexcept override;
+
+  const cv::Mat &GetTextureMap() const noexcept override;
+
+private:
+  std::string                  name_;
+  float                        mesh_diamter_;
+  Eigen::Vector3f              mesh_center_;
+  std::vector<Eigen::Vector3f> vertices_;
+  std::vector<Eigen::Vector3f> vertex_normals_;
+  std::vector<Eigen::Vector3f> texcoords_;
+  std::vector<Vector3ui>       faces_;
+  Eigen::Matrix4f              obb_;
+  Eigen::Vector3f              dim_;
+  cv::Mat                      texture_map_;
+};
+
+AssimpMeshLoader::AssimpMeshLoader(const std::string &name, const std::string &mesh_file_path)
+    : name_(name)
+{
+  if (mesh_file_path.empty())
+  {
+    throw std::invalid_argument("[AssimpMeshLoader] Got empty mesh_file_path !");
+  }
+
+  Assimp::Importer importer;
+  const aiScene   *scene =
+      importer.ReadFile(mesh_file_path, aiProcess_Triangulate | aiProcess_JoinIdenticalVertices |
+                                            aiProcess_SortByPType);
+  if (scene == nullptr)
+  {
+    throw std::runtime_error("[AssimpMeshLoader] Failed to read mesh file: " + mesh_file_path);
+  }
+
+  const aiMesh *mesh = scene->mMeshes[0];
+  mesh_diamter_      = CalcMeshDiameter(mesh);
+  ComputeOBB(mesh, obb_, dim_);
+  auto min_max_vertex = FindMinMaxVertex(mesh);
+  mesh_center_        = (min_max_vertex.second + min_max_vertex.first) / 2.0;
+
+  if (mesh->mTextureCoords[0] == nullptr)
+  {
+    throw std::runtime_error("[AssimpMeshLoader] Got invalid texturecoords!");
+  }
+  // Walk through each of the mesh's vertices
+  for (unsigned int v = 0; v < mesh->mNumVertices; v++)
+  {
+    Eigen::Vector3f vertice{mesh->mVertices[v].x, mesh->mVertices[v].y, mesh->mVertices[v].z};
+    vertices_.push_back(vertice);
+
+    Eigen::Vector3f normal{mesh->mNormals[v].x, mesh->mNormals[v].y, mesh->mNormals[v].z};
+    vertex_normals_.push_back(normal);
+
+    Eigen::Vector3f tex_coord{mesh->mTextureCoords[0][v].x, mesh->mTextureCoords[0][v].y,
+                              mesh->mTextureCoords[0][v].z};
+    texcoords_.push_back(tex_coord);
+  }
+
+  for (unsigned int f = 0; f < mesh->mNumFaces; ++f)
+  {
+    Vector3ui face{mesh->mFaces[f].mIndices[0], mesh->mFaces[f].mIndices[1],
+                   mesh->mFaces[f].mIndices[2]};
+    faces_.push_back(face);
+  }
+
+  std::string texture_map_path;
+
+  auto material = scene->mMaterials[mesh->mMaterialIndex];
+  aiString ai_texture_map_path;
+  if (material != nullptr && material->GetTexture(aiTextureType_DIFFUSE, 0, &ai_texture_map_path) == AI_SUCCESS)
+  {
+    texture_map_path = (std::filesystem::path(mesh_file_path).parent_path() / ai_texture_map_path.C_Str()).string();
+    LOG(INFO) << "[AssimpMeshLoader] Using texture map filepath from mesh : " << texture_map_path;
+  }
+  texture_map_ = cv::imread(texture_map_path);
+  if (texture_map_.empty())
+  {
+    LOG(WARNING) << "[AssimpMeshLoader] Got invalid texture_map_path: " << texture_map_path
+                 << ", using default texture map!";
+    texture_map_ = cv::Mat(2, 2, CV_8UC3, {100, 100, 100});
+  }
+  cv::cvtColor(texture_map_, texture_map_, cv::COLOR_BGR2RGB);
+
+  LOG(INFO) << "Successfully Loaded textured mesh file!!!";
+  LOG(INFO) << "Mesh has vertices_num: " << vertices_.size() << ", diameter: " << mesh_diamter_
+            << ", faces_num: " << faces_.size() << ", center: " << mesh_center_;
+}
+
+std::string AssimpMeshLoader::GetName() const noexcept
+{
+  return name_;
+}
+
+float AssimpMeshLoader::GetMeshDiameter() const noexcept
+{
+  return mesh_diamter_;
+}
+
+size_t AssimpMeshLoader::GetMeshNumVertices() const noexcept
+{
+  return vertices_.size();
+}
+
+size_t AssimpMeshLoader::GetMeshNumFaces() const noexcept
+{
+  return faces_.size();
+}
+
+const std::vector<Eigen::Vector3f> &AssimpMeshLoader::GetMeshVertices() const noexcept
+{
+  return vertices_;
+}
+
+const std::vector<Eigen::Vector3f> &AssimpMeshLoader::GetMeshVertexNormals() const noexcept
+{
+  return vertex_normals_;
+}
+
+const std::vector<Eigen::Vector3f> &AssimpMeshLoader::GetMeshTextureCoords() const noexcept
+{
+  return texcoords_;
+}
+
+const std::vector<BaseMeshLoader::Vector3ui> &AssimpMeshLoader::GetMeshTriangleFaces()
+    const noexcept
+{
+  return faces_;
+}
+
+const Eigen::Vector3f &AssimpMeshLoader::GetMeshModelCenter() const noexcept
+{
+  return mesh_center_;
+}
+
+const Eigen::Matrix4f &AssimpMeshLoader::GetOrientBounds() const noexcept
+{
+  return obb_;
+}
+
+const Eigen::Vector3f &AssimpMeshLoader::GetObjectDimension() const noexcept
+{
+  return dim_;
+}
+
+const cv::Mat &AssimpMeshLoader::GetTextureMap() const noexcept
+{
+  return texture_map_;
+}
+
+std::shared_ptr<BaseMeshLoader> CreateAssimpMeshLoader(const std::string &name,
+                                                       const std::string &mesh_file_path)
+{
+  return std::make_shared<AssimpMeshLoader>(name, mesh_file_path);
+}
+
+} // namespace detection_6d
diff --git a/simple_tests/src/test_foundationpose.cpp b/simple_tests/src/test_foundationpose.cpp
index 9715791..469f8c2 100644
--- a/simple_tests/src/test_foundationpose.cpp
+++ b/simple_tests/src/test_foundationpose.cpp
@@ -18,7 +18,7 @@ static const std::string demo_textured_map_path = demo_data_path_ + "/mesh/textu
 static const std::string demo_name_             = "mustard";
 static const std::string frame_id               = "1581120424100262102";
 
-std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel()
+std::tuple<std::shared_ptr<Base6DofDetectionModel>, std::shared_ptr<BaseMeshLoader>> CreateModel()
 {
   auto refiner_core = CreateTrtInferCore(refiner_engine_path_,
                                          {
@@ -35,17 +35,19 @@ std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel()
 
   Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
 
+  auto mesh_loader = CreateAssimpMeshLoader(demo_name_, demo_textured_obj_path);
+  CHECK(mesh_loader != nullptr);
+
   auto foundation_pose =
-      CreateFoundationPoseModel(refiner_core, scorer_core, demo_name_, demo_textured_obj_path,
-                                demo_textured_map_path, intrinsic_in_mat);
+      CreateFoundationPoseModel(refiner_core, scorer_core, {mesh_loader}, intrinsic_in_mat);
 
-  return foundation_pose;
+  return {foundation_pose, mesh_loader};
 }
 
 TEST(foundationpose_test, test)
 {
-  auto            foundation_pose  = CreateFoundationPoseModel();
-  Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
+  auto [foundation_pose, mesh_loader] = CreateModel();
+  Eigen::Matrix3f intrinsic_in_mat    = ReadCamK(demo_data_path_ + "/cam_K.txt");
 
   const std::string first_rgb_path   = demo_data_path_ + "/rgb/" + frame_id + ".png";
   const std::string first_depth_path = demo_data_path_ + "/depth/" + frame_id + ".png";
@@ -53,7 +55,7 @@ TEST(foundationpose_test, test)
 
   auto [rgb, depth, mask] = ReadRgbDepthMask(first_rgb_path, first_depth_path, first_mask_path);
 
-  const Eigen::Vector3f object_dimension = foundation_pose->GetObjectDimension(demo_name_);
+  const Eigen::Vector3f object_dimension = mesh_loader->GetObjectDimension();
 
   Eigen::Matrix4f out_pose;
   CHECK(foundation_pose->Register(rgb.clone(), depth, mask, demo_name_, out_pose));
@@ -62,7 +64,8 @@ TEST(foundationpose_test, test)
   // [temp] for test
   cv::Mat regist_plot = rgb.clone();
   cv::cvtColor(regist_plot, regist_plot, cv::COLOR_RGB2BGR);
-  draw3DBoundingBox(intrinsic_in_mat, out_pose, 480, 640, object_dimension, regist_plot);
+  auto draw_pose = ConvertPoseMesh2BBox(out_pose, mesh_loader);
+  draw3DBoundingBox(intrinsic_in_mat, draw_pose, 480, 640, object_dimension, regist_plot);
   cv::imwrite("/workspace/test_data/test_foundationpose_plot.png", regist_plot);
 
   auto rgb_paths = get_files_in_directory(demo_data_path_ + "/rgb/");
@@ -82,15 +85,18 @@ TEST(foundationpose_test, test)
     auto [cur_rgb, cur_depth]  = ReadRgbDepth(cur_rgb_path, cur_depth_path);
 
     Eigen::Matrix4f track_pose;
-    CHECK(foundation_pose->Track(cur_rgb.clone(), cur_depth, demo_name_, track_pose));
+    CHECK(foundation_pose->Track(cur_rgb.clone(), cur_depth, out_pose, demo_name_, track_pose));
     LOG(WARNING) << "Track pose : " << track_pose;
 
     cv::Mat track_plot = cur_rgb.clone();
     cv::cvtColor(track_plot, track_plot, cv::COLOR_RGB2BGR);
-    draw3DBoundingBox(intrinsic_in_mat, track_pose, 480, 640, object_dimension, track_plot);
+    auto draw_pose = ConvertPoseMesh2BBox(track_pose, mesh_loader);
+    draw3DBoundingBox(intrinsic_in_mat, draw_pose, 480, 640, object_dimension, track_plot);
     cv::imshow("test_foundationpose_result", track_plot);
     cv::waitKey(20);
     result_image_sequence.push_back(track_plot);
+
+    out_pose = track_pose;
   }
 
   saveVideo(result_image_sequence, "/workspace/test_data/test_foundationpose_result.mp4");
@@ -98,8 +104,8 @@ TEST(foundationpose_test, test)
 
 TEST(foundationpose_test, speed_register)
 {
-  auto            foundation_pose  = CreateFoundationPoseModel();
-  Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
+  auto [foundation_pose, mesh_loader] = CreateModel();
+  Eigen::Matrix3f intrinsic_in_mat    = ReadCamK(demo_data_path_ + "/cam_K.txt");
 
   const std::string first_rgb_path   = demo_data_path_ + "/rgb/" + frame_id + ".png";
   const std::string first_depth_path = demo_data_path_ + "/depth/" + frame_id + ".png";
@@ -122,8 +128,8 @@ TEST(foundationpose_test, speed_register)
 
 TEST(foundationpose_test, speed_track)
 {
-  auto            foundation_pose  = CreateFoundationPoseModel();
-  Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
+  auto [foundation_pose, mesh_loader] = CreateModel();
+  Eigen::Matrix3f intrinsic_in_mat    = ReadCamK(demo_data_path_ + "/cam_K.txt");
 
   const std::string first_rgb_path   = demo_data_path_ + "/rgb/" + frame_id + ".png";
   const std::string first_depth_path = demo_data_path_ + "/depth/" + frame_id + ".png";
@@ -140,7 +146,7 @@ TEST(foundationpose_test, speed_track)
   for (int i = 0; i < 5000; ++i)
   {
     Eigen::Matrix4f track_pose;
-    foundation_pose->Track(rgb.clone(), depth, demo_name_, track_pose);
+    foundation_pose->Track(rgb.clone(), depth, first_pose, demo_name_, track_pose);
     counter.Count(1);
   }
 

From 3adaf2d73c2d8c2450221a50736c29b34a9c810c Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Mon, 7 Apr 2025 14:49:59 +0800
Subject: [PATCH 2/2] Update README.md

Signed-off-by: zz990099 <771647586@qq.com>
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 8dbcf89..90b6b36 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@
 
 ## Update LOG
 
+- [2025.04] 解耦`Register`和`Track`过程; 令算法输出mesh下的位姿，提供mesh_loader相关的接口和方法供外部拓展。
 - [2025.03] 渲染过程与原Python工程对齐，支持无texture纹理输入渲染. [对应PR](https://github.com/zz990099/foundationpose_cpp/pull/13).
 - [2025.03] 添加对Jetson Orin平台支持，[一键配置docker环境](docs/build_enviroment_on_jetson.md)