diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..8960d1c
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,84 @@
+#基础样式
+BasedOnStyle: Google
+# 指针和引用的对齐: Left, Right, Middle
+DerivePointerAlignment: false
+PointerAlignment: Right
+# 访问修饰符(public/protected/private)的缩进偏移量
+AccessModifierOffset: -2
+# 缩进宽度
+IndentWidth: 2
+# 连续的空行保留几行
+MaxEmptyLinesToKeep: 1
+# 大括号的换行方式
+BreakBeforeBraces: Custom
+# 是否允许短方法单行
+AllowShortFunctionsOnASingleLine: false
+# 支持一行的if
+AllowShortIfStatementsOnASingleLine: false
+# 在未封闭(括号的开始和结束不在同一行)的括号中的代码是否对齐
+AlignAfterOpenBracket: Align
+# switch的case缩进
+IndentCaseLabels: true
+# 每行字符的长度
+ColumnLimit: 100
+# 注释对齐,true将左对齐，false不会对齐
+AlignTrailingComments: true
+# C cast括号后加空格
+SpaceAfterCStyleCast: false
+# 换行的时候对齐操作符
+AlignOperands: true
+# 中括号两边空格 []
+SpacesInSquareBrackets: false
+# 连续多行声明时，对齐变量名
+AlignConsecutiveDeclarations: true
+# 连续赋值时，对齐所有等号
+AlignConsecutiveAssignments: true
+# 容器类的空格 例如 OC的字典
+SpacesInContainerLiterals: false
+# 在构造函数初始化时按逗号断行，并以冒号对齐
+BreakConstructorInitializersBeforeComma: false
+# 函数参数换行
+AllowAllParametersOfDeclarationOnNextLine: true
+# 续行(下一行)时的缩进宽度
+ContinuationIndentWidth: 4
+# tab键的宽度
+TabWidth: 4
+# 赋值运算符前加空格
+SpaceBeforeAssignmentOperators: true
+# 行尾的注释前加1个空格
+SpacesBeforeTrailingComments: 1
+# false表示所有形参要么都在同一行，要么都各自一行
+BinPackParameters: false
+# 头文件排序
+SortIncludes: false
+
+# 大括号换行，只有当BreakBeforeBraces设置为Custom时才有效
+BraceWrapping:
+  # class定义后面
+  AfterClass: false
+  # 控制语句后面
+  AfterControlStatement: true
+  # enum定义后面
+  AfterEnum: false
+  # 函数定义后面
+  AfterFunction: true
+  # 命名空间定义后面
+  AfterNamespace: false
+  # struct定义后面
+  AfterStruct: false
+  # union定义后面
+  AfterUnion: false
+  # extern之后
+  AfterExternBlock: false
+  # catch之前
+  BeforeCatch: false
+  # else之前
+  BeforeElse: false
+  # 缩进大括号
+  IndentBraces: false
+  # 分离空函数
+  SplitEmptyFunction: false
+  # 分离空的class/struct/union
+  SplitEmptyRecord: false
+  # 分离空命名空间
+  SplitEmptyNamespace: false
diff --git a/.github/workflows/Compilation.yml b/.github/workflows/Compilation.yml
index ef9464e..3bf22bc 100644
--- a/.github/workflows/Compilation.yml
+++ b/.github/workflows/Compilation.yml
@@ -19,12 +19,18 @@ jobs:
         -v ${{ github.workspace }}:/workspace
         -w /workspace
     steps:
-    - uses: actions/checkout@v4
-    - name: Build the Docker image
+    - name: Init Submodule
+      uses: actions/checkout@v4
+      with:
+        submodules: recursive  # 递归初始化所有子模块
+        # 如果子模块是私有仓库，需配置 SSH 密钥
+        ssh-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+    - name: Build Project
       run:
-        cd /workspace && 
-        mkdir build && cd build && 
-        cmake .. && 
+        cd /workspace &&
+        mkdir build && cd build &&
+        cmake .. -DENABLE_TENSORRT=ON &&
         make -j
 
   tensorrt10:
@@ -38,10 +44,16 @@ jobs:
         -v ${{ github.workspace }}:/workspace
         -w /workspace
     steps:
-    - uses: actions/checkout@v4
-    - name: Build the Docker image
+    - name: Init Submodule
+      uses: actions/checkout@v4
+      with:
+        submodules: recursive  # 递归初始化所有子模块
+        # 如果子模块是私有仓库，需配置 SSH 密钥
+        ssh-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+    - name: Build Project
       run:
-        cd /workspace && 
-        mkdir build && cd build && 
-        cmake .. && 
+        cd /workspace &&
+        mkdir build && cd build &&
+        cmake .. -DENABLE_TENSORRT=ON &&
         make -j
diff --git a/.github/workflows/Lint.yml b/.github/workflows/Lint.yml
new file mode 100644
index 0000000..8ae99c2
--- /dev/null
+++ b/.github/workflows/Lint.yml
@@ -0,0 +1,26 @@
+name: pre-commit Checks
+
+on:
+  pull_request:  # 在 PR 时触发
+  push:          # 在推送代码到 main/master 分支时触发
+    branches: [main, master]
+
+jobs:
+  pre-commit:
+    name: Run pre-commit checks
+    runs-on: ubuntu-latest  # 使用 Ubuntu 环境
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4  # 检出代码
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"  # 指定 Python 版本
+
+      - name: Install pre-commit
+        run: pip install pre-commit  # 安装 pre-commit
+
+      - name: Run pre-commit checks
+        run: pre-commit run --all-files  # 运行所有检查
diff --git a/.gitignore b/.gitignore
index 7262cd4..5cda69e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@
 build
 models
 .vscode
-mustard0
\ No newline at end of file
+mustard0
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..8a66214
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "easy_deploy_tool"]
+	path = easy_deploy_tool
+	url = git@github.com:zz990099/EasyDeployTool.git
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..a78e381
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,30 @@
+repos:
+  # 官方仓库中的基础钩子
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: check-added-large-files
+      - id: check-ast
+      - id: check-case-conflict
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: check-xml
+      - id: check-yaml
+        args: ["--allow-multiple-documents"]
+      - id: debug-statements
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+      - id: trailing-whitespace
+        exclude_types: [rst]
+      - id: fix-byte-order-marker
+
+  # 运行 clang-format 检查 C/C++ 代码格式
+  - repo: https://github.com/pre-commit/mirrors-clang-format
+    rev: "v20.1.0"  # 指定 clang-format 版本
+    hooks:
+      - id: clang-format
+        name: clang-format (check)
+        args: [--style=file, --dry-run, --Werror]  # 检查模式
+        types: [c, c++]
+        exclude:
+          detection_6d_foundationpose/src/nvdiffrast(/.*)?
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 35cad2f..5e853b4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,11 +2,10 @@ cmake_minimum_required(VERSION 3.8)
 project(foundationpose_cpp)
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 
-add_subdirectory(deploy_core)
-add_subdirectory(inference_core)
+add_subdirectory(easy_deploy_tool)
 add_subdirectory(detection_6d_foundationpose)
 
-add_subdirectory(simple_tests)
\ No newline at end of file
+add_subdirectory(simple_tests)
diff --git a/README.md b/README.md
index 859cbdb..7f1fc26 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,15 @@
 
 ### Enviroment Build
 
-1. 使用`docker`来构建运行环境
+1. 下载`foundationpose_cpp`repo
+```bash
+git clone git@github.com:zz990099/foundationpose_cpp.git
+cd foundationpose_cpp
+git submodule init
+git submodule update
+```
+
+2. 使用`docker`来构建运行环境
   ```bash
     cd ${foundationpose_cpp}/docker
     bash build_docker.sh --container_type=trt8 # trt10, jetson_trt8, jetson_trt10
@@ -71,7 +79,7 @@
   ```bash
     cd /workspace
     mkdir build && cd build
-    cmake ..
+    cmake -DENABLE_TENSORRT=ON ..
     make -j
   ```
 
diff --git a/deploy_core/CMakeLists.txt b/deploy_core/CMakeLists.txt
deleted file mode 100644
index 0923939..0000000
--- a/deploy_core/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-cmake_minimum_required(VERSION 3.8)
-project(deploy_core)
-
-add_compile_options(-std=c++17)
-add_compile_options(-O3 -Wextra -Wdeprecated -fPIC)
-set(CMAKE_CXX_STANDARD 17)
-
-
-find_package(OpenCV REQUIRED)
-find_package(glog REQUIRED)
-
-include_directories(
-  include
-  ${OpenCV_INCLUDE_DIRS}
-)
-
-set(source_file src/base_infer_core.cpp
-                src/base_detection.cpp
-                src/base_sam.cpp
-)
-
-add_library(${PROJECT_NAME} SHARED ${source_file})   
-
-
-target_link_libraries(${PROJECT_NAME} PUBLIC
-  ${OpenCV_LIBS}
-  glog::glog
-) 
-
-target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
\ No newline at end of file
diff --git a/deploy_core/README.md b/deploy_core/README.md
deleted file mode 100644
index 6f71793..0000000
--- a/deploy_core/README.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Deploy Core
-
-The `DeployCore` module defines the abstract functionalities for all components, including core inference capabilities, 2D detection features, SAM functionalities, and more. Beyond merely defining abstract functions, DeployCore also provides external encapsulations for certain algorithms. When implementing an algorithm, developers only need to focus on completing the key processes outlined in these definitions to achieve seamless algorithm deployment.
-
-## Functionality
-
-`DeployCore` is designed to provide abstract interface definitions for the functionalities of all modules, as well as abstract base classes containing reusable code.
-
-- Abstract core inference functionality: `BaseInferCore`  
-- Abstract 2D detection functionality: `BaseDetection2DModel`  
-- Abstract SAM functionality: `BaseSamModel`  
-- Plug-and-play asynchronous pipeline base class: `BaseAsyncPipeline`
-
-## Structure
-
-The entire project code is divided into three parts:  
-  1. Abstract interface classes for functional modules  
-  2. Abstract base classes for certain functional modules  
-  3. Base classes for the asynchronous inference pipeline framework
-
-code structure:
-  ```bash
-  deploy_core
-  |-- CMakeLists.txt
-  |-- README.md
-  |-- include
-  |   `-- deploy_core
-  |       |-- base_infer_core.h
-  |       |-- base_detection.h
-  |       |-- base_sam.h
-  |       |-- async_pipeline.h
-  |       |-- async_pipeline_impl.h
-  |       |-- block_queue.h
-  |       |-- common_defination.h
-  |       `-- wrapper.h
-  `-- src
-      |-- base_detection.cpp
-      |-- base_infer_core.cpp
-      `-- base_sam.cpp
-  ```
-
-
-  - Abstract interface classes for functional modules
-    ```bash
-    |-- base_infer_core.h
-    |-- base_detection.h
-    |-- base_sam.h
-    ```
-    1. **`base_infer_core.h`**: Defines the core inference functionalities and related abstract classes, while also providing an abstract base class for the foundational features of the inference core module.  
-    2. **`base_detection.h`**: Defines the abstract base class for 2D detection functionalities.  
-    3. **`base_sam.h`**: Defines the abstract base class for SAM functionalities. 
-
-  - Base classes for the asynchronous inference pipeline framework
-    ```bash
-    |-- async_pipeline.h
-    |-- async_pipeline_impl.h
-    |-- block_queue.h
-    |-- common_defination.h
-    `-- wrapper.h
-    ```
-    1. **`async_pipeline.h`** and **`async_pipeline_impl.h`**: Define the asynchronous inference framework and its implementation.  
-    2. **`block_queue.h`**: Implements the blocking queue.  
-    3. **`common_defination.h`**: Contains common definitions, such as 2D bounding boxes.  
-    4. **`wrapper.h`**: Provides wrappers for certain classes, such as the encapsulation of OpenCV's `cv::Mat` format.
-  
-
-## TODO
-
diff --git a/deploy_core/include/deploy_core/async_pipeline.h b/deploy_core/include/deploy_core/async_pipeline.h
deleted file mode 100644
index 8593c78..0000000
--- a/deploy_core/include/deploy_core/async_pipeline.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:00:38
- * @LastEditTime: 2024-11-26 22:29:22
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/async_pipeline.h
- */
-#ifndef ___DEPLOY_CORE_ASYNC_PIPELINE_H
-#define ___DEPLOY_CORE_ASYNC_PIPELINE_H
-
-#include <functional>
-#include <future>
-#include <memory>
-#include <thread>
-#include <unordered_map>
-
-#include <glog/logging.h>
-#include <glog/log_severity.h>
-
-#include "deploy_core/async_pipeline_impl.h"
-#include "deploy_core/blob_buffer.h"
-#include "deploy_core/block_queue.h"
-
-namespace async_pipeline {
-
-/**
- * @brief A abstract class of image data. Needed by pipeline processing. Useful when data is
- * on device or other location which host cant read/write data directly. Could enable the
- * zero-copy feature if needed.
- *
- */
-class IPipelineImageData {
-public:
-  struct ImageDataInfo {
-    uint8_t        *data_pointer;
-    int             image_height;
-    int             image_width;
-    int             image_channels;
-    DataLocation    location;
-    ImageDataFormat format;
-  };
-  virtual const ImageDataInfo &GetImageDataInfo() const = 0;
-
-protected:
-  virtual ~IPipelineImageData() = default;
-};
-
-/**
- * @brief The basic unit pointer which is parsed in the pipeline processing.
- *
- */
-class IPipelinePackage {
-public:
-  /**
-   * @brief `GetInferBuffer` provides the interface to get the blobs buffer instance
-   * which will be used to deploy inference. Case the algorithm may need multiple inference
-   * core and multiple blobs buffer to complete the whole processing.
-   *
-   * @return std::shared_ptr<IBlobsBuffer>
-   */
-  virtual std::shared_ptr<inference_core::IBlobsBuffer> GetInferBuffer() = 0;
-
-protected:
-  virtual ~IPipelinePackage() = default;
-};
-
-/**
- * @brief This base class provides a simple implementation of the asynchronous inference
- * pipeline which could be plug-and-play.
- *
- * `BaseAsyncPipeline` takes function instance as a basic unit `Block` of the pipeline. User should
- * call the static method `BuildPipelineBlock` to construct a `Block`. Multiple `Block`s make up
- * a `Context`, which pipeline deploys the whole process on.
- *
- * @tparam ResultType
- * @tparam GenResult
- */
-template <typename ResultType, typename GenResult>
-class BaseAsyncPipeline {
-  using ParsingType = std::shared_ptr<IPipelinePackage>;
-  using Block_t     = AsyncPipelineBlock<ParsingType>;
-  using Context_t   = AsyncPipelineContext<ParsingType>;
-
-protected:
-  BaseAsyncPipeline() = default;
-
-  ~BaseAsyncPipeline()
-  {
-    ClosePipeline();
-  }
-
-  /**
-   * @brief The `Block` in pipeline is constructed with a function and its name. Call this method
-   * in the derived class to get `Block_t` instance which is used to configure the whole pipeline.
-   *
-   * @param func
-   * @param block_name
-   * @return Block_t
-   */
-  static Block_t BuildPipelineBlock(const std::function<bool(ParsingType)> &func,
-                                    const std::string                      &block_name)
-  {
-    return Block_t(func, block_name);
-  }
-
-  /**
-   * @brief Configure the pipelien with a `pipeline_name` and multiple `Context_t` instances. One
-   * derived class intance could have sereral pipelines by calling `ConfigPipeline`.
-   *
-   * @param pipeline_name
-   * @param block_list
-   */
-  void ConfigPipeline(const std::string &pipeline_name, const std::vector<Context_t> &block_list)
-  {
-    map_name2instance_.emplace(pipeline_name, block_list);
-  }
-
-public:
-  /**
-   * @brief Get the default pipeline context. Multiple instances derived from `BaseAsyncPipeline`
-   * could use this method to get the context from the other to generate a more complex pipeline.
-   * For example, in detection_2d_yolov8, we combine the algorithm process and inference_core
-   * process to make a integral processing pipeline.
-   *
-   * @return const Context_t&
-   */
-  const Context_t &GetPipelineContext() const
-  {
-    if (map_name2instance_.size() != 1)
-    {
-      throw std::runtime_error("[BaseAsyncPipeline] expect one pipeline, got " +
-                               std::to_string(map_name2instance_.size()));
-    }
-    return map_name2instance_.begin()->second.GetContext();
-  }
-
-  /**
-   * @brief `PushPipeline` allow user to asynchronously push the package into pipeline and wait on
-   * the `future` in another thread. The instance of template type `Result` is generated by functor
-   * `GenResult`.
-   *
-   * @param pipeline_name
-   * @param package
-   * @return std::future<ResultType>
-   */
-  [[nodiscard]] std::future<ResultType> PushPipeline(const std::string &pipeline_name,
-                                                     const ParsingType &package) noexcept
-  {
-    if (map_name2instance_.find(pipeline_name) == map_name2instance_.end())
-    {
-      LOG(ERROR) << "[BaseAsyncPipeline] `PushPipeline` pipeline {" << pipeline_name
-                 << "} is not valid !!!";
-      return std::future<ResultType>();
-    }
-
-    map_index2result_[package_index_] = std::promise<ResultType>();
-    auto ret                          = map_index2result_[package_index_].get_future();
-
-    auto callback = [this, package_index = package_index_](const ParsingType &package) -> bool {
-      ResultType result = gen_result_from_package_(package);
-      map_index2result_[package_index].set_value(std::move(result));
-      map_index2result_.erase(package_index);
-      return true;
-    };
-    map_name2instance_[pipeline_name].PushPipeline(package, callback);
-
-    package_index_++;
-
-    return std::move(ret);
-  }
-
-  /**
-   * @brief Return if the pipeline is initialized.
-   *
-   * @param pipeline_name
-   * @return true
-   * @return false
-   */
-  bool IsPipelineInitialized(const std::string &pipeline_name) noexcept
-  {
-    if (map_name2instance_.find(pipeline_name) == map_name2instance_.end())
-    {
-      return false;
-    }
-    return map_name2instance_[pipeline_name].IsInitialized();
-  }
-
-  /**
-   * @brief Close all pipeline. The un-finished packages will be dropped.
-   *
-   */
-  void ClosePipeline()
-  {
-    for (auto &p_name_ins : map_name2instance_)
-    {
-      p_name_ins.second.ClosePipeline();
-    }
-  }
-
-  /**
-   * @brief Stop all pipeline. The un-finished packages will not be dropped.
-   *
-   */
-  void StopPipeline()
-  {
-    for (auto &p_name_ins : map_name2instance_)
-    {
-      p_name_ins.second.StopPipeline();
-    }
-  }
-
-  /**
-   * @brief Initialize all configured pipeline. Call this function before push packages into
-   * pipeline.
-   *
-   */
-  void InitPipeline()
-  {
-    for (auto &p_name_ins : map_name2instance_)
-    {
-      p_name_ins.second.Init();
-    }
-  }
-
-private:
-  std::unordered_map<std::string, PipelineInstance<ParsingType>> map_name2instance_;
-
-  size_t                                               package_index_ = 0;
-  std::unordered_map<size_t, std::promise<ResultType>> map_index2result_;
-  GenResult                                            gen_result_from_package_;
-};
-
-} // namespace async_pipeline
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/include/deploy_core/async_pipeline_impl.h b/deploy_core/include/deploy_core/async_pipeline_impl.h
deleted file mode 100644
index a7692c1..0000000
--- a/deploy_core/include/deploy_core/async_pipeline_impl.h
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:00:38
- * @LastEditTime: 2024-11-26 21:50:48
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/async_pipeline_impl.h
- */
-#ifndef __EASY_DEPLOY_ASYNC_PIPELINE_IMPL_H
-#define __EASY_DEPLOY_ASYNC_PIPELINE_IMPL_H
-
-#include <functional>
-#include <future>
-#include <vector>
-
-#include <glog/log_severity.h>
-#include <glog/logging.h>
-
-#include "deploy_core/block_queue.h"
-
-namespace async_pipeline {
-
-/**
- * @brief Async Pipeline Block
- *
- * @tparam ParsingType
- */
-template <typename ParsingType>
-class AsyncPipelineBlock {
-public:
-  AsyncPipelineBlock() = default;
-  AsyncPipelineBlock(const AsyncPipelineBlock &block)
-      : func_(block.func_), block_name_(block.block_name_)
-  {}
-
-  AsyncPipelineBlock &operator=(const AsyncPipelineBlock &block)
-  {
-    func_       = block.func_;
-    block_name_ = block.block_name_;
-    return *this;
-  }
-
-  AsyncPipelineBlock(const std::function<bool(ParsingType)> &func) : func_(func)
-  {}
-
-  AsyncPipelineBlock(const std::function<bool(ParsingType)> &func, const std::string &block_name)
-      : func_(func), block_name_(block_name)
-  {}
-
-  const std::string &GetName() const
-  {
-    return block_name_;
-  }
-
-  bool operator()(const ParsingType &pipeline_unit) const
-  {
-    return func_(pipeline_unit);
-  }
-
-private:
-  std::function<bool(ParsingType)> func_;
-  std::string                      block_name_;
-};
-
-/**
- * @brief Async Pipeline Context
- *
- * @tparam ParsingType
- */
-template <typename ParsingType>
-class AsyncPipelineContext {
-  using Block_t   = AsyncPipelineBlock<ParsingType>;
-  using Context_t = AsyncPipelineContext<ParsingType>;
-
-public:
-  AsyncPipelineContext() = default;
-
-  AsyncPipelineContext(const Block_t &block) : blocks_({block})
-  {}
-
-  AsyncPipelineContext(const std::vector<Block_t> &block_vec)
-  {
-    for (const auto &block : block_vec)
-    {
-      blocks_.push_back(block);
-    }
-  }
-
-  AsyncPipelineContext &operator=(const std::vector<Block_t> &block_vec)
-  {
-    for (const auto &block : block_vec)
-    {
-      blocks_.push_back(block);
-    }
-    return *this;
-  }
-
-  AsyncPipelineContext(const Context_t &context) : blocks_(context.blocks_)
-  {}
-
-  AsyncPipelineContext(const std::vector<Context_t> &context_vec)
-  {
-    for (const auto &context : context_vec)
-    {
-      for (const auto &block : context.blocks_)
-      {
-        blocks_.push_back(block);
-      }
-    }
-  }
-
-  AsyncPipelineContext &operator=(const std::vector<Context_t> &context_vec)
-  {
-    for (const auto &context : context_vec)
-    {
-      for (const auto &block : context.blocks_)
-      {
-        blocks_.push_back(block);
-      }
-    }
-    return *this;
-  }
-
-  AsyncPipelineContext &operator=(const Context_t &context)
-  {
-    for (const auto &block : context.blocks_)
-    {
-      blocks_.push_back(block);
-    }
-    return *this;
-  }
-
-public:
-  std::vector<Block_t> blocks_;
-};
-
-/**
- * @brief Async Pipeline Processing Instance
- *
- * @tparam ParsingType
- */
-template <typename ParsingType>
-class PipelineInstance {
-  using Block_t    = AsyncPipelineBlock<ParsingType>;
-  using Context_t  = AsyncPipelineContext<ParsingType>;
-  using Callback_t = std::function<bool(const ParsingType &)>;
-
-  // for inner processing
-  struct _InnerPackage {
-    ParsingType package;
-    Callback_t  callback;
-  };
-  using InnerParsingType = std::shared_ptr<_InnerPackage>;
-  using InnerBlock_t     = AsyncPipelineBlock<InnerParsingType>;
-  using InnerContext_t   = AsyncPipelineContext<InnerParsingType>;
-
-public:
-  PipelineInstance() = default;
-
-  PipelineInstance(const std::vector<Context_t> &block_list) : context_(block_list)
-  {
-    // initialize inner context
-    std::vector<InnerBlock_t> inner_block_list;
-    for (const auto &block : context_.blocks_)
-    {
-      auto         func = [&](InnerParsingType p) -> bool { return block(p->package); };
-      InnerBlock_t inner_block(func, block.GetName());
-      inner_block_list.push_back(inner_block);
-    }
-    inner_context_ = InnerContext_t(inner_block_list);
-  }
-
-  ~PipelineInstance()
-  {
-    ClosePipeline();
-  }
-
-  void Init(int bq_max_size = 100)
-  {
-    // 1. for `n` blocks, construct `n+1` block queues
-    const auto blocks = inner_context_.blocks_;
-    const int  n      = blocks.size();
-    LOG(INFO) << "[AsyncPipelineInstance] Total {" << n << "} Pipeline Blocks";
-    for (int i = 0; i < n + 1; ++i)
-    {
-      block_queue_.emplace_back(std::make_shared<BlockQueue<InnerParsingType>>(bq_max_size));
-    }
-    pipeline_close_flag_.store(false);
-
-    async_futures_.resize(n + 1);
-    // 2. open `n` async threads to execute blocks
-    for (int i = 0; i < n; ++i)
-    {
-      async_futures_[i] = std::async(&PipelineInstance::ThreadExcuteEntry, this, block_queue_[i],
-                                     block_queue_[i + 1], blocks[i]);
-    }
-    // 3. open output threads to execute callback
-    async_futures_[n] = std::async(&PipelineInstance::ThreadOutputEntry, this, block_queue_[n]);
-
-    pipeline_initialized_.store(true);
-  }
-
-  void ClosePipeline()
-  {
-    if (pipeline_initialized_)
-    {
-      LOG(INFO) << "[AsyncPipelineInstance] Closing pipeline ...";
-      for (const auto &bq : block_queue_)
-      {
-        bq->DisableAndClear();
-      }
-      LOG(INFO) << "[AsyncPipelineInstance] Disabled all block queue ...";
-      pipeline_close_flag_.store(true);
-
-      for (auto &future : async_futures_)
-      {
-        auto res = future.get();
-      }
-      LOG(INFO) << "[AsyncPipelineInstance] Join all block queue ...";
-      block_queue_.clear();
-      LOG(INFO) << "[AsyncPipelineInstance] Async pipeline is released successfully!!";
-      pipeline_initialized_ = false;
-      pipeline_close_flag_.store(true);
-      pipeline_no_more_input_.store(true);
-    }
-  }
-
-  void StopPipeline()
-  {
-    if (pipeline_initialized_)
-    {
-      pipeline_no_more_input_.store(true);
-      block_queue_[0]->SetNoMoreInput();
-    }
-  }
-
-  bool IsInitialized() const
-  {
-    return pipeline_initialized_;
-  }
-
-  const Context_t &GetContext() const
-  {
-    return context_;
-  }
-
-  void PushPipeline(const ParsingType &obj, const Callback_t &callback)
-  {
-    auto inner_pack      = std::make_shared<_InnerPackage>();
-    inner_pack->package  = obj;
-    inner_pack->callback = callback;
-
-    block_queue_[0]->BlockPush(inner_pack);
-  }
-
-private:
-  bool ThreadExcuteEntry(std::shared_ptr<BlockQueue<InnerParsingType>> bq_input,
-                         std::shared_ptr<BlockQueue<InnerParsingType>> bq_output,
-                         const InnerBlock_t                           &pipeline_block)
-  {
-    LOG(INFO) << "[AsyncPipelineInstance] {" << pipeline_block.GetName() << "} thread start!";
-    while (!pipeline_close_flag_)
-    {
-      auto data = bq_input->Take();
-      if (!data.has_value())
-      {
-        if (pipeline_no_more_input_)
-        {
-          LOG(INFO) << "[AsyncPipelineInstance] {" << pipeline_block.GetName()
-                    << "} set no more output ...";
-          bq_output->SetNoMoreInput();
-          break;
-        } else
-        {
-          continue;
-        }
-      }
-      auto start  = std::chrono::high_resolution_clock::now();
-      bool status = pipeline_block(data.value());
-      auto end    = std::chrono::high_resolution_clock::now();
-      LOG(INFO) << "[AsyncPipelineInstance] {" << pipeline_block.GetName() << "} cost (us) : "
-                << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
-
-      if (!status)
-      {
-        LOG(WARNING) << "[AsyncPipelineInstance] {" << pipeline_block.GetName()
-                     << "}, excute block function failed! Drop package.";
-        continue;
-      }
-
-      bq_output->BlockPush(data.value());
-    }
-    LOG(INFO) << "[AsyncPipelineInstance] {" << pipeline_block.GetName() << "} thread quit!";
-    return true;
-  }
-
-  bool ThreadOutputEntry(std::shared_ptr<BlockQueue<InnerParsingType>> bq_input)
-  {
-    LOG(INFO) << "[AsyncPipelineInstance] {Output} thread start!";
-    while (!pipeline_close_flag_)
-    {
-      auto data = bq_input->Take();
-      if (!data.has_value())
-      {
-        if (pipeline_no_more_input_)
-        {
-          LOG(INFO) << "[AsyncPipelineInstance] {Output} set no more output ...";
-          break;
-        } else
-        {
-          continue;
-        }
-      }
-      const auto &inner_pack = data.value();
-      if (inner_pack != nullptr && inner_pack->callback != nullptr)
-      {
-        inner_pack->callback(inner_pack->package);
-      } else
-      {
-        LOG(WARNING)
-            << "[AsyncPipelineInstance] {Output} package without valid callback will be dropped!!!";
-      }
-    }
-    LOG(INFO) << "[AsyncPipelineInstance] {Output} thread quit!";
-
-    return true;
-  }
-
-private:
-  Context_t context_;
-
-  InnerContext_t inner_context_;
-
-  std::vector<std::shared_ptr<BlockQueue<InnerParsingType>>> block_queue_;
-  std::vector<std::future<bool>>                             async_futures_;
-
-  std::atomic<bool> pipeline_close_flag_{true};
-  std::atomic<bool> pipeline_no_more_input_{true};
-  std::atomic<bool> pipeline_initialized_{false};
-};
-
-} // namespace async_pipeline
-
-#endif
diff --git a/deploy_core/include/deploy_core/base_detection.h b/deploy_core/include/deploy_core/base_detection.h
deleted file mode 100644
index fa999ff..0000000
--- a/deploy_core/include/deploy_core/base_detection.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:24:19
- * @LastEditTime: 2024-12-02 20:03:34
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/base_detection.h
- */
-#ifndef __DEPLOY_CORE_BASE_DETECTION_H
-#define __DEPLOY_CORE_BASE_DETECTION_H
-
-#include <atomic>
-#include <functional>
-#include <thread>
-#include <vector>
-
-#include <opencv2/opencv.hpp>
-
-#include "deploy_core/async_pipeline.h"
-#include "deploy_core/base_infer_core.h"
-
-namespace detection_2d {
-
-/**
- * @brief A abstract interface class which defines the preprocess interface of detection_2d
- * algorithms.
- *
- */
-class IDetectionPreProcess {
-public:
-  virtual float Preprocess(std::shared_ptr<async_pipeline::IPipelineImageData> input_image_data,
-                           std::shared_ptr<inference_core::IBlobsBuffer>       blob_buffer,
-                           const std::string                                  &blob_name,
-                           int                                                 dst_height,
-                           int                                                 dst_width) = 0;
-};
-
-/**
- * @brief A abstract interface class which defines the postprocess interface of detection_2d
- * algorithms.
- *
- */
-class IDetectionPostProcess {
-public:
-  virtual void Postprocess(const std::vector<void *> &output_blobs_ptr,
-                           std::vector<BBox2D>       &results,
-                           float                      conf_threshold,
-                           float                      transform_scale) = 0;
-};
-
-/**
- * @brief The common detection_2d pipeline package wrapper.
- *
- */
-struct DetectionPipelinePackage : public async_pipeline::IPipelinePackage {
-  // the wrapped pipeline image data
-  std::shared_ptr<async_pipeline::IPipelineImageData> input_image_data;
-  // confidence used in postprocess
-  float conf_thresh;
-  // record the transform factor during image preprocess
-  float transform_scale;
-  // the detection result
-  std::vector<BBox2D> results;
-
-  // maintain the blobs buffer instance
-  std::shared_ptr<inference_core::IBlobsBuffer> infer_buffer;
-
-  // override from `IPipelinePakcage`, to provide the blobs buffer to inference_core
-  std::shared_ptr<inference_core::IBlobsBuffer> GetInferBuffer() override
-  {
-    if (infer_buffer == nullptr)
-    {
-      LOG(ERROR) << "[DetectionPipelinePackage] returned nullptr of infer_buffer!!!";
-    }
-    return infer_buffer;
-  }
-};
-
-/**
- * @brief A abstract class defines two pure virtual methods -- `PreProcess` and `PostProcess`.
- * The derived class could only override these methods to make it work.
- *
- */
-class IDetectionModel {
-public:
-  IDetectionModel() = default;
-
-protected:
-  virtual ~IDetectionModel() = default;
-
-  /**
-   * @brief PreProcess-Stage. Inside the method, you should cast the `pipeline_unit` pointer to
-   * `DetectionPipelinePackage` type pointer, and check if the convertion works. If the package
-   * pointer is not valid or anything goes wrong, it should return `false` to mention the inference
-   * processing to drop the package.
-   *
-   * @param pipeline_unit
-   * @return true
-   * @return false
-   */
-  virtual bool PreProcess(std::shared_ptr<async_pipeline::IPipelinePackage> pipeline_unit) = 0;
-
-  /**
-   * @brief PostProcess-Stage. Inside the method, you should cast the `pipeline_unit` pointer to
-   * `DetectionPipelinePackage` type pointer, and check if the convertion works. If the package
-   * pointer is not valid or anything goes wrong, it should return `false` to mention the inference
-   * processing to drop the package.
-   *
-   * @param pipeline_unit
-   * @return true
-   * @return false
-   */
-  virtual bool PostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> pipeline_unit) = 0;
-};
-
-/**
- * @brief A functor to generate detection results from `DetectionPipelinePackage`. Used in async
- * pipeline.
- *
- */
-class DetectionGenResultType {
-public:
-  std::vector<BBox2D> operator()(const std::shared_ptr<async_pipeline::IPipelinePackage> &package)
-  {
-    auto detection_package = std::dynamic_pointer_cast<DetectionPipelinePackage>(package);
-    if (detection_package == nullptr)
-    {
-      LOG(ERROR) << "[DetectionGenResult] Got INVALID package ptr!!!";
-      return {};
-    }
-    return std::move(detection_package->results);
-  }
-};
-
-/**
- * @brief The base class of detection_2d algorithms. It implements `Detect` which is the synchronous
- * version of detection and `DetectAsync` which is the asynchronous version of detection.
- *
- * @note Call `InitPipeline()` before you intend to use `DetectAsync`. And Check if `DetectAsync`
- * returns a valid `std::future<>` instance before involke `get()` method.
- *
- */
-class BaseDetectionModel
-    : public IDetectionModel,
-      public async_pipeline::BaseAsyncPipeline<std::vector<BBox2D>, DetectionGenResultType> {
-  typedef std::shared_ptr<async_pipeline::IPipelinePackage> ParsingType;
-
-public:
-  BaseDetectionModel(std::shared_ptr<inference_core::BaseInferCore> infer_core);
-
-  /**
-   * @brief Run the detection processing in synchronous mode.
-   *
-   * @param input_image input image in cv::Mat format.
-   * @param det_results the output results
-   * @param conf_thresh confidence threshold
-   * @param isRGB if the input is rgb format. Will flip channels if `isRGB` == false.
-   * @return true
-   * @return false
-   */
-  bool Detect(const cv::Mat       &input_image,
-              std::vector<BBox2D> &det_results,
-              float                conf_thresh,
-              bool                 isRGB = false) noexcept;
-
-  /**
-   * @brief Run the detection processing in asynchronous mode.
-   *
-   * @param input_image input image in cv::Mat format.
-   * @param conf_thresh confidence threshold
-   * @param isRGB if the input is rgb format. Will flip channels if `isRGB` == false. default=false.
-   * @param cover_oldest whether cover the oldest package if the pipeline queue is full.
-   * default=false.
-   * @return std::future<std::vector<BBox2D>>
-   */
-  [[nodiscard]] std::future<std::vector<BBox2D>> DetectAsync(const cv::Mat &input_image,
-                                                             float          conf_thresh,
-                                                             bool           isRGB = false,
-                                                             bool cover_oldest    = false) noexcept;
-
-protected:
-  // forbidden the access from outside to `BaseAsyncPipeline::PushPipeline`
-  using BaseAsyncPipeline::PushPipeline;
-
-  virtual ~BaseDetectionModel();
-
-  std::shared_ptr<inference_core::BaseInferCore> infer_core_{nullptr};
-
-  static std::string detection_pipeline_name_;
-};
-
-/**
- * @brief Abstract factory class of detection_2d model.
- * 
- */
-class BaseDetection2DFactory {
-public:
-  virtual std::shared_ptr<detection_2d::BaseDetectionModel> Create() = 0;
-};
-
-class BaseDetectionPreprocessFactory {
-public:
-  virtual std::shared_ptr<detection_2d::IDetectionPreProcess> Create() = 0;
-};
-
-class BaseDetectionPostprocessFactory {
-public:
-  virtual std::shared_ptr<detection_2d::IDetectionPostProcess> Create() = 0;
-};
-
-} // namespace detection_2d
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/include/deploy_core/base_infer_core.h b/deploy_core/include/deploy_core/base_infer_core.h
deleted file mode 100644
index 37e9124..0000000
--- a/deploy_core/include/deploy_core/base_infer_core.h
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-26 08:42:05
- * @LastEditTime: 2024-12-02 19:03:37
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/base_infer_core.h
- */
-#ifndef __EASY_DEPLOY_BASE_INFER_CORE_H
-#define __EASY_DEPLOY_BASE_INFER_CORE_H
-
-#include <atomic>
-#include <memory>
-#include <thread>
-#include <vector>
-
-#include "deploy_core/block_queue.h"
-#include "deploy_core/async_pipeline.h"
-
-namespace inference_core {
-
-enum InferCoreType { ONNXRUNTIME, TENSORRT, RKNN, NOT_PROVIDED };
-
-/**
- * @brief `IRotInferCore` is abstract interface class which defines all pure virtual functions
- * that the derived class should implement, e.g., `PreProcess`, `Inference` and `PostProcess`.
- *
- */
-class IRotInferCore {
-public:
-  /**
-   * @brief `AllocBlobsBuffer` is a common interface that user could get a brand new buffer
-   * instance by. This pure virtual function is implemented by actual inference core, which
-   * may take a while to process. Use pre-allocated buffer instance in mem buffer pool could
-   * get better performance. See `BaseInferCore`.
-   *
-   * @return std::shared_ptr<IBlobsBuffer> A brand new buffer instance allocated by inference
-   * core.
-   */
-  virtual std::shared_ptr<IBlobsBuffer> AllocBlobsBuffer() = 0;
-
-  /**
-   * @brief Get the core type.
-   *
-   * @return InferCoreType
-   */
-  virtual InferCoreType GetType()
-  {
-    return InferCoreType::NOT_PROVIDED;
-  }
-
-  /**
-   * @brief Return the name of inference core.
-   *
-   * @return std::string
-   */
-  virtual std::string GetName()
-  {
-    return "";
-  }
-
-protected:
-  virtual ~IRotInferCore() = default;
-
-  /**
-   * @brief `PreProcess` stage of the inference process. Return true if this is stage is not
-   * needed in the actual inference core implementation. Return false if something went wrong
-   * while doing processing. The pipeline will drop the package if `PreProcess` returns false.
-   *
-   * @param buffer a common "pipeline" package ptr.
-   * @return true
-   * @return false
-   */
-  virtual bool PreProcess(std::shared_ptr<async_pipeline::IPipelinePackage> buffer) = 0;
-
-  /**
-   * @brief `Inference` stage of the inference process. Return false if something went wrong
-   * while doing processing. The pipeline will drop the package if `Inference` returns false.
-   *
-   * @param buffer a common "pipeline" package ptr.
-   * @return true
-   * @return false
-   */
-  virtual bool Inference(std::shared_ptr<async_pipeline::IPipelinePackage> buffer) = 0;
-
-  /**
-   * @brief `PostProcess` stage of the inference process. Return false if something went wrong
-   * while doing processing. The pipeline will drop the package if `PostProcess` returns false.
-   *
-   * @param buffer a common "pipeline" package ptr.
-   * @return true
-   * @return false
-   */
-  virtual bool PostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> buffer) = 0;
-};
-
-/**
- * @brief A simple implementation of mem buffer pool. Using `BlockQueue` to deploy a producer-
- * consumer model. It will allocate buffer using `AllocBlobsBuffer` method of `IRotInferCore`
- * and provides `IBlobsBuffer` ptr when `Alloc` method is called. The "Alloced" buffer will
- * return back to mem buffer pool while the customed deconstruction method of shared_ptr ptr
- * is called.
- *
- */
-class MemBufferPool {
-public:
-  MemBufferPool(IRotInferCore *infer_core, const int pool_size)
-      : pool_size_(pool_size), dynamic_pool_(pool_size)
-  {
-    for (int i = 0; i < pool_size; ++i)
-    {
-      auto blob_buffer = infer_core->AllocBlobsBuffer();
-      dynamic_pool_.BlockPush(blob_buffer.get());
-      static_pool_.insert({blob_buffer.get(), blob_buffer});
-    }
-  }
-
-  std::shared_ptr<IBlobsBuffer> Alloc(bool block)
-  {
-    // customed deconstruction method
-    auto func_dealloc = [&](IBlobsBuffer *buf) {
-      buf->Reset();
-      this->dynamic_pool_.BlockPush(buf);
-    };
-
-    auto buf = block ? dynamic_pool_.Take() : dynamic_pool_.TryTake();
-    return buf.has_value() ? std::shared_ptr<IBlobsBuffer>(buf.value(), func_dealloc) : nullptr;
-  }
-
-  void Release()
-  {
-    if (dynamic_pool_.Size() != pool_size_)
-    {
-      LOG(WARNING) << "[MemBufPool] does not maintain all bufs when release func called!";
-    }
-    static_pool_.clear();
-  }
-
-  int RemainSize()
-  {
-    return dynamic_pool_.Size();
-  }
-
-  ~MemBufferPool()
-  {
-    Release();
-  }
-
-private:
-  const int                                                         pool_size_;
-  BlockQueue<IBlobsBuffer *>                                        dynamic_pool_;
-  std::unordered_map<IBlobsBuffer *, std::shared_ptr<IBlobsBuffer>> static_pool_;
-};
-
-/**
- * @brief A dummy class to help `BaseInferCore` inherit from `BaseAsyncPipeline` to generate
- * async pipeline framework.
- *
- */
-class _DummyInferCoreGenReulstType {
-public:
-  bool operator()(const std::shared_ptr<async_pipeline::IPipelinePackage> & /*package*/)
-  {
-    return true;
-  }
-};
-
-/**
- * @brief `BaseInferCore` inherits `IRotInferCore` and `BaseAsyncPipeline`. `IRotInferCore`
- * defines all pure virtual methods of the abstract function of the inference core.
- * `BaseAsyncPipeline` provides a set of methods to help user build and utilize a async
- * inference pipeline. See `BaseAsyncPipeline` defination.
- *
- * @note The inheritance relationship between class A and class B is modified by protected.
- * And `BaseInferCore` only makes the `GetPipelineContext` method public, which means the
- * derived class of `BaseInferCore` is not supported to deploy async pipeline inference
- * process. It should be used by specific algorithms in its entirety.
- *
- */
-class BaseInferCore : public IRotInferCore,
-                      protected async_pipeline::BaseAsyncPipeline<bool, _DummyInferCoreGenReulstType> {
-protected:
-  BaseInferCore();
-  typedef std::shared_ptr<async_pipeline::IPipelinePackage> ParsingType;
-
-public:
-  using BaseAsyncPipeline::GetPipelineContext;
-
-  /**
-   * @brief This function provides a sync inference process which is completely independent
-   * of the async inference pipeline. Through, it depends on the three stage virtual methods
-   * defined in `IRotInferCore`. Return false if something went wrong while inference.
-   *
-   * @param buffer
-   * @param batch_size default=1, multi-batch inference may not be supported.
-   * @return true
-   * @return false
-   */
-  bool SyncInfer(std::shared_ptr<IBlobsBuffer> buffer, const int batch_size = 1);
-
-  /**
-   * @brief Get the pre-allocated blobs buffer shared pointer. The returned pointer is a
-   * smart pointer which will automatically return to the pool when it is released.
-   *
-   * @param block whether to block the thread if the pool is empty.
-   * @return std::shared_ptr<IBlobsBuffer>
-   */
-  std::shared_ptr<IBlobsBuffer> GetBuffer(bool block);
-
-  /**
-   * @brief Release the sources in base class.
-   *
-   * @warning The derived class should call `BaseInferCore::Release()` in its deconstruct
-   * function in order to release the blobs buffer before the enviroment is destroyed.
-   * Things go wrong if allocated memory released after their enviroment released on some
-   * hardware.
-   *
-   */
-  virtual void Release();
-
-protected:
-  virtual ~BaseInferCore();
-
-  /**
-   * @brief Init the base class memory pool.
-   *
-   * @warning Please call `Init()` at the derived class construct function`s end when the
-   * runtime enviroment is setup successfully. This method will call `AllocBlobsBuffer`
-   * to create a memory pool. Temporary we manually call this method to init the memory pool.
-   *
-   * @param mem_buf_size number of blobs buffers pre-allocated.
-   */
-  void Init(int mem_buf_size = 5);
-
-private:
-  std::unique_ptr<MemBufferPool> mem_buf_pool_{nullptr};
-};
-
-/**
- * @brief Abstract factory class of infer_core.
- * 
- */
-class BaseInferCoreFactory {
-public:
-  virtual std::shared_ptr<inference_core::BaseInferCore> Create() = 0;
-};
-
-} // namespace inference_core
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/include/deploy_core/base_sam.h b/deploy_core/include/deploy_core/base_sam.h
deleted file mode 100644
index e309f65..0000000
--- a/deploy_core/include/deploy_core/base_sam.h
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 18:38:34
- * @LastEditTime: 2024-12-02 19:03:30
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/base_sam.h
- */
-#ifndef __EASY_DEPLOY_BASE_SAM_H
-#define __EASY_DEPLOY_BASE_SAM_H
-
-#include "deploy_core/base_infer_core.h"
-#include "deploy_core/common_defination.h"
-
-#include <opencv2/opencv.hpp>
-
-namespace sam {
-
-/**
- * @brief The common sam pipeline package wrapper.
- *
- */
-struct SamPipelinePackage : public async_pipeline::IPipelinePackage {
-  // maintain image-encoder's blobs buffer
-  std::shared_ptr<inference_core::IBlobsBuffer> image_encoder_blobs_buffer;
-  // maintain mask-decoder's blobs buffer
-  std::shared_ptr<inference_core::IBlobsBuffer> mask_decoder_blobs_buffer;
-
-  // the wrapped pipeline image data
-  std::shared_ptr<async_pipeline::IPipelineImageData> input_image_data;
-  // input boxes prompt
-  std::vector<BBox2D> boxes;
-  // input points prompt
-  std::vector<std::pair<int, int>> points;
-  // input points labels
-  std::vector<int> labels;
-  // record the transform factor in image preprocessing
-  float transform_scale;
-  // mask results
-  cv::Mat mask;
-
-  // the blobs buffer used in inference core processing
-  std::shared_ptr<inference_core::IBlobsBuffer> infer_buffer;
-  std::shared_ptr<inference_core::IBlobsBuffer> GetInferBuffer() override
-  {
-    return infer_buffer;
-  }
-};
-
-/**
- * @brief The abstract interface class of `Segment Anything Model`(SAM) which defines
- * image-preprocess、prompt-preprocess、mask-postprocess interfaces. Any SAM algorithms
- * implementation could override these pure virtual methods to make up a sync/async
- * inference supported pipeline.
- *
- * workflow:
- *
- * `ImagePreProcess` --> `ImageEncoderInfer` --> `PromptBoxPreProcess`/`PromptPointPreProcess`
- * --> `MaskDecoderInfer` --> `MaskPostProcess`
- *
- */
-class ISamModel {
-protected:
-  typedef std::shared_ptr<async_pipeline::IPipelinePackage> ParsingType;
-  virtual ~ISamModel() = default;
-  /**
-   * @brief The `ImagePreProcess` stage. Inside the method, you should cast the `pipeline_unit`
-   * pointer to `SamPipelinePackage` type pointer, and check if the convertion works. If the
-   * package pointer is not valid or anything goes wrong, it should return `false` to mention
-   * the inference pipelinee to drop the package.
-   *
-   * @param pipeline_unit
-   * @return true
-   * @return false
-   */
-  virtual bool ImagePreProcess(ParsingType pipeline_unit) = 0;
-
-  /**
-   * @brief The `PromptBoxPreProcess` stage. Inside the method, you should cast the `pipeline_unit`
-   * pointer to `SamPipelinePackage` type pointer, and check if the convertion works. If the
-   * package pointer is not valid or anything goes wrong, it should return `false` to mention
-   * the inference pipelinee to drop the package.
-   *
-   * @param pipeline_unit
-   * @return true
-   * @return false
-   */
-  virtual bool PromptBoxPreProcess(ParsingType pipeline_unit) = 0;
-
-  /**
-   * @brief The `PromptPointPreProcess` stage. Inside the method, you should cast the
-   * `pipeline_unit` pointer to `SamPipelinePackage` type pointer, and check if the convertion
-   * works. If the package pointer is not valid or anything goes wrong, it should return `false` to
-   * mention the inference pipelinee to drop the package.
-   *
-   * @param pipeline_unit
-   * @return true
-   * @return false
-   */
-  virtual bool PromptPointPreProcess(ParsingType pipeline_unit) = 0;
-
-  /**
-   * @brief The `MaskPostProcess` stage. Inside the method, you should cast the `pipeline_unit`
-   * pointer to `SamPipelinePackage` type pointer, and check if the convertion works. If the
-   * package pointer is not valid or anything goes wrong, it should return `false` to mention
-   * the inference pipelinee to drop the package.
-   *
-   * @param pipeline_unit
-   * @return true
-   * @return false
-   */
-  virtual bool MaskPostProcess(ParsingType pipeline_unit) = 0;
-};
-
-/**
- * @brief A functor to generate sam results from `SamPipelinePackage`. Used in async pipeline.
- *
- */
-class SamGenResultType {
-public:
-  cv::Mat operator()(const std::shared_ptr<async_pipeline::IPipelinePackage> &package)
-  {
-    auto sam_package = std::dynamic_pointer_cast<SamPipelinePackage>(package);
-    if (sam_package == nullptr)
-    {
-      LOG(ERROR) << "[SamGenResultType] Got INVALID package ptr!!!";
-      return {};
-    }
-    return std::move(sam_package->mask);
-  }
-};
-
-/**
- * @brief The base class of SAM model. It implements `GenerateMask` and `GenerateMaskAsync`
- * both with `box` prompts or `points` prompts. In the asynchronous pipeline inference mode,
- * the `box` pipeline and `point` pipeline could been used in the same time, cause they are
- * independent.
- *
- */
-class BaseSamModel : public ISamModel,
-                     public async_pipeline::BaseAsyncPipeline<cv::Mat, SamGenResultType> {
-protected:
-  using ParsingType = std::shared_ptr<async_pipeline::IPipelinePackage>;
-  /**
-   * @brief Construct `BaseSamModel` with `image_encoder_core` and at least one of `mask_points_
-   * decoder_core` or `mask_boxes_decoder_core`. Will throw exception if both decoders with points
-   * and boxes are nullptr.
-   *
-   * @param model_name
-   * @param image_encoder_core
-   * @param mask_points_decoder_core
-   * @param mask_boxes_decoder_core
-   */
-  BaseSamModel(const std::string                             &model_name,
-               std::shared_ptr<inference_core::BaseInferCore> image_encoder_core,
-               std::shared_ptr<inference_core::BaseInferCore> mask_points_decoder_core,
-               std::shared_ptr<inference_core::BaseInferCore> mask_boxes_decoder_core);
-
-  virtual ~BaseSamModel();
-
-public:
-  /**
-   * @brief Generate the mask with points as prompts in sync mode.
-   *
-   * @param image input image
-   * @param points points coords
-   * @param labels points labels, 0 - background; 1 - foreground
-   * @param cv::Mat reference to the result. 0 - background; 255 - foreground
-   * @param isRGB if the input image is RGB format. default=false
-   * @return true
-   * @return false
-   */
-  bool GenerateMask(const cv::Mat                          &image,
-                    const std::vector<std::pair<int, int>> &points,
-                    const std::vector<int>                 &labels,
-                    cv::Mat                                &result,
-                    bool                                    isRGB = false);
-  /**
-   * @brief Generate the mask with boxes as prompts in sync mode.
-   *
-   * @note SAM model with boxes only support one box as its prompts. More boxes wont make any
-   * exception, but also will not take effect.
-   *
-   * @param image input image
-   * @param boxes boxes coords
-   * @param cv::Mat reference to the result. 0 - background; 255 - foreground
-   * @param isRGB if the input image is RGB format. default=false
-   * @return true
-   * @return false
-   */
-  bool GenerateMask(const cv::Mat             &image,
-                    const std::vector<BBox2D> &boxes,
-                    cv::Mat                   &result,
-                    bool                       isRGB = false);
-
-  /**
-   * @brief Generate the mask with points as prompts in async mode.
-   *
-   * @warning The returned `std::future<>` instance could be invalid. Please make sure it is
-   * valid before you call `get()`.
-   *
-   * @param image input image
-   * @param points points coords
-   * @param labels points labels, 0 - background; 1 - foreground
-   * @param isRGB if the input image is RGB format. default=false
-   * @param cover_oldest whether cover the oldest package if the pipeline queue is full.
-   * default=false.
-   * @return std::future<cv::Mat> A std::future instance of the result.
-   */
-  [[nodiscard]] std::future<cv::Mat> GenerateMaskAsync(
-      const cv::Mat                          &image,
-      const std::vector<std::pair<int, int>> &points,
-      const std::vector<int>                 &labels,
-      bool                                    isRGB        = false,
-      bool                                    cover_oldest = false);
-
-  /**
-   * @brief Generate the mask with boxes as prompts in async mode.
-   *
-   * @note SAM model with boxes only support one box as its prompts. More boxes wont make any
-   * exception, but also will not take effect.
-   *
-   * @warning The returned `std::future<>` instance could be invalid. Please make sure it is
-   * valid before you call `get()`.
-   *
-   * @param image input image
-   * @param boxes boxes coords
-   * @param callback callback function if needed. default=nullptr.
-   * @param isRGB if the input image is RGB format. default=false
-   * @param cover_oldest whether cover the oldest package if the pipeline queue is full.
-   * default=false.
-   * @return std::future<cv::Mat> A std::future instance of the result.
-   */
-  [[nodiscard]] std::future<cv::Mat> GenerateMaskAsync(const cv::Mat             &image,
-                                                       const std::vector<BBox2D> &boxes,
-                                                       bool                       isRGB = false,
-                                                       bool cover_oldest                = false);
-
-private:
-  // forbidden the access from outside to `BaseAsyncPipeline::PushPipeline`
-  using BaseAsyncPipeline::PushPipeline;
-
-  void ConfigureBoxPipeline();
-
-  void ConfigurePointPipeline();
-
-protected:
-  std::shared_ptr<inference_core::BaseInferCore> image_encoder_core_;
-  std::shared_ptr<inference_core::BaseInferCore> mask_points_decoder_core_;
-  std::shared_ptr<inference_core::BaseInferCore> mask_boxes_decoder_core_;
-
-  const std::string box_pipeline_name_;
-  const std::string point_pipeline_name_;
-  const std::string model_name_;
-};
-
-
-/**
- * @brief Abstract factory base class of Sam model.
- * 
- */
-class BaseSamFactory {
-public:
-  virtual std::shared_ptr<sam::BaseSamModel> Create() = 0;
-};
-
-} // namespace sam
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/include/deploy_core/blob_buffer.h b/deploy_core/include/deploy_core/blob_buffer.h
deleted file mode 100644
index c503bbf..0000000
--- a/deploy_core/include/deploy_core/blob_buffer.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 15:27:59
- * @LastEditTime: 2024-11-26 21:57:59
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/blob_buffer.h
- */
-#ifndef __EASY_DEPLOY_BLOB_BUFFER_H
-#define __EASY_DEPLOY_BLOB_BUFFER_H
-
-#include <memory>
-#include <vector>
-
-#include "deploy_core/common_defination.h"
-
-namespace inference_core {
-
-/**
- * @brief The key to abstracting and shielding the inference framework and hardware characteristics
- * lies in how the management of inference buffer is abstracted. Considering the requirements
- * of asynchronous inference framework, we encapsulated the buffer used during inference into a
- * dedicated class and abstracted its functionality by developing the `IBlobsBuffer` interface
- * class. The implementation of this interface must provide the following functionalities:
- *
- *  1. Set the buffer address to be used for inference.
- *
- *  2. Get the default buffer address.
- *
- *  3. Set the shape of the model blob.
- *
- *  4. Get the default blob shape.
- *
- * And Other base functionalities as declared below.
- *
- */
-class IBlobsBuffer {
-public:
-  /**
-   * @brief The `BlobsBuffer` instance should provide the buffer ptr which will be used in the
-   * inference process. This buffer is allocated by certain inference_core by default. User could
-   * customize the buffer ptr by calling `SetBlobBuffer`.
-   *
-   * @param blob_name The name of the blob.
-   * @return std::pair<void*, DataLocation> Will return {nullptr, UNKOWN} if `blob_name` is
-   * invalid.
-   */
-  virtual std::pair<void *, DataLocation> GetOuterBlobBuffer(
-      const std::string &blob_name) noexcept = 0;
-
-  /**
-   * @brief The `BlobsBuffer` instance should provide the functionality to accept a customized
-   * data buffer ptr which could be on host or device. Some inference frameworks based on
-   * heterogeneous architecture hardware (e.g. CUDA) use buffer on device to deploy inference. There
-   * is no need to copy data from host to device if the device buffer ptr is provided to
-   * `BlobsBuffer`.
-   *
-   * @param blob_name The name of the blob.
-   * @param data_ptr The ptr of the customized data buffer.
-   * @param location Location of the customized data buffer.
-   * @return true
-   * @return false Will return false if `blob_name` is invalid.
-   */
-  virtual bool SetBlobBuffer(const std::string &blob_name,
-                             void              *data_ptr,
-                             DataLocation       location) noexcept = 0;
-
-  /**
-   * @brief `SetBlobBuffer` provides the functionality to change the default using data buffer
-   * on host size or device side. After calling this method, `GetOuterBlobBuffer` will return
-   * the buffer ptr on the certain side.
-   *
-   * @note Some inference frameworks (e.g. onnxruntime, rknn) do not distinguish buffer between
-   * the host side and the device side. So this method will not change their default buffer ptr.
-   *
-   * @param blob_name The name of the blob.
-   * @param location Location of the customized data buffer.
-   * @return true
-   * @return false Will return false if `blob_name` is invalid.
-   */
-  virtual bool SetBlobBuffer(const std::string &blob_name, DataLocation location) noexcept = 0;
-
-  /**
-   * @brief `SetBlobShape` provides the functionality to change the dynamic blob shape in the
-   * inference processing if the model engine allows.
-   *
-   * @note Some inference framework (e.g. rknn) do not support dynamic blob shape. And make sure
-   * your model supports dynamic blob shape before you call this method.
-   *
-   * @param blob_name The name of the blob.
-   * @param shape The dynamic blob shape.
-   * @return true
-   * @return false Will return false if `blob_name` is invalid.
-   */
-  virtual bool SetBlobShape(const std::string          &blob_name,
-                            const std::vector<int64_t> &shape) noexcept = 0;
-
-  /**
-   * @brief `GetBlobShape` provides the functionality to get the dynamic blob shape in the
-   * inference processing. By default, this will return the max blob shape which is parsed
-   * in `inference_core` construction.
-   *
-   * @param blob_name The name of the blob.
-   * @return const std::vector<int64_t>& The const reference of blob shape vector maintained.
-   */
-  virtual const std::vector<int64_t> &GetBlobShape(const std::string &blob_name) const noexcept = 0;
-
-  /**
-   * @brief Return the total number of blobs.
-   *
-   * @return size_t
-   */
-  virtual size_t Size() const noexcept = 0;
-
-  /**
-   * @brief Reset the `BlobsBuffer` which will not release the buffer memory.
-   *
-   */
-  virtual void Reset() noexcept = 0;
-
-protected:
-  virtual ~IBlobsBuffer() noexcept = default;
-
-  /**
-   * @brief Release the whole `BlobsBuffer` instance.
-   *
-   */
-  virtual void Release() noexcept = 0;
-};
-
-} // namespace inference_core
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/include/deploy_core/block_queue.h b/deploy_core/include/deploy_core/block_queue.h
deleted file mode 100644
index c73f36f..0000000
--- a/deploy_core/include/deploy_core/block_queue.h
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:00:38
- * @LastEditTime: 2024-11-26 09:29:20
- * @FilePath: /EasyDeploy/deploy_core/include/deploy_core/block_queue.h
- */
-#ifndef __EASY_DEPLOY_BLOCK_QUEUE_H
-#define __EASY_DEPLOY_BLOCK_QUEUE_H
-
-#include <atomic>
-#include <condition_variable>
-#include <optional>
-#include <queue>
-
-/**
- * @brief A simple implementation of block queue.
- *
- * @tparam T
- */
-template <typename T>
-class BlockQueue {
-public:
-  BlockQueue<T>(const size_t max_size) : max_size_(max_size)
-  {}
-
-  /**
-   * @brief Push a obj into the queue. Will block the thread if the queue is full.
-   *
-   * @param obj
-   * @return true
-   * @return false
-   */
-  bool BlockPush(const T &obj) noexcept;
-
-  /**
-   * @brief Push a obj into the queue. Will cover the oldest element if the queue is full.
-   *
-   * @param obj
-   * @return true
-   * @return false
-   */
-  bool CoverPush(const T &obj) noexcept;
-
-  /**
-   * @brief Get and pop the oldest element in the queue. Will block the thread if the queue is
-   * empty.
-   *
-   * @return std::optional<T>
-   */
-  std::optional<T> Take() noexcept;
-
-  /**
-   * @brief Get and pop the oldest element in the queue. Will return `nullopt` if the queue is
-   * empty.
-   *
-   * @return std::optional<T>
-   */
-  std::optional<T> TryTake() noexcept;
-
-  /**
-   * @brief Get the size of the queue.
-   *
-   * @return int
-   */
-  int Size() noexcept;
-
-  /**
-   * @brief Return if the queue is empty.
-   *
-   * @return true
-   * @return false
-   */
-  bool Empty() noexcept;
-
-  /**
-   * @brief Set the `push` process disabled. After called this method, all `push` calling will
-   * return `false`, which means this block queue no longer accept new elements.
-   *
-   */
-  void DisablePush() noexcept;
-
-  /**
-   * @brief Set the `push` process enabled.
-   *
-   */
-  void EnablePush() noexcept;
-
-  /**
-   * @brief Set the `take` process disabled. After called this method, all `take` calling will
-   * return `false`, which means this block queue no longer provides elements.
-   *
-   */
-  void DisableTake() noexcept;
-
-  /**
-   * @brief Set the `take` process enabled.
-   *
-   */
-  void EnableTake() noexcept;
-
-  /**
-   * @brief Set the `push` and `take` process disabled.
-   *
-   */
-  void Disable() noexcept;
-
-  /**
-   * @brief Get the max size of the block queue.
-   *
-   * @return int
-   */
-  int GetMaxSize() const noexcept;
-
-  /**
-   * @brief Set the `push` and `take` process disabled, and clear all elements in it.
-   *
-   */
-  void DisableAndClear() noexcept;
-
-  /**
-   * @brief Set the `push` process will no longer be called. The consumer threads which were
-   * blocked will be notified and quit blocking, when this method is called.
-   *
-   */
-  void SetNoMoreInput() noexcept;
-
-  ~BlockQueue() noexcept;
-
-private:
-  const size_t            max_size_;
-  std::queue<T>           q_;
-  std::atomic<bool>       push_enabled_{true};
-  std::atomic<bool>       take_enabled_{true};
-  std::condition_variable producer_cv_;
-  std::condition_variable consumer_cv_;
-  std::mutex              lck_;
-
-  std::atomic<bool> no_more_input_{false};
-};
-
-template <typename T>
-BlockQueue<T>::~BlockQueue() noexcept
-{
-  Disable();
-}
-
-template <typename T>
-bool BlockQueue<T>::BlockPush(const T &obj) noexcept
-{
-  std::unique_lock<std::mutex> u_lck(lck_);
-  while (q_.size() >= max_size_ && push_enabled_.load())
-  {
-    producer_cv_.wait(u_lck);
-  }
-  if (!push_enabled_.load())
-  {
-    return false;
-  }
-  q_.push(obj);
-  consumer_cv_.notify_one();
-  return true;
-}
-
-template <typename T>
-bool BlockQueue<T>::CoverPush(const T &obj) noexcept
-{
-  std::unique_lock<std::mutex> u_lck(lck_);
-  if (!push_enabled_.load())
-  {
-    return false;
-  }
-  if (q_.size() == max_size_)
-  {
-    q_.pop();
-  }
-  q_.push(obj);
-  consumer_cv_.notify_one();
-  return true;
-}
-
-template <typename T>
-std::optional<T> BlockQueue<T>::Take() noexcept
-{
-  std::unique_lock<std::mutex> u_lck(lck_);
-  // block until: 1. take disabled; 2. no more input set; 3. new elements
-  while (q_.size() == 0 && take_enabled_ && no_more_input_ == false)
-  {
-    consumer_cv_.wait(u_lck);
-  }
-  if (!take_enabled_ || (no_more_input_ && q_.size() == 0))
-  {
-    return std::nullopt;
-  }
-  T ret = q_.front();
-  q_.pop();
-  producer_cv_.notify_one();
-
-  if (no_more_input_)
-  {
-    consumer_cv_.notify_all();
-  }
-  return ret;
-}
-
-template <typename T>
-std::optional<T> BlockQueue<T>::TryTake() noexcept
-{
-  std::unique_lock<std::mutex> u_lck(lck_);
-  if (q_.size() == 0)
-  {
-    return std::nullopt;
-  } else
-  {
-    T ret = q_.front();
-    q_.pop();
-    producer_cv_.notify_all();
-    if (no_more_input_)
-    {
-      consumer_cv_.notify_all();
-    }
-    return ret;
-  }
-}
-
-template <typename T>
-int BlockQueue<T>::Size() noexcept
-{
-  std::unique_lock<std::mutex> u_lck(lck_);
-  return q_.size();
-}
-
-template <typename T>
-bool BlockQueue<T>::Empty() noexcept
-{
-  std::unique_lock<std::mutex> u_lck(lck_);
-  return q_.size() == 0;
-}
-
-template <typename T>
-int BlockQueue<T>::GetMaxSize() const noexcept
-{
-  return max_size_;
-}
-
-template <typename T>
-void BlockQueue<T>::Disable() noexcept
-{
-  DisablePush();
-  DisableTake();
-}
-
-template <typename T>
-void BlockQueue<T>::DisableAndClear() noexcept
-{
-  Disable();
-  std::unique_lock<std::mutex> u_lck(lck_);
-  while (!q_.empty()) q_.pop();
-}
-
-template <typename T>
-void BlockQueue<T>::DisablePush() noexcept
-{
-  push_enabled_.store(false);
-  producer_cv_.notify_all();
-}
-
-template <typename T>
-void BlockQueue<T>::EnablePush() noexcept
-{
-  push_enabled_.store(true);
-}
-
-template <typename T>
-void BlockQueue<T>::DisableTake() noexcept
-{
-  take_enabled_.store(false);
-  consumer_cv_.notify_all();
-}
-
-template <typename T>
-void BlockQueue<T>::EnableTake() noexcept
-{
-  take_enabled_.store(true);
-}
-
-template <typename T>
-void BlockQueue<T>::SetNoMoreInput() noexcept
-{
-  no_more_input_.store(true);
-  consumer_cv_.notify_all();
-}
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/include/deploy_core/common_defination.h b/deploy_core/include/deploy_core/common_defination.h
deleted file mode 100644
index 612cde9..0000000
--- a/deploy_core/include/deploy_core/common_defination.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:00:38
- * @LastEditTime: 2024-11-26 22:07:03
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/common_defination.h
- */
-#ifndef __EASY_DEPLOY_COMMON_DEFINATION_H
-#define __EASY_DEPLOY_COMMON_DEFINATION_H
-
-/**
- * @brief Defination of common 2D bounding box
- *
- * @param x center of bbox `x`
- * @param y center of bbox `y`
- * @param w width of bbox
- * @param h height of bbox
- * @param conf confidence of bbox
- * @param cls classification of bbox
- */
-struct BBox2D {
-  float x;
-  float y;
-  float w;
-  float h;
-  float conf;
-  float cls;
-};
-
-/**
- * @brief Enum of data loacation
- *
- * @param HOST data is host accessable
- * @param DEVICE data is device accessable, means host cant read/write the data buffer directly
- * @param UNKOWN some other condition
- *
- */
-enum DataLocation { HOST = 0, DEVICE = 1, UNKOWN = 2 };
-
-/**
- * @brief Defination of common image format.
- *
- */
-enum ImageDataFormat { YUV = 0, RGB = 1, BGR = 2, GRAY = 3 };
-
-// some macro
-#define CHECK_STATE(state, hint) \
-  {                              \
-    if (!(state))                \
-    {                            \
-      LOG(ERROR) << (hint);      \
-      return false;              \
-    }                            \
-  }
-
-#define MESSURE_DURATION(run)                                                                \
-  {                                                                                          \
-    auto start = std::chrono::high_resolution_clock::now();                                  \
-    (run);                                                                                   \
-    auto end = std::chrono::high_resolution_clock::now();                                    \
-    LOG(INFO) << #run << " cost(us): "                                                       \
-              << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); \
-  }
-
-#define MESSURE_DURATION_AND_CHECK_STATE(run, hint)                                          \
-  {                                                                                          \
-    auto start = std::chrono::high_resolution_clock::now();                                  \
-    CHECK_STATE((run), hint);                                                                \
-    auto end = std::chrono::high_resolution_clock::now();                                    \
-    LOG(INFO) << #run << " cost(us): "                                                       \
-              << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); \
-  }
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/include/deploy_core/wrapper.h b/deploy_core/include/deploy_core/wrapper.h
deleted file mode 100644
index 6c9b5b0..0000000
--- a/deploy_core/include/deploy_core/wrapper.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:00:38
- * @LastEditTime: 2024-11-26 21:58:32
- * @FilePath: /easy_deploy/deploy_core/include/deploy_core/wrapper.h
- */
-#ifndef __EASY_DEPLOY_WRAPPER_H
-#define __EASY_DEPLOY_WRAPPER_H
-
-#include "deploy_core/async_pipeline.h"
-
-#include <opencv2/opencv.hpp>
-
-#include <unordered_map>
-
-/**
- * @brief A simple wrapper of cv::Mat. Used in pipeline.
- *
- */
-class PipelineCvImageWrapper : public async_pipeline::IPipelineImageData {
-public:
-  PipelineCvImageWrapper(const cv::Mat &cv_image, bool isRGB = false) : inner_cv_image(cv_image)
-  {
-    image_data_info.data_pointer   = cv_image.data;
-    image_data_info.format         = isRGB ? ImageDataFormat::RGB : ImageDataFormat::BGR;
-    image_data_info.image_height   = cv_image.rows;
-    image_data_info.image_width    = cv_image.cols;
-    image_data_info.image_channels = cv_image.channels();
-    image_data_info.location       = DataLocation::HOST;
-  }
-
-  const ImageDataInfo &GetImageDataInfo() const
-  {
-    return image_data_info;
-  }
-
-private:
-  IPipelineImageData::ImageDataInfo image_data_info;
-  const cv::Mat                     inner_cv_image;
-};
-
-#endif
\ No newline at end of file
diff --git a/deploy_core/src/base_detection.cpp b/deploy_core/src/base_detection.cpp
deleted file mode 100644
index 5e2628b..0000000
--- a/deploy_core/src/base_detection.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * @Description: 
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:24:19
- * @LastEditTime: 2024-11-26 21:58:50
- * @FilePath: /easy_deploy/deploy_core/src/base_detection.cpp
- */
-#include "deploy_core/base_detection.h"
-
-#include "deploy_core/wrapper.h"
-
-namespace detection_2d {
-
-std::string BaseDetectionModel::detection_pipeline_name_ = "DetectionPipeline";
-
-/**
- * @brief construct a `DetectionPipelinePackage`
- *
- * @param input_image
- * @param conf_thresh
- * @param isRGB
- * @param blob_buffers
- * @return std::shared_ptr<DetectionPipelinePackage>
- */
-static std::shared_ptr<DetectionPipelinePackage> CreateDetectionPipelineUnit(
-    const cv::Mat                &input_image,
-    float                         conf_thresh,
-    bool                          isRGB,
-    std::shared_ptr<inference_core::IBlobsBuffer> blob_buffers)
-{
-  // 1. construct the image wrapper
-  auto image_wrapper = std::make_shared<PipelineCvImageWrapper>(input_image, isRGB);
-  // 2. construct `DetectionPipelinePakcage`
-  auto package              = std::make_shared<DetectionPipelinePackage>();
-  package->input_image_data = image_wrapper;
-  package->conf_thresh      = conf_thresh;
-  package->infer_buffer     = blob_buffers;
-
-  return package;
-}
-
-BaseDetectionModel::BaseDetectionModel(std::shared_ptr<inference_core::BaseInferCore> infer_core)
-    : infer_core_(infer_core)
-{
-  // 1. check infer_core
-  if (infer_core == nullptr)
-  {
-    throw std::invalid_argument("[BaseDetectionModel] Input argument `infer_core` is nullptr!!!");
-  }
-
-  // 2. configure pipeline
-  auto preprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [=](ParsingType unit) -> bool { return PreProcess(unit); }, "BaseDet PreProcess");
-
-  auto infer_core_context = infer_core->GetPipelineContext();
-
-  auto postprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [=](ParsingType unit) -> bool { return PostProcess(unit); }, "BaseDet PostProcess");
-
-  BaseAsyncPipeline::ConfigPipeline(detection_pipeline_name_,
-                                    {preprocess_block, infer_core_context, postprocess_block});
-}
-
-bool BaseDetectionModel::Detect(const cv::Mat       &input_image,
-                                std::vector<BBox2D> &det_results, // todo
-                                float                conf_thresh,
-                                bool                 isRGB) noexcept
-{
-  // 1. Get blobs buffer
-  auto blob_buffers = infer_core_->GetBuffer(false);
-  if (blob_buffers == nullptr)
-  {
-    LOG(ERROR) << "[BaseDetectionModel] Inference Core run out buffer!!!";
-    return false;
-  }
-
-  // 2. Create a dummy pipeline package
-  auto package = CreateDetectionPipelineUnit(input_image, conf_thresh, isRGB, blob_buffers);
-
-  // 3. preprocess by derived class
-  MESSURE_DURATION_AND_CHECK_STATE(PreProcess(package),
-                                   "[BaseDetectionModel] Preprocess execute failed!!!");
-
-  // 4. network inference
-  MESSURE_DURATION_AND_CHECK_STATE(infer_core_->SyncInfer(blob_buffers),
-                                   "[BaseDetectionModel] SyncInfer execute failed!!!");
-
-  // 5. postprocess by derived class
-  MESSURE_DURATION_AND_CHECK_STATE(PostProcess(package),
-                                   "[BaseDetectionModel] PostProcess execute failed!!!");
-
-  // 6. take output
-  det_results = std::move(package->results);
-
-  return true;
-}
-
-std::future<std::vector<BBox2D>> BaseDetectionModel::DetectAsync(const cv::Mat &input_image,
-                                                                 float          conf_thresh,
-                                                                 bool           isRGB,
-                                                                 bool cover_oldest) noexcept
-{
-  // 1. check if the pipeline is initialized
-  if (!IsPipelineInitialized(detection_pipeline_name_))
-  {
-    LOG(ERROR) << "[BaseDetectionModel] Async Pipeline is not init yet!!!";
-    return std::future<std::vector<BBox2D>>();
-  }
-
-  // 2. get blob buffer
-  auto blob_buffers = infer_core_->GetBuffer(true);
-  if (blob_buffers == nullptr)
-  {
-    LOG(ERROR) << "[BaseDetectionModel] Failed to get buffer from inference core!!!";
-    return std::future<std::vector<BBox2D>>();
-  }
-
-  // 3. create a pipeline package
-  auto package = CreateDetectionPipelineUnit(input_image, conf_thresh, isRGB, blob_buffers);
-
-  // 4. push package into pipeline and return `std::future`
-  return PushPipeline(detection_pipeline_name_, package);
-}
-
-BaseDetectionModel::~BaseDetectionModel()
-{
-  ClosePipeline();
-  infer_core_->Release();
-}
-
-
-} // namespace detection_2d
\ No newline at end of file
diff --git a/deploy_core/src/base_infer_core.cpp b/deploy_core/src/base_infer_core.cpp
deleted file mode 100644
index 8981805..0000000
--- a/deploy_core/src/base_infer_core.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-19 18:33:00
- * @LastEditTime: 2024-11-26 21:56:31
- * @FilePath: /easy_deploy/deploy_core/src/base_infer_core.cpp
- */
-#include "deploy_core/base_infer_core.h"
-
-namespace inference_core {
-
-// used in sync infer
-struct _InnerSyncInferPackage : public async_pipeline::IPipelinePackage {
-public:
-  std::shared_ptr<IBlobsBuffer> GetInferBuffer() override
-  {
-    return buffer;
-  }
-  std::shared_ptr<IBlobsBuffer> buffer;
-};
-
-BaseInferCore::BaseInferCore()
-{
-  auto preprocess_block = BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return PreProcess(unit); }, "BaseInferCore PreProcess");
-  auto inference_block = BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return Inference(unit); }, "BaseInferCore Inference");
-  auto postprocess_block = BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return PostProcess(unit); }, "BaseInferCore PostProcess");
-  ConfigPipeline("InferCore Pipieline", {preprocess_block, inference_block, postprocess_block});
-}
-
-bool BaseInferCore::SyncInfer(std::shared_ptr<IBlobsBuffer> buffer, const int batch_size)
-{
-  auto inner_package    = std::make_shared<_InnerSyncInferPackage>();
-  inner_package->buffer = buffer;
-  CHECK_STATE(PreProcess(inner_package), "[BaseInferCore] SyncInfer Preprocess Failed!!!");
-  CHECK_STATE(Inference(inner_package), "[BaseInferCore] SyncInfer Inference Failed!!!");
-  CHECK_STATE(PostProcess(inner_package), "[BaseInferCore] SyncInfer PostProcess Failed!!!");
-  return true;
-}
-
-std::shared_ptr<IBlobsBuffer> BaseInferCore::GetBuffer(bool block)
-{
-  return mem_buf_pool_->Alloc(block);
-}
-
-void BaseInferCore::Release()
-{
-  BaseAsyncPipeline::ClosePipeline();
-  mem_buf_pool_.reset();
-}
-
-void BaseInferCore::Init(int mem_buf_size)
-{
-  if (mem_buf_size <= 0 || mem_buf_size > 100)
-  {
-    throw std::invalid_argument("mem_buf_size should be between [1,100], Got: " +
-                                std::to_string(mem_buf_size));
-  }
-  mem_buf_pool_ = std::make_unique<MemBufferPool>(this, mem_buf_size);
-  LOG(INFO) << "successfully init mem buf pool with pool_size : " << mem_buf_size;
-}
-
-BaseInferCore::~BaseInferCore()
-{
-  Release();
-}
-
-} // namespace inference_core
\ No newline at end of file
diff --git a/deploy_core/src/base_sam.cpp b/deploy_core/src/base_sam.cpp
deleted file mode 100644
index 9ff533e..0000000
--- a/deploy_core/src/base_sam.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-24 20:05:41
- * @LastEditTime: 2024-11-26 21:57:33
- * @FilePath: /easy_deploy/deploy_core/src/base_sam.cpp
- */
-#include "deploy_core/base_sam.h"
-
-#include "deploy_core/wrapper.h"
-
-namespace sam {
-
-/**
- * @brief Check if the input arguments are valid
- *
- * @param image
- * @param infer_core
- * @param points
- * @param labels
- */
-static bool CheckValidArguments(const cv::Mat                                        &image,
-                                const std::shared_ptr<inference_core::IRotInferCore> &infer_core,
-                                const std::vector<std::pair<int, int>>               &points,
-                                const std::vector<int> &labels) noexcept
-{
-  if (image.empty())
-  {
-    LOG(ERROR) << "[BaseSamModel] Got empty image!!!";
-    return false;
-  } else if (infer_core == nullptr)
-  {
-    LOG(ERROR) << "[BaseSamModel] Infer_core with points as prompt is null!!!";
-    return false;
-  } else if (points.size() != labels.size() || points.size() < 1)
-  {
-    LOG(ERROR) << "[BaseSamModel] points/labels size is not valid!!! "
-               << "points.size: " << points.size() << ", labels.size: " << labels.size();
-    return false;
-  }
-
-  return true;
-}
-
-/**
- * @brief Check if the input arguments are valid
- *
- * @param image
- * @param infer_core
- * @param boxes
- */
-static bool CheckValidArguments(const cv::Mat                                        &image,
-                                const std::shared_ptr<inference_core::IRotInferCore> &infer_core,
-                                const std::vector<BBox2D> &boxes) noexcept
-{
-  if (image.empty())
-  {
-    LOG(ERROR) << "[BaseSamModel] Got empty image!!!";
-    return false;
-  } else if (infer_core == nullptr)
-  {
-    LOG(ERROR) << "[BaseSamModel] Infer_core with boxes as prompt is null!!!";
-    return false;
-  } else if (boxes.size() < 1)
-  {
-    LOG(ERROR) << "[BaseSamModel] boxes size is not valid!!! "
-               << "boxes.size: " << boxes.size();
-    return false;
-  } else if (boxes.size() > 1)
-  {
-    LOG(WARNING) << "[BaseSamModel] More than one boxes is not support in sam model!!";
-  }
-
-  return true;
-}
-
-BaseSamModel::BaseSamModel(const std::string                             &model_name,
-                           std::shared_ptr<inference_core::BaseInferCore> image_encoder_core,
-                           std::shared_ptr<inference_core::BaseInferCore> mask_points_decoder_core,
-                           std::shared_ptr<inference_core::BaseInferCore> mask_boxes_decoder_core)
-    : model_name_(model_name),
-      image_encoder_core_(image_encoder_core),
-      mask_points_decoder_core_(mask_points_decoder_core),
-      mask_boxes_decoder_core_(mask_boxes_decoder_core),
-      box_pipeline_name_(model_name + "_SamWithBoxPipeline"),
-      point_pipeline_name_(model_name + "_SamWithPointPipeline")
-{
-  if (image_encoder_core == nullptr)
-  {
-    throw std::invalid_argument("`image_encoder_core` should not be null");
-  }
-
-  if (mask_points_decoder_core == nullptr && mask_boxes_decoder_core == nullptr)
-  {
-    throw std::invalid_argument("one of `point/box` decoder should be non-nullptr");
-  }
-
-  if (mask_points_decoder_core_ != nullptr)
-  {
-    ConfigurePointPipeline();
-  }
-  if (mask_boxes_decoder_core_ != nullptr)
-  {
-    ConfigureBoxPipeline();
-  }
-}
-
-BaseSamModel::~BaseSamModel()
-{
-  BaseAsyncPipeline::ClosePipeline();
-
-  if (image_encoder_core_ != nullptr)
-  {
-    image_encoder_core_->Release();
-  }
-  if (mask_points_decoder_core_ != nullptr)
-  {
-    mask_points_decoder_core_->Release();
-  }
-  if (mask_boxes_decoder_core_ != nullptr)
-  {
-    mask_boxes_decoder_core_->Release();
-  }
-}
-
-void BaseSamModel::ConfigureBoxPipeline()
-{
-  auto image_preprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return ImagePreProcess(unit); },
-      "[MobileSam Image PreProcess]");
-
-  auto prompt_preprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return PromptBoxPreProcess(unit); },
-      "[MobileSam Prompt PreProcess]");
-
-  auto mask_postprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return MaskPostProcess(unit); },
-      "[MobileSam Mask PostProcess]");
-
-  const auto &image_encoder_context = image_encoder_core_->GetPipelineContext();
-
-  const auto &mask_decoder_context = mask_boxes_decoder_core_->GetPipelineContext();
-
-  BaseAsyncPipeline::ConfigPipeline(
-      box_pipeline_name_, {image_preprocess_block, image_encoder_context, prompt_preprocess_block,
-                           mask_decoder_context, mask_postprocess_block});
-}
-
-void BaseSamModel::ConfigurePointPipeline()
-{
-  auto image_preprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return ImagePreProcess(unit); },
-      "[MobileSam Image PreProcess]");
-
-  auto prompt_preprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return PromptPointPreProcess(unit); },
-      "[MobileSam Prompt PreProcess]");
-
-  auto mask_postprocess_block = BaseAsyncPipeline::BuildPipelineBlock(
-      [&](ParsingType unit) -> bool { return MaskPostProcess(unit); },
-      "[MobileSam Mask PostProcess]");
-
-  const auto &image_encoder_context = image_encoder_core_->GetPipelineContext();
-
-  const auto &mask_decoder_context = mask_points_decoder_core_->GetPipelineContext();
-
-  BaseAsyncPipeline::ConfigPipeline(
-      point_pipeline_name_, {image_preprocess_block, image_encoder_context, prompt_preprocess_block,
-                             mask_decoder_context, mask_postprocess_block});
-}
-
-bool BaseSamModel::GenerateMask(const cv::Mat                          &image,
-                                const std::vector<std::pair<int, int>> &points,
-                                const std::vector<int>                 &labels,
-                                cv::Mat                                &result,
-                                bool                                    isRGB)
-{
-  // 0. check
-  CHECK_STATE(CheckValidArguments(image, mask_points_decoder_core_, points, labels),
-              "[BaseSamModel] `GenerateMask` with points got invalid arguments");
-
-  // 1. Get blobs buffers
-  auto encoder_blob_buffers = image_encoder_core_->GetBuffer(true);
-  auto decoder_blob_buffers = mask_points_decoder_core_->GetBuffer(true);
-
-  // 2. Construct `SamPipelinePackage`
-  auto package                        = std::make_shared<SamPipelinePackage>();
-  package->input_image_data           = std::make_shared<PipelineCvImageWrapper>(image, isRGB);
-  package->points                     = points;
-  package->labels                     = labels;
-  package->image_encoder_blobs_buffer = encoder_blob_buffers;
-  package->mask_decoder_blobs_buffer  = decoder_blob_buffers;
-
-  // 3. Carry out workflow
-  MESSURE_DURATION_AND_CHECK_STATE(ImagePreProcess(package),
-                                   "[BaseSamModel] Image-Preprocess execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(image_encoder_core_->SyncInfer(package->GetInferBuffer()),
-                                   "[BaseSamModel] Image-encoder sync infer execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(PromptPointPreProcess(package),
-                                   "[BaseSamModel] Prompt-preprocess execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(mask_points_decoder_core_->SyncInfer(package->GetInferBuffer()),
-                                   "[BaseSamModel] Prompt-decoder sync infer execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(MaskPostProcess(package),
-                                   "[BaseSamModel] Mask-postprocess execute failed!!!");
-
-  // 4. output the result
-  result = package->mask;
-  return true;
-}
-
-bool BaseSamModel::GenerateMask(const cv::Mat             &image,
-                                const std::vector<BBox2D> &boxes,
-                                cv::Mat                   &result,
-                                bool                       isRGB)
-{
-  // 0. check
-  CHECK_STATE(CheckValidArguments(image, mask_boxes_decoder_core_, boxes),
-              "[BaseSamModel] `GenerateMask` with boxes got invalid arguments");
-
-  // 1. Get blobs buffers
-  auto encoder_blob_buffers = image_encoder_core_->GetBuffer(true);
-  auto decoder_blob_buffers = mask_boxes_decoder_core_->GetBuffer(true);
-
-  // 2. Construct `SamPipelinePackage`
-  auto package                        = std::make_shared<SamPipelinePackage>();
-  package->input_image_data           = std::make_shared<PipelineCvImageWrapper>(image, isRGB);
-  package->boxes                      = boxes;
-  package->image_encoder_blobs_buffer = encoder_blob_buffers;
-  package->mask_decoder_blobs_buffer  = decoder_blob_buffers;
-
-  // 3. Carry out workflow
-  MESSURE_DURATION_AND_CHECK_STATE(ImagePreProcess(package),
-                                   "[BaseSamModel] Image-Preprocess execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(image_encoder_core_->SyncInfer(package->GetInferBuffer()),
-                                   "[BaseSamModel] Image-encoder sync infer execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(PromptBoxPreProcess(package),
-                                   "[BaseSamModel] Prompt-preprocess execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(mask_boxes_decoder_core_->SyncInfer(package->GetInferBuffer()),
-                                   "[BaseSamModel] Prompt-decoder sync infer execute failed!!!");
-
-  MESSURE_DURATION_AND_CHECK_STATE(MaskPostProcess(package),
-                                   "[BaseSamModel] Mask-postprocess execute failed!!!");
-
-  // 4. output the result
-  result = package->mask;
-  return true;
-}
-
-std::future<cv::Mat> BaseSamModel::GenerateMaskAsync(const cv::Mat                          &image,
-                                                     const std::vector<std::pair<int, int>> &points,
-                                                     const std::vector<int>                 &labels,
-                                                     bool                                    isRGB,
-                                                     bool cover_oldest)
-{
-  // 0. Check
-  if (!CheckValidArguments(image, mask_points_decoder_core_, points, labels))
-  {
-    LOG(ERROR) << "[BaseSamModel] `GenerateMask` with points got invalid arguments";
-    return std::future<cv::Mat>();
-  }
-  if (!BaseAsyncPipeline::IsPipelineInitialized(point_pipeline_name_))
-  {
-    LOG(ERROR) << "[BaseSamModel] Async pipeline with points as prompt is not initialized yet!!!";
-    return std::future<cv::Mat>();
-  }
-
-  // 1. Get blobs buffers
-  auto encoder_blob_buffers = image_encoder_core_->GetBuffer(true);
-  auto decoder_blob_buffers = mask_points_decoder_core_->GetBuffer(true);
-
-  // 2. Construct `SamPipelinePackage`
-  auto package                        = std::make_shared<SamPipelinePackage>();
-  package->input_image_data           = std::make_shared<PipelineCvImageWrapper>(image, isRGB);
-  package->points                     = points;
-  package->labels                     = labels;
-  package->image_encoder_blobs_buffer = encoder_blob_buffers;
-  package->mask_decoder_blobs_buffer  = decoder_blob_buffers;
-
-  // 3. return `std::future` instance
-  return BaseAsyncPipeline::PushPipeline(point_pipeline_name_, package);
-}
-
-std::future<cv::Mat> BaseSamModel::GenerateMaskAsync(const cv::Mat             &image,
-                                                     const std::vector<BBox2D> &boxes,
-                                                     bool                       isRGB,
-                                                     bool                       cover_oldest)
-{
-  // 0. check
-  if (!CheckValidArguments(image, mask_boxes_decoder_core_, boxes))
-  {
-    LOG(ERROR) << "[BaseSamModel] `GenerateMask` with boxes got invalid arguments";
-    return std::future<cv::Mat>();
-  }
-
-  if (!BaseAsyncPipeline::IsPipelineInitialized(box_pipeline_name_))
-  {
-    LOG(ERROR) << "[BaseSamModel] Async pipeline with boxes as prompt is not initialized yet!!!";
-    return std::future<cv::Mat>();
-  }
-
-  // 1. Get blobs buffers
-  auto encoder_blob_buffers = image_encoder_core_->GetBuffer(true);
-  auto decoder_blob_buffers = mask_boxes_decoder_core_->GetBuffer(true);
-
-  // 2. Construct `SamPipelinePackage`
-  auto package                        = std::make_shared<SamPipelinePackage>();
-  package->input_image_data           = std::make_shared<PipelineCvImageWrapper>(image, isRGB);
-  package->boxes                      = boxes;
-  package->image_encoder_blobs_buffer = encoder_blob_buffers;
-  package->mask_decoder_blobs_buffer  = decoder_blob_buffers;
-
-  // 3. return `std::future` instance
-  return BaseAsyncPipeline::PushPipeline(box_pipeline_name_, package);
-}
-
-} // namespace sam
\ No newline at end of file
diff --git a/detection_6d_foundationpose/CMakeLists.txt b/detection_6d_foundationpose/CMakeLists.txt
index 77aea27..5290dd6 100644
--- a/detection_6d_foundationpose/CMakeLists.txt
+++ b/detection_6d_foundationpose/CMakeLists.txt
@@ -52,7 +52,7 @@ include_directories(
   ${CUDA_INCLUDE_DIRS}
 )
 
-add_library(${PROJECT_NAME} SHARED ${source_file})   
+add_library(${PROJECT_NAME} SHARED ${source_file})
 
 
 target_link_libraries(${PROJECT_NAME} PUBLIC
@@ -66,4 +66,4 @@ target_link_libraries(${PROJECT_NAME} PUBLIC
 )
 
 
-target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
\ No newline at end of file
+target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
diff --git a/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp b/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
index 12dd106..8c8c4a1 100644
--- a/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
+++ b/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
@@ -1,7 +1,6 @@
 #ifndef __FOUNDATIONPOSE_H
 #define __FOUNDATIONPOSE_H
 
-
 #include <opencv2/core.hpp>
 #include <Eigen/Dense>
 
@@ -13,135 +12,123 @@ class Base6DofDetectionModel {
 public:
   /**
    * @brief 实现的接口应支持动态输入尺寸，并检查rgb/depth/mask尺寸是否一致.
-   * 
-   * @note 相机内参与原始图像的尺寸是绑定的，不可在外部直接对图像进行resize操作，若需要进行resize，应对内参intrinsic同样处理 
-   * 
+   *
+   * @note
+   * Intrinsics are bound to the source image size; if you resize images, rescale intrinsics too.
+   *
    * @param rgb rgb图像，必须是`rgb`格式，从opencv-imread读取的图像默认是bgr格式，需经过转换
    * @param depth 获取的深度图像，cv::Mat数据格式为CV_32F1
    * @param mask 目标物的mask图像，cv::Mat数据格式为CV_8UC1，positive的像素值大于0即可
    * @param target_name 目标物的名称，与构建时提供的name->mesh_path映射一致
    * @param out_pose 输出位姿
-   * @return true 
-   * @return false 
+   * @return true on success; `out_pose` then holds the result (per the FoundationPose impl)
+   * @return false on failure (NOTE(review): exact failure conditions are impl-defined; confirm)
    */
-  virtual bool Register(const cv::Mat& rgb, 
-                        const cv::Mat& depth, 
-                        const cv::Mat& mask,
-                        const std::string& target_name,
-                        Eigen::Matrix4f& out_pose) = 0;
+  virtual bool Register(const cv::Mat     &rgb,
+                        const cv::Mat     &depth,
+                        const cv::Mat     &mask,
+                        const std::string &target_name,
+                        Eigen::Matrix4f   &out_pose) = 0;
   /**
-   * @brief 从第二帧开始的跟踪过程，是`Register`的轻量化版本，精度稍低但推理效率非常高，调用前必须先调用`Register`
-   * 
+   * @brief Tracking step for the second frame onward: a lightweight version of `Register` with
+   * slightly lower accuracy but much faster inference. `Register` must be called first.
+   *
    * @param rgb rgb图像，必须是`rgb`格式，从opencv-imread读取的图像默认是bgr格式，需经过转换
    * @param depth 获取的深度图像，cv::Mat数据格式为CV_32F1
    * @param target_name 目标物的名称，与构建时提供的name->mesh_path映射一致
    * @param out_pose 输出位姿
-   * @return true 
-   * @return false 
+   * @return true on success; `out_pose` then holds the result (per the FoundationPose impl)
+   * @return false on failure (NOTE(review): exact failure conditions are impl-defined; confirm)
    */
-  virtual bool Track(const cv::Mat& rgb,
-                     const cv::Mat& depth,
-                     const std::string& target_name,
-                     Eigen::Matrix4f& out_pose) = 0;
+  virtual bool Track(const cv::Mat     &rgb,
+                     const cv::Mat     &depth,
+                     const std::string &target_name,
+                     Eigen::Matrix4f   &out_pose) = 0;
 
   /**
    * @brief 获取某个输入mesh目标的三维尺寸(辅助功能，用户也可自己计算)
-   * 
-   * @return Eigen::Vector3f 
+   *
+   * @return Eigen::Vector3f 3D dimensions of the mesh for `target_name` (this default throws)
    */
-  virtual Eigen::Vector3f GetObjectDimension(const std::string& target_name) const {
+  virtual Eigen::Vector3f GetObjectDimension(const std::string &target_name) const
+  {
     throw std::runtime_error("[Base6DofDetectionModel] GetOjbectDimension NOT Implemented yet!!!");
   };
 
   virtual ~Base6DofDetectionModel() = default;
+
 protected:
   Base6DofDetectionModel() = default;
 };
 
-
-
 /**
  * @brief 创建一个`FoundationPose`实例
- * 
+ *
  * @param refiner_core refiner推理核心
  * @param scorer_core scorer推理核心
  * @param mesh_file_path 使用的三维模型`mesh_file`路径
  * @param texture_file_path 使用的三维模型外观特征图像路径
  * @param intrinsic_in_vec 相机内参矩阵，std::vector<float>形式，`row_major`格式
- * @return std::shared_ptr<Base6DofDetectionModel> 
+ * @return std::shared_ptr<Base6DofDetectionModel> pointing to the newly created instance
  */
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
     std::shared_ptr<inference_core::BaseInferCore> refiner_core,
     std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string& target_name,
-    const std::string& mesh_file_path,
-    const std::string& texture_file_path,
-    const std::vector<float>& intrinsic_in_vec
-);
-
+    const std::string                             &target_name,
+    const std::string                             &mesh_file_path,
+    const std::string                             &texture_file_path,
+    const std::vector<float>                      &intrinsic_in_vec);
 
 /**
  * @brief 创建一个`FoundationPose`实例
- * 
+ *
  * @param refiner_core refiner推理核心
  * @param scorer_core scorer推理核心
  * @param mesh_file_path 使用的三维模型`mesh_file`路径
  * @param texture_file_path 使用的三维模型外观特征图像路径
  * @param intrinsic_in_mat 相机内参矩阵，Eigen::Matrix3f格式
- * @return std::shared_ptr<Base6DofDetectionModel> 
+ * @return std::shared_ptr<Base6DofDetectionModel> pointing to the newly created instance
  */
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
     std::shared_ptr<inference_core::BaseInferCore> refiner_core,
     std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string& target_name,
-    const std::string& mesh_file_path,
-    const std::string& texture_file_path,
-    const Eigen::Matrix3f& intrinsic_in_mat
-);
-
+    const std::string                             &target_name,
+    const std::string                             &mesh_file_path,
+    const std::string                             &texture_file_path,
+    const Eigen::Matrix3f                         &intrinsic_in_mat);
 
 /**
  * @brief 创建一个`FoundationPose`实例
- * 
+ *
  * @param refiner_core refiner推理核心
  * @param scorer_core scorer推理核心
  * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
-   *             键值[name]在后续检测过程中用于辨别特定种类目标，**保持一致**
+ *             the key [name] identifies the target in later detection calls; **keep it consistent**
  * @param intrinsic_in_vec 相机内参矩阵，std::vector<float>形式，`row_major`格式
- * @return std::shared_ptr<Base6DofDetectionModel> 
+ * @return std::shared_ptr<Base6DofDetectionModel> pointing to the newly created instance
  */
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>>& meshes,
-    const std::vector<float>& intrinsic_in_vec
-);
-
-
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
+    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
+    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
+    const std::vector<float>                                                   &intrinsic_in_vec);
 
 /**
  * @brief 创建一个`FoundationPose`实例
- * 
+ *
  * @param refiner_core refiner推理核心
  * @param scorer_core scorer推理核心
  * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
  *               键值[name]在后续检测过程中用于辨别特定种类目标，**保持一致**
  * @param intrinsic_in_mat 相机内参矩阵，Eigen::Matrix3f格式
- * @return std::shared_ptr<Base6DofDetectionModel> 
+ * @return std::shared_ptr<Base6DofDetectionModel> pointing to the newly created instance
  */
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>>& meshes,
-    const Eigen::Matrix3f& intrinsic_in_mat
-);
-
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
+    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
+    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
+    const Eigen::Matrix3f                                                      &intrinsic_in_mat);
 
 } // namespace detection_6d
 
-
 #endif
diff --git a/detection_6d_foundationpose/src/foundationpose.cpp b/detection_6d_foundationpose/src/foundationpose.cpp
index 22f1122..7fd3b22 100644
--- a/detection_6d_foundationpose/src/foundationpose.cpp
+++ b/detection_6d_foundationpose/src/foundationpose.cpp
@@ -9,18 +9,13 @@
 #define MAX_INPUT_IMAGE_HEIGHT 1080
 #define MAX_INPUT_IMAGE_WIDTH 1920
 
-
 namespace detection_6d {
 
-
-
-
-
-class FoundationPose : public Base6DofDetectionModel{
+class FoundationPose : public Base6DofDetectionModel {
 public:
   /**
    * @brief 使用单个目标的mesh构建一个FoundationPose实例
-   * 
+   *
    * @param refiner_core refiner的推理核心
    * @param scorer_core scorer的推理核心
    * @param mesh_file_path 目标的mesh文件路径
@@ -34,21 +29,21 @@ class FoundationPose : public Base6DofDetectionModel{
    * @param max_depth 有效深度最大值
    */
   FoundationPose(std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-                std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-                const std::string& target_name,
-                const std::string& mesh_file_path,
-                const std::string& texture_file_path,
-                const Eigen::Matrix3f& intrinsic,
-                const int input_image_H = MAX_INPUT_IMAGE_HEIGHT,
-                const int input_image_W = MAX_INPUT_IMAGE_WIDTH,
-                const int crop_window_H = 160,
-                const int crop_window_W = 160,
-                const float min_depth = 0.1,
-                const float max_depth = 4.0);
-  
+                 std::shared_ptr<inference_core::BaseInferCore> scorer_core,
+                 const std::string                             &target_name,
+                 const std::string                             &mesh_file_path,
+                 const std::string                             &texture_file_path,
+                 const Eigen::Matrix3f                         &intrinsic,
+                 const int   input_image_H = MAX_INPUT_IMAGE_HEIGHT,
+                 const int   input_image_W = MAX_INPUT_IMAGE_WIDTH,
+                 const int   crop_window_H = 160,
+                 const int   crop_window_W = 160,
+                 const float min_depth     = 0.1,
+                 const float max_depth     = 4.0);
+
   /**
    * @brief 使用多个目标的mesh构建一个FoundationPose实例
-   * 
+   *
    * @param refiner_core refiner的推理核心
    * @param scorer_core scorer的推理核心
    * @param meshes 多个目标的mesh/texture路径map: [name] -> [mesh_file_path, texture_file_path]，
@@ -62,41 +57,39 @@ class FoundationPose : public Base6DofDetectionModel{
    * @param max_depth 有效深度最大值
    */
   FoundationPose(std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-                std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-                const std::unordered_map<std::string, std::pair<std::string, std::string>>& meshes,
-                const Eigen::Matrix3f& intrinsic,
-                const int max_input_image_H = MAX_INPUT_IMAGE_HEIGHT,
-                const int max_input_image_W = MAX_INPUT_IMAGE_WIDTH,
-                const int crop_window_H = 160,
-                const int crop_window_W = 160,
-                const float min_depth = 0.1,
-                const float max_depth = 4.0);
-
-
-
-  bool Register(const cv::Mat& rgb, 
-                const cv::Mat& depth, 
-                const cv::Mat& mask,
-                const std::string& target_name,
-                Eigen::Matrix4f& out_pose) override;
-
-  bool Track(const cv::Mat& rgb,
-            const cv::Mat& depth,
-            const std::string& target_name,
-            Eigen::Matrix4f& out_pose) override;
-
-  Eigen::Vector3f GetObjectDimension(const std::string& target_name) const override;
+                 std::shared_ptr<inference_core::BaseInferCore> scorer_core,
+                 const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
+                 const Eigen::Matrix3f &intrinsic,
+                 const int              max_input_image_H = MAX_INPUT_IMAGE_HEIGHT,
+                 const int              max_input_image_W = MAX_INPUT_IMAGE_WIDTH,
+                 const int              crop_window_H     = 160,
+                 const int              crop_window_W     = 160,
+                 const float            min_depth         = 0.1,
+                 const float            max_depth         = 4.0);
+
+  bool Register(const cv::Mat     &rgb,
+                const cv::Mat     &depth,
+                const cv::Mat     &mask,
+                const std::string &target_name,
+                Eigen::Matrix4f   &out_pose) override;
+
+  bool Track(const cv::Mat     &rgb,
+             const cv::Mat     &depth,
+             const std::string &target_name,
+             Eigen::Matrix4f   &out_pose) override;
+
+  Eigen::Vector3f GetObjectDimension(const std::string &target_name) const override;
 
 private:
-  bool CheckInputArguments(const cv::Mat& rgb, 
-                           const cv::Mat& depth, 
-                           const cv::Mat& mask, 
-                           const std::string& target_name);
+  bool CheckInputArguments(const cv::Mat     &rgb,
+                           const cv::Mat     &depth,
+                           const cv::Mat     &mask,
+                           const std::string &target_name);
 
-  bool UploadDataToDevice(const cv::Mat& rgb,
-                          const cv::Mat& depth,
-                          const cv::Mat& mask,
-                          const std::shared_ptr<FoundationPosePipelinePackage>& package);
+  bool UploadDataToDevice(const cv::Mat                                        &rgb,
+                          const cv::Mat                                        &depth,
+                          const cv::Mat                                        &mask,
+                          const std::shared_ptr<FoundationPosePipelinePackage> &package);
 
   bool RefinePreProcess(std::shared_ptr<async_pipeline::IPipelinePackage> package);
 
@@ -109,420 +102,383 @@ class FoundationPose : public Base6DofDetectionModel{
 private:
   // 以下参数不对外开放
   // 默认的blob输入名称
-  const std::string RENDER_INPUT_BLOB_NAME = "render_input";
-  const std::string TRANSF_INPUT_BLOB_NAME = "transf_input";
+  const std::string RENDER_INPUT_BLOB_NAME     = "render_input";
+  const std::string TRANSF_INPUT_BLOB_NAME     = "transf_input";
   const std::string REFINE_TRANS_OUT_BLOB_NAME = "trans";
-  const std::string REFINE_ROT_OUT_BLOB_NAME = "rot";
-  const float REFINE_ROT_NORMALIZER = 0.349065850398865;
-  const std::string SCORE_OUTPUT_BLOB_NAME = "scores";
+  const std::string REFINE_ROT_OUT_BLOB_NAME   = "rot";
+  const float       REFINE_ROT_NORMALIZER      = 0.349065850398865;
+  const std::string SCORE_OUTPUT_BLOB_NAME     = "scores";
   // render参数
-  const int score_mode_poses_num_ = 252;
-  const int refine_mode_poses_num_ = 1;
+  const int   score_mode_poses_num_  = 252;
+  const int   refine_mode_poses_num_ = 1;
   const float refine_mode_crop_ratio = 1.2;
-  const float score_mode_crop_ratio = 1.1;
+  const float score_mode_crop_ratio  = 1.1;
 
 private:
   // 以下参数对外开放，通过构造函数传入
   const Eigen::Matrix3f intrinsic_;
-  const int max_input_image_H_;
-  const int max_input_image_W_;
-  const int crop_window_H_;
-  const int crop_window_W_;
+  const int             max_input_image_H_;
+  const int             max_input_image_W_;
+  const int             crop_window_H_;
+  const int             crop_window_W_;
 
-  std::shared_ptr<inference_core::BaseInferCore> refiner_core_; 
-  std::shared_ptr<inference_core::BaseInferCore> scorer_core_; 
+  std::shared_ptr<inference_core::BaseInferCore> refiner_core_;
+  std::shared_ptr<inference_core::BaseInferCore> scorer_core_;
 
 private:
   // 内部各个模块
-  std::unordered_map<std::string, std::shared_ptr<TexturedMeshLoader>> map_name2loaders_;
+  std::unordered_map<std::string, std::shared_ptr<TexturedMeshLoader>>     map_name2loaders_;
   std::unordered_map<std::string, std::shared_ptr<FoundationPoseRenderer>> map_name2renderer_;
-  std::shared_ptr<FoundationPoseSampler> hyp_poses_sampler_;
-  std::shared_ptr<FoundationPoseDecoder> out_pose_decoder_;
+  std::shared_ptr<FoundationPoseSampler>                                   hyp_poses_sampler_;
+  std::shared_ptr<FoundationPoseDecoder>                                   out_pose_decoder_;
 
   // 维护一个Track用的prev_pose
   std::unordered_map<std::string, Eigen::Matrix4f> map_name2prev_pose_;
 };
 
-
-FoundationPose::FoundationPose(std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-                              std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-                              const std::string& target_name,
-                              const std::string& mesh_file_path,
-                              const std::string& texture_file_path,
-                              const Eigen::Matrix3f& intrinsic,
-                              const int input_image_H,
-                              const int input_image_W,
-                              const int crop_window_H,
-                              const int crop_window_W,
-                              const float min_depth,
-                              const float max_depth)
-                            : FoundationPose(refiner_core,
-                                            scorer_core,
-                                            {{target_name, {mesh_file_path, texture_file_path}}},
-                                            intrinsic,
-                                            input_image_H,
-                                            input_image_W,
-                                            crop_window_H,
-                                            crop_window_W,
-                                            min_depth,
-                                            max_depth)
-{
-
-}
-
-
-
 FoundationPose::FoundationPose(std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-                              std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-                              const std::unordered_map<std::string, 
-                                              std::pair<std::string, std::string>>& meshes,
-                              const Eigen::Matrix3f& intrinsic,
-                              const int max_input_image_H,
-                              const int max_input_image_W,
-                              const int crop_window_H,
-                              const int crop_window_W,
-                              const float min_depth,
-                              const float max_depth)
-                            : refiner_core_(refiner_core),
-                              scorer_core_(scorer_core),
-                              intrinsic_(intrinsic),
-                              max_input_image_H_(max_input_image_H),
-                              max_input_image_W_(max_input_image_W),
-                              crop_window_H_(crop_window_H),
-                              crop_window_W_(crop_window_W)
+                               std::shared_ptr<inference_core::BaseInferCore> scorer_core,
+                               const std::string                             &target_name,
+                               const std::string                             &mesh_file_path,
+                               const std::string                             &texture_file_path,
+                               const Eigen::Matrix3f                         &intrinsic,
+                               const int                                      input_image_H,
+                               const int                                      input_image_W,
+                               const int                                      crop_window_H,
+                               const int                                      crop_window_W,
+                               const float                                    min_depth,
+                               const float                                    max_depth)
+    : FoundationPose(refiner_core,
+                     scorer_core,
+                     {{target_name, {mesh_file_path, texture_file_path}}},
+                     intrinsic,
+                     input_image_H,
+                     input_image_W,
+                     crop_window_H,
+                     crop_window_W,
+                     min_depth,
+                     max_depth)
+{}
+
+FoundationPose::FoundationPose(
+    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
+    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
+    const Eigen::Matrix3f                                                      &intrinsic,
+    const int                                                                   max_input_image_H,
+    const int                                                                   max_input_image_W,
+    const int                                                                   crop_window_H,
+    const int                                                                   crop_window_W,
+    const float                                                                 min_depth,
+    const float                                                                 max_depth)
+    : intrinsic_(intrinsic), // initializers follow member declaration order (avoids -Wreorder)
+      max_input_image_H_(max_input_image_H),
+      max_input_image_W_(max_input_image_W),
+      crop_window_H_(crop_window_H),
+      crop_window_W_(crop_window_W),
+      refiner_core_(refiner_core),
+      scorer_core_(scorer_core)
 {
   // Check
   auto refiner_blobs_buffer = refiner_core->GetBuffer(true);
-  if (refiner_blobs_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first == nullptr) {
+  if (refiner_blobs_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first == nullptr)
+  {
     LOG(ERROR) << "[FoundationPose] Failed to Construct FoundationPose since `renfiner_core` "
-              << "do not has a blob named `" << RENDER_INPUT_BLOB_NAME << "`.";
+               << "do not has a blob named `" << RENDER_INPUT_BLOB_NAME << "`.";
     throw std::runtime_error("[FoundationPose] Failed to Construct FoundationPose");
   }
-  if (refiner_blobs_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first == nullptr) {
+  if (refiner_blobs_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first == nullptr)
+  {
     LOG(ERROR) << "[FoundationPose] Failed to Construct FoundationPose since `renfiner_core` "
-              << "do not has a blob named `" << TRANSF_INPUT_BLOB_NAME << "`.";
+               << "do not has a blob named `" << TRANSF_INPUT_BLOB_NAME << "`.";
     throw std::runtime_error("[FoundationPose] Failed to Construct FoundationPose");
   }
 
-
   auto scorer_blobs_buffer = scorer_core->GetBuffer(true);
-  if (scorer_blobs_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first == nullptr) {
+  if (scorer_blobs_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first == nullptr)
+  {
     LOG(ERROR) << "[FoundationPose] Failed to Construct FoundationPose since `scorer_core` "
-              << "do not has a blob named `" << RENDER_INPUT_BLOB_NAME << "`.";
+               << "do not has a blob named `" << RENDER_INPUT_BLOB_NAME << "`.";
     throw std::runtime_error("[FoundationPose] Failed to Construct FoundationPose");
   }
-  if (scorer_blobs_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first == nullptr) {
+  if (scorer_blobs_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first == nullptr)
+  {
     LOG(ERROR) << "[FoundationPose] Failed to Construct FoundationPose since `scorer_core` "
-              << "do not has a blob named `" << TRANSF_INPUT_BLOB_NAME << "`.";
+               << "do not has a blob named `" << TRANSF_INPUT_BLOB_NAME << "`.";
     throw std::runtime_error("[FoundationPose] Failed to Construct FoundationPose");
   }
 
-
   // preload modules
-  for (const auto& p_name_paths : meshes) {
-    const std::string& target_name = p_name_paths.first;
-    const std::string& mesh_file_path = p_name_paths.second.first;
-    const std::string& texutre_img_path = p_name_paths.second.second;
+  for (const auto &p_name_paths : meshes)
+  {
+    const std::string &target_name      = p_name_paths.first;
+    const std::string &mesh_file_path   = p_name_paths.second.first;
+    const std::string &texutre_img_path = p_name_paths.second.second;
     LOG(INFO) << "[FoundationPose] Got target_name : " << target_name
               << ", mesh_file_path: " << mesh_file_path
               << ", texture_img_path: " << texutre_img_path;
     auto mesh_loader = std::make_shared<TexturedMeshLoader>(mesh_file_path, texutre_img_path);
-    map_name2loaders_[p_name_paths.first] = mesh_loader;
-    map_name2renderer_[p_name_paths.first]
-        = std::make_shared<FoundationPoseRenderer>(
-                    mesh_loader,
-                    intrinsic_, 
-                    score_mode_poses_num_,
-                    refine_mode_crop_ratio
-        );
+    map_name2loaders_[target_name]  = mesh_loader;
+    map_name2renderer_[target_name] = std::make_shared<FoundationPoseRenderer>(
+        mesh_loader, intrinsic_, score_mode_poses_num_, refine_mode_crop_ratio);
   }
 
-  hyp_poses_sampler_ = 
-        std::make_shared<FoundationPoseSampler>(
-                    max_input_image_H_,
-                    max_input_image_W_,
-                    min_depth, 
-                    max_depth, 
-                    intrinsic_
-        );
-  
+  hyp_poses_sampler_ = std::make_shared<FoundationPoseSampler>(
+      max_input_image_H_, max_input_image_W_, min_depth, max_depth, intrinsic_);
+
   out_pose_decoder_ = std::make_shared<FoundationPoseDecoder>(score_mode_poses_num_);
 }
 
-bool FoundationPose::CheckInputArguments(const cv::Mat& rgb, 
-                                         const cv::Mat& depth, 
-                                         const cv::Mat& mask, 
-                                         const std::string& target_name)
+bool FoundationPose::CheckInputArguments(const cv::Mat     &rgb,
+                                         const cv::Mat     &depth,
+                                         const cv::Mat     &mask,
+                                         const std::string &target_name)
 {
   const int r_rows = rgb.rows, r_cols = rgb.cols;
   const int d_rows = depth.rows, d_cols = depth.cols;
   const int m_rows = mask.empty() ? d_rows : mask.rows, m_cols = mask.empty() ? d_cols : mask.cols;
 
-  if (!(r_rows == d_rows && d_rows == m_rows) || !(r_cols == d_cols && d_cols == m_cols)) {
-    LOG(ERROR) << "[FoundationPose] Got rgb/depth/mask with different size! " << rgb.size << ", " << depth.size << ", " << mask.size;
+  if (!(r_rows == d_rows && d_rows == m_rows) || !(r_cols == d_cols && d_cols == m_cols))
+  {
+    LOG(ERROR) << "[FoundationPose] Got rgb/depth/mask with different size! " << rgb.size << ", "
+               << depth.size << ", " << mask.size;
     return false;
-  } 
+  }
 
   CHECK_STATE(map_name2loaders_.find(target_name) != map_name2loaders_.end(),
-            "[FoundationPose] Register Got Invalid `target_name` \
+              "[FoundationPose] Register Got Invalid `target_name` \
                               which was not provided to FoundationPose instance!!!");
 
   return true;
 }
 
-
-bool 
-FoundationPose::Register(const cv::Mat& rgb, 
-                    const cv::Mat& depth, 
-                    const cv::Mat& mask,
-                    const std::string& target_name,
-                    Eigen::Matrix4f& out_pose)
+bool FoundationPose::Register(const cv::Mat     &rgb,
+                              const cv::Mat     &depth,
+                              const cv::Mat     &mask,
+                              const std::string &target_name,
+                              Eigen::Matrix4f   &out_pose)
 {
   CHECK_STATE(CheckInputArguments(rgb, depth, mask, target_name),
-            "[FoundationPose] `Register` Got invalid arguments!!!");
+              "[FoundationPose] `Register` Got invalid arguments!!!");
 
-  auto package = std::make_shared<FoundationPosePipelinePackage>();
-  package->rgb_on_host = rgb;
+  auto package           = std::make_shared<FoundationPosePipelinePackage>();
+  package->rgb_on_host   = rgb;
   package->depth_on_host = depth;
-  package->mask_on_host = mask;
-  package->target_name = target_name;
+  package->mask_on_host  = mask;
+  package->target_name   = target_name;
   // 将数据传输至device端，并生成xyz_map数据
   MESSURE_DURATION_AND_CHECK_STATE(UploadDataToDevice(rgb, depth, mask, package),
-              "[FoundationPose] SyncDetect Failed to upload data!!!");
+                                   "[FoundationPose] SyncDetect Failed to upload data!!!");
 
-  MESSURE_DURATION_AND_CHECK_STATE(RefinePreProcess(package),
-              "[FoundationPose] SyncDetect Failed to execute RefinePreProcess!!!");
+  MESSURE_DURATION_AND_CHECK_STATE(
+      RefinePreProcess(package),
+      "[FoundationPose] SyncDetect Failed to execute RefinePreProcess!!!");
 
   // package->infer_buffer = package->refiner_blobs_buffer;
-  MESSURE_DURATION_AND_CHECK_STATE(refiner_core_->SyncInfer(package->GetInferBuffer()),
-              "[FoundationPose] SyncDetect Failed to execute refiner_core_->SyncInfer!!!");
+  MESSURE_DURATION_AND_CHECK_STATE(
+      refiner_core_->SyncInfer(package->GetInferBuffer()),
+      "[FoundationPose] SyncDetect Failed to execute refiner_core_->SyncInfer!!!");
 
-  MESSURE_DURATION_AND_CHECK_STATE(ScorePreprocess(package),
-              "[FoundationPose] SyncDetect Failed to execute ScorePreprocess!!!");
+  MESSURE_DURATION_AND_CHECK_STATE(
+      ScorePreprocess(package), "[FoundationPose] SyncDetect Failed to execute ScorePreprocess!!!");
 
   // unit_buffer->p_blob_buffers = package->scorer_blobs_buffer;
-  MESSURE_DURATION_AND_CHECK_STATE(scorer_core_->SyncInfer(package->GetInferBuffer()),
-              "[FoundationPose] SyncDetect Failed to execute scorer_core_->SyncInfer!!!");
+  MESSURE_DURATION_AND_CHECK_STATE(
+      scorer_core_->SyncInfer(package->GetInferBuffer()),
+      "[FoundationPose] SyncDetect Failed to execute scorer_core_->SyncInfer!!!");
 
   MESSURE_DURATION_AND_CHECK_STATE(ScorePostProcess(package),
-              "[FoundationPose] SyncDetect Failed to execute PostProcess!!!");
+                                   "[FoundationPose] SyncDetect Failed to execute PostProcess!!!");
 
   out_pose = std::move(package->actual_pose);
 
   return true;
 }
 
-
-bool 
-FoundationPose::Track(const cv::Mat& rgb,
-                      const cv::Mat& depth,
-                      const std::string& target_name,
-                      Eigen::Matrix4f& out_pose) 
+bool FoundationPose::Track(const cv::Mat     &rgb,
+                           const cv::Mat     &depth,
+                           const std::string &target_name,
+                           Eigen::Matrix4f   &out_pose)
 {
   CHECK_STATE(CheckInputArguments(rgb, depth, cv::Mat(), target_name),
-            "[FoundationPose] `Track` Got invalid arguments!!!");
+              "[FoundationPose] `Track` Got invalid arguments!!!");
 
   CHECK_STATE(map_name2prev_pose_.find(target_name) != map_name2prev_pose_.end(),
-            "[FoundationPose] Track target: " + target_name + " is NOT registered yet!!!\
+              "[FoundationPose] Track target: " + target_name +
+                  " is NOT registered yet!!!\
             Call `Register` first before calling `Track`!!!");
 
-  auto package = std::make_shared<FoundationPosePipelinePackage>();
-  package->rgb_on_host = rgb;
+  auto package           = std::make_shared<FoundationPosePipelinePackage>();
+  package->rgb_on_host   = rgb;
   package->depth_on_host = depth;
-  package->target_name = target_name;
-  package->hyp_poses = {map_name2prev_pose_[target_name]};
+  package->target_name   = target_name;
+  package->hyp_poses     = {map_name2prev_pose_[target_name]};
   // 将数据传输至device端，并生成xyz_map数据
   MESSURE_DURATION_AND_CHECK_STATE(UploadDataToDevice(rgb, depth, cv::Mat(), package),
-              "[FoundationPose] Track Failed to upload data!!!");
+                                   "[FoundationPose] Track Failed to upload data!!!");
 
   MESSURE_DURATION_AND_CHECK_STATE(RefinePreProcess(package),
-              "[FoundationPose] Track Failed to execute RefinePreProcess!!!");
-  
-  MESSURE_DURATION_AND_CHECK_STATE(refiner_core_->SyncInfer(package->GetInferBuffer()),
-              "[FoundationPose] Track Failed to execute refiner_core_->SyncInfer!!!");
+                                   "[FoundationPose] Track Failed to execute RefinePreProcess!!!");
 
+  MESSURE_DURATION_AND_CHECK_STATE(
+      refiner_core_->SyncInfer(package->GetInferBuffer()),
+      "[FoundationPose] Track Failed to execute refiner_core_->SyncInfer!!!");
 
   MESSURE_DURATION_AND_CHECK_STATE(TrackPostProcess(package),
-              "[Foundation] Track Failed to execute `TrackPostProcess`!!!");
+                                   "[Foundation] Track Failed to execute `TrackPostProcess`!!!");
 
   out_pose = std::move(package->actual_pose);
 
   return true;
 }
 
-
-Eigen::Vector3f
-FoundationPose::GetObjectDimension(const std::string& target_name) const
+Eigen::Vector3f FoundationPose::GetObjectDimension(const std::string &target_name) const
 {
-  if (map_name2loaders_.find(target_name) == map_name2loaders_.end()) {
+  if (map_name2loaders_.find(target_name) == map_name2loaders_.end())
+  {
     LOG(ERROR) << "[FoundationPose] GetObjectDimension Got invalid `target_name`:" << target_name
-              << ", whose mesh_file mapping was not provided to FoundationPose instance!!!";
-    throw std::runtime_error("[FoundationPose] GetObjectDimension Got invalid `target_name`: " + target_name); 
+               << ", whose mesh_file mapping was not provided to FoundationPose instance!!!";
+    throw std::runtime_error("[FoundationPose] GetObjectDimension Got invalid `target_name`: " +
+                             target_name);
   }
 
   return map_name2loaders_.at(target_name)->GetObjectDimension();
 }
 
-
-bool 
-FoundationPose::UploadDataToDevice(const cv::Mat& rgb,
-                          const cv::Mat& depth,
-                          const cv::Mat& mask,
-                          const std::shared_ptr<FoundationPosePipelinePackage>& package)
+bool FoundationPose::UploadDataToDevice(
+    const cv::Mat                                        &rgb,
+    const cv::Mat                                        &depth,
+    const cv::Mat                                        &mask,
+    const std::shared_ptr<FoundationPosePipelinePackage> &package)
 {
   const int input_image_height = rgb.rows, input_image_width = rgb.cols;
   package->input_image_height = input_image_height;
-  package->input_image_width = input_image_width;
+  package->input_image_width  = input_image_width;
 
-  void  *rgb_on_device = nullptr, 
-        *depth_on_device = nullptr, 
-        *xyz_map_on_device = nullptr;
+  void        *rgb_on_device = nullptr, *depth_on_device = nullptr, *xyz_map_on_device = nullptr;
   const size_t input_image_pixel_num = input_image_height * input_image_width;
 
   // rgb图像拷贝至device端
-  CHECK_CUDA(cudaMalloc(&rgb_on_device, 
-                          input_image_pixel_num * 3 * sizeof(uint8_t)),
-            "[FoundationPose] RefinePreProcess malloc managed `rgb_on_device` failed!!!");
-  CHECK_CUDA(cudaMemcpy(rgb_on_device, 
-                        package->rgb_on_host.data, 
-                        input_image_pixel_num * 3 * sizeof(uint8_t), 
-                        cudaMemcpyHostToDevice),
-            "[FoundationPose] cudaMemcpy rgb_host -> rgb_device FAILED!!!");
+  CHECK_CUDA(cudaMalloc(&rgb_on_device, input_image_pixel_num * 3 * sizeof(uint8_t)),
+             "[FoundationPose] RefinePreProcess malloc managed `rgb_on_device` failed!!!");
+  CHECK_CUDA(cudaMemcpy(rgb_on_device, package->rgb_on_host.data,
+                        input_image_pixel_num * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice),
+             "[FoundationPose] cudaMemcpy rgb_host -> rgb_device FAILED!!!");
 
   // depth拷贝至device端
-  CHECK_CUDA(cudaMalloc(&depth_on_device,
-                          input_image_pixel_num * sizeof(float)),
-            "[FoundationPose] RefinePreProcess malloc managed `depth_on_device` failed!!!");
-  CHECK_CUDA(cudaMemcpy(depth_on_device,
-                        package->depth_on_host.data,
-                        input_image_pixel_num * sizeof(float),
-                        cudaMemcpyHostToDevice),
-            "[FoundationPose] cudaMemcpy depth_host -> depth_device FAILED!!!");
+  CHECK_CUDA(cudaMalloc(&depth_on_device, input_image_pixel_num * sizeof(float)),
+             "[FoundationPose] RefinePreProcess malloc managed `depth_on_device` failed!!!");
+  CHECK_CUDA(cudaMemcpy(depth_on_device, package->depth_on_host.data,
+                        input_image_pixel_num * sizeof(float), cudaMemcpyHostToDevice),
+             "[FoundationPose] cudaMemcpy depth_host -> depth_device FAILED!!!");
 
   // 根据depth生成xyz_map，并拷贝至device端
-  CHECK_CUDA(cudaMalloc(&xyz_map_on_device,
-                          input_image_pixel_num * 3 * sizeof(float)),
-            "[FoundationPose] RefinePreProcess malloc managed `xyz_map_on_device` failed!!!");
-
-  convert_depth_to_xyz_map(static_cast<float*>(depth_on_device), 
-                           input_image_height, 
-                           input_image_width, 
-                           static_cast<float*>(xyz_map_on_device), 
-                           intrinsic_(0, 0),
-                           intrinsic_(1, 1),
-                           intrinsic_(0, 2),
-                           intrinsic_(1, 2),
+  CHECK_CUDA(cudaMalloc(&xyz_map_on_device, input_image_pixel_num * 3 * sizeof(float)),
+             "[FoundationPose] RefinePreProcess malloc managed `xyz_map_on_device` failed!!!");
+
+  convert_depth_to_xyz_map(static_cast<float *>(depth_on_device), input_image_height,
+                           input_image_width, static_cast<float *>(xyz_map_on_device),
+                           intrinsic_(0, 0), intrinsic_(1, 1), intrinsic_(0, 2), intrinsic_(1, 2),
                            0.1);
 
   // 输出device端指针，并注册析构过程
-  auto func_release_cuda_buffer = [](void* ptr) {
+  auto func_release_cuda_buffer = [](void *ptr) {
     auto suc = cudaFree(ptr);
-    if (suc != cudaSuccess) {
+    if (suc != cudaSuccess)
+    {
       LOG(INFO) << "[FoundationPose] FAILED to free cuda memory!!!";
     }
   };
-  package->rgb_on_device = std::shared_ptr<void>(rgb_on_device, func_release_cuda_buffer);
-  package->depth_on_device = std::shared_ptr<void>(depth_on_device, func_release_cuda_buffer);
+  package->rgb_on_device     = std::shared_ptr<void>(rgb_on_device, func_release_cuda_buffer);
+  package->depth_on_device   = std::shared_ptr<void>(depth_on_device, func_release_cuda_buffer);
   package->xyz_map_on_device = std::shared_ptr<void>(xyz_map_on_device, func_release_cuda_buffer);
 
   return true;
 }
 
-
-
-
-
-
-bool 
-FoundationPose::RefinePreProcess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
+bool FoundationPose::RefinePreProcess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
 {
   auto package = std::dynamic_pointer_cast<FoundationPosePipelinePackage>(_package);
-  CHECK_STATE(package != nullptr,
-              "[FoundationPose] RefinePreProcess Got INVALID package ptr!!!");
+  CHECK_STATE(package != nullptr, "[FoundationPose] RefinePreProcess Got INVALID package ptr!!!");
 
   // 1. sample
-  if (package->hyp_poses.empty()) {
-    CHECK_STATE(hyp_poses_sampler_->GetHypPoses(package->depth_on_device.get(),
-                                                package->mask_on_host.data,
-                                                package->input_image_height,
-                                                package->input_image_width,
-                                                package->hyp_poses),
-                "[FoundationPose] Failed to generate hyp poses!!!");    
+  if (package->hyp_poses.empty())
+  {
+    CHECK_STATE(hyp_poses_sampler_->GetHypPoses(
+                    package->depth_on_device.get(), package->mask_on_host.data,
+                    package->input_image_height, package->input_image_width, package->hyp_poses),
+                "[FoundationPose] Failed to generate hyp poses!!!");
   }
 
   // 2. render
-  auto& refine_renderer = map_name2renderer_[package->target_name];
-  auto refiner_blob_buffer = refiner_core_->GetBuffer(false);
+  auto &refine_renderer     = map_name2renderer_[package->target_name];
+  auto  refiner_blob_buffer = refiner_core_->GetBuffer(false);
   // 设置推理前blob的输入位置为device，输出的blob位置为host端
   refiner_blob_buffer->SetBlobBuffer(RENDER_INPUT_BLOB_NAME, DataLocation::DEVICE);
   refiner_blob_buffer->SetBlobBuffer(TRANSF_INPUT_BLOB_NAME, DataLocation::DEVICE);
-  CHECK_STATE(refine_renderer->RenderAndTransform(package->hyp_poses,
-                                    package->rgb_on_device.get(), 
-                                    package->depth_on_device.get(), 
-                                    package->xyz_map_on_device.get(), 
-                                    package->input_image_height,
-                                    package->input_image_width,
-                                    refiner_blob_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first,
-                                    refiner_blob_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first),
-              "[FoundationPose] Failed to render and transform !!!");
+  CHECK_STATE(
+      refine_renderer->RenderAndTransform(
+          package->hyp_poses, package->rgb_on_device.get(), package->depth_on_device.get(),
+          package->xyz_map_on_device.get(), package->input_image_height, package->input_image_width,
+          refiner_blob_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first,
+          refiner_blob_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first),
+      "[FoundationPose] Failed to render and transform !!!");
   // 3. 设置推理时形状
   const int input_poses_num = package->hyp_poses.size();
-  refiner_blob_buffer->SetBlobShape(RENDER_INPUT_BLOB_NAME, 
+  refiner_blob_buffer->SetBlobShape(RENDER_INPUT_BLOB_NAME,
+                                    {input_poses_num, crop_window_H_, crop_window_W_, 6});
+  refiner_blob_buffer->SetBlobShape(TRANSF_INPUT_BLOB_NAME,
                                     {input_poses_num, crop_window_H_, crop_window_W_, 6});
-  refiner_blob_buffer->SetBlobShape(TRANSF_INPUT_BLOB_NAME, 
-                                    {input_poses_num, crop_window_H_, crop_window_W_, 6});                        
   package->refiner_blobs_buffer = refiner_blob_buffer;
-  package->infer_buffer = refiner_blob_buffer;
+  package->infer_buffer         = refiner_blob_buffer;
 
   return true;
 }
 
-
-
-bool 
-FoundationPose::ScorePreprocess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
+bool FoundationPose::ScorePreprocess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
 {
   auto package = std::dynamic_pointer_cast<FoundationPosePipelinePackage>(_package);
-  CHECK_STATE(package != nullptr,
-              "[FoundationPose] ScorePreprocess Got INVALID package ptr!!!");
+  CHECK_STATE(package != nullptr, "[FoundationPose] ScorePreprocess Got INVALID package ptr!!!");
   // 获取refiner模型的缓存指针
-  const auto& refiner_blob_buffer = package->refiner_blobs_buffer;
+  const auto &refiner_blob_buffer = package->refiner_blobs_buffer;
   const auto _trans_ptr = refiner_blob_buffer->GetOuterBlobBuffer(REFINE_TRANS_OUT_BLOB_NAME).first;
-  const auto _rot_ptr = refiner_blob_buffer->GetOuterBlobBuffer(REFINE_ROT_OUT_BLOB_NAME).first;
-  const float* trans_ptr = static_cast<float*>(_trans_ptr);
-  const float* rot_ptr = static_cast<float*>(_rot_ptr);
+  const auto _rot_ptr   = refiner_blob_buffer->GetOuterBlobBuffer(REFINE_ROT_OUT_BLOB_NAME).first;
+  const float *trans_ptr = static_cast<float *>(_trans_ptr);
+  const float *rot_ptr   = static_cast<float *>(_rot_ptr);
 
   // 获取生成的假设位姿
-  const auto& hyp_poses = package->hyp_poses;
-  const int poses_num = hyp_poses.size();
+  const auto &hyp_poses = package->hyp_poses;
+  const int   poses_num = hyp_poses.size();
 
   // 获取对应的mesh_loader
-  const auto& mesh_loader = map_name2loaders_[package->target_name];
+  const auto &mesh_loader = map_name2loaders_[package->target_name];
 
   // transformation 将模型输出的相对位姿转换为绝对位姿
-  const float mesh_diameter = mesh_loader->GetMeshDiameter();
+  const float                  mesh_diameter = mesh_loader->GetMeshDiameter();
   std::vector<Eigen::Vector3f> trans_delta(poses_num);
   std::vector<Eigen::Vector3f> rot_delta(poses_num);
   std::vector<Eigen::Matrix3f> rot_mat_delta(poses_num);
-  for (int i = 0 ; i < poses_num ; ++ i) {
+  for (int i = 0; i < poses_num; ++i)
+  {
     const size_t offset = i * 3;
-    trans_delta[i] << trans_ptr[offset], trans_ptr[offset+1], trans_ptr[offset+2];
+    trans_delta[i] << trans_ptr[offset], trans_ptr[offset + 1], trans_ptr[offset + 2];
     trans_delta[i] *= mesh_diameter / 2;
 
-    rot_delta[i] << rot_ptr[offset], rot_ptr[offset+1], rot_ptr[offset+2];
+    rot_delta[i] << rot_ptr[offset], rot_ptr[offset + 1], rot_ptr[offset + 2];
     auto normalized_vect = (rot_delta[i].array().tanh() * REFINE_ROT_NORMALIZER).matrix();
     Eigen::AngleAxis rot_delta_angle_axis(normalized_vect.norm(), normalized_vect.normalized());
     rot_mat_delta[i] = rot_delta_angle_axis.toRotationMatrix().transpose();
   }
 
   std::vector<Eigen::Matrix4f> refine_poses(poses_num);
-  for (int i = 0 ; i < poses_num ; ++ i) {
+  for (int i = 0; i < poses_num; ++i)
+  {
     refine_poses[i] = hyp_poses[i];
     refine_poses[i].col(3).head(3) += trans_delta[i];
 
-    Eigen::Matrix3f top_left_3x3 = refine_poses[i].block<3,3>(0,0);
-    Eigen::Matrix3f result_3x3 = rot_mat_delta[i] * top_left_3x3;
-    refine_poses[i].block<3,3>(0,0) = result_3x3;
+    Eigen::Matrix3f top_left_3x3      = refine_poses[i].block<3, 3>(0, 0);
+    Eigen::Matrix3f result_3x3        = rot_mat_delta[i] * top_left_3x3;
+    refine_poses[i].block<3, 3>(0, 0) = result_3x3;
   }
 
   auto scorer_blob_buffer = scorer_core_->GetBuffer(false);
@@ -531,41 +487,35 @@ FoundationPose::ScorePreprocess(std::shared_ptr<async_pipeline::IPipelinePackage
   scorer_blob_buffer->SetBlobBuffer(RENDER_INPUT_BLOB_NAME, DataLocation::DEVICE);
   scorer_blob_buffer->SetBlobBuffer(TRANSF_INPUT_BLOB_NAME, DataLocation::DEVICE);
   scorer_blob_buffer->SetBlobBuffer(SCORE_OUTPUT_BLOB_NAME, DataLocation::DEVICE);
-  auto& score_renderer = map_name2renderer_[package->target_name];
-  CHECK_STATE(score_renderer->RenderAndTransform(refine_poses,
-                        package->rgb_on_device.get(), 
-                        package->depth_on_device.get(), 
-                        package->xyz_map_on_device.get(), 
-                        package->input_image_height,
-                        package->input_image_width,
-                        scorer_blob_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first,
-                        scorer_blob_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first),
-              "[FoundationPose] score_renderer RenderAndTransform Failed!!!");
-  
-  package->refine_poses = std::move(refine_poses);
+  auto &score_renderer = map_name2renderer_[package->target_name];
+  CHECK_STATE(
+      score_renderer->RenderAndTransform(
+          refine_poses, package->rgb_on_device.get(), package->depth_on_device.get(),
+          package->xyz_map_on_device.get(), package->input_image_height, package->input_image_width,
+          scorer_blob_buffer->GetOuterBlobBuffer(RENDER_INPUT_BLOB_NAME).first,
+          scorer_blob_buffer->GetOuterBlobBuffer(TRANSF_INPUT_BLOB_NAME).first),
+      "[FoundationPose] score_renderer RenderAndTransform Failed!!!");
+
+  package->refine_poses        = std::move(refine_poses);
   package->scorer_blobs_buffer = scorer_blob_buffer;
-  package->infer_buffer = scorer_blob_buffer;
+  package->infer_buffer        = scorer_blob_buffer;
 
   return true;
 }
 
-
-
-bool 
-FoundationPose::ScorePostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
+bool FoundationPose::ScorePostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
 {
   auto package = std::dynamic_pointer_cast<FoundationPosePipelinePackage>(_package);
-  CHECK_STATE(package != nullptr,
-              "[FoundationPose] ScorePostProcess Got INVALID package ptr!!!");
-  const auto& scorer_blob_buffer = package->scorer_blobs_buffer;
+  CHECK_STATE(package != nullptr, "[FoundationPose] ScorePostProcess Got INVALID package ptr!!!");
+  const auto &scorer_blob_buffer = package->scorer_blobs_buffer;
   // 获取scorer模型的输出缓存指针
-  void* score_ptr = scorer_blob_buffer->GetOuterBlobBuffer(SCORE_OUTPUT_BLOB_NAME).first;
+  void *score_ptr = scorer_blob_buffer->GetOuterBlobBuffer(SCORE_OUTPUT_BLOB_NAME).first;
 
-  const auto& refine_poses = package->refine_poses;
-  const int poses_num = refine_poses.size();
+  const auto &refine_poses = package->refine_poses;
+  const int   poses_num    = refine_poses.size();
   // 输入到decoder进行解码(找到得分最高的位姿，并通过包络盒转换到正确的位姿)
   // 获取对应的mesh_loader
-  const auto& mesh_loader = map_name2loaders_[package->target_name];
+  const auto &mesh_loader = map_name2loaders_[package->target_name];
 
   // 获取置信度最大的refined_pose
   int max_score_index = out_pose_decoder_->GetMaxScoreIndex(score_ptr);
@@ -580,55 +530,51 @@ FoundationPose::ScorePostProcess(std::shared_ptr<async_pipeline::IPipelinePackag
   return true;
 }
 
-
-
-
-bool 
-FoundationPose::TrackPostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
+bool FoundationPose::TrackPostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> _package)
 {
   auto package = std::dynamic_pointer_cast<FoundationPosePipelinePackage>(_package);
-  CHECK_STATE(package != nullptr,
-              "[FoundationPose] TrackPostProcess Got INVALID package ptr!!!");
-
+  CHECK_STATE(package != nullptr, "[FoundationPose] TrackPostProcess Got INVALID package ptr!!!");
 
   // 获取refiner模型的缓存指针
-  const auto& refiner_blob_buffer = package->refiner_blobs_buffer;
+  const auto &refiner_blob_buffer = package->refiner_blobs_buffer;
   const auto _trans_ptr = refiner_blob_buffer->GetOuterBlobBuffer(REFINE_TRANS_OUT_BLOB_NAME).first;
-  const auto _rot_ptr = refiner_blob_buffer->GetOuterBlobBuffer(REFINE_ROT_OUT_BLOB_NAME).first;
-  const float* trans_ptr = static_cast<float*>(_trans_ptr);
-  const float* rot_ptr = static_cast<float*>(_rot_ptr);
+  const auto _rot_ptr   = refiner_blob_buffer->GetOuterBlobBuffer(REFINE_ROT_OUT_BLOB_NAME).first;
+  const float *trans_ptr = static_cast<float *>(_trans_ptr);
+  const float *rot_ptr   = static_cast<float *>(_rot_ptr);
 
   // 获取生成的假设位姿
-  const auto& hyp_poses = package->hyp_poses;
-  const int poses_num = hyp_poses.size();
+  const auto &hyp_poses = package->hyp_poses;
+  const int   poses_num = hyp_poses.size();
 
   // 获取对应的mesh_loader
-  const auto& mesh_loader = map_name2loaders_[package->target_name];
+  const auto &mesh_loader = map_name2loaders_[package->target_name];
 
   // transformation 将模型输出的相对位姿转换为绝对位姿
-  const float mesh_diameter = mesh_loader->GetMeshDiameter();
+  const float                  mesh_diameter = mesh_loader->GetMeshDiameter();
   std::vector<Eigen::Vector3f> trans_delta(poses_num);
   std::vector<Eigen::Vector3f> rot_delta(poses_num);
   std::vector<Eigen::Matrix3f> rot_mat_delta(poses_num);
-  for (int i = 0 ; i < poses_num ; ++ i) {
+  for (int i = 0; i < poses_num; ++i)
+  {
     const size_t offset = i * 3;
-    trans_delta[i] << trans_ptr[offset], trans_ptr[offset+1], trans_ptr[offset+2];
+    trans_delta[i] << trans_ptr[offset], trans_ptr[offset + 1], trans_ptr[offset + 2];
     trans_delta[i] *= mesh_diameter / 2;
 
-    rot_delta[i] << rot_ptr[offset], rot_ptr[offset+1], rot_ptr[offset+2];
+    rot_delta[i] << rot_ptr[offset], rot_ptr[offset + 1], rot_ptr[offset + 2];
     auto normalized_vect = (rot_delta[i].array().tanh() * REFINE_ROT_NORMALIZER).matrix();
     Eigen::AngleAxis rot_delta_angle_axis(normalized_vect.norm(), normalized_vect.normalized());
     rot_mat_delta[i] = rot_delta_angle_axis.toRotationMatrix().transpose();
   }
 
   std::vector<Eigen::Matrix4f> refine_poses(poses_num);
-  for (int i = 0 ; i < poses_num ; ++ i) {
+  for (int i = 0; i < poses_num; ++i)
+  {
     refine_poses[i] = hyp_poses[i];
     refine_poses[i].col(3).head(3) += trans_delta[i];
 
-    Eigen::Matrix3f top_left_3x3 = refine_poses[i].block<3,3>(0,0);
-    Eigen::Matrix3f result_3x3 = rot_mat_delta[i] * top_left_3x3;
-    refine_poses[i].block<3,3>(0,0) = result_3x3;
+    Eigen::Matrix3f top_left_3x3      = refine_poses[i].block<3, 3>(0, 0);
+    Eigen::Matrix3f result_3x3        = rot_mat_delta[i] * top_left_3x3;
+    refine_poses[i].block<3, 3>(0, 0) = result_3x3;
   }
 
   // 记录
@@ -640,90 +586,64 @@ FoundationPose::TrackPostProcess(std::shared_ptr<async_pipeline::IPipelinePackag
   return true;
 }
 
-
-
-
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
     std::shared_ptr<inference_core::BaseInferCore> refiner_core,
     std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string& target_name,
-    const std::string& mesh_file_path,
-    const std::string& texture_file_path,
-    const std::vector<float>& intrinsic_in_vec)
+    const std::string                             &target_name,
+    const std::string                             &mesh_file_path,
+    const std::string                             &texture_file_path,
+    const std::vector<float>                      &intrinsic_in_vec)
 {
   // 重构内参矩阵，`row_major` -> `col_major`
   Eigen::Matrix3f intrinsic;
-  for (int r = 0 ; r < 3 ; ++ r) {
-    for (int c = 0 ; c < 3 ; ++ c) {
-      intrinsic(r, c) = intrinsic_in_vec[r*3+c];
+  for (int r = 0; r < 3; ++r)
+  {
+    for (int c = 0; c < 3; ++c)
+    {
+      intrinsic(r, c) = intrinsic_in_vec[r * 3 + c];
     }
   }
-  return std::make_shared<FoundationPose>(refiner_core, 
-                                          scorer_core,
-                                          target_name,
-                                          mesh_file_path,
-                                          texture_file_path,
-                                          intrinsic);
+  return std::make_shared<FoundationPose>(refiner_core, scorer_core, target_name, mesh_file_path,
+                                          texture_file_path, intrinsic);
 }
 
-
-
-
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
     std::shared_ptr<inference_core::BaseInferCore> refiner_core,
     std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::string& target_name,
-    const std::string& mesh_file_path,
-    const std::string& texture_file_path,
-    const Eigen::Matrix3f& intrinsic_in_mat)
+    const std::string                             &target_name,
+    const std::string                             &mesh_file_path,
+    const std::string                             &texture_file_path,
+    const Eigen::Matrix3f                         &intrinsic_in_mat)
 {
-  return std::make_shared<FoundationPose>(refiner_core, 
-                                          scorer_core,
-                                          target_name,
-                                          mesh_file_path,
-                                          texture_file_path,
-                                          intrinsic_in_mat);
+  return std::make_shared<FoundationPose>(refiner_core, scorer_core, target_name, mesh_file_path,
+                                          texture_file_path, intrinsic_in_mat);
 }
 
-
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>>& meshes,
-    const std::vector<float>& intrinsic_in_vec)
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
+    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
+    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
+    const std::vector<float>                                                   &intrinsic_in_vec)
 {
   // 重构内参矩阵，`row_major` -> `col_major`
   Eigen::Matrix3f intrinsic;
-  for (int r = 0 ; r < 3 ; ++ r) {
-    for (int c = 0 ; c < 3 ; ++ c) {
-      intrinsic(r, c) = intrinsic_in_vec[r*3+c];
+  for (int r = 0; r < 3; ++r)
+  {
+    for (int c = 0; c < 3; ++c)
+    {
+      intrinsic(r, c) = intrinsic_in_vec[r * 3 + c];
     }
   }
-  return std::make_shared<FoundationPose>(refiner_core,
-                                          scorer_core,
-                                          meshes,
-                                          intrinsic);
+  return std::make_shared<FoundationPose>(refiner_core, scorer_core, meshes, intrinsic);
 }
 
-
-
-
-
-std::shared_ptr<Base6DofDetectionModel>
-CreateFoundationPoseModel(
-    std::shared_ptr<inference_core::BaseInferCore> refiner_core,
-    std::shared_ptr<inference_core::BaseInferCore> scorer_core,
-    const std::unordered_map<std::string, std::pair<std::string, std::string>>& meshes,
-    const Eigen::Matrix3f& intrinsic_in_mat)
+std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel(
+    std::shared_ptr<inference_core::BaseInferCore>                              refiner_core,
+    std::shared_ptr<inference_core::BaseInferCore>                              scorer_core,
+    const std::unordered_map<std::string, std::pair<std::string, std::string>> &meshes,
+    const Eigen::Matrix3f                                                      &intrinsic_in_mat)
 {
-  return std::make_shared<FoundationPose>(refiner_core,
-                                          scorer_core,
-                                          meshes,
-                                          intrinsic_in_mat);
+  return std::make_shared<FoundationPose>(refiner_core, scorer_core, meshes, intrinsic_in_mat);
 }
 
-
 } // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose_decoder.cpp b/detection_6d_foundationpose/src/foundationpose_decoder.cpp
index bb2f55d..3136dab 100644
--- a/detection_6d_foundationpose/src/foundationpose_decoder.cpp
+++ b/detection_6d_foundationpose/src/foundationpose_decoder.cpp
@@ -2,60 +2,53 @@
 
 #include "foundationpose_decoder.cu.hpp"
 
-
 namespace detection_6d {
 
 FoundationPoseDecoder::FoundationPoseDecoder(const int input_poses_num)
-                                            : input_poses_num_(input_poses_num)
+    : input_poses_num_(input_poses_num)
 {
-  if (cudaStreamCreate(&cuda_stream_) != cudaSuccess) {
+  if (cudaStreamCreate(&cuda_stream_) != cudaSuccess)
+  {
     throw std::runtime_error("[FoundationPoseDecoder] Failed to create cuda stream!!!");
   }
 }
 
 FoundationPoseDecoder::~FoundationPoseDecoder()
 {
-  if (cudaStreamDestroy(cuda_stream_) != cudaSuccess) {
+  if (cudaStreamDestroy(cuda_stream_) != cudaSuccess)
+  {
     LOG(WARNING) << "[FoundationPoseDecoder] Failed to destroy cuda stream!!!";
   }
 }
 
-int 
-FoundationPoseDecoder::GetMaxScoreIndex(void* scores_on_device) noexcept
+int FoundationPoseDecoder::GetMaxScoreIndex(void *scores_on_device) noexcept
 {
-  return getMaxScoreIndex(cuda_stream_, 
-                          reinterpret_cast<float*>(scores_on_device), 
+  return getMaxScoreIndex(cuda_stream_, reinterpret_cast<float *>(scores_on_device),
                           input_poses_num_);
 }
 
-bool
-FoundationPoseDecoder::DecodeWithMaxScore(int max_score_index,
-                          const std::vector<Eigen::Matrix4f>& refined_poses,
-                          Eigen::Matrix4f& out_pose,
-                          std::shared_ptr<TexturedMeshLoader> mesh_loader) 
+bool FoundationPoseDecoder::DecodeWithMaxScore(int                                 max_score_index,
+                                               const std::vector<Eigen::Matrix4f> &refined_poses,
+                                               Eigen::Matrix4f                    &out_pose,
+                                               std::shared_ptr<TexturedMeshLoader> mesh_loader)
 {
-  const auto& best_pose_matrix = refined_poses[max_score_index];
-  Eigen::Matrix4f tf_to_center = Eigen::Matrix4f::Identity();
-  tf_to_center.block<3, 1>(0, 3) = - mesh_loader->GetMeshModelCenter();
-  out_pose = best_pose_matrix * tf_to_center;  
-  out_pose = out_pose * mesh_loader->GetOrientBounds();
+  const auto     &best_pose_matrix = refined_poses[max_score_index];
+  Eigen::Matrix4f tf_to_center     = Eigen::Matrix4f::Identity();
+  tf_to_center.block<3, 1>(0, 3)   = -mesh_loader->GetMeshModelCenter();
+  out_pose                         = best_pose_matrix * tf_to_center;
+  out_pose                         = out_pose * mesh_loader->GetOrientBounds();
   return true;
 }
 
-
-
-bool 
-FoundationPoseDecoder::DecodeInRefine(const Eigen::Matrix4f& refined_pose,
-                          Eigen::Matrix4f& out_pose,
-                          std::shared_ptr<TexturedMeshLoader> mesh_loader)
+bool FoundationPoseDecoder::DecodeInRefine(const Eigen::Matrix4f              &refined_pose,
+                                           Eigen::Matrix4f                    &out_pose,
+                                           std::shared_ptr<TexturedMeshLoader> mesh_loader)
 {
-  Eigen::Matrix4f tf_to_center = Eigen::Matrix4f::Identity();
-  tf_to_center.block<3, 1>(0, 3) = - mesh_loader->GetMeshModelCenter();
-  out_pose = refined_pose * tf_to_center;
-  out_pose = out_pose * mesh_loader->GetOrientBounds();
+  Eigen::Matrix4f tf_to_center   = Eigen::Matrix4f::Identity();
+  tf_to_center.block<3, 1>(0, 3) = -mesh_loader->GetMeshModelCenter();
+  out_pose                       = refined_pose * tf_to_center;
+  out_pose                       = out_pose * mesh_loader->GetOrientBounds();
   return true;
 }
 
-
 } // namespace detection_6d
-
diff --git a/detection_6d_foundationpose/src/foundationpose_decoder.cu b/detection_6d_foundationpose/src/foundationpose_decoder.cu
index c3b06b7..2ca45f9 100644
--- a/detection_6d_foundationpose/src/foundationpose_decoder.cu
+++ b/detection_6d_foundationpose/src/foundationpose_decoder.cu
@@ -20,9 +20,9 @@
 #include <thrust/device_vector.h>
 #include <thrust/extrema.h>
 
-
 // This function will find the index with the maximum score
-int getMaxScoreIndex(cudaStream_t cuda_stream, float* scores, int N) {
+int getMaxScoreIndex(cudaStream_t cuda_stream, float *scores, int N)
+{
   // Wrap raw pointers with device pointers
   thrust::device_ptr<float> dev_scores(scores);
   // Find the maximum score
diff --git a/detection_6d_foundationpose/src/foundationpose_decoder.cu.hpp b/detection_6d_foundationpose/src/foundationpose_decoder.cu.hpp
index f648d82..24ac808 100644
--- a/detection_6d_foundationpose/src/foundationpose_decoder.cu.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_decoder.cu.hpp
@@ -25,8 +25,6 @@
 #include "cuda.h"
 #include "cuda_runtime.h"
 
+int getMaxScoreIndex(cudaStream_t cuda_stream, float *scores, int N);
 
-int getMaxScoreIndex(cudaStream_t cuda_stream, float* scores, int N);
-
-
-#endif  // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_DECODER_CUDA_HPP_
\ No newline at end of file
+#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_DECODER_CUDA_HPP_
diff --git a/detection_6d_foundationpose/src/foundationpose_decoder.hpp b/detection_6d_foundationpose/src/foundationpose_decoder.hpp
index bf32904..be3ece8 100644
--- a/detection_6d_foundationpose/src/foundationpose_decoder.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_decoder.hpp
@@ -9,25 +9,24 @@ class FoundationPoseDecoder {
 public:
   FoundationPoseDecoder(const int input_poses_num);
 
-  bool DecodeWithMaxScore(int max_score_index,
-          const std::vector<Eigen::Matrix4f>& refined_poses,
-          Eigen::Matrix4f& out_pose,
-          std::shared_ptr<TexturedMeshLoader> mesh_loader);
+  bool DecodeWithMaxScore(int                                 max_score_index,
+                          const std::vector<Eigen::Matrix4f> &refined_poses,
+                          Eigen::Matrix4f                    &out_pose,
+                          std::shared_ptr<TexturedMeshLoader> mesh_loader);
 
-  bool DecodeInRefine(const Eigen::Matrix4f& refined_pose,
-          Eigen::Matrix4f& out_pose,
-          std::shared_ptr<TexturedMeshLoader> mesh_loader);
+  bool DecodeInRefine(const Eigen::Matrix4f              &refined_pose,
+                      Eigen::Matrix4f                    &out_pose,
+                      std::shared_ptr<TexturedMeshLoader> mesh_loader);
 
-  int GetMaxScoreIndex(void* scores_on_device) noexcept;
+  int GetMaxScoreIndex(void *scores_on_device) noexcept;
 
   ~FoundationPoseDecoder();
 
 private:
-  const int input_poses_num_;
+  const int    input_poses_num_;
   cudaStream_t cuda_stream_;
 };
 
 } // namespace detection_6d
 
-
-#endif
\ No newline at end of file
+#endif
diff --git a/detection_6d_foundationpose/src/foundationpose_render.cpp b/detection_6d_foundationpose/src/foundationpose_render.cpp
index 643334c..7385489 100644
--- a/detection_6d_foundationpose/src/foundationpose_render.cpp
+++ b/detection_6d_foundationpose/src/foundationpose_render.cpp
@@ -6,68 +6,67 @@
 
 namespace detection_6d {
 
-void saveFloatsToFile(const float* data, size_t N, const std::string& filename) {
-    std::ofstream outFile(filename, std::ios::binary);
-    if (!outFile) {
-        std::cerr << "Error opening file for writing." << std::endl;
-        return;
-    }
-    outFile.write(reinterpret_cast<const char*>(data), N * sizeof(float));
-    outFile.close();
+void saveFloatsToFile(const float *data, size_t N, const std::string &filename)
+{
+  std::ofstream outFile(filename, std::ios::binary);
+  if (!outFile)
+  {
+    std::cerr << "Error opening file for writing." << std::endl;
+    return;
+  }
+  outFile.write(reinterpret_cast<const char *>(data), N * sizeof(float));
+  outFile.close();
 }
 
 // From OpenCV camera (cvcam) coordinate system to the OpenGL camera (glcam) coordinate system
 const Eigen::Matrix4f kGLCamInCVCam =
     (Eigen::Matrix4f(4, 4) << 1, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 1).finished();
 
-RowMajorMatrix ComputeTF(float left, float right, 
-                         float top, float bottom, 
-                         Eigen::Vector2i out_size) 
+RowMajorMatrix ComputeTF(float left, float right, float top, float bottom, Eigen::Vector2i out_size)
 {
-  left = std::round(left);
-  right = std::round(right);
-  top = std::round(top);
+  left   = std::round(left);
+  right  = std::round(right);
+  top    = std::round(top);
   bottom = std::round(bottom);
 
   RowMajorMatrix tf = Eigen::MatrixXf::Identity(3, 3);
-  tf(0, 2) = -left;
-  tf(1, 2) = -top;
+  tf(0, 2)          = -left;
+  tf(1, 2)          = -top;
 
   RowMajorMatrix new_tf = Eigen::MatrixXf::Identity(3, 3);
-  new_tf(0, 0) = out_size(0) / (right - left);
-  new_tf(1, 1) = out_size(1) / (bottom - top);
+  new_tf(0, 0)          = out_size(0) / (right - left);
+  new_tf(1, 1)          = out_size(1) / (bottom - top);
 
   auto result = new_tf * tf;
   return result;
 }
 
-
-std::vector<RowMajorMatrix> 
-ComputeCropWindowTF(const std::vector<Eigen::MatrixXf>& poses, 
-                    const Eigen::MatrixXf& K, 
-                    Eigen::Vector2i out_size,
-                    float crop_ratio, 
-                    float mesh_diameter) 
+std::vector<RowMajorMatrix> ComputeCropWindowTF(const std::vector<Eigen::MatrixXf> &poses,
+                                                const Eigen::MatrixXf              &K,
+                                                Eigen::Vector2i                     out_size,
+                                                float                               crop_ratio,
+                                                float                               mesh_diameter)
 {
   // Compute the tf batch from the left, right, top, and bottom coordinates
-  int B = poses.size();
-  float r = mesh_diameter * crop_ratio / 2;
+  int             B = poses.size();
+  float           r = mesh_diameter * crop_ratio / 2;
   Eigen::MatrixXf offsets(5, 3);
   offsets << 0, 0, 0, r, 0, 0, -r, 0, 0, 0, r, 0, 0, -r, 0;
 
   std::vector<RowMajorMatrix> tfs;
-  for (int i = 0; i < B; i++) {
-    auto block = poses[i].block<3, 1>(0, 3).transpose();
-    Eigen::MatrixXf pts = block.replicate(offsets.rows(), 1).array() + offsets.array();
+  for (int i = 0; i < B; i++)
+  {
+    auto            block     = poses[i].block<3, 1>(0, 3).transpose();
+    Eigen::MatrixXf pts       = block.replicate(offsets.rows(), 1).array() + offsets.array();
     Eigen::MatrixXf projected = (K * pts.transpose()).transpose();
     Eigen::MatrixXf uvs =
         projected.leftCols(2).array() / projected.rightCols(1).replicate(1, 2).array();
     Eigen::MatrixXf center = uvs.row(0);
 
     float radius = std::abs((uvs - center.replicate(uvs.rows(), 1)).rightCols(1).maxCoeff());
-    float left = center(0, 0) - radius;
-    float right = center(0, 0) + radius;
-    float top = center(0, 1) - radius;
+    float left   = center(0, 0) - radius;
+    float right  = center(0, 0) + radius;
+    float top    = center(0, 1) - radius;
     float bottom = center(0, 1) + radius;
 
     tfs.push_back(ComputeTF(left, right, top, bottom, out_size));
@@ -75,43 +74,43 @@ ComputeCropWindowTF(const std::vector<Eigen::MatrixXf>& poses,
   return tfs;
 }
 
-
 /**
- * @brief 
- * 
- * @param output 
- * @param pts 
- * @param tfs 
- * @return true 
- * @return false 
+ * @brief
+ *
+ * @param output
+ * @param pts
+ * @param tfs
+ * @return true
+ * @return false
  */
-bool TransformPts(std::vector<RowMajorMatrix>& output, 
-                  const Eigen::MatrixXf& pts, 
-                  const std::vector<Eigen::MatrixXf>& tfs) 
+bool TransformPts(std::vector<RowMajorMatrix>        &output,
+                  const Eigen::MatrixXf              &pts,
+                  const std::vector<Eigen::MatrixXf> &tfs)
 {
   // Get the dimensions of the inputs
-  int rows = pts.rows();
-  int cols = pts.cols();
+  int rows     = pts.rows();
+  int cols     = pts.cols();
   int tfs_size = tfs.size();
-  CHECK_STATE(tfs_size != 0,
-        "[FoundationposeRender] The transfomation matrix is empty! ");
+  CHECK_STATE(tfs_size != 0, "[FoundationposeRender] The transfomation matrix is empty! ");
 
   CHECK_STATE(tfs[0].cols() == tfs[0].rows(),
-        "[FoundationposeRender] The transfomation matrix has different rows and cols! ");
+              "[FoundationposeRender] The transfomation matrix has different rows and cols! ");
 
   int dim = tfs[0].rows();
   CHECK_STATE(cols == dim - 1,
-        "[FoundationposeRender] The dimension of pts and tf are not match! ");
+              "[FoundationposeRender] The dimension of pts and tf are not match! ");
 
-  for (int i = 0; i < tfs_size; i++) {
+  for (int i = 0; i < tfs_size; i++)
+  {
     RowMajorMatrix transformed_matrix;
     transformed_matrix.resize(rows, dim - 1);
     auto submatrix = tfs[i].block(0, 0, dim - 1, dim - 1);
-    auto last_col = tfs[i].block(0, dim - 1, dim - 1, 1);
+    auto last_col  = tfs[i].block(0, dim - 1, dim - 1, 1);
 
     // Apply the transformation to the points
-    for (int j = 0; j < rows; j++) {
-      auto new_row = submatrix * pts.row(j).transpose() + last_col;
+    for (int j = 0; j < rows; j++)
+    {
+      auto new_row              = submatrix * pts.row(j).transpose() + last_col;
       transformed_matrix.row(j) = new_row.transpose();
     }
     output.push_back(transformed_matrix);
@@ -121,63 +120,64 @@ bool TransformPts(std::vector<RowMajorMatrix>& output,
   return true;
 }
 
-bool ConstructBBox2D(RowMajorMatrix& bbox2d, 
-                    const std::vector<RowMajorMatrix>& tfs, 
-                    int H, int W) 
+bool ConstructBBox2D(RowMajorMatrix &bbox2d, const std::vector<RowMajorMatrix> &tfs, int H, int W)
 {
-
   Eigen::MatrixXf bbox2d_crop(2, 2);
   bbox2d_crop << 0.0, 0.0, W - 1, H - 1;
 
   std::vector<Eigen::MatrixXf> inversed_tfs;
   // Inverse tfs before transform
-  for (size_t i = 0; i < tfs.size(); i++) {
+  for (size_t i = 0; i < tfs.size(); i++)
+  {
     inversed_tfs.push_back(tfs[i].inverse());
   }
 
   std::vector<RowMajorMatrix> bbox2d_ori_vec;
-  auto suc = TransformPts(bbox2d_ori_vec, bbox2d_crop, inversed_tfs);
-  if(!suc) {
+  auto                        suc = TransformPts(bbox2d_ori_vec, bbox2d_crop, inversed_tfs);
+  if (!suc)
+  {
     LOG(ERROR) << "[FoundationposeRender] Failed to transform the 2D bounding box";
     return suc;
   }
 
-  for (size_t i = 0; i < bbox2d_ori_vec.size(); i++) {
-    bbox2d.row(i) = Eigen::Map<Eigen::RowVectorXf>(bbox2d_ori_vec[i].data(), bbox2d_ori_vec[i].size());
+  for (size_t i = 0; i < bbox2d_ori_vec.size(); i++)
+  {
+    bbox2d.row(i) =
+        Eigen::Map<Eigen::RowVectorXf>(bbox2d_ori_vec[i].data(), bbox2d_ori_vec[i].size());
   }
   return true;
 }
 
-
-
-
-bool ProjectMatrixFromIntrinsics(Eigen::Matrix4f& proj_output, 
-                                 const Eigen::Matrix3f& K, 
-                                 int height, int width, 
-                                 float znear = 0.1, 
-                                 float zfar = 100.0,
-                                 std::string window_coords = "y_down") 
+bool ProjectMatrixFromIntrinsics(Eigen::Matrix4f       &proj_output,
+                                 const Eigen::Matrix3f &K,
+                                 int                    height,
+                                 int                    width,
+                                 float                  znear         = 0.1,
+                                 float                  zfar          = 100.0,
+                                 std::string            window_coords = "y_down")
 {
-
-  int x0 = 0;
-  int y0 = 0;
-  int w = width;
-  int h = height;
+  int   x0 = 0;
+  int   y0 = 0;
+  int   w  = width;
+  int   h  = height;
   float nc = znear;
   float fc = zfar;
 
   float depth = fc - nc;
-  float q = -(fc + nc) / depth;
-  float qn = -2 * (fc * nc) / depth;
+  float q     = -(fc + nc) / depth;
+  float qn    = -2 * (fc * nc) / depth;
 
   // Get the projection matrix from camera K matrix
-  if (window_coords == "y_up") {
+  if (window_coords == "y_up")
+  {
     proj_output << 2 * K(0, 0) / w, -2 * K(0, 1) / w, (-2 * K(0, 2) + w + 2 * x0) / w, 0, 0,
         -2 * K(1, 1) / h, (-2 * K(1, 2) + h + 2 * y0) / h, 0, 0, 0, q, qn, 0, 0, -1, 0;
-  } else if (window_coords == "y_down") {
+  } else if (window_coords == "y_down")
+  {
     proj_output << 2 * K(0, 0) / w, -2 * K(0, 1) / w, (-2 * K(0, 2) + w + 2 * x0) / w, 0, 0,
         2 * K(1, 1) / h, (2 * K(1, 2) - h + 2 * y0) / h, 0, 0, 0, q, qn, 0, 0, -1, 0;
-  } else {
+  } else
+  {
     LOG(ERROR) << "[FoundationposeRender] The window coordinates should be y_up or y_down";
     return false;
   }
@@ -186,203 +186,215 @@ bool ProjectMatrixFromIntrinsics(Eigen::Matrix4f& proj_output,
 }
 
 void WrapImgPtrToNHWCTensor(
-    uint8_t* input_ptr, nvcv::Tensor& output_tensor, int N, int H, int W, int C) {
+    uint8_t *input_ptr, nvcv::Tensor &output_tensor, int N, int H, int W, int C)
+{
   nvcv::TensorDataStridedCuda::Buffer output_buffer;
   output_buffer.strides[3] = sizeof(uint8_t);
   output_buffer.strides[2] = C * output_buffer.strides[3];
   output_buffer.strides[1] = W * output_buffer.strides[2];
   output_buffer.strides[0] = H * output_buffer.strides[1];
-  output_buffer.basePtr = reinterpret_cast<NVCVByte*>(input_ptr);
+  output_buffer.basePtr    = reinterpret_cast<NVCVByte *>(input_ptr);
 
   nvcv::TensorShape::ShapeType shape{N, H, W, C};
-  nvcv::TensorShape tensor_shape{shape, "NHWC"};
-  nvcv::TensorDataStridedCuda output_data(tensor_shape, nvcv::TYPE_U8, output_buffer);
+  nvcv::TensorShape            tensor_shape{shape, "NHWC"};
+  nvcv::TensorDataStridedCuda  output_data(tensor_shape, nvcv::TYPE_U8, output_buffer);
   output_tensor = nvcv::TensorWrapData(output_data);
 }
 
 void WrapFloatPtrToNHWCTensor(
-    float* input_ptr, nvcv::Tensor& output_tensor, int N, int H, int W, int C) {
+    float *input_ptr, nvcv::Tensor &output_tensor, int N, int H, int W, int C)
+{
   nvcv::TensorDataStridedCuda::Buffer output_buffer;
   output_buffer.strides[3] = sizeof(float);
   output_buffer.strides[2] = C * output_buffer.strides[3];
   output_buffer.strides[1] = W * output_buffer.strides[2];
   output_buffer.strides[0] = H * output_buffer.strides[1];
-  output_buffer.basePtr = reinterpret_cast<NVCVByte*>(input_ptr);
+  output_buffer.basePtr    = reinterpret_cast<NVCVByte *>(input_ptr);
 
   nvcv::TensorShape::ShapeType shape{N, H, W, C};
-  nvcv::TensorShape tensor_shape{shape, "NHWC"};
-  nvcv::TensorDataStridedCuda output_data(tensor_shape, nvcv::TYPE_F32, output_buffer);
+  nvcv::TensorShape            tensor_shape{shape, "NHWC"};
+  nvcv::TensorDataStridedCuda  output_data(tensor_shape, nvcv::TYPE_F32, output_buffer);
   output_tensor = nvcv::TensorWrapData(output_data);
 }
 
-
-
-
 FoundationPoseRenderer::FoundationPoseRenderer(std::shared_ptr<TexturedMeshLoader> mesh_loader,
-                                              const Eigen::Matrix3f& intrinsic,
-                                              const int input_poses_num,
-                                              const float crop_ratio,
-                                              const int crop_window_H,
-                                              const int crop_window_W,
-                                              const float min_depth,
-                                              const float max_depth)
-                                            : mesh_loader_(mesh_loader),
-                                              intrinsic_(intrinsic),
-                                              input_poses_num_(input_poses_num),
-                                              crop_ratio_(crop_ratio),
-                                              crop_window_H_(crop_window_H),
-                                              crop_window_W_(crop_window_W),
-                                              min_depth_(min_depth),
-                                              max_depth_(max_depth)
+                                               const Eigen::Matrix3f              &intrinsic,
+                                               const int                           input_poses_num,
+                                               const float                         crop_ratio,
+                                               const int                           crop_window_H,
+                                               const int                           crop_window_W,
+                                               const float                         min_depth,
+                                               const float                         max_depth)
+    : mesh_loader_(mesh_loader),
+      intrinsic_(intrinsic),
+      input_poses_num_(input_poses_num),
+      crop_ratio_(crop_ratio),
+      crop_window_H_(crop_window_H),
+      crop_window_W_(crop_window_W),
+      min_depth_(min_depth),
+      max_depth_(max_depth)
 {
-  if (cudaStreamCreate(&cuda_stream_render_) != cudaSuccess
-    || cudaStreamCreate(&cuda_stream_transf_) != cudaSuccess) 
+  if (cudaStreamCreate(&cuda_stream_render_) != cudaSuccess ||
+      cudaStreamCreate(&cuda_stream_transf_) != cudaSuccess)
   {
     throw std::runtime_error("[FoundationPose Renderer] Failed to create cuda stream!!!");
   }
 
   // 1. load mesh file
   bool load_mesh_suc = LoadTexturedMesh();
-  if (!load_mesh_suc) {
+  if (!load_mesh_suc)
+  {
     throw std::runtime_error("[FoundationPose Renderer] Failed to load textured mesh!!!");
   }
 
   // 2. prepare device buffer
   bool prepare_buf_suc = PrepareBuffer();
-  if (!prepare_buf_suc) {
+  if (!prepare_buf_suc)
+  {
     throw std::runtime_error("[FoundationPose Renderer] Failed to prepare buffer!!!");
   }
 }
 
 FoundationPoseRenderer::~FoundationPoseRenderer()
 {
-  if (cudaStreamDestroy(cuda_stream_render_) != cudaSuccess
-    || cudaStreamDestroy(cuda_stream_transf_) != cudaSuccess)
+  if (cudaStreamDestroy(cuda_stream_render_) != cudaSuccess ||
+      cudaStreamDestroy(cuda_stream_transf_) != cudaSuccess)
   {
     LOG(WARNING) << "[FoundationPoseRenderer] Failed to destroy cuda stream !";
   }
 }
 
-
-bool
-FoundationPoseRenderer::PrepareBuffer()
+bool FoundationPoseRenderer::PrepareBuffer()
 {
   // nvdiffrast render 用到的缓存以及渲染器
-  size_t pose_clip_size =  num_vertices_ * (kVertexPoints + 1) * input_poses_num_ * sizeof(float);
-  size_t pts_cam_size = num_vertices_ * kVertexPoints * input_poses_num_ * sizeof(float);
+  size_t pose_clip_size = num_vertices_ * (kVertexPoints + 1) * input_poses_num_ * sizeof(float);
+  size_t pts_cam_size   = num_vertices_ * kVertexPoints * input_poses_num_ * sizeof(float);
   size_t diffuse_intensity_size = num_vertices_ * input_poses_num_ * sizeof(float);
-  size_t diffuse_intensity_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * sizeof(float);
-  size_t rast_out_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * (kVertexPoints + 1) * sizeof(float);
-  size_t color_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
-  size_t xyz_map_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
-  size_t texcoords_out_size = input_poses_num_ * crop_window_H_ * crop_window_W_ * kTexcoordsDim * sizeof(float);
+  size_t diffuse_intensity_map_size =
+      input_poses_num_ * crop_window_H_ * crop_window_W_ * sizeof(float);
+  size_t rast_out_size =
+      input_poses_num_ * crop_window_H_ * crop_window_W_ * (kVertexPoints + 1) * sizeof(float);
+  size_t color_size =
+      input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
+  size_t xyz_map_size =
+      input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
+  size_t texcoords_out_size =
+      input_poses_num_ * crop_window_H_ * crop_window_W_ * kTexcoordsDim * sizeof(float);
 
   // nvdiffrast render时相关缓存
-  float* _pose_clip_device;
-  float* _rast_out_device;
-  float* _pts_cam_device;
-  float* _diffuse_intensity_device;
-  float* _diffuse_intensity_map_device;
-  float* _texcoords_out_device;
-  float* _color_device;
-  float* _xyz_map_device;
-  float* _render_crop_rgb_tensor_device;
-  float* _render_crop_xyz_map_tensor_device;
+  float *_pose_clip_device;
+  float *_rast_out_device;
+  float *_pts_cam_device;
+  float *_diffuse_intensity_device;
+  float *_diffuse_intensity_map_device;
+  float *_texcoords_out_device;
+  float *_color_device;
+  float *_xyz_map_device;
+  float *_render_crop_rgb_tensor_device;
+  float *_render_crop_xyz_map_tensor_device;
 
   // transf 相关缓存
-  float* _transformed_rgb_device;
-  float* _transformed_xyz_map_device;
-  uint8_t* _transformed_crop_rgb_tensor_device;
+  float   *_transformed_rgb_device;
+  float   *_transformed_xyz_map_device;
+  uint8_t *_transformed_crop_rgb_tensor_device;
 
   // 输入的假设位姿
-  float* _input_poses_device;
-
+  float *_input_poses_device;
 
   // render用到的缓存
   CHECK_CUDA(cudaMalloc(&_pose_clip_device, pose_clip_size),
-            "[FoundationPoseRenderer] cudaMalloc `_pose_clip_device` FAILED!!!");
-  pose_clip_device_ = DeviceBufferUniquePtrType<float>(_pose_clip_device, CudaMemoryDeleter<float>());
+             "[FoundationPoseRenderer] cudaMalloc `_pose_clip_device` FAILED!!!");
+  pose_clip_device_ =
+      DeviceBufferUniquePtrType<float>(_pose_clip_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_rast_out_device, rast_out_size),
-            "[FoundationPoseRenderer] cudaMalloc `_rast_out_device` FAILED!!!");
+             "[FoundationPoseRenderer] cudaMalloc `_rast_out_device` FAILED!!!");
   rast_out_device_ = DeviceBufferUniquePtrType<float>(_rast_out_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_pts_cam_device, pts_cam_size),
-            "[FoundationPoseRenderer] cudaMalloc `_pts_cam_device` FAILED!!!");
+             "[FoundationPoseRenderer] cudaMalloc `_pts_cam_device` FAILED!!!");
   pts_cam_device_ = DeviceBufferUniquePtrType<float>(_pts_cam_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_diffuse_intensity_device, diffuse_intensity_size),
-            "[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_device` FAILED!!!");
-  diffuse_intensity_device_ = DeviceBufferUniquePtrType<float>(_diffuse_intensity_device, CudaMemoryDeleter<float>());
+             "[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_device` FAILED!!!");
+  diffuse_intensity_device_ =
+      DeviceBufferUniquePtrType<float>(_diffuse_intensity_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_diffuse_intensity_map_device, diffuse_intensity_map_size),
-            "[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_map_device` FAILED!!!");
-  diffuse_intensity_map_device_ = DeviceBufferUniquePtrType<float>(_diffuse_intensity_map_device, CudaMemoryDeleter<float>());
+             "[FoundationPoseRenderer] cudaMalloc `_diffuse_intensity_map_device` FAILED!!!");
+  diffuse_intensity_map_device_ =
+      DeviceBufferUniquePtrType<float>(_diffuse_intensity_map_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_texcoords_out_device, texcoords_out_size),
-            "[FoundationPoseRenderer] cudaMalloc `_texcoords_out_device` FAILED!!!");
-  texcoords_out_device_ = DeviceBufferUniquePtrType<float>(_texcoords_out_device, CudaMemoryDeleter<float>());
+             "[FoundationPoseRenderer] cudaMalloc `_texcoords_out_device` FAILED!!!");
+  texcoords_out_device_ =
+      DeviceBufferUniquePtrType<float>(_texcoords_out_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_color_device, color_size),
-            "[FoundationPoseRenderer] cudaMalloc `_color_device` FAILED!!!");
+             "[FoundationPoseRenderer] cudaMalloc `_color_device` FAILED!!!");
   color_device_ = DeviceBufferUniquePtrType<float>(_color_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_xyz_map_device, xyz_map_size),
-            "[FoundationPoseRenderer] cudaMalloc `_xyz_map_device` FAILED!!!");
+             "[FoundationPoseRenderer] cudaMalloc `_xyz_map_device` FAILED!!!");
   xyz_map_device_ = DeviceBufferUniquePtrType<float>(_xyz_map_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_render_crop_rgb_tensor_device, color_size),
-            "[FoundationPoseRenderer] cudaMalloc `_color_device` FAILED!!!");
-  render_crop_rgb_tensor_device_ = DeviceBufferUniquePtrType<float>(_render_crop_rgb_tensor_device, CudaMemoryDeleter<float>());
+             "[FoundationPoseRenderer] cudaMalloc `_render_crop_rgb_tensor_device` FAILED!!!");
+  render_crop_rgb_tensor_device_ =
+      DeviceBufferUniquePtrType<float>(_render_crop_rgb_tensor_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_render_crop_xyz_map_tensor_device, xyz_map_size),
-            "[FoundationPoseRenderer] cudaMalloc `_xyz_map_device` FAILED!!!");
-  render_crop_xyz_map_tensor_device_ = DeviceBufferUniquePtrType<float>(_render_crop_xyz_map_tensor_device, CudaMemoryDeleter<float>());
+             "[FoundationPoseRenderer] cudaMalloc `_render_crop_xyz_map_tensor_device` FAILED!!!");
+  render_crop_xyz_map_tensor_device_ = DeviceBufferUniquePtrType<float>(
+      _render_crop_xyz_map_tensor_device, CudaMemoryDeleter<float>());
 
   // transf 用到的缓存
-  const size_t device_buffer_byte_size 
-                      = input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
-  
+  const size_t device_buffer_byte_size =
+      input_poses_num_ * crop_window_H_ * crop_window_W_ * kNumChannels * sizeof(float);
+
   CHECK_CUDA(cudaMalloc(&_transformed_xyz_map_device, device_buffer_byte_size),
-            "[FoundationPoseRenderer] cudaMalloc `_transformed_xyz_map_device` FAILED!!!");
-  transformed_xyz_map_device_ = DeviceBufferUniquePtrType<float>(_transformed_xyz_map_device, CudaMemoryDeleter<float>());
-  
+             "[FoundationPoseRenderer] cudaMalloc `_transformed_xyz_map_device` FAILED!!!");
+  transformed_xyz_map_device_ =
+      DeviceBufferUniquePtrType<float>(_transformed_xyz_map_device, CudaMemoryDeleter<float>());
+
   CHECK_CUDA(cudaMalloc(&_transformed_rgb_device, device_buffer_byte_size),
-            "[FoundationPoseRenderer] cudaMalloc `_transformed_rgb_device` FAILED!!!");
-  transformed_rgb_device_ = DeviceBufferUniquePtrType<float>(_transformed_rgb_device, CudaMemoryDeleter<float>());
+             "[FoundationPoseRenderer] cudaMalloc `_transformed_rgb_device` FAILED!!!");
+  transformed_rgb_device_ =
+      DeviceBufferUniquePtrType<float>(_transformed_rgb_device, CudaMemoryDeleter<float>());
 
   const size_t crop_rgb_byte_size = 1 * crop_window_H_ * crop_window_W_ * kNumChannels;
   CHECK_CUDA(cudaMalloc(&_transformed_crop_rgb_tensor_device, crop_rgb_byte_size),
-            "[FoundationPoseRenderer] cudaMalloc `_transformed_crop_rgb_tensor_device` FAILED!!!");
-  transformed_crop_rgb_tensor_device_ = DeviceBufferUniquePtrType<uint8_t>(_transformed_crop_rgb_tensor_device, CudaMemoryDeleter<uint8_t>());
+             "[FoundationPoseRenderer] cudaMalloc `_transformed_crop_rgb_tensor_device` FAILED!!!");
+  transformed_crop_rgb_tensor_device_ = DeviceBufferUniquePtrType<uint8_t>(
+      _transformed_crop_rgb_tensor_device, CudaMemoryDeleter<uint8_t>());
 
   // poses 的device缓存
-  CHECK_CUDA(cudaMalloc(&_input_poses_device, input_poses_num_ * kTSMatrixDim * kTSMatrixDim * sizeof(float)),
-            "[FoundationPoseRenderer] cudaMalloc `_input_poses_device` FAILED!!!");
-  input_poses_device_ = DeviceBufferUniquePtrType<float>(_input_poses_device, CudaMemoryDeleter<float>());
-
+  CHECK_CUDA(cudaMalloc(&_input_poses_device,
+                        input_poses_num_ * kTSMatrixDim * kTSMatrixDim * sizeof(float)),
+             "[FoundationPoseRenderer] cudaMalloc `_input_poses_device` FAILED!!!");
+  input_poses_device_ =
+      DeviceBufferUniquePtrType<float>(_input_poses_device, CudaMemoryDeleter<float>());
 
   cr_ = std::make_unique<CR::CudaRaster>();
 
   return true;
 }
 
-
-bool 
-FoundationPoseRenderer::LoadTexturedMesh()
+bool FoundationPoseRenderer::LoadTexturedMesh()
 {
-  const auto& mesh_model_center = mesh_loader_->GetMeshModelCenter();  
-  const auto& mesh_vertices = mesh_loader_->GetMeshVertices();
-  const auto& mesh_vertex_normals = mesh_loader_->GetMeshVertexNormals();
-  const auto& mesh_texcoords = mesh_loader_->GetMeshTextureCoords();
-  const auto& mesh_faces = mesh_loader_->GetMeshFaces();
-  const auto& rgb_texture_map = mesh_loader_->GetTextureMap();
-  mesh_diameter_ = mesh_loader_->GetMeshDiameter();
+  const auto &mesh_model_center   = mesh_loader_->GetMeshModelCenter();
+  const auto &mesh_vertices       = mesh_loader_->GetMeshVertices();
+  const auto &mesh_vertex_normals = mesh_loader_->GetMeshVertexNormals();
+  const auto &mesh_texcoords      = mesh_loader_->GetMeshTextureCoords();
+  const auto &mesh_faces          = mesh_loader_->GetMeshFaces();
+  const auto &rgb_texture_map     = mesh_loader_->GetTextureMap();
+  mesh_diameter_                  = mesh_loader_->GetMeshDiameter();
 
   std::vector<float> vertex_normals;
 
   // Walk through each of the mesh's vertices
-  for (unsigned int v = 0; v < mesh_vertices.size(); v++) {
+  for (unsigned int v = 0; v < mesh_vertices.size(); v++)
+  {
     vertices_.push_back(mesh_vertices[v].x - mesh_model_center[0]);
     vertices_.push_back(mesh_vertices[v].y - mesh_model_center[1]);
     vertices_.push_back(mesh_vertices[v].z - mesh_model_center[2]);
@@ -392,22 +404,24 @@ FoundationPoseRenderer::LoadTexturedMesh()
     vertex_normals.push_back(mesh_vertex_normals[v].z);
 
     // Check if the mesh has texture coordinates
-    if (mesh_texcoords.size() >= 1) {
+    if (mesh_texcoords.size() >= 1)
+    {
       texcoords_.push_back(mesh_texcoords[0][v].x);
       texcoords_.push_back(1 - mesh_texcoords[0][v].y);
     }
   }
 
   // Walk through each of the mesh's faces (a face is a mesh its triangle)
-  for (unsigned int f = 0; f < mesh_faces.size() ; f++) {
-    const aiFace& face = mesh_faces[f];
+  for (unsigned int f = 0; f < mesh_faces.size(); f++)
+  {
+    const aiFace &face = mesh_faces[f];
 
     // We assume the face is a triangle due to aiProcess_Triangulate
-    CHECK_STATE(face.mNumIndices == 3,
-                "Only triangle is supported, but the object face has "
-                + std::to_string(face.mNumIndices) + " vertices. ");   
-       
-    for (unsigned int i = 0; i < face.mNumIndices; i++) {
+    CHECK_STATE(face.mNumIndices == 3, "Only triangle is supported, but the object face has " +
+                                           std::to_string(face.mNumIndices) + " vertices. ");
+
+    for (unsigned int i = 0; i < face.mNumIndices; i++)
+    {
       mesh_faces_.push_back(face.mIndices[i]);
     }
   }
@@ -417,15 +431,17 @@ FoundationPoseRenderer::LoadTexturedMesh()
               "[FoundationposeRender] Texture map is not continuous");
 
   CHECK_STATE(rgb_texture_map.channels() == kNumChannels,
-              "[FoundationposeRender] Recieved texture map has" + std::to_string(rgb_texture_map.channels())
-              + " number of channels, expected " + std::to_string(kNumChannels)); 
+              "[FoundationposeRender] Received texture map has " +
+                  std::to_string(rgb_texture_map.channels()) + " number of channels, expected " +
+                  std::to_string(kNumChannels));
 
   texture_map_height_ = rgb_texture_map.rows;
-  texture_map_width_ = rgb_texture_map.cols;
+  texture_map_width_  = rgb_texture_map.cols;
 
   // The number of vertices is the size of the vertices array divided by 3 (since it's x,y,z)
   num_vertices_ = vertices_.size() / kVertexPoints;
-  // The number of texture coordinates is the size of the texcoords array divided by 2 (since it's u,v)
+  // The number of texture coordinates is the size of the texcoords array divided by 2 (since it's
+  // u,v)
   num_texcoords_ = texcoords_.size() / kTexcoordsDim;
   // The number of faces is the size of the faces array divided by 3 (since each face has 3 edges)
   num_faces_ = mesh_faces_.size() / kTriangleVertices;
@@ -433,188 +449,169 @@ FoundationPoseRenderer::LoadTexturedMesh()
   mesh_vertices_ =
       Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>(
           vertices_.data(), num_vertices_, 3);
-  
+
   // Allocate device memory for mesh data
-  size_t vertices_size = vertices_.size() * sizeof(float);
-  size_t faces_size = mesh_faces_.size() * sizeof(int32_t);
+  size_t vertices_size  = vertices_.size() * sizeof(float);
+  size_t faces_size     = mesh_faces_.size() * sizeof(int32_t);
   size_t texcoords_size = texcoords_.size() * sizeof(float);
 
-  float* _vertices_device;
-  float* _vertex_normals_device;
-  float* _texcoords_device;
-  int32_t* _mesh_faces_device;
-  uint8_t* _texture_map_device;
+  float   *_vertices_device;
+  float   *_vertex_normals_device;
+  float   *_texcoords_device;
+  int32_t *_mesh_faces_device;
+  uint8_t *_texture_map_device;
 
   CHECK_CUDA(cudaMalloc(&_vertices_device, vertices_size),
-            "[FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!");
+             "[FoundationposeRender] cudaMalloc `vertices_device` FAILED!!!");
   vertices_device_ = DeviceBufferUniquePtrType<float>(_vertices_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_vertex_normals_device, vertices_size),
-            "[FoundationposeRender] cudaMalloc `vertex_normals_device` FAILED!!!");
-  vertex_normals_device_ = DeviceBufferUniquePtrType<float>(_vertex_normals_device, CudaMemoryDeleter<float>());
+             "[FoundationposeRender] cudaMalloc `vertex_normals_device` FAILED!!!");
+  vertex_normals_device_ =
+      DeviceBufferUniquePtrType<float>(_vertex_normals_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_mesh_faces_device, faces_size),
-            "[FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!");
-  mesh_faces_device_ = DeviceBufferUniquePtrType<int32_t>(_mesh_faces_device, CudaMemoryDeleter<int32_t>());
+             "[FoundationposeRender] cudaMalloc `mesh_faces_device` FAILED!!!");
+  mesh_faces_device_ =
+      DeviceBufferUniquePtrType<int32_t>(_mesh_faces_device, CudaMemoryDeleter<int32_t>());
 
   CHECK_CUDA(cudaMalloc(&_texcoords_device, texcoords_size),
-            "[FoundationposeRender] cudaMalloc `texcoords_device_` FAILED!!!");
-  texcoords_device_ = DeviceBufferUniquePtrType<float>(_texcoords_device, CudaMemoryDeleter<float>());
+             "[FoundationposeRender] cudaMalloc `texcoords_device_` FAILED!!!");
+  texcoords_device_ =
+      DeviceBufferUniquePtrType<float>(_texcoords_device, CudaMemoryDeleter<float>());
 
   CHECK_CUDA(cudaMalloc(&_texture_map_device, rgb_texture_map.total() * kNumChannels),
-            "[FoundationposeRender] cudaMalloc `texture_map_device_` FAILED!!!");
-  texture_map_device_ = DeviceBufferUniquePtrType<uint8_t>(_texture_map_device, CudaMemoryDeleter<uint8_t>());
-
-  CHECK_CUDA(cudaMemcpy(vertices_device_.get(),
-                        vertices_.data(),
-                        vertices_size,
-                        cudaMemcpyHostToDevice),
-            "[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!");
-  CHECK_CUDA(cudaMemcpy(vertex_normals_device_.get(),
-                        vertex_normals.data(),
-                        vertices_size,
-                        cudaMemcpyHostToDevice),
-            "[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!");
-  CHECK_CUDA(cudaMemcpy(mesh_faces_device_.get(), 
-                        mesh_faces_.data(), 
-                        faces_size, 
+             "[FoundationposeRender] cudaMalloc `texture_map_device_` FAILED!!!");
+  texture_map_device_ =
+      DeviceBufferUniquePtrType<uint8_t>(_texture_map_device, CudaMemoryDeleter<uint8_t>());
+
+  CHECK_CUDA(
+      cudaMemcpy(vertices_device_.get(), vertices_.data(), vertices_size, cudaMemcpyHostToDevice),
+      "[FoundationposeRender] cudaMemcpy vertices_host -> vertices_device FAILED!!!");
+  CHECK_CUDA(cudaMemcpy(vertex_normals_device_.get(), vertex_normals.data(), vertices_size,
                         cudaMemcpyHostToDevice),
-            "[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!");
-  CHECK_CUDA(cudaMemcpy(texcoords_device_.get(), 
-                        texcoords_.data(), 
-                        texcoords_.size() * sizeof(float), 
-                        cudaMemcpyHostToDevice),
-            "[FoundationposeRender] cudaMemcpy texcoords_host -> texcoords_device_ FAILED!!!");
-  CHECK_CUDA(cudaMemcpy(texture_map_device_.get(), 
-                        reinterpret_cast<uint8_t*>(rgb_texture_map.data),
-                        rgb_texture_map.total() * kNumChannels, 
-                        cudaMemcpyHostToDevice),
-            "[FoundationposeRender] cudaMemcpy rgb_texture_map_host -> texture_map_device_ FAILED!!!");
+             "[FoundationposeRender] cudaMemcpy vertex_normals -> vertex_normals_device FAILED!!!");
+  CHECK_CUDA(
+      cudaMemcpy(mesh_faces_device_.get(), mesh_faces_.data(), faces_size, cudaMemcpyHostToDevice),
+      "[FoundationposeRender] cudaMemcpy mesh_faces_host -> mesh_faces_device FAILED!!!");
+  CHECK_CUDA(cudaMemcpy(texcoords_device_.get(), texcoords_.data(),
+                        texcoords_.size() * sizeof(float), cudaMemcpyHostToDevice),
+             "[FoundationposeRender] cudaMemcpy texcoords_host -> texcoords_device_ FAILED!!!");
+  CHECK_CUDA(
+      cudaMemcpy(texture_map_device_.get(), reinterpret_cast<uint8_t *>(rgb_texture_map.data),
+                 rgb_texture_map.total() * kNumChannels, cudaMemcpyHostToDevice),
+      "[FoundationposeRender] cudaMemcpy rgb_texture_map_host -> texture_map_device_ FAILED!!!");
 
   // Preprocess mesh data
   nvcv::Tensor texture_map_tensor;
-  WrapImgPtrToNHWCTensor(texture_map_device_.get(), texture_map_tensor, 1, texture_map_height_, texture_map_width_, kNumChannels);
+  WrapImgPtrToNHWCTensor(texture_map_device_.get(), texture_map_tensor, 1, texture_map_height_,
+                         texture_map_width_, kNumChannels);
 
   nvcv::TensorShape::ShapeType shape{1, texture_map_height_, texture_map_width_, kNumChannels};
-  nvcv::TensorShape tensor_shape{shape, "NHWC"};
+  nvcv::TensorShape            tensor_shape{shape, "NHWC"};
   float_texture_map_tensor_ = nvcv::Tensor(tensor_shape, nvcv::TYPE_F32);
 
-  const float scale_factor =  1.0f / 255.0f;
+  const float       scale_factor = 1.0f / 255.0f;
   cvcuda::ConvertTo convert_op;
-  convert_op(cuda_stream_render_, texture_map_tensor, float_texture_map_tensor_, scale_factor, 0.0f);
+  convert_op(cuda_stream_render_, texture_map_tensor, float_texture_map_tensor_, scale_factor,
+             0.0f);
 
-  return true;  
+  return true;
 }
 
-bool FoundationPoseRenderer::TransformVerticesOnCUDA(cudaStream_t stream,
-                  const std::vector<Eigen::MatrixXf>& tfs,
-                  float* output_buffer) 
+bool FoundationPoseRenderer::TransformVerticesOnCUDA(cudaStream_t                        stream,
+                                                     const std::vector<Eigen::MatrixXf> &tfs,
+                                                     float *output_buffer)
 {
   // Get the dimensions of the inputs
   int tfs_size = tfs.size();
-  CHECK_STATE(tfs_size != 0,
-        "[FoundationposeRender] The transfomation matrix is empty! ");
+  CHECK_STATE(tfs_size != 0, "[FoundationposeRender] The transformation matrix is empty! ");
 
   CHECK_STATE(tfs[0].cols() == tfs[0].rows(),
-        "[FoundationposeRender] The transfomation matrix has different rows and cols! ");
+              "[FoundationposeRender] The transformation matrix has different rows and cols! ");
 
   const int total_elements = tfs[0].cols() * tfs[0].rows();
 
-  float* transform_device_buffer_ = nullptr;
+  float *transform_device_buffer_ = nullptr;
   cudaMallocAsync(&transform_device_buffer_, tfs_size * total_elements * sizeof(float), stream);
 
-  for (int i = 0 ; i < tfs_size ; ++ i) {
-    cudaMemcpyAsync(transform_device_buffer_ + i * total_elements, 
-                    tfs[i].data(), 
-                    total_elements * sizeof(float), 
-                    cudaMemcpyHostToDevice, 
-                    stream);
+  for (int i = 0; i < tfs_size; ++i)
+  {
+    cudaMemcpyAsync(transform_device_buffer_ + i * total_elements, tfs[i].data(),
+                    total_elements * sizeof(float), cudaMemcpyHostToDevice, stream);
   }
 
-  foundationpose_render::transform_points(stream, 
-                                          transform_device_buffer_, 
-                                          tfs_size, 
-                                          vertices_device_.get(), 
-                                          num_vertices_, 
-                                          output_buffer);
-  
+  foundationpose_render::transform_points(stream, transform_device_buffer_, tfs_size,
+                                          vertices_device_.get(), num_vertices_, output_buffer);
+
   cudaFreeAsync(transform_device_buffer_, stream);
   return true;
 }
 
 bool FoundationPoseRenderer::TransformVertexNormalsOnCUDA(cudaStream_t stream,
-                          const std::vector<Eigen::MatrixXf>& tfs,
-                          float* output_buffer)
+                                                          const std::vector<Eigen::MatrixXf> &tfs,
+                                                          float *output_buffer)
 {
   // Get the dimensions of the inputs
   int tfs_size = tfs.size();
-  CHECK_STATE(tfs_size != 0,
-        "[FoundationposeRender] The transfomation matrix is empty! ");
+  CHECK_STATE(tfs_size != 0, "[FoundationposeRender] The transformation matrix is empty! ");
 
   CHECK_STATE(tfs[0].cols() == tfs[0].rows(),
-        "[FoundationposeRender] The transfomation matrix has different rows and cols! ");
+              "[FoundationposeRender] The transformation matrix has different rows and cols! ");
 
   const int total_elements = tfs[0].cols() * tfs[0].rows();
 
-  float* transform_device_buffer_ = nullptr;
+  float *transform_device_buffer_ = nullptr;
   cudaMallocAsync(&transform_device_buffer_, tfs_size * total_elements * sizeof(float), stream);
 
-  for (int i = 0 ; i < tfs_size ; ++ i) {
-    cudaMemcpyAsync(transform_device_buffer_ + i * total_elements, 
-                    tfs[i].data(), 
-                    total_elements * sizeof(float), 
-                    cudaMemcpyHostToDevice, 
-                    stream);
+  for (int i = 0; i < tfs_size; ++i)
+  {
+    cudaMemcpyAsync(transform_device_buffer_ + i * total_elements, tfs[i].data(),
+                    total_elements * sizeof(float), cudaMemcpyHostToDevice, stream);
   }
 
-  foundationpose_render::transform_normals(stream, transform_device_buffer_, tfs_size, vertex_normals_device_.get(), num_vertices_, output_buffer);
+  foundationpose_render::transform_normals(stream, transform_device_buffer_, tfs_size,
+                                           vertex_normals_device_.get(), num_vertices_,
+                                           output_buffer);
 
   cudaFreeAsync(transform_device_buffer_, stream);
   return true;
 }
 
 bool FoundationPoseRenderer::GeneratePoseClipOnCUDA(cudaStream_t stream,
-                      float* output_buffer,
-                      const std::vector<Eigen::MatrixXf>& poses, 
-                      const RowMajorMatrix& bbox2d, 
-                      const Eigen::Matrix3f& K, 
-                      int rgb_H, int rgb_W) 
+                                                    float       *output_buffer,
+                                                    const std::vector<Eigen::MatrixXf> &poses,
+                                                    const RowMajorMatrix               &bbox2d,
+                                                    const Eigen::Matrix3f              &K,
+                                                    int                                 rgb_H,
+                                                    int                                 rgb_W)
 {
   const int tfs_size = poses.size();
   CHECK_STATE(tfs_size > 0, "[FoundationPoseRender] `GeneratePoseClip` Got empty poses!!!");
   Eigen::Matrix4f projection_mat;
   CHECK_STATE(ProjectMatrixFromIntrinsics(projection_mat, K, rgb_H, rgb_W),
-        "[FoundationPoseRender] ProjectMatrixFromIntrinsics Failed!!!");
+              "[FoundationPoseRender] ProjectMatrixFromIntrinsics Failed!!!");
 
-  float* transform_buffer_device;
+  float    *transform_buffer_device;
   const int transform_total_elements = poses[0].cols() * poses[0].rows();
-  cudaMallocAsync(&transform_buffer_device, tfs_size * transform_total_elements * sizeof(float), stream);
-  for (int i = 0 ; i < tfs_size ; ++ i) {
+  cudaMallocAsync(&transform_buffer_device, tfs_size * transform_total_elements * sizeof(float),
+                  stream);
+  for (int i = 0; i < tfs_size; ++i)
+  {
     Eigen::Matrix4f transform_matrix = projection_mat * (kGLCamInCVCam * poses[i]);
-    cudaMemcpyAsync(transform_buffer_device + i * transform_total_elements, 
-                    transform_matrix.data(), 
-                    transform_total_elements * sizeof(float), 
-                    cudaMemcpyHostToDevice, 
-                    stream); 
+    cudaMemcpyAsync(transform_buffer_device + i * transform_total_elements, transform_matrix.data(),
+                    transform_total_elements * sizeof(float), cudaMemcpyHostToDevice, stream);
   }
 
-  float* bbox2d_buffer_device;
+  float    *bbox2d_buffer_device;
   const int bbox2d_matrix_total_elements = bbox2d.rows() * bbox2d.cols();
   cudaMallocAsync(&bbox2d_buffer_device, bbox2d_matrix_total_elements * sizeof(float), stream);
-  cudaMemcpyAsync(bbox2d_buffer_device, 
-                  bbox2d.data(), 
-                  bbox2d_matrix_total_elements * sizeof(float), 
-                  cudaMemcpyHostToDevice, 
-                  stream);
+  cudaMemcpyAsync(bbox2d_buffer_device, bbox2d.data(), bbox2d_matrix_total_elements * sizeof(float),
+                  cudaMemcpyHostToDevice, stream);
 
-  foundationpose_render::generate_pose_clip(stream, 
-                                          transform_buffer_device, 
-                                          bbox2d_buffer_device,
-                                          tfs_size, 
-                                          vertices_device_.get(), 
-                                          num_vertices_, 
-                                          output_buffer, 
-                                          rgb_H, rgb_W);
+  foundationpose_render::generate_pose_clip(stream, transform_buffer_device, bbox2d_buffer_device,
+                                            tfs_size, vertices_device_.get(), num_vertices_,
+                                            output_buffer, rgb_H, rgb_W);
 
   cudaFreeAsync(bbox2d_buffer_device, stream);
   cudaFreeAsync(transform_buffer_device, stream);
@@ -622,86 +619,66 @@ bool FoundationPoseRenderer::GeneratePoseClipOnCUDA(cudaStream_t stream,
   return true;
 }
 
-
-
-bool 
-FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream, 
-                                        const std::vector<Eigen::MatrixXf>& poses, 
-                                        const Eigen::Matrix3f& K, 
-                                        const RowMajorMatrix& bbox2d, 
-                                        int rgb_H, int rgb_W, int H, int W, 
-                                        nvcv::Tensor& flip_color_tensor, 
-                                        nvcv::Tensor& flip_xyz_map_tensor) 
+bool FoundationPoseRenderer::NvdiffrastRender(cudaStream_t                        cuda_stream,
+                                              const std::vector<Eigen::MatrixXf> &poses,
+                                              const Eigen::Matrix3f              &K,
+                                              const RowMajorMatrix               &bbox2d,
+                                              int                                 rgb_H,
+                                              int                                 rgb_W,
+                                              int                                 H,
+                                              int                                 W,
+                                              nvcv::Tensor                       &flip_color_tensor,
+                                              nvcv::Tensor &flip_xyz_map_tensor)
 {
   size_t N = poses.size();
   CHECK_STATE(TransformVerticesOnCUDA(cuda_stream, poses, pts_cam_device_.get()),
               "[FoundationPoseRender] Failed transform mesh vertices points !!!");
 
-  CHECK_STATE(GeneratePoseClipOnCUDA(cuda_stream, pose_clip_device_.get(),
-                                   poses, bbox2d, K, rgb_H, rgb_W),
-              "[FoundationposeRender] `GeneratePoseClipCUDA` Failed !!!");
-
-
-  foundationpose_render::rasterize(
-      cuda_stream, cr_.get(),
-      pose_clip_device_.get(), mesh_faces_device_.get(), rast_out_device_.get(),
-      num_vertices_, num_faces_,
-      H, W, N);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] rasterize failed!!!");
-
-  foundationpose_render::interpolate(
-      cuda_stream,
-      pts_cam_device_.get(), rast_out_device_.get(), mesh_faces_device_.get(), xyz_map_device_.get(),
-      num_vertices_, num_faces_, 3, kVertexPoints,
-      H, W, N);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] interpolate failed!!!");
-
-  foundationpose_render::interpolate(
-      cuda_stream,
-      texcoords_device_.get(), rast_out_device_.get(), mesh_faces_device_.get(), texcoords_out_device_.get(),
-      num_vertices_, num_faces_, 2, kTexcoordsDim,
-      H, W, N);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] interpolate failed!!!");
+  CHECK_STATE(
+      GeneratePoseClipOnCUDA(cuda_stream, pose_clip_device_.get(), poses, bbox2d, K, rgb_H, rgb_W),
+      "[FoundationposeRender] `GeneratePoseClipOnCUDA` Failed !!!");
+
+  foundationpose_render::rasterize(cuda_stream, cr_.get(), pose_clip_device_.get(),
+                                   mesh_faces_device_.get(), rast_out_device_.get(), num_vertices_,
+                                   num_faces_, H, W, N);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] rasterize failed!!!");
+
+  foundationpose_render::interpolate(cuda_stream, pts_cam_device_.get(), rast_out_device_.get(),
+                                     mesh_faces_device_.get(), xyz_map_device_.get(), num_vertices_,
+                                     num_faces_, 3, kVertexPoints, H, W, N);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] interpolate failed!!!");
+
+  foundationpose_render::interpolate(cuda_stream, texcoords_device_.get(), rast_out_device_.get(),
+                                     mesh_faces_device_.get(), texcoords_out_device_.get(),
+                                     num_vertices_, num_faces_, 2, kTexcoordsDim, H, W, N);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] interpolate failed!!!");
 
   auto float_texture_map_data = float_texture_map_tensor_.exportData<nvcv::TensorDataStridedCuda>();
-  foundationpose_render::texture(
-      cuda_stream,
-      reinterpret_cast<float*>(float_texture_map_data->basePtr()),
-      texcoords_out_device_.get(),
-      color_device_.get(),
-      texture_map_height_, texture_map_width_, kNumChannels,
-      1, H, W, N);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] texture failed!!!");
-
-  CHECK_STATE(TransformVertexNormalsOnCUDA(cuda_stream, poses, diffuse_intensity_device_.get()), 
-            "[FoundationPoseRenderer] Transform vertex normals failed!!!");
-
-  foundationpose_render::interpolate(cuda_stream, 
-                                     diffuse_intensity_device_.get(), 
-                                     rast_out_device_.get(), 
-                                     mesh_faces_device_.get(),
-                                     diffuse_intensity_map_device_.get(),
-                                     num_vertices_, num_faces_, 3, 1, H, W, N);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] interpolate failed!!!");
-  
-  foundationpose_render::refine_color(cuda_stream, color_device_.get(), 
-                                      diffuse_intensity_map_device_.get(), 
-                                      rast_out_device_.get(),
-                                      color_device_.get(), 
-                                      poses.size(), 0.8, 0.5, H, W);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] refine_color failed!!!");
+  foundationpose_render::texture(cuda_stream,
+                                 reinterpret_cast<float *>(float_texture_map_data->basePtr()),
+                                 texcoords_out_device_.get(), color_device_.get(),
+                                 texture_map_height_, texture_map_width_, kNumChannels, 1, H, W, N);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] texture failed!!!");
+
+  CHECK_STATE(TransformVertexNormalsOnCUDA(cuda_stream, poses, diffuse_intensity_device_.get()),
+              "[FoundationPoseRenderer] Transform vertex normals failed!!!");
+
+  foundationpose_render::interpolate(cuda_stream, diffuse_intensity_device_.get(),
+                                     rast_out_device_.get(), mesh_faces_device_.get(),
+                                     diffuse_intensity_map_device_.get(), num_vertices_, num_faces_,
+                                     3, 1, H, W, N);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] interpolate failed!!!");
+
+  foundationpose_render::refine_color(cuda_stream, color_device_.get(),
+                                      diffuse_intensity_map_device_.get(), rast_out_device_.get(),
+                                      color_device_.get(), poses.size(), 0.8, 0.5, H, W);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] refine_color failed!!!");
 
   float min_value = 0.0;
   float max_value = 1.0;
-  foundationpose_render::clamp(cuda_stream, color_device_.get(), min_value, max_value, N * H * W * kNumChannels);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] clamp failed!!!");
+  foundationpose_render::clamp(cuda_stream, color_device_.get(), min_value, max_value,
+                               N * H * W * kNumChannels);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] clamp failed!!!");
 
   nvcv::Tensor color_tensor, xyz_map_tensor;
   WrapFloatPtrToNHWCTensor(color_device_.get(), color_tensor, N, H, W, kNumChannels);
@@ -709,24 +686,20 @@ FoundationPoseRenderer::NvdiffrastRender(cudaStream_t cuda_stream,
 
   cvcuda::Flip flip_op;
   flip_op(cuda_stream, color_tensor, flip_color_tensor, 0);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] flip_op failed!!!");
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] flip_op failed!!!");
 
   flip_op(cuda_stream, xyz_map_tensor, flip_xyz_map_tensor, 0);
-  CHECK_CUDA(cudaGetLastError(),
-            "[FoundationPoseRenderer] flip_op failed!!!");
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPoseRenderer] flip_op failed!!!");
   return true;
 }
 
-
-bool 
-FoundationPoseRenderer::RenderProcess(cudaStream_t cuda_stream,
-                                      const std::vector<Eigen::MatrixXf>& poses,
-                                      const std::vector<RowMajorMatrix>& tfs,
-                                      void* poses_on_device,
-                                      int input_image_height,
-                                      int input_image_width,
-                                      void* render_input_dst_ptr)
+bool FoundationPoseRenderer::RenderProcess(cudaStream_t                        cuda_stream,
+                                           const std::vector<Eigen::MatrixXf> &poses,
+                                           const std::vector<RowMajorMatrix>  &tfs,
+                                           void                               *poses_on_device,
+                                           int                                 input_image_height,
+                                           int                                 input_image_width,
+                                           void                               *render_input_dst_ptr)
 {
   const int N = poses.size();
   // Convert the bbox2d from vector N of 2*2 matrix into a N*4 matrix
@@ -737,184 +710,160 @@ FoundationPoseRenderer::RenderProcess(cudaStream_t cuda_stream,
   // render
   nvcv::Tensor render_rgb_tensor;
   nvcv::Tensor render_xyz_map_tensor;
-  WrapFloatPtrToNHWCTensor(render_crop_rgb_tensor_device_.get(),
-                          render_rgb_tensor, N, crop_window_H_, crop_window_W_, kNumChannels);
-  WrapFloatPtrToNHWCTensor(render_crop_xyz_map_tensor_device_.get(),
-                          render_xyz_map_tensor, N, crop_window_H_, crop_window_W_, kNumChannels);
+  WrapFloatPtrToNHWCTensor(render_crop_rgb_tensor_device_.get(), render_rgb_tensor, N,
+                           crop_window_H_, crop_window_W_, kNumChannels);
+  WrapFloatPtrToNHWCTensor(render_crop_xyz_map_tensor_device_.get(), render_xyz_map_tensor, N,
+                           crop_window_H_, crop_window_W_, kNumChannels);
 
   // Render the object using give poses
-  CHECK_STATE(NvdiffrastRender(cuda_stream, 
-                        poses, 
-                        intrinsic_, 
-                        bbox2d, 
-                        input_image_height, 
-                        input_image_width, 
-                        crop_window_H_, 
-                        crop_window_W_, 
-                        render_rgb_tensor, 
-                        render_xyz_map_tensor),
+  CHECK_STATE(NvdiffrastRender(cuda_stream, poses, intrinsic_, bbox2d, input_image_height,
+                               input_image_width, crop_window_H_, crop_window_W_, render_rgb_tensor,
+                               render_xyz_map_tensor),
               "[FoundationPose Render] RenderProcess NvdiffrastRender failed!!!");
 
-  auto render_rgb_data = render_rgb_tensor.exportData<nvcv::TensorDataStridedCuda>();
+  auto render_rgb_data     = render_rgb_tensor.exportData<nvcv::TensorDataStridedCuda>();
   auto render_xyz_map_data = render_xyz_map_tensor.exportData<nvcv::TensorDataStridedCuda>();
- 
+
   foundationpose_render::threshold_and_downscale_pointcloud(
-      cuda_stream,
-      reinterpret_cast<float*>(render_xyz_map_data->basePtr()),
-      reinterpret_cast<float*>(poses_on_device),
-      N, crop_window_H_ * crop_window_W_, mesh_diameter_ / 2, min_depth_, max_depth_);
-  CHECK_CUDA(cudaGetLastError(), "[FoundationPose] RenderProcess threshold_and... FAILED!!!"); 
-
-  foundationpose_render::concat(
-    cuda_stream,
-    reinterpret_cast<float*>(render_rgb_data->basePtr()),
-    reinterpret_cast<float*>(render_xyz_map_data->basePtr()),
-    reinterpret_cast<float*>(render_input_dst_ptr),
-    N, crop_window_H_, crop_window_W_, kNumChannels, kNumChannels);
+      cuda_stream, reinterpret_cast<float *>(render_xyz_map_data->basePtr()),
+      reinterpret_cast<float *>(poses_on_device), N, crop_window_H_ * crop_window_W_,
+      mesh_diameter_ / 2, min_depth_, max_depth_);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPose] RenderProcess threshold_and... FAILED!!!");
+
+  foundationpose_render::concat(cuda_stream, reinterpret_cast<float *>(render_rgb_data->basePtr()),
+                                reinterpret_cast<float *>(render_xyz_map_data->basePtr()),
+                                reinterpret_cast<float *>(render_input_dst_ptr), N, crop_window_H_,
+                                crop_window_W_, kNumChannels, kNumChannels);
   CHECK_CUDA(cudaGetLastError(), "[FoundationPose] RenderProcess concat FAILED!!!");
 
   return true;
 }
 
-
-bool 
-FoundationPoseRenderer::TransfProcess(cudaStream_t cuda_stream,
-                                      void* rgb_on_device,
-                                      void* xyz_map_on_device,
-                                      int input_image_height,
-                                      int input_image_width,
-                                      const std::vector<RowMajorMatrix>& tfs,
-                                      void* poses_on_device,
-                                      void* transf_input_dst_ptr)
+bool FoundationPoseRenderer::TransfProcess(cudaStream_t                       cuda_stream,
+                                           void                              *rgb_on_device,
+                                           void                              *xyz_map_on_device,
+                                           int                                input_image_height,
+                                           int                                input_image_width,
+                                           const std::vector<RowMajorMatrix> &tfs,
+                                           void                              *poses_on_device,
+                                           void                              *transf_input_dst_ptr)
 {
   // crop rgb (transformed)
   const size_t N = tfs.size();
 
   nvcv::Tensor rgb_tensor;
   nvcv::Tensor xyz_map_tensor;
-  WrapImgPtrToNHWCTensor(reinterpret_cast<uint8_t*>(rgb_on_device), 
-                        rgb_tensor, 1, input_image_height, input_image_width, kNumChannels);
+  WrapImgPtrToNHWCTensor(reinterpret_cast<uint8_t *>(rgb_on_device), rgb_tensor, 1,
+                         input_image_height, input_image_width, kNumChannels);
 
-  WrapFloatPtrToNHWCTensor(reinterpret_cast<float*>(xyz_map_on_device), 
-                          xyz_map_tensor, 
-                          1, input_image_height, input_image_width, kNumChannels);
+  WrapFloatPtrToNHWCTensor(reinterpret_cast<float *>(xyz_map_on_device), xyz_map_tensor, 1,
+                           input_image_height, input_image_width, kNumChannels);
 
-  const int rgb_flags = NVCV_INTERP_LINEAR;
-  const int xyz_flags = NVCV_INTERP_NEAREST;
-  const float4 border_value = {0,0,0,0};
+  const int    rgb_flags    = NVCV_INTERP_LINEAR;
+  const int    xyz_flags    = NVCV_INTERP_NEAREST;
+  const float4 border_value = {0, 0, 0, 0};
 
-  const float scale_factor =  1.0f / 255.0f;
+  const float             scale_factor = 1.0f / 255.0f;
   cvcuda::WarpPerspective warpPerspectiveOp(0);
-  cvcuda::ConvertTo convert_op;
-  nvcv::Tensor transformed_rgb_tensor;
-  WrapImgPtrToNHWCTensor(transformed_crop_rgb_tensor_device_.get(), 
-                    transformed_rgb_tensor, 1, crop_window_H_, crop_window_W_, kNumChannels);
+  cvcuda::ConvertTo       convert_op;
+  nvcv::Tensor            transformed_rgb_tensor;
+  WrapImgPtrToNHWCTensor(transformed_crop_rgb_tensor_device_.get(), transformed_rgb_tensor, 1,
+                         crop_window_H_, crop_window_W_, kNumChannels);
 
-  for (size_t index = 0; index < N; index++) {
+  for (size_t index = 0; index < N; index++)
+  {
     nvcv::Tensor float_rgb_tensor;
     nvcv::Tensor transformed_xyz_map_tensor;
 
     // get ptr offset from index
     const size_t single_batch_element_size = crop_window_H_ * crop_window_W_ * kNumChannels;
-    WrapFloatPtrToNHWCTensor(transformed_rgb_device_.get() + index * single_batch_element_size, 
-                            float_rgb_tensor, 
-                            1, crop_window_H_, crop_window_W_, kNumChannels);
+    WrapFloatPtrToNHWCTensor(transformed_rgb_device_.get() + index * single_batch_element_size,
+                             float_rgb_tensor, 1, crop_window_H_, crop_window_W_, kNumChannels);
 
-    WrapFloatPtrToNHWCTensor(transformed_xyz_map_device_.get() + index * single_batch_element_size, 
-                            transformed_xyz_map_tensor, 
-                            1, crop_window_H_, crop_window_W_, kNumChannels);
+    WrapFloatPtrToNHWCTensor(transformed_xyz_map_device_.get() + index * single_batch_element_size,
+                             transformed_xyz_map_tensor, 1, crop_window_H_, crop_window_W_,
+                             kNumChannels);
 
     NVCVPerspectiveTransform trans_matrix;
-    for (size_t i = 0; i < kPTMatrixDim; i++) {
-      for (size_t j = 0; j < kPTMatrixDim; j++) {
-        trans_matrix[i*kPTMatrixDim+j] = tfs[index](i,j);
+    for (size_t i = 0; i < kPTMatrixDim; i++)
+    {
+      for (size_t j = 0; j < kPTMatrixDim; j++)
+      {
+        trans_matrix[i * kPTMatrixDim + j] = tfs[index](i, j);
       }
     }
 
-    warpPerspectiveOp(cuda_stream, rgb_tensor, transformed_rgb_tensor, trans_matrix, rgb_flags, NVCV_BORDER_CONSTANT, border_value);
-    CHECK_CUDA(cudaGetLastError(), "[FoundationPose] TransfProcess warpPerspectiveOp on rgb FAILED!!!");
+    warpPerspectiveOp(cuda_stream, rgb_tensor, transformed_rgb_tensor, trans_matrix, rgb_flags,
+                      NVCV_BORDER_CONSTANT, border_value);
+    CHECK_CUDA(cudaGetLastError(),
+               "[FoundationPose] TransfProcess warpPerspectiveOp on rgb FAILED!!!");
 
     convert_op(cuda_stream, transformed_rgb_tensor, float_rgb_tensor, scale_factor, 0.0f);
     CHECK_CUDA(cudaGetLastError(), "[FoundationPose] TransfProcess convert_op on rgb FAILED!!!");
 
-    warpPerspectiveOp(cuda_stream, xyz_map_tensor, transformed_xyz_map_tensor, trans_matrix, xyz_flags, NVCV_BORDER_CONSTANT, border_value);
-    CHECK_CUDA(cudaGetLastError(), "[FoundationPose] TransfProcess warpPerspectiveOp on xyz_map FAILED!!!");
+    warpPerspectiveOp(cuda_stream, xyz_map_tensor, transformed_xyz_map_tensor, trans_matrix,
+                      xyz_flags, NVCV_BORDER_CONSTANT, border_value);
+    CHECK_CUDA(cudaGetLastError(),
+               "[FoundationPose] TransfProcess warpPerspectiveOp on xyz_map FAILED!!!");
   }
 
   foundationpose_render::threshold_and_downscale_pointcloud(
-      cuda_stream,
-      transformed_xyz_map_device_.get(),
-      reinterpret_cast<float*>(poses_on_device),
-      N, crop_window_W_ * crop_window_H_, mesh_diameter_ / 2, min_depth_, max_depth_);
-  CHECK_CUDA(cudaGetLastError(), "[FoundationPose] TransfProcess threshold_and... FAILED!!!"); 
+      cuda_stream, transformed_xyz_map_device_.get(), reinterpret_cast<float *>(poses_on_device), N,
+      crop_window_W_ * crop_window_H_, mesh_diameter_ / 2, min_depth_, max_depth_);
+  CHECK_CUDA(cudaGetLastError(), "[FoundationPose] TransfProcess threshold_and... FAILED!!!");
 
   // concat 到缓存上
-  foundationpose_render::concat(
-    cuda_stream,
-    transformed_rgb_device_.get(), 
-    transformed_xyz_map_device_.get(),
-    reinterpret_cast<float*>(transf_input_dst_ptr),
-    N, crop_window_H_, crop_window_W_, kNumChannels, kNumChannels);
+  foundationpose_render::concat(cuda_stream, transformed_rgb_device_.get(),
+                                transformed_xyz_map_device_.get(),
+                                reinterpret_cast<float *>(transf_input_dst_ptr), N, crop_window_H_,
+                                crop_window_W_, kNumChannels, kNumChannels);
   CHECK_CUDA(cudaGetLastError(), "[FoundationPose] TransfProcess concat FAILED!!!");
-  
+
   return true;
 }
 
-
-bool 
-FoundationPoseRenderer::RenderAndTransform(
-                            const std::vector<Eigen::Matrix4f>& _poses,
-                            void* rgb_on_device,
-                            void* depth_on_device,
-                            void* xyz_map_on_device,
-                            int input_image_height,
-                            int input_image_width,
-                            void* render_buffer,
-                            void* transf_buffer)
+bool FoundationPoseRenderer::RenderAndTransform(const std::vector<Eigen::Matrix4f> &_poses,
+                                                void                               *rgb_on_device,
+                                                void                               *depth_on_device,
+                                                void *xyz_map_on_device,
+                                                int   input_image_height,
+                                                int   input_image_width,
+                                                void *render_buffer,
+                                                void *transf_buffer)
 {
   const int input_poses_num = _poses.size();
 
   // 1. 根据目标位姿计算变换矩阵
   std::vector<Eigen::MatrixXf> poses(_poses.begin(), _poses.end());
-  Eigen::Vector2i out_size = {crop_window_H_, crop_window_W_};
+  Eigen::Vector2i              out_size = {crop_window_H_, crop_window_W_};
   auto tfs = ComputeCropWindowTF(poses, intrinsic_, out_size, crop_ratio_, mesh_diameter_);
-  CHECK_STATE(tfs.size() != 0,
-              "[FoundationposeRender] The transform matrix vector is empty");
+  CHECK_STATE(tfs.size() != 0, "[FoundationposeRender] The transform matrix vector is empty");
 
   // 2. 将输入的poses拷贝到device端
-  float* _poses_device = static_cast<float*>(input_poses_device_.get());
-  for (size_t i = 0 ; i < poses.size() ; ++ i) {
-    CHECK_CUDA(cudaMemcpy(&_poses_device[i*16], _poses[i].data(), 16*sizeof(float), cudaMemcpyHostToDevice),
-              "[FoundationposeRender] cudaMemcpy poses_host -> poses_device FAILED!!!");
+  float *_poses_device = static_cast<float *>(input_poses_device_.get());
+  for (size_t i = 0; i < poses.size(); ++i)
+  {
+    CHECK_CUDA(cudaMemcpy(&_poses_device[i * 16], _poses[i].data(), 16 * sizeof(float),
+                          cudaMemcpyHostToDevice),
+               "[FoundationposeRender] cudaMemcpy poses_host -> poses_device FAILED!!!");
   }
 
   // 3. 根据poses和tfs裁剪输入rgb和xyz_map，并Transpose后填充至目标缓存
-  CHECK_STATE(TransfProcess(cuda_stream_transf_,
-                            rgb_on_device,
-                            xyz_map_on_device,
-                            input_image_height,
-                            input_image_width,
-                            tfs,
-                            input_poses_device_.get(),
-                            transf_buffer),
-              "[FoundationPose Renderer] TransfProcess Failed!!!");
+  CHECK_STATE(
+      TransfProcess(cuda_stream_transf_, rgb_on_device, xyz_map_on_device, input_image_height,
+                    input_image_width, tfs, input_poses_device_.get(), transf_buffer),
+      "[FoundationPose Renderer] TransfProcess Failed!!!");
   // 4. 根据poses和tfs渲染rgb图像和xyz_map，并Transpose后填充至目标缓存
-  CHECK_STATE(RenderProcess(cuda_stream_render_,
-                            poses,
-                            tfs,
-                            input_poses_device_.get(),
-                            input_image_height,
-                            input_image_width,
-                            render_buffer),
+  CHECK_STATE(RenderProcess(cuda_stream_render_, poses, tfs, input_poses_device_.get(),
+                            input_image_height, input_image_width, render_buffer),
               "[FoundationPose Renderer] RenderProcess Failed!!!");
 
   // 同步render和transform流程的cuda_stream，确保退出前任务全部完成
   CHECK_CUDA(cudaStreamSynchronize(cuda_stream_transf_),
-            "[FoundationPose Renderer] cudaStreamSync `cuda_stream_transf_` FAILED!!!");
+             "[FoundationPose Renderer] cudaStreamSync `cuda_stream_transf_` FAILED!!!");
   CHECK_CUDA(cudaStreamSynchronize(cuda_stream_render_),
-            "[FoundationPose Renderer] cudaStreamSync `cuda_stream_render` FAILED!!!");
+             "[FoundationPose Renderer] cudaStreamSync `cuda_stream_render` FAILED!!!");
   return true;
 }
 
-
 } // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose_render.cu b/detection_6d_foundationpose/src/foundationpose_render.cu
index 28f73e8..7f09ab8 100644
--- a/detection_6d_foundationpose/src/foundationpose_render.cu
+++ b/detection_6d_foundationpose/src/foundationpose_render.cu
@@ -18,36 +18,38 @@
 #include <iostream>
 #include "foundationpose_render.cu.hpp"
 
-
 void RasterizeCudaFwdShaderKernel(const RasterizeCudaFwdShaderParams p);
 void InterpolateFwdKernel(const InterpolateKernelParams p);
 void TextureFwdKernelLinear1(const TextureKernelParams p);
 
-__device__ float clamp_func(float f, float a, float b) {
+__device__ float clamp_func(float f, float a, float b)
+{
   return fmaxf(a, fminf(f, b));
 }
 
-__global__ void clamp_kernel(float* input, float min_value, float max_value, int N) {
+__global__ void clamp_kernel(float *input, float min_value, float max_value, int N)
+{
   int idx = threadIdx.x + blockIdx.x * blockDim.x;
   // Check the boundaries
-  if (idx >= N) {
+  if (idx >= N)
+  {
     return;
   }
   input[idx] = clamp_func(input[idx], min_value, max_value);
 }
 
-namespace foundationpose_render {  
+namespace foundationpose_render {
 
 /*
 This kernel performs:
  1. thresholdingof the point cloud
  2. subtraction of the position of pose array from the pointcloud
  3. downscaling of the point cloud
- 
+
  pose_array_input is of size N*16, where N is the number of poses. 16  = transformation_mat_size
  pointcloud_input is of size N*n_points*3, where N is the number of poses
     and n_points is the number of points in the point cloud.
- 
+
  It subtracts the pose transformation from each point in the cloud,
  1. checks if the z-component of the point is below "min_depth" and sets it to zero if it is
  2. and applies a downscaling factor to reduce the number of points.
@@ -56,12 +58,18 @@ This kernel performs:
 
  The result is stored back in the input array.
 */
-__global__ void threshold_and_downscale_pointcloud_kernel(
-    float* input, float* pose_array_input, int N, int n_points, float downscale_factor,
-    float min_depth, float max_depth) {
+__global__ void threshold_and_downscale_pointcloud_kernel(float *input,
+                                                          float *pose_array_input,
+                                                          int    N,
+                                                          int    n_points,
+                                                          float  downscale_factor,
+                                                          float  min_depth,
+                                                          float  max_depth)
+{
   int idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (idx >= N * n_points) {
-    return;  // Check the boundaries
+  if (idx >= N * n_points)
+  {
+    return; // Check the boundaries
   }
 
   int pose_idx = idx / n_points;
@@ -78,7 +86,8 @@ __global__ void threshold_and_downscale_pointcloud_kernel(
 
   bool invalid_flag = false;
   // Any points with z below min_depth is set to 0
-  if (input[z_idx] < min_depth) {
+  if (input[z_idx] < min_depth)
+  {
     invalid_flag = true;
   }
 
@@ -92,74 +101,103 @@ __global__ void threshold_and_downscale_pointcloud_kernel(
   input[z_idx] /= downscale_factor;
 
   // Any points with absolute value(x,y or z) above max_depth is set to 0
-  if (fabs(input[x_idx]) > max_depth || invalid_flag) {
+  if (fabs(input[x_idx]) > max_depth || invalid_flag)
+  {
     input[x_idx] = 0.0f;
   }
-  if (fabs(input[y_idx]) > max_depth || invalid_flag) {
+  if (fabs(input[y_idx]) > max_depth || invalid_flag)
+  {
     input[y_idx] = 0.0f;
   }
 
-  if (fabs(input[z_idx]) > max_depth || invalid_flag) {
+  if (fabs(input[z_idx]) > max_depth || invalid_flag)
+  {
     input[z_idx] = 0.0f;
   }
   return;
 }
 
-
 // concat two NHWC array on the last dimension
 __global__ void concat_kernel(
-    float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2) {
+    float *input_a, float *input_b, float *output, int N, int H, int W, int C1, int C2)
+{
   int idx = threadIdx.x + blockIdx.x * blockDim.x;
   // Check the boundaries
-  if (idx >= N * H * W) {
+  if (idx >= N * H * W)
+  {
     return;
   }
 
-  for (int i = 0; i < C1; i++) {
+  for (int i = 0; i < C1; i++)
+  {
     output[idx * (C1 + C2) + i] = input_a[idx * C1 + i];
   }
 
-  for (int i = 0; i < C2; i++) {
+  for (int i = 0; i < C2; i++)
+  {
     output[idx * (C1 + C2) + C1 + i] = input_b[idx * C2 + i];
   }
 }
 
-
-void clamp(cudaStream_t stream, float* input, float min_value, float max_value, int N) {
+void clamp(cudaStream_t stream, float *input, float min_value, float max_value, int N)
+{
   int block_size = 256;
-  int grid_size = (N + block_size - 1) / block_size;
+  int grid_size  = (N + block_size - 1) / block_size;
 
   clamp_kernel<<<grid_size, block_size, 0, stream>>>(input, min_value, max_value, N);
 }
 
-void threshold_and_downscale_pointcloud(
-    cudaStream_t stream, float* pointcloud_input, float* pose_array_input, int N, int n_points, float downscale_factor,
-    float min_depth, float max_depth) {
+void threshold_and_downscale_pointcloud(cudaStream_t stream,
+                                        float       *pointcloud_input,
+                                        float       *pose_array_input,
+                                        int          N,
+                                        int          n_points,
+                                        float        downscale_factor,
+                                        float        min_depth,
+                                        float        max_depth)
+{
   // Launch n_points threads
   int block_size = 256;
-  int grid_size = ((N * n_points) + block_size - 1) / block_size;
+  int grid_size  = ((N * n_points) + block_size - 1) / block_size;
 
   threshold_and_downscale_pointcloud_kernel<<<grid_size, block_size, 0, stream>>>(
       pointcloud_input, pose_array_input, N, n_points, downscale_factor, min_depth, max_depth);
 }
 
-void concat(cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2) {
+void concat(cudaStream_t stream,  // stream the kernel is enqueued on
+            float       *input_a, // NHWC input with C1 channels
+            float       *input_b, // NHWC input with C2 channels
+            float       *output,  // NHWC output with C1 + C2 channels
+            int          N,
+            int          H,
+            int          W,
+            int          C1,
+            int          C2)
+{
-  // Launch N*H*W threads, each thread handle a vector of size C
+  // Launch N*H*W threads on `stream`; each thread copies one pixel's C1 + C2 channels
   int block_size = 256;
-  int grid_size = (N * H * W + block_size - 1) / block_size;
+  int grid_size  = (N * H * W + block_size - 1) / block_size;
 
-  concat_kernel<<<grid_size, block_size>>>(input_a, input_b, output, N, H, W, C1, C2);
+  concat_kernel<<<grid_size, block_size, 0, stream>>>(input_a, input_b, output, N, H, W, C1, C2);
 }
 
-void rasterize(
-    cudaStream_t stream, CR::CudaRaster* cr, float* pos_ptr, int32_t* tri_ptr, float* out, int pos_count, int tri_count,
-    int H, int W, int C) {
-  const int32_t* range_ptr = 0;
+void rasterize(cudaStream_t    stream,
+               CR::CudaRaster *cr,
+               float          *pos_ptr,
+               int32_t        *tri_ptr,
+               float          *out,
+               int             pos_count,
+               int             tri_count,
+               int             H,
+               int             W,
+               int             C)
+{
+  const int32_t *range_ptr = 0;
 
   bool enablePeel = false;
   cr->setViewportSize(W, H, C);
-  cr->setVertexBuffer((void*)pos_ptr, pos_count);
-  cr->setIndexBuffer((void*)tri_ptr, tri_count);
+  cr->setVertexBuffer((void *)pos_ptr, pos_count);
+  cr->setIndexBuffer((void *)tri_ptr, tri_count);
   cr->setRenderModeFlags(0);
 
   cr->deferredClear(0u);
@@ -167,157 +205,188 @@ void rasterize(
 
   // Populate pixel shader kernel parameters.
   RasterizeCudaFwdShaderParams p;
-  p.pos = pos_ptr;
-  p.tri = tri_ptr;
-  p.in_idx = (const int*)cr->getColorBuffer();
-  p.out = out;
+  p.pos          = pos_ptr;
+  p.tri          = tri_ptr;
+  p.in_idx       = (const int *)cr->getColorBuffer();
+  p.out          = out;
   p.numTriangles = tri_count;
-  p.numVertices = pos_count;
-  p.width = W;
-  p.height = H;
-  p.depth = C;
+  p.numVertices  = pos_count;
+  p.width        = W;
+  p.height       = H;
+  p.depth        = C;
 
   p.instance_mode = 1;
-  p.xs = 2.f / (float)p.width;
-  p.xo = 1.f / (float)p.width - 1.f;
-  p.ys = 2.f / (float)p.height;
-  p.yo = 1.f / (float)p.height - 1.f;
+  p.xs            = 2.f / (float)p.width;
+  p.xo            = 1.f / (float)p.width - 1.f;
+  p.ys            = 2.f / (float)p.height;
+  p.yo            = 1.f / (float)p.height - 1.f;
 
   // Choose launch parameters.
-  dim3 blockSize = getLaunchBlockSize(
-      RAST_CUDA_FWD_SHADER_KERNEL_BLOCK_WIDTH, RAST_CUDA_FWD_SHADER_KERNEL_BLOCK_HEIGHT, p.width,
-      p.height);
-  dim3 gridSize = getLaunchGridSize(blockSize, p.width, p.height, p.depth);
+  dim3 blockSize = getLaunchBlockSize(RAST_CUDA_FWD_SHADER_KERNEL_BLOCK_WIDTH,
+                                      RAST_CUDA_FWD_SHADER_KERNEL_BLOCK_HEIGHT, p.width, p.height);
+  dim3 gridSize  = getLaunchGridSize(blockSize, p.width, p.height, p.depth);
 
   // Launch CUDA kernel.
-  void* args[] = {&p};
-  cudaLaunchKernel((void*)RasterizeCudaFwdShaderKernel, gridSize, blockSize, args, 0, stream);
+  void *args[] = {&p};
+  cudaLaunchKernel((void *)RasterizeCudaFwdShaderKernel, gridSize, blockSize, args, 0, stream);
 }
 
-void interpolate(
-    cudaStream_t stream, float* attr_ptr, float* rast_ptr, int32_t* tri_ptr, float* out, int num_vertices,
-    int num_triangles, int attr_shape_dim, int attr_dim, int H, int W, int C) {
+void interpolate(cudaStream_t stream,
+                 float       *attr_ptr,
+                 float       *rast_ptr,
+                 int32_t     *tri_ptr,
+                 float       *out,
+                 int          num_vertices,
+                 int          num_triangles,
+                 int          attr_shape_dim,
+                 int          attr_dim,
+                 int          H,
+                 int          W,
+                 int          C)
+{
   int instance_mode = attr_shape_dim > 2 ? 1 : 0;
 
-  InterpolateKernelParams p = {};  // Initialize all fields to zero.
-  p.instance_mode = instance_mode;
-  p.numVertices = num_vertices;
-  p.numAttr = attr_dim;
-  p.numTriangles = num_triangles;
-  p.height = H;
-  p.width = W;
-  p.depth = C;
+  InterpolateKernelParams p = {}; // Initialize all fields to zero.
+  p.instance_mode           = instance_mode;
+  p.numVertices             = num_vertices;
+  p.numAttr                 = attr_dim;
+  p.numTriangles            = num_triangles;
+  p.height                  = H;
+  p.width                   = W;
+  p.depth                   = C;
 
   // Get input pointers.
-  p.attr = attr_ptr;
-  p.rast = rast_ptr;
-  p.tri = tri_ptr;
+  p.attr   = attr_ptr;
+  p.rast   = rast_ptr;
+  p.tri    = tri_ptr;
   p.attrBC = 0;
-  p.out = out;
+  p.out    = out;
 
   // Choose launch parameters.
-  dim3 blockSize = getLaunchBlockSize(
-      IP_FWD_MAX_KERNEL_BLOCK_WIDTH, IP_FWD_MAX_KERNEL_BLOCK_HEIGHT, p.width, p.height);
-  dim3 gridSize = getLaunchGridSize(blockSize, p.width, p.height, p.depth);
+  dim3 blockSize = getLaunchBlockSize(IP_FWD_MAX_KERNEL_BLOCK_WIDTH, IP_FWD_MAX_KERNEL_BLOCK_HEIGHT,
+                                      p.width, p.height);
+  dim3 gridSize  = getLaunchGridSize(blockSize, p.width, p.height, p.depth);
 
   // Launch CUDA kernel.
-  void* args[] = {&p};
-  void* func = (void*)InterpolateFwdKernel;
+  void *args[] = {&p};
+  void *func   = (void *)InterpolateFwdKernel;
   cudaLaunchKernel(func, gridSize, blockSize, args, 0, stream);
 }
 
-void texture(
-    cudaStream_t stream, float* tex_ptr, float* uv_ptr, float* out, int tex_height, int tex_width, int tex_channel,
-    int tex_depth, int H, int W, int N) {
-  TextureKernelParams p = {};  // Initialize all fields to zero.
-  p.enableMip = false;
-  p.filterMode = TEX_MODE_LINEAR;
-  p.boundaryMode = TEX_BOUNDARY_MODE_WRAP;
+void texture(cudaStream_t stream,
+             float       *tex_ptr,
+             float       *uv_ptr,
+             float       *out,
+             int          tex_height,
+             int          tex_width,
+             int          tex_channel,
+             int          tex_depth,
+             int          H,
+             int          W,
+             int          N)
+{
+  TextureKernelParams p = {}; // Initialize all fields to zero.
+  p.enableMip           = false;
+  p.filterMode          = TEX_MODE_LINEAR;
+  p.boundaryMode        = TEX_BOUNDARY_MODE_WRAP;
 
-  p.texDepth = tex_depth;
+  p.texDepth  = tex_depth;
   p.texHeight = tex_height;
-  p.texWidth = tex_width;
-  p.channels = tex_channel;
+  p.texWidth  = tex_width;
+  p.channels  = tex_channel;
 
-  p.n = N;
+  p.n         = N;
   p.imgHeight = H;
-  p.imgWidth = W;
+  p.imgWidth  = W;
 
   // Get input pointers.
-  p.tex[0] = tex_ptr;
-  p.uv = uv_ptr;
+  p.tex[0]       = tex_ptr;
+  p.uv           = uv_ptr;
   p.mipLevelBias = NULL;
 
   p.out = out;
 
   // Choose kernel variants based on channel count.
-  void* args[] = {&p};
+  void *args[] = {&p};
 
   // Choose launch parameters for texture lookup kernel.
-  dim3 blockSize = getLaunchBlockSize(
-      TEX_FWD_MAX_KERNEL_BLOCK_WIDTH, TEX_FWD_MAX_KERNEL_BLOCK_HEIGHT, p.imgWidth, p.imgHeight);
-  dim3 gridSize = getLaunchGridSize(blockSize, p.imgWidth, p.imgHeight, p.n);
+  dim3 blockSize = getLaunchBlockSize(TEX_FWD_MAX_KERNEL_BLOCK_WIDTH,
+                                      TEX_FWD_MAX_KERNEL_BLOCK_HEIGHT, p.imgWidth, p.imgHeight);
+  dim3 gridSize  = getLaunchGridSize(blockSize, p.imgWidth, p.imgHeight, p.n);
 
-  void* func = (void*)TextureFwdKernelLinear1;
+  void *func = (void *)TextureFwdKernelLinear1;
   cudaLaunchKernel(func, gridSize, blockSize, args, 0, stream);
 }
 
-__global__ void transform_points_kernel(
-    const float* transform_matrixs, int M, const float* points_vectors, 
-    int N, float* transformed_points_vectors)
+__global__ void transform_points_kernel(const float *transform_matrixs,
+                                        int          M,
+                                        const float *points_vectors,
+                                        int          N,
+                                        float       *transformed_points_vectors)
 {
   int row_idx = threadIdx.y + blockIdx.y * blockDim.y;
   int col_idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (row_idx >= M || col_idx >= N) return;
+  if (row_idx >= M || col_idx >= N)
+    return;
 
-  const float* matrix = transform_matrixs + row_idx * 16;  // 指向当前 4x4 变换矩阵
-  const float* point = points_vectors + col_idx * 3;       // 指向当前 3D 点
-  float* transformed_point = transformed_points_vectors + (row_idx * N + col_idx) * 3;
+  const float *matrix            = transform_matrixs + row_idx * 16; // 指向当前 4x4 变换矩阵
+  const float *point             = points_vectors + col_idx * 3;     // 指向当前 3D 点
+  float       *transformed_point = transformed_points_vectors + (row_idx * N + col_idx) * 3;
 
   float x = point[0], y = point[1], z = point[2];
   // **Column-Major 访问方式**
-  transformed_point[0] = matrix[0] * x + matrix[4] * y + matrix[8]  * z + matrix[12];
-  transformed_point[1] = matrix[1] * x + matrix[5] * y + matrix[9]  * z + matrix[13];
+  transformed_point[0] = matrix[0] * x + matrix[4] * y + matrix[8] * z + matrix[12];
+  transformed_point[1] = matrix[1] * x + matrix[5] * y + matrix[9] * z + matrix[13];
   transformed_point[2] = matrix[2] * x + matrix[6] * y + matrix[10] * z + matrix[14];
 }
 
-static uint16_t ceil_div(uint16_t numerator, uint16_t denominator) {
+static uint32_t ceil_div(uint32_t numerator, uint32_t denominator) // 32-bit: N may exceed 65535
+{
   uint32_t accumulator = numerator + denominator - 1;
-  return accumulator / denominator + 1;
+  return accumulator / denominator + 1; // ceil plus one spare block; kernels bounds-check
 }
 
-void transform_points(cudaStream_t stream, const float* transform_matrixs, int M, const float* points_vectors, 
-    int N, float* transformed_points_vectors)
+void transform_points(cudaStream_t stream,
+                      const float *transform_matrixs,
+                      int          M,
+                      const float *points_vectors,
+                      int          N,
+                      float       *transformed_points_vectors)
 {
   dim3 blockSize = {32, 32};
-  dim3 gridSize = {ceil_div(N, 32), ceil_div(M, 32)};
+  dim3 gridSize  = {ceil_div(N, 32), ceil_div(M, 32)};
 
-  transform_points_kernel<<<gridSize, blockSize, 0, stream>>>(
-      transform_matrixs, M, points_vectors, N, transformed_points_vectors);
+  transform_points_kernel<<<gridSize, blockSize, 0, stream>>>(transform_matrixs, M, points_vectors,
+                                                              N, transformed_points_vectors);
 }
 
-__global__ void generate_pose_clip_kernel(
-    const float* transform_matrixs, const float* bbox2d_matrixs, int M, const float* points_vectors, 
-    int N, float* transformed_points_vectors, int rgb_H, int rgb_W)
+__global__ void generate_pose_clip_kernel(const float *transform_matrixs,
+                                          const float *bbox2d_matrixs,
+                                          int          M,
+                                          const float *points_vectors,
+                                          int          N,
+                                          float       *transformed_points_vectors,
+                                          int          rgb_H,
+                                          int          rgb_W)
 {
   int row_idx = threadIdx.y + blockIdx.y * blockDim.y;
   int col_idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (row_idx >= M || col_idx >= N) return;
-
-  const float* matrix = transform_matrixs + row_idx * 16;  // 指向当前 4x4 变换矩阵
-  const float* bbox2d = bbox2d_matrixs + row_idx * 4;      // 指向当前 4x1 bbox2d向量
-  const float* point = points_vectors + col_idx * 3;       // 指向当前 3D 点
-  float* transformed_point = transformed_points_vectors + (row_idx * N + col_idx) * 4;
+  if (row_idx >= M || col_idx >= N)
+    return;
 
+  const float *matrix            = transform_matrixs + row_idx * 16; // 指向当前 4x4 变换矩阵
+  const float *bbox2d            = bbox2d_matrixs + row_idx * 4;     // 指向当前 4x1 bbox2d向量
+  const float *point             = points_vectors + col_idx * 3;     // 指向当前 3D 点
+  float       *transformed_point = transformed_points_vectors + (row_idx * N + col_idx) * 4;
 
   float l = bbox2d[0], t = rgb_H - bbox2d[1], r = bbox2d[2], b = rgb_H - bbox2d[3];
-  float a00 = rgb_W / (r - l),           a11 = rgb_H / (t - b), 
-        a30 = (rgb_W - r - l) / (r - l), a31 = (rgb_H - t - b) / (t - b);
+  float a00 = rgb_W / (r - l), a11 = rgb_H / (t - b), a30 = (rgb_W - r - l) / (r - l),
+        a31 = (rgb_H - t - b) / (t - b);
   float x = point[0], y = point[1], z = point[2];
 
   // 1. 坐标变换
-  float tx = matrix[0] * x + matrix[4] * y + matrix[8]  * z + matrix[12];
-  float ty = matrix[1] * x + matrix[5] * y + matrix[9]  * z + matrix[13];
+  float tx = matrix[0] * x + matrix[4] * y + matrix[8] * z + matrix[12];
+  float ty = matrix[1] * x + matrix[5] * y + matrix[9] * z + matrix[13];
   float tz = matrix[2] * x + matrix[6] * y + matrix[10] * z + matrix[14];
   float tw = matrix[3] * x + matrix[7] * y + matrix[11] * z + matrix[15];
 
@@ -328,99 +397,126 @@ __global__ void generate_pose_clip_kernel(
   transformed_point[3] = tw;
 }
 
-
-void generate_pose_clip(cudaStream_t stream, const float* transform_matrixs, const float* bbox2d_matrix, 
-        int M, const float* points_vectors, int N, float* transformed_points_vectors, int rgb_H, int rgb_W)
+void generate_pose_clip(cudaStream_t stream,
+                        const float *transform_matrixs,
+                        const float *bbox2d_matrix,
+                        int          M,
+                        const float *points_vectors,
+                        int          N,
+                        float       *transformed_points_vectors,
+                        int          rgb_H,
+                        int          rgb_W)
 {
   dim3 blockSize = {32, 32};
-  dim3 gridSize = {ceil_div(N, 32), ceil_div(M, 32)};
+  dim3 gridSize  = {ceil_div(N, 32), ceil_div(M, 32)};
 
   generate_pose_clip_kernel<<<gridSize, blockSize, 0, stream>>>(
-      transform_matrixs, bbox2d_matrix, M, points_vectors, N, transformed_points_vectors, rgb_H, rgb_W);
+      transform_matrixs, bbox2d_matrix, M, points_vectors, N, transformed_points_vectors, rgb_H,
+      rgb_W);
 }
 
-
-__global__ void transform_normals_kernel(
-    const float* transform_matrixs, int M, const float* normals_vectors, 
-    int N, float* transformed_normal_vectors)
+__global__ void transform_normals_kernel(const float *transform_matrixs,
+                                         int          M,
+                                         const float *normals_vectors,
+                                         int          N,
+                                         float       *transformed_normal_vectors)
 {
   int row_idx = threadIdx.y + blockIdx.y * blockDim.y;
   int col_idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (row_idx >= M || col_idx >= N) return;
+  if (row_idx >= M || col_idx >= N)
+    return;
 
-  const float* matrix = transform_matrixs + row_idx * 16;  // 指向当前 4x4 变换矩阵
-  const float* normal = normals_vectors + col_idx * 3;       // 指向当前 normal 向量
-  float* transformed_normal = transformed_normal_vectors + (row_idx * N + col_idx);
+  const float *matrix             = transform_matrixs + row_idx * 16; // row_idx-th 4x4 transform
+  const float *normal             = normals_vectors + col_idx * 3;    // col_idx-th normal vector
+  float       *transformed_normal = transformed_normal_vectors + (row_idx * N + col_idx);
 
   float x = normal[0], y = normal[1], z = normal[2];
   // **Column-Major 访问方式**
-  float tx = matrix[0] * x + matrix[4] * y + matrix[8]  * z;
-  float ty = matrix[1] * x + matrix[5] * y + matrix[9]  * z;
+  float tx = matrix[0] * x + matrix[4] * y + matrix[8] * z;
+  float ty = matrix[1] * x + matrix[5] * y + matrix[9] * z;
   float tz = matrix[2] * x + matrix[6] * y + matrix[10] * z;
   // 只保留z方向的分量，取反
-  float l2 = sqrt(tx*tx + ty*ty + tz*tz);
-  float value = l2 == 0 ? 0 : - tz / l2;
-  value = clamp_func(value, 0, 1);
+  float l2              = sqrt(tx * tx + ty * ty + tz * tz);
+  float value           = l2 == 0 ? 0 : -tz / l2;
+  value                 = clamp_func(value, 0, 1);
   transformed_normal[0] = value;
 }
 
-void transform_normals(cudaStream_t stream, const float* transform_matrixs, int M, const float* normals_vectors, 
-    int N, float* transformed_normal_vectors)
+void transform_normals(cudaStream_t stream,
+                       const float *transform_matrixs,
+                       int          M,
+                       const float *normals_vectors,
+                       int          N,
+                       float       *transformed_normal_vectors)
 {
   dim3 blockSize = {32, 32};
-  dim3 gridSize = {ceil_div(N, 32), ceil_div(M, 32)};
+  dim3 gridSize  = {ceil_div(N, 32), ceil_div(M, 32)};
 
   transform_normals_kernel<<<gridSize, blockSize, 0, stream>>>(
       transform_matrixs, M, normals_vectors, N, transformed_normal_vectors);
 }
 
-
-__global__ void renfine_color_kernel(
-    const float* color, const float* diffuse_intensity_map, const float* rast_out, float* output, int poses_num, float w_ambient, 
-    float w_diffuse, int rgb_H, int rgb_W)
+__global__ void renfine_color_kernel(const float *color,
+                                     const float *diffuse_intensity_map,
+                                     const float *rast_out,
+                                     float       *output,
+                                     int          poses_num,
+                                     float        w_ambient,
+                                     float        w_diffuse,
+                                     int          rgb_H,
+                                     int          rgb_W)
 {
   int row_idx = threadIdx.y + blockIdx.y * blockDim.y;
   int col_idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (row_idx >= rgb_H || col_idx >= rgb_W * poses_num) return;
+  if (row_idx >= rgb_H || col_idx >= rgb_W * poses_num)
+    return;
 
-  const int color_idx = col_idx / rgb_W;
+  const int color_idx     = col_idx / rgb_W;
   const int color_row_idx = row_idx;
   const int color_col_idx = col_idx - color_idx * rgb_W;
 
-  const size_t pixel_idx = color_row_idx * rgb_W + color_col_idx;
+  const size_t pixel_idx    = color_row_idx * rgb_W + color_col_idx;
   const size_t pixel_offset = color_idx * rgb_H * rgb_W + pixel_idx;
 
-  const float* rgb = color + pixel_offset * 3;
-  const float* diffuse = diffuse_intensity_map + pixel_offset;
-  const float* rast = rast_out + pixel_offset * 4;
-  float* out = output + pixel_offset * 3;
+  const float *rgb     = color + pixel_offset * 3;
+  const float *diffuse = diffuse_intensity_map + pixel_offset;
+  const float *rast    = rast_out + pixel_offset * 4;
+  float       *out     = output + pixel_offset * 3;
 
   float diff = diffuse[0];
 
   float is_foreground = clamp_func(rast[3], 0, 1);
 
-  float r = rgb[0] * (w_ambient + diff*w_diffuse) * is_foreground;
-  float g = rgb[1] * (w_ambient + diff*w_diffuse) * is_foreground;
-  float b = rgb[2] * (w_ambient + diff*w_diffuse) * is_foreground;
+  float r = rgb[0] * (w_ambient + diff * w_diffuse) * is_foreground;
+  float g = rgb[1] * (w_ambient + diff * w_diffuse) * is_foreground;
+  float b = rgb[2] * (w_ambient + diff * w_diffuse) * is_foreground;
 
   r = clamp_func(r, 0, 1);
   g = clamp_func(g, 0, 1);
   b = clamp_func(b, 0, 1);
-  
+
   out[0] = r;
   out[1] = g;
   out[2] = b;
 }
 
-void refine_color(cudaStream_t stream, const float* color, const float* diffuse_intensity_map, const float* rast_out, float* output,
-        int poses_num, float w_ambient, float w_diffuse, int rgb_H, int rgb_W)
+void refine_color(cudaStream_t stream,
+                  const float *color,
+                  const float *diffuse_intensity_map,
+                  const float *rast_out,
+                  float       *output,
+                  int          poses_num,
+                  float        w_ambient,
+                  float        w_diffuse,
+                  int          rgb_H,
+                  int          rgb_W)
 {
   dim3 blockSize = {32, 32};
-  dim3 gridSize = {ceil_div(rgb_W * poses_num, 32), ceil_div(rgb_H, 32)};
+  dim3 gridSize  = {ceil_div(rgb_W * poses_num, 32), ceil_div(rgb_H, 32)};
 
-  renfine_color_kernel<<<gridSize, blockSize, 0, stream>>>(
-    color, diffuse_intensity_map, rast_out, output, poses_num, w_ambient, w_diffuse, rgb_H, rgb_W
-  );
+  renfine_color_kernel<<<gridSize, blockSize, 0, stream>>>(color, diffuse_intensity_map, rast_out,
+                                                           output, poses_num, w_ambient, w_diffuse,
+                                                           rgb_H, rgb_W);
 }
 
-}   // namespace foundationpose_render
\ No newline at end of file
+} // namespace foundationpose_render
diff --git a/detection_6d_foundationpose/src/foundationpose_render.cu.hpp b/detection_6d_foundationpose/src/foundationpose_render.cu.hpp
index e67fa69..72cc5a1 100644
--- a/detection_6d_foundationpose/src/foundationpose_render.cu.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_render.cu.hpp
@@ -33,34 +33,74 @@
 
 namespace foundationpose_render {
 
-
-void clamp(cudaStream_t stream, float* input, float min_value, float max_value, int N);
-
-void threshold_and_downscale_pointcloud(
-    cudaStream_t stream, float* pointcloud_input, float* pose_array_input, int N, int n_points, float downscale_factor,
-    float min_depth, float max_depth);
-
-void concat(cudaStream_t stream, float* input_a, float* input_b, float* output, int N, int H, int W, int C1, int C2);
-
-void rasterize(
-    cudaStream_t stream, CR::CudaRaster* cr, float* pos_ptr, int32_t* tri_ptr, float* out, int pos_count, int tri_count,
-    int H, int W, int C);
-
-void interpolate(
-    cudaStream_t stream, float* attr_ptr, float* rast_ptr, int32_t* tri_ptr, float* out, int num_vertices,
-    int num_triangles, int attr_shape_dim, int attr_dim, int H, int W, int C);
-    
-void texture(
-    cudaStream_t stream, float* tex_ptr, float* uv_ptr, float* out, int tex_height, int tex_width, int tex_channel,
-    int tex_depth, int H, int W, int N);
+void clamp(cudaStream_t stream, float *input, float min_value, float max_value, int N);
+
+void threshold_and_downscale_pointcloud(cudaStream_t stream,
+                                        float       *pointcloud_input,
+                                        float       *pose_array_input,
+                                        int          N,
+                                        int          n_points,
+                                        float        downscale_factor,
+                                        float        min_depth,
+                                        float        max_depth);
+
+void concat(cudaStream_t stream,
+            float       *input_a,
+            float       *input_b,
+            float       *output,
+            int          N,
+            int          H,
+            int          W,
+            int          C1,
+            int          C2);
+
+void rasterize(cudaStream_t    stream,
+               CR::CudaRaster *cr,
+               float          *pos_ptr,
+               int32_t        *tri_ptr,
+               float          *out,
+               int             pos_count,
+               int             tri_count,
+               int             H,
+               int             W,
+               int             C);
+
+void interpolate(cudaStream_t stream,
+                 float       *attr_ptr,
+                 float       *rast_ptr,
+                 int32_t     *tri_ptr,
+                 float       *out,
+                 int          num_vertices,
+                 int          num_triangles,
+                 int          attr_shape_dim,
+                 int          attr_dim,
+                 int          H,
+                 int          W,
+                 int          C);
+
+void texture(cudaStream_t stream,
+             float       *tex_ptr,
+             float       *uv_ptr,
+             float       *out,
+             int          tex_height,
+             int          tex_width,
+             int          tex_channel,
+             int          tex_depth,
+             int          H,
+             int          W,
+             int          N);
 
 /**
  * @param transform_matrixs 应当是`Col-Major`的transform_num个4x4矩阵
  * @param points_vectors 应当是`points_num`个3x1向量
  * @param transformed_points_vectors 输出3x1大小的向量缓存, 共 `transform_num x points_num` 个
  */
-void transform_points(cudaStream_t stream, const float* transform_matrixs, int transform_num, const float* points_vectors, 
-    int points_num, float* transformed_points_vectors);
+void transform_points(cudaStream_t stream,
+                      const float *transform_matrixs,
+                      int          transform_num,
+                      const float *points_vectors,
+                      int          points_num,
+                      float       *transformed_points_vectors);
 
 /**
  * @param transform_matrixs 应当是`Col-Major`的transform_num个4x4矩阵
@@ -68,20 +108,40 @@ void transform_points(cudaStream_t stream, const float* transform_matrixs, int t
  * @param points_vectors 应当是`points_num`个3x1向量
  * @param transformed_points_vectors 输出4x1大小的向量缓存, 共 `transform_num x points_num` 个
  */
-void generate_pose_clip(cudaStream_t stream, const float* transform_matrixs, const float* bbox2d_matrix, int transform_num, const float* points_vectors, 
-    int points_num, float* transformed_points_vectors, int rgb_H, int rgb_W);
+void generate_pose_clip(cudaStream_t stream,
+                        const float *transform_matrixs,
+                        const float *bbox2d_matrix,
+                        int          transform_num,
+                        const float *points_vectors,
+                        int          points_num,
+                        float       *transformed_points_vectors,
+                        int          rgb_H,
+                        int          rgb_W);
 
 /**
  * @param transform_matrixs 应当是`Col-Major`的transform_num个4x4矩阵
  * @param normals_vectors 应当是`normals_num`个3x1向量
- * @param transformed_normal_vectors 这里直接输出归一化后的z方向分量，供 `transform_num x normals_num`个，即 [hyp-pose, H, W, 1] 
+ * @param transformed_normal_vectors output buffer receiving only the normalized z component,
+ * `transform_num x normals_num` values in total, i.e. [hyp-pose, H, W, 1]
  */
-void transform_normals(cudaStream_t stream, const float* transform_matrixs, int transform_num, const float* normals_vectors, 
-    int normals_num, float* transformed_normal_vectors);
-
-void refine_color(cudaStream_t stream, const float* color, const float* diffuse_intensity_map, const float* rast, float* output,
-    int poses_num, float w_ambient, float w_diffuse, int rgb_H, int rgb_W);
-
-}   // namespace foundationpose_render
-
-#endif  // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_RENDER_CUDA_HPP_
\ No newline at end of file
+void transform_normals(cudaStream_t stream,
+                       const float *transform_matrixs,
+                       int          transform_num,
+                       const float *normals_vectors,
+                       int          normals_num,
+                       float       *transformed_normal_vectors);
+
+void refine_color(cudaStream_t stream,
+                  const float *color,
+                  const float *diffuse_intensity_map,
+                  const float *rast,
+                  float       *output,
+                  int          poses_num,
+                  float        w_ambient,
+                  float        w_diffuse,
+                  int          rgb_H,
+                  int          rgb_W);
+
+} // namespace foundationpose_render
+
+#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_RENDER_CUDA_HPP_
diff --git a/detection_6d_foundationpose/src/foundationpose_render.hpp b/detection_6d_foundationpose/src/foundationpose_render.hpp
index fb55266..d1c3a9f 100644
--- a/detection_6d_foundationpose/src/foundationpose_render.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_render.hpp
@@ -11,157 +11,154 @@
 #include "nvdiffrast/common/cudaraster/CudaRaster.hpp"
 #include "foundationpose_utils.hpp"
 
-
 namespace detection_6d {
 
-
 typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> RowMajorMatrix;
 
 class FoundationPoseRenderer {
 public:
   FoundationPoseRenderer(std::shared_ptr<TexturedMeshLoader> mesh_loader,
-                        const Eigen::Matrix3f& intrinsic,
-                        const int input_poses_num,
-                        const float crop_ratio = 1.2,
-                        const int crop_window_H = 160,
-                        const int crop_window_W = 160,
-                        const float min_depth = 0.1,
-                        const float max_depth = 4.0);
-
-  bool RenderAndTransform(const std::vector<Eigen::Matrix4f>& _poses,
-          void* rgb_on_device,
-          void* depth_on_device,
-          void* xyz_map_on_device,
-          int input_image_height,
-          int input_image_width,
-          void* render_buffer,
-          void* transf_buffer);
+                         const Eigen::Matrix3f              &intrinsic,
+                         const int                           input_poses_num,
+                         const float                         crop_ratio    = 1.2,
+                         const int                           crop_window_H = 160,
+                         const int                           crop_window_W = 160,
+                         const float                         min_depth     = 0.1,
+                         const float                         max_depth     = 4.0);
+
+  bool RenderAndTransform(const std::vector<Eigen::Matrix4f> &_poses,
+                          void                               *rgb_on_device,
+                          void                               *depth_on_device,
+                          void                               *xyz_map_on_device,
+                          int                                 input_image_height,
+                          int                                 input_image_width,
+                          void                               *render_buffer,
+                          void                               *transf_buffer);
 
   ~FoundationPoseRenderer();
+
 private:
-  bool RenderProcess(cudaStream_t cuda_stream,
-                    const std::vector<Eigen::MatrixXf>& poses,
-                    const std::vector<RowMajorMatrix>& tfs,
-                    void* poses_on_device,
-                    int input_image_height,
-                    int input_image_width,
-                    void* render_input_dst_ptr);
-
-  bool TransfProcess(cudaStream_t cuda_stream,
-                    void* rgb_on_device,
-                    void* xyz_map_on_device,
-                    int input_image_height,
-                    int input_image_width,
-                    const std::vector<RowMajorMatrix>& tfs,
-                    void* poses_on_device,
-                    void* transf_input_dst_ptr);
+  bool RenderProcess(cudaStream_t                        cuda_stream,
+                     const std::vector<Eigen::MatrixXf> &poses,
+                     const std::vector<RowMajorMatrix>  &tfs,
+                     void                               *poses_on_device,
+                     int                                 input_image_height,
+                     int                                 input_image_width,
+                     void                               *render_input_dst_ptr);
+
+  bool TransfProcess(cudaStream_t                       cuda_stream,
+                     void                              *rgb_on_device,
+                     void                              *xyz_map_on_device,
+                     int                                input_image_height,
+                     int                                input_image_width,
+                     const std::vector<RowMajorMatrix> &tfs,
+                     void                              *poses_on_device,
+                     void                              *transf_input_dst_ptr);
 
   bool LoadTexturedMesh();
 
   bool PrepareBuffer();
 
-  bool TransformVerticesOnCUDA(cudaStream_t stream,
-                  const std::vector<Eigen::MatrixXf>& tfs,
-                  float* output_buffer) ;
-
-  bool TransformVertexNormalsOnCUDA(cudaStream_t stream,
-                  const std::vector<Eigen::MatrixXf>& tfs,
-                  float* output_buffer);
-
-  bool GeneratePoseClipOnCUDA(cudaStream_t stream,
-                      float* output_buffer,
-                      const std::vector<Eigen::MatrixXf>& poses, 
-                      const RowMajorMatrix& bbox2d, 
-                      const Eigen::Matrix3f& K, 
-                      int rgb_H, int rgb_W);
-
-  bool NvdiffrastRender(cudaStream_t cuda_stream_, 
-                        const std::vector<Eigen::MatrixXf>& poses, 
-                        const Eigen::Matrix3f& K, 
-                        const RowMajorMatrix& bbox2d, 
-                        int rgb_H, int rgb_W, int H, int W, 
-                        nvcv::Tensor& flip_color_tensor, nvcv::Tensor& flip_xyz_map_tensor);
+  bool TransformVerticesOnCUDA(cudaStream_t                        stream,
+                               const std::vector<Eigen::MatrixXf> &tfs,
+                               float                              *output_buffer);
+
+  bool TransformVertexNormalsOnCUDA(cudaStream_t                        stream,
+                                    const std::vector<Eigen::MatrixXf> &tfs,
+                                    float                              *output_buffer);
+
+  bool GeneratePoseClipOnCUDA(cudaStream_t                        stream,
+                              float                              *output_buffer,
+                              const std::vector<Eigen::MatrixXf> &poses,
+                              const RowMajorMatrix               &bbox2d,
+                              const Eigen::Matrix3f              &K,
+                              int                                 rgb_H,
+                              int                                 rgb_W);
+
+  bool NvdiffrastRender(cudaStream_t                        cuda_stream_,
+                        const std::vector<Eigen::MatrixXf> &poses,
+                        const Eigen::Matrix3f              &K,
+                        const RowMajorMatrix               &bbox2d,
+                        int                                 rgb_H,
+                        int                                 rgb_W,
+                        int                                 H,
+                        int                                 W,
+                        nvcv::Tensor                       &flip_color_tensor,
+                        nvcv::Tensor                       &flip_xyz_map_tensor);
 
 private:
   //
   const int input_poses_num_;
 
   // crop window size (model input size)
-  const int crop_window_H_;
-  const int crop_window_W_;
-  const float crop_ratio_; // refine,    score->1.1
+  const int             crop_window_H_;
+  const int             crop_window_W_;
+  const float           crop_ratio_; // refine,    score->1.1
   const Eigen::Matrix3f intrinsic_;
 
   // depth threshold
   const float min_depth_;
   const float max_depth_;
 
-
   // mesh
-  std::shared_ptr<TexturedMeshLoader> mesh_loader_;
-  std::vector<float> vertices_;
-  std::vector<float> texcoords_;
-  std::vector<int32_t> mesh_faces_;
+  std::shared_ptr<TexturedMeshLoader>                                   mesh_loader_;
+  std::vector<float>                                                    vertices_;
+  std::vector<float>                                                    texcoords_;
+  std::vector<int32_t>                                                  mesh_faces_;
   Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> mesh_vertices_;
-  int num_vertices_;
-  int num_faces_;
-  int num_texcoords_;
-  float mesh_diameter_;
-  int texture_map_height_;
-  int texture_map_width_;
-
+  int                                                                   num_vertices_;
+  int                                                                   num_faces_;
+  int                                                                   num_texcoords_;
+  float                                                                 mesh_diameter_;
+  int                                                                   texture_map_height_;
+  int                                                                   texture_map_width_;
 
   // constants
-  const int kNumChannels = 3;
-  const size_t kTexcoordsDim = 2;
-  const size_t kVertexPoints = 3;
+  const int    kNumChannels      = 3;
+  const size_t kTexcoordsDim     = 2;
+  const size_t kVertexPoints     = 3;
   const size_t kTriangleVertices = 3;
-  const size_t kPTMatrixDim = 3;
+  const size_t kPTMatrixDim      = 3;
   // poses位姿变换矩阵维度
   const size_t kTSMatrixDim = 4;
 
 private:
-  template<typename T>
-  using DeviceBufferUniquePtrType = std::unique_ptr<T, std::function<void(T*)>>;
-
-  DeviceBufferUniquePtrType<float> vertices_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> vertex_normals_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> texcoords_device_ {nullptr};
-  DeviceBufferUniquePtrType<int32_t> mesh_faces_device_ {nullptr};
-  DeviceBufferUniquePtrType<uint8_t> texture_map_device_ {nullptr};
+  template <typename T>
+  using DeviceBufferUniquePtrType = std::unique_ptr<T, std::function<void(T *)>>;
+
+  DeviceBufferUniquePtrType<float>   vertices_device_{nullptr};
+  DeviceBufferUniquePtrType<float>   vertex_normals_device_{nullptr};
+  DeviceBufferUniquePtrType<float>   texcoords_device_{nullptr};
+  DeviceBufferUniquePtrType<int32_t> mesh_faces_device_{nullptr};
+  DeviceBufferUniquePtrType<uint8_t> texture_map_device_{nullptr};
   // nvdiffrast render时相关缓存
-  DeviceBufferUniquePtrType<float> pose_clip_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> rast_out_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> pts_cam_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> diffuse_intensity_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> diffuse_intensity_map_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> texcoords_out_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> color_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> xyz_map_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> render_crop_rgb_tensor_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> render_crop_xyz_map_tensor_device_ {nullptr};
+  DeviceBufferUniquePtrType<float> pose_clip_device_{nullptr};
+  DeviceBufferUniquePtrType<float> rast_out_device_{nullptr};
+  DeviceBufferUniquePtrType<float> pts_cam_device_{nullptr};
+  DeviceBufferUniquePtrType<float> diffuse_intensity_device_{nullptr};
+  DeviceBufferUniquePtrType<float> diffuse_intensity_map_device_{nullptr};
+  DeviceBufferUniquePtrType<float> texcoords_out_device_{nullptr};
+  DeviceBufferUniquePtrType<float> color_device_{nullptr};
+  DeviceBufferUniquePtrType<float> xyz_map_device_{nullptr};
+  DeviceBufferUniquePtrType<float> render_crop_rgb_tensor_device_{nullptr};
+  DeviceBufferUniquePtrType<float> render_crop_xyz_map_tensor_device_{nullptr};
 
   // transf 相关缓存
-  DeviceBufferUniquePtrType<float> transformed_rgb_device_ {nullptr};
-  DeviceBufferUniquePtrType<float> transformed_xyz_map_device_ {nullptr};
-  DeviceBufferUniquePtrType<uint8_t> transformed_crop_rgb_tensor_device_ {nullptr};
+  DeviceBufferUniquePtrType<float>   transformed_rgb_device_{nullptr};
+  DeviceBufferUniquePtrType<float>   transformed_xyz_map_device_{nullptr};
+  DeviceBufferUniquePtrType<uint8_t> transformed_crop_rgb_tensor_device_{nullptr};
 
   // refine部分输入的poses在过程中是静止的，提供提前计算这部分poses和render结果的功能
-  DeviceBufferUniquePtrType<float> input_poses_device_ {nullptr};
+  DeviceBufferUniquePtrType<float> input_poses_device_{nullptr};
 
   std::shared_ptr<CR::CudaRaster> cr_;
 
   cudaStream_t cuda_stream_render_;
-  cudaStream_t cuda_stream_transf_; 
+  cudaStream_t cuda_stream_transf_;
 
   nvcv::Tensor float_texture_map_tensor_;
 };
 
-
-
 } // namespace detection_6d
 
-
-
-
-#endif
\ No newline at end of file
+#endif
diff --git a/detection_6d_foundationpose/src/foundationpose_sampling.cpp b/detection_6d_foundationpose/src/foundationpose_sampling.cpp
index ac98010..78831ee 100644
--- a/detection_6d_foundationpose/src/foundationpose_sampling.cpp
+++ b/detection_6d_foundationpose/src/foundationpose_sampling.cpp
@@ -7,54 +7,60 @@
 
 #include <opencv2/core.hpp>
 
-
 namespace detection_6d {
 
 typedef Eigen::Matrix<uint8_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> RowMajorMatrix8u;
 
 // A helper function to create a vertex from a point
-int AddVertex(const Eigen::Vector3f& p, std::vector<Eigen::Vector3f>& vertices) {
+int AddVertex(const Eigen::Vector3f &p, std::vector<Eigen::Vector3f> &vertices)
+{
   vertices.push_back(p.normalized());
   return vertices.size() - 1;
 }
 
 // A helper function to create a face from three indices
-void AddFace(int i, int j, int k, std::vector<Eigen::Vector3i>& faces) {
+void AddFace(int i, int j, int k, std::vector<Eigen::Vector3i> &faces)
+{
   faces.emplace_back(i, j, k);
 }
 
 // A helper function to get the middle point of two vertices
-int GetMiddlePoint(
-    int i, int j, std::vector<Eigen::Vector3f>& vertices, std::map<int64_t, int>& cache) {
+int GetMiddlePoint(int                           i,
+                   int                           j,
+                   std::vector<Eigen::Vector3f> &vertices,
+                   std::map<int64_t, int>       &cache)
+{
   // check if the edge (i, j) has been processed before
-  bool first_is_smaller = i < j;
-  int64_t smaller = first_is_smaller ? i : j;
-  int64_t greater = first_is_smaller ? j : i;
-  int64_t key = (smaller << 32) + greater;
+  bool    first_is_smaller = i < j;
+  int64_t smaller          = first_is_smaller ? i : j;
+  int64_t greater          = first_is_smaller ? j : i;
+  int64_t key              = (smaller << 32) + greater;
 
   auto it = cache.find(key);
-  if (it != cache.end()) {
+  if (it != cache.end())
+  {
     return it->second;
   }
 
   // if not cached, create a new vertex
-  Eigen::Vector3f p1 = vertices[i];
-  Eigen::Vector3f p2 = vertices[j];
-  Eigen::Vector3f pm = (p1 + p2) / 2.0;
-  int index = AddVertex(pm, vertices);
-  cache[key] = index;
+  Eigen::Vector3f p1    = vertices[i];
+  Eigen::Vector3f p2    = vertices[j];
+  Eigen::Vector3f pm    = (p1 + p2) / 2.0;
+  int             index = AddVertex(pm, vertices);
+  cache[key]            = index;
   return index;
 }
 
 // A function to generate an icosphere
 // Initial triangle values could found from https://sinestesia.co/blog/tutorials/python-icospheres/
-std::vector<Eigen::Vector3f> GenerateIcosphere(unsigned int n_views) {
-  std::map<int64_t, int> cache;
+std::vector<Eigen::Vector3f> GenerateIcosphere(unsigned int n_views)
+{
+  std::map<int64_t, int>       cache;
   std::vector<Eigen::Vector3f> vertices;
   std::vector<Eigen::Vector3i> faces;
 
   // create 12 vertices
-  float t = (1.0 + std::sqrt(5.0)) / 2.0;  // the golden ratio
+  float t = (1.0 + std::sqrt(5.0)) / 2.0; // the golden ratio
   AddVertex(Eigen::Vector3f(-1, t, 0), vertices);
   AddVertex(Eigen::Vector3f(1, t, 0), vertices);
   AddVertex(Eigen::Vector3f(-1, -t, 0), vertices);
@@ -91,9 +97,11 @@ std::vector<Eigen::Vector3f> GenerateIcosphere(unsigned int n_views) {
   AddFace(9, 8, 1, faces);
 
   // subdivide each face into four smaller faces
-  while (vertices.size() < n_views) {
+  while (vertices.size() < n_views)
+  {
     std::vector<Eigen::Vector3i> new_faces;
-    for (const auto& face : faces) {
+    for (const auto &face : faces)
+    {
       int a = face[0];
       int b = face[1];
       int c = face[2];
@@ -112,64 +120,76 @@ std::vector<Eigen::Vector3f> GenerateIcosphere(unsigned int n_views) {
   return std::move(vertices);
 }
 
-
-float RotationGeodesticDistance(const Eigen::Matrix3f& R1, const Eigen::Matrix3f& R2) {
+float RotationGeodesticDistance(const Eigen::Matrix3f &R1, const Eigen::Matrix3f &R2)
+{
   float cos = ((R1 * R2.transpose()).trace() - 1) / 2.0;
-  cos = std::max(std::min(cos, 1.0f), -1.0f);
+  cos       = std::max(std::min(cos, 1.0f), -1.0f);
   return std::acos(cos);
 }
 
-std::vector<Eigen::Matrix4f> ClusterPoses(
-    float angle_diff, float dist_diff, std::vector<Eigen::Matrix4f>& poses_in,
-    std::vector<Eigen::Matrix4f>& symmetry_tfs) {
+std::vector<Eigen::Matrix4f> ClusterPoses(float                         angle_diff,
+                                          float                         dist_diff,
+                                          std::vector<Eigen::Matrix4f> &poses_in,
+                                          std::vector<Eigen::Matrix4f> &symmetry_tfs)
+{
   std::vector<Eigen::Matrix4f> poses_out;
   poses_out.push_back(poses_in[0]);
   const float radian_thres = angle_diff / 180.0 * M_PI;
 
-  for (unsigned int i = 1; i < poses_in.size(); i++) {
-    bool is_new = true;
+  for (unsigned int i = 1; i < poses_in.size(); i++)
+  {
+    bool            is_new   = true;
     Eigen::Matrix4f cur_pose = poses_in[i];
 
-    for (const auto& cluster : poses_out) {
+    for (const auto &cluster : poses_out)
+    {
       Eigen::Vector3f t0 = cluster.block(0, 3, 3, 1);
       Eigen::Vector3f t1 = cur_pose.block(0, 3, 3, 1);
-      if ((t0 - t1).norm() >= dist_diff) {
+      if ((t0 - t1).norm() >= dist_diff)
+      {
         continue;
       }
       // Remove symmetry
-      for (const auto& tf : symmetry_tfs) {
+      for (const auto &tf : symmetry_tfs)
+      {
         Eigen::Matrix4f cur_pose_tmp = cur_pose * tf;
-        float rot_diff =
+        float           rot_diff =
             RotationGeodesticDistance(cur_pose_tmp.block(0, 0, 3, 3), cluster.block(0, 0, 3, 3));
-        if (rot_diff < radian_thres) {
+        if (rot_diff < radian_thres)
+        {
           is_new = false;
           break;
         }
       }
-      if (!is_new) {
+      if (!is_new)
+      {
         break;
       }
     }
 
-    if (is_new) {
+    if (is_new)
+    {
       poses_out.push_back(poses_in[i]);
     }
   }
   return std::move(poses_out);
 }
 
-std::vector<Eigen::Matrix4f> SampleViewsIcosphere(unsigned int n_views) {
+std::vector<Eigen::Matrix4f> SampleViewsIcosphere(unsigned int n_views)
+{
   auto vertices = GenerateIcosphere(n_views);
   std::vector<Eigen::Matrix4f, std::allocator<Eigen::Matrix4f>> cam_in_obs(
       vertices.size(), Eigen::Matrix4f::Identity(4, 4));
-  for (unsigned int i = 0; i < vertices.size(); i++) {
+  for (unsigned int i = 0; i < vertices.size(); i++)
+  {
     cam_in_obs[i].block<3, 1>(0, 3) = vertices[i];
     Eigen::Vector3f up(0, 0, 1);
     Eigen::Vector3f z_axis = -cam_in_obs[i].block<3, 1>(0, 3);
     z_axis.normalize();
 
     Eigen::Vector3f x_axis = up.cross(z_axis);
-    if (x_axis.isZero()) {
+    if (x_axis.isZero())
+    {
       x_axis << 1, 0, 0;
     }
     x_axis.normalize();
@@ -182,27 +202,29 @@ std::vector<Eigen::Matrix4f> SampleViewsIcosphere(unsigned int n_views) {
   return std::move(cam_in_obs);
 }
 
-
 /**
  * @brief 创建一个`n_views`面体，返回它的顶点位姿集合
- * 
+ *
  * @param n_views 默认40
  * @param inplane_step 默认60
- * @return std::vector<Eigen::Matrix4f> 
+ * @return std::vector<Eigen::Matrix4f>
  */
-std::vector<Eigen::Matrix4f> MakeRotationGrid(unsigned int n_views = 40, int inplane_step = 60) {
+std::vector<Eigen::Matrix4f> MakeRotationGrid(unsigned int n_views = 40, int inplane_step = 60)
+{
   auto cam_in_obs = SampleViewsIcosphere(n_views);
 
   std::vector<Eigen::Matrix4f> rot_grid;
-  for (unsigned int i = 0; i < cam_in_obs.size(); i++) {
-    for (double inplane_rot = 0; inplane_rot < 360; inplane_rot += inplane_step) {
+  for (unsigned int i = 0; i < cam_in_obs.size(); i++)
+  {
+    for (double inplane_rot = 0; inplane_rot < 360; inplane_rot += inplane_step)
+    {
       Eigen::Matrix4f cam_in_ob = cam_in_obs[i];
-      auto R_inplane = Eigen::Affine3f::Identity();
+      auto            R_inplane = Eigen::Affine3f::Identity();
       R_inplane.rotate(Eigen::AngleAxisf(0, Eigen::Vector3f::UnitX()))
           .rotate(Eigen::AngleAxisf(0, Eigen::Vector3f::UnitY()))
           .rotate(Eigen::AngleAxisf(inplane_rot * M_PI / 180.0f, Eigen::Vector3f::UnitZ()));
 
-      cam_in_ob = cam_in_ob * R_inplane.matrix();
+      cam_in_ob                 = cam_in_ob * R_inplane.matrix();
       Eigen::Matrix4f ob_in_cam = cam_in_ob.inverse();
       rot_grid.push_back(ob_in_cam);
     }
@@ -214,26 +236,31 @@ std::vector<Eigen::Matrix4f> MakeRotationGrid(unsigned int n_views = 40, int inp
   return std::move(rot_grid);
 }
 
-
 /**
  * @brief 根据深度图、掩码、相机内参来估计目标物体的近似三维中心
- * 
+ *
  * @param depth 深度图
  * @param mask 掩码
  * @param K 相机内参
  * @param min_depth 有效最小深度
  * @param center 输出三维中心
- * @return true 
- * @return false 
+ * @return true
+ * @return false
  */
-bool GuessTranslation(
-    const Eigen::MatrixXf& depth, const RowMajorMatrix8u& mask, const Eigen::Matrix3f& K,
-      float min_depth, Eigen::Vector3f& center) {
+bool GuessTranslation(const Eigen::MatrixXf  &depth,
+                      const RowMajorMatrix8u &mask,
+                      const Eigen::Matrix3f  &K,
+                      float                   min_depth,
+                      Eigen::Vector3f        &center)
+{
   // Find the indices where mask is positive
   std::vector<int> vs, us;
-  for (int i = 0; i < mask.rows(); i++) {
-    for (int j = 0; j < mask.cols(); j++) {
-      if (mask(i, j) > 0) {
+  for (int i = 0; i < mask.rows(); i++)
+  {
+    for (int j = 0; j < mask.cols(); j++)
+    {
+      if (mask(i, j) > 0)
+      {
         vs.push_back(i);
         us.push_back(j);
       }
@@ -251,15 +278,18 @@ bool GuessTranslation(
   CHECK_STATE(valid.any(), "[FoundationposeSampling] No valid value in mask.");
 
   std::vector<float> valid_depth;
-  for (int i = 0; i < valid.rows(); i++) {
-    for (int j = 0; j < valid.cols(); j++) {
-      if (valid(i, j)) {
+  for (int i = 0; i < valid.rows(); i++)
+  {
+    for (int j = 0; j < valid.cols(); j++)
+    {
+      if (valid(i, j))
+      {
         valid_depth.push_back(depth(i, j));
       }
     }
   }
   std::sort(valid_depth.begin(), valid_depth.end());
-  int n = valid_depth.size();
+  int   n = valid_depth.size();
   float zc =
       (n % 2 == 0) ? (valid_depth[n / 2 - 1] + valid_depth[n / 2]) / 2.0 : valid_depth[n / 2];
 
@@ -267,114 +297,101 @@ bool GuessTranslation(
   return true;
 }
 
-
-
-FoundationPoseSampler::FoundationPoseSampler(const int max_input_image_H,
-                                            const int max_input_image_W,
-                                            const float min_depth,
-                                            const float max_depth,
-                                            const Eigen::Matrix3f& intrinsic)
-                                          : max_input_image_H_(max_input_image_H),
-                                            max_input_image_W_(max_input_image_W),
-                                            min_depth_(min_depth),
-                                            intrinsic_(intrinsic),
-                                            pre_compute_rotations_(MakeRotationGrid())
+FoundationPoseSampler::FoundationPoseSampler(const int              max_input_image_H,
+                                             const int              max_input_image_W,
+                                             const float            min_depth,
+                                             const float            max_depth,
+                                             const Eigen::Matrix3f &intrinsic)
+    : max_input_image_H_(max_input_image_H),
+      max_input_image_W_(max_input_image_W),
+      min_depth_(min_depth),
+      intrinsic_(intrinsic),
+      pre_compute_rotations_(MakeRotationGrid())
 {
   CHECK_CUDA_THROW(cudaStreamCreate(&cuda_stream_),
-                  "[FoundationPoseSampler] Failed to create cuda stream!!");
-
-  CHECK_CUDA_THROW(cudaMalloc(&erode_depth_buffer_device_, 
-                          max_input_image_H_ * max_input_image_W_ * sizeof(float)),
-                  "[FoundationPoseSampler] Failed to malloc cuda memory of `erode_depth`");
-  
-  CHECK_CUDA_THROW(cudaMalloc(&bilateral_depth_buffer_device_, 
-                          max_input_image_H_ * max_input_image_W_ * sizeof(float)),
-                  "[FoundationPoseSampler] Failed to malloc cuda memory of `bilateral_depth`");
-  
+                   "[FoundationPoseSampler] Failed to create cuda stream!!");
+
+  CHECK_CUDA_THROW(cudaMalloc(&erode_depth_buffer_device_,
+                              max_input_image_H_ * max_input_image_W_ * sizeof(float)),
+                   "[FoundationPoseSampler] Failed to malloc cuda memory of `erode_depth`");
+
+  CHECK_CUDA_THROW(cudaMalloc(&bilateral_depth_buffer_device_,
+                              max_input_image_H_ * max_input_image_W_ * sizeof(float)),
+                   "[FoundationPoseSampler] Failed to malloc cuda memory of `bilateral_depth`");
+
   bilateral_depth_buffer_host_.resize(max_input_image_H_ * max_input_image_W_);
 
-  LOG(INFO) << "[FoundationPoseSampler] Pre-computed rotations, size : " << pre_compute_rotations_.size();
+  LOG(INFO) << "[FoundationPoseSampler] Pre-computed rotations, size : "
+            << pre_compute_rotations_.size();
 }
 
 FoundationPoseSampler::~FoundationPoseSampler()
 {
-  if (cudaStreamDestroy(cuda_stream_)) {
+  if (cudaStreamDestroy(cuda_stream_))
+  {
     LOG(WARNING) << "[FoundationPoseSampler] Failed to destroy cuda stream !";
   }
 
-  if (cudaFree(erode_depth_buffer_device_) != cudaSuccess) {
+  if (cudaFree(erode_depth_buffer_device_) != cudaSuccess)
+  {
     LOG(WARNING) << "[FoundationPoseSampler] Failed to free `erode_depth` buffer on device !";
   }
-  if (cudaFree(bilateral_depth_buffer_device_) != cudaSuccess) {
+  if (cudaFree(bilateral_depth_buffer_device_) != cudaSuccess)
+  {
     LOG(WARNING) << "[FoundationPoseSampler] Failed to free `bilateral_depth` buffer on device !";
   }
 }
 
-
-bool 
-FoundationPoseSampler::GetHypPoses(void* _depth_on_device,
-                          void* _mask_on_host,
-                          int input_image_height,
-                          int input_image_width,
-                          std::vector<Eigen::Matrix4f>& out_hyp_poses)
+bool FoundationPoseSampler::GetHypPoses(void                         *_depth_on_device,
+                                        void                         *_mask_on_host,
+                                        int                           input_image_height,
+                                        int                           input_image_width,
+                                        std::vector<Eigen::Matrix4f> &out_hyp_poses)
 {
-  if (_depth_on_device == nullptr || _mask_on_host == nullptr) {
+  if (_depth_on_device == nullptr || _mask_on_host == nullptr)
+  {
     throw std::invalid_argument("[FoudationPoseSampler] Got INVALID depth/mask ptr on device!!!");
   }
   // 1. 生成基于多面体的初始假设位姿
   out_hyp_poses = pre_compute_rotations_;
   // 2. 优化depth深度图
-  float* depth_on_device = static_cast<float*>(_depth_on_device);
+  float *depth_on_device = static_cast<float *>(_depth_on_device);
   // 2.1 depth腐蚀操作
-  erode_depth(cuda_stream_, 
-              depth_on_device, 
-              erode_depth_buffer_device_, 
-              input_image_height, 
+  erode_depth(cuda_stream_, depth_on_device, erode_depth_buffer_device_, input_image_height,
               input_image_width);
   // 2.2 depth双边滤波操作
-  bilateral_filter_depth(cuda_stream_, 
-                        erode_depth_buffer_device_, 
-                        bilateral_depth_buffer_device_, 
-                        input_image_height, 
-                        input_image_width);
+  bilateral_filter_depth(cuda_stream_, erode_depth_buffer_device_, bilateral_depth_buffer_device_,
+                         input_image_height, input_image_width);
   // 2.3 拷贝到host端缓存
-  cudaMemcpyAsync(bilateral_depth_buffer_host_.data(), 
-                  bilateral_depth_buffer_device_, 
-                  input_image_height * input_image_width * sizeof(float), 
-                  cudaMemcpyDeviceToHost, 
+  cudaMemcpyAsync(bilateral_depth_buffer_host_.data(), bilateral_depth_buffer_device_,
+                  input_image_height * input_image_width * sizeof(float), cudaMemcpyDeviceToHost,
                   cuda_stream_);
 
   // 2.4 同步cuda流
   CHECK_CUDA(cudaStreamSynchronize(cuda_stream_),
-            "[FoundationPoseSampling] cudaStreamSync `cuda_stream_` FAILED!!!");
+             "[FoundationPoseSampling] cudaStreamSync `cuda_stream_` FAILED!!!");
 
   // 3. 基于depth、mask估计目标物三维中心
-  Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> 
-                  bilateral_filter_depth_host(bilateral_depth_buffer_host_.data(), 
-                                              input_image_height, 
-                                              input_image_width);
-  Eigen::Map<Eigen::Matrix<uint8_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> 
-                  mask_host(static_cast<uint8_t*>(_mask_on_host),
-                            input_image_height,
-                            input_image_width);
+  Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
+      bilateral_filter_depth_host(bilateral_depth_buffer_host_.data(), input_image_height,
+                                  input_image_width);
+  Eigen::Map<Eigen::Matrix<uint8_t, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>> mask_host(
+      static_cast<uint8_t *>(_mask_on_host), input_image_height, input_image_width);
 
   Eigen::Vector3f center;
-  CHECK_STATE(GuessTranslation(bilateral_filter_depth_host, 
-                              mask_host, 
-                              intrinsic_, 
-                              min_depth_, 
-                              center),
-              "[FoundationPose Sampling] Failed to GuessTranslation!!!");
+  CHECK_STATE(
+      GuessTranslation(bilateral_filter_depth_host, mask_host, intrinsic_, min_depth_, center),
+      "[FoundationPose Sampling] Failed to GuessTranslation!!!");
 
   LOG(INFO) << "[FoundationPose Sampling] Center: " << center;
 
   // 4. 把三维中心放到变换矩阵内
-  for (auto& pose : out_hyp_poses) {
+  for (auto &pose : out_hyp_poses)
+  {
     pose.block<3, 1>(0, 3) = center;
   }
 
   return true;
 }
 
-
-} // namespace detection_6d
\ No newline at end of file
+} // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose_sampling.cu b/detection_6d_foundationpose/src/foundationpose_sampling.cu
index 42e4239..a541df3 100644
--- a/detection_6d_foundationpose/src/foundationpose_sampling.cu
+++ b/detection_6d_foundationpose/src/foundationpose_sampling.cu
@@ -18,61 +18,77 @@
 #include <cstdio>
 #include "foundationpose_sampling.cu.hpp"
 
-
-__global__ void erode_depth_kernel(
-    float* depth, float* out, int H, int W, int radius, float depth_diff_thres, float ratio_thres,
-    float zfar) {
+__global__ void erode_depth_kernel(float *depth,
+                                   float *out,
+                                   int    H,
+                                   int    W,
+                                   int    radius,
+                                   float  depth_diff_thres,
+                                   float  ratio_thres,
+                                   float  zfar)
+{
   int h = blockIdx.y * blockDim.y + threadIdx.y;
   int w = blockIdx.x * blockDim.x + threadIdx.x;
 
-  if (w >= W || h >= H) {
+  if (w >= W || h >= H)
+  {
     return;
   }
 
   float d_ori = depth[h * W + w];
 
   // Check the validity of the depth value
-  if (d_ori < 0.1f || d_ori >= zfar) {
+  if (d_ori < 0.1f || d_ori >= zfar)
+  {
     out[h * W + w] = 0.0f;
     return;
   }
 
   float bad_cnt = 0.0f;
-  float total = 0.0f;
+  float total   = 0.0f;
 
   // Loop over the neighboring pixels
-  for (int u = w - radius; u <= w + radius; u++) {
-    if (u < 0 || u >= W) {
+  for (int u = w - radius; u <= w + radius; u++)
+  {
+    if (u < 0 || u >= W)
+    {
       continue;
     }
-    for (int v = h - radius; v <= h + radius; v++) {
-      if (v < 0 || v >= H) {
+    for (int v = h - radius; v <= h + radius; v++)
+    {
+      if (v < 0 || v >= H)
+      {
         continue;
       }
       float cur_depth = depth[v * W + u];
 
       total += 1.0f;
 
-      if (cur_depth < 0.1f || cur_depth >= zfar || fabsf(cur_depth - d_ori) > depth_diff_thres) {
+      if (cur_depth < 0.1f || cur_depth >= zfar || fabsf(cur_depth - d_ori) > depth_diff_thres)
+      {
         bad_cnt += 1.0f;
       }
     }
   }
 
   // Check the ratio of bad pixels
-  if ((bad_cnt / total) > ratio_thres) {
+  if ((bad_cnt / total) > ratio_thres)
+  {
     out[h * W + w] = 0.0f;
-  } else {
+  } else
+  {
     out[h * W + w] = d_ori;
   }
 }
 
 __global__ void bilateral_filter_depth_kernel(
-    float* depth, float* out, int H, int W, float zfar, int radius, float sigmaD, float sigmaR) {
+    float *depth, float *out, int H, int W, float zfar, int radius, float sigmaD, float sigmaR)
+{
   int h = blockIdx.y * blockDim.y + threadIdx.y;
   int w = blockIdx.x * blockDim.x + threadIdx.x;
 
-  if (w >= W || h >= H) {
+  if (w >= W || h >= H)
+  {
     return;
   }
 
@@ -80,18 +96,23 @@ __global__ void bilateral_filter_depth_kernel(
 
   // Compute the mean depth of the neighboring pixels
   float mean_depth = 0.0f;
-  int num_valid = 0;
-  for (int u = w - radius; u <= w + radius; u++) {
-    if (u < 0 || u >= W) {
+  int   num_valid  = 0;
+  for (int u = w - radius; u <= w + radius; u++)
+  {
+    if (u < 0 || u >= W)
+    {
       continue;
     }
-    for (int v = h - radius; v <= h + radius; v++) {
-      if (v < 0 || v >= H) {
+    for (int v = h - radius; v <= h + radius; v++)
+    {
+      if (v < 0 || v >= H)
+      {
         continue;
       }
       // Get the current depth value
       float cur_depth = depth[v * W + u];
-      if (cur_depth >= 0.1f && cur_depth < zfar) {
+      if (cur_depth >= 0.1f && cur_depth < zfar)
+      {
         num_valid++;
         mean_depth += cur_depth;
       }
@@ -99,30 +120,36 @@ __global__ void bilateral_filter_depth_kernel(
   }
 
   // Check if there are any valid pixels
-  if (num_valid == 0) {
+  if (num_valid == 0)
+  {
     return;
   }
 
   mean_depth /= (float)num_valid;
 
   float depthCenter = depth[h * W + w];
-  float sum_weight = 0.0f;
-  float sum = 0.0f;
+  float sum_weight  = 0.0f;
+  float sum         = 0.0f;
 
   // Loop over the neighboring pixels again
-  for (int u = w - radius; u <= w + radius; u++) {
-    if (u < 0 || u >= W) {
+  for (int u = w - radius; u <= w + radius; u++)
+  {
+    if (u < 0 || u >= W)
+    {
       continue;
     }
-    for (int v = h - radius; v <= h + radius; v++) {
-      if (v < 0 || v >= H) {
+    for (int v = h - radius; v <= h + radius; v++)
+    {
+      if (v < 0 || v >= H)
+      {
         continue;
       }
       float cur_depth = depth[v * W + u];
-      if (cur_depth >= 0.1f && cur_depth < zfar && fabsf(cur_depth - mean_depth) < 0.01f) {
-        float weight = expf(
-            -((float)((u - w) * (u - w) + (v - h) * (v - h))) / (2.0f * sigmaD * sigmaD) -
-            (depthCenter - cur_depth) * (depthCenter - cur_depth) / (2.0f * sigmaR * sigmaR));
+      if (cur_depth >= 0.1f && cur_depth < zfar && fabsf(cur_depth - mean_depth) < 0.01f)
+      {
+        float weight =
+            expf(-((float)((u - w) * (u - w) + (v - h) * (v - h))) / (2.0f * sigmaD * sigmaD) -
+                 (depthCenter - cur_depth) * (depthCenter - cur_depth) / (2.0f * sigmaR * sigmaR));
         sum_weight += weight;
         sum += weight * cur_depth;
       }
@@ -130,30 +157,48 @@ __global__ void bilateral_filter_depth_kernel(
   }
 
   // Check if the sum of weights is positive and the number of valid pixels is positive
-  if (sum_weight > 0.0f && num_valid > 0) {
+  if (sum_weight > 0.0f && num_valid > 0)
+  {
     out[h * W + w] = sum / sum_weight;
   }
 }
 
-uint16_t ceil_div(uint16_t numerator, uint16_t denominator) {
+uint16_t ceil_div(uint16_t numerator, uint16_t denominator)
+{
   uint32_t accumulator = numerator + denominator - 1;
   return accumulator / denominator;
 }
 
-void erode_depth(
-    cudaStream_t stream, float* depth, float* out, int H, int W, int radius, float depth_diff_thres, float ratio_thres,
-    float zfar) {
+void erode_depth(cudaStream_t stream,
+                 float       *depth,
+                 float       *out,
+                 int          H,
+                 int          W,
+                 int          radius,
+                 float        depth_diff_thres,
+                 float        ratio_thres,
+                 float        zfar)
+{
   dim3 block(16, 16);
   dim3 grid(ceil_div(W, 16), ceil_div(H, 16), 1);
 
-  erode_depth_kernel<<<grid, block, 0, stream>>>(
-      depth, out, H, W, radius, depth_diff_thres, ratio_thres, zfar);
+  erode_depth_kernel<<<grid, block, 0, stream>>>(depth, out, H, W, radius, depth_diff_thres,
+                                                 ratio_thres, zfar);
 }
 
-void bilateral_filter_depth(
-    cudaStream_t stream, float* depth, float* out, int H, int W, float zfar, int radius, float sigmaD, float sigmaR) {
+void bilateral_filter_depth(cudaStream_t stream,
+                            float       *depth,
+                            float       *out,
+                            int          H,
+                            int          W,
+                            float        zfar,
+                            int          radius,
+                            float        sigmaD,
+                            float        sigmaR)
+{
   dim3 block(16, 16);
   dim3 grid(ceil_div(W, 16), ceil_div(H, 16), 1);
 
-  bilateral_filter_depth_kernel<<<grid, block, 0, stream>>>(depth, out, H, W, zfar, radius, sigmaD, sigmaR);
+  bilateral_filter_depth_kernel<<<grid, block, 0, stream>>>(depth, out, H, W, zfar, radius, sigmaD,
+                                                            sigmaR);
 }
diff --git a/detection_6d_foundationpose/src/foundationpose_sampling.cu.hpp b/detection_6d_foundationpose/src/foundationpose_sampling.cu.hpp
index 95886f6..675380d 100644
--- a/detection_6d_foundationpose/src/foundationpose_sampling.cu.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_sampling.cu.hpp
@@ -24,12 +24,23 @@
 #include "cuda.h"
 #include "cuda_runtime.h"
 
-void erode_depth(
-    cudaStream_t stream, float* depth, float* out, int H, int W, int radius = 2, float depth_diff_thres = 0.001,
-    float ratio_thres = 0.8, float zfar = 100);
-void bilateral_filter_depth(
-    cudaStream_t stream, float* depth, float* out, int H, int W, float zfar = 100, int radius = 2, float sigmaD = 2,
-    float sigmaR = 100000);
+void erode_depth(cudaStream_t stream,
+                 float       *depth,
+                 float       *out,
+                 int          H,
+                 int          W,
+                 int          radius           = 2,
+                 float        depth_diff_thres = 0.001,
+                 float        ratio_thres      = 0.8,
+                 float        zfar             = 100);
+void bilateral_filter_depth(cudaStream_t stream,
+                            float       *depth,
+                            float       *out,
+                            int          H,
+                            int          W,
+                            float        zfar   = 100,
+                            int          radius = 2,
+                            float        sigmaD = 2,
+                            float        sigmaR = 100000);
 
-
-#endif  // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_SAMPLING_CUDA_HPP_
\ No newline at end of file
+#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_SAMPLING_CUDA_HPP_
diff --git a/detection_6d_foundationpose/src/foundationpose_sampling.hpp b/detection_6d_foundationpose/src/foundationpose_sampling.hpp
index 32cf34b..518ccea 100644
--- a/detection_6d_foundationpose/src/foundationpose_sampling.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_sampling.hpp
@@ -10,34 +10,34 @@ namespace detection_6d {
 
 class FoundationPoseSampler {
 public:
-  FoundationPoseSampler(const int max_input_image_H,
-                        const int max_input_image_W,
-                        const float min_depth,
-                        const float max_depth,
-                        const Eigen::Matrix3f& intrinsic);
-
-  bool GetHypPoses(void* _depth_on_device,
-                  void* _mask_on_host,
-                  int input_image_height,
-                  int input_image_width,
-                  std::vector<Eigen::Matrix4f>& out_hyp_poses);
+  FoundationPoseSampler(const int              max_input_image_H,
+                        const int              max_input_image_W,
+                        const float            min_depth,
+                        const float            max_depth,
+                        const Eigen::Matrix3f &intrinsic);
+
+  bool GetHypPoses(void                         *_depth_on_device,
+                   void                         *_mask_on_host,
+                   int                           input_image_height,
+                   int                           input_image_width,
+                   std::vector<Eigen::Matrix4f> &out_hyp_poses);
 
   ~FoundationPoseSampler();
 
 private:
-  const int max_input_image_H_;
-  const int max_input_image_W_;
-  const float min_depth_;
+  const int             max_input_image_H_;
+  const int             max_input_image_W_;
+  const float           min_depth_;
   const Eigen::Matrix3f intrinsic_;
-  cudaStream_t cuda_stream_;
+  cudaStream_t          cuda_stream_;
 
 private:
-  float* erode_depth_buffer_device_;
-  float* bilateral_depth_buffer_device_;
-  std::vector<float> bilateral_depth_buffer_host_;
+  float                             *erode_depth_buffer_device_;
+  float                             *bilateral_depth_buffer_device_;
+  std::vector<float>                 bilateral_depth_buffer_host_;
   const std::vector<Eigen::Matrix4f> pre_compute_rotations_;
 };
 
 } // namespace detection_6d
 
-#endif
\ No newline at end of file
+#endif
diff --git a/detection_6d_foundationpose/src/foundationpose_utils.cpp b/detection_6d_foundationpose/src/foundationpose_utils.cpp
index 43323ba..ae665bf 100644
--- a/detection_6d_foundationpose/src/foundationpose_utils.cpp
+++ b/detection_6d_foundationpose/src/foundationpose_utils.cpp
@@ -26,11 +26,13 @@
 
 namespace detection_6d {
 
-std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh* mesh) {
+std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh *mesh)
+{
   Eigen::Vector3f min_vertex = {0, 0, 0};
   Eigen::Vector3f max_vertex = {0, 0, 0};
 
-  if (mesh->mNumVertices == 0) {
+  if (mesh->mNumVertices == 0)
+  {
     return std::pair{min_vertex, max_vertex};
   }
 
@@ -38,7 +40,8 @@ std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh* mesh)
   max_vertex << mesh->mVertices[0].x, mesh->mVertices[0].y, mesh->mVertices[0].z;
 
   // Iterate over all vertices to find the bounding box
-  for (size_t v = 0; v < mesh->mNumVertices; v++) {
+  for (size_t v = 0; v < mesh->mNumVertices; v++)
+  {
     float vx = mesh->mVertices[v].x;
     float vy = mesh->mVertices[v].y;
     float vz = mesh->mVertices[v].z;
@@ -54,59 +57,64 @@ std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh* mesh)
   return std::pair{min_vertex, max_vertex};
 }
 
-float CalcMeshDiameter(const aiMesh* mesh) {
+float CalcMeshDiameter(const aiMesh *mesh)
+{
   float max_dist = 0.0;
-  for (unsigned int i = 0; i < mesh->mNumVertices; ++i) {
-    for (unsigned int j = i + 1; j < mesh->mNumVertices; ++j) {
+  for (unsigned int i = 0; i < mesh->mNumVertices; ++i)
+  {
+    for (unsigned int j = i + 1; j < mesh->mNumVertices; ++j)
+    {
       aiVector3D diff = mesh->mVertices[i] - mesh->mVertices[j];
-      float dist = diff.Length();
-      max_dist = std::max(max_dist, dist);
+      float      dist = diff.Length();
+      max_dist        = std::max(max_dist, dist);
     }
   }
   return max_dist;
 }
 
-
-
-void ComputeOBB(const aiMesh* mesh, 
-                Eigen::Matrix4f& out_orient_bbox, 
-                Eigen::Vector3f& out_dimension) 
+void ComputeOBB(const aiMesh    *mesh,
+                Eigen::Matrix4f &out_orient_bbox,
+                Eigen::Vector3f &out_dimension)
 {
   std::vector<Eigen::Vector3f> vertices;
-  for (unsigned int i = 0; i < mesh->mNumVertices; ++i) {
-      vertices.emplace_back(mesh->mVertices[i].x, mesh->mVertices[i].y, mesh->mVertices[i].z);
+  for (unsigned int i = 0; i < mesh->mNumVertices; ++i)
+  {
+    vertices.emplace_back(mesh->mVertices[i].x, mesh->mVertices[i].y, mesh->mVertices[i].z);
   }
 
   // 计算质心
   Eigen::Vector3f mean = Eigen::Vector3f::Zero();
-  for (const auto& v : vertices) {
-      mean += v;
+  for (const auto &v : vertices)
+  {
+    mean += v;
   }
   mean /= vertices.size();
 
   // 计算协方差矩阵
   Eigen::Matrix3f cov = Eigen::Matrix3f::Zero();
-  for (const auto& v : vertices) {
-      Eigen::Vector3f diff = v - mean;
-      cov += diff * diff.transpose();
+  for (const auto &v : vertices)
+  {
+    Eigen::Vector3f diff = v - mean;
+    cov += diff * diff.transpose();
   }
   cov /= vertices.size();
 
   // 特征分解
   Eigen::SelfAdjointEigenSolver<Eigen::Matrix3f> solver(cov);
-  Eigen::Matrix3f rotation = solver.eigenvectors();
-  Eigen::Vector3f extents = solver.eigenvalues().cwiseSqrt();
+  Eigen::Matrix3f                                rotation = solver.eigenvectors();
+  Eigen::Vector3f                                extents  = solver.eigenvalues().cwiseSqrt();
   // 生成变换矩阵
-  Eigen::Matrix4f transform = Eigen::Matrix4f::Identity();
+  Eigen::Matrix4f transform   = Eigen::Matrix4f::Identity();
   transform.block<3, 3>(0, 0) = rotation;
   transform.block<3, 1>(0, 3) = mean;
 
   Eigen::MatrixXf transformed(vertices.size(), 3);
-  for (int i = 0 ; i < vertices.size() ; ++ i) {
+  for (int i = 0; i < vertices.size(); ++i)
+  {
     Eigen::Vector3f proj = rotation.transpose() * vertices[i];
-    transformed(i, 0) = proj(0);
-    transformed(i, 1) = proj(1);
-    transformed(i, 2) = proj(2);
+    transformed(i, 0)    = proj(0);
+    transformed(i, 1)    = proj(1);
+    transformed(i, 2)    = proj(2);
   }
 
   Eigen::Vector3f minBound = transformed.colwise().minCoeff();
@@ -115,59 +123,64 @@ void ComputeOBB(const aiMesh* mesh,
   Eigen::Vector3f dimension = maxBound - minBound;
 
   out_orient_bbox = transform;
-  out_dimension = dimension;
+  out_dimension   = dimension;
 }
 
-
-TexturedMeshLoader::TexturedMeshLoader(const std::string& mesh_file_path,
-                                      const std::string& textured_file_path)
+TexturedMeshLoader::TexturedMeshLoader(const std::string &mesh_file_path,
+                                       const std::string &textured_file_path)
 {
   // 1. load textured mesh file using assimp
-  if (mesh_file_path.empty() || textured_file_path.empty()) {
-    throw std::invalid_argument("[TexturedMeshLoader] Invalid textured mesh file path: "
-                              + mesh_file_path + "\t" + textured_file_path);
+  if (mesh_file_path.empty() || textured_file_path.empty())
+  {
+    throw std::invalid_argument("[TexturedMeshLoader] Invalid textured mesh file path: " +
+                                mesh_file_path + "\t" + textured_file_path);
   }
   LOG(INFO) << "Loading mesh file: " << mesh_file_path;
   Assimp::Importer importer;
-  const aiScene* scene = importer.ReadFile(
-    mesh_file_path,
-    aiProcess_Triangulate | aiProcess_JoinIdenticalVertices | aiProcess_SortByPType
-  );
-  if (scene == nullptr) {
-    throw std::runtime_error("[TexturedMeshLoader] Failed to read mesh file: "
-                            + mesh_file_path);
+  const aiScene   *scene =
+      importer.ReadFile(mesh_file_path, aiProcess_Triangulate | aiProcess_JoinIdenticalVertices |
+                                            aiProcess_SortByPType);
+  if (scene == nullptr)
+  {
+    throw std::runtime_error("[TexturedMeshLoader] Failed to read mesh file: " + mesh_file_path);
   }
 
-  const aiMesh* mesh = scene->mMeshes[0];
-  mesh_diamter_ = CalcMeshDiameter(mesh);
+  const aiMesh *mesh = scene->mMeshes[0];
+  mesh_diamter_      = CalcMeshDiameter(mesh);
   ComputeOBB(mesh, obb_, dim_);
 
   auto min_max_vertex = FindMinMaxVertex(mesh);
-  mesh_center_ = (min_max_vertex.second + min_max_vertex.first) / 2.0;
+  mesh_center_        = (min_max_vertex.second + min_max_vertex.first) / 2.0;
 
   // Walk through each of the mesh's vertices
-  for (unsigned int v = 0; v < mesh->mNumVertices; v++) {
+  for (unsigned int v = 0; v < mesh->mNumVertices; v++)
+  {
     vertices_.push_back(mesh->mVertices[v]);
     vertex_normals_.push_back(mesh->mNormals[v]);
   }
-  for (unsigned int i = 0 ; i < AI_MAX_NUMBER_OF_TEXTURECOORDS ; ++ i) {
-    if (mesh->mTextureCoords[i] != nullptr) {
+  for (unsigned int i = 0; i < AI_MAX_NUMBER_OF_TEXTURECOORDS; ++i)
+  {
+    if (mesh->mTextureCoords[i] != nullptr)
+    {
       std::vector<aiVector3D> tex_coords_vec;
-      tex_coords_vec.reserve(mesh->mNumVertices);
-      for (int v = 0 ; v < mesh->mNumVertices ; ++ v) {
+      tex_coords_vec.resize(mesh->mNumVertices); // sized so operator[] below is valid
+      for (unsigned int v = 0; v < mesh->mNumVertices; ++v)
+      {
         tex_coords_vec[v] = mesh->mTextureCoords[i][v];
       }
       texcoords_.push_back(std::move(tex_coords_vec));
     }
   }
 
-  for (unsigned int f = 0 ; f < mesh->mNumFaces ; ++ f){ 
+  for (unsigned int f = 0; f < mesh->mNumFaces; ++f)
+  {
     faces_.push_back(mesh->mFaces[f]);
   }
 
   LOG(INFO) << "Loading textured map file: " << textured_file_path;
   texture_map_ = cv::imread(textured_file_path);
-  if (texture_map_.empty()) {
+  if (texture_map_.empty())
+  {
     // throw std::runtime_error("[TexturedMeshLoader] Failed to read textured image: "
     //                         + textured_file_path);
     texture_map_ = cv::Mat(2, 2, CV_8UC3, {100, 100, 100});
@@ -175,132 +188,109 @@ TexturedMeshLoader::TexturedMeshLoader(const std::string& mesh_file_path,
   cv::cvtColor(texture_map_, texture_map_, cv::COLOR_BGR2RGB);
 
   LOG(INFO) << "Successfully Loaded textured mesh file!!!";
-  LOG(INFO) << "Mesh has vertices_num: " << vertices_.size()
-            << ", diameter: " << mesh_diamter_
-            << ", faces_num: " << faces_.size()
-            << ", center: " << mesh_center_;
+  LOG(INFO) << "Mesh has vertices_num: " << vertices_.size() << ", diameter: " << mesh_diamter_
+            << ", faces_num: " << faces_.size() << ", center: " << mesh_center_;
 }
 
-
-
-
-
 /**
- * @brief 获取mesh模型的半径
+ * @brief Get the diameter of the mesh model
- * 
- * @return float 
+ *
+ * @return float
  */
-float 
-TexturedMeshLoader::GetMeshDiameter() const noexcept
+float TexturedMeshLoader::GetMeshDiameter() const noexcept
 {
   return mesh_diamter_;
 }
 
 /**
  * @brief 获取mesh模型的顶点数量
- * 
- * @return size_t 
+ *
+ * @return size_t
  */
-size_t 
-TexturedMeshLoader::GetMeshNumVertices() const noexcept
+size_t TexturedMeshLoader::GetMeshNumVertices() const noexcept
 {
   return vertices_.size();
 }
 
 /**
  * @brief 获取mesh模型的顶点数据指针
- * 
+ *
  * @return const std::vector<aiVector3D> &
  */
-const std::vector<aiVector3D> &
-TexturedMeshLoader::GetMeshVertices() const noexcept
+const std::vector<aiVector3D> &TexturedMeshLoader::GetMeshVertices() const noexcept
 {
   return vertices_;
 }
 
 /**
  * @brief 获取mesh模型顶点的法向量
- * 
+ *
  * @return const std::vector<aiVector3D> &
  */
-const std::vector<aiVector3D> & 
-TexturedMeshLoader::GetMeshVertexNormals() const noexcept
+const std::vector<aiVector3D> &TexturedMeshLoader::GetMeshVertexNormals() const noexcept
 {
   return vertex_normals_;
 }
 
 /**
  * @brief 获取mesh模型的外观坐标系
- * 
+ *
- * @return const std::vector<aiVector3D> &
+ * @return const std::vector<std::vector<aiVector3D>> &
  */
-const std::vector<std::vector<aiVector3D>> &
-TexturedMeshLoader::GetMeshTextureCoords() const noexcept
+const std::vector<std::vector<aiVector3D>> &TexturedMeshLoader::GetMeshTextureCoords()
+    const noexcept
 {
   return texcoords_;
 }
 
 /**
  * @brief 获取mesh模型的faces
- * 
+ *
  * @return const std::vector<aiFace> &
  */
-const std::vector<aiFace> &
-TexturedMeshLoader::GetMeshFaces() const noexcept
+const std::vector<aiFace> &TexturedMeshLoader::GetMeshFaces() const noexcept
 {
   return faces_;
 }
 
-
-  /**
+/**
  * @brief 获取mesh模型的三维中心
- * 
- * @return const std::vector<Eigen::Vector3f>& 
+ *
+ * @return const Eigen::Vector3f&
  */
-const Eigen::Vector3f&
-TexturedMeshLoader::GetMeshModelCenter() const noexcept
+const Eigen::Vector3f &TexturedMeshLoader::GetMeshModelCenter() const noexcept
 {
   return mesh_center_;
 }
 
-
-  /**
+/**
  * @brief 获取mesh包围盒转换矩阵
- * 
- * @return const Eigen::Matrix4f& 
+ *
+ * @return const Eigen::Matrix4f&
  */
-const Eigen::Matrix4f& 
-TexturedMeshLoader::GetOrientBounds() const noexcept
+const Eigen::Matrix4f &TexturedMeshLoader::GetOrientBounds() const noexcept
 {
   return obb_;
 }
 
-
-
 /**
  * @brief 获取cv::Mat格式的外观图
- * 
- * @return const cv::Mat& 
+ *
+ * @return const cv::Mat&
  */
-const cv::Mat& 
-TexturedMeshLoader::GetTextureMap() const noexcept
+const cv::Mat &TexturedMeshLoader::GetTextureMap() const noexcept
 {
   return texture_map_;
 }
 
-
-
-
 /**
  * @brief 获取物体最小包络盒的尺寸
- * 
- * @return const Eigen::Vector3f 
+ *
+ * @return const Eigen::Vector3f
  */
-const Eigen::Vector3f 
-TexturedMeshLoader::GetObjectDimension() const noexcept
+const Eigen::Vector3f TexturedMeshLoader::GetObjectDimension() const noexcept
 {
-  return dim_;  
+  return dim_;
 }
 
-
-} // namespace detection_6d
\ No newline at end of file
+} // namespace detection_6d
diff --git a/detection_6d_foundationpose/src/foundationpose_utils.cu b/detection_6d_foundationpose/src/foundationpose_utils.cu
index 9d6743b..375d8ee 100644
--- a/detection_6d_foundationpose/src/foundationpose_utils.cu
+++ b/detection_6d_foundationpose/src/foundationpose_utils.cu
@@ -1,51 +1,75 @@
 #include "foundationpose_utils.cu.hpp"
 
-
-__global__ void convert_depth_to_xyz_map_kernel(const float* depth_on_device, int input_image_height,
-    int input_image_width, float* xyz_map_on_device, const float fx, const float fy, const float dx, const float dy, const float min_depth)
+__global__ void convert_depth_to_xyz_map_kernel(const float *depth_on_device,
+                                                int          input_image_height,
+                                                int          input_image_width,
+                                                float       *xyz_map_on_device,
+                                                const float  fx,
+                                                const float  fy,
+                                                const float  dx,
+                                                const float  dy,
+                                                const float  min_depth)
 {
   const int row_idx = threadIdx.y + blockIdx.y * blockDim.y;
   const int col_idx = threadIdx.x + blockIdx.x * blockDim.x;
-  if (row_idx >= input_image_height || col_idx >= input_image_width) return;
+  if (row_idx >= input_image_height || col_idx >= input_image_width)
+    return;
 
   const int pixel_idx = row_idx * input_image_width + col_idx;
 
   const float depth = depth_on_device[pixel_idx];
-  if (depth < min_depth) return;
+  if (depth < min_depth)
+    return;
 
   const float x = (col_idx - dx) * depth / fx;
   const float y = (row_idx - dy) * depth / fy;
   const float z = depth;
 
-  float* this_pixel_xyz = xyz_map_on_device + pixel_idx * 3;
-  this_pixel_xyz[0] = x;
-  this_pixel_xyz[1] = y;
-  this_pixel_xyz[2] = z;
+  float *this_pixel_xyz = xyz_map_on_device + pixel_idx * 3;
+  this_pixel_xyz[0]     = x;
+  this_pixel_xyz[1]     = y;
+  this_pixel_xyz[2]     = z;
 }
 
-static uint16_t ceil_div(uint16_t numerator, uint16_t denominator) {
+static uint16_t ceil_div(uint16_t numerator, uint16_t denominator)
+{
   uint32_t accumulator = numerator + denominator - 1;
   return accumulator / denominator + 1;
 }
 
-
-void convert_depth_to_xyz_map(cudaStream_t cuda_stream, const float* depth_on_device, int input_image_height,
-    int input_image_width, float* xyz_map_on_device, const float fx, const float fy, const float dx, const float dy, const float min_depth)
+void convert_depth_to_xyz_map(cudaStream_t cuda_stream,
+                              const float *depth_on_device,
+                              int          input_image_height,
+                              int          input_image_width,
+                              float       *xyz_map_on_device,
+                              const float  fx,
+                              const float  fy,
+                              const float  dx,
+                              const float  dy,
+                              const float  min_depth)
 {
   dim3 blockSize = {32, 32};
-  dim3 gridSize = {ceil_div(input_image_width, 32), ceil_div(input_image_height, 32)};
+  dim3 gridSize  = {ceil_div(input_image_width, 32), ceil_div(input_image_height, 32)};
 
   convert_depth_to_xyz_map_kernel<<<gridSize, blockSize, 0, cuda_stream>>>(
-      depth_on_device, input_image_height, input_image_width, xyz_map_on_device, fx, fy, dx, dy, min_depth);
+      depth_on_device, input_image_height, input_image_width, xyz_map_on_device, fx, fy, dx, dy,
+      min_depth);
 }
 
-
-void convert_depth_to_xyz_map(const float* depth_on_device, int input_image_height,
-    int input_image_width, float* xyz_map_on_device, const float fx, const float fy, const float dx, const float dy, const float min_depth)
+void convert_depth_to_xyz_map(const float *depth_on_device,
+                              int          input_image_height,
+                              int          input_image_width,
+                              float       *xyz_map_on_device,
+                              const float  fx,
+                              const float  fy,
+                              const float  dx,
+                              const float  dy,
+                              const float  min_depth)
 {
   dim3 blockSize = {32, 32};
-  dim3 gridSize = {ceil_div(input_image_width, 32), ceil_div(input_image_height, 32)};
+  dim3 gridSize  = {ceil_div(input_image_width, 32), ceil_div(input_image_height, 32)};
 
-  convert_depth_to_xyz_map_kernel<<<gridSize, blockSize, 0>>>(
-      depth_on_device, input_image_height, input_image_width, xyz_map_on_device, fx, fy, dx, dy, min_depth);
+  convert_depth_to_xyz_map_kernel<<<gridSize, blockSize, 0>>>(depth_on_device, input_image_height,
+                                                              input_image_width, xyz_map_on_device,
+                                                              fx, fy, dx, dy, min_depth);
 }
diff --git a/detection_6d_foundationpose/src/foundationpose_utils.cu.hpp b/detection_6d_foundationpose/src/foundationpose_utils.cu.hpp
index ea06626..8664d6f 100644
--- a/detection_6d_foundationpose/src/foundationpose_utils.cu.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_utils.cu.hpp
@@ -6,9 +6,23 @@
 #include "cuda.h"
 #include "cuda_runtime.h"
 
+void convert_depth_to_xyz_map(cudaStream_t cuda_stream,
+                              const float *depth_on_device,
+                              int          input_image_height,
+                              int          input_image_width,
+                              float       *xyz_map_on_device,
+                              const float  fx,
+                              const float  fy,
+                              const float  dx,
+                              const float  dy,
+                              const float  min_depth);
 
-void convert_depth_to_xyz_map(cudaStream_t cuda_stream, const float* depth_on_device, int input_image_height,
-    int input_image_width, float* xyz_map_on_device, const float fx, const float fy, const float dx, const float dy, const float min_depth);
-
-void convert_depth_to_xyz_map(const float* depth_on_device, int input_image_height,
-    int input_image_width, float* xyz_map_on_device, const float fx, const float fy, const float dx, const float dy, const float min_depth);
\ No newline at end of file
+void convert_depth_to_xyz_map(const float *depth_on_device,
+                              int          input_image_height,
+                              int          input_image_width,
+                              float       *xyz_map_on_device,
+                              const float  fx,
+                              const float  fy,
+                              const float  dx,
+                              const float  dy,
+                              const float  min_depth);
diff --git a/detection_6d_foundationpose/src/foundationpose_utils.hpp b/detection_6d_foundationpose/src/foundationpose_utils.hpp
index b8943db..3761f8a 100644
--- a/detection_6d_foundationpose/src/foundationpose_utils.hpp
+++ b/detection_6d_foundationpose/src/foundationpose_utils.hpp
@@ -38,100 +38,97 @@ class TexturedMeshLoader {
 public:
   /**
    * @brief 创建TexturedMeshLoader实例，并加载mesh模型以及其外观图
-   * 
+   *
    * @param mesh_file_path 应当以`.obj`结尾
    * @param textured_file_path 应当以`.png`结尾
-   * 
+   *
-   * @throw 如果输入路径格式不正确，抛出`std::invalid_arguments`异常
+   * @throw std::invalid_argument if either path is empty
    */
-  TexturedMeshLoader(const std::string& mesh_file_path,
-                  const std::string& textured_file_path);
-  
+  TexturedMeshLoader(const std::string &mesh_file_path, const std::string &textured_file_path);
+
   /**
-   * @brief 获取mesh模型的半径
+   * @brief Get the diameter of the mesh model
-   * 
-   * @return float 
+   *
+   * @return float
    */
   float GetMeshDiameter() const noexcept;
 
   /**
    * @brief 获取mesh模型的顶点数量
-   * 
-   * @return size_t 
+   *
+   * @return size_t
    */
   size_t GetMeshNumVertices() const noexcept;
 
   /**
    * @brief 获取mesh模型的顶点数据指针
-   * 
+   *
    * @return const std::vector<aiVector3D> &
    */
-  const std::vector<aiVector3D> & GetMeshVertices() const noexcept;
+  const std::vector<aiVector3D> &GetMeshVertices() const noexcept;
 
   /**
    * @brief 获取mesh模型顶点的法向量
-   * 
+   *
    * @return const std::vector<aiVector3D> &
    */
-  const std::vector<aiVector3D> & GetMeshVertexNormals() const noexcept;
+  const std::vector<aiVector3D> &GetMeshVertexNormals() const noexcept;
 
   /**
    * @brief 获取mesh模型的外观坐标系
-   * 
+   *
-   * @return const std::vector<aiVector3D> &
+   * @return const std::vector<std::vector<aiVector3D>> &
    */
-  const std::vector<std::vector<aiVector3D>> & GetMeshTextureCoords() const noexcept;
+  const std::vector<std::vector<aiVector3D>> &GetMeshTextureCoords() const noexcept;
 
   /**
    * @brief 获取mesh模型的faces
-   * 
+   *
    * @return const std::vector<aiFace> &
    */
-  const std::vector<aiFace> & GetMeshFaces() const noexcept;
+  const std::vector<aiFace> &GetMeshFaces() const noexcept;
 
   /**
    * @brief 获取mesh模型的三维中心
-   * 
-   * @return const std::vector<Eigen::Vector3f>& 
+   *
+   * @return const Eigen::Vector3f&
    */
-  const Eigen::Vector3f& GetMeshModelCenter() const noexcept;
+  const Eigen::Vector3f &GetMeshModelCenter() const noexcept;
 
   /**
    * @brief 获取mesh包围盒转换矩阵
-   * 
-   * @return const Eigen::Matrix4f& 
+   *
+   * @return const Eigen::Matrix4f&
    */
-  const Eigen::Matrix4f& GetOrientBounds() const noexcept;
+  const Eigen::Matrix4f &GetOrientBounds() const noexcept;
 
   /**
    * @brief 获取cv::Mat格式的外观图
-   * 
-   * @return const cv::Mat& 
+   *
+   * @return const cv::Mat&
    */
-  const cv::Mat& GetTextureMap() const noexcept;
+  const cv::Mat &GetTextureMap() const noexcept;
 
   /**
    * @brief 获取物体最小包络盒的尺寸
-   * 
-   * @return const Eigen::Vector3f 
+   *
+   * @return const Eigen::Vector3f
    */
   const Eigen::Vector3f GetObjectDimension() const noexcept;
 
 private:
-  float mesh_diamter_;
-  Eigen::Vector3f mesh_center_;
-  std::vector<aiVector3D> vertices_;
-  std::vector<aiVector3D> vertex_normals_;
+  float                                mesh_diamter_;
+  Eigen::Vector3f                      mesh_center_;
+  std::vector<aiVector3D>              vertices_;
+  std::vector<aiVector3D>              vertex_normals_;
   std::vector<std::vector<aiVector3D>> texcoords_;
-  std::vector<aiFace> faces_;
-  Eigen::Matrix4f obb_;
-  Eigen::Vector3f dim_;
-  cv::Mat texture_map_;
+  std::vector<aiFace>                  faces_;
+  Eigen::Matrix4f                      obb_;
+  Eigen::Vector3f                      dim_;
+  cv::Mat                              texture_map_;
 };
 
-
-struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage 
-{
+struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage {
   // 输入host端rgb图像
   cv::Mat rgb_on_host;
   // 输入host端depth
@@ -142,7 +139,7 @@ struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage
   std::string target_name;
 
   int input_image_height;
-  
+
   int input_image_width;
 
   // device端的输入图像缓存
@@ -168,40 +165,39 @@ struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage
   // **最终输出的位姿** //
   Eigen::Matrix4f actual_pose;
 
-  std::shared_ptr<inference_core::IBlobsBuffer> GetInferBuffer() {
+  std::shared_ptr<inference_core::IBlobsBuffer> GetInferBuffer()
+  {
     return infer_buffer;
   }
 };
 
+#define CHECK_CUDA(result, hint)                    \
+  {                                                 \
+    auto res = (result);                            \
+    if (res != cudaSuccess)                         \
+    {                                               \
+      LOG(ERROR) << hint << "  CudaError: " << res; \
+      return false;                                 \
+    }                                               \
+  }
 
-
-
-#define CHECK_CUDA(result, hint) \
-{ \
-  auto res = (result); \
-  if (res != cudaSuccess) { \
-    LOG(ERROR) << hint << "  CudaError: " << res; \
-    return false; \
-  } \
-}
-
-#define MESSURE_DURATION_AND_CHECK_CUDA(run, hint) \
-{ \
-    auto start = std::chrono::high_resolution_clock::now(); \
-    CHECK_CUDA((run), hint); \
-    auto end = std::chrono::high_resolution_clock::now(); \
-    LOG(INFO) << #run << " cost(us): " \
-            << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); \
-}
+#define MESSURE_DURATION_AND_CHECK_CUDA(run, hint)                                           \
+  {                                                                                          \
+    auto start = std::chrono::high_resolution_clock::now();                                  \
+    CHECK_CUDA((run), hint);                                                                 \
+    auto end = std::chrono::high_resolution_clock::now();                                    \
+    LOG(INFO) << #run << " cost(us): "                                                       \
+              << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count(); \
+  }
 
 #define CHECK_CUDA_THROW(result, hint) \
-{ \
-  if ((result) != cudaSuccess) { \
-    LOG(ERROR) << hint ; \
-    throw std::runtime_error(hint); \
-  } \
-}
-
+  {                                    \
+    if ((result) != cudaSuccess)       \
+    {                                  \
+      LOG(ERROR) << hint;              \
+      throw std::runtime_error(hint);  \
+    }                                  \
+  }
 
 // static auto func_cuda_memory_release = [](float* p) {
 //   CHECK_CUDA(cudaFree(p), "Release cuda memory ptr FAILED!!!");
@@ -210,25 +206,22 @@ struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage
 template <typename T>
 class CudaMemoryDeleter {
 public:
-  void operator()(T* ptr) {
+  void operator()(T *ptr)
+  {
     auto suc = cudaFree(ptr);
-    if (suc != cudaSuccess) {
+    if (suc != cudaSuccess)
+    {
       LOG(INFO) << "Release cuda memory ptr FAILED!!!";
     }
   }
 };
 
-
-
-
-
 // Finds the minimum and maximum vertex from the mesh loaded by assimp
-std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh* mesh);
+std::pair<Eigen::Vector3f, Eigen::Vector3f> FindMinMaxVertex(const aiMesh *mesh);
 
 // Calculates the diameter of the mesh loaded by assimp
-float CalcMeshDiameter(const aiMesh* mesh);
+float CalcMeshDiameter(const aiMesh *mesh);
 
 } // namespace detection_6d
 
-
-#endif  // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_UTILS_HPP_
\ No newline at end of file
+#endif // NVIDIA_ISAAC_ROS_EXTENSIONS_FOUNDATIONPOSE_UTILS_HPP_
diff --git a/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/CudaRaster.hpp b/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/CudaRaster.hpp
index 8daf611..dae4735 100644
--- a/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/CudaRaster.hpp
+++ b/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/CudaRaster.hpp
@@ -69,4 +69,3 @@ class CudaRaster
 
 //------------------------------------------------------------------------
 } // namespace CR
-
diff --git a/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/impl/RasterImpl.hpp b/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/impl/RasterImpl.hpp
index 462621c..124936c 100644
--- a/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/impl/RasterImpl.hpp
+++ b/detection_6d_foundationpose/src/nvdiffrast/common/cudaraster/impl/RasterImpl.hpp
@@ -101,4 +101,3 @@ class RasterImpl
 
 //------------------------------------------------------------------------
 } // namespace CR
-
diff --git a/detection_6d_foundationpose/src/nvdiffrast/common/framework.h b/detection_6d_foundationpose/src/nvdiffrast/common/framework.h
index cccbd7b..b050caf 100644
--- a/detection_6d_foundationpose/src/nvdiffrast/common/framework.h
+++ b/detection_6d_foundationpose/src/nvdiffrast/common/framework.h
@@ -20,4 +20,4 @@
 //------------------------------------------------------------------------
 
 // PATCH for Isaac ROS FoundationPose
-#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) do { cudaError_t err = CUDA_CALL; } while(0)
\ No newline at end of file
+#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) do { cudaError_t err = CUDA_CALL; } while(0)
diff --git a/docker/build_docker.sh b/docker/build_docker.sh
index 14d812e..e935314 100644
--- a/docker/build_docker.sh
+++ b/docker/build_docker.sh
@@ -14,7 +14,7 @@ usage() {
 
 parse_args() {
   if [ "$#" -ne 1 ]; then
-    usage 
+    usage
   fi
   # 解析参数
   for i in "$@"; do
@@ -35,7 +35,7 @@ is_image_exist() {
   if docker images --filter "reference=$name" \
                    --format "{{.Repository}}:{{.Tag}}" | grep -q "$name"; then
     return 0
-  else 
+  else
     return 1
   fi
 }
@@ -54,7 +54,7 @@ build_nvidia_gpu_trt8_image() {
   if is_image_exist ${BUILT_IMAGE_TAG}; then
     echo Image: ${BUILT_IMAGE_TAG} exists! Skip image building process ...
   else
-    docker build -f foundationpose_nvidia_gpu_trt8.dockerfile -t ${BUILT_IMAGE_TAG} . 
+    docker build -f foundationpose_nvidia_gpu_trt8.dockerfile -t ${BUILT_IMAGE_TAG} .
   fi
 }
 
@@ -63,7 +63,7 @@ build_nvidia_gpu_trt10_image() {
   if is_image_exist ${BUILT_IMAGE_TAG}; then
     echo Image: ${BUILT_IMAGE_TAG} exists! Skip image building process ...
   else
-    docker build -f foundationpose_nvidia_gpu_trt10.dockerfile -t ${BUILT_IMAGE_TAG} . 
+    docker build -f foundationpose_nvidia_gpu_trt10.dockerfile -t ${BUILT_IMAGE_TAG} .
   fi
 }
 
@@ -72,7 +72,7 @@ build_jetson_trt8_image() {
   if is_image_exist ${BUILT_IMAGE_TAG}; then
     echo Image: ${BUILT_IMAGE_TAG} exists! Skip image building process ...
   else
-    docker build -f foundationpose_jetson_orin_trt8.dockerfile -t ${BUILT_IMAGE_TAG} . 
+    docker build -f foundationpose_jetson_orin_trt8.dockerfile -t ${BUILT_IMAGE_TAG} .
   fi
 }
 
@@ -81,7 +81,7 @@ build_jetson_trt10_image() {
   if is_image_exist ${BUILT_IMAGE_TAG}; then
     echo Image: ${BUILT_IMAGE_TAG} exists! Skip image building process ...
   else
-    docker build -f foundationpose_jetson_orin_trt10.dockerfile -t ${BUILT_IMAGE_TAG} . 
+    docker build -f foundationpose_jetson_orin_trt10.dockerfile -t ${BUILT_IMAGE_TAG} .
   fi
 }
 
@@ -148,4 +148,4 @@ build_image
 create_container
 
 echo "FoundationPose Base Dev Enviroment Built Successfully!!!"
-echo "Now Run into_docker.sh"
\ No newline at end of file
+echo "Now Run into_docker.sh"
diff --git a/docker/foundationpose_jetson_orin_trt10.dockerfile b/docker/foundationpose_jetson_orin_trt10.dockerfile
index 466e555..1809e37 100644
--- a/docker/foundationpose_jetson_orin_trt10.dockerfile
+++ b/docker/foundationpose_jetson_orin_trt10.dockerfile
@@ -11,7 +11,7 @@ RUN rm /etc/apt/sources.list && \
     echo "deb https://mirrors.ustc.edu.cn/ubuntu-ports/ jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \
     echo "deb https://mirrors.ustc.edu.cn/ubuntu-ports/ jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \
     apt-get update
-    
+
 RUN apt install -y \
     build-essential \
     manpages-dev \
@@ -73,4 +73,4 @@ RUN cd /tmp && \
     rm TensorRT-10.7.0.23.l4t.aarch64-gnu.cuda-12.6.tar.gz && \
     mv TensorRT-10.7.0.23 /usr/src/tensorrt && \
     cp /usr/src/tensorrt/lib/*.so* /usr/lib/aarch64-linux-gnu/ && \
-    cp /usr/src/tensorrt/include/* /usr/include/aarch64-linux-gnu/
\ No newline at end of file
+    cp /usr/src/tensorrt/include/* /usr/include/aarch64-linux-gnu/
diff --git a/docker/foundationpose_jetson_orin_trt8.dockerfile b/docker/foundationpose_jetson_orin_trt8.dockerfile
index b24d5e8..8dbf18b 100644
--- a/docker/foundationpose_jetson_orin_trt8.dockerfile
+++ b/docker/foundationpose_jetson_orin_trt8.dockerfile
@@ -11,7 +11,7 @@ RUN rm /etc/apt/sources.list && \
     echo "deb https://mirrors.ustc.edu.cn/ubuntu-ports/ jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \
     echo "deb https://mirrors.ustc.edu.cn/ubuntu-ports/ jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \
     apt-get update
-    
+
 RUN apt install -y \
     build-essential \
     manpages-dev \
diff --git a/docker/foundationpose_nvidia_gpu_trt10.dockerfile b/docker/foundationpose_nvidia_gpu_trt10.dockerfile
index 8028a8b..2bbf3ce 100644
--- a/docker/foundationpose_nvidia_gpu_trt10.dockerfile
+++ b/docker/foundationpose_nvidia_gpu_trt10.dockerfile
@@ -71,7 +71,7 @@ RUN TENSORRT_MAJOR_VERSION=`echo ${TENSORRT_VERSION} | cut -d '.' -f 1` && \
                        libnvinfer-samples=${TENSORRT_PACAKGE_VERSION}
 
 RUN cd /usr/src/tensorrt/samples \
-    && make -j 
+    && make -j
 
 # foundationpose dependencies
 RUN apt-get install libassimp-dev -y
@@ -82,4 +82,4 @@ RUN cd /tmp && \
     dpkg -i cvcuda-lib-0.12.0_beta-cuda12-x86_64-linux.deb && \
     dpkg -i cvcuda-dev-0.12.0_beta-cuda12-x86_64-linux.deb && \
     rm cvcuda-lib-0.12.0_beta-cuda12-x86_64-linux.deb && \
-    rm cvcuda-dev-0.12.0_beta-cuda12-x86_64-linux.deb
\ No newline at end of file
+    rm cvcuda-dev-0.12.0_beta-cuda12-x86_64-linux.deb
diff --git a/docker/foundationpose_nvidia_gpu_trt8.dockerfile b/docker/foundationpose_nvidia_gpu_trt8.dockerfile
index 9b7ff34..10102bd 100644
--- a/docker/foundationpose_nvidia_gpu_trt8.dockerfile
+++ b/docker/foundationpose_nvidia_gpu_trt8.dockerfile
@@ -71,7 +71,7 @@ RUN TENSORRT_MAJOR_VERSION=`echo ${TENSORRT_VERSION} | cut -d '.' -f 1` && \
                        libnvinfer-samples=${TENSORRT_PACAKGE_VERSION}
 
 RUN cd /usr/src/tensorrt/samples \
-    && make -j 
+    && make -j
 
 # foundationpose dependencies
 RUN apt-get install libassimp-dev -y
@@ -82,4 +82,4 @@ RUN cd /tmp && \
     dpkg -i cvcuda-lib-0.12.0_beta-cuda11-x86_64-linux.deb && \
     dpkg -i cvcuda-dev-0.12.0_beta-cuda11-x86_64-linux.deb && \
     rm cvcuda-lib-0.12.0_beta-cuda11-x86_64-linux.deb && \
-    rm cvcuda-dev-0.12.0_beta-cuda11-x86_64-linux.deb
\ No newline at end of file
+    rm cvcuda-dev-0.12.0_beta-cuda11-x86_64-linux.deb
diff --git a/docker/into_docker.sh b/docker/into_docker.sh
index d803a92..e13c571 100644
--- a/docker/into_docker.sh
+++ b/docker/into_docker.sh
@@ -2,4 +2,4 @@
 
 CONTAINER_NAME="foundationpose"
 docker start $CONTAINER_NAME
-docker exec -it $CONTAINER_NAME /bin/bash
\ No newline at end of file
+docker exec -it $CONTAINER_NAME /bin/bash
diff --git a/docs/build_enviroment_on_jetson.md b/docs/build_enviroment_on_jetson.md
index 8b632c0..1bc143f 100644
--- a/docs/build_enviroment_on_jetson.md
+++ b/docs/build_enviroment_on_jetson.md
@@ -16,4 +16,3 @@ cd ${foundationpose_cpp}/docker/
 bash build_docker.sh --container_type=jetson_trt8
 bash into_docker.sh
 ```
-
diff --git a/docs/gen_3d_obj_with_bundlesdf.md b/docs/gen_3d_obj_with_bundlesdf.md
index d8e929b..afc2a01 100644
--- a/docs/gen_3d_obj_with_bundlesdf.md
+++ b/docs/gen_3d_obj_with_bundlesdf.md
@@ -9,7 +9,7 @@
 2. Build docker image
 ```bash
   cd BundleSDF/docker
-  docker build --network host -t nvcr.io/nvidian/bundlesdf . 
+  docker build --network host -t nvcr.io/nvidian/bundlesdf .
 ```
 3. Build docker container
 ```bash
@@ -28,7 +28,7 @@
 
   # fix opengl package missing issue
   pip install PyOpenGL-accelerate
-``` 
+```
 
 ## 2. 准备数据
 
@@ -82,59 +82,59 @@
   import numpy as np
   import cv2
   import os
-  
+
   # 创建管道
   pipeline = rs.pipeline()
   config = rs.config()
-  
+
   # 打开 .bag 文件
   config.enable_device_from_file('/workspace/realsense_bags/20240912_155301.bag')
-  
+
   # 开始管道
   pipeline.start(config)
-  
+
   # 创建对齐对象
   align = rs.align(rs.stream.color)
-  
+
   # 创建输出目录
   depth_output_dir = 'depth'
   color_output_dir = 'rgb'
   os.makedirs(depth_output_dir, exist_ok=True)
   os.makedirs(color_output_dir, exist_ok=True)
-  
+
   try:
       index = 0
       while True:
           # 等待新帧
           frames = pipeline.wait_for_frames()
-  
+
           # 对齐帧
           aligned_frames = align.process(frames)
-  
+
           # 获取对齐后的深度和颜色帧
           depth_frame = aligned_frames.get_depth_frame()
           color_frame = aligned_frames.get_color_frame()
-  
+
           if not depth_frame or not color_frame:
               continue
-  
+
           # 转换为 NumPy 数组
           depth_image = np.asanyarray(depth_frame.get_data())
           color_image = np.asanyarray(color_frame.get_data())
-  
+
           # 将 BGR 转换为 RGB
           color_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
-  
+
           # 保存图像
           cv2.imwrite(os.path.join(depth_output_dir, f'{index:05d}.png'), depth_image)
           cv2.imwrite(os.path.join(color_output_dir, f'{index:05d}.png'), color_image)
-  
+
           index += 1
-  
+
           # 显示图像（可选）
           cv2.imshow('Depth Image', depth_image)
           cv2.imshow('Color Image', color_image)
-  
+
           # 按 'q' 键退出
           if cv2.waitKey(1) & 0xFF == ord('q'):
               break
@@ -187,7 +187,7 @@
 
 ## 3. 使用BundleSDF构建三维模型
 
-运行第一步，最后可能会报错，似乎不用管他: 
+运行第一步，最后可能会报错，似乎不用管他:
 ```bash
   python run_custom.py --mode run_video --video_dir /home/${USER}/projects/BundleSDF/demo_data --out_folder /home/${USER}/projects/BundleSDF/demo_result --use_segmenter 0 --use_gui 0 --debug_level 1
 ```
@@ -195,4 +195,4 @@
 运行第二步，生成三维模型:
 ```bash
   python run_custom.py --mode global_refine --video_dir /home/${USER}/projects/BundleSDF/demo_data --out_folder /home/${USER}/projects/BundleSDF/demo_result
-```
\ No newline at end of file
+```
diff --git a/easy_deploy_tool b/easy_deploy_tool
new file mode 160000
index 0000000..30001cc
--- /dev/null
+++ b/easy_deploy_tool
@@ -0,0 +1 @@
+Subproject commit 30001ccf0deb35a79f18d7ec802f9e8113fe1212
diff --git a/inference_core/CMakeLists.txt b/inference_core/CMakeLists.txt
deleted file mode 100644
index 2f20d9e..0000000
--- a/inference_core/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-cmake_minimum_required(VERSION 3.8)
-project(inference_core)
-
-add_subdirectory(trt_core)
diff --git a/inference_core/trt_core/CMakeLists.txt b/inference_core/trt_core/CMakeLists.txt
deleted file mode 100644
index 2748734..0000000
--- a/inference_core/trt_core/CMakeLists.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-cmake_minimum_required(VERSION 3.8)
-project(trt_core)
-
-
-add_compile_options(-std=c++17)
-add_compile_options(-O3 -Wextra -Wdeprecated -fPIC)
-set(CMAKE_CXX_STANDARD 17)
-
-set(CMAKE_THREAD_LIBS_INIT "-lpthread")
-set(CMAKE_HAVE_THREADS_LIBRARY 1)
-set(CMAKE_USE_WIN32_THREADS_INIT 0)
-set(CMAKE_USE_PTHREADS_INIT 1)
-set(THREADS_PREFER_PTHREAD_FLAG ON)
-
-
-find_package(CUDA REQUIRED)
-find_package(glog REQUIRED)
-
-set(source_file src/trt_core.cpp
-                src/trt_core_factory.cpp)
-
-
-include_directories(
-  include
-  ${CUDA_INCLUDE_DIRS}
-)
-
-add_library(${PROJECT_NAME} SHARED ${source_file}) 
-
-
-target_link_libraries(${PROJECT_NAME} PUBLIC
-  ${CUDA_LIBRARIES}
-  nvinfer 
-  nvonnxparser
-  deploy_core
-)
-
-
-target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
\ No newline at end of file
diff --git a/inference_core/trt_core/include/trt_core/trt_core.h b/inference_core/trt_core/include/trt_core/trt_core.h
deleted file mode 100644
index 43b4e70..0000000
--- a/inference_core/trt_core/include/trt_core/trt_core.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-19 18:33:05
- * @LastEditTime: 2024-12-02 19:34:00
- * @FilePath: /easy_deploy/inference_core/trt_core/include/trt_core/trt_core.h
- */
-#ifndef __EASY_DEPLOY_TRT_CORE_H
-#define __EASY_DEPLOY_TRT_CORE_H
-
-#include <unordered_map>
-#include "deploy_core/base_infer_core.h"
-
-namespace inference_core {
-
-/**
- * @brief Construct `TrtInferCore` by providing tensorrt engine file path, max(default) blobs
- * shape and blob buffer pool size (defualt=5). If your model is parsed from a onnx model with
- * dynamic blob shape (e.g. blob_dim=-1), a mapping of blob_name and blob_shape should be provided
- * to help `TrtInferCore` alloc a apposite size blob buffer.
- *
- * @param engine_path Tensorrt engine file path.
- * @param blobs_shape Mapping of blob_name and blob_shape.
- * @param mem_buf_size Size of buffer pool.
- */
-std::shared_ptr<BaseInferCore> CreateTrtInferCore(
-    std::string                                                  model_path,
-    const std::unordered_map<std::string, std::vector<int64_t>> &input_blobs_shape  = {},
-    const std::unordered_map<std::string, std::vector<int64_t>> &output_blobs_shape = {},
-    const int                                                    mem_buf_size       = 5);
-
-std::shared_ptr<BaseInferCoreFactory> CreateTrtInferCoreFactory(
-    std::string                                                  model_path,
-    const std::unordered_map<std::string, std::vector<int64_t>> &input_blobs_shape  = {},
-    const std::unordered_map<std::string, std::vector<int64_t>> &output_blobs_shape = {},
-    const int                                                    mem_buf_size       = 5);
-
-} // namespace inference_core
-
-#endif
diff --git a/inference_core/trt_core/src/trt_blob_buffer.hpp b/inference_core/trt_core/src/trt_blob_buffer.hpp
deleted file mode 100644
index 528fc1b..0000000
--- a/inference_core/trt_core/src/trt_blob_buffer.hpp
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-11-25 14:00:38
- * @LastEditTime: 2024-11-26 09:31:01
- * @FilePath: /EasyDeploy/inference_core/trt_core/src/trt_blob_buffer.hpp
- */
-#ifndef __EASY_DEPLOY_TRT_BLOB_BUFFER_H
-#define __EASY_DEPLOY_TRT_BLOB_BUFFER_H
-
-#include <cuda_runtime.h>
-
-#include "deploy_core/blob_buffer.h"
-
-namespace inference_core {
-
-template <typename Type>
-inline Type CumVector(const std::vector<Type> &vec)
-{
-  Type ret = 1;
-  for (const auto &nn : vec)
-  {
-    ret *= nn;
-  }
-
-  return ret;
-}
-
-template <typename Type>
-inline std::string VisualVec(const std::vector<Type> &vec)
-{
-  std::string ret;
-  for (const auto &v : vec)
-  {
-    ret += std::to_string(v) + " ";
-  }
-  return ret;
-}
-
-class TrtBlobBuffer : public IBlobsBuffer {
-public:
-  /**
-   * @brief Overrided from `IBlobsBuffer`, provide the buffer ptr which is used as
-   * input data of tensorrt inference engine. It depends on `SetBlobBuffer` method.
-   *
-   * @param blob_name The blob_name of model.
-   * @return std::pair<void*, DataLocation> . Will return {nullptr, UNKOWN} if `blob_name`
-   * does not match.
-   */
-  std::pair<void *, DataLocation> GetOuterBlobBuffer(const std::string &blob_name) noexcept override
-  {
-    if (outer_map_blob2ptr_.find(blob_name) == outer_map_blob2ptr_.end())
-    {
-      LOG(ERROR) << "[TrtBlobBuffer] `GetOuterBlobBuffer` Got invalid `blob_name`: " << blob_name;
-      return {nullptr, UNKOWN};
-    }
-    return outer_map_blob2ptr_[blob_name];
-  }
-
-  /**
-   * @brief Overrided from `IBlobsBuffer`, users could make tensorrt inference core use customed
-   * data buffer to deploy inference. `data_ptr` and `location` are required to modify inner
-   * mapping.
-   *
-   * @param blob_name The blob_name of model.
-   * @param data_ptr Customed data buffer ptr.
-   * @param location Where the data buffer locates.
-   * @return true Successfully set customed data buffer.
-   * @return false Will return false if `blob_name` does not match, or `data_ptr` is not valid.
-   */
-  bool SetBlobBuffer(const std::string &blob_name,
-                     void              *data_ptr,
-                     DataLocation       location) noexcept override
-  {
-    if (outer_map_blob2ptr_.find(blob_name) == outer_map_blob2ptr_.end())
-    {
-      LOG(ERROR) << "[TrtBlobBuffer] `SetBlobBuffer` Got invalid `blob_name`: " << blob_name;
-      return false;
-    }
-
-    if (location == DataLocation::HOST)
-    {
-      outer_map_blob2ptr_[blob_name] = {inner_map_host_blob2ptr_[blob_name], location};
-    } else
-    {
-      cudaPointerAttributes attr;
-      cudaError_t           status = cudaPointerGetAttributes(&attr, data_ptr);
-      if (status != cudaSuccess || attr.type != cudaMemoryType::cudaMemoryTypeDevice)
-      {
-        LOG(ERROR) << "[TrtBlobBuffer] `SetBlobBuffer` Got "
-                      "invalid `data_ptr` "
-                      "which should be "
-                   << "allocated by `cudaMalloc`, but it "
-                      "is NOT !!!";
-        return false;
-      }
-      outer_map_blob2ptr_[blob_name] = {data_ptr, location};
-    }
-    return true;
-  }
-
-  /**
-   * @brief Overrided from `IBlobsBuffer`, set the default buffer ptr used in tensorrt
-   * engine inference stage. After calling `SetBlobBuffer`, `GetOuterBlobBuffer` could
-   * get certain buffer ptr on `location`.
-   *
-   * @param blob_name The blob_name of model.
-   * @param location Which buffer to use in inference stage.
-   * @return true Successfully set blob buffer location.
-   * @return false Will return false if blob_name does not match.
-   */
-  bool SetBlobBuffer(const std::string &blob_name, DataLocation location) noexcept override
-  {
-    if (outer_map_blob2ptr_.find(blob_name) == outer_map_blob2ptr_.end())
-    {
-      LOG(ERROR) << "[TrtBlobBuffer] `SetBlobBuffer` Got invalid `blob_name`: " << blob_name;
-      return false;
-    }
-
-    outer_map_blob2ptr_[blob_name] = {
-        (location == DataLocation::HOST ? inner_map_host_blob2ptr_[blob_name]
-                                        : inner_map_device_blob2ptr_[blob_name]),
-        location};
-
-    return true;
-  }
-
-  /**
-   * @brief Overrided from `IBlobsBuffer`, set the dynamic blob shape while tensorrt engine
-   * doing inference. Note that `shape` should not has more element number than origin_shape
-   * which is determined by model build stage. Dynamic shape suportted tensorrt inference
-   * core should constructed by customed max blob shape params. There should not be `0` or any
-   * negative values in `shape` vec.
-   *
-   * @note Please make sure your model supportes dynamic blob shape. Otherwise, it will leads
-   * to unknown results.
-   *
-   * @param blob_name The blob_name of model.
-   * @param shape Dynamic blob shape.
-   * @return true
-   * @return false Will return false if `shape` is not valid or `blob_name` does not match.
-   */
-  bool SetBlobShape(const std::string          &blob_name,
-                    const std::vector<int64_t> &shape) noexcept override
-  {
-    if (map_blob_name2shape_.find(blob_name) == map_blob_name2shape_.end())
-    {
-      LOG(ERROR) << "[TrtBlobBuffer] `SetBlobShape` Got invalid `blob_name`: " << blob_name;
-      return false;
-    }
-    const auto     &origin_shape      = map_blob_name2shape_[blob_name];
-    const long long ori_element_count = CumVector(origin_shape);
-    const long long dyn_element_count = CumVector(shape);
-    if (origin_shape.size() != shape.size() || dyn_element_count > ori_element_count ||
-        dyn_element_count < 0)
-    {
-      const std::string origin_shape_in_str = VisualVec(origin_shape);
-      const std::string shape_in_str        = VisualVec(shape);
-      LOG(ERROR) << "[TrtBlobBuffer] `SetBlobShape` Got invalid `shape` input. "
-                 << "`shape`: " << shape_in_str << "\t"
-                 << "`origin_shape`: " << origin_shape_in_str;
-      return false;
-    }
-    map_blob_name2shape_[blob_name] = shape;
-    return true;
-  }
-
-  /**
-   * @brief Overrided from `IBlobsBuffer`, provide default or dynamic blob shape. Default
-   * blob shape is defined while tensorrt inference core is built. Will return dynamic blob
-   * shape if `SetBlobShape` is called before `GetBlobShape`.
-   *
-   * @param blob_name The blob_name of model.
-   * @return const std::vector<int64_t>& . A const reference to blob shape recorded in buffer.
-   */
-  const std::vector<int64_t> &GetBlobShape(const std::string &blob_name) const noexcept override
-  {
-    if (map_blob_name2shape_.find(blob_name) == map_blob_name2shape_.end())
-    {
-      LOG(ERROR) << "[TrtBlobBuffer] `GetBlobShape` Got invalid `blob_name`: " << blob_name;
-      static std::vector<int64_t> empty_shape;
-      return empty_shape;
-    }
-    return map_blob_name2shape_.at(blob_name);
-  }
-
-  /**
-   * @brief Overrided from `IBlobsBuffer`, provide number of blobs.
-   *
-   * @return size_t
-   */
-  size_t Size() const noexcept override
-  {
-    return outer_map_blob2ptr_.size();
-  }
-
-  /**
-   * @brief Overrided from `IBlobsBuffer`, release the buffer instance.
-   *
-   */
-  void Release() noexcept override
-  {
-    // release device buffer
-    for (void *ptr : device_blobs_buffer_)
-    {
-      if (ptr != nullptr)
-        cudaFree(ptr);
-    }
-    // release host buffer
-    for (void *ptr : host_blobs_buffer_)
-    {
-      if (ptr != nullptr)
-        delete[] reinterpret_cast<u_char *>(ptr);
-    }
-    device_blobs_buffer_.clear();
-    host_blobs_buffer_.clear();
-  }
-
-  /**
-   * @brief Overrided from `IBlobsBuffer`, reset the buffer instance which will not
-   * release the buffer allocated. Mempool will call `Reset` after buffer instance is
-   * returned by user.
-   *
-   */
-  void Reset() noexcept override
-  {
-    for (const auto &p_name_ptr : inner_map_host_blob2ptr_)
-    {
-      outer_map_blob2ptr_[p_name_ptr.first] = {p_name_ptr.second, DataLocation::HOST};
-    }
-  }
-
-  ~TrtBlobBuffer()
-  {
-    Release();
-  }
-  // no copy
-  TrtBlobBuffer()                                 = default;
-  TrtBlobBuffer(const TrtBlobBuffer &)            = delete;
-  TrtBlobBuffer &operator=(const TrtBlobBuffer &) = delete;
-
-  // mapping blob_name and buffer ptrs
-  std::unordered_map<std::string, std::pair<void *, DataLocation>> outer_map_blob2ptr_;
-  std::unordered_map<std::string, void *>                          inner_map_device_blob2ptr_;
-  std::unordered_map<std::string, void *>                          inner_map_host_blob2ptr_;
-
-  // buffer ptr vector, used while doing inference with tensorrt engine
-  std::vector<void *> buffer_input_core_;
-
-  // maintain buffer ptrs.
-  std::vector<void *> device_blobs_buffer_;
-  std::vector<void *> host_blobs_buffer_;
-
-  // mapping blob_name and dynamic blob shape
-  std::unordered_map<std::string, std::vector<int64_t>> map_blob_name2shape_;
-};
-
-} // namespace inference_core
-
-#endif
\ No newline at end of file
diff --git a/inference_core/trt_core/src/trt_core.cpp b/inference_core/trt_core/src/trt_core.cpp
deleted file mode 100644
index 7195152..0000000
--- a/inference_core/trt_core/src/trt_core.cpp
+++ /dev/null
@@ -1,495 +0,0 @@
-#include "trt_core/trt_core.h"
-
-// std
-#include <fstream>
-#include <iostream>
-#include <unordered_map>
-#include <unordered_set>
-
-// thirdparty
-#include <NvInfer.h>
-#include <NvOnnxParser.h>
-#include <cuda_runtime_api.h>
-
-#include "trt_blob_buffer.hpp"
-
-namespace inference_core {
-
-class TensorrtLogger : public nvinfer1::ILogger {
-public:
-  void log(Severity severity, const char *msg) noexcept override
-  {
-    if (severity == Severity::kINFO)
-      LOG(INFO) << "[Tensorrt] : " << msg;
-    else if (severity == Severity::kERROR)
-      LOG(ERROR) << "[Tensorrt] : " << msg;
-    else if (severity == Severity::kWARNING)
-      LOG(WARNING) << "[Tensorrt] : " << msg;
-  }
-};
-
-/**
- * @brief `TrtInferCore` is derived from `BaseInferCore` and override the abstract methods
- * of `BaseInferCore`. It wraps tensorrt engine loading and inference process.
- *
- */
-class TrtInferCore : public BaseInferCore {
-public:
-  /**
-   * @brief Construct `TrtInferCore` by providing tensorrt engine file path and blob buffer
-   * pool size (defualt=5). This constructor does not need a map of blob_name and blob_shape,
-   * while it will resolve model information by it self.
-   *
-   * @warning This constructor only should be used if the blobs shape of input model is fixed.
-   * If you parse a model with dynamic blob shape, a exception will be thrown.
-   *
-   * @param engine_path Tensorrt engine file path.
-   * @param mem_buf_size Size of buffer pool.
-   */
-  TrtInferCore(const std::string engine_path, const int mem_buf_size = 5);
-
-  /**
-   * @brief Construct `TrtInferCore` by providing tensorrt engine file path, max(default) blobs
-   * shape and blob buffer pool size (defualt=5). If your model is parsed from a onnx model with
-   * dynamic blob shape (e.g. blob_dim=-1), a mapping of blob_name and blob_shape should be provided
-   * to help `TrtInferCore` alloc a apposite size blob buffer.
-   *
-   * @param engine_path Tensorrt engine file path.
-   * @param blobs_shape Mapping of blob_name and blob_shape.
-   * @param mem_buf_size Size of buffer pool.
-   */
-  TrtInferCore(const std::string                                            engine_path,
-               const std::unordered_map<std::string, std::vector<int64_t>> &blobs_shape,
-               const int                                                    mem_buf_size = 5);
-
-  /**
-   * @brief Overrided from `BaseInferCore`, construct a instance of `TrtBlobBuffer` and return
-   * the shared ptr of it. It is used by mem buffer pool in `BaseInferCore`, or users who wants
-   * to alloc a brand new buffer.
-   *
-   * @return std::shared_ptr<IBlobsBuffer>
-   */
-  std::shared_ptr<IBlobsBuffer> AllocBlobsBuffer() override;
-
-  /**
-   * @brief Overrided from `BaseInferCore`. The `PreProcess` stage of tensorrt inference. It
-   * prepares device buffers if user writes into host buffer derectly.
-   *
-   * @param buffer a common "pipeline" shared ptr.
-   * @return true
-   * @return false
-   */
-  bool PreProcess(std::shared_ptr<async_pipeline::IPipelinePackage> buffer) override;
-
-  /**
-   * @brief Overrided from `BaseInferCore`. The `Inference` stage of tensorrt inference.
-   *
-   * @param buffer a common "pipeline" shared ptr.
-   * @return true
-   * @return false
-   */
-  bool Inference(std::shared_ptr<async_pipeline::IPipelinePackage> buffer) override;
-
-  /**
-   * @brief Overrided from `BaseInferCore`. The `PostProcess` stage of tensorrt inference.
-   * It will prepare output host buffer if user needs the output of model be accessable on host.
-   *
-   * @param buffer a common "pipeline" shared ptr.
-   * @return true
-   * @return false
-   */
-  bool PostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> buffer) override;
-
-  ~TrtInferCore() override;
-
-private:
-  /**
-   * @brief Load the tensorrt engine file on `engine_path`.
-   *
-   * @param engine_path
-   */
-  void LoadEngine(const std::string &engine_path);
-
-  /**
-   * @brief Automatically resolve model information.
-   *
-   * @param blobs_shape
-   */
-  void ResolveModelInformation(std::unordered_map<std::string, std::vector<int64_t>> &blobs_shape);
-
-private:
-  // some members related to tensorrt
-  TensorrtLogger                         logger_{};
-  std::unique_ptr<nvinfer1::IRuntime>    runtime_{nullptr};
-  std::unique_ptr<nvinfer1::ICudaEngine> engine_{nullptr};
-
-  /**
-   * @brief Due to tensorrt needs a unique inference context in every thread, we should maintain a
-   * mapping of thread_id and a ptr of tensorrt context. A context will be created when there is a
-   * new thread calls `Inference`. These contexts will be released when this `TrtInferCore` instance
-   * is released.
-   */
-  std::unordered_map<std::thread::id, std::shared_ptr<nvinfer1::IExecutionContext>>
-             s_map_tid2context_;
-  std::mutex s_context_lck_;
-
-  // cuda streams used in three stage.
-  cudaStream_t preproces_stream_, inference_stream_, postprocess_stream_;
-
-  // some model information mapping
-  std::unordered_map<std::string, std::vector<int64_t>> map_blob_name2shape_;
-  std::unordered_map<std::string, int>                  map_input_blob_name2index_;
-  std::unordered_map<std::string, int>                  map_output_blob_name2index_;
-  std::unordered_map<std::string, size_t>               map_blob_name2size_;
-};
-
-TrtInferCore::TrtInferCore(std::string engine_path, const int mem_buf_size)
-{
-  LoadEngine(engine_path);
-  ResolveModelInformation(map_blob_name2shape_);
-
-  BaseInferCore::Init(mem_buf_size);
-
-  cudaStreamCreate(&preproces_stream_);
-  cudaStreamCreate(&inference_stream_);
-  cudaStreamCreate(&postprocess_stream_);
-}
-
-TrtInferCore::TrtInferCore(const std::string                                            engine_path,
-                           const std::unordered_map<std::string, std::vector<int64_t>> &blobs_shape,
-                           const int mem_buf_size)
-{
-  LoadEngine(engine_path);
-  map_blob_name2shape_ = blobs_shape;
-  ResolveModelInformation(map_blob_name2shape_);
-
-  BaseInferCore::Init(mem_buf_size);
-
-  cudaStreamCreate(&preproces_stream_);
-  cudaStreamCreate(&inference_stream_);
-  cudaStreamCreate(&postprocess_stream_);
-}
-
-TrtInferCore::~TrtInferCore()
-{
-  BaseInferCore::Release();
-}
-
-void TrtInferCore::LoadEngine(const std::string &engine_path)
-{
-  std::ifstream file(engine_path, std::ios::binary);
-  if (!file.good())
-  {
-    throw std::runtime_error("[TrtInferCore] Failed to read engine file!!!");
-  }
-
-  std::vector<char> data;
-
-  file.seekg(0, file.end);
-  const auto size = file.tellg();
-  file.seekg(0, file.beg);
-
-  data.resize(size);
-  file.read(data.data(), size);
-
-  file.close();
-
-  runtime_.reset(nvinfer1::createInferRuntime(logger_));
-
-  engine_.reset(runtime_->deserializeCudaEngine(data.data(), data.size()));
-  if (engine_ == nullptr)
-  {
-    throw std::runtime_error("[TrtInferCore] Failed to create trt engine!!!");
-  }
-  LOG(INFO) << "[TrtInferCore] created tensorrt engine and "
-               "context ! ";
-}
-
-void TrtInferCore::ResolveModelInformation(
-    std::unordered_map<std::string, std::vector<int64_t>> &blobs_shape)
-{
-  const int blob_number = engine_->getNbIOTensors();
-  LOG(INFO) << "[TrtInferCore] model has " << blob_number << " blobs";
-  CHECK(blob_number >= 2);
-
-  bool resolve_blob_shape = blobs_shape.empty();
-
-  for (int i = 0; i < blob_number; ++i)
-  {
-    const char    *blob_name = engine_->getIOTensorName(i);
-    nvinfer1::Dims dim       = engine_->getTensorShape(blob_name);
-
-    const std::string s_blob_name(blob_name);
-    if (engine_->getTensorIOMode(blob_name) == nvinfer1::TensorIOMode::kINPUT)
-    {
-      map_input_blob_name2index_.emplace(s_blob_name, i);
-    } else
-    {
-      map_output_blob_name2index_.emplace(s_blob_name, i);
-    }
-
-    if (resolve_blob_shape)
-    {
-      blobs_shape[s_blob_name] = std::vector<int64_t>();
-      for (int j = 0; j < dim.nbDims; ++j)
-      {
-        // 检查是否包含动态shape，自动解析暂不支持动态shape
-        if (dim.d[j] <= 0)
-        {
-          throw std::runtime_error("[TrtInferCore] unsupport blob dim:" + std::to_string(dim.d[j]) +
-                                   ", use explicit blob shape consturctor instead");
-        }
-        blobs_shape[s_blob_name].push_back(dim.d[j]);
-      }
-
-      std::string s_dim;
-      for (auto d : dim.d)
-      {
-        s_dim += std::to_string(d) + " ";
-      }
-      LOG(INFO) << "[TrtInferCore] blob name : " << blob_name << " dims : " << s_dim;
-    }
-
-    size_t blob_byte_size = sizeof(float);
-    if (blobs_shape.find(s_blob_name) == blobs_shape.end())
-    {
-      throw std::runtime_error("[TrtInferCore] blob name: " + s_blob_name +
-                               " not found in provided blobs_shape map !!!");
-    }
-    for (const int64_t d : blobs_shape[s_blob_name])
-    {
-      blob_byte_size *= d;
-    }
-
-    map_blob_name2size_[s_blob_name] = blob_byte_size;
-  }
-}
-
-std::shared_ptr<IBlobsBuffer> TrtInferCore::AllocBlobsBuffer()
-{
-  auto ret = std::make_shared<TrtBlobBuffer>();
-
-  const int blob_number = engine_->getNbIOTensors();
-  CHECK(blob_number >= 2);
-  ret->device_blobs_buffer_.resize(blob_number);
-  ret->host_blobs_buffer_.resize(blob_number);
-
-  for (int i = 0; i < blob_number; ++i)
-  {
-    const std::string s_blob_name    = engine_->getIOTensorName(i);
-    int64_t           blob_byte_size = sizeof(float);
-    const auto       &blob_shape     = map_blob_name2shape_[s_blob_name];
-    for (const int64_t d : blob_shape)
-    {
-      blob_byte_size *= d;
-    }
-
-    // alloc buffer memory
-    // on device
-    CHECK(cudaMalloc(&ret->device_blobs_buffer_[i], blob_byte_size) == cudaSuccess);
-    CHECK(cudaMemset(ret->device_blobs_buffer_[i], 0, blob_byte_size) == cudaSuccess);
-    CHECK(cudaDeviceSynchronize() == cudaSuccess);
-    // on host
-    ret->host_blobs_buffer_[i] = new u_char[blob_byte_size];
-
-    // maintain buffer ptr
-    ret->outer_map_blob2ptr_.emplace(s_blob_name,
-                                     std::pair{ret->host_blobs_buffer_[i], DataLocation::HOST});
-    // mapping blob_name and buffer_ptr
-    ret->inner_map_device_blob2ptr_.emplace(s_blob_name, ret->device_blobs_buffer_[i]);
-    ret->inner_map_host_blob2ptr_.emplace(s_blob_name, ret->host_blobs_buffer_[i]);
-
-    // mapping blob_name and default blob_shape
-    ret->map_blob_name2shape_.emplace(s_blob_name, blob_shape);
-  }
-
-  // initialize the buffer ptr vector which will be used when tensorrt engine do inference.
-  ret->buffer_input_core_ = ret->device_blobs_buffer_;
-
-  return ret;
-}
-
-bool TrtInferCore::PreProcess(std::shared_ptr<async_pipeline::IPipelinePackage> buffer)
-{
-  CHECK_STATE(buffer != nullptr, "[TrtInferCore] PreProcess got WRONG input data format!");
-  auto p_buf = std::dynamic_pointer_cast<TrtBlobBuffer>(buffer->GetInferBuffer());
-  CHECK_STATE(p_buf != nullptr, "[TrtInferCore] PreProcess got WRONG p_buf data format!");
-
-  // Set the input buffer data
-  for (const auto &p_name_index : map_input_blob_name2index_)
-  {
-    const std::string &s_blob_name = p_name_index.first;
-    const int          index       = p_name_index.second;
-
-    // Get the customed blob buffer data information, including data ptr and location.
-    const auto &p_ptr_loc = p_buf->GetOuterBlobBuffer(s_blob_name);
-    // Transport buffer data from host to device, if the customed blob data is on host.
-    if (p_ptr_loc.second == DataLocation::HOST)
-    {
-      p_buf->buffer_input_core_[index] = p_buf->inner_map_device_blob2ptr_[s_blob_name];
-      cudaMemcpyAsync(p_buf->buffer_input_core_[index], p_ptr_loc.first,
-                      map_blob_name2size_[s_blob_name], cudaMemcpyHostToDevice, preproces_stream_);
-    } else
-    {
-      p_buf->buffer_input_core_[index] = p_ptr_loc.first;
-    }
-  }
-
-  // Set the output buffer data ptr. Allways use inner pre-allocated device buffer.
-  for (const auto &p_name_index : map_output_blob_name2index_)
-  {
-    const std::string &s_blob_name   = p_name_index.first;
-    const int          index         = p_name_index.second;
-    p_buf->buffer_input_core_[index] = p_buf->inner_map_device_blob2ptr_[s_blob_name];
-  }
-
-  cudaStreamSynchronize(preproces_stream_);
-
-  return true;
-}
-
-bool TrtInferCore::Inference(std::shared_ptr<async_pipeline::IPipelinePackage> buffer)
-{
-  // Create tensorrt context if this is the first time execution of this thread.
-  std::thread::id cur_thread_id = std::this_thread::get_id();
-  if (s_map_tid2context_.find(cur_thread_id) == s_map_tid2context_.end())
-  {
-    std::shared_ptr<nvinfer1::IExecutionContext> context{engine_->createExecutionContext()};
-    {
-      std::unique_lock<std::mutex> u_lck(s_context_lck_);
-      s_map_tid2context_.insert({cur_thread_id, context});
-    }
-  }
-  auto context = s_map_tid2context_[cur_thread_id];
-
-  // Get buffer ptr
-  CHECK_STATE(buffer != nullptr, "[TrtInferCore] PreProcess got WRONG input data format!");
-  auto p_buf = std::dynamic_pointer_cast<TrtBlobBuffer>(buffer->GetInferBuffer());
-  CHECK_STATE(p_buf != nullptr, "[TrtInferCore] PreProcess got WRONG p_buf data format!");
-
-  TrtBlobBuffer &buf = *p_buf;
-  // Set dynamic blob shape
-  for (const auto &p_name_shape : buf.map_blob_name2shape_)
-  {
-    const auto &s_blob_name = p_name_shape.first;
-    const auto &v_shape     = p_name_shape.second;
-
-    if (engine_->getTensorIOMode(s_blob_name.c_str()) != nvinfer1::TensorIOMode::kINPUT)
-    {
-      continue;
-    }
-
-    nvinfer1::Dims dynamic_dim;
-    dynamic_dim.nbDims = v_shape.size();
-    for (size_t i = 0; i < v_shape.size(); ++i)
-    {
-      dynamic_dim.d[i] = v_shape[i];
-    }
-    CHECK_STATE(context->setInputShape(s_blob_name.c_str(), dynamic_dim),
-                "[TrtInferCore] Inference execute `context->setInputShape` failed!!!");
-  }
-
-#if NV_TENSORRT_MAJOR == 10
-  // CHECK_STATE(context->allInputDimensionsSpecified(), 
-  //             "[TrtInferCore] Got unspecified dimensions of input!!!");
-              
-  for (const auto &p_name_index : map_input_blob_name2index_)
-  {
-    const std::string &s_blob_name   = p_name_index.first;
-    const int          index         = p_name_index.second;
-    context->setTensorAddress(s_blob_name.c_str(), buf.buffer_input_core_[index]);
-  }
-  for (const auto &p_name_index : map_output_blob_name2index_)
-  {
-    const std::string &s_blob_name   = p_name_index.first;
-    const int          index         = p_name_index.second;
-    context->setTensorAddress(s_blob_name.c_str(), buf.buffer_input_core_[index]);
-  }
-  context->enqueueV3(inference_stream_);
-
-#else
-  // Do inference use `buf.buffer_input_core_` which is prepared by `PreProcess` stage.
-  CHECK_STATE(context->enqueueV2(buf.buffer_input_core_.data(), inference_stream_, nullptr),
-              "[TrtInferCore] Inference execute `context->enqueueV2` failed!!!");
-#endif
-
-  cudaStreamSynchronize(inference_stream_);
-  return true;
-}
-
-bool TrtInferCore::PostProcess(std::shared_ptr<async_pipeline::IPipelinePackage> buffer)
-{
-  CHECK_STATE(buffer != nullptr, "[TrtInferCore] PreProcess got WRONG input data format!");
-  auto p_buf = std::dynamic_pointer_cast<TrtBlobBuffer>(buffer->GetInferBuffer());
-  CHECK_STATE(p_buf != nullptr, "[TrtInferCore] PreProcess got WRONG p_buf data format!");
-
-  for (const auto &p_name_index : map_output_blob_name2index_)
-  {
-    const std::string &s_blob_name = p_name_index.first;
-    const int          index       = p_name_index.second;
-    const auto        &p_ptr_loc   = p_buf->GetOuterBlobBuffer(s_blob_name);
-    // Transport output buffer from device to host, if user needs host readable data.
-    if (p_ptr_loc.second == DataLocation::HOST)
-    {
-      cudaMemcpyAsync(p_ptr_loc.first, p_buf->buffer_input_core_[index],
-                      map_blob_name2size_[s_blob_name], cudaMemcpyDeviceToHost,
-                      postprocess_stream_);
-    }
-    // Transport output buffer from local device buffer to given device buffer.
-    else if (p_ptr_loc.first != p_buf->buffer_input_core_[index])
-    {
-      cudaMemcpyAsync(p_ptr_loc.first, p_buf->buffer_input_core_[index],
-                      map_blob_name2size_[s_blob_name], cudaMemcpyDeviceToDevice,
-                      postprocess_stream_);
-    }
-  }
-
-  cudaStreamSynchronize(postprocess_stream_);
-  return true;
-}
-
-static bool FileSuffixCheck(const std::string &file_path, const std::string &suffix)
-{
-  const size_t mark = file_path.rfind('.');
-  std::string  suf;
-  return mark != file_path.npos &&
-         (suf = file_path.substr(mark, file_path.size() - mark)) == suffix;
-}
-
-std::shared_ptr<BaseInferCore> CreateTrtInferCore(std::string model_path, const int mem_buf_size)
-{
-  if (!FileSuffixCheck(model_path, ".engine"))
-  {
-    throw std::invalid_argument("Trt infer core expects file end with `.engine`. But got " +
-                                model_path + " instead");
-  }
-
-  return std::make_shared<TrtInferCore>(model_path, mem_buf_size);
-}
-
-std::shared_ptr<BaseInferCore> CreateTrtInferCore(
-    std::string                                                  model_path,
-    const std::unordered_map<std::string, std::vector<int64_t>> &input_blobs_shape,
-    const std::unordered_map<std::string, std::vector<int64_t>> &output_blobs_shape,
-    const int                                                    mem_buf_size)
-{
-  if (!FileSuffixCheck(model_path, ".engine"))
-  {
-    throw std::invalid_argument("Trt infer core expects file end with `.engine`. But got " +
-                                model_path + " instead");
-  }
-
-  std::unordered_map<std::string, std::vector<int64_t>> blobs_shape;
-  for (const auto &p : input_blobs_shape)
-  {
-    blobs_shape.insert(p);
-  }
-  for (const auto &p : output_blobs_shape)
-  {
-    blobs_shape.insert(p);
-  }
-
-  return std::make_shared<TrtInferCore>(model_path, blobs_shape, mem_buf_size);
-}
-
-} // namespace inference_core
diff --git a/inference_core/trt_core/src/trt_core_factory.cpp b/inference_core/trt_core/src/trt_core_factory.cpp
deleted file mode 100644
index b88bf52..0000000
--- a/inference_core/trt_core/src/trt_core_factory.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * @Description:
- * @Author: Teddywesside 18852056629@163.com
- * @Date: 2024-12-02 19:35:03
- * @LastEditTime: 2024-12-02 19:41:35
- * @FilePath: /easy_deploy/inference_core/trt_core/src/trt_core_factory.cpp
- */
-#include "trt_core/trt_core.h"
-
-namespace inference_core {
-
-struct TrtInferCoreParams {
-  std::string                                           model_path;
-  std::unordered_map<std::string, std::vector<int64_t>> input_blobs_shape;
-  std::unordered_map<std::string, std::vector<int64_t>> output_blobs_shape;
-  int                                                   mem_buf_size;
-};
-
-class TrtInferCoreFactory : public BaseInferCoreFactory {
-public:
-  TrtInferCoreFactory(TrtInferCoreParams params) : params_(params)
-  {}
-
-  std::shared_ptr<BaseInferCore> Create() override
-  {
-    return CreateTrtInferCore(params_.model_path, params_.input_blobs_shape,
-                              params_.output_blobs_shape, params_.mem_buf_size);
-  }
-
-private:
-  TrtInferCoreParams params_;
-};
-
-std::shared_ptr<BaseInferCoreFactory> CreateTrtInferCoreFactory(
-    std::string                                                  model_path,
-    const std::unordered_map<std::string, std::vector<int64_t>> &input_blobs_shape,
-    const std::unordered_map<std::string, std::vector<int64_t>> &output_blobs_shape,
-    const int                                                    mem_buf_size)
-{
-  TrtInferCoreParams params;
-  params.model_path         = model_path;
-  params.input_blobs_shape  = input_blobs_shape;
-  params.output_blobs_shape = output_blobs_shape;
-  params.mem_buf_size       = mem_buf_size;
-
-  return std::make_shared<TrtInferCoreFactory>(params);
-}
-
-} // namespace inference_core
\ No newline at end of file
diff --git a/simple_tests/CMakeLists.txt b/simple_tests/CMakeLists.txt
index 700f710..a119eb5 100644
--- a/simple_tests/CMakeLists.txt
+++ b/simple_tests/CMakeLists.txt
@@ -9,7 +9,7 @@ find_package(GTest REQUIRED)
 find_package(glog REQUIRED)
 find_package(OpenCV REQUIRED)
 
-set(source_file 
+set(source_file
   src/test_foundationpose.cpp
 )
 
@@ -20,9 +20,9 @@ include_directories(
   ${OpenCV_INCLUDE_DIRS}
 )
 
-add_executable(${PROJECT_NAME} ${source_file})   
+add_executable(${PROJECT_NAME} ${source_file})
 
-target_link_libraries(${PROJECT_NAME} PUBLIC 
+target_link_libraries(${PROJECT_NAME} PUBLIC
   GTest::gtest_main
   glog::glog
   ${OpenCV_LIBS}
@@ -33,4 +33,4 @@ target_link_libraries(${PROJECT_NAME} PUBLIC
 
 
 
-target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
\ No newline at end of file
+target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
diff --git a/simple_tests/include/tests/fps_counter.h b/simple_tests/include/tests/fps_counter.h
index 4bd05cb..4458e76 100644
--- a/simple_tests/include/tests/fps_counter.h
+++ b/simple_tests/include/tests/fps_counter.h
@@ -7,51 +7,55 @@
 
 class FPSCounter {
 public:
-    // 构造函数，初始化累加值和开始时间
-    FPSCounter() : sum(0), is_running(false) {}
-
-    // 开始计时
-    void Start() {
-        start_time = std::chrono::high_resolution_clock::now();
-        sum = 0;
-        is_running = true;
+  // Constructor: zeroes the accumulated count and marks the counter as not running.
+  FPSCounter() : sum(0), is_running(false)
+  {}
+
+  // Start timing: record the current time, reset the count and set the running flag.
+  void Start()
+  {
+    start_time = std::chrono::high_resolution_clock::now();
+    sum        = 0;
+    is_running = true;
+  }
+
+  // Add `i` to the accumulated count; before Start() it only logs an error and returns.
+  void Count(int i)
+  {
+    if (!is_running)
+    {
+      LOG(ERROR) << "Please call Start() before counting.";
+      return;
     }
-
-    // 增加帧数计数
-    void Count(int i) {
-        if (!is_running) {
-            LOG(ERROR) << "Please call Start() before counting.";
-            return;
-        }
-        sum += i;
+    sum += i;
+  }
+
+  // Rate since Start(): accumulated count per second of elapsed time; 0.0 if not started.
+  double GetFPS()
+  {
+    if (!is_running)
+    {
+      LOG(ERROR) << "Please call Start() before calculating FPS.";
+      return 0.0;
     }
 
-    // 获取 FPS
-    double GetFPS() {
-        if (!is_running) {
-            LOG(ERROR) << "Please call Start() before calculating FPS.";
-            return 0.0;
-        }
-
-        auto current_time = std::chrono::high_resolution_clock::now();
-        std::chrono::duration<double> duration = current_time - start_time;
-        double duration_seconds = 
-                    std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
-        
-        if (duration_seconds == 0) {
-            return 0.0; // 避免除以零
-        }
-        
-        return sum / duration_seconds * 1000;
+    auto                          current_time = std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double> duration     = current_time - start_time;
+    // NOTE: despite the name this is in MILLISECONDS (the return below rescales by 1000).
+    double duration_seconds = std::chrono::duration<double, std::milli>(duration).count();
+
+    if (duration_seconds <= 0)
+    {
+      return 0.0; // avoid dividing by zero (or a negative span if the clock stepped back)
+    }
 
+    return sum / duration_seconds * 1000;
+  }
+
 private:
-    int sum;  // 累加值
-    bool is_running;  // 计时是否运行
-    std::chrono::high_resolution_clock::time_point start_time;  // 开始时间
+  int                                            sum;        // accumulated count
+  bool                                           is_running; // set to true by Start()
+  std::chrono::high_resolution_clock::time_point start_time; // time recorded by Start()
 };
 
-
-
-
-#endif
\ No newline at end of file
+#endif // ifndef __TESTS_ALL_IN_ONE_FPS_COUNTER_H
diff --git a/simple_tests/include/tests/fs_util.hpp b/simple_tests/include/tests/fs_util.hpp
index d6bfdca..24f6d3e 100644
--- a/simple_tests/include/tests/fs_util.hpp
+++ b/simple_tests/include/tests/fs_util.hpp
@@ -7,20 +7,24 @@
 
 namespace fs = std::filesystem;
 
-inline std::vector<fs::path> get_files_in_directory(const fs::path& directory) {
-    std::vector<fs::path> files;
-    // 检查目录是否存在
-    if (!fs::exists(directory) || !fs::is_directory(directory)) {
-        std::cerr << "Directory does not exist or is not a directory." << std::endl;
-        return files;
-    }
+inline std::vector<fs::path> get_files_in_directory(const fs::path &directory)
+{
+  std::vector<fs::path> files;
+  // Return an empty list (after reporting on stderr) if the path is missing or not a directory.
+  if (!fs::exists(directory) || !fs::is_directory(directory))
+  {
+    std::cerr << "Directory does not exist or is not a directory." << std::endl;
+    return files;
+  }
 
-    // 递归遍历目录
-    for (const auto& entry : fs::recursive_directory_iterator(directory)) {
-        if (entry.is_regular_file()) {
-            files.push_back(fs::absolute(entry.path()));
-        }
+  // Walk the directory tree recursively, collecting the absolute path of every regular file.
+  for (const auto &entry : fs::recursive_directory_iterator(directory))
+  {
+    if (entry.is_regular_file())
+    {
+      files.push_back(fs::absolute(entry.path()));
     }
+  }
 
-    return files;
-}
\ No newline at end of file
+  return files;
+}
diff --git a/simple_tests/include/tests/help_func.hpp b/simple_tests/include/tests/help_func.hpp
index 5a1bcfd..6bd7018 100644
--- a/simple_tests/include/tests/help_func.hpp
+++ b/simple_tests/include/tests/help_func.hpp
@@ -7,13 +7,13 @@
 #include <Eigen/Dense>
 #include <fstream>
 
-inline std::tuple<cv::Mat, cv::Mat, cv::Mat> ReadRgbDepthMask(const std::string& rgb_path,
-                                                       const std::string& depth_path,
-                                                       const std::string& mask_path)
+inline std::tuple<cv::Mat, cv::Mat, cv::Mat> ReadRgbDepthMask(const std::string &rgb_path,
+                                                              const std::string &depth_path,
+                                                              const std::string &mask_path)
 {
-  cv::Mat rgb = cv::imread(rgb_path);
+  cv::Mat rgb   = cv::imread(rgb_path);
   cv::Mat depth = cv::imread(depth_path, cv::IMREAD_UNCHANGED);
-  cv::Mat mask = cv::imread(mask_path, cv::IMREAD_UNCHANGED);
+  cv::Mat mask  = cv::imread(mask_path, cv::IMREAD_UNCHANGED);
 
   CHECK(!rgb.empty()) << "Failed reading rgb from path : " << rgb_path;
   CHECK(!depth.empty()) << "Failed reading depth from path : " << depth_path;
@@ -23,7 +23,8 @@ inline std::tuple<cv::Mat, cv::Mat, cv::Mat> ReadRgbDepthMask(const std::string&
   depth = depth / 1000.f;
 
   cv::cvtColor(rgb, rgb, cv::COLOR_BGR2RGB);
-  if (mask.channels() == 3) {
+  if (mask.channels() == 3)
+  {
     cv::cvtColor(mask, mask, cv::COLOR_BGR2RGB);
     std::vector<cv::Mat> channels;
     cv::split(mask, channels);
@@ -33,10 +34,10 @@ inline std::tuple<cv::Mat, cv::Mat, cv::Mat> ReadRgbDepthMask(const std::string&
   return {rgb, depth, mask};
 }
 
-inline std::tuple<cv::Mat, cv::Mat> ReadRgbDepth(const std::string& rgb_path,
-                                          const std::string& depth_path)
+inline std::tuple<cv::Mat, cv::Mat> ReadRgbDepth(const std::string &rgb_path,
+                                                 const std::string &depth_path)
 {
-  cv::Mat rgb = cv::imread(rgb_path);
+  cv::Mat rgb   = cv::imread(rgb_path);
   cv::Mat depth = cv::imread(depth_path, cv::IMREAD_UNCHANGED);
 
   CHECK(!rgb.empty()) << "Failed reading rgb from path : " << rgb_path;
@@ -50,60 +51,61 @@ inline std::tuple<cv::Mat, cv::Mat> ReadRgbDepth(const std::string& rgb_path,
   return {rgb, depth};
 }
 
-inline void draw3DBoundingBox(const Eigen::Matrix3f& intrinsic, 
-                       const Eigen::Matrix4f& pose, 
-                       int input_image_H, 
-                       int input_image_W, 
-                       const Eigen::Vector3f& dimension,
-                       cv::Mat& image) {
-    // 目标的长宽高
-    float l = dimension(0) / 2;
-    float w = dimension(1) / 2;
-    float h = dimension(2) / 2;
-
-    // 目标的八个顶点在物体坐标系中的位置
-    Eigen::Vector3f points[8] = {
-        {-l, -w, h}, {l, -w, h}, {l, w, h}, {-l, w, h},
-        {-l, -w, -h}, {l, -w, -h}, {l, w, -h}, {-l, w, -h}
-    };
-
-
-    // 变换到世界坐标系
-    Eigen::Vector4f transformed_points[8];
-    for (int i = 0; i < 8; ++i) {
-        transformed_points[i] = pose * Eigen::Vector4f(points[i](0), points[i](1), points[i](2), 1);
-    }
-
-    // 投影到图像平面
-    std::vector<cv::Point2f> image_points;
-    for (int i = 0; i < 8; ++i) {
-        float x = transformed_points[i](0) / transformed_points[i](2);
-        float y = transformed_points[i](1) / transformed_points[i](2);
+inline void draw3DBoundingBox(const Eigen::Matrix3f &intrinsic,
+                              const Eigen::Matrix4f &pose,
+                              int                    input_image_H,
+                              int                    input_image_W,
+                              const Eigen::Vector3f &dimension,
+                              cv::Mat               &image)
+{
+  // Half extents of the object: half of each component of `dimension`.
+  float l = dimension(0) / 2;
+  float w = dimension(1) / 2;
+  float h = dimension(2) / 2;
+
+  // The eight box corners in the object's own coordinate frame (centered on the origin).
+  Eigen::Vector3f points[8] = {{-l, -w, h},  {l, -w, h},  {l, w, h},  {-l, w, h},
+                               {-l, -w, -h}, {l, -w, -h}, {l, w, -h}, {-l, w, -h}};
+
+  // Transform each corner by `pose`, using homogeneous coordinates (w = 1).
+  Eigen::Vector4f transformed_points[8];
+  for (int i = 0; i < 8; ++i)
+  {
+    transformed_points[i] = pose * Eigen::Vector4f(points[i](0), points[i](1), points[i](2), 1);
+  }
 
-        // 使用内参矩阵进行投影
-        float u = intrinsic(0, 0) * x + intrinsic(0, 2);
-        float v = intrinsic(1, 1) * y + intrinsic(1, 2);
+  // Project onto the image plane: perspective divide by z (NOTE: z == 0 is not guarded).
+  std::vector<cv::Point2f> image_points;
+  for (int i = 0; i < 8; ++i)
+  {
+    float x = transformed_points[i](0) / transformed_points[i](2);
+    float y = transformed_points[i](1) / transformed_points[i](2);
 
-        image_points.emplace_back(static_cast<float>(u), static_cast<float>(v));
+    // Map to pixels with the intrinsic matrix: diagonal scale terms plus column-2 offsets.
+    float u = intrinsic(0, 0) * x + intrinsic(0, 2);
+    float v = intrinsic(1, 1) * y + intrinsic(1, 2);
 
-    }
+    image_points.emplace_back(static_cast<float>(u), static_cast<float>(v));
+  }
 
-    // 绘制边框（连接顶点）
-    std::vector<std::pair<int, int>> edges = {
-        {0, 1}, {1, 2}, {2, 3}, {3, 0}, // 底面
-        {4, 5}, {5, 6}, {6, 7}, {7, 4}, // 顶面
-        {0, 4}, {1, 5}, {2, 6}, {3, 7}  // 侧面
-    };
-
-    for (const auto& edge : edges) {
-        if (edge.first < image_points.size() && edge.second < image_points.size()) {
-            cv::line(image, image_points[edge.first], image_points[edge.second], 
-                     cv::Scalar(0, 255, 0), 2); // 绿色边框
-        }
+  // Draw the box wireframe by connecting pairs of projected corners.
+  std::vector<std::pair<int, int>> edges = {
+      {0, 1}, {1, 2}, {2, 3}, {3, 0}, // corners 0-3: the z = +h face
+      {4, 5}, {5, 6}, {6, 7}, {7, 4}, // corners 4-7: the z = -h face
+      {0, 4}, {1, 5}, {2, 6}, {3, 7}  // edges joining the two faces
+  };
+
+  for (const auto &edge : edges)
+  {
+    if (edge.first < image_points.size() && edge.second < image_points.size())
+    {
+      cv::line(image, image_points[edge.first], image_points[edge.second], cv::Scalar(0, 255, 0),
+               2); // green, thickness 2
     }
+  }
 }
 
-inline Eigen::Matrix3f ReadCamK(const std::string& cam_K_path)
+inline Eigen::Matrix3f ReadCamK(const std::string &cam_K_path)
 {
   Eigen::Matrix3f K;
 
@@ -112,10 +114,12 @@ inline Eigen::Matrix3f ReadCamK(const std::string& cam_K_path)
   CHECK(file) << "Failed open file : " << cam_K_path;
 
   // 读取数据并存入矩阵
-  for (int i = 0; i < 3; ++i) {
-      for (int j = 0; j < 3; ++j) {
-          file >> K(i, j);
-      }
+  for (int i = 0; i < 3; ++i)
+  {
+    for (int j = 0; j < 3; ++j)
+    {
+      file >> K(i, j);
+    }
   }
 
   // 关闭文件
@@ -124,40 +128,49 @@ inline Eigen::Matrix3f ReadCamK(const std::string& cam_K_path)
   return K;
 }
 
-inline void saveVideo(const std::vector<cv::Mat>& frames, const std::string& outputPath, double fps = 30.0) {
-    if (frames.empty()) {
-        std::cerr << "Error: No frames to write!" << std::endl;
-        return;
-    }
+inline void saveVideo(const std::vector<cv::Mat> &frames,
+                      const std::string          &outputPath,
+                      double                      fps = 30.0)
+{
+  if (frames.empty())
+  {
+    std::cerr << "Error: No frames to write!" << std::endl;
+    return;
+  }
 
-    // 获取帧的宽度和高度
-    int frameWidth = frames[0].cols;
-    int frameHeight = frames[0].rows;
-    
-    // 定义视频编码格式（MP4 使用 `cv::VideoWriter::fourcc('m', 'p', '4', 'v')` 或 `cv::VideoWriter::fourcc('H', '2', '6', '4')`）
-    int fourcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v');  // MPEG-4 编码
-    // int fourcc = cv::VideoWriter::fourcc('H', '2', '6', '4'); // H.264 编码（可能需要额外的编解码器支持）
-
-    // 创建 VideoWriter 对象
-    cv::VideoWriter writer(outputPath, fourcc, fps, cv::Size(frameWidth, frameHeight));
-
-    // 检查是否成功打开
-    if (!writer.isOpened()) {
-        std::cerr << "Error: Could not open the video file for writing!" << std::endl;
-        return;
-    }
+  // Take the output frame size from the first frame.
+  int frameWidth  = frames[0].cols;
+  int frameHeight = frames[0].rows;
+
+  // Codec selection: for MP4 use `cv::VideoWriter::fourcc('m', 'p', '4', 'v')` or
+  // `cv::VideoWriter::fourcc('H', '2', '6', '4')`.
+  int fourcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v'); // MPEG-4 codec
+  // Alternative (H.264; may require additional codec support):
+  // int fourcc = cv::VideoWriter::fourcc('H', '2', '6', '4');
+
+  // Create the VideoWriter.
+  cv::VideoWriter writer(outputPath, fourcc, fps, cv::Size(frameWidth, frameHeight));
 
-    // 写入所有帧
-    for (const auto& frame : frames) {
-        // 确保所有帧大小一致
-        if (frame.cols != frameWidth || frame.rows != frameHeight) {
-            std::cerr << "Error: Frame size mismatch!" << std::endl;
-            break;
-        }
-        writer.write(frame);
+  // Bail out if the output file could not be opened.
+  if (!writer.isOpened())
+  {
+    std::cerr << "Error: Could not open the video file for writing!" << std::endl;
+    return;
+  }
+
+  // Write every frame in order.
+  for (const auto &frame : frames)
+  {
+    // Every frame must match the size the writer was opened with.
+    if (frame.cols != frameWidth || frame.rows != frameHeight)
+    {
+      std::cerr << "Error: Frame size mismatch!" << std::endl;
+      return; // abort without the success message below; ~VideoWriter releases the file
     }
+    writer.write(frame);
+  }
 
-    // 释放资源
-    writer.release();
-    std::cout << "Video saved successfully: " << outputPath << std::endl;
-}
\ No newline at end of file
+  // Release the writer, then report the output path.
+  writer.release();
+  std::cout << "Video saved successfully: " << outputPath << std::endl;
+}
diff --git a/simple_tests/src/test_foundationpose.cpp b/simple_tests/src/test_foundationpose.cpp
index 41a930a..9715791 100644
--- a/simple_tests/src/test_foundationpose.cpp
+++ b/simple_tests/src/test_foundationpose.cpp
@@ -11,62 +11,50 @@ using namespace inference_core;
 using namespace detection_6d;
 
 static const std::string refiner_engine_path_ = "/workspace/models/refiner_hwc_dynamic_fp16.engine";
-static const std::string scorer_engine_path_ = "/workspace/models/scorer_hwc_dynamic_fp16.engine";
-static const std::string demo_data_path_ = "/workspace/test_data/mustard0";
+static const std::string scorer_engine_path_  = "/workspace/models/scorer_hwc_dynamic_fp16.engine";
+static const std::string demo_data_path_      = "/workspace/test_data/mustard0";
 static const std::string demo_textured_obj_path = demo_data_path_ + "/mesh/textured_simple.obj";
 static const std::string demo_textured_map_path = demo_data_path_ + "/mesh/texture_map.png";
-static const std::string demo_name_ = "mustard";
-static const std::string frame_id = "1581120424100262102";
+static const std::string demo_name_             = "mustard";
+static const std::string frame_id               = "1581120424100262102";
 
 std::shared_ptr<Base6DofDetectionModel> CreateFoundationPoseModel()
 {
   auto refiner_core = CreateTrtInferCore(refiner_engine_path_,
-                                          {
+                                         {
+                                             {"transf_input", {252, 160, 160, 6}},
+                                             {"render_input", {252, 160, 160, 6}},
+                                         },
+                                         {{"trans", {252, 3}}, {"rot", {252, 3}}}, 1);
+  auto scorer_core  = CreateTrtInferCore(scorer_engine_path_,
+                                         {
                                             {"transf_input", {252, 160, 160, 6}},
                                             {"render_input", {252, 160, 160, 6}},
-                                          },
-                                          {
-                                            {"trans", {252, 3}},
-                                            {"rot", {252, 3}}
-                                          }, 
-                                          1);
-  auto scorer_core = CreateTrtInferCore(scorer_engine_path_,
-                                        {
-                                          {"transf_input", {252, 160, 160, 6}},
-                                          {"render_input", {252, 160, 160, 6}},
                                         },
-                                        {
-                                          {"scores", {252, 1}}
-                                        },
-                                        1); 
-  
+                                         {{"scores", {252, 1}}}, 1);
+
   Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
 
-  auto foundation_pose = CreateFoundationPoseModel(refiner_core, 
-                                                  scorer_core,
-                                                  demo_name_,
-                                                  demo_textured_obj_path,
-                                                  demo_textured_map_path,
-                                                  intrinsic_in_mat);
+  auto foundation_pose =
+      CreateFoundationPoseModel(refiner_core, scorer_core, demo_name_, demo_textured_obj_path,
+                                demo_textured_map_path, intrinsic_in_mat);
 
   return foundation_pose;
 }
 
-
-
-TEST(foundationpose_test, test) 
+TEST(foundationpose_test, test)
 {
-  auto foundation_pose = CreateFoundationPoseModel();
+  auto            foundation_pose  = CreateFoundationPoseModel();
   Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
 
-  const std::string first_rgb_path = demo_data_path_ + "/rgb/" + frame_id + ".png";
+  const std::string first_rgb_path   = demo_data_path_ + "/rgb/" + frame_id + ".png";
   const std::string first_depth_path = demo_data_path_ + "/depth/" + frame_id + ".png";
-  const std::string first_mask_path = demo_data_path_ + "/masks/" + frame_id + ".png";
+  const std::string first_mask_path  = demo_data_path_ + "/masks/" + frame_id + ".png";
 
   auto [rgb, depth, mask] = ReadRgbDepthMask(first_rgb_path, first_depth_path, first_mask_path);
 
   const Eigen::Vector3f object_dimension = foundation_pose->GetObjectDimension(demo_name_);
-  
+
   Eigen::Matrix4f out_pose;
   CHECK(foundation_pose->Register(rgb.clone(), depth, mask, demo_name_, out_pose));
   LOG(WARNING) << "first Pose : " << out_pose;
@@ -77,20 +65,21 @@ TEST(foundationpose_test, test)
   draw3DBoundingBox(intrinsic_in_mat, out_pose, 480, 640, object_dimension, regist_plot);
   cv::imwrite("/workspace/test_data/test_foundationpose_plot.png", regist_plot);
 
-
   auto rgb_paths = get_files_in_directory(demo_data_path_ + "/rgb/");
   std::sort(rgb_paths.begin(), rgb_paths.end());
   std::vector<std::string> frame_ids;
-  for (const auto& rgb_path : rgb_paths) {
+  for (const auto &rgb_path : rgb_paths)
+  {
     frame_ids.push_back(rgb_path.stem());
   }
 
-  int total = frame_ids.size();
-  std::vector<cv::Mat> result_image_sequence {regist_plot};
-  for (int i = 1 ; i < total ; ++ i) {
-    std::string cur_rgb_path = demo_data_path_ + "/rgb/" + frame_ids[i] + ".png";
+  int                  total = frame_ids.size();
+  std::vector<cv::Mat> result_image_sequence{regist_plot};
+  for (int i = 1; i < total; ++i)
+  {
+    std::string cur_rgb_path   = demo_data_path_ + "/rgb/" + frame_ids[i] + ".png";
     std::string cur_depth_path = demo_data_path_ + "/depth/" + frame_ids[i] + ".png";
-    auto [cur_rgb, cur_depth] = ReadRgbDepth(cur_rgb_path, cur_depth_path);
+    auto [cur_rgb, cur_depth]  = ReadRgbDepth(cur_rgb_path, cur_depth_path);
 
     Eigen::Matrix4f track_pose;
     CHECK(foundation_pose->Track(cur_rgb.clone(), cur_depth, demo_name_, track_pose));
@@ -107,23 +96,22 @@ TEST(foundationpose_test, test)
   saveVideo(result_image_sequence, "/workspace/test_data/test_foundationpose_result.mp4");
 }
 
-
-
-TEST(foundationpose_test, speed_register) 
+TEST(foundationpose_test, speed_register)
 {
-  auto foundation_pose = CreateFoundationPoseModel();
+  auto            foundation_pose  = CreateFoundationPoseModel();
   Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
-  
-  const std::string first_rgb_path = demo_data_path_ + "/rgb/" + frame_id + ".png";
+
+  const std::string first_rgb_path   = demo_data_path_ + "/rgb/" + frame_id + ".png";
   const std::string first_depth_path = demo_data_path_ + "/depth/" + frame_id + ".png";
-  const std::string first_mask_path = demo_data_path_ + "/masks/" + frame_id + ".png";
+  const std::string first_mask_path  = demo_data_path_ + "/masks/" + frame_id + ".png";
 
   auto [rgb, depth, mask] = ReadRgbDepthMask(first_rgb_path, first_depth_path, first_mask_path);
 
   // proccess
   FPSCounter counter;
   counter.Start();
-  for (int i = 0 ; i < 50 ; ++ i) {
+  for (int i = 0; i < 50; ++i)
+  {
     Eigen::Matrix4f out_pose;
     foundation_pose->Register(rgb.clone(), depth, mask, demo_name_, out_pose);
     counter.Count(1);
@@ -132,16 +120,14 @@ TEST(foundationpose_test, speed_register)
   LOG(WARNING) << "average fps: " << counter.GetFPS();
 }
 
-
-
-TEST(foundationpose_test, speed_track) 
+TEST(foundationpose_test, speed_track)
 {
-  auto foundation_pose = CreateFoundationPoseModel();
+  auto            foundation_pose  = CreateFoundationPoseModel();
   Eigen::Matrix3f intrinsic_in_mat = ReadCamK(demo_data_path_ + "/cam_K.txt");
-  
-  const std::string first_rgb_path = demo_data_path_ + "/rgb/" + frame_id + ".png";
+
+  const std::string first_rgb_path   = demo_data_path_ + "/rgb/" + frame_id + ".png";
   const std::string first_depth_path = demo_data_path_ + "/depth/" + frame_id + ".png";
-  const std::string first_mask_path = demo_data_path_ + "/masks/" + frame_id + ".png";
+  const std::string first_mask_path  = demo_data_path_ + "/masks/" + frame_id + ".png";
 
   auto [rgb, depth, mask] = ReadRgbDepthMask(first_rgb_path, first_depth_path, first_mask_path);
 
@@ -151,7 +137,8 @@ TEST(foundationpose_test, speed_track)
   // proccess
   FPSCounter counter;
   counter.Start();
-  for (int i = 0 ; i < 5000 ; ++ i) {
+  for (int i = 0; i < 5000; ++i)
+  {
     Eigen::Matrix4f track_pose;
     foundation_pose->Track(rgb.clone(), depth, demo_name_, track_pose);
     counter.Count(1);
diff --git a/test_data/download.md b/test_data/download.md
index 526a9ae..84c3220 100644
--- a/test_data/download.md
+++ b/test_data/download.md
@@ -12,4 +12,4 @@
         |-- masks
         |-- mesh
         `-- rgb
-  ```
\ No newline at end of file
+  ```
diff --git a/tools/cvt_onnx2trt.bash b/tools/cvt_onnx2trt.bash
index 604bb0e..39b3c1a 100644
--- a/tools/cvt_onnx2trt.bash
+++ b/tools/cvt_onnx2trt.bash
@@ -12,4 +12,4 @@
                               --optShapes=render_input:252x160x160x6,transf_input:252x160x160x6 \
                               --maxShapes=render_input:252x160x160x6,transf_input:252x160x160x6 \
                               --fp16 \
-                              --saveEngine=/workspace/models/refiner_hwc_dynamic_fp16.engine
\ No newline at end of file
+                              --saveEngine=/workspace/models/refiner_hwc_dynamic_fp16.engine