Skip to content

Advanced Edge Refinement results #4

@s-trinh

Description

@s-trinh

Hi,

I am interested in the "Advanced Edge Refinement" function, since the paper states a median error of 0.017 px.
But I am not able to get good results using the refineCornerPointsByDirectEdgeOptimization function.

For instance, I am testing the refine code with 0019.png from the random.zip dataset.

Please see the code I am using below, and let me know if there is any obvious error I may have made:

  • used image is 0019.png:
    0019
  • ground truth file is tags.csv
  • code:
C++ code (collapsed block, click on the arrow)

#include <ceres/ceres.h>
#include <Eigen/Dense>

#include <apriltags/TagFamily.h>
#include <apriltags/Tag36h11.h>
#include <apriltags/TagDetector.h>

namespace
{
// Parses "tags.csv" into ground_truth[image_filename][tag_id] -> corner list.
// Expected columns per row: image, tag_id, <skipped column>, x, y.
// Throws std::invalid_argument if the file cannot be opened.
void read_ground_truth(std::map<std::string, std::map<int, std::vector<cv::Point2d>>>& ground_truth)
{
    const std::string filename = "tags.csv";
    std::ifstream file(filename);
    if (!file.is_open()) {
        throw std::invalid_argument("Cannot open ground truth file tags.csv.");
    }

    std::string line;
    // Skip the header row. BUGFIX: use std::getline instead of `file >> line`
    // -- operator>> tokenizes on whitespace, so any row containing a space
    // would be split into several bogus records.
    std::getline(file, line);

    while (std::getline(file, line)) {
        if (line.empty()) {
            continue;  // tolerate trailing blank lines
        }

        std::istringstream ss(line);
        std::string token;

        // Column 1: image filename.
        std::getline(ss, token, ',');
        std::string img = token;

        // Column 2: tag id.
        std::getline(ss, token, ',');
        int tag_id = std::atoi(token.c_str());

        // Column 3 is skipped (corner index); column 4 is the x coordinate.
        std::getline(ss, token, ',');
        std::getline(ss, token, ',');
        double ground_truth_x = std::atof(token.c_str());

        // Column 5: y coordinate.
        std::getline(ss, token, ',');
        double ground_truth_y = std::atof(token.c_str());

        ground_truth[img][tag_id].push_back(cv::Point2d(ground_truth_x, ground_truth_y));
    }
}

// Euclidean distance between points (u1, v1) and (u2, v2).
double point_error(double u1, double v1, double u2, double v2)
{
    const double du = u1 - u2;
    const double dv = v1 - v2;
    return std::sqrt(du * du + dv * dv);
}

// Ceres cost functor for direct edge refinement: for every pixel selected by
// `mask_img`, the residual compares the observed image intensity against an
// ideal 1-D step-edge template (values 0 and 255) sampled at that pixel's
// signed distance from a candidate edge line.
class EdgeCostFunctor
{
  public:
    // Stores the cropped image and its residual-selection mask, and builds
    // the 1x2 step-edge template [0, 255] used as the predicted intensity
    // profile across the edge.
    EdgeCostFunctor(const cv::Mat& rawImg, const cv::Mat& maskImg)
        : raw_img(rawImg), mask_img(maskImg)
    {
        template_img.create(1, 2, CV_8U);
        template_img.data[0] = 0;
        template_img.data[1] = 255;
    }

    // Bilinear interpolation of `img` at sub-pixel location `pt`, with
    // replicated borders for samples outside the image.
    // NOTE(review): x/y are floor()'d as T and then passed to
    // cv::borderInterpolate, which takes int — this relies on an implicit
    // T -> int conversion, so T is effectively expected to be double here.
    template<typename T>
    T getSubPixel(const cv::Mat& img, cv::Point_<T> pt) const
    {
        const T x = floor(pt.x);
        const T y = floor(pt.y);

        const int x0 = cv::borderInterpolate(x, img.cols, cv::BORDER_REPLICATE);
        const int x1 = cv::borderInterpolate(x + 1, img.cols, cv::BORDER_REPLICATE);
        const int y0 = cv::borderInterpolate(y, img.rows, cv::BORDER_REPLICATE);
        const int y1 = cv::borderInterpolate(y + 1, img.rows, cv::BORDER_REPLICATE);

        // Fractional parts used as the bilinear blend weights.
        const T a = pt.x - T(x);
        const T c = pt.y - T(y);

        return (T(img.at<uint8_t>(y0, x0)) * (T(1.0) - a) + T(img.at<uint8_t>(y0, x1)) * a) * (T(1.0) - c) +
               (T(img.at<uint8_t>(y1, x0)) * (T(1.0) - a) + T(img.at<uint8_t>(y1, x1)) * a) * c;
    }

    // Fills `residual` with (predicted - observed) intensity for every
    // non-zero mask pixel. The edge line is parameterized by a 2-D normal
    // and a scalar offset (Eigen::Hyperplane).
    // NOTE(review): edge_offset is bound to a double reference regardless of
    // T, and the distances/template lookups below are computed in double —
    // this only works because the functor is used with numeric (not auto)
    // differentiation, where T is always double.
    template<typename T>
    bool operator()(const T* const edge_normal_ptr, const T* const edge_offset_ptr, T* residual) const
    {
        const Eigen::Map<const Eigen::Matrix<T, 2, 1>> edge_normal(edge_normal_ptr);
        const double& edge_offset = *edge_offset_ptr;

        const Eigen::Hyperplane<T, 2> line(edge_normal, edge_offset);

        int residual_index = 0;
        for (int row = 0; row < raw_img.rows; row++)
        {
            const auto raw_img_row_ptr = raw_img.ptr<uint8_t>(row);
            const auto mask_row_ptr = mask_img.ptr<uint8_t>(row);

            for (int col = 0; col < raw_img.cols; col++)
            {
                const auto eval = mask_row_ptr[col];

                if (eval)
                {
                    const Eigen::Vector2d raw_img_pos(col, row);
                    const double dist_to_line = line.signedDistance(raw_img_pos);

                    // +0.5 centers the 2-texel template on the line, so a
                    // pixel exactly on the line samples halfway between the
                    // template's 0 and 255 values (127.5).
                    const ::cv::Point2d template_pt(dist_to_line + 0.5, 0);

                    const double pred_pixel_value = getSubPixel<double>(template_img, template_pt);
                    const uint8_t current_pixel_values = raw_img_row_ptr[col];
                    residual[residual_index] = pred_pixel_value - current_pixel_values;
                    residual_index++;
                }
            }
        }

        return true;
    }

  private:
    cv::Mat raw_img;       // cropped grayscale image around the tag
    cv::Mat mask_img;      // non-zero where a pixel contributes a residual
    cv::Mat template_img;  // 1x2 ideal step edge: [0, 255]
};

// Drops every detection whose `good` flag is unset (erase-remove idiom).
void removeBadTags(std::vector<AprilTags::TagDetection>& tag_detections) noexcept
{
    const auto is_bad = [](const AprilTags::TagDetection& tag) { return !tag.good; };
    tag_detections.erase(std::remove_if(tag_detections.begin(), tag_detections.end(), is_bad),
                         tag_detections.end());
}

// Refines the four corner points of each tag by fitting the four tag border
// lines directly to the image intensities (Ceres least-squares against a 0/255
// step-edge template) and intersecting adjacent fitted lines. Any exception
// during a tag's optimization marks that tag `good = false` instead of
// propagating.
void refineCornerPointsByDirectEdgeOptimization(
    cv::Mat& img, std::vector<AprilTags::TagDetection>& tag_detections) noexcept
{
    for (AprilTags::TagDetection& tag : tag_detections)
    {
        // Axis-aligned bounding box of the four detected corners.
        // BUGFIX: seed the running maxima with lowest(), not min() --
        // numeric_limits<float>::min() is the smallest POSITIVE float, so the
        // original max-tracking would be wrong for any coordinate <= min().
        std::pair<float, float> x_range(std::numeric_limits<float>::max(), std::numeric_limits<float>::lowest());
        std::pair<float, float> y_range(std::numeric_limits<float>::max(), std::numeric_limits<float>::lowest());
        for (std::pair<float, float>& corner_point : tag.p)
        {
            x_range.first = std::min(x_range.first, corner_point.first);
            x_range.second = std::max(x_range.second, corner_point.first);
            y_range.first = std::min(y_range.first, corner_point.second);
            y_range.second = std::max(y_range.second, corner_point.second);
        }

        // Pad the box by 10 px in every direction, clamped to the image.
        x_range.first = std::max(x_range.first - 10, static_cast<float>(0));
        x_range.second = std::min(x_range.second + 10, static_cast<float>(img.cols - 1));
        y_range.first = std::max(y_range.first - 10, static_cast<float>(0));
        y_range.second = std::min(y_range.second + 10, static_cast<float>(img.rows - 1));

        try
        {
            cv::Rect roi(cv::Point(x_range.first, y_range.first), cv::Point(x_range.second, y_range.second));

            cv::Mat cropped_img = img(roi);

            // Per-edge line parameters (unit normal + offset) and the mask of
            // pixels contributing residuals for that edge.
            std::array<Eigen::Vector2d, 4> estimated_edge_normals;
            std::array<double, 4> estimated_edge_offsets;
            std::array<cv::Mat, 4> mask_images;

            int line_thickness = 5;

            // Corners are stored in cyclic order; index 3 wraps back to 0.
            auto nextCornerIndex = [](const int i) {
                if (i <= 2)
                {
                    return i + 1;
                }
                else
                {
                    return 0;
                }
            };

            for (int i = 0; i < 4; i++)
            {
                const int next_corner_index = nextCornerIndex(i);

                const Eigen::Vector2d roi_offset_vector(roi.x, roi.y);

                const Eigen::Vector2d corner(tag.p[i].first, tag.p[i].second);
                const Eigen::Vector2d next_corner(tag.p[next_corner_index].first, tag.p[next_corner_index].second);

                // Initial edge-line estimate through the two detected corners,
                // expressed in ROI-local coordinates.
                const Eigen::Hyperplane<double, 2> edge_line =
                    Eigen::Hyperplane<double, 2>::Through(corner - roi_offset_vector, next_corner - roi_offset_vector);

                estimated_edge_normals[i] = edge_line.normal();
                estimated_edge_offsets[i] = edge_line.offset();

                // Mask: a thick band along the edge with the corner
                // neighborhoods punched out, so adjacent edges do not share
                // pixels near the corners.
                mask_images[i].create(cropped_img.rows, cropped_img.cols, CV_8U);
                mask_images[i].setTo(0);

                const cv::Point2i current_corner_point(std::round(tag.p[i].first - roi.x),
                                                       std::round(tag.p[i].second - roi.y));

                const cv::Point2i next_corner_point(std::round(tag.p[next_corner_index].first - roi.x),
                                                    std::round(tag.p[next_corner_index].second - roi.y));

                cv::line(mask_images[i], current_corner_point, next_corner_point, cv::Scalar(255), line_thickness);

                cv::rectangle(mask_images[i], current_corner_point, current_corner_point, cv::Scalar(0), 10);
                cv::rectangle(mask_images[i], next_corner_point, next_corner_point, cv::Scalar(0), 10);
            }

            ceres::Problem optimization_problem;

            // One dynamically-sized residual block per edge; derivatives are
            // obtained by central numeric differencing.
            ceres::NumericDiffOptions numeric_diff_options;

            auto addEdgeResidualBlocks = [&optimization_problem,
                                          &mask_images,
                                          &cropped_img,
                                          &estimated_edge_normals,
                                          &estimated_edge_offsets,
                                          &numeric_diff_options,
                                          &nextCornerIndex](const int i) {
                // Residual count = number of selected pixels in the mask.
                const int pixel_count = cv::countNonZero(mask_images[i]);

                ceres::CostFunction* cost_function =
                    new ceres::NumericDiffCostFunction<EdgeCostFunctor, ceres::CENTRAL, ceres::DYNAMIC, 2, 1>(
                        new EdgeCostFunctor(cropped_img, mask_images[i]),
                        ceres::TAKE_OWNERSHIP,
                        pixel_count,
                        numeric_diff_options);
                optimization_problem.AddResidualBlock(
                    cost_function, nullptr, estimated_edge_normals[i].data(), &estimated_edge_offsets[i]);

                // Keep the edge normal on the unit circle during optimization.
                optimization_problem.SetParameterization(estimated_edge_normals[i].data(),
                                                         new ceres::HomogeneousVectorParameterization(2));
            };

            addEdgeResidualBlocks(0);
            addEdgeResidualBlocks(1);
            addEdgeResidualBlocks(2);
            addEdgeResidualBlocks(3);

            ceres::Solver::Options solve_options;
            solve_options.linear_solver_type = ceres::DENSE_QR;
            solve_options.max_num_iterations = 100;

            ceres::Solver::Summary summary;
            ceres::Solve(solve_options, &optimization_problem, &summary);

            // Each refined corner is the intersection of its two adjacent
            // fitted edge lines, mapped back into full-image coordinates.
            for (int edge_index = 0; edge_index < 4; edge_index++)
            {
                const int next_edge_index = nextCornerIndex(edge_index);
                const int corner_index = next_edge_index;

                const Eigen::Hyperplane<double, 2> edge_A(estimated_edge_normals[edge_index],
                                                          estimated_edge_offsets[edge_index]);
                const Eigen::Hyperplane<double, 2> edge_B(estimated_edge_normals[next_edge_index],
                                                          estimated_edge_offsets[next_edge_index]);

                const Eigen::Vector2d estimated_corner_pos_roi = edge_A.intersection(edge_B);

                tag.p[corner_index].first = estimated_corner_pos_roi.x() + roi.x;
                tag.p[corner_index].second = estimated_corner_pos_roi.y() + roi.y;
            }
        }
        catch (const std::exception& /*e*/)
        {
            tag.good = false;
        }
    }
    // removeBadTags(tag_detections);
}

// Comparator ordering tag detections by ascending id.
// FIX: operator() is now const — comparators should be callable on const
// instances (and must be for use in const contexts / ordered containers).
struct less_than_key
{
    inline bool operator() (const AprilTags::TagDetection& det1, const AprilTags::TagDetection& det2) const
    {
        return (det1.id < det2.id);
    }
};
}

int main(int argc, char** argv)
{
    AprilTags::TagDetector apriltag_cpp_detector(AprilTags::TagCodes(AprilTags::tagCodes36h11));

    cv::Mat img_gray = cv::imread("0019.png", cv::IMREAD_GRAYSCALE);

    std::vector<AprilTags::TagDetection> tag_detections = apriltag_cpp_detector.extractTags(img_gray);
    std::sort(tag_detections.begin(), tag_detections.end(), less_than_key());
    std::cout << "tag_detections: " << tag_detections.size() << std::endl;

    std::vector<AprilTags::TagDetection> tag_detections_refined = tag_detections;
    refineCornerPointsByDirectEdgeOptimization(img_gray, tag_detections_refined);

    std::map<std::string, std::map<int, std::vector<cv::Point2d>>> ground_truth;
    read_ground_truth(ground_truth);
    const std::string imageFilename = "0019.png";
    std::cout << "ground_truth: " << (ground_truth.find(imageFilename) != ground_truth.end())
              << " ; ground_truth: " << ground_truth.size() << std::endl;

    for (size_t i = 0; i < tag_detections.size(); i++) {
        std::cout << "\nTag " << tag_detections[i].id << ":" << std::endl;
        for (size_t j = 0; j < 4; j++) {
            cv::Point2d corner = ground_truth[imageFilename][tag_detections[i].id][j];
            double error_detection = point_error(tag_detections[i].p[j].first, tag_detections[i].p[j].second, corner.x, corner.y);
            std::cout << "detection: " << tag_detections[i].p[j].first << ", " << tag_detections[i].p[j].second << " ; error="
                      << error_detection << std::endl;

            double error_detection_refined = point_error(tag_detections_refined[i].p[j].first, tag_detections_refined[i].p[j].second, corner.x, corner.y);
            std::cout << "detection refined: " << tag_detections_refined[i].p[j].first << ", " << tag_detections_refined[i].p[j].second << " ; error="
                      << error_detection_refined << std::endl;

            std::cout << "ground truth: " << corner.x << ", " << corner.y << std::endl;
        }
    }

    return 0;
}

This is mostly copy-paste code, but comparing the corner coordinates between the refined detections and the ground-truth data does not show good results. For instance:

Tag 8:
detection: 2033.96, 1260.67 ; error=0.698578
detection refined: 2033.88, 1260.73 ; error=0.706687
ground truth: 2034.38, 1261.23
detection: 2373.62, 1283.37 ; error=0.522154
detection refined: 2373.38, 1283.32 ; error=0.711717
ground truth: 2373.88, 1283.82
detection: 2367.81, 1005.07 ; error=0.869125
detection refined: 2368.05, 1005.02 ; error=0.708416
ground truth: 2368.55, 1005.53
detection: 2028.88, 1011.25 ; error=0.804983
detection refined: 2028.97, 1011.3 ; error=0.705023
ground truth: 2029.47, 1011.8

Tag 9:
detection: 1665.5, 1236.2 ; error=0.68357
detection refined: 1665.45, 1236.2 ; error=0.707975
ground truth: 1665.95, 1236.71
detection: 1921.41, 1253.23 ; error=0.6572
detection refined: 1921.34, 1253.25 ; error=0.697219
ground truth: 1921.83, 1253.74
detection: 1916.5, 1013.47 ; error=0.697531
detection refined: 1916.57, 1013.37 ; error=0.704689
ground truth: 1917.07, 1013.87
detection: 1660.98, 1017.99 ; error=0.826231
detection refined: 1661.03, 1018.1 ; error=0.712545
ground truth: 1661.54, 1018.6

What did I miss, since it should be mostly copy-paste code? Is there any issue or subtlety with the ground-truth data?

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions