Skip to content

Advanced Edge Refinement results #4

@s-trinh

Description

@s-trinh

Hi,

I am interested in the "Advanced Edge Refinement" function, since the paper states a median error of 0.017 px.
But I am not able to get good results using the refineCornerPointsByDirectEdgeOptimization function.

For instance, I am testing the refine code with 0019.png from the random.zip dataset.

Please see the code I am using below, and let me know if there is any obvious error I may have made:

  • used image is 0019.png:
    0019
  • ground truth file is tags.csv
  • code:
C++ code (collapsed block, click on the arrow)

#include <ceres/ceres.h>
#include <Eigen/Dense>

#include <apriltags/TagFamily.h>
#include <apriltags/Tag36h11.h>
#include <apriltags/TagDetector.h>

namespace
{
// Parses "tags.csv" into ground_truth[image_filename][tag_id] -> corner list.
// Expected columns per row: image, tag_id, <skipped column>, x, y.
// Throws std::invalid_argument if the file cannot be opened.
void read_ground_truth(std::map<std::string, std::map<int, std::vector<cv::Point2d>>>& ground_truth)
{
    const std::string filename = "tags.csv";
    std::ifstream file(filename);
    if (!file.is_open()) {
        throw std::invalid_argument("Cannot open ground truth file tags.csv.");
    }

    std::string line;
    // Skip the header row. BUGFIX: use std::getline instead of `file >> line`
    // -- operator>> tokenizes on whitespace, so any row containing a space
    // would be split into several bogus records.
    std::getline(file, line);

    while (std::getline(file, line)) {
        if (line.empty()) {
            continue;  // tolerate trailing blank lines
        }

        std::istringstream ss(line);
        std::string token;

        // Column 1: image filename.
        std::getline(ss, token, ',');
        std::string img = token;

        // Column 2: tag id.
        std::getline(ss, token, ',');
        int tag_id = std::atoi(token.c_str());

        // Column 3 is skipped (corner index); column 4 is the x coordinate.
        std::getline(ss, token, ',');
        std::getline(ss, token, ',');
        double ground_truth_x = std::atof(token.c_str());

        // Column 5: y coordinate.
        std::getline(ss, token, ',');
        double ground_truth_y = std::atof(token.c_str());

        ground_truth[img][tag_id].push_back(cv::Point2d(ground_truth_x, ground_truth_y));
    }
}

// Euclidean distance between points (u1, v1) and (u2, v2).
double point_error(double u1, double v1, double u2, double v2)
{
    const double du = u1 - u2;
    const double dv = v1 - v2;
    return std::sqrt(du * du + dv * dv);
}

// Ceres cost functor for direct edge refinement: for every pixel selected by
// `mask_img`, the residual compares the observed image intensity against an
// ideal 1-D step-edge template (values 0 and 255) sampled at that pixel's
// signed distance from a candidate edge line.
class EdgeCostFunctor
{
  public:
    // Stores the cropped image and its residual-selection mask, and builds
    // the 1x2 step-edge template [0, 255] used as the predicted intensity
    // profile across the edge.
    EdgeCostFunctor(const cv::Mat& rawImg, const cv::Mat& maskImg)
        : raw_img(rawImg), mask_img(maskImg)
    {
        template_img.create(1, 2, CV_8U);
        template_img.data[0] = 0;
        template_img.data[1] = 255;
    }

    // Bilinear interpolation of `img` at sub-pixel location `pt`, with
    // replicated borders for samples outside the image.
    // NOTE(review): x/y are floor()'d as T and then passed to
    // cv::borderInterpolate, which takes int — this relies on an implicit
    // T -> int conversion, so T is effectively expected to be double here.
    template<typename T>
    T getSubPixel(const cv::Mat& img, cv::Point_<T> pt) const
    {
        const T x = floor(pt.x);
        const T y = floor(pt.y);

        const int x0 = cv::borderInterpolate(x, img.cols, cv::BORDER_REPLICATE);
        const int x1 = cv::borderInterpolate(x + 1, img.cols, cv::BORDER_REPLICATE);
        const int y0 = cv::borderInterpolate(y, img.rows, cv::BORDER_REPLICATE);
        const int y1 = cv::borderInterpolate(y + 1, img.rows, cv::BORDER_REPLICATE);

        // Fractional parts used as the bilinear blend weights.
        const T a = pt.x - T(x);
        const T c = pt.y - T(y);

        return (T(img.at<uint8_t>(y0, x0)) * (T(1.0) - a) + T(img.at<uint8_t>(y0, x1)) * a) * (T(1.0) - c) +
               (T(img.at<uint8_t>(y1, x0)) * (T(1.0) - a) + T(img.at<uint8_t>(y1, x1)) * a) * c;
    }

    // Fills `residual` with (predicted - observed) intensity for every
    // non-zero mask pixel. The edge line is parameterized by a 2-D normal
    // and a scalar offset (Eigen::Hyperplane).
    // NOTE(review): edge_offset is bound to a double reference regardless of
    // T, and the distances/template lookups below are computed in double —
    // this only works because the functor is used with numeric (not auto)
    // differentiation, where T is always double.
    template<typename T>
    bool operator()(const T* const edge_normal_ptr, const T* const edge_offset_ptr, T* residual) const
    {
        const Eigen::Map<const Eigen::Matrix<T, 2, 1>> edge_normal(edge_normal_ptr);
        const double& edge_offset = *edge_offset_ptr;

        const Eigen::Hyperplane<T, 2> line(edge_normal, edge_offset);

        int residual_index = 0;
        for (int row = 0; row < raw_img.rows; row++)
        {
            const auto raw_img_row_ptr = raw_img.ptr<uint8_t>(row);
            const auto mask_row_ptr = mask_img.ptr<uint8_t>(row);

            for (int col = 0; col < raw_img.cols; col++)
            {
                const auto eval = mask_row_ptr[col];

                if (eval)
                {
                    const Eigen::Vector2d raw_img_pos(col, row);
                    const double dist_to_line = line.signedDistance(raw_img_pos);

                    // +0.5 centers the 2-texel template on the line, so a
                    // pixel exactly on the line samples halfway between the
                    // template's 0 and 255 values (127.5).
                    const ::cv::Point2d template_pt(dist_to_line + 0.5, 0);

                    const double pred_pixel_value = getSubPixel<double>(template_img, template_pt);
                    const uint8_t current_pixel_values = raw_img_row_ptr[col];
                    residual[residual_index] = pred_pixel_value - current_pixel_values;
                    residual_index++;
                }
            }
        }

        return true;
    }

  private:
    cv::Mat raw_img;       // cropped grayscale image around the tag
    cv::Mat mask_img;      // non-zero where a pixel contributes a residual
    cv::Mat template_img;  // 1x2 ideal step edge: [0, 255]
};

// Drops every detection whose `good` flag is unset (erase-remove idiom).
void removeBadTags(std::vector<AprilTags::TagDetection>& tag_detections) noexcept
{
    const auto is_bad = [](const AprilTags::TagDetection& tag) { return !tag.good; };
    tag_detections.erase(std::remove_if(tag_detections.begin(), tag_detections.end(), is_bad),
                         tag_detections.end());
}

// Refines the four corner points of each tag by fitting the four tag border
// lines directly to the image intensities (Ceres least-squares against a 0/255
// step-edge template) and intersecting adjacent fitted lines. Any exception
// during a tag's optimization marks that tag `good = false` instead of
// propagating.
void refineCornerPointsByDirectEdgeOptimization(
    cv::Mat& img, std::vector<AprilTags::TagDetection>& tag_detections) noexcept
{
    for (AprilTags::TagDetection& tag : tag_detections)
    {
        // Axis-aligned bounding box of the four detected corners.
        // BUGFIX: seed the running maxima with lowest(), not min() --
        // numeric_limits<float>::min() is the smallest POSITIVE float, so the
        // original max-tracking would be wrong for any coordinate <= min().
        std::pair<float, float> x_range(std::numeric_limits<float>::max(), std::numeric_limits<float>::lowest());
        std::pair<float, float> y_range(std::numeric_limits<float>::max(), std::numeric_limits<float>::lowest());
        for (std::pair<float, float>& corner_point : tag.p)
        {
            x_range.first = std::min(x_range.first, corner_point.first);
            x_range.second = std::max(x_range.second, corner_point.first);
            y_range.first = std::min(y_range.first, corner_point.second);
            y_range.second = std::max(y_range.second, corner_point.second);
        }

        // Pad the box by 10 px in every direction, clamped to the image.
        x_range.first = std::max(x_range.first - 10, static_cast<float>(0));
        x_range.second = std::min(x_range.second + 10, static_cast<float>(img.cols - 1));
        y_range.first = std::max(y_range.first - 10, static_cast<float>(0));
        y_range.second = std::min(y_range.second + 10, static_cast<float>(img.rows - 1));

        try
        {
            cv::Rect roi(cv::Point(x_range.first, y_range.first), cv::Point(x_range.second, y_range.second));

            cv::Mat cropped_img = img(roi);

            // Per-edge line parameters (unit normal + offset) and the mask of
            // pixels contributing residuals for that edge.
            std::array<Eigen::Vector2d, 4> estimated_edge_normals;
            std::array<double, 4> estimated_edge_offsets;
            std::array<cv::Mat, 4> mask_images;

            int line_thickness = 5;

            // Corners are stored in cyclic order; index 3 wraps back to 0.
            auto nextCornerIndex = [](const int i) {
                if (i <= 2)
                {
                    return i + 1;
                }
                else
                {
                    return 0;
                }
            };

            for (int i = 0; i < 4; i++)
            {
                const int next_corner_index = nextCornerIndex(i);

                const Eigen::Vector2d roi_offset_vector(roi.x, roi.y);

                const Eigen::Vector2d corner(tag.p[i].first, tag.p[i].second);
                const Eigen::Vector2d next_corner(tag.p[next_corner_index].first, tag.p[next_corner_index].second);

                // Initial edge-line estimate through the two detected corners,
                // expressed in ROI-local coordinates.
                const Eigen::Hyperplane<double, 2> edge_line =
                    Eigen::Hyperplane<double, 2>::Through(corner - roi_offset_vector, next_corner - roi_offset_vector);

                estimated_edge_normals[i] = edge_line.normal();
                estimated_edge_offsets[i] = edge_line.offset();

                // Mask: a thick band along the edge with the corner
                // neighborhoods punched out, so adjacent edges do not share
                // pixels near the corners.
                mask_images[i].create(cropped_img.rows, cropped_img.cols, CV_8U);
                mask_images[i].setTo(0);

                const cv::Point2i current_corner_point(std::round(tag.p[i].first - roi.x),
                                                       std::round(tag.p[i].second - roi.y));

                const cv::Point2i next_corner_point(std::round(tag.p[next_corner_index].first - roi.x),
                                                    std::round(tag.p[next_corner_index].second - roi.y));

                cv::line(mask_images[i], current_corner_point, next_corner_point, cv::Scalar(255), line_thickness);

                cv::rectangle(mask_images[i], current_corner_point, current_corner_point, cv::Scalar(0), 10);
                cv::rectangle(mask_images[i], next_corner_point, next_corner_point, cv::Scalar(0), 10);
            }

            ceres::Problem optimization_problem;

            // One dynamically-sized residual block per edge; derivatives are
            // obtained by central numeric differencing.
            ceres::NumericDiffOptions numeric_diff_options;

            auto addEdgeResidualBlocks = [&optimization_problem,
                                          &mask_images,
                                          &cropped_img,
                                          &estimated_edge_normals,
                                          &estimated_edge_offsets,
                                          &numeric_diff_options,
                                          &nextCornerIndex](const int i) {
                // Residual count = number of selected pixels in the mask.
                const int pixel_count = cv::countNonZero(mask_images[i]);

                ceres::CostFunction* cost_function =
                    new ceres::NumericDiffCostFunction<EdgeCostFunctor, ceres::CENTRAL, ceres::DYNAMIC, 2, 1>(
                        new EdgeCostFunctor(cropped_img, mask_images[i]),
                        ceres::TAKE_OWNERSHIP,
                        pixel_count,
                        numeric_diff_options);
                optimization_problem.AddResidualBlock(
                    cost_function, nullptr, estimated_edge_normals[i].data(), &estimated_edge_offsets[i]);

                // Keep the edge normal on the unit circle during optimization.
                optimization_problem.SetParameterization(estimated_edge_normals[i].data(),
                                                         new ceres::HomogeneousVectorParameterization(2));
            };

            addEdgeResidualBlocks(0);
            addEdgeResidualBlocks(1);
            addEdgeResidualBlocks(2);
            addEdgeResidualBlocks(3);

            ceres::Solver::Options solve_options;
            solve_options.linear_solver_type = ceres::DENSE_QR;
            solve_options.max_num_iterations = 100;

            ceres::Solver::Summary summary;
            ceres::Solve(solve_options, &optimization_problem, &summary);

            // Each refined corner is the intersection of its two adjacent
            // fitted edge lines, mapped back into full-image coordinates.
            for (int edge_index = 0; edge_index < 4; edge_index++)
            {
                const int next_edge_index = nextCornerIndex(edge_index);
                const int corner_index = next_edge_index;

                const Eigen::Hyperplane<double, 2> edge_A(estimated_edge_normals[edge_index],
                                                          estimated_edge_offsets[edge_index]);
                const Eigen::Hyperplane<double, 2> edge_B(estimated_edge_normals[next_edge_index],
                                                          estimated_edge_offsets[next_edge_index]);

                const Eigen::Vector2d estimated_corner_pos_roi = edge_A.intersection(edge_B);

                tag.p[corner_index].first = estimated_corner_pos_roi.x() + roi.x;
                tag.p[corner_index].second = estimated_corner_pos_roi.y() + roi.y;
            }
        }
        catch (const std::exception& /*e*/)
        {
            tag.good = false;
        }
    }
    // removeBadTags(tag_detections);
}

// Comparator ordering tag detections by ascending id.
// FIX: operator() is now const — comparators should be callable on const
// instances (and must be for use in const contexts / ordered containers).
struct less_than_key
{
    inline bool operator() (const AprilTags::TagDetection& det1, const AprilTags::TagDetection& det2) const
    {
        return (det1.id < det2.id);
    }
};
}

int main(int argc, char** argv)
{
    AprilTags::TagDetector apriltag_cpp_detector(AprilTags::TagCodes(AprilTags::tagCodes36h11));

    cv::Mat img_gray = cv::imread("0019.png", cv::IMREAD_GRAYSCALE);

    std::vector<AprilTags::TagDetection> tag_detections = apriltag_cpp_detector.extractTags(img_gray);
    std::sort(tag_detections.begin(), tag_detections.end(), less_than_key());
    std::cout << "tag_detections: " << tag_detections.size() << std::endl;

    std::vector<AprilTags::TagDetection> tag_detections_refined = tag_detections;
    refineCornerPointsByDirectEdgeOptimization(img_gray, tag_detections_refined);

    std::map<std::string, std::map<int, std::vector<cv::Point2d>>> ground_truth;
    read_ground_truth(ground_truth);
    const std::string imageFilename = "0019.png";
    std::cout << "ground_truth: " << (ground_truth.find(imageFilename) != ground_truth.end())
              << " ; ground_truth: " << ground_truth.size() << std::endl;

    for (size_t i = 0; i < tag_detections.size(); i++) {
        std::cout << "\nTag " << tag_detections[i].id << ":" << std::endl;
        for (size_t j = 0; j < 4; j++) {
            cv::Point2d corner = ground_truth[imageFilename][tag_detections[i].id][j];
            double error_detection = point_error(tag_detections[i].p[j].first, tag_detections[i].p[j].second, corner.x, corner.y);
            std::cout << "detection: " << tag_detections[i].p[j].first << ", " << tag_detections[i].p[j].second << " ; error="
                      << error_detection << std::endl;

            double error_detection_refined = point_error(tag_detections_refined[i].p[j].first, tag_detections_refined[i].p[j].second, corner.x, corner.y);
            std::cout << "detection refined: " << tag_detections_refined[i].p[j].first << ", " << tag_detections_refined[i].p[j].second << " ; error="
                      << error_detection_refined << std::endl;

            std::cout << "ground truth: " << corner.x << ", " << corner.y << std::endl;
        }
    }

    return 0;
}

This is mostly copy-paste code, but comparing the corner coordinates between the refined detections and the ground-truth data does not show good results. For instance:

Tag 8:
detection: 2033.96, 1260.67 ; error=0.698578
detection refined: 2033.88, 1260.73 ; error=0.706687
ground truth: 2034.38, 1261.23
detection: 2373.62, 1283.37 ; error=0.522154
detection refined: 2373.38, 1283.32 ; error=0.711717
ground truth: 2373.88, 1283.82
detection: 2367.81, 1005.07 ; error=0.869125
detection refined: 2368.05, 1005.02 ; error=0.708416
ground truth: 2368.55, 1005.53
detection: 2028.88, 1011.25 ; error=0.804983
detection refined: 2028.97, 1011.3 ; error=0.705023
ground truth: 2029.47, 1011.8

Tag 9:
detection: 1665.5, 1236.2 ; error=0.68357
detection refined: 1665.45, 1236.2 ; error=0.707975
ground truth: 1665.95, 1236.71
detection: 1921.41, 1253.23 ; error=0.6572
detection refined: 1921.34, 1253.25 ; error=0.697219
ground truth: 1921.83, 1253.74
detection: 1916.5, 1013.47 ; error=0.697531
detection refined: 1916.57, 1013.37 ; error=0.704689
ground truth: 1917.07, 1013.87
detection: 1660.98, 1017.99 ; error=0.826231
detection refined: 1661.03, 1018.1 ; error=0.712545
ground truth: 1661.54, 1018.6

What did I miss, since it should be mostly copy-paste code? Is there any issue or subtlety with the ground-truth data?

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions