#!/usr/bin/env python
import copy
import os
import sys
from typing import Any, Dict

import torch
from einops.einops import rearrange

# Make the vendored LoFTR repository importable (it is expected to live next
# to this file, e.g. as a git submodule).
_CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(_CURRENT_DIR, "LoFTR"))

from src.loftr import LoFTR, default_cfg

# Enable the upstream `temp_bug_fix` option, which corrects the temperature
# term in the sinusoidal positional encoding and matches the newer
# pretrained checkpoints.
DEFAULT_CFG = copy.deepcopy(default_cfg)
DEFAULT_CFG["coarse"]["temp_bug_fix"] = True


class LoFTRWrapper(LoFTR):
    """LoFTR whose forward takes plain tensors and returns a dict of tensors,
    which keeps the module convenient to trace or export."""

    def __init__(
        self,
        config: Dict[str, Any] = DEFAULT_CFG,
    ):
        super().__init__(config)

    def forward(
        self,
        image0: torch.Tensor,
        image1: torch.Tensor,
    ) -> Dict[str, torch.Tensor]:
        # Pack the inputs into the shared `data` dict that LoFTR's submodules
        # read from and mutate in place.
        data = {
            "image0": image0,
            "image1": image1,
        }
        del image0, image1

        data.update(
            {
                "bs": data["image0"].size(0),
                "hw0_i": data["image0"].shape[2:],
                "hw1_i": data["image1"].shape[2:],
            }
        )

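        # 1. local feature CNN: extract coarse and fine feature maps (with the
        # default ResNetFPN_8_2 backbone these are at 1/8 and 1/2 resolution).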
        if data["hw0_i"] == data["hw1_i"]:  # faster & better BN convergence
            feats_c, feats_f = self.backbone(
                torch.cat([data["image0"], data["image1"]], dim=0)
            )
            (feat_c0, feat_c1), (feat_f0, feat_f1) = feats_c.split(
                data["bs"]
            ), feats_f.split(data["bs"])
        else:  # handle different input shapes
            (feat_c0, feat_f0), (feat_c1, feat_f1) = self.backbone(
                data["image0"]
            ), self.backbone(data["image1"])

        data.update(
            {
                "hw0_c": feat_c0.shape[2:],
                "hw1_c": feat_c1.shape[2:],
                "hw0_f": feat_f0.shape[2:],
                "hw1_f": feat_f1.shape[2:],
            }
        )

        # 2. coarse-level LoFTR module
        # add positional encoding to the feature maps, then flatten them to
        # sequences of shape [N, HW, C]
        feat_c0 = rearrange(self.pos_encoding(feat_c0), "n c h w -> n (h w) c")
        feat_c1 = rearrange(self.pos_encoding(feat_c1), "n c h w -> n (h w) c")

        mask_c0 = mask_c1 = None  # padding masks are only provided during training
        if "mask0" in data:
            mask_c0, mask_c1 = data["mask0"].flatten(-2), data["mask1"].flatten(-2)
        feat_c0, feat_c1 = self.loftr_coarse(feat_c0, feat_c1, mask_c0, mask_c1)

        # 3. match coarse-level
        self.coarse_matching(feat_c0, feat_c1, data, mask_c0=mask_c0, mask_c1=mask_c1)
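        # coarse_matching stores the coarse matches (e.g. mkpts0_c / mkpts1_c
        # and their confidences, mconf) back into `data`.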

        # 4. fine-level refinement
        feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(
            feat_f0, feat_f1, feat_c0, feat_c1, data
        )
        if feat_f0_unfold.size(0) != 0:  # at least one coarse-level match was predicted
            feat_f0_unfold, feat_f1_unfold = self.loftr_fine(
                feat_f0_unfold, feat_f1_unfold
            )

        # 5. match fine-level
        self.fine_matching(feat_f0_unfold, feat_f1_unfold, data)
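        # fine_matching refines the coarse matches to sub-pixel accuracy and
        # stores the final matches (mkpts0_f / mkpts1_f) in `data`.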

        # Rename LoFTR's internal output keys to a stable public interface.
        rename_keys: Dict[str, str] = {
            "mkpts0_f": "keypoints0",
            "mkpts1_f": "keypoints1",
            "mconf": "confidence",
        }
        out: Dict[str, torch.Tensor] = {}
        for k, v in rename_keys.items():
            _d = data[k]
            if isinstance(_d, torch.Tensor):
                out[v] = _d
            else:
                raise TypeError(
                    f"Expected torch.Tensor for item `{k}`, got {type(_d)}"
                )
        del data

        return out
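

if __name__ == "__main__":
    # Minimal smoke-test sketch. The checkpoint path below is an assumption
    # (the official LoFTR weights, e.g. outdoor_ds.ckpt, are distributed as
    # Lightning checkpoints with a "state_dict" key); without the file the
    # model simply runs with randomly initialized weights.
    model = LoFTRWrapper().eval()
    ckpt_path = os.path.join(_CURRENT_DIR, "weights", "outdoor_ds.ckpt")  # hypothetical path
    if os.path.exists(ckpt_path):
        model.load_state_dict(torch.load(ckpt_path)["state_dict"])

    # LoFTR expects grayscale images in [0, 1] with H and W divisible by 8.
    image0 = torch.rand(1, 1, 480, 640)
    image1 = torch.rand(1, 1, 480, 640)
    with torch.no_grad():
        out = model(image0, image1)
    print({k: tuple(v.shape) for k, v in out.items()})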