Commit 2cbc925

add a simple sim-eval metric assessment script

1 parent de0d5b8 commit 2cbc925

File tree: 5 files changed, 1233 additions & 1180 deletions

README.md

Lines changed: 4 additions & 1 deletion

```diff
@@ -20,6 +20,7 @@ We hope that our work guides and inspires future real-to-sim evaluation efforts.
 - [Examples](#examples)
 - [Current Environments](#current-environments)
 - [Customizing Evaluation Configs](#customizing-evaluation-configs)
+- [Metrics for Assessing the Effectiveness of Simulated Evaluation Pipelines](#metrics-for-assessing-the-effectiveness-of-simulated-evaluation-pipelines)
 - [Code Structure](#code-structure)
 - [Adding New Policies](#adding-new-policies)
 - [Adding New Real-to-Sim Evaluation Environments and Robots](#adding-new-real-to-sim-evaluation-environments-and-robots)
@@ -131,7 +132,9 @@ By default, Google Robot environments use a control frequency of 3hz, and Bridge
 
 Please see `scripts/` for examples of how to customize evaluation configs. The inference script `simpler_env/main_inference.py` supports advanced environment building and logging. For example, you can perform a sweep over object and robot poses for evaluation. (Note, however, varying robot poses is not meaningful under the visual matching evaluation setup.)
 
+## Metrics for Assessing the Effectiveness of Simulated Evaluation Pipelines
 
+In our paper, we use the Mean Maximum Rank Violation (MMRV) metric and the Pearson Correlation Coefficient metric to assess the correlation between real and simulated evaluation results. You can reproduce the metrics in `tools/calc_metrics.py` and assess your own real-to-sim evaluation pipeline.
 
 ## Code Structure
 
@@ -165,7 +168,7 @@ simpler_env/
 tools/
   robot_object_visualization/: tools for visualizing robots and objects when creating new environments
   sysid/: tools for system identification when adding new robots
-  calc_metrics.py: tools for summarizing eval results and calculating metrics, such as Normalized Rank Loss, Pearson Correlation, and Kruskal-Wallis test, to reproduce our paper results
+  calc_metrics.py: tools for summarizing eval results and calculating metrics, such as Mean Maximum Rank Violation (MMRV) and Pearson Correlation
   coacd_process_mesh.py: tools for generating convex collision meshes through CoACD when adding new assets
   merge_videos.py: tools for merging videos into one
   ...
```
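The two metrics named in this README addition are implemented in `simpler_env/utils/metrics.py` (diffed below). As a quick illustration of how they behave, here is a minimal sketch of calling them; the import path follows this repository's layout, and the success rates are hypothetical numbers chosen so that exactly one pair of checkpoints is ranked differently in sim and real.

```python
# A minimal, hypothetical example of the two metrics added in this commit.
# The success rates below are made up; only the ranking of checkpoints 1
# and 3 differs between sim and real.
from simpler_env.utils.metrics import (
    mean_maximum_rank_violation,
    pearson_correlation,
)

perf_sim = [0.82, 0.55, 0.30, 0.71]   # simulated success rate per checkpoint
perf_real = [0.78, 0.60, 0.25, 0.52]  # real-robot success rate per checkpoint

# Pearson correlation: closer to 1 means sim and real performance co-vary.
print(pearson_correlation(perf_sim, perf_real))

# MMRV: for each checkpoint, take the largest real-performance gap over all
# pairs whose sim and real orderings disagree, then average over checkpoints.
# Here the only flipped pair is (1, 3) with gap |0.60 - 0.52| = 0.08, so
# MMRV = (0 + 0.08 + 0 + 0.08) / 4 = 0.04; 0 would mean no rank violations.
print(mean_maximum_rank_violation(perf_sim, perf_real))
```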

simpler_env/utils/metrics.py

Lines changed: 33 additions & 24 deletions

```diff
@@ -1,49 +1,52 @@
 import glob
 from pathlib import Path
+from typing import Sequence, Optional
 
 import numpy as np
-from scipy.stats import kruskal
 
 
-def pearson_correlation(x, y):
-    x, y = np.array(x), np.array(y)
-    assert x.shape == y.shape
-    x = x - np.mean(x)
-    y = y - np.mean(y)
-    if np.all(x == y):
+def pearson_correlation(perf_sim: Sequence[float], perf_real: Sequence[float]) -> float:
+    perf_sim, perf_real = np.array(perf_sim), np.array(perf_real)
+    assert perf_sim.shape == perf_real.shape
+    perf_sim = perf_sim - np.mean(perf_sim)
+    perf_real = perf_real - np.mean(perf_real)
+    if np.all(perf_sim == perf_real):
         pearson = 1
     else:
-        pearson = np.sum(x * y) / (np.sqrt(np.sum(x**2) * np.sum(y**2)) + 1e-8)
+        pearson = np.sum(perf_sim * perf_real) / (
+            np.sqrt(np.sum(perf_sim**2) * np.sum(perf_real**2)) + 1e-8
+        )
     return pearson
 
 
-def mean_maximum_rank_violation(x, y):
-    # assuming x is sim result and y is real result
-    x, y = np.array(x), np.array(y)
-    assert x.shape == y.shape
+def mean_maximum_rank_violation(
+    perf_sim: Sequence[float], perf_real: Sequence[float]
+) -> float:
+    perf_sim, perf_real = np.array(perf_sim), np.array(perf_real)
+    assert perf_sim.shape == perf_real.shape
     rank_violations = []
-    for i in range(len(x)):
+    for i in range(len(perf_sim)):
         rank_violation = 0.0
-        for j in range(len(x)):
-            if (x[i] > x[j]) != (y[i] > y[j]):
-                rank_violation = max(rank_violation, np.abs(y[i] - y[j]))
+        for j in range(len(perf_sim)):
+            if (perf_sim[i] > perf_sim[j]) != (perf_real[i] > perf_real[j]):
+                rank_violation = max(
+                    rank_violation, np.abs(perf_real[i] - perf_real[j])
+                )
         rank_violations.append(rank_violation)
     rank_violation = np.mean(rank_violations)
-    # rank_violation = 0.0
-    # for i in range(len(x) - 1):
-    #     for j in range(i + 1, len(x)):
-    #         if (x[i] > x[j]) != (y[i] > y[j]):
-    #             rank_violation = max(rank_violation, np.abs(y[i] - y[j]))
     return rank_violation
 
 
-def print_all_kruskal_results(sim, real, title):
+def print_all_kruskal_results(
+    sim: Sequence[Sequence[float]], real: Sequence[Sequence[float]], title: str
+) -> None:
     """
     sim, real: shape [n_ckpt, n_trials]
     The trial-by-trial success indicator of each checkpoint
     (within each checkpoint, the ordering doesn't matter)
     Prints out the Kruskal-Wallis test for each checkpoint
     """
+    from scipy.stats import kruskal
     sim, real = np.array(sim), np.array(real)
     assert sim.shape == real.shape
     print(title)
@@ -57,7 +60,9 @@ def print_all_kruskal_results(sim, real, title):
         print(" " * 12, kruskal(sim[i], real[i]))
 
 
-def construct_unordered_trial_results(n_trials_per_ckpt, success):
+def construct_unordered_trial_results(
+    n_trials_per_ckpt: int, success: Sequence[float]
+) -> np.ndarray:
     success = np.array(success)
     success = np.where(np.isnan(success), 0, success)
     n_success_trials = np.round(n_trials_per_ckpt * success).astype(np.int32)
@@ -68,7 +73,11 @@ def construct_unordered_trial_results(n_trials_per_ckpt, success):
 
 
 # util to get success / failure results from a directory
-def get_dir_stats(dir_name, extra_pattern_require=[], succ_fail_pattern=["success", "failure"]):
+def get_dir_stats(
+    dir_name: str,
+    extra_pattern_require: Optional[Sequence[str]] = [],
+    succ_fail_pattern: Sequence[str] = ["success", "failure"],
+) -> Sequence[int]:
     if dir_name[-1] == "/":
         dir_name = dir_name[:-1]
 
```

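For the remaining helpers in this file, here is a hedged end-to-end sketch. The directory path, trial count, and real-robot success rate are hypothetical, and it assumes, based on the signatures and docstring above, that `get_dir_stats` returns one 0/1 outcome per result file and that `construct_unordered_trial_results` returns a `[n_ckpt, n_trials]` array.

```python
# Hypothetical end-to-end sketch for the remaining helpers; the directory
# path, trial count, and real-robot success rate are illustrative only.
import numpy as np

from simpler_env.utils.metrics import (
    construct_unordered_trial_results,
    get_dir_stats,
    print_all_kruskal_results,
)

# 1) Read per-trial 0/1 outcomes from a results directory whose file names
#    contain "success" or "failure" (hypothetical path).
sim_outcomes = get_dir_stats("results/sim/ckpt_a")
sim_rate = float(np.mean(sim_outcomes))
real_rate = 0.6  # real-robot success rate for the same checkpoint

# 2) Expand aggregate success rates into unordered 0/1 trial arrays of shape
#    [n_ckpt, n_trials], the format print_all_kruskal_results expects.
n_trials_per_ckpt = 24
sim_trials = construct_unordered_trial_results(n_trials_per_ckpt, [sim_rate])
real_trials = construct_unordered_trial_results(n_trials_per_ckpt, [real_rate])

# 3) Kruskal-Wallis test per checkpoint: a large p-value is consistent with
#    sim and real trials being drawn from similar outcome distributions.
print_all_kruskal_results(sim_trials, real_trials, "ckpt_a: sim vs real")
```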
simpler_env/utils/visualization.py

Lines changed: 0 additions & 122 deletions

```diff
@@ -87,125 +87,3 @@ def plot_pred_and_gt_action_trajectory(predicted_actions, gt_actions, stacked_im
 
     plt.legend()
     plt.show()
-
-
-def colorize_mask(pred_mask: np.ndarray) -> np.ndarray:
-    """Colorize a predicted mask
-    :param pred_mask: [H, W] bool/np.uint8 np.ndarray
-    :return mask: colorized mask, [H, W, 3] np.uint8 np.ndarray
-    """
-    save_mask = Image.fromarray(pred_mask.astype(np.uint8))
-    save_mask = save_mask.convert(mode="P")
-    save_mask.putpalette(_palette)
-    save_mask = save_mask.convert(mode="RGB")
-    return np.asarray(save_mask)
-
-
-def draw_mask(rgb_img, mask, alpha=0.5, id_countour=False) -> np.ndarray:
-    """Overlay predicted mask on rgb image
-    :param rgb_img: RGB image, [H, W, 3] np.uint8 np.ndarray
-    :param mask: [H, W] bool/np.uint8 np.ndarray
-    :param alpha: overlay transparency
-    :return img_mask: mask-overlayed image, [H, W, 3] np.uint8 np.ndarray
-    """
-    img_mask = rgb_img.copy()
-    if id_countour:
-        # very slow ~ 1s per image
-        obj_ids = np.unique(mask)
-        obj_ids = obj_ids[obj_ids != 0]
-
-        for id in obj_ids:
-            # Overlay color on binary mask
-            if id <= 255:
-                color = _palette[id * 3 : id * 3 + 3]
-            else:
-                color = [0, 0, 0]
-            foreground = rgb_img * (1 - alpha) + np.ones_like(rgb_img) * alpha * np.asarray(color)
-            binary_mask = mask == id
-
-            # Compose image
-            img_mask[binary_mask] = foreground[binary_mask]
-
-            countours = binary_dilation(binary_mask, iterations=1) ^ binary_mask
-            img_mask[countours, :] = 0
-    else:
-        binary_mask = mask != 0
-        countours = binary_dilation(binary_mask, iterations=1) ^ binary_mask
-        foreground = rgb_img * (1 - alpha) + colorize_mask(mask) * alpha
-        img_mask[binary_mask] = foreground[binary_mask]
-        img_mask[countours, :] = 0
-    return img_mask
-
-
-def draw_bbox(
-    rgb_image: np.ndarray,
-    labels: List[str],
-    bboxes: np.ndarray,
-    pred_indices: np.ndarray,
-    pred_scores: np.ndarray,
-    bbox_width=2,
-    text_size=25,
-    sort_by_score=True,
-) -> np.ndarray:
-    """Draw bbox predictions on rgb image
-
-    :param rgb_image: RGB image, [H, W, 3] np.uint8 np.ndarray
-    :param labels: list of label strings
-    :param bboxes: bbox as XYXY pixel coordinates, [n_bbox, 4] np.float32 np.ndarray
-    :param pred_indices: predicted label indices, [n_bbox,] integer np.ndarray
-    :param pred_scores: predicted scores, [n_bbox,] np.float32 np.ndarray
-    :param bbox_width: line width to draw bbox
-    :param text_size: text size to write predicted label
-    :param sort_by_score: plot bboxes with lower scores first
-        so bboxes with higher score are visible
-    :return out_image: rgb_image with drawn bboxes, [H, W, 3] np.uint8 np.ndarray
-    """
-    font = ImageFont.truetype(FONT_PATH, text_size)
-
-    H, W = rgb_image.shape[:2]
-    rgb_im = Image.fromarray(rgb_image).convert("RGBA")
-    # make a blank image for text, initialized to transparent text color
-    txt_im = Image.new("RGBA", rgb_im.size, (255, 255, 255, 0))
-    d = ImageDraw.Draw(txt_im)
-
-    if sort_by_score:
-        sorted_idx = pred_scores.argsort()
-        bboxes = bboxes[sorted_idx]
-        pred_indices = pred_indices[sorted_idx]
-        pred_scores = pred_scores[sorted_idx]
-
-    def _pad_bbox(bbox: Tuple[float], pad: float) -> Tuple[float]:
-        left, top, right, bottom = bbox
-        return (left - pad, top - pad, right + pad, bottom + pad)
-
-    for (x1, y1, x2, y2), pred_index, pred_score in zip(bboxes, pred_indices, pred_scores):
-        # draw bbox (left, top, right, bottom)
-        d.rectangle([x1, y1, x2, y2], fill=None, outline=(255, 0, 0), width=bbox_width)
-
-        # draw text
-        text = f"{labels[pred_index]}: {pred_score:1.2f}"
-        anchor_xy = [x1 + text_size * 0.1, y2 + text_size * 0.1 + 1]
-        anchor = "lt"
-        text_bbox = d.textbbox(anchor_xy, text, font=font, anchor=anchor)
-        text_bbox = _pad_bbox(text_bbox, text_size * 0.1)
-        if text_bbox[3] > H and text_bbox[2] > W:  # bottom-right
-            anchor_xy = [x2 - text_size * 0.1, y1 - text_size * 0.1 - 1]
-            anchor = "rb"
-            text_bbox = d.textbbox(anchor_xy, text, font=font, anchor=anchor)
-            text_bbox = _pad_bbox(text_bbox, text_size * 0.1)
-        elif text_bbox[3] > H:  # bottom
-            anchor_xy = [x1 + text_size * 0.1, y1 - text_size * 0.1 - 1]
-            anchor = "lb"
-            text_bbox = d.textbbox(anchor_xy, text, font=font, anchor=anchor)
-            text_bbox = _pad_bbox(text_bbox, text_size * 0.1)
-        elif text_bbox[2] > W:  # right
-            anchor_xy = [x2 - text_size * 0.1, y2 + text_size * 0.1 + 1]
-            anchor = "rt"
-            text_bbox = d.textbbox(anchor_xy, text, font=font, anchor=anchor)
-            text_bbox = _pad_bbox(text_bbox, text_size * 0.1)
-        # draw text bbox (bg only)
-        d.rectangle(text_bbox, fill=(255, 255, 255), outline=None, width=1)
-        d.text(anchor_xy, text, fill=(0, 0, 0), font=font, anchor=anchor)
-
-    out_im = Image.alpha_composite(rgb_im, txt_im).convert("RGB")
-    return np.asarray(out_im).copy()  # copy makes it writable
```
