Merge pull request #169 from MigoXLab/main

e06084 · web-flow · commit e82ceb31ff55 · 2025-09-08T13:02:40.000+08:00
Sync main to dev
diff --git a/dingo/model/llm/llm_text_3h.py b/dingo/model/llm/llm_text_3h.py
@@ -40,7 +40,7 @@ def process_response(cls, response: str) -> ModelRes:
         result = ModelRes()
 
         # error_status
-        if response_model.score == "1":
+        if response_model.score == 1:
             result.reason = [response_model.reason]
             result.name = cls.prompt.__name__[8:].upper()
         else:
diff --git a/dingo/run/vsl.py b/dingo/run/vsl.py
@@ -169,6 +169,11 @@ def parse_args():
             "app"],
         default="visualization",
         help="Choose the mode: visualization or app")
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=8000,
+        help="Port for local HTTP server in visualization mode (default: 8000)")
     return parser.parse_args()
 
 
@@ -195,7 +200,7 @@ def main():
         success, new_html_filename = process_and_inject(args.input)
         if success:
             web_static_dir = os.path.join(os.path.dirname(__file__), "..", "..", "web-static")
-            port = 8000
+            port = args.port
             try:
                 server = start_http_server(web_static_dir, port)
                 url = f"http://localhost:{port}/{new_html_filename}"
diff --git a/docs/assets/wechat.jpg b/docs/assets/wechat.jpg
diff --git a/docs/metrics.md b/docs/metrics.md
@@ -16,6 +16,7 @@ This document provides comprehensive information about all quality metrics used
 
 | Type | Metric | Description | Paper Source | Evaluation Results |
 |------|--------|-------------|--------------|-------------------|
+| `MathCompare` | PromptMathCompare | Compares the effectiveness of two tools in extracting mathematical formulas from HTML to Markdown format by evaluatin... | Internal Implementation | N/A |
 | `QUALITY_BAD_HALLUCINATION` | PromptHallucination | Evaluates whether the response contains factual contradictions or hallucinations against provided context information | [TruthfulQA: Measuring How Models Mimic Human Falsehoods](https://arxiv.org/abs/2109.07958) (Lin et al., 2021) | N/A |
 | `QUALITY_BAD_HALLUCINATION` | RuleHallucinationHHEM | Uses Vectara's HHEM-2.1-Open model for local hallucination detection by evaluating consistency between response and c... | [HHEM-2.1-Open](https://huggingface.co/vectara/hallucination_evaluation_model) (Forrest Bao, Miaoran Li, Rogger Luo, Ofer Mendelevitch) | N/A |
 | `QUALITY_HARMLESS` | PromptTextHarmless | Checks if responses avoid harmful content, discriminatory language, and dangerous assistance | [Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback](https://arxiv.org/pdf/2204.05862) (Bai et al., 2022) | [📊 See Results](eval/prompt/qa_data_evaluated_by_3h.md) |