diff --git a/dingo/model/llm/llm_text_3h.py b/dingo/model/llm/llm_text_3h.py index ad4eb8e7..62be1027 100644 --- a/dingo/model/llm/llm_text_3h.py +++ b/dingo/model/llm/llm_text_3h.py @@ -40,7 +40,7 @@ def process_response(cls, response: str) -> ModelRes: result = ModelRes() # error_status - if response_model.score == "1": + if response_model.score == 1: result.reason = [response_model.reason] result.name = cls.prompt.__name__[8:].upper() else: diff --git a/dingo/run/vsl.py b/dingo/run/vsl.py index e8fbe905..a17f1967 100644 --- a/dingo/run/vsl.py +++ b/dingo/run/vsl.py @@ -169,6 +169,11 @@ def parse_args(): "app"], default="visualization", help="Choose the mode: visualization or app") + parser.add_argument( + "--port", + type=int, + default=8000, + help="Port for local HTTP server in visualization mode (default: 8000)") return parser.parse_args() @@ -195,7 +200,7 @@ def main(): success, new_html_filename = process_and_inject(args.input) if success: web_static_dir = os.path.join(os.path.dirname(__file__), "..", "..", "web-static") - port = 8000 + port = args.port try: server = start_http_server(web_static_dir, port) url = f"http://localhost:{port}/{new_html_filename}" diff --git a/docs/assets/wechat.jpg b/docs/assets/wechat.jpg new file mode 100644 index 00000000..52cb8d8e Binary files /dev/null and b/docs/assets/wechat.jpg differ diff --git a/docs/metrics.md b/docs/metrics.md index 5070c554..89e7695b 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -16,6 +16,7 @@ This document provides comprehensive information about all quality metrics used | Type | Metric | Description | Paper Source | Evaluation Results | |------|--------|-------------|--------------|-------------------| +| `MathCompare` | PromptMathCompare | Compares the effectiveness of two tools in extracting mathematical formulas from HTML to Markdown format by evaluatin... | Internal Implementation | N/A | | `QUALITY_BAD_HALLUCINATION` | PromptHallucination | Evaluates whether the response contains factual contradictions or hallucinations against provided context information | [TruthfulQA: Measuring How Models Mimic Human Falsehoods](https://arxiv.org/abs/2109.07958) (Lin et al., 2021) | N/A | | `QUALITY_BAD_HALLUCINATION` | RuleHallucinationHHEM | Uses Vectara's HHEM-2.1-Open model for local hallucination detection by evaluating consistency between response and c... | [HHEM-2.1-Open](https://huggingface.co/vectara/hallucination_evaluation_model) (Forrest Bao, Miaoran Li, Rogger Luo, Ofer Mendelevitch) | N/A | | `QUALITY_HARMLESS` | PromptTextHarmless | Checks if responses avoid harmful content, discriminatory language, and dangerous assistance | [Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback](https://arxiv.org/pdf/2204.05862) (Bai et al., 2022) | [📊 See Results](eval/prompt/qa_data_evaluated_by_3h.md) |