fix gradio bug. (#100)

shijinpjlab · web-flow · commit dfe030b8cf01 · 2025-06-10T21:53:49.000+08:00
* feat: update radio summary and get info list

* feat: update header path

* feat: add exception display.

* feat: fix lint
diff --git a/app_gradio/app.py b/app_gradio/app.py
@@ -1,6 +1,7 @@
 import json
 import os
 import shutil
+from pathlib import Path
 
 import gradio as gr
 from dingo.exec import Executor
@@ -31,41 +32,43 @@ def dingo_demo(dataset_source, input_path, uploaded_file, data_format, column_co
 
         final_input_path = uploaded_file.name
 
-    input_data = {
-        "dataset": dataset_source,
-        "input_path": final_input_path,
-        "output_path": "" if dataset_source == 'hugging_face' else os.path.dirname(final_input_path),
-        "save_data": True,
-        "save_raw": True,
-        "data_format": data_format,
-        "column_content": column_content,
-        "custom_config":{
-            "rule_list": rule_list,
-            "prompt_list": prompt_list,
-            "llm_config":
-                {
-                    "detect_text_quality_detail":
-                        {
-                            "model": model,
-                            "key": key,
-                            "api_url": api_url,
-                        }
-                }
+    try:
+        input_data = {
+            "dataset": dataset_source,
+            "input_path": final_input_path,
+            "output_path": "" if dataset_source == 'hugging_face' else os.path.dirname(final_input_path),
+            "save_data": True,
+            "save_raw": True,
+            "data_format": data_format,
+            "column_content": column_content,
+            "custom_config":{
+                "rule_list": rule_list,
+                "prompt_list": prompt_list,
+                "llm_config":
+                    {
+                        "LLMTextQualityPromptBase":
+                            {
+                                "model": model,
+                                "key": key,
+                                "api_url": api_url,
+                            }
+                    }
+            }
         }
-    }
-    input_args = InputArgs(**input_data)
-    executor = Executor.exec_map["local"](input_args)
-    executor.execute()
-    summary = executor.get_summary().to_dict()
-    detail = executor.get_bad_info_list()
-    new_detail = []
-    for item in detail:
-        new_detail.append(item.to_raw_dict())
-    if summary['output_path']:
-        shutil.rmtree(summary['output_path'])
-
-    # 返回两个值：概要信息和详细信息
-    return json.dumps(summary, indent=4), new_detail
+        input_args = InputArgs(**input_data)
+        executor = Executor.exec_map["local"](input_args)
+        summary = executor.execute().to_dict()
+        detail = executor.get_bad_info_list()
+        new_detail = []
+        for item in detail:
+            new_detail.append(item)
+        if summary['output_path']:
+            shutil.rmtree(summary['output_path'])
+
+        # 返回两个值：概要信息和详细信息
+        return json.dumps(summary, indent=4), new_detail
+    except Exception as e:
+        raise gr.Error(str(e))
 
 
 def update_input_components(dataset_source):
@@ -88,7 +91,8 @@ def update_input_components(dataset_source):
     rule_options = ['RuleAbnormalChar', 'RuleAbnormalHtml', 'RuleContentNull', 'RuleContentShort', 'RuleEnterAndSpace', 'RuleOnlyUrl']
     prompt_options = ['PromptRepeat', 'PromptContentChaos']
 
-    with open("header.html", "r") as file:
+    current_dir = Path(__file__).parent
+    with open(os.path.join(current_dir, 'header.html'), "r") as file:
         header = file.read()
     with gr.Blocks() as demo:
         gr.HTML(header)
diff --git a/dingo/exec/local.py b/dingo/exec/local.py
@@ -315,10 +315,39 @@ def write_summary(self, path: str, input_args: InputArgs, summary: SummaryModel)
             json.dump(summary.to_dict(), f, indent=4, ensure_ascii=False)
 
     def get_summary(self):
-        pass
+        return self.summary
+
+    def get_info_list(self, high_quality: bool) -> list:
+        info_list = []
+
+        save_raw = self.input_args.save_raw
+        output_path = self.summary.output_path
+        if not os.path.isdir(output_path):
+            raise ValueError(f"output_path not exists: {output_path}")
+
+        for root, dirs, files in os.walk(output_path):
+            for file in files:
+                file_path = os.path.join(root, file)
+                file_name = file
+                if file_name == "summary.json":
+                    continue
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    for line in f:
+                        data = json.loads(line.strip())
+
+                        if save_raw:
+                            error_status = data['dingo_result']['error_status']
+                        else:
+                            error_status = data['error_status']
+                        if high_quality and not error_status:
+                            info_list.append(data)
+                        if not high_quality and error_status:
+                            info_list.append(data)
+
+        return info_list
 
     def get_bad_info_list(self):
-        pass
+        return self.get_info_list(high_quality=False)
 
     def get_good_info_list(self):
-        pass
+        return self.get_info_list(high_quality=True)