11import json
22import os
33import shutil
4+ from pathlib import Path
45
56import gradio as gr
67from dingo .exec import Executor
@@ -31,41 +32,43 @@ def dingo_demo(dataset_source, input_path, uploaded_file, data_format, column_co
3132
3233 final_input_path = uploaded_file .name
3334
34- input_data = {
35- "dataset" : dataset_source ,
36- "input_path" : final_input_path ,
37- "output_path" : "" if dataset_source == 'hugging_face' else os .path .dirname (final_input_path ),
38- "save_data" : True ,
39- "save_raw" : True ,
40- "data_format" : data_format ,
41- "column_content" : column_content ,
42- "custom_config" :{
43- "rule_list" : rule_list ,
44- "prompt_list" : prompt_list ,
45- "llm_config" :
46- {
47- "detect_text_quality_detail" :
48- {
49- "model" : model ,
50- "key" : key ,
51- "api_url" : api_url ,
52- }
53- }
35+ try :
36+ input_data = {
37+ "dataset" : dataset_source ,
38+ "input_path" : final_input_path ,
39+ "output_path" : "" if dataset_source == 'hugging_face' else os .path .dirname (final_input_path ),
40+ "save_data" : True ,
41+ "save_raw" : True ,
42+ "data_format" : data_format ,
43+ "column_content" : column_content ,
44+ "custom_config" :{
45+ "rule_list" : rule_list ,
46+ "prompt_list" : prompt_list ,
47+ "llm_config" :
48+ {
49+ "LLMTextQualityPromptBase" :
50+ {
51+ "model" : model ,
52+ "key" : key ,
53+ "api_url" : api_url ,
54+ }
55+ }
56+ }
5457 }
55- }
56- input_args = InputArgs ( ** input_data )
57- executor = Executor . exec_map [ "local" ]( input_args )
58- executor .execute ()
59- summary = executor . get_summary (). to_dict ()
60- detail = executor . get_bad_info_list ()
61- new_detail = []
62- for item in detail :
63- new_detail . append ( item . to_raw_dict () )
64- if summary [ 'output_path' ]:
65- shutil . rmtree ( summary [ 'output_path' ])
66-
67- # 返回两个值:概要信息和详细信息
68- return json . dumps ( summary , indent = 4 ), new_detail
58+ input_args = InputArgs ( ** input_data )
59+ executor = Executor . exec_map [ "local" ]( input_args )
60+ summary = executor . execute (). to_dict ( )
61+ detail = executor .get_bad_info_list ()
62+ new_detail = []
63+ for item in detail :
64+ new_detail . append ( item )
65+ if summary [ 'output_path' ] :
66+ shutil . rmtree ( summary [ 'output_path' ] )
67+
68+ # Return two values: the summary information and the detail information
69+ return json . dumps ( summary , indent = 4 ), new_detail
70+ except Exception as e :
71+ raise gr . Error ( str ( e ))
6972
7073
7174def update_input_components (dataset_source ):
@@ -88,7 +91,8 @@ def update_input_components(dataset_source):
8891 rule_options = ['RuleAbnormalChar' , 'RuleAbnormalHtml' , 'RuleContentNull' , 'RuleContentShort' , 'RuleEnterAndSpace' , 'RuleOnlyUrl' ]
8992 prompt_options = ['PromptRepeat' , 'PromptContentChaos' ]
9093
91- with open ("header.html" , "r" ) as file :
94+ current_dir = Path (__file__ ).parent
95+ with open (os .path .join (current_dir , 'header.html' ), "r" ) as file :
9296 header = file .read ()
9397 with gr .Blocks () as demo :
9498 gr .HTML (header )
0 commit comments