Skip to content

Commit dfe030b

Browse files
authored
fix gradio bug. (#100)
* feat: update radio summary and get info list * feat: update header path * feat: add except show. * feat: fix lint
1 parent 0afb19d commit dfe030b

File tree

2 files changed

+71
-38
lines changed

2 files changed

+71
-38
lines changed

app_gradio/app.py

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import os
33
import shutil
4+
from pathlib import Path
45

56
import gradio as gr
67
from dingo.exec import Executor
@@ -31,41 +32,43 @@ def dingo_demo(dataset_source, input_path, uploaded_file, data_format, column_co
3132

3233
final_input_path = uploaded_file.name
3334

34-
input_data = {
35-
"dataset": dataset_source,
36-
"input_path": final_input_path,
37-
"output_path": "" if dataset_source == 'hugging_face' else os.path.dirname(final_input_path),
38-
"save_data": True,
39-
"save_raw": True,
40-
"data_format": data_format,
41-
"column_content": column_content,
42-
"custom_config":{
43-
"rule_list": rule_list,
44-
"prompt_list": prompt_list,
45-
"llm_config":
46-
{
47-
"detect_text_quality_detail":
48-
{
49-
"model": model,
50-
"key": key,
51-
"api_url": api_url,
52-
}
53-
}
35+
try:
36+
input_data = {
37+
"dataset": dataset_source,
38+
"input_path": final_input_path,
39+
"output_path": "" if dataset_source == 'hugging_face' else os.path.dirname(final_input_path),
40+
"save_data": True,
41+
"save_raw": True,
42+
"data_format": data_format,
43+
"column_content": column_content,
44+
"custom_config":{
45+
"rule_list": rule_list,
46+
"prompt_list": prompt_list,
47+
"llm_config":
48+
{
49+
"LLMTextQualityPromptBase":
50+
{
51+
"model": model,
52+
"key": key,
53+
"api_url": api_url,
54+
}
55+
}
56+
}
5457
}
55-
}
56-
input_args = InputArgs(**input_data)
57-
executor = Executor.exec_map["local"](input_args)
58-
executor.execute()
59-
summary = executor.get_summary().to_dict()
60-
detail = executor.get_bad_info_list()
61-
new_detail = []
62-
for item in detail:
63-
new_detail.append(item.to_raw_dict())
64-
if summary['output_path']:
65-
shutil.rmtree(summary['output_path'])
66-
67-
# 返回两个值:概要信息和详细信息
68-
return json.dumps(summary, indent=4), new_detail
58+
input_args = InputArgs(**input_data)
59+
executor = Executor.exec_map["local"](input_args)
60+
summary = executor.execute().to_dict()
61+
detail = executor.get_bad_info_list()
62+
new_detail = []
63+
for item in detail:
64+
new_detail.append(item)
65+
if summary['output_path']:
66+
shutil.rmtree(summary['output_path'])
67+
68+
# 返回两个值:概要信息和详细信息
69+
return json.dumps(summary, indent=4), new_detail
70+
except Exception as e:
71+
raise gr.Error(str(e))
6972

7073

7174
def update_input_components(dataset_source):
@@ -88,7 +91,8 @@ def update_input_components(dataset_source):
8891
rule_options = ['RuleAbnormalChar', 'RuleAbnormalHtml', 'RuleContentNull', 'RuleContentShort', 'RuleEnterAndSpace', 'RuleOnlyUrl']
8992
prompt_options = ['PromptRepeat', 'PromptContentChaos']
9093

91-
with open("header.html", "r") as file:
94+
current_dir = Path(__file__).parent
95+
with open(os.path.join(current_dir, 'header.html'), "r") as file:
9296
header = file.read()
9397
with gr.Blocks() as demo:
9498
gr.HTML(header)

dingo/exec/local.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -315,10 +315,39 @@ def write_summary(self, path: str, input_args: InputArgs, summary: SummaryModel)
315315
json.dump(summary.to_dict(), f, indent=4, ensure_ascii=False)
316316

317317
def get_summary(self):
318-
pass
318+
return self.summary
319+
320+
def get_info_list(self, high_quality: bool) -> list:
    """Collect the per-record results stored under the run's output directory.

    Every file found below ``self.summary.output_path`` (except files named
    ``summary.json``) is read line by line, and each non-blank line is parsed
    as one JSON document.

    Args:
        high_quality: When true, return the records whose ``error_status`` is
            falsy; when false, return the records whose ``error_status`` is
            truthy.

    Returns:
        The parsed records (as produced by ``json.loads``) that match the
        requested quality, in the order ``os.walk`` yields their files.

    Raises:
        ValueError: If the output path is unset or is not an existing
            directory.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the expected ``error_status`` field.
    """
    save_raw = self.input_args.save_raw
    output_path = self.summary.output_path
    # An unset path must be rejected here as well: os.path.isdir(None) would
    # raise TypeError instead of the ValueError callers are told about.
    if not output_path or not os.path.isdir(output_path):
        raise ValueError(f"output_path not exists: {output_path}")

    info_list = []
    for root, _dirs, files in os.walk(output_path):
        for file_name in files:
            # summary.json is not parsed line by line like the other files.
            if file_name == "summary.json":
                continue
            file_path = os.path.join(root, file_name)
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    # Blank lines (e.g. a trailing newline at the end of the
                    # file) carry no record and would make json.loads fail.
                    if not line:
                        continue
                    data = json.loads(line)

                    # With save_raw the status is nested under 'dingo_result';
                    # otherwise it sits at the top level of the record.
                    if save_raw:
                        error_status = data['dingo_result']['error_status']
                    else:
                        error_status = data['error_status']

                    # Keep good records when high_quality is requested and
                    # bad records otherwise.
                    if bool(high_quality) != bool(error_status):
                        info_list.append(data)

    return info_list
319348

320349
def get_bad_info_list(self):
321-
pass
350+
return self.get_info_list(high_quality=False)
322351

323352
def get_good_info_list(self):
324-
pass
353+
return self.get_info_list(high_quality=True)

0 commit comments

Comments
 (0)