Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 31 additions & 20 deletions examples/app_huggingface/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,24 @@
from dingo.io import InputArgs


def dingo_demo(input_path, data_format, column_content, input_rules, input_prompts, key, api_url):
def dingo_demo(input_path, data_format, column_content, rule_list, prompt_list, key, api_url):
if not input_path:
return 'ValueError: input_path can not be empty, please input.'
if not data_format:
return 'ValueError: data_format can not be empty, please input.'
if not column_content:
return 'ValueError: column_content can not be empty, please input.'
if not input_rules and not input_prompts:
return 'ValueError: input_rules and input_prompts can not be empty at the same time.'
if not rule_list and not prompt_list:
return 'ValueError: rule_list and prompt_list can not be empty at the same time.'

input_data = {
"input_path": input_path,
"data_format": data_format,
"column_content": column_content,
"custom_config":
{
"rule_list": input_rules,
"prompt_list": input_prompts,
"rule_list": rule_list,
"prompt_list": prompt_list,
"llm_config":
{
"detect_text_quality_detail":
Expand All @@ -44,19 +44,30 @@ def dingo_demo(input_path, data_format, column_content, input_rules, input_promp
# Names offered as choices in the "rule_list" checkbox group of the demo UI.
rule_options = ['RuleAbnormalChar', 'RuleAbnormalHtml', 'RuleContentNull', 'RuleContentShort', 'RuleEnterAndSpace', 'RuleOnlyUrl']
# Names offered as choices in the "prompt_list" checkbox group of the demo UI.
prompt_options = ['PromptRepeat', 'PromptContentChaos']

# Build the Gradio interface.
# fn sets the handler function, inputs sets the input components,
# outputs sets the output component.
# fn, inputs and outputs are all required arguments.
# The inputs are listed in the same order as the parameters of dingo_demo;
# the two trailing 'text' inputs fill its last two parameters (key, api_url).
demo = gr.Interface(
fn=dingo_demo,
inputs=[
gr.Textbox(value='chupei/format-jsonl', placeholder="please input huggingface dataset path"),
gr.Dropdown(["jsonl", "json", "plaintext", "listjson"], label="data_format"),
gr.Textbox(value="content", placeholder="please input column name of content in dataset"),
gr.CheckboxGroup(choices=rule_options, label="rule_list"),
gr.CheckboxGroup(choices=prompt_options, label="prompt_list"),
'text',
'text',
],
outputs="text")
# Load the static HTML banner rendered at the top of the page. The encoding is
# passed explicitly so decoding does not depend on the platform's default locale.
with open("header.html", "r", encoding="utf-8") as file:
    header = file.read()

# Page layout: the banner first, then a row holding an input column (all
# dingo_demo arguments plus the submit button) and an output column.
with gr.Blocks() as demo:
    gr.HTML(header)
    with gr.Row():
        with gr.Column():
            input_path = gr.Textbox(value='chupei/format-jsonl', placeholder="please input huggingface dataset path", label="input_path")
            data_format = gr.Dropdown(["jsonl", "json", "plaintext", "listjson"], label="data_format")
            column_content = gr.Textbox(value="content", placeholder="please input column name of content in dataset", label="column_content")
            rule_list = gr.CheckboxGroup(choices=rule_options, label="rule_list")
            prompt_list = gr.CheckboxGroup(choices=prompt_options, label="prompt_list")
            key = gr.Textbox(placeholder="If want to use llm, please input the key of it.", label="key")
            api_url = gr.Textbox(placeholder="If want to use llm, please input the api_url of it.", label="api_url")
            with gr.Row():
                submit_single = gr.Button(value="Submit", interactive=True, variant="primary")
        with gr.Column():
            # Output component
            output = gr.Textbox(label="output")

    # The event listener is registered while the Blocks context is still open.
    # The inputs are passed in the same order as the parameters of dingo_demo.
    submit_single.click(
        fn=dingo_demo,
        inputs=[input_path, data_format, column_content, rule_list, prompt_list, key, api_url],
        outputs=output
    )

# Launch the interface
demo.launch()
109 changes: 109 additions & 0 deletions examples/app_huggingface/header.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
<html><head>
<!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/css/bulma.min.css"> -->
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
<style>
.link-block {
border: 1px solid transparent;
border-radius: 24px;
background-color: rgba(54, 54, 54, 1);
cursor: pointer !important;
}
.link-block:hover {
background-color: rgba(54, 54, 54, 0.75) !important;
cursor: pointer !important;
}
.external-link {
display: inline-flex;
align-items: center;
height: 36px;
line-height: 36px;
padding: 0 16px;
cursor: pointer !important;
}
.external-link,
.external-link:hover {
cursor: pointer !important;
}
a {
text-decoration: none;
}
</style></head>

<body>
<div style="
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
text-align: center;
background: linear-gradient(45deg, #007bff 0%, #0056b3 100%);
padding: 24px;
gap: 24px;
border-radius: 8px;
">
<div style="
display: flex;
flex-direction: column;
align-items: center;
gap: 16px;
">
<div style="display: flex; flex-direction: column; gap: 8px">
<h1 style="
font-size: 48px;
color: #fafafa;
margin: 0;
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
">
Dingo
</h1>
</div>
</div>

<p style="
margin: 0;
line-height: 1.6rem;
font-size: 16px;
color: #fafafa;
opacity: 0.8;
">
Dingo: A Comprehensive Data Quality Evaluation Tool.<br>
</p>
<style>
.link-block {
display: inline-block;
}
.link-block + .link-block {
margin-left: 20px;
}
</style>

<div class="column has-text-centered">
<div class="publication-links">
<!-- Code Link. -->
<span class="link-block">
<a href="https://github.com/DataEval/dingo" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
<span class="icon" style="margin-right: 4px">
<i class="fab fa-github" style="color: white; margin-right: 4px"></i>
</span>
<span style="color: white">Code</span>
</a>
</span>

<!-- Package Link. -->
<span class="link-block">
<a href="https://pypi.org/project/dingo-python/" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
<span class="icon" style="margin-right: 8px">
<i class="fas fa-globe" style="color: white"></i>
</span>
<span style="color: white">Package</span>
</a>
</span>
</div>
</div>

<!-- New Demo Links -->
</div>


</body></html>