Skip to content

Commit f3b02aa

Browse files
authored
Merge pull request #29 from SHUzhangshuo/main
feat:Data Modification Tools
2 parents 0d60eca + d53059c commit f3b02aa

File tree

2 files changed

+90
-1
lines changed

2 files changed

+90
-1
lines changed

requirements.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ html2text
88
resiliparse
99
trafilatura
1010
# llm-web-kit==3.2.0
11-
https://github.com/opendatalab/magic-html/releases/download/magic_html-0.1.5-released/magic_html-0.1.5-py3-none-any.whl
11+
https://github.com/opendatalab/magic-html/releases/download/magic_html-0.1.5-released/magic_html-0.1.5-py3-none-any.whl
12+
streamlit
13+
markdown
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
'''
This script is used to read the table data from the jsonl file and display it in a streamlit app.

Command-line usage:
    streamlit run WebMainBench/webmainbench/utils/Data_Modification_Tools.py -- WebMainBench/data/WebMainBench_test_0814_llm-webkit_filtered_table_results.jsonl

Note: the `--` must be followed by a space and then the data file path;
without the space the command fails.
'''
9+
10+
import json
11+
import os
12+
import streamlit as st
13+
import markdown
14+
15+
import sys
16+
17+
# The data file path is supplied as the first command-line argument.
if len(sys.argv) > 1:
    DATA_FILE = sys.argv[1]
else:
    # The app only works when launched through `streamlit run`, with script
    # arguments placed after `--`, so the usage hint has to show that form
    # (the previous hint suggested a plain `python read_table.py` invocation).
    st.error("请通过命令行参数传入数据文件路径,例如:streamlit run Data_Modification_Tools.py -- /path/to/data.jsonl")
    st.stop()
23+
24+
# Load every record from the data file.
@st.cache_data(show_spinner=False)
def load_data():
    """Return the records stored in ``DATA_FILE``.

    Each non-blank line of the file is parsed as one JSON document. The
    result is memoized by ``st.cache_data`` (spinner disabled).

    Returns:
        A list with one parsed object per non-blank line, or an empty list
        when ``DATA_FILE`` does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if os.path.exists(DATA_FILE):
        with open(DATA_FILE, "r", encoding="utf-8") as jsonl_file:
            # Whitespace-only lines are skipped rather than parsed.
            return [json.loads(raw) for raw in jsonl_file if raw.strip()]
    return []
35+
36+
# Persist every record back to the data file.
def save_data(data):
    """Write all records to ``DATA_FILE`` in JSON Lines format.

    Args:
        data: Iterable of JSON-serializable records; each one becomes a
            single line, written with ``ensure_ascii=False``.

    Raises:
        TypeError: If a record is not JSON-serializable. This is raised
            before the existing file is touched.
        OSError: If the temporary file cannot be written or moved into place.
    """
    # Serialize everything first so a bad record cannot leave a truncated
    # data file behind.
    lines = [json.dumps(item, ensure_ascii=False) + "\n" for item in data]

    # Write to a sibling temp file and swap it in, so DATA_FILE is never
    # observed half-written.
    tmp_path = f"{DATA_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.writelines(lines)
    os.replace(tmp_path, DATA_FILE)

    # load_data() is memoized by st.cache_data and hands out copies of the
    # cached snapshot. Without invalidation, the next rerun would load the
    # pre-save data and a later save would overwrite this edit.
    load_data.clear()
42+
43+
# Streamlit page layout
st.set_page_config(layout="wide")
st.title("WebMainBench Table Editor")

# Load the records; when there is nothing to edit, show a warning and stop
# this script run.
data = load_data()
if not data:
    st.warning("未找到数据文件或文件为空。")
    st.stop()
51+
52+
# Choose which record to edit: a sidebar number input bounded to the valid
# indices of `data`, starting at 0.
index = st.sidebar.number_input("选择条目索引", min_value=0, max_value=len(data)-1, value=0, step=1)
item = data[index]
55+
56+
# Three-column layout: rendered HTML, rendered Markdown, editable Markdown source.
col1, col2, col3 = st.columns([1.5, 1.5, 2])

# Left column: the record's "html" field inside a bordered, scrollable box.
# NOTE(review): the HTML is injected unescaped (unsafe_allow_html=True);
# only open data files you trust.
col1.subheader("HTML 渲染")
col1.markdown(
    f'<div style="border:1px solid #ddd;padding:8px;overflow:auto;max-height:600px">{item.get("html","")}</div>',
    unsafe_allow_html=True,
)
65+
66+
# Middle column: the current "groundtruth_content" rendered as Markdown
# (embedded HTML is allowed through).
col2.subheader("Markdown 渲染")
col2.markdown(item.get("groundtruth_content", ""), unsafe_allow_html=True)
72+
73+
# Right column: editable Markdown source plus a submit button. Widget keys
# include the record index, so each record has its own widget state.
col3.subheader("Markdown 源代码(可编辑)")
new_markdown = col3.text_area(
    "编辑 Markdown",
    value=item.get("groundtruth_content", ""),
    height=600,
    key=f"markdown_edit_{index}",
)
if col3.button("提交更改", key=f"submit_{index}"):
    if new_markdown == item.get("groundtruth_content", ""):
        # Nothing was edited, so the file is left untouched.
        col3.info("内容未更改,无需保存。")
    else:
        data[index]["groundtruth_content"] = new_markdown
        save_data(data)
        col3.success("更改已保存!")

0 commit comments

Comments
 (0)