Skip to content

Commit 5dd4c52

Browse files
authored
Merge pull request #32 from SHUzhangshuo/main
fix:label_tool bug
2 parents 34a1c9c + 63438e1 commit 5dd4c52

File tree

1 file changed

+40
-17
lines changed

1 file changed

+40
-17
lines changed

tools/label_tool.py

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
'''
2-
This script is used to read the table data from the jsonl file and display it in a streamlit app.
3-
4-
命令行输入:
5-
streamlit run WebMainBench/webmainbench/utils/Data_Modification_Tools.py -- WebMainBench/data/WebMainBench_test_0814_llm-webkit_filtered_table_results.jsonl
2+
本脚本用于从jsonl文件读取表格数据,并在streamlit应用中展示和编辑。
3+
命令行输入示例:
4+
streamlit run WebMainBench/tools/label_tool.py -- WebMainBench/data/WebMainBench_test_0814_llm-webkit_filtered_table_results.jsonl
65
注意:`--` 后面必须有一个空格,然后再接数据文件路径,否则会报错。
76
87
'''
@@ -33,12 +32,18 @@ def load_data():
3332
data.append(json.loads(line))
3433
return data
3534

36-
# Persist every record back to the data file.
def save_data(data):
    """Rewrite DATA_FILE with one JSON document per line.

    Each element of ``data`` is echoed to stdout and then serialized with
    ``ensure_ascii=False`` so non-ASCII text is stored verbatim.

    Args:
        data: Iterable of JSON-serializable records, written in order.
    """
    with open(DATA_FILE, "w", encoding="utf-8") as out:
        for record in data:
            print(record)
            serialized = json.dumps(record, ensure_ascii=False)
            out.write(f"{serialized}\n")
35+
# Replace a single record in the JSONL file, leaving every other line as-is.
def update_single_item(index, new_item, path=None):
    """Overwrite line ``index`` of a JSONL file with ``new_item``.

    All lines are read, the one at ``index`` is replaced by the JSON
    serialization of ``new_item`` (``ensure_ascii=False``, newline-terminated),
    and the result is written to a sibling temporary file that is then swapped
    in with ``os.replace``. The target is therefore never left half-written if
    the process dies mid-save.

    Args:
        index: Zero-based physical line number of the record to replace.
        new_item: JSON-serializable object to store on that line.
        path: File to update. Defaults to the module-level ``DATA_FILE``.

    Returns:
        True if the line was replaced and the file rewritten; False if the
        file does not exist or ``index`` is outside ``0 <= index < line count``
        (the file is left untouched in both cases).

    Raises:
        OSError: If reading or writing fails for a reason other than the file
            being absent. The temporary file is removed before re-raising.
    """
    if path is None:
        path = DATA_FILE

    try:
        with open(path, "r", encoding="utf-8") as src:
            lines = src.readlines()
    except FileNotFoundError:
        return False

    # NOTE(review): ``index`` is treated as a physical line number. This only
    # matches the position in the loaded list if the loader creates exactly
    # one record per line (no skipped blank lines) -- confirm against load_data.
    if not 0 <= index < len(lines):
        return False

    lines[index] = json.dumps(new_item, ensure_ascii=False) + "\n"

    # Write next to the target so os.replace stays on the same filesystem.
    tmp_path = os.fspath(path) + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as dst:
            dst.writelines(lines)
        os.replace(tmp_path, path)
    except OSError:
        # Do not leave a stale temporary file behind on failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    return True
4247

4348
# Streamlit页面布局
4449
st.set_page_config(layout="wide")
@@ -58,30 +63,48 @@ def save_data(data):
5863

5964
with col1:
    st.subheader("HTML 渲染")
    # Wrap the record's raw HTML in a bordered, scrollable container. The same
    # markup is used by both the primary renderer and the fallback.
    framed_html = (
        '<div style="border:1px solid #ddd;padding:8px;overflow:auto;max-height:600px">'
        f'{item.get("html","")}</div>'
    )
    # Try the components HTML renderer first; if that call raises for any
    # reason, render through st.markdown instead and tell the user.
    try:
        st.components.v1.html(framed_html, height=620, scrolling=True)
    except Exception:
        st.markdown(framed_html, unsafe_allow_html=True)
        st.info("st.components.v1.html不可用,已回退为st.markdown。")
6579

6680
with col2:
    st.subheader("Markdown 渲染")
    # The preview shows whatever text is stored in session state under the
    # editor's key, so it follows edits rather than the value on disk.
    edit_key = f"markdown_edit_{index}"
    if edit_key not in st.session_state:
        # First visit to this record: seed the state from the stored content.
        st.session_state[edit_key] = item.get("groundtruth_content", "")
    current_markdown = st.session_state[edit_key]
    st.markdown(current_markdown, unsafe_allow_html=True)
7291

7392
with col3:
    st.subheader("Markdown 源代码(可编辑)")
    # The editor's content lives in session state under its widget key. Seed
    # it once here and let the widget read it through ``key`` alone: passing
    # ``value=`` as well as a key that was just written via session state
    # makes Streamlit warn that the widget has two sources for its value.
    editor_key = f"markdown_edit_{index}"
    if editor_key not in st.session_state:
        st.session_state[editor_key] = item.get("groundtruth_content", "")
    new_markdown = st.text_area(
        "编辑 Markdown",
        height=600,
        key=editor_key,
    )
    # The submit button only persists to disk; the preview updates on its own.
    if st.button("提交更改", key=f"submit_{index}"):
        if new_markdown != item.get("groundtruth_content", ""):
            # Update only this record instead of re-serializing the dataset.
            data[index]["groundtruth_content"] = new_markdown
            update_single_item(index, data[index])
            # Drop the cached load so the next run reads the saved content.
            load_data.clear()
            st.success("更改已保存!")
        else:
            st.info("内容未更改,无需保存。")

0 commit comments

Comments
 (0)