|
'''
This script reads table data from a JSONL file and displays it in a Streamlit app,
where the Markdown of each record can be edited and saved back to the file.

Command line usage:
streamlit run WebMainBench/webmainbench/utils/Data_Modification_Tools.py -- WebMainBench/data/WebMainBench_test_0814_llm-webkit_filtered_table_results.jsonl
Note: the "--" must be followed by a space and then the data file path; otherwise an error is raised.
'''
| 9 | + |
| 10 | +import json |
| 11 | +import os |
| 12 | +import streamlit as st |
| 13 | +import markdown |
| 14 | + |
| 15 | +import sys |
| 16 | + |
# Resolve the data file path from the command line. The script is launched
# through `streamlit run <script> -- <data file>`; the arguments after the
# bare "--" separator are the ones that reach the script via sys.argv.
if len(sys.argv) > 1:
    DATA_FILE = sys.argv[1]
else:
    # The usage hint shows the `streamlit run ... -- <path>` form, because that
    # is how this app has to be started (a plain `python` invocation will not
    # serve the Streamlit page).
    st.error("请通过命令行参数传入数据文件路径,例如:streamlit run Data_Modification_Tools.py -- /path/to/data.jsonl")
    st.stop()
| 23 | + |
# Load every record
@st.cache_data(show_spinner=False)
def load_data():
    """Read all records from the JSONL file named by ``DATA_FILE``.

    Whitespace-only lines are skipped; every other line is parsed with
    ``json.loads``. The function is decorated with ``st.cache_data``, so
    Streamlit may serve repeated calls from its cache instead of re-reading
    the file.

    Returns:
        A list with the parsed records in file order, or an empty list when
        ``DATA_FILE`` does not exist.
    """
    if not os.path.exists(DATA_FILE):
        return []
    with open(DATA_FILE, "r", encoding="utf-8") as handle:
        return [json.loads(raw) for raw in handle if raw.strip()]
| 35 | + |
# Save every record
def save_data(data):
    """Write all records back to ``DATA_FILE`` as JSON Lines.

    Each record is serialized with ``json.dumps(..., ensure_ascii=False)`` and
    written on its own line, replacing the previous file contents.

    Args:
        data: Iterable of JSON-serializable records.

    Raises:
        TypeError: If a record cannot be serialized to JSON. In that case the
            existing file is left untouched.
    """
    # Serialize everything before opening the file: mode "w" truncates it, so a
    # serialization failure must happen before that point to avoid leaving a
    # half-written dataset behind.
    payload = "".join(
        json.dumps(record, ensure_ascii=False) + "\n" for record in data
    )
    with open(DATA_FILE, "w", encoding="utf-8") as f:
        f.write(payload)
| 42 | + |
# Streamlit page layout
st.set_page_config(layout="wide")
st.title("WebMainBench Table Editor")

data = load_data()
if not data:
    st.warning("未找到数据文件或文件为空。")
    st.stop()

# Pick the record to edit
index = st.sidebar.number_input("选择条目索引", min_value=0, max_value=len(data)-1, value=0, step=1)
item = data[index]

# Three columns: source HTML | rendered Markdown | editable Markdown source
col1, col2, col3 = st.columns([1.5, 1.5, 2])

# The editor column is filled first. Columns are containers, so the on-screen
# order is unchanged, but handling the submit before drawing the preview lets
# the rendered Markdown reflect a change saved in this very run.
with col3:
    st.subheader("Markdown 源代码(可编辑)")
    new_markdown = st.text_area(
        "编辑 Markdown",
        value=item.get("groundtruth_content", ""),
        height=600,
        key=f"markdown_edit_{index}"
    )
    if st.button("提交更改", key=f"submit_{index}"):
        if new_markdown != item.get("groundtruth_content", ""):
            data[index]["groundtruth_content"] = new_markdown
            save_data(data)
            # load_data is wrapped in st.cache_data; drop its cached result so
            # the next rerun re-reads the file that was just written. Without
            # this, a later save would start from the stale snapshot and
            # overwrite the edit made here.
            load_data.clear()
            st.success("更改已保存!")
        else:
            st.info("内容未更改,无需保存。")

with col1:
    st.subheader("HTML 渲染")
    # The record's HTML is embedded as-is (unsafe_allow_html=True) inside a
    # scrollable, bordered box.
    st.markdown(
        f'<div style="border:1px solid #ddd;padding:8px;overflow:auto;max-height:600px">{item.get("html","")}</div>',
        unsafe_allow_html=True
    )

with col2:
    st.subheader("Markdown 渲染")
    st.markdown(
        item.get("groundtruth_content", ""),
        unsafe_allow_html=True
    )
0 commit comments