vtt-innovation-resolution-ai/app.py at main · Lithiumcr/vtt-innovation-resolution-ai · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
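# Run with: streamlit run app.py  (the entire front end lives in this app)
# Contents: chatbot, project introduction
# Goal: guide users to click through the modules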

import os
import sys
import streamlit as st
from streamlit.components.v1 import html
import warnings

from pathlib import Path

# Suppress all warnings: the "ignore" action is installed without any
# category/module restriction, so it applies to every warning raised after
# this point.
warnings.filterwarnings("ignore")

# 尝试生成配置文件（如果文件存在）
# try:
#     config_generator = os.path.join(os.path.dirname(os.path.abspath(__file__)), "generate_config_from_toml.py")
#     if os.path.exists(config_generator):
#         print("尝试生成配置文件...")
#         import subprocess
#         result = subprocess.run([sys.executable, config_generator], capture_output=True, text=True)
#         if result.returncode == 0:
#             print("配置文件生成成功！")
#             print(result.stdout)
#         else:
#             print(f"配置文件生成失败: {result.stderr}")
#     else:
#         print(f"配置生成脚本不存在: {config_generator}")
# except Exception as e:
#     print(f"尝试生成配置文件时出错: {str(e)}")

# # 创建数据目录和密钥目录（如果不存在）
# data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
# keys_dir = Path(__file__).resolve().parent / "config"
# os.makedirs(keys_dir, exist_ok=True)


from azure_openai import chat_bot

# ----------------------------
# Page Config & Title
# ----------------------------
# Wide layout so the embedded graphs and dashboards can use the full width.
st.set_page_config(page_title="VTT Innovation Platform", layout="wide")
st.title("🔍 VTT Innovation Knowledge Graph Platform")

# ----------------------------
# Introduction
# ----------------------------
# Landing copy that lists the modules rendered further down the page.
# Fixed the "resultsizations" typo in the first bullet ("visualizations").
st.markdown("""
#### Welcome to the **VTT Innovation Knowledge Graph Platform**!

#### This tool helps you explore relationships between innovations and organizations based on publicly available data
#### The platform includes:
- 🌐 **Interactive visualizations** of innovation networks in both 2D and 3D.
- 🌟 **Statistical dashboards** that summarize key patterns and contributors.
- 🧠 A **semantic assistant** that helps you navigate the graph via natural language queries.
- 🧪 A Clustering Method Explorer for different clustering method metrics

#### Scroll down to explore each module below:
""")

from PIL import Image
import os

@st.cache_data
def load_image(path):
    """Open the image file at *path* with PIL and return the image object.

    The function is decorated with ``st.cache_data``.
    """
    image = Image.open(path)
    return image

# --- Network graph section: embeds pre-generated interactive HTML graphs ---
st.header("🌐 Network Graph Visualizations")


def _show_html_graph(path, title):
    """Embed the HTML file at *path* under the subheader *title*.

    When the file does not exist, a warning is shown instead and nothing is
    embedded.
    """
    if not os.path.exists(path):
        st.warning("3D HTML file not found. Please run the backend script to generate it.")
        return
    st.subheader(title)
    with open(path, "r", encoding="utf-8") as f:
        html(f.read(), height=600)


# NOTE(review): the original comment said this file lives on "ein's" local
# machine — confirm it is present wherever the app is deployed.
html_path = "results/innovation_network_3d.html"
_show_html_graph(html_path, "Interactive Network (Before Deduplication)")

# Legend for the node/edge colours used by the embedded graphs.
st.markdown("""
These visualizations represent the relationships between innovations and organizations:

- **Blue nodes**: Innovations
- **Green nodes**: Organizations
- **Red edges**: "Developed By" relationships
- **Blue edges**: Collaborations

Hover or zoom to explore the connections. The layout is generated based on semantic clustering.
""")

html_path = "results/innovation_network_tufte_3D.html"
_show_html_graph(html_path, "3D Interactive Network (After Deduplication)")
st.divider()


# ----------------------------
# Innovation Metrics Dashboard
# ----------------------------
st.header(" 🌟 Innovation Metrics Dashboard")
st.markdown("""
These charts summarize statistical patterns in the innovation network:

- Count of innovations
- Proportion of multi-source or multi-developer innovations
- Top contributing organizations
""")


# First row: full-width 2D snapshot of the network.
img_path = "results/innovation_network_tufte_2D.png"
if not os.path.exists(img_path):
    st.warning("2D PNG image not found.")
else:
    img = load_image(img_path)
    st.subheader("2D Network Snapshot")
    st.image(img, use_container_width=True)


# Second row: two columns showing Statistics and Top Organizations.
# The first column holds the top organizations, the second the statistics.
orgs_col, stats_col = st.columns(2)

with stats_col:
    st.subheader("Key Innovation Statistics")
    img_stat = "results/innovation_stats_tufte.png"
    if not os.path.exists(img_stat):
        st.warning("Innovation stats image not found.")
    else:
        img = load_image(img_stat)
        st.image(img, use_container_width=True)
        st.markdown("""
        Summary statistics highlighting:
        - Total innovations in the dataset
        - Innovations sourced from multiple data providers
        - Innovations developed by more than one organization
        """)

with orgs_col:
    st.subheader("Top Contributing Organizations")
    img_top_orgs = "results/top_organizations.png"
    if not os.path.exists(img_top_orgs):
        st.warning("Top organizations image not found.")
    else:
        img = load_image(img_top_orgs)
        st.image(img, use_container_width=True)
        st.markdown("""
        - Organizations ranked by the number of innovations they have contributed to.
        - A great way to identify major innovation players in the ecosystem.
        """)
# ----------------------------
# Semantic Graph Assistant
# ----------------------------

#chatbot部分
# st.header("🧠 Semantic Graph Assistant")

# query = st.text_input("💬 free-form questions like 'Who developed nuclear energy innovations?', 'Which organizations developed the most innovations?':")

# if query:
#     with st.spinner("Retrieving relevant information..."):
#         reply = chat_bot(query)
#     st.success("🧠 Answer:")
#     st.markdown(reply)
#     st.info("🔎 This answer is based on the top 3 semantically similar innovation descriptions retrieved from the knowledge graph.")

# st.divider()


# Sidebar chat: a single-turn question/answer box. Each submitted question is
# echoed back and answered by chat_bot (imported from azure_openai).
with st.sidebar:
    st.header("💬 Ask the AI Assistant")
    st.markdown("""
        💬 free-form questions like : Who developed nuclear energy innovations?, Which organizations developed the most innovations?
        """)
    user_input = st.chat_input("Ask something...")

    if user_input:
        with st.chat_message("user"):
            st.markdown(user_input)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                # An uncaught exception here would stop the script run, so the
                # sections rendered after the sidebar would never appear.
                # Catch at this UI boundary and report the failure instead.
                try:
                    response = chat_bot(user_input)
                except Exception as exc:
                    response = None
                    st.error(
                        "The assistant could not answer this question "
                        f"({type(exc).__name__}). Please try again later."
                    )
                if response is not None:
                    st.markdown(response)


import pandas as pd
import matplotlib.pyplot as plt

# ------------------------------
# Clustering Method Explorer
# ------------------------------
st.divider()
st.header("🧪 Clustering Method Explorer")

# Summary figures per clustering method: cluster count, edge count and a
# short note. Keys double as the labels offered in the select box.
cluster_data = {
    "Threshold-based (0.85)": {
        "clusters": 1911,
        "edges": 12502,
        "note": "Baseline",
    },
    "HDBSCAN": {
        "clusters": 1735,
        "edges": 12341,
        "note": "Aggressive deduplication",
    },
    "KMeans (n=1911)": {
        "clusters": 1911,
        "edges": 12544,
        "note": "Balanced",
    },
    "Agglomerative (n=1911)": {
        "clusters": 1911,
        "edges": 12544,
        "note": "Similar to KMeans",
    },
    "Spectral (n=1911, k=15)": {
        "clusters": 1911,
        "edges": 12612,
        "note": "Highest edge count (dense)",
    },
}


method = st.selectbox("🔘 Select a clustering method", list(cluster_data), index=0)
selected = cluster_data[method]

# Card-style display: one column each for clusters, edges and the note.
with st.container():
    clusters_col, edges_col, notes_col = st.columns(3)

    with clusters_col:
        st.markdown("####  Clusters")
        st.metric(label="Innovation Clusters", value=selected["clusters"])

    with edges_col:
        st.markdown("####  Edges")
        st.metric(label="Edges in Graph", value=selected["edges"])

    with notes_col:
        st.markdown("####  Notes")
        # The note text comes from the static dict above and is wrapped in a
        # styled <div>, which is why raw HTML rendering is enabled.
        note_card = f"<div style='padding: 10px; border-radius: 8px; background-color: #f0f2f6;'>{selected['note']}</div>"
        st.markdown(note_card, unsafe_allow_html=True)

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

# Prepare the data: one bar per clustering method, bar height = its edge count.
methods = list(cluster_data.keys())
edges = [v["edges"] for v in cluster_data.values()]

# 🎨 Rainbow gradient colours (other colormaps such as viridis or plasma work
# too). plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap("rainbow")
colors = [cmap(i / len(methods)) for i in range(len(methods))]

# 📊 Draw the bar chart.
fig, ax = plt.subplots(figsize=(8, 4))
ax.bar(methods, edges, color=colors)

# Start the Y axis at 12000 so the small differences between methods stay
# visible; leave 1000 of headroom above the tallest bar.
ax.set_ylim(12000, max(edges) + 1000)
ax.set_ylabel("Edge Count")
ax.set_title("Edge Count Comparison Across Clustering Methods")

# Rotate the X-axis labels. Styling the labels through the axes object avoids
# relying on pyplot's "current figure" global state.
plt.setp(ax.get_xticklabels(), rotation=15, ha="right")
st.pyplot(fig)

# Figures created via plt.subplots stay registered with pyplot until closed;
# close this one after rendering so reruns do not accumulate open figures.
plt.close(fig)