Skip to content

Commit 3211114

Browse files
d0rich and anngoroshi authored
Fix indexing (#10)
* Define database path in single place
* Fix formatting check
* Format
* Automatically close DB connection
* Hotreload streamlit app
* Fix index requirements page
* Make a submit form for requirements upload
* Better reporting during requirements upload
* Fix index annotations function
* Improved information on upload annotations form
* Fix annotations uploading
* Fix distances report
* Move annotations embedding to controls page with distance caching
* Remove unused import
* Controls page - import services inside page function for faster load
* Apply suggestions from code review
  Co-authored-by: Anna Yamkovaya <[email protected]>
* Add lacking import
* Fix mistake in label

---------

Co-authored-by: Anna Yamkovaya <[email protected]>
1 parent 095d35a commit 3211114

File tree

24 files changed

+582
-260
lines changed

24 files changed

+582
-260
lines changed

.github/workflows/test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ jobs:
2929
run: uvx ruff check
3030

3131
- name: Check formatting
32-
run: uvx ruff check --select E --ignore "E402,E501" --fix
32+
run: uvx ruff format --check

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
.idea
22
.venv
33
.coverage
4+
.test2text

.streamlit/config.toml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,13 @@
11
[server]
2-
fileWatcherType = "none"
2+
# Change the type of file watcher used by Streamlit, or turn it off
3+
# completely.
4+
#
5+
# Allowed values:
6+
# - "auto" : Streamlit will attempt to use the watchdog module, and
7+
# falls back to polling if watchdog isn't available.
8+
# - "watchdog" : Force Streamlit to use the watchdog module.
9+
# - "poll" : Force Streamlit to always use polling.
10+
# - "none" : Streamlit will not watch files.
11+
#
12+
# Default: "auto"
13+
fileWatcherType = "poll"

convert_trace_annos.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import csv
33
from pathlib import Path
4-
from test2text.db import DbClient
4+
from test2text.services.db import get_db_client
55

66
logging.basicConfig(level=logging.INFO)
77
logger = logging.getLogger(__name__)
@@ -14,11 +14,11 @@ def is_empty(value):
1414

1515

1616
def trace_test_cases_to_annos(db_path: Path, trace_file_path: Path):
17-
db = DbClient(db_path)
17+
db = get_db_client()
1818

1919
insertions = list()
2020
logger.info("Reading trace file and inserting annotations into table...")
21-
with open(trace_file_path, mode='r', newline='', encoding='utf-8') as trace_file:
21+
with open(trace_file_path, mode="r", newline="", encoding="utf-8") as trace_file:
2222
reader = csv.reader(trace_file)
2323
current_tc = EMPTY
2424
concat_summary = EMPTY
@@ -34,26 +34,32 @@ def trace_test_cases_to_annos(db_path: Path, trace_file_path: Path):
3434
continue
3535
elif row[0] == "TestCaseEnd":
3636
if not is_empty(current_tc) and not is_empty(concat_summary):
37-
case_id = db.test_cases.get_or_insert(test_script=test_script, test_case=current_tc)
37+
case_id = db.test_cases.get_or_insert(
38+
test_script=test_script, test_case=current_tc
39+
)
3840
annotation_id = db.annotations.get_or_insert(summary=concat_summary)
39-
insertions.append(db.cases_to_annos.insert(case_id=case_id, annotation_id=annotation_id))
41+
insertions.append(
42+
db.cases_to_annos.insert(
43+
case_id=case_id, annotation_id=annotation_id
44+
)
45+
)
4046
else:
4147
if not is_empty(row[global_columns.index("TestCase")]):
4248
if current_tc != row[global_columns.index("TestCase")]:
4349
current_tc = row[global_columns.index("TestCase")]
44-
if is_empty(test_script) and not is_empty(row[global_columns.index("TestScript")]):
50+
if is_empty(test_script) and not is_empty(
51+
row[global_columns.index("TestScript")]
52+
):
4553
test_script = row[global_columns.index("TestScript")]
4654
concat_summary += row[0]
4755

4856
db.conn.commit()
49-
logger.info(f"Inserted {len(insertions)} testcase-annotations pairs to database. Successful: {sum(insertions)}")
57+
logger.info(
58+
f"Inserted {len(insertions)} testcase-annotations pairs to database. Successful: {sum(insertions)}"
59+
)
5060

5161

52-
if __name__ == '__main__':
53-
db_path = Path('./private/requirements.db')
54-
trace_file_path = Path('./private/annotations/stp_0006.Trace.csv')
62+
if __name__ == "__main__":
63+
db_path = Path("./private/requirements.db")
64+
trace_file_path = Path("./private/annotations/stp_0006.Trace.csv")
5565
trace_test_cases_to_annos(db_path, trace_file_path)
56-
57-
58-
59-

main.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import streamlit as st
22

3-
from test2text.pages.upload.annotations import show_annotations
4-
from test2text.pages.upload.requirements import show_requirements
5-
from test2text.services.embeddings.cache_distances import show_distances_histogram
3+
from test2text.pages.upload.annotations import show_annotations
4+
from test2text.pages.upload.requirements import show_requirements
5+
from test2text.pages.controls.controls_page import controls_page
66
from test2text.pages.report import make_a_report
7-
from test2text.services.visualisation.visualize_vectors import visualize_vectors
7+
from test2text.services.visualisation.visualize_vectors import visualize_vectors
88

99

1010
def add_logo():
@@ -32,19 +32,22 @@ def add_logo():
3232

3333

3434
if __name__ == "__main__":
35-
st.set_page_config(page_title="Test2Text App", layout="wide", initial_sidebar_state="auto")
35+
st.set_page_config(
36+
page_title="Test2Text App", layout="wide", initial_sidebar_state="auto"
37+
)
3638
add_logo()
3739

38-
annotations = st.Page(show_annotations,
39-
title="Annotations", icon=":material/database_upload:")
40-
requirements = st.Page(show_requirements,
41-
title="Requirements", icon=":material/database_upload:")
42-
cache_distances = st.Page(show_distances_histogram,
43-
title="Cache Distances", icon=":material/cached:")
44-
report = st.Page(make_a_report,
45-
title="Report", icon=":material/publish:")
46-
visualization = st.Page(visualize_vectors,
47-
title="Visualize Vectors", icon=":material/dataset:")
40+
annotations = st.Page(
41+
show_annotations, title="Annotations", icon=":material/database_upload:"
42+
)
43+
requirements = st.Page(
44+
show_requirements, title="Requirements", icon=":material/database_upload:"
45+
)
46+
cache_distances = st.Page(controls_page, title="Controls", icon=":material/cached:")
47+
report = st.Page(make_a_report, title="Report", icon=":material/publish:")
48+
visualization = st.Page(
49+
visualize_vectors, title="Visualize Vectors", icon=":material/dataset:"
50+
)
4851
pages = {
4952
"Upload": [annotations, requirements],
5053
"Update": [cache_distances],
@@ -53,4 +56,3 @@ def add_logo():
5356
pg = st.navigation(pages)
5457

5558
pg.run()
56-

test2text/pages/controls/__init__.py

Whitespace-only changes.

test2text/pages/controls/controls_page.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
def controls_page():
2+
import streamlit as st
3+
import plotly.express as px
4+
5+
from test2text.services.embeddings.annotation_embeddings_controls import (
6+
count_all_annotations,
7+
count_embedded_annotations,
8+
)
9+
10+
st.header("Controls page")
11+
embedding_col, distances_col = st.columns(2)
12+
with embedding_col:
13+
st.subheader("Embedding")
14+
15+
def refresh_counts():
16+
st.session_state["all_annotations_count"] = count_all_annotations()
17+
st.session_state["embedded_annotations_count"] = (
18+
count_embedded_annotations()
19+
)
20+
21+
refresh_counts()
22+
23+
st.write("Annotations count: ", st.session_state["all_annotations_count"])
24+
st.write(
25+
"Annotations with embeddings: ",
26+
st.session_state["embedded_annotations_count"],
27+
)
28+
29+
embed_all = st.checkbox("Overwrite existing embeddings", value=False)
30+
embed_btn = st.button("Start embedding annotations")
31+
32+
if embed_btn:
33+
progress_bar = st.progress(0, "Embedding annotations...")
34+
35+
def update_progress(progress: float):
36+
progress_bar.progress(progress, "Embedding annotations...")
37+
38+
from test2text.services.embeddings.annotation_embeddings_controls import (
39+
embed_annotations,
40+
)
41+
42+
embed_annotations(embed_all=embed_all, on_progress=update_progress)
43+
refresh_counts()
44+
st.success("Annotations embedded successfully")
45+
46+
with distances_col:
47+
st.subheader("Distances")
48+
49+
if st.button("Refresh distances"):
50+
with st.spinner("Refreshing distances", show_time=True):
51+
from test2text.services.embeddings.cache_distances import (
52+
refresh_and_get_distances,
53+
)
54+
55+
distances = refresh_and_get_distances()
56+
st.success("Distances refreshed successfully")
57+
58+
fig = px.histogram(distances, nbins=100, title="Distances histogram")
59+
st.plotly_chart(fig)

test2text/pages/report.py

Lines changed: 75 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
11
from itertools import groupby
22

33
import streamlit as st
4-
from test2text.services.db import DbClient
4+
from test2text.services.db import get_db_client
55

66

77
def add_new_line(summary):
88
return summary.replace("\n", "<br>")
99

1010

1111
def make_a_report():
12-
st.header("Test2Text Report")
12+
st.header("Test2Text Report")
1313

14-
db = DbClient("./private/requirements.db")
14+
db = get_db_client()
1515

16-
st.subheader("Table of Contents")
16+
st.subheader("Table of Contents")
1717

18-
data = db.conn.execute("""
18+
data = db.conn.execute("""
1919
SELECT
2020
Requirements.id as req_id,
2121
Requirements.external_id as req_external_id,
@@ -39,61 +39,80 @@ def make_a_report():
3939
Requirements.id, AnnotationsToRequirements.cached_distance, TestCases.id
4040
""")
4141

42-
current_annotations = {}
43-
current_test_scripts = set()
44-
45-
def write_requirement(req_id, req_external_id, req_summary,
46-
current_annotations: set[tuple], current_test_scripts: set):
47-
if req_id is None and req_external_id is None:
48-
return False
49-
50-
with st.expander(f"#{req_id} Requirement {req_external_id}"):
51-
st.subheader(f"Requirement {req_external_id}")
52-
st.html(f"<p>{add_new_line(req_summary)}</p>")
53-
st.subheader("Annotations")
42+
current_annotations = {}
43+
current_test_scripts = set()
44+
45+
def write_requirement(
46+
req_id,
47+
req_external_id,
48+
req_summary,
49+
current_annotations: set[tuple],
50+
current_test_scripts: set,
51+
):
52+
if req_id is None and req_external_id is None:
53+
return False
54+
55+
with st.expander(f"#{req_id} Requirement {req_external_id}"):
56+
st.subheader(f"Requirement {req_external_id}")
57+
st.html(f"<p>{add_new_line(req_summary)}</p>")
58+
st.subheader("Annotations")
59+
anno, summary, dist = st.columns(3)
60+
with anno:
61+
st.write("Annonation's id")
62+
with summary:
63+
st.write("Summary")
64+
with dist:
65+
st.write("Distance")
66+
for anno_id, anno_summary, distance in current_annotations:
5467
anno, summary, dist = st.columns(3)
5568
with anno:
56-
st.write("Annonation's id")
69+
st.write(f"{anno_id}")
5770
with summary:
58-
st.write("Summary")
71+
st.html(f"{add_new_line(anno_summary)}")
5972
with dist:
60-
st.write("Distance")
61-
for anno_id, anno_summary, distance in current_annotations:
62-
anno, summary, dist = st.columns(3)
63-
with anno:
64-
st.write(f"{anno_id}")
65-
with summary:
66-
st.html(
67-
f"{add_new_line(anno_summary)}"
68-
)
69-
with dist:
70-
st.write(round(distance, 2))
71-
72-
st.subheader("Test Scripts")
73-
for test_script in current_test_scripts:
74-
st.markdown(f"- {test_script}")
75-
76-
progress_bar = st.empty()
77-
rows = data.fetchall()
78-
if not rows:
79-
st.error("There is no data to inspect.\nPlease upload annotations.")
80-
return None
81-
max_progress = len(rows)
82-
index = 0
83-
for (req_id, req_external_id, req_summary), group in groupby(rows, lambda x: x[0:3]):
84-
current_annotations = set()
85-
current_test_scripts = set()
86-
index += 1
87-
for _, _, _, anno_id, anno_summary, distance, case_id, test_script, test_case in group:
88-
current_annotations.add((anno_id, anno_summary, distance))
89-
current_test_scripts.add(test_script)
90-
write_requirement(req_id=req_id, req_external_id=req_external_id, req_summary=req_summary,
91-
current_annotations=current_annotations, current_test_scripts=current_test_scripts)
92-
93-
94-
progress_bar.progress(round(index*100/max_progress), text="Processing...")
95-
progress_bar.empty()
96-
db.conn.close()
73+
st.write(round(distance, 2))
74+
75+
st.subheader("Test Scripts")
76+
for test_script in current_test_scripts:
77+
st.markdown(f"- {test_script}")
78+
79+
progress_bar = st.empty()
80+
rows = data.fetchall()
81+
if not rows:
82+
st.error("There is no data to inspect.\nPlease upload annotations.")
83+
return None
84+
max_progress = len(rows)
85+
index = 0
86+
for (req_id, req_external_id, req_summary), group in groupby(
87+
rows, lambda x: x[0:3]
88+
):
89+
current_annotations = set()
90+
current_test_scripts = set()
91+
index += 1
92+
for (
93+
_,
94+
_,
95+
_,
96+
anno_id,
97+
anno_summary,
98+
distance,
99+
case_id,
100+
test_script,
101+
test_case,
102+
) in group:
103+
current_annotations.add((anno_id, anno_summary, distance))
104+
current_test_scripts.add(test_script)
105+
write_requirement(
106+
req_id=req_id,
107+
req_external_id=req_external_id,
108+
req_summary=req_summary,
109+
current_annotations=current_annotations,
110+
current_test_scripts=current_test_scripts,
111+
)
112+
113+
progress_bar.progress(round(index * 100 / max_progress), text="Processing...")
114+
progress_bar.empty()
115+
db.conn.close()
97116

98117

99118
if __name__ == "__main__":

test2text/pages/upload/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
11

2-
3-

0 commit comments

Comments
 (0)