Skip to content

Commit efd6bdb

Browse files
committed
added functions for counting entries
1 parent a307b36 commit efd6bdb

File tree

3 files changed

+91
-23
lines changed

3 files changed

+91
-23
lines changed

test2text/pages/documentation.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import streamlit as st
22

3+
from test2text.services.db import get_db_client
4+
35

46
def show_documentation():
57
st.markdown("""
@@ -9,32 +11,50 @@ def show_documentation():
911
1012
**Test2Text** is a tool for computing requirement's coverage by tests and generating relevant reports.
1113
The application provides a convenient interface for analysis the relationships between test cases and requirements.
14+
1215
""")
1316
st.divider()
1417
st.markdown("""
1518
## HOW TO USE
1619
1720
### Upload data
18-
Click :gray-badge[:material/database_upload: Annotations] or :gray-badge[:material/database_upload: Requirements] to upload annotations and requirements from CSV files to the app's database.
21+
Click :gray-badge[:material/database_upload: Annotations] or :gray-badge[:material/database_upload: Requirements] to upload annotations and requirements from CSV files to the app's database.
22+
Then Annotations and Requirements are loaded and Test cases are linked to Annotations go to the next chapter.
1923
2024
### Renew data
2125
Click :gray-badge[:material/cached: Controls] to transform missed and new texts into numeral vectors (embeddings).
22-
Update distances by embeddings for intelligent matching of requirements and annotations.
26+
Update distances by embeddings for intelligent matching of Requirements and Annotations.
27+
After distances are refreshed (all Annotations linked with Requirement by distances) go to the next chapter.
2328
2429
### Generate reports
2530
Click :gray-badge[:material/publish: Requirement's Report] or :gray-badge[:material/publish: Test cases Report] to make a report.
26-
Use filters to select desired information. Analyze selected requirements or test cases by showed and plotted distances
31+
Use filters and Smart search based on embeddings to select desired information.
32+
Analyze selected requirements or test cases by plotted distances.
33+
List of all requirements/test cases and their annotations are shown here.
2734
2835
### Visualize saved data
29-
Click :gray-badge[:material/dataset: Visualize vectors] to plot distances between vector representations of all requirements and annotations.
36+
Click :gray-badge[:material/dataset: Visualize vectors] to plot distances between vector representations of all requirements and annotations in multidimensional spaces.
3037
3138
""")
3239
st.divider()
40+
with get_db_client() as db:
41+
st.markdown("""## Database overview""")
42+
table, row_count = st.columns(2)
43+
with table:
44+
st.write("Table name")
45+
with row_count:
46+
st.write("Number of entries")
47+
for table_name, count in db.get_db_full_info.items():
48+
with table:
49+
st.write(table_name)
50+
with row_count:
51+
st.write(count)
52+
st.divider()
3353
st.markdown("""
3454
### Methodology
3555
The application use a pre-trained transformer model from the [sentence-transformers library](https://huggingface.co/sentence-transformers), specifically [nomic-ai/nomic-embed-text-v1](https://huggingface.co/nomic-ai/nomic-embed-text-v1), a model trained to produce high-quality vector embeddings for text.
3656
The model returns, for each input text, a high-dimensional NumPy array (vector) of floating point numbers (the embedding).
37-
This arrays give us a possibility to calculate Euclidian distances between test cases annotations and requirements to view how similar or dissimilar the two texts.
57+
This arrays give a possibility to calculate Euclidian distances between test cases annotations and requirements to show how similar or dissimilar the two texts.
3858
""")
3959

4060
st.markdown("""
@@ -46,26 +66,26 @@ def show_documentation():
4666
Suppose we have two vectors:
4767
""")
4868
st.latex(r"""
49-
[ \mathbf{a} = [a_1, a_2, ..., a_n] ],
69+
\mathbf{a} = [a_1, a_2, ..., a_n] ,
5070
""")
5171
st.latex(r"""
52-
[ \mathbf{b} = [b_1, b_2, ..., b_n] ]
72+
\mathbf{b} = [b_1, b_2, ..., b_n]
5373
""")
5474

5575
st.markdown("""
5676
The L2 distance between **a** and **b** is calculated as:
5777
""")
5878

5979
st.latex(r"""
60-
[ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{(a_1 - b_1)^2 + (a_2 - b_2)^2 + \cdots + (a_n - b_n)^2} ]
80+
L_2(\mathbf{a}, \mathbf{b}) = \sqrt{(a_1 - b_1)^2 + (a_2 - b_2)^2 + \cdots + (a_n - b_n)^2}
6181
""")
6282

6383
st.markdown("""
6484
Or, more compactly:
6585
""")
6686

6787
st.latex(r"""
68-
[ L_2(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2} ]
88+
L_2(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2}
6989
""")
7090

7191
st.markdown("""

test2text/services/db/client.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import sqlite3
2+
from typing import Union
3+
24
import sqlite_vec
35
import logging
46

@@ -71,4 +73,61 @@ def __exit__(self, exc_type, exc_val, exc_tb):
7173
self.close()
7274

7375
def __enter__(self):
74-
return self
76+
return self
77+
78+
def get_table_names(self):
    """
    Returns a list of all user-defined tables in the database.

    Tables whose names start with the reserved ``sqlite_`` prefix are
    excluded. The underscore is escaped in the LIKE pattern because an
    unescaped ``_`` is a single-character wildcard and would also hide
    user tables such as ``sqlitedata``.

    :return: List[str] - table names
    """
    cursor = self.conn.execute(
        "SELECT name FROM sqlite_master "
        "WHERE type='table' AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\';"
    )
    try:
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor even if fetching the rows fails.
        cursor.close()
90+
91+
92+
@property
def get_db_full_info(self):
    """
    Returns the number of records in every table reported by
    ``get_table_names``.

    Exposed as a property, so it is read without parentheses
    (``db.get_db_full_info``).

    :return: dict - maps table name (str) to its row count
    """
    return {
        table_name: self.count_all_entries_in_table(table_name)
        for table_name in self.get_table_names()
    }
109+
110+
def count_all_entries_in_table(self, table: str) -> int:
    """
    Returns the total number of rows in a table.

    :param table: name of the table
    :return: int - number of rows
    """
    # Identifiers cannot be bound as SQL parameters, so the name is wrapped
    # in double quotes (embedded quotes doubled) to keep it a single
    # identifier even when it contains spaces or quote characters.
    quoted_table = '"' + table.replace('"', '""') + '"'
    return self.conn.execute(f"SELECT COUNT(*) FROM {quoted_table}").fetchone()[0]
113+
114+
def count_notnull_entries_in_table(self, column: str, table: str) -> Union[int, None]:
    """
    Returns the number of rows in a table whose given column is not NULL.

    :param column: name of the column to check
    :param table: name of the table
    :return: int - number of non-NULL entries, or None if ``has_column``
        reports that the table has no such column
    """
    if not self.has_column(column, table):
        return None
    # Identifiers cannot be bound as SQL parameters, so both names are
    # wrapped in double quotes (embedded quotes doubled) to keep each one a
    # single identifier even when it contains spaces or quote characters.
    quoted_table = '"' + table.replace('"', '""') + '"'
    quoted_column = '"' + column.replace('"', '""') + '"'
    return self.conn.execute(
        f"SELECT COUNT(*) FROM {quoted_table} WHERE {quoted_column} IS NOT NULL"
    ).fetchone()[0]
121+
122+
def has_column(self, column_name: str, table_name: str) -> bool:
    """
    Returns True if the table has a column, otherwise False.

    The comparison with the column names reported by SQLite is exact
    (case-sensitive). A table that does not exist yields no rows from the
    PRAGMA, so the result is False in that case as well.

    :param column_name: name of the column
    :param table_name: name of the table
    :return: bool
    """
    # Double any embedded quote so the name stays one quoted identifier.
    escaped_table = table_name.replace('"', '""')
    cursor = self.conn.execute(f'PRAGMA table_info("{escaped_table}")')
    try:
        # row[1] is the column name
        return any(row[1] == column_name for row in cursor.fetchall())
    finally:
        # Release the cursor even if fetching the rows fails.
        cursor.close()

test2text/services/embeddings/annotation_embeddings_controls.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,7 @@
55
BATCH_SIZE = 30
66

77

8-
def count_all_annotations() -> int:
9-
with get_db_client() as db:
10-
count = db.conn.execute("SELECT COUNT(*) FROM Annotations").fetchone()[0]
11-
return count
12-
138

14-
def count_embedded_annotations() -> int:
15-
with get_db_client() as db:
16-
count = db.conn.execute(
17-
"SELECT COUNT(*) FROM Annotations WHERE embedding IS NOT NULL"
18-
).fetchone()[0]
19-
return count
209

2110

2211
OnProgress = Callable[[float], None]
@@ -25,8 +14,8 @@ def count_embedded_annotations() -> int:
2514
def embed_annotations(*_, embed_all=False, on_progress: OnProgress = None):
2615
with get_db_client() as db:
2716
from .embed import embed_annotations_batch
28-
annotations_count = count_all_annotations()
29-
embedded_annotations_count = count_embedded_annotations()
17+
annotations_count = db.count_all_entries_in_table("Annotations")
18+
embedded_annotations_count = db.count_embedded_entries_in_table("Annotations")
3019
if embed_all:
3120
annotations_to_embed = annotations_count
3221
else:

0 commit comments

Comments
 (0)