Skip to content

Commit 4a58954

Browse files
committed
feat: add new SQL queries for embedding and keyword searches
1 parent a7ec04e commit 4a58954

File tree

5 files changed

+80
-3
lines changed

5 files changed

+80
-3
lines changed
apps/knowledge/sql/blend_search.sql

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
/*
 * Blend search: best-scoring embedding row per paragraph, where the score is
 * (1 - vector distance) plus the keyword rank from ts_rank_cd.
 *
 * Output columns: paragraph_id, comprehensive_score, similarity
 * (similarity is the same value as comprehensive_score).
 *
 * Positional parameters, in order of appearance:
 *   1. query embedding compared against embedding.embedding
 *   2. keyword text handed to websearch_to_tsquery
 *   3. any parameters bound by the templated filter that follows the table name
 *   4. minimum score (exclusive)
 *   5. maximum number of rows
 *
 * NOTE(review): the distance operator is assumed to come from the vector
 * extension used by pg_vector.py -- confirm which metric it implements.
 * Normalization flag 32 makes ts_rank_cd return rank / (rank + 1).
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* Keep exactly one row per paragraph: the one with the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                /* Only the two columns the outer levels read are projected,
                   so the wide vector columns are not dragged through the sort. */
                SELECT
                    embedding.paragraph_id,
                    (
                        (1 - (embedding.embedding <=> %s))
                        + ts_rank_cd(embedding.search_vector, websearch_to_tsquery('simple', %s), 32)
                    ) AS similarity
                FROM
                    embedding ${embedding_query}
            ) scored
        ORDER BY
            paragraph_id,
            /* DESC sorts NULL first by default; NULLS LAST stops a NULL score
               from shadowing a real score of the same paragraph. */
            similarity DESC NULLS LAST
    ) best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC,
    /* Tie-breaker so that LIMIT returns a stable set on equal scores. */
    paragraph_id
LIMIT %s
apps/knowledge/sql/embedding_search.sql

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
/*
 * Embedding search: best-scoring embedding row per paragraph, where the score
 * is (1 - vector distance) between the stored and the supplied embedding.
 *
 * Output columns: paragraph_id, comprehensive_score, similarity
 * (similarity is the same value as comprehensive_score).
 *
 * Positional parameters, in order of appearance:
 *   1. query embedding compared against embedding.embedding
 *   2. any parameters bound by the templated filter that follows the table name
 *   3. minimum score (exclusive)
 *   4. maximum number of rows
 *
 * NOTE(review): the distance operator is assumed to come from the vector
 * extension used by pg_vector.py -- confirm which metric it implements.
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* Keep exactly one row per paragraph: the one with the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                /* Only the two columns the outer levels read are projected,
                   so the wide vector column is not dragged through the sort. */
                SELECT
                    embedding.paragraph_id,
                    (1 - (embedding.embedding <=> %s)) AS similarity
                FROM
                    embedding ${embedding_query}
            ) scored
        ORDER BY
            paragraph_id,
            /* DESC sorts NULL first by default; NULLS LAST stops a NULL score
               from shadowing a real score of the same paragraph. */
            similarity DESC NULLS LAST
    ) best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC,
    /* Tie-breaker so that LIMIT returns a stable set on equal scores. */
    paragraph_id
LIMIT %s

apps/knowledge/sql/hit_test.sql

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
/*
 * Hit test: best-scoring embedding row per paragraph, where the score is
 * (1 - vector distance) between the stored and the supplied embedding.
 *
 * Output columns: paragraph_id, comprehensive_score, similarity
 * (similarity is the same value as comprehensive_score).
 *
 * Positional parameters, in order of appearance:
 *   1. query embedding compared against embedding.embedding
 *   2. any parameters bound by the templated filter that follows the table name
 *   3. minimum score (exclusive)
 *   4. maximum number of rows
 *
 * NOTE(review): the distance operator is assumed to come from a vector
 * extension -- confirm which metric it implements.
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* Keep exactly one row per paragraph: the one with the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                /* Only the two columns the outer levels read are projected,
                   so the wide vector column is not dragged through the sort. */
                SELECT
                    embedding.paragraph_id,
                    (1 - (embedding.embedding <=> %s)) AS similarity
                FROM
                    embedding ${embedding_query}
            ) scored
        ORDER BY
            paragraph_id,
            /* DESC sorts NULL first by default; NULLS LAST stops a NULL score
               from shadowing a real score of the same paragraph. */
            similarity DESC NULLS LAST
    ) best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC,
    /* Tie-breaker so that LIMIT returns a stable set on equal scores. */
    paragraph_id
LIMIT %s

apps/knowledge/sql/keywords_search.sql

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
/*
 * Keyword search: best-scoring embedding row per paragraph, where the score is
 * the ts_rank_cd rank of embedding.search_vector against the supplied text.
 *
 * Output columns: paragraph_id, comprehensive_score, similarity
 * (similarity is the same value as comprehensive_score).
 *
 * Positional parameters, in order of appearance:
 *   1. keyword text handed to websearch_to_tsquery
 *   2. any parameters bound by the templated filter that follows the table name
 *   3. minimum score (exclusive)
 *   4. maximum number of rows
 *
 * Normalization flag 32 makes ts_rank_cd return rank / (rank + 1).
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* Keep exactly one row per paragraph: the one with the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                /* Only the two columns the outer levels read are projected,
                   so unrelated wide columns are not dragged through the sort. */
                SELECT
                    embedding.paragraph_id,
                    ts_rank_cd(embedding.search_vector, websearch_to_tsquery('simple', %s), 32) AS similarity
                FROM
                    embedding ${keywords_query}
            ) scored
        ORDER BY
            paragraph_id,
            /* DESC sorts NULL first by default; NULLS LAST stops a NULL score
               from shadowing a real score of the same paragraph. */
            similarity DESC NULLS LAST
    ) best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC,
    /* Tie-breaker so that LIMIT returns a stable set on equal scores. */
    paragraph_id
LIMIT %s

apps/knowledge/vector/pg_vector.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def handle(self,
165165
search_mode: SearchMode):
166166
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
167167
select_string=get_file_content(
168-
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
168+
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
169169
'embedding_search.sql')),
170170
with_table_name=True)
171171
embedding_model = select_list(exec_sql,
@@ -186,7 +186,7 @@ def handle(self,
186186
search_mode: SearchMode):
187187
exec_sql, exec_params = generate_sql_by_query_dict({'keywords_query': query_set},
188188
select_string=get_file_content(
189-
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
189+
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
190190
'keywords_search.sql')),
191191
with_table_name=True)
192192
embedding_model = select_list(exec_sql,
@@ -207,7 +207,7 @@ def handle(self,
207207
search_mode: SearchMode):
208208
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
209209
select_string=get_file_content(
210-
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
210+
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
211211
'blend_search.sql')),
212212
with_table_name=True)
213213
embedding_model = select_list(exec_sql,

0 commit comments

Comments
 (0)