Skip to content

Commit 0a29a89

Browse files
committed
Add exemplos embeddings sql server 2025
1 parent d8a16f0 commit 0a29a89

File tree

2 files changed

+231
-0
lines changed

2 files changed

+231
-0
lines changed

SqlLibEmbeddings/IndexData2025.sql

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*#info
2+
3+
# Autor
4+
Rodrigo ribeiro Gomes
5+
6+
# Descricao
7+
Esse script permite indexar este repositorio em um SQL Server 2025.
8+
Crie um banco chamado SqlServerLib (ou ajuste o Use abaixo com o banco desejado).
9+
E rode o script.
10+
Opcionalmente, crie uma database scoped credential do github para evitar problemas de limites (veja abaixo no codigo como fazer)
11+
Para pesquisa, use o script ./SearchData2025.sql (após executar este)
12+
---
13+
14+
This script allows you to index this repo using only T-SQL and the new SQL Server 2025 AI features.
15+
Create a database called SqlServerLib (or change the USE statement in the script below).
16+
You can also create a scoped credential for the GitHub API to avoid rate limits (see later in the code).
17+
For search, use the script ./SearchData2025.sql (after running this)
18+
19+
*/
20+
-- enable external endpoint
21+
EXEC sp_configure 'external rest endpoint enabled',1
22+
RECONFIGURE
23+
GO
24+
25+
-- Create if not exists
26+
-- create database SqlServerLib
27+
USE SqlServerLib;
28+
29+
30+
---- create the scripts table
31+
if object_id('dbo.Scripts','U') IS NULL
32+
CREATE TABLE Scripts (
33+
id int IDENTITY PRIMARY KEY WITH(DATA_COMPRESSION = PAGE)
34+
,RelPath varchar(1000) NOT NULL
35+
,ChunkNum int NOT NULL
36+
,ChunkContent nvarchar(max) NOT NULL
37+
,embeddings vector(1024)
38+
)
39+
40+
/*
41+
Let's create an external model to generate our embeddings!
42+
The CREATE EXTERNAL MODEL command only supports OpenAI, Ollama, or Azure.
43+
However, our embedding function resides in the rrg92/sqlserver-lib-assistant Hugging Face Space.
44+
45+
To allow SQL Server to connect, we need a way to expose that space as one of the supported APIs.
46+
The solution is simply a third space: https://huggingface.co/spaces/rrg92/sqlserver
47+
48+
49+
Simple diagram:
50+
51+
SQL SERVER 2025 ----> rrg92/sqlserver space ---> rrg92/sqlserver-lib-assistant
52+
53+
The rrg92/sqlserver space works as a wrapper that exposes an OpenAI-like API for any other Hugging Face space.
54+
You just need to expose a Gradio API with the endpoint /embed that receives text and returns the embeddings!
55+
56+
For a large number of rows, this method can be inefficient, as it involves two requests.
57+
But for a small set of rows or for testing purposes, this is not a big problem.
58+
*/
59+
IF NOT EXISTS(SELECT * FROM sys.external_models WHERE name = 'HuggingFace')
60+
CREATE EXTERNAL MODEL HuggingFace
61+
WITH (
62+
LOCATION = 'https://rrg92-sqlserver.hf.space/v1/embeddings',
63+
API_FORMAT = 'OpenAI',
64+
MODEL_TYPE = EMBEDDINGS,
65+
MODEL = 'rrg92/sqlserver-lib-assistant'
66+
);
67+
68+
-- test
69+
-- select AI_GENERATE_EMBEDDINGS('fROM sQL SERVER' use model HuggingFace)
70+
71+
72+
73+
-- Load data from github API
74+
-- Remember api can
75+
76+
declare
77+
@result nvarchar(max)
78+
,@GitHubCredential sysname
79+
80+
-- if not exists, will return null
81+
select @GitHubCredential = name from sys.database_scoped_credentials cr where cr.name like 'https://api.github.com'
82+
83+
EXEC sp_invoke_external_rest_endpoint 'https://api.github.com/repos/rrg92/sqlserver-lib/git/trees/main?recursive=1'
84+
,@method = 'GET'
85+
,@response = @result OUTPUT
86+
,@credential = @GitHubCredential
87+
88+
/* TIP: GitHub Api Tokens
89+
to avoid github rate limits, you can generate a token in your account and create a credential for SQL use.
90+
Generate here: https://github.com/settings/personal-access-tokens
91+
92+
-- must create master key in current db
93+
create master key encryption by password = 'StrongPass@2025'
94+
95+
-- create the credential, replace YOUR_GITHUB_TOKEN with generated token!
96+
CREATE DATABASE SCOPED CREDENTIAL [https://api.github.com]
97+
with Identity = 'HTTPEndpointHeaders', SECRET = '{"Authorization":"Bearer YOUR_GITHUB_TOKEN"}'
98+
*/
99+
100+
101+
IF JSON_VALUE(@result,'$.response.status.http.code') != 200
102+
BEGIN
103+
SELECT
104+
[=== github http errror====] = 'error'
105+
,*
106+
from
107+
openjson(@result)
108+
RETURN;
109+
END
110+
111+
drop table if exists #gitfiles;
112+
select
113+
*
114+
,id = identity(int,1,1)
115+
into
116+
#gitfiles
117+
from
118+
openjson(@result,'$.result.tree') with (
119+
path varchar(1000)
120+
,sha varchar(200)
121+
)
122+
where
123+
path like '%.sql'
124+
AND path not in (
125+
select RelPath from Scripts
126+
)
127+
128+
declare
129+
@id int = 0
130+
,@sha varchar(100)
131+
,@url varchar(1000)
132+
,@content varchar(max)
133+
,@embeddings vector(1024)
134+
,@path varchar(1000)
135+
136+
-- for each file get base64 content, and calculate embbeddings!
137+
while 1 = 1
138+
begin
139+
select top 1
140+
@id = id
141+
,@sha = sha
142+
,@url = 'https://api.github.com/repos/rrg92/sqlserver-lib/git/blobs/'+sha
143+
,@path = path
144+
from
145+
#gitfiles
146+
where
147+
id > @id
148+
order by
149+
id
150+
if @@ROWCOUNT = 0
151+
break
152+
153+
set @result = NULL
154+
exec sp_invoke_external_rest_endpoint @url
155+
,@method = 'GET'
156+
,@response = @result OUTPUT
157+
,@credential = @GitHubCredential
158+
159+
-- AI_GENERATE_EMBEDDINGS('test' use model HuggingFace)
160+
raiserror('Generating for %s',0,1,@path) with nowait;
161+
162+
SELECT
163+
-- here the conversion from bin to char can result some incorrect chars
164+
-- due to pt-bR special chars with accents (e.g, é = are/is)
165+
-- I should handle this with collation functions and adjust on repo
166+
-- but I will ignore this for avoid script being more complex!
167+
@content = CONVERT(varchar(max),BASE64_DECODE(CONVERT(varchar(max),value)))
168+
from
169+
OPENJSON(@result,'$.result')
170+
where
171+
[key] = 'content'
172+
173+
174+
if @content is not null
175+
begin
176+
set @embeddings = AI_GENERATE_EMBEDDINGS(@content use model HuggingFace)
177+
-- if someerror happens, result will be null.
178+
-- To debug, use extended events!
179+
end
180+
181+
182+
insert into Scripts(RelPath,ChunkNum,ChunkContent,embeddings)
183+
select
184+
@path,1,@content,@embeddings
185+
186+
187+
end
188+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*#info
2+
3+
# Autor
4+
Rodrigo ribeiro Gomes
5+
6+
# Descricao
7+
Exemplo de como procurar os embeddings na tabela de scripts, usando sql server 2025.
8+
Antes de executar, use o script ./IndexData2025.sql
9+
Ajuste o texto na variável @SearchText e rode a query.
10+
11+
--
12+
13+
Sample script showing how to search using embeddings in SQL Server 2025.
14+
Before running this, follow the instructions in ./IndexData2025.sql.
15+
Change the search text in the variable @SearchText and run the script.
16+
*/
17+
use SqlServerLib
18+
go
19+
20+
declare @SearchText varchar(max) = 'cpu performance'
21+
22+
declare @search vector(1024) = AI_GENERATE_EMBEDDINGS(@SearchText use model HuggingFace)
23+
24+
select top 10
25+
*
26+
from (
27+
select
28+
RelPath
29+
,Similaridade = 1-CosDistance
30+
,ScriptContent = ChunkContent
31+
,ContentLength = LEN(ChunkContent)
32+
,CosDistance
33+
from
34+
(
35+
select
36+
*
37+
,CosDistance = vector_distance('cosine',embeddings,@search)
38+
from
39+
Scripts
40+
) C
41+
) v
42+
order by
43+
CosDistance

0 commit comments

Comments
 (0)