-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.cr
More file actions
47 lines (40 loc) · 1.45 KB
/
example.cr
File metadata and controls
47 lines (40 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# good resources
# https://opensearch.org/blog/improving-document-retrieval-with-sparse-semantic-encoders/
# https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v1
#
# run with
# text-embeddings-router --model-id opensearch-project/opensearch-neural-sparse-encoding-v1 --pooling splade
require "db"
require "http/client"
require "json"
require "pg"
# Open the connection to the local example database.
db = DB.open("postgres://localhost/pgvector_example")

# Make sure the pgvector extension is available before using its types.
db.exec("CREATE EXTENSION IF NOT EXISTS vector")

# Start from an empty table on every run; the embedding column is a sparse
# vector with 30522 dimensions, matching the "/30522" suffix written by `embed`.
db.exec("DROP TABLE IF EXISTS documents")
db.exec("CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding sparsevec(30522))")
# Requests sparse embeddings for `inputs` from the embedding server at
# localhost:3000 and returns one string per input, formatted as
# "{index:value,...}/30522" so it can be bound to a `sparsevec(30522)` column.
#
# `inputs` is sent as the "inputs" field of the JSON request body.
#
# Raises if the server answers with a non-2xx status, and (via JSON.parse /
# `as_a` / `as_i`) if the response body does not have the expected shape.
def embed(inputs)
  url = "http://localhost:3000/embed_sparse"
  headers = HTTP::Headers{"Content-Type" => "application/json"}
  payload = {"inputs" => inputs}.to_json

  response = HTTP::Client.post(url, headers: headers, body: payload)

  # Surface the server's own error text instead of letting a failed request
  # show up later as a confusing JSON type-cast error.
  unless response.success?
    raise "embedding request failed (HTTP #{response.status_code}): #{response.body}"
  end

  JSON.parse(response.body).as_a.map do |item|
    # Indices are shifted by one: pgvector's sparsevec text format is 1-based,
    # while the server's indices are presumably 0-based (verify against the server).
    elements = item.as_a.map { |e| "#{e["index"].as_i + 1}:#{e["value"]}" }.join(",")
    "{#{elements}}/30522"
  end
end
# Embed the sample documents in a single request and store each one next to
# its sparse embedding.
documents = ["The dog is barking", "The cat is purring", "The bear is growling"]
embeddings = embed(documents)
documents.each_with_index do |content, i|
  db.exec("INSERT INTO documents (content, embedding) VALUES ($1, $2)", content, embeddings[i])
end

# Embed the search text the same way, then print the stored contents ordered
# by the `<#>` operator against the query embedding (at most five rows).
query = "forest"
embedding = embed([query]).first
db.query_each("SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5", embedding) do |rs|
  puts rs.read(String)
end

db.close