-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathembed-service.go
More file actions
155 lines (133 loc) · 4.62 KB
/
embed-service.go
File metadata and controls
155 lines (133 loc) · 4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
package main
import "C"
import (
"context"
"fmt"
"os/exec"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"github.com/wailsapp/wails/v2/pkg/logger"
)
var (
// llamaEmbedMutex is held for the full duration of each
// GenerateEmbedWithCancel call, so embedding calls do not overlap.
llamaEmbedMutex sync.Mutex
)
// parseFloatArray converts text of the form "[[v1, v2, ...]]" into a slice of
// float32 values.
//
// Surrounding whitespace and one leading "[[" / trailing "]]" pair are
// stripped, the remainder is split on commas, and each trimmed field is parsed
// as a 32-bit float. Fields that fail to parse (including values out of
// float32 range) are silently dropped, so the result may be shorter than the
// number of fields. The returned slice is never nil.
//
// NOTE(review): fields that still carry a bracket (e.g. "[[1,2],[3,4]]") fail
// to parse and are dropped as well — confirm the input is always one array.
func parseFloatArray(input string) []float32 {
	body := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(input), "[["), "]]")
	fields := strings.Split(body, ",")

	values := make([]float32, 0, len(fields))
	for i := range fields {
		parsed, err := strconv.ParseFloat(strings.TrimSpace(fields[i]), 32)
		if err == nil {
			values = append(values, float32(parsed))
		}
	}
	return values
}
// GenerateEmbedWithCancel runs the external llama embedding CLI on text and
// returns the embedding parsed from the command's standard output.
//
// Calls are serialized through llamaEmbedMutex. The command is started with
// exec.CommandContext in a separate goroutine, so cancelling ctx (or its
// deadline expiring) interrupts the process; in that case the function returns
// nil and ctx.Err() without waiting for the command to finish. No timeout is
// applied here beyond whatever ctx already carries.
//
// If the command itself fails, nil and the command's error are returned; the
// (possibly partial) output is not parsed.
func GenerateEmbedWithCancel(ctx context.Context, llamaEmbedArgs LlamaEmbedArgs, appArgs DefaultAppArgs, text string) ([]float32, error) {
	// Lock to prevent concurrent embed calls.
	llamaEmbedMutex.Lock()
	defer llamaEmbedMutex.Unlock()

	// The derived context is cancelled on every return path, which also
	// interrupts a still-running command.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	type cmdResult struct {
		output []byte
		err    error
	}
	// Buffered with capacity 1 so the goroutine's single send always
	// completes. With an unbuffered channel the goroutine would block forever
	// once the ctx.Done() branch below has returned and nobody is receiving.
	result := make(chan cmdResult, 1)

	// Pass the text to embed as the CLI prompt argument.
	llamaEmbedArgs.EmbedPromptCmd = "-p"
	llamaEmbedArgs.EmbedPromptText = text
	args := LlamaEmbedStructToArgs(llamaEmbedArgs)

	// Run the command in a goroutine so cancellation can be observed promptly.
	go func() {
		cmd := exec.CommandContext(ctx, appArgs.LLamaEmbedCliPath, args...)
		// Hide the console window on Windows.
		if runtime.GOOS == "windows" {
			cmd.SysProcAttr = &syscall.SysProcAttr{
				HideWindow: true,
			}
		}
		out, err := cmd.Output()
		result <- cmdResult{output: out, err: err}
	}()

	select {
	case res := <-result:
		if res.err != nil {
			return nil, res.err
		}
		return parseFloatArray(string(res.output)), nil
	case <-ctx.Done():
		// Context was canceled or its deadline passed.
		return nil, ctx.Err()
	}
}
// IngestTextData loads the text source at sourceLocation into documents tagged
// with the metadata entry "type" = "text".
//
// When both chunkSize and chunkOverlap are greater than zero, the loaded
// documents are split with a recursive character text splitter and the chunks
// are returned; otherwise the documents are returned unsplit. A load failure
// is logged through log and returned wrapped.
//
// NOTE(review): a chunkOverlap of 0 disables chunking entirely — confirm that
// is intended.
func IngestTextData(log logger.Logger, appArgs DefaultAppArgs, sourceLocation string, chunkSize int, chunkOverlap int, enableStopWordRemoval bool) ([]Document, error) {
	// Metadata attached to the loader for this source.
	meta := Meta{"type": "text"}

	textLoader := NewTextLoader(sourceLocation, meta)
	docs, loadErr := textLoader.Load(log, context.Background(), appArgs, enableStopWordRemoval)
	if loadErr != nil {
		log.Error(loadErr.Error())
		return nil, fmt.Errorf("error in IngestTextData: %w", loadErr)
	}

	// Chunking is only performed when both parameters are positive.
	if chunkSize <= 0 || chunkOverlap <= 0 {
		return docs, nil
	}

	splitter := NewRecursiveCharacterTextSplitter(chunkSize, chunkOverlap)
	return splitter.SplitDocuments(log, appArgs, enableStopWordRemoval, docs), nil
}
// IngestCVSData loads the CSV source at sourceLocation into documents.
//
// When both chunkSize and chunkOverlap are greater than zero, the loaded
// documents are split with a recursive character text splitter and the chunks
// are returned; otherwise the documents are returned unsplit. appArgs and
// enableStopWordRemoval are only passed to the splitter, not to the CSV
// loader. A load failure is logged through log and returned wrapped.
//
// The exported name keeps its historical "CVS" spelling so existing callers
// continue to compile.
func IngestCVSData(log logger.Logger, appArgs DefaultAppArgs, sourceLocation string, chunkSize int, chunkOverlap int, enableStopWordRemoval bool) ([]Document, error) {
	documents, err := NewCSVLoader(log, sourceLocation).Load(context.Background())
	if err != nil {
		log.Error(err.Error())
		// Name this function in the message; it previously blamed
		// IngestTextData, which misattributed CSV failures.
		return nil, fmt.Errorf("error in IngestCVSData: %w", err)
	}

	// Chunking is only performed when both parameters are positive.
	if chunkSize > 0 && chunkOverlap > 0 {
		textSplitter := NewRecursiveCharacterTextSplitter(chunkSize, chunkOverlap)
		documentChunks := textSplitter.SplitDocuments(log, appArgs, enableStopWordRemoval, documents)
		return documentChunks, nil
	}
	return documents, nil
}
// IngestPdfData loads the PDF at sourceLocation into documents using a
// PDF-to-text loader configured with appArgs.PDFToTextPath.
//
// When both chunkSize and chunkOverlap are greater than zero, the loaded
// documents are split with a recursive character text splitter and the chunks
// are returned; otherwise the documents are returned unsplit. A load failure
// is logged through log and returned wrapped.
func IngestPdfData(log logger.Logger, appArgs DefaultAppArgs, sourceLocation string, chunkSize int, chunkOverlap int, enableStopWordRemoval bool) ([]Document, error) {
	// Configure the loader with the converter path taken from appArgs.
	pdfLoader := NewPDFToTextLoader(sourceLocation).WithPDFToTextPath(appArgs.PDFToTextPath)

	docs, loadErr := pdfLoader.Load(context.Background(), log, appArgs, enableStopWordRemoval)
	if loadErr != nil {
		log.Error(loadErr.Error())
		return nil, fmt.Errorf("error in IngestPdfData: %w", loadErr)
	}

	// Chunking is only performed when both parameters are positive.
	if chunkSize <= 0 || chunkOverlap <= 0 {
		return docs, nil
	}

	splitter := NewRecursiveCharacterTextSplitter(chunkSize, chunkOverlap)
	return splitter.SplitDocuments(log, appArgs, enableStopWordRemoval, docs), nil
}