Skip to content

Commit a023bed

Browse files
committed
Vectors: Added command to regenerate vectors for all content
Also made the OpenAI embedding and query models configurable. Tested that the system scales using 86k vector entries.
1 parent 0ffcb3d commit a023bed

File tree

6 files changed

+68
-10
lines changed

6 files changed

+68
-10
lines changed

app/Config/services.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
'openai' => [
3131
'endpoint' => env('OPENAI_ENDPOINT', 'https://api.openai.com'),
3232
'key' => env('OPENAI_KEY', ''),
33+
'embedding_model' => env('OPENAI_EMBEDDING_MODEL', 'text-embedding-3-small'),
34+
'query_model' => env('OPENAI_QUERY_MODEL', 'gpt-4o'),
3335
],
3436

3537
'github' => [
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?php
2+
3+
namespace BookStack\Console\Commands;
4+
5+
use BookStack\Entities\EntityProvider;
6+
use BookStack\Entities\Models\Entity;
7+
use BookStack\Search\Vectors\SearchVector;
8+
use BookStack\Search\Vectors\StoreEntityVectorsJob;
9+
use Illuminate\Console\Command;
10+
11+
class RegenerateVectorsCommand extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'bookstack:regenerate-vectors';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Re-index vectors for all content in the system';

    /**
     * Execute the console command.
     *
     * Deletes every existing search vector row, then walks each entity type
     * supplied by the provider and dispatches one StoreEntityVectorsJob per
     * entity. Entities are fetched in id-ordered chunks of 100 rather than
     * in a single query.
     *
     * Note: the delete runs unconditionally and immediately; there is no
     * confirmation prompt yet (see TODO below).
     *
     * @param EntityProvider $entityProvider Source of the entity types to process.
     *
     * @return int Command exit code; always the success code once all jobs are dispatched.
     */
    public function handle(EntityProvider $entityProvider): int
    {
        // TODO - Add confirmation before run regarding deletion/time/effort/api-cost etc...
        SearchVector::query()->delete();

        foreach ($entityProvider->all() as $type => $typeInstance) {
            $this->info("Creating jobs to store vectors for {$type} data...");

            $typeInstance->newQuery()->chunkById(100, function ($entities) {
                /** @var Entity[] $entities */
                foreach ($entities as $entity) {
                    dispatch(new StoreEntityVectorsJob($entity));
                }
            });
        }

        return self::SUCCESS;
    }
}

app/Search/Vectors/Services/OpenAiVectorQueryService.php

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,26 @@
66

77
class OpenAiVectorQueryService implements VectorQueryService
88
{
9+
protected string $key;
10+
protected string $endpoint;
11+
protected string $embeddingModel;
12+
protected string $queryModel;
13+
914
public function __construct(
10-
protected string $endpoint,
11-
protected string $key,
15+
protected array $options,
1216
protected HttpRequestService $http,
1317
) {
18+
// TODO - Some kind of validation of options
19+
$this->key = $this->options['key'] ?? '';
20+
$this->endpoint = $this->options['endpoint'] ?? '';
21+
$this->embeddingModel = $this->options['embedding_model'] ?? '';
22+
$this->queryModel = $this->options['query_model'] ?? '';
1423
}
1524

1625
protected function jsonRequest(string $method, string $uri, array $data): array
1726
{
1827
$fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
19-
$client = $this->http->buildClient(10);
28+
$client = $this->http->buildClient(30);
2029
$request = $this->http->jsonRequest($method, $fullUrl, $data)
2130
->withHeader('Authorization', 'Bearer ' . $this->key);
2231

@@ -28,7 +37,7 @@ public function generateEmbeddings(string $text): array
2837
{
2938
$response = $this->jsonRequest('POST', 'v1/embeddings', [
3039
'input' => $text,
31-
'model' => 'text-embedding-3-small',
40+
'model' => $this->embeddingModel,
3241
]);
3342

3443
return $response['data'][0]['embedding'];
@@ -39,15 +48,15 @@ public function query(string $input, array $context): string
3948
$formattedContext = implode("\n", $context);
4049

4150
$response = $this->jsonRequest('POST', 'v1/chat/completions', [
42-
'model' => 'gpt-4o',
51+
'model' => $this->queryModel,
4352
'messages' => [
4453
[
4554
'role' => 'developer',
46-
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.'
55+
'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response. Don\'t try to converse or continue the conversation.'
4756
],
4857
[
4958
'role' => 'user',
50-
'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
59+
'content' => "Provide a response to the below given QUERY using the below given CONTEXT. The CONTEXT is split into parts via lines. Ignore any nonsensical lines of CONTEXT.\nQUERY: {$input}\n\nCONTEXT: {$formattedContext}",
5160
]
5261
],
5362
]);

app/Search/Vectors/VectorQueryServiceProvider.php

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@ public function get(): VectorQueryService
1818
$service = $this->getServiceName();
1919

2020
if ($service === 'openai') {
21-
$key = config('services.openai.key');
22-
$endpoint = config('services.openai.endpoint');
23-
return new OpenAiVectorQueryService($endpoint, $key, $this->http);
21+
return new OpenAiVectorQueryService(config('services.openai'), $this->http);
2422
}
2523

2624
throw new \Exception("No '{$service}' LLM service found");

app/Search/Vectors/VectorSearchRunner.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ public function run(string $query): array
1919
$topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
2020
->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance')
2121
->orderBy('distance', 'asc')
22+
->having('distance', '<', 0.6)
2223
->limit(10)
2324
->get();
2425

database/migrations/2025_03_24_155748_create_search_vectors_table.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ public function up(): void
2121
});
2222

2323
$table = DB::getTablePrefix() . 'search_vectors';
24+
25+
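// TODO - Vector size might need to be dynamic: it must match the output dimensions of the embedding model (1536 for the default text-embedding-3-small), and that model is now configurable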
2426
DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)");
2527
DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine");
2628
}

0 commit comments

Comments
 (0)