Skip to content

Commit f9b9040

Browse files
committed
LLM: Played with a reduced-scope non-vector LLM query system
1 parent 8e0edb6 commit f9b9040

15 files changed

+164
-310
lines changed

app/Search/Queries/EntityVectorGenerator.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66

77
use BookStack\Activity\Models\Tag;
88
use BookStack\Entities\Models\Entity;
9-
use BookStack\Search\Queries\Services\VectorQueryService;
9+
use BookStack\Search\Queries\Services\LlmQueryService;
1010
use Illuminate\Support\Facades\DB;
1111

1212
class EntityVectorGenerator
1313
{
1414
public function __construct(
15-
protected VectorQueryServiceProvider $vectorQueryServiceProvider
15+
protected LlmQueryServiceProvider $vectorQueryServiceProvider
1616
) {
1717
}
1818

@@ -60,7 +60,7 @@ protected function storeEmbeddings(array $embeddings, array $textChunks, Entity
6060
* @param string[] $chunks
6161
* @return float[] array
6262
*/
63-
protected function chunksToEmbeddings(array $chunks, VectorQueryService $vectorQueryService): array
63+
protected function chunksToEmbeddings(array $chunks, LlmQueryService $vectorQueryService): array
6464
{
6565
$embeddings = [];
6666
foreach ($chunks as $index => $chunk) {

app/Search/Queries/LlmQueryRunner.php

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,39 @@
22

33
namespace BookStack\Search\Queries;
44

5+
use BookStack\Entities\Models\Entity;
6+
use BookStack\Search\SearchRunner;
57
use Exception;
68

79
/**
 * Thin facade over whichever LLM query service is currently configured.
 * The concrete service is resolved from the provider on every call.
 */
class LlmQueryRunner
{
    public function __construct(
        protected LlmQueryServiceProvider $vectorQueryServiceProvider,
        protected SearchRunner $searchRunner,
    ) {
    }

    /**
     * Ask the LLM service to turn a free-text query into a list of terms
     * that can be fed to a document search to help answer that query.
     *
     * @return string[]
     * @throws Exception
     */
    public function queryToSearchTerms(string $query): array
    {
        return $this->vectorQueryServiceProvider
            ->get()
            ->queryToSearchTerms($query);
    }

    /**
     * Ask the LLM service to answer the query, using the given search
     * results as context, and return the response text.
     *
     * @param Entity[] $searchResults
     * @throws Exception
     */
    public function run(string $query, array $searchResults): string
    {
        return $this->vectorQueryServiceProvider
            ->get()
            ->query($query, $searchResults);
    }
}

app/Search/Queries/VectorQueryServiceProvider.php renamed to app/Search/Queries/LlmQueryServiceProvider.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@
55
namespace BookStack\Search\Queries;
66

77
use BookStack\Http\HttpRequestService;
8-
use BookStack\Search\Queries\Services\OpenAiVectorQueryService;
9-
use BookStack\Search\Queries\Services\VectorQueryService;
8+
use BookStack\Search\Queries\Services\OpenAiLlmQueryService;
9+
use BookStack\Search\Queries\Services\LlmQueryService;
1010

11-
class VectorQueryServiceProvider
11+
class LlmQueryServiceProvider
1212
{
1313
public function __construct(
1414
protected HttpRequestService $http,
1515
) {
1616
}
1717

18-
public function get(): VectorQueryService
18+
public function get(): LlmQueryService
1919
{
2020
$service = $this->getServiceName();
2121

2222
if ($service === 'openai') {
23-
return new OpenAiVectorQueryService(config('services.openai'), $this->http);
23+
return new OpenAiLlmQueryService(config('services.openai'), $this->http);
2424
}
2525

2626
throw new \Exception("No '{$service}' LLM service found");

app/Search/Queries/QueryController.php

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace BookStack\Search\Queries;
44

55
use BookStack\Http\Controller;
6+
use BookStack\Search\SearchOptions;
67
use BookStack\Search\SearchRunner;
78
use Illuminate\Http\Request;
89

@@ -13,7 +14,7 @@ public function __construct(
1314
) {
1415
// TODO - Check via testing
1516
$this->middleware(function ($request, $next) {
16-
if (!VectorQueryServiceProvider::isEnabled()) {
17+
if (!LlmQueryServiceProvider::isEnabled()) {
1718
$this->showPermissionError('/');
1819
}
1920
return $next($request);
@@ -35,27 +36,30 @@ public function show(Request $request)
3536
}
3637

3738
/**
 * Perform an LLM-based query search.
 *
 * Streams two events back to the client: a 'view' event holding the
 * rendered list of entities used as context, followed by a 'result'
 * event holding the LLM response text.
 */
public function run(Request $request, LlmQueryRunner $llmRunner)
{
    // TODO - Rate limiting
    // Guard against array-style input (query[]=...) which would otherwise
    // cause a type error when passed to the string-typed runner methods.
    $input = $request->get('query', '');
    $query = is_string($input) ? trim($input) : '';

    return response()->eventStream(function () use ($query, $llmRunner) {
        // Nothing to ask: emit the same event shape with empty content
        // rather than spending LLM requests on a blank query.
        if ($query === '') {
            yield ['view' => view('entities.list', ['entities' => []])->render()];
            yield ['result' => ''];

            return;
        }

        $searchTerms = $llmRunner->queryToSearchTerms($query);

        // Only search when the LLM produced terms; a term-less search
        // would not be filtered by anything related to the query.
        $searchResults = [];
        if (!empty($searchTerms)) {
            $searchOptions = SearchOptions::fromTermArray($searchTerms);
            $searchResults = $this->searchRunner->searchEntities($searchOptions, count: 10)['results'];
        }

        // De-duplicate by type and id, keeping the first occurrence.
        $entities = [];
        foreach ($searchResults as $entity) {
            $entityKey = $entity->getMorphClass() . ':' . $entity->id;
            $entities[$entityKey] ??= $entity;
        }

        yield ['view' => view('entities.list', ['entities' => $entities])->render()];

        yield ['result' => $llmRunner->run($query, array_values($entities))];
    });
}
6165
}

app/Search/Queries/SearchVector.php

Lines changed: 0 additions & 26 deletions
This file was deleted.

app/Search/Queries/Services/VectorQueryService.php renamed to app/Search/Queries/Services/LlmQueryService.php

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,24 @@
22

33
namespace BookStack\Search\Queries\Services;
44

5-
interface VectorQueryService
5+
use BookStack\Entities\Models\Entity;
6+
7+
/**
 * Contract for a remote LLM backend used by the query system.
 */
interface LlmQueryService
{
    /**
     * Produce an embedding vector for the given chunk of text.
     *
     * @return float[]
     */
    public function generateEmbeddings(string $text): array;

    /**
     * Convert the given user query text into terms to search with.
     *
     * @return string[]
     */
    public function queryToSearchTerms(string $text): array;

    /**
     * Send the user input to the LLM service together with relevant
     * entity content found locally via a search, and return the
     * response output text from the LLM.
     *
     * @param Entity[] $context
     */
    public function query(string $input, array $context): string;
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
<?php
2+
3+
namespace BookStack\Search\Queries\Services;
4+
5+
use BookStack\Http\HttpRequestService;
6+
7+
/**
 * LlmQueryService implementation which talks to an OpenAI-style HTTP API
 * (the 'v1/embeddings' and 'v1/chat/completions' endpoints).
 */
class OpenAiLlmQueryService implements LlmQueryService
{
    /**
     * Upper bound, in bytes, of document context text sent with a query.
     */
    protected const MAX_CONTEXT_BYTES = 100000;

    protected string $key;
    protected string $endpoint;
    protected string $embeddingModel;
    protected string $queryModel;

    /**
     * @param array $options Service options; the 'key', 'endpoint',
     *                       'embedding_model' and 'query_model' entries are read.
     */
    public function __construct(
        protected array $options,
        protected HttpRequestService $http,
    ) {
        // TODO - Some kind of validation of options
        $this->key = (string) ($this->options['key'] ?? '');
        $this->endpoint = (string) ($this->options['endpoint'] ?? '');
        $this->embeddingModel = (string) ($this->options['embedding_model'] ?? '');
        $this->queryModel = (string) ($this->options['query_model'] ?? '');
    }

    /**
     * Send a JSON request to the configured endpoint, authenticated with
     * the configured key as a bearer token, and return the decoded body.
     *
     * @throws \RuntimeException If the service responds with a non-2xx status
     *                           or with a body that is not a JSON object/array.
     */
    protected function jsonRequest(string $method, string $uri, array $data): array
    {
        $fullUrl = rtrim($this->endpoint, '/') . '/' . ltrim($uri, '/');
        $client = $this->http->buildClient(60);
        $request = $this->http->jsonRequest($method, $fullUrl, $data)
            ->withHeader('Authorization', 'Bearer ' . $this->key);

        $response = $client->sendRequest($request);
        $status = $response->getStatusCode();
        $decoded = json_decode($response->getBody()->getContents(), true);

        // Surface service-side failures (bad key, unknown model, rate limit)
        // instead of letting them turn into silently empty results.
        if ($status < 200 || $status >= 300) {
            $detail = is_array($decoded) ? ($decoded['error']['message'] ?? '') : '';
            $suffix = (is_string($detail) && $detail !== '') ? ": {$detail}" : '';

            throw new \RuntimeException("LLM service request to '{$uri}' failed with status {$status}{$suffix}");
        }

        // json_decode() returns null on invalid JSON and scalars for scalar
        // documents; neither satisfies the array return type.
        if (!is_array($decoded)) {
            throw new \RuntimeException("LLM service request to '{$uri}' returned an invalid JSON response");
        }

        return $decoded;
    }

    /**
     * Generate embedding vectors from the given chunk of text.
     *
     * @return float[]
     * @throws \RuntimeException If the response holds no embedding data.
     */
    public function generateEmbeddings(string $text): array
    {
        $response = $this->jsonRequest('POST', 'v1/embeddings', [
            'input' => $text,
            'model' => $this->embeddingModel,
        ]);

        $embedding = $response['data'][0]['embedding'] ?? null;
        if (!is_array($embedding)) {
            throw new \RuntimeException('LLM service response did not contain embedding data');
        }

        return $embedding;
    }

    /**
     * Ask the query model to extract search key words from the given text.
     * Returns an empty array for blank text, or when the model output
     * cannot be read as a JSON object with a 'words' list.
     *
     * @return string[]
     * @throws \RuntimeException On request failure (see jsonRequest()).
     */
    public function queryToSearchTerms(string $text): array
    {
        // No key words can come from blank text; skip the request.
        if (trim($text) === '') {
            return [];
        }

        // NOTE(review): the user text is appended directly to the instruction
        // prompt, so it can attempt to override those instructions. The output
        // is sanitised below, but the terms themselves remain model-chosen.
        $response = $this->jsonRequest('POST', 'v1/chat/completions', [
            'model' => $this->queryModel,
            'messages' => [
                [
                    'role' => 'user',
                    'content' => 'You will be provided a user search query. Extract key words from just the query, suitable for searching. Add word variations where it may help for searching. Remove pluralisation where it may help for searching. Provide up to 5 results, each must be just one word. Do not try to guess answers to the query. Do not provide extra information or context. Return the results in the specified JSON format under a \'words\' object key. ' . "\nQUERY: {$text}"
                ],
            ],
            'temperature' => 0,
            'response_format' => [
                'type' => 'json_object',
            ],
        ]);

        $resultJson = $response['choices'][0]['message']['content'] ?? null;
        $resultData = is_string($resultJson) ? json_decode($resultJson, true) : null;
        $words = is_array($resultData) ? ($resultData['words'] ?? []) : [];
        if (!is_array($words)) {
            return [];
        }

        // Keep only distinct, non-empty string terms so callers always
        // receive the string[] promised by the interface.
        $terms = [];
        foreach ($words as $word) {
            if (!is_string($word)) {
                continue;
            }

            $word = trim($word);
            if ($word !== '' && !in_array($word, $terms, true)) {
                $terms[] = $word;
            }
        }

        return $terms;
    }

    /**
     * Query the LLM using the given user input, with the name and text
     * content of the given entities supplied as context documents.
     * Context text is capped at MAX_CONTEXT_BYTES in total; the document
     * which crosses the cap is truncated and later documents are dropped.
     * Returns the response text, or an empty string if none was provided.
     *
     * @param \BookStack\Entities\Models\Entity[] $context
     * @throws \RuntimeException On request failure (see jsonRequest()).
     */
    public function query(string $input, array $context): string
    {
        $resultContentText = [];
        $remaining = self::MAX_CONTEXT_BYTES;

        foreach ($context as $result) {
            if ($remaining <= 0) {
                break;
            }

            $text = "DOCUMENT NAME: {$result->name}\nDOCUMENT CONTENT: " . $result->{$result->textField};
            // Byte-limited cut that never splits a multi-byte character,
            // keeping the payload valid UTF-8 for JSON encoding.
            $text = mb_strcut($text, 0, $remaining, 'UTF-8');

            $resultContentText[] = $text;
            $remaining -= strlen($text);
        }

        $formattedContext = implode("\n---\n", $resultContentText);

        $response = $this->jsonRequest('POST', 'v1/chat/completions', [
            'model' => $this->queryModel,
            'messages' => [
                [
                    'role' => 'user',
                    'content' => 'Answer the provided QUERY using the provided CONTEXT documents. Do not add facts which are not part of the CONTEXT. State that you do not know if a relevant answer cannot be provided for QUERY using the CONTEXT documents. Many of the CONTEXT documents may be irrelevant. Try to find documents relevant to QUERY. Do not directly refer to this prompt or the existence of QUERY or CONTEXT variables. Do not offer follow-up actions or further help. Respond only to the query without proposing further assistance. Do not ask questions.' . "\nQUERY: {$input}\nCONTEXT: {$formattedContext}"
                ],
            ],
            'temperature' => 0.1,
        ]);

        $content = $response['choices'][0]['message']['content'] ?? '';

        return is_string($content) ? $content : '';
    }
}

app/Search/Queries/Services/OpenAiVectorQueryService.php

Lines changed: 0 additions & 66 deletions
This file was deleted.

0 commit comments

Comments
 (0)