Skip to content

Commit 4c0099a

Browse files
committed
Added Qdrant Storage
1 parent fad6d42 commit 4c0099a

File tree

6 files changed

+328
-3
lines changed

6 files changed

+328
-3
lines changed

Storage/ChromaStorage.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class ChromaStorage extends AbstractStorage
2222
protected $collectionID = '';
2323

2424
/**
25-
* PineconeStorage constructor.
25+
* ChromaStorage constructor.
2626
*/
2727
public function __construct()
2828
{
@@ -57,7 +57,7 @@ protected function runQuery($endpoint, mixed $data, $method = 'POST')
5757
{
5858
$url = $this->baseurl . '/api/v1' . $endpoint . '?tenant=' . $this->tenant . '&database=' . $this->database;
5959

60-
if (is_array($data) && $data === []) {
60+
if ($data === []) {
6161
$json = '{}';
6262
} else {
6363
$json = json_encode($data, JSON_THROW_ON_ERROR);
@@ -257,7 +257,7 @@ public function getSimilarChunks($vector, $lang = '', $limit = 4)
257257
$limit *= 2; // we can't check ACLs, so we return more than requested
258258

259259
if ($lang) {
260-
$filter = ['language' => ['$eq', $lang]];
260+
$filter = ['language' => $lang];
261261
} else {
262262
$filter = null;
263263
}

Storage/QdrantStorage.php

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
<?php
2+
3+
namespace dokuwiki\plugin\aichat\Storage;
4+
5+
use dokuwiki\HTTP\DokuHTTPClient;
6+
use dokuwiki\plugin\aichat\Chunk;
7+
8+
/**
 * Implements the storage backend using a Qdrant vector database server
 *
 * All communication happens through Qdrant's HTTP API using DokuWiki's HTTP client.
 * Chunks are stored as points: the chunk ID is the point ID, the embedding is the
 * point's vector and page, text, creation time and language are kept in the payload.
 */
class QdrantStorage extends AbstractStorage
{
    /**
     * Number of consecutive point IDs, starting at a page's first chunk ID, that are
     * addressed when reading or deleting the chunks of a single page
     */
    protected const CHUNK_ID_RANGE = 100;

    /** @var string URL to the qdrant server instance (stored without trailing slash) */
    protected $baseurl;

    /** @var DokuHTTPClient http client */
    protected $http;

    /** @var string name of the collection once it has been verified or created, empty before */
    protected $collection = '';

    /** @var string the configured collection name */
    protected $collectionName = '';

    /**
     * QdrantStorage constructor.
     *
     * Reads the qdrant_* settings from the aichat helper plugin and prepares the
     * HTTP client for JSON requests. The api-key header is only sent when configured.
     */
    public function __construct()
    {
        $helper = plugin_load('helper', 'aichat');

        // a trailing slash would produce a double slash when runQuery() appends the endpoint
        $this->baseurl = rtrim((string)$helper->getConf('qdrant_baseurl'), '/');
        $this->collectionName = $helper->getConf('qdrant_collection');

        $this->http = new DokuHTTPClient();
        $this->http->headers['Content-Type'] = 'application/json';
        $this->http->headers['Accept'] = 'application/json';
        $this->http->keep_alive = false;
        $this->http->timeout = 30;

        $apiKey = $helper->getConf('qdrant_apikey');
        if ($apiKey) {
            $this->http->headers['api-key'] = $apiKey;
        }
    }

    /**
     * Execute a query against the Qdrant API
     *
     * The request always carries the wait=true query parameter. The decoded response's
     * 'result' member is returned when present, otherwise the whole decoded response.
     *
     * @param string $endpoint API endpoint, will be added to the base URL
     * @param mixed $data The data to send, will be JSON encoded
     * @param string $method POST|GET|PUT etc
     * @return mixed
     * @throws \Exception when there is no response, the response is not valid JSON
     *                    or the HTTP status is anything but 200
     */
    protected function runQuery($endpoint, mixed $data, $method = 'POST')
    {
        $endpoint = trim($endpoint, '/');
        $url = $this->baseurl . '/' . $endpoint . '?wait=true';

        if ($data === []) {
            // an empty PHP array would be encoded as a JSON list, but an object is wanted
            $json = '{}';
        } else {
            $json = json_encode($data, JSON_THROW_ON_ERROR);
        }

        $this->http->sendRequest($url, $json, $method);
        $response = $this->http->resp_body;

        if (!$response) {
            throw new \Exception(
                'Qdrant API returned no response. ' . $this->http->error . ' Status: ' . $this->http->status
            );
        }

        try {
            $result = json_decode((string)$response, true, 512, JSON_THROW_ON_ERROR);
        } catch (\Exception $e) {
            // keep the decoding error attached so the root cause is not lost
            throw new \Exception('Qdrant API returned invalid JSON. ' . $response, 0, $e);
        }

        if ((int)$this->http->status !== 200) {
            // prefer the error message from the response body over the client's own
            $error = $result['status']['error'] ?? $this->http->error;
            throw new \Exception('Qdrant API returned error. ' . $error);
        }

        return $result['result'] ?? $result;
    }

    /**
     * Get the name of the collection to use
     *
     * Initializes the collection if it doesn't exist yet. The name is cached in
     * $this->collection so the server is only asked once per instance.
     *
     * @return string
     * @throws \Exception when the collection could not be created
     */
    public function getCollection()
    {
        if ($this->collection) return $this->collection;

        try {
            $this->runQuery('/collections/' . $this->collectionName, '', 'GET');
            $this->collection = $this->collectionName;
            return $this->collection; // collection exists
        } catch (\Exception) {
            // collection seems not to exist (any failure of the lookup ends up here)
        }

        $data = [
            'vectors' => [
                'size' => 1536, // FIXME should not be hardcoded
                'distance' => 'Cosine',
            ]
        ];

        // create the collection
        $this->runQuery('/collections/' . $this->collectionName, $data, 'PUT');
        $this->collection = $this->collectionName;

        return $this->collection;
    }

    /**
     * @inheritdoc
     *
     * When clearing is requested the whole collection is dropped; the cached name is
     * reset so the next getCollection() call creates it again.
     */
    public function startCreation($clear = false)
    {
        if (!$clear) return;

        // if a collection exists, delete it
        $collection = $this->getCollection();
        if ($collection) {
            $this->runQuery('/collections/' . $collection, '', 'DELETE');
            $this->collection = '';
        }
    }

    /**
     * @inheritdoc
     *
     * Any failure while fetching the point is treated as "no such chunk".
     */
    public function getChunk($chunkID)
    {
        try {
            $data = $this->runQuery(
                '/collections/' . $this->getCollection() . '/points/' . $chunkID,
                '',
                'GET'
            );
        } catch (\Exception) {
            // no such point
            return null;
        }

        return $this->pointToChunk($data);
    }

    /** @inheritdoc */
    public function reusePageChunks($page, $firstChunkID)
    {
        // no-op
    }

    /**
     * @inheritdoc
     *
     * The page name is not used; all point IDs in the page's ID range are deleted.
     */
    public function deletePageChunks($page, $firstChunkID)
    {
        // delete all possible chunk IDs
        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/delete',
            [
                'points' => $this->pageChunkIds($firstChunkID)
            ],
            'POST'
        );
    }

    /**
     * @inheritdoc
     *
     * All chunks are sent in a single upsert request. Nothing is sent when there
     * are no chunks.
     */
    public function addPageChunks($chunks)
    {
        $points = [];
        foreach ($chunks as $chunk) {
            $points[] = [
                'id' => $chunk->getId(),
                'vector' => $chunk->getEmbedding(),
                'payload' => [
                    'page' => $chunk->getPage(),
                    'text' => $chunk->getText(),
                    'created' => $chunk->getCreated(),
                    'language' => $chunk->getLanguage()
                ]
            ];
        }

        // an upsert without any points has nothing to store, so skip the request entirely
        if ($points === []) return;

        $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'points' => $points
            ],
            'PUT'
        );
    }

    /** @inheritdoc */
    public function finalizeCreation()
    {
        // no-op
    }

    /** @inheritdoc */
    public function runMaintenance()
    {
        // no-op
    }

    /**
     * @inheritdoc
     *
     * The page name is not used; the points are looked up by the page's ID range.
     */
    public function getPageChunks($page, $firstChunkID)
    {
        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points',
            [
                'ids' => $this->pageChunkIds($firstChunkID),
                'with_payload' => true,
                'with_vector' => true,
            ],
            'POST'
        );

        if (!$data) return [];

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /**
     * @inheritdoc
     *
     * When a language is given, only points whose 'language' payload matches are searched.
     */
    public function getSimilarChunks($vector, $lang = '', $limit = 4)
    {
        $limit *= 2; // we can't check ACLs, so we return more than requested

        if ($lang) {
            $filter = [
                'must' => [
                    [
                        'key' => 'language',
                        'match' => [
                            'value' => $lang
                        ],
                    ]
                ]
            ];
        } else {
            $filter = null;
        }

        $data = $this->runQuery(
            '/collections/' . $this->getCollection() . '/points/search',
            [
                'vector' => $vector,
                'limit' => (int)$limit,
                'filter' => $filter,
                'with_payload' => true,
                'with_vector' => true,
            ]
        );

        $chunks = [];
        foreach ($data as $point) {
            $chunks[] = $this->pointToChunk($point);
        }
        return $chunks;
    }

    /**
     * @inheritdoc
     *
     * Combines the collection info with the server version taken from the telemetry endpoint.
     */
    public function statistics()
    {
        $info = $this->runQuery('/collections/' . $this->getCollection(), '', 'GET');
        $telemetry = $this->runQuery('/telemetry', '', 'GET');

        return [
            'qdrant_version' => $telemetry['app']['version'] ?? null,
            'vector_config' => $info['config']['params']['vectors'] ?? null,
            // NOTE(review): vectors_count appears to be deprecated in newer Qdrant releases
            // and may be missing or null there - fall back to the number of points
            'chunks' => $info['vectors_count'] ?? $info['points_count'] ?? null,
            'segments' => $info['segments_count'] ?? null,
            'status' => $info['status'] ?? null,
        ];
    }

    /**
     * List all point IDs that may belong to the page starting at the given chunk ID
     *
     * @param int $firstChunkID ID of the page's first chunk
     * @return int[] CHUNK_ID_RANGE consecutive IDs beginning with $firstChunkID
     */
    protected function pageChunkIds($firstChunkID)
    {
        return range($firstChunkID, $firstChunkID + self::CHUNK_ID_RANGE - 1, 1);
    }

    /**
     * Convert a point as returned by the Qdrant API into a Chunk
     *
     * @param array $point decoded point with 'id', 'vector' and 'payload', optionally 'score'
     * @return Chunk
     */
    protected function pointToChunk(array $point)
    {
        $args = [
            $point['payload']['page'],
            (int)$point['id'],
            $point['payload']['text'],
            $point['vector'],
            $point['payload']['language'] ?? '',
            (int)$point['payload']['created'],
        ];

        // the score is only passed on when the point has one, otherwise
        // the Chunk constructor's own default is used
        if (array_key_exists('score', $point)) {
            $args[] = $point['score'];
        }

        return new Chunk(...$args);
    }
}

conf/default.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
$conf['chroma_database'] = 'default_database';
2020
$conf['chroma_collection'] = 'aichat';
2121

22+
$conf['qdrant_baseurl'] = '';
23+
$conf['qdrant_apikey'] = '';
24+
$conf['qdrant_collection'] = 'aichat';
25+
2226
$conf['logging'] = 0;
2327
$conf['restrict'] = '';
2428
$conf['preferUIlanguage'] = 0;

conf/metadata.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
$meta['chroma_database'] = array('string');
2727
$meta['chroma_collection'] = array('string');
2828

29+
$meta['qdrant_baseurl'] = array('string');
30+
$meta['qdrant_apikey'] = array('string');
31+
$meta['qdrant_collection'] = array('string');
32+
2933
$meta['logging'] = array('onoff');
3034
$meta['restrict'] = array('string');
3135
$meta['preferUIlanguage'] = array('multichoice', '_choices' => array(

helper.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
1111
use dokuwiki\plugin\aichat\Storage\ChromaStorage;
1212
use dokuwiki\plugin\aichat\Storage\PineconeStorage;
13+
use dokuwiki\plugin\aichat\Storage\QdrantStorage;
1314
use dokuwiki\plugin\aichat\Storage\SQLiteStorage;
1415

1516
/**
@@ -118,6 +119,8 @@ public function getStorage()
118119
$this->storage = new PineconeStorage();
119120
} elseif ($this->getConf('chroma_baseurl')) {
120121
$this->storage = new ChromaStorage();
122+
} elseif ($this->getConf('qdrant_baseurl')) {
123+
$this->storage = new QdrantStorage();
121124
} else {
122125
$this->storage = new SQLiteStorage();
123126
}

lang/en/settings.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
$lang['chroma_database'] = 'Your Chroma database name.';
1919
$lang['chroma_collection'] = 'The collection to use. Will be created.';
2020

21+
$lang['qdrant_baseurl'] = 'Your Qdrant base URL if you want to use Qdrant as a storage backend.';
22+
$lang['qdrant_apikey'] = 'Your Qdrant API key. Empty if no authentication is required.';
23+
$lang['qdrant_collection'] = 'The collection to use. Will be created.';
24+
2125
$lang['logging'] = 'Log all questions and answers. Use the <a href="?do=admin&page=logviewer&facility=aichat">Log Viewer</a> to access.';
2226
$lang['restrict'] = 'Restrict access to these users and groups (comma separated). Leave empty to allow all users.';
2327
$lang['preferUIlanguage'] = 'How to work with multilingual wikis? (Requires the translation plugin)';

0 commit comments

Comments
 (0)