44
55use dokuwiki \Extension \Event ;
66use dokuwiki \Extension \PluginInterface ;
7+ use dokuwiki \File \PageResolver ;
78use dokuwiki \plugin \aichat \Model \ChatInterface ;
89use dokuwiki \plugin \aichat \Model \EmbeddingInterface ;
910use dokuwiki \plugin \aichat \Storage \AbstractStorage ;
@@ -177,23 +178,20 @@ public function createPageChunks($page, $firstChunkID)
177178 {
178179 $ chunkList = [];
179180
180- $ textRenderer = plugin_load ('renderer ' , 'text ' );
181- if ($ textRenderer instanceof PluginInterface) {
182- global $ ID ;
183- $ ID = $ page ;
184- try {
185- $ text = p_cached_output (wikiFN ($ page ), 'text ' , $ page );
186- } catch (\Throwable $ e ) {
187- if ($ this ->logger ) $ this ->logger ->error (
188- 'Failed to render page {page} using raw text instead. {msg} ' ,
189- ['page ' => $ page , 'msg ' => $ e ->getMessage ()]
190- );
191- $ text = rawWiki ($ page );
192- }
193- } else {
181+ global $ ID ;
182+ $ ID = $ page ;
183+ try {
184+ $ text = p_cached_output (wikiFN ($ page ), 'aichat ' , $ page );
185+ } catch (\Throwable $ e ) {
186+ if ($ this ->logger ) $ this ->logger ->error (
187+ 'Failed to render page {page}. Using raw text instead. {msg} ' ,
188+ ['page ' => $ page , 'msg ' => $ e ->getMessage ()]
189+ );
194190 $ text = rawWiki ($ page );
195191 }
196192
193+ $ crumbs = $ this ->breadcrumbTrail ($ page );
194+
197195 // allow plugins to modify the text before splitting
198196 $ eventData = [
199197 'page ' => $ page ,
@@ -211,6 +209,8 @@ public function createPageChunks($page, $firstChunkID)
211209 foreach ($ parts as $ part ) {
212210 if (trim ((string )$ part ) == '' ) continue ; // skip empty chunks
213211
212+ $ part = $ crumbs . "\n\n" . $ part ; // add breadcrumbs to each chunk
213+
214214 try {
215215 $ embedding = $ this ->embedModel ->getEmbedding ($ part );
216216 } catch (\Exception $ e ) {
@@ -285,6 +285,37 @@ public function getSimilarChunks($query, $lang = '')
285285 return $ result ;
286286 }
287287
/**
 * Create a breadcrumb trail for the given page
 *
 * Walks every namespace level of the ID and labels it with the first heading of the page the
 * level resolves to, falling back to the bare namespace name when there is no heading. The page
 * itself is appended last in the same "Heading (name)" form. The resulting string is added as a
 * prefix to each chunk to give the AI some context.
 *
 * Pages in the root namespace have no namespace crumbs; their trail consists of the page only.
 *
 * @param string $id the page ID to build the trail for
 * @return string the crumbs joined by ' » '
 */
protected function breadcrumbTrail($id)
{
    // getNS() yields false for IDs without a namespace. Exploding that would give a single
    // empty namespace and therefore a bogus leading crumb, so treat it as "no namespaces".
    $ns = getNS($id);
    $namespaces = ($ns === false || $ns === '') ? [] : explode(':', $ns);

    $resolver = new PageResolver($id);
    $crumbs = [];

    // all namespaces
    $check = '';
    foreach ($namespaces as $namespace) {
        // accumulate the namespace path level by level: "a:", "a:b:", ...
        $check .= $namespace . ':';
        // NOTE(review): the trailing colon presumably makes the resolver pick the
        // namespace's start page - confirm against PageResolver
        $nsPage = $resolver->resolveId($check);
        $title = p_get_first_heading($nsPage);
        $crumbs[] = $title ? "$title ($namespace)" : $namespace;
    }

    // the page itself
    $title = p_get_first_heading($id);
    $pageName = noNS($id);
    $crumbs[] = $title ? "$title ($pageName)" : $pageName;

    return implode(' » ', $crumbs);
}
288319
289320 /**
290321 * @param $text
0 commit comments