diff --git a/composer.lock b/composer.lock index 484a9ac0..a5ab6a9e 100644 --- a/composer.lock +++ b/composer.lock @@ -8,16 +8,16 @@ "packages": [ { "name": "composer/ca-bundle", - "version": "1.5.8", + "version": "1.5.9", "source": { "type": "git", "url": "https://github.com/composer/ca-bundle.git", - "reference": "719026bb30813accb68271fee7e39552a58e9f65" + "reference": "1905981ee626e6f852448b7aaa978f8666c5bc54" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/composer/ca-bundle/zipball/719026bb30813accb68271fee7e39552a58e9f65", - "reference": "719026bb30813accb68271fee7e39552a58e9f65", + "url": "https://api.github.com/repos/composer/ca-bundle/zipball/1905981ee626e6f852448b7aaa978f8666c5bc54", + "reference": "1905981ee626e6f852448b7aaa978f8666c5bc54", "shasum": "" }, "require": { @@ -64,7 +64,7 @@ "support": { "irc": "irc://irc.freenode.org/composer", "issues": "https://github.com/composer/ca-bundle/issues", - "source": "https://github.com/composer/ca-bundle/tree/1.5.8" + "source": "https://github.com/composer/ca-bundle/tree/1.5.9" }, "funding": [ { @@ -76,7 +76,7 @@ "type": "github" } ], - "time": "2025-08-20T18:49:47+00:00" + "time": "2025-11-06T11:46:17+00:00" }, { "name": "composer/class-map-generator", @@ -630,16 +630,16 @@ }, { "name": "justinrainbow/json-schema", - "version": "6.6.0", + "version": "6.6.1", "source": { "type": "git", "url": "https://github.com/jsonrainbow/json-schema.git", - "reference": "68ba7677532803cc0c5900dd5a4d730537f2b2f3" + "reference": "fd8e5c6b1badb998844ad34ce0abcd71a0aeb396" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/jsonrainbow/json-schema/zipball/68ba7677532803cc0c5900dd5a4d730537f2b2f3", - "reference": "68ba7677532803cc0c5900dd5a4d730537f2b2f3", + "url": "https://api.github.com/repos/jsonrainbow/json-schema/zipball/fd8e5c6b1badb998844ad34ce0abcd71a0aeb396", + "reference": "fd8e5c6b1badb998844ad34ce0abcd71a0aeb396", "shasum": "" }, "require": { @@ -699,9 +699,9 @@ ], "support": { 
"issues": "https://github.com/jsonrainbow/json-schema/issues", - "source": "https://github.com/jsonrainbow/json-schema/tree/6.6.0" + "source": "https://github.com/jsonrainbow/json-schema/tree/6.6.1" }, - "time": "2025-10-10T11:34:09+00:00" + "time": "2025-11-07T18:30:29+00:00" }, { "name": "manticoresoftware/buddy-core", @@ -1508,16 +1508,16 @@ }, { "name": "symfony/cache", - "version": "v6.4.26", + "version": "v6.4.28", "source": { "type": "git", "url": "https://github.com/symfony/cache.git", - "reference": "66c853ddcbf85c1984169869be498c3e7597b367" + "reference": "31628f36fc97c5714d181b3a8d29efb85c6a7677" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/cache/zipball/66c853ddcbf85c1984169869be498c3e7597b367", - "reference": "66c853ddcbf85c1984169869be498c3e7597b367", + "url": "https://api.github.com/repos/symfony/cache/zipball/31628f36fc97c5714d181b3a8d29efb85c6a7677", + "reference": "31628f36fc97c5714d181b3a8d29efb85c6a7677", "shasum": "" }, "require": { @@ -1584,7 +1584,7 @@ "psr6" ], "support": { - "source": "https://github.com/symfony/cache/tree/v6.4.26" + "source": "https://github.com/symfony/cache/tree/v6.4.28" }, "funding": [ { @@ -1604,7 +1604,7 @@ "type": "tidelift" } ], - "time": "2025-09-11T09:57:09+00:00" + "time": "2025-10-30T08:37:02+00:00" }, { "name": "symfony/cache-contracts", @@ -1684,16 +1684,16 @@ }, { "name": "symfony/console", - "version": "v6.4.26", + "version": "v6.4.27", "source": { "type": "git", "url": "https://github.com/symfony/console.git", - "reference": "492de6dfd93910d7d7a729c5a04ddcd2b9e99c4f" + "reference": "13d3176cf8ad8ced24202844e9f95af11e2959fc" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/console/zipball/492de6dfd93910d7d7a729c5a04ddcd2b9e99c4f", - "reference": "492de6dfd93910d7d7a729c5a04ddcd2b9e99c4f", + "url": "https://api.github.com/repos/symfony/console/zipball/13d3176cf8ad8ced24202844e9f95af11e2959fc", + "reference": "13d3176cf8ad8ced24202844e9f95af11e2959fc", 
"shasum": "" }, "require": { @@ -1758,7 +1758,7 @@ "terminal" ], "support": { - "source": "https://github.com/symfony/console/tree/v6.4.26" + "source": "https://github.com/symfony/console/tree/v6.4.27" }, "funding": [ { @@ -1778,7 +1778,7 @@ "type": "tidelift" } ], - "time": "2025-09-26T12:13:46+00:00" + "time": "2025-10-06T10:25:16+00:00" }, { "name": "symfony/dependency-injection", @@ -2072,16 +2072,16 @@ }, { "name": "symfony/finder", - "version": "v6.4.24", + "version": "v6.4.27", "source": { "type": "git", "url": "https://github.com/symfony/finder.git", - "reference": "73089124388c8510efb8d2d1689285d285937b08" + "reference": "a1b6aa435d2fba50793b994a839c32b6064f063b" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/finder/zipball/73089124388c8510efb8d2d1689285d285937b08", - "reference": "73089124388c8510efb8d2d1689285d285937b08", + "url": "https://api.github.com/repos/symfony/finder/zipball/a1b6aa435d2fba50793b994a839c32b6064f063b", + "reference": "a1b6aa435d2fba50793b994a839c32b6064f063b", "shasum": "" }, "require": { @@ -2116,7 +2116,7 @@ "description": "Finds files and directories via an intuitive fluent interface", "homepage": "https://symfony.com", "support": { - "source": "https://github.com/symfony/finder/tree/v6.4.24" + "source": "https://github.com/symfony/finder/tree/v6.4.27" }, "funding": [ { @@ -2136,7 +2136,7 @@ "type": "tidelift" } ], - "time": "2025-07-15T12:02:45+00:00" + "time": "2025-10-15T18:32:00+00:00" }, { "name": "symfony/polyfill-ctype", @@ -2784,16 +2784,16 @@ }, { "name": "symfony/service-contracts", - "version": "v3.6.0", + "version": "v3.6.1", "source": { "type": "git", "url": "https://github.com/symfony/service-contracts.git", - "reference": "f021b05a130d35510bd6b25fe9053c2a8a15d5d4" + "reference": "45112560a3ba2d715666a509a0bc9521d10b6c43" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/service-contracts/zipball/f021b05a130d35510bd6b25fe9053c2a8a15d5d4", - "reference": 
"f021b05a130d35510bd6b25fe9053c2a8a15d5d4", + "url": "https://api.github.com/repos/symfony/service-contracts/zipball/45112560a3ba2d715666a509a0bc9521d10b6c43", + "reference": "45112560a3ba2d715666a509a0bc9521d10b6c43", "shasum": "" }, "require": { @@ -2847,7 +2847,7 @@ "standards" ], "support": { - "source": "https://github.com/symfony/service-contracts/tree/v3.6.0" + "source": "https://github.com/symfony/service-contracts/tree/v3.6.1" }, "funding": [ { @@ -2858,12 +2858,16 @@ "url": "https://github.com/fabpot", "type": "github" }, + { + "url": "https://github.com/nicolas-grekas", + "type": "github" + }, { "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", "type": "tidelift" } ], - "time": "2025-04-25T09:37:31+00:00" + "time": "2025-07-15T11:30:57+00:00" }, { "name": "symfony/string", @@ -5085,16 +5089,16 @@ }, { "name": "squizlabs/php_codesniffer", - "version": "3.13.4", + "version": "3.13.5", "source": { "type": "git", "url": "https://github.com/PHPCSStandards/PHP_CodeSniffer.git", - "reference": "ad545ea9c1b7d270ce0fc9cbfb884161cd706119" + "reference": "0ca86845ce43291e8f5692c7356fccf3bcf02bf4" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/PHPCSStandards/PHP_CodeSniffer/zipball/ad545ea9c1b7d270ce0fc9cbfb884161cd706119", - "reference": "ad545ea9c1b7d270ce0fc9cbfb884161cd706119", + "url": "https://api.github.com/repos/PHPCSStandards/PHP_CodeSniffer/zipball/0ca86845ce43291e8f5692c7356fccf3bcf02bf4", + "reference": "0ca86845ce43291e8f5692c7356fccf3bcf02bf4", "shasum": "" }, "require": { @@ -5111,11 +5115,6 @@ "bin/phpcs" ], "type": "library", - "extra": { - "branch-alias": { - "dev-master": "3.x-dev" - } - }, "notification-url": "https://packagist.org/downloads/", "license": [ "BSD-3-Clause" @@ -5165,7 +5164,7 @@ "type": "thanks_dev" } ], - "time": "2025-09-05T05:47:09+00:00" + "time": "2025-11-04T16:30:35+00:00" }, { "name": "swoole/ide-helper", diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 
851cedb4..fd55b3b6 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -29,7 +29,7 @@ services: entrypoint: - "/bin/sh" - "-c" - - "grep buddy_path /etc/manticoresearch/manticore.conf > /dev/null 2>&1 || sed -i '/searchd {/a \\ buddy_path = manticore-executor /workdir/src/main.php --debugvv' /etc/manticoresearch/manticore.conf && sed -i '/^searchd {/a \\ listen = /var/run/mysqld/mysqld.sock:mysql41' /etc/manticoresearch/manticore.conf; exec /bin/bash" + - "grep buddy_path /etc/manticoresearch/manticore.conf > /dev/null 2>&1 || sed -i '/searchd {/a \\ buddy_path = manticore-executor /workdir/src/main.php --debugvv' /etc/manticoresearch/manticore.conf && sed -i '/^searchd {/a \\ listen = /var/run/mysqld/mysqld.sock:mysql41' /etc/manticoresearch/manticore.conf; echo \"alias s='searchd --nodetach --logdebug'\" >> /root/.bashrc; echo \"alias m='mysql'\" >> /root/.bashrc; exec /bin/bash" working_dir: "/workdir" networks: - app-network diff --git a/phpstan.neon b/phpstan.neon index b2cb0c04..68f0939f 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -3,5 +3,6 @@ parameters: - src - test level: 9 + phpVersion: 80300 inferPrivatePropertyTypeFromConstructor: true checkGenericClassInNonGenericObjectType: true diff --git a/src/Plugin/ConversationalRag/ConversationManager.php b/src/Plugin/ConversationalRag/ConversationManager.php new file mode 100644 index 00000000..ff2a3fda --- /dev/null +++ b/src/Plugin/ConversationalRag/ConversationManager.php @@ -0,0 +1,284 @@ +sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchClientError::create('Failed to create conversations table: ' . 
$response->getError());
+		}
+	}
+
+	/**
+	 * Persist one conversation message together with its search metadata.
+	 *
+	 * Every string value goes through $this->quote() before it is placed into the INSERT
+	 * statement; absent optional values are stored as empty strings. The ttl column is set
+	 * to 30 days after the insertion time.
+	 *
+	 * @param string $conversationUuid
+	 * @param string $modelUuid
+	 * @param string $role
+	 * @param string $message
+	 * @param int $tokensUsed
+	 * @param string|null $intent
+	 * @param string|null $searchQuery
+	 * @param string|null $excludeQuery
+	 * @param array|null $excludedIds Stored JSON-encoded; null or empty list is stored as ''
+	 *
+	 * @return void
+	 * @throws ManticoreSearchClientError|\JsonException
+	 */
+	public function saveMessage(
+		string $conversationUuid,
+		string $modelUuid,
+		string $role,
+		string $message,
+		int $tokensUsed = 0,
+		?string $intent = null,
+		?string $searchQuery = null,
+		?string $excludeQuery = null,
+		?array $excludedIds = null
+	): void {
+		// Debug: Log message saving
+		Buddy::info("\n[DEBUG CONVERSATION SAVE]");
+		Buddy::info("├─ Conversation UUID: {$conversationUuid}");
+		Buddy::info("├─ Model UUID: {$modelUuid}");
+		Buddy::info("├─ Role: {$role}");
+		Buddy::info('├─ Message: ' . substr($message, 0, 100) . (strlen($message) > 100 ? '...' : ''));
+		Buddy::info("├─ Tokens used: {$tokensUsed}");
+		Buddy::info('├─ Intent: ' . ($intent ?? 'none'));
+		Buddy::info('├─ Search query: ' . ($searchQuery ? substr($searchQuery, 0, 50) . '...' : 'none'));
+		Buddy::info('├─ Exclude query: ' . ($excludeQuery ? substr($excludeQuery, 0, 50) . '...' : 'none'));
+		Buddy::info('└─ Excluded IDs count: ' . ($excludedIds ? sizeof($excludedIds) : 0));
+
+		$currentTime = time();
+		$ttlTime = $currentTime + (30 * 24 * 60 * 60); // 30 days
+
+		// Compare against null/'' explicitly: a plain truthiness test would silently drop the
+		// legitimate value "0" and store an empty string instead.
+		$intentValue = ($intent !== null && $intent !== '') ? $this->quote($intent) : "''";
+		$searchQueryValue = ($searchQuery !== null && $searchQuery !== '') ? $this->quote($searchQuery) : "''";
+		$excludeQueryValue = ($excludeQuery !== null && $excludeQuery !== '')
+			? $this->quote($excludeQuery)
+			: "''";
+		$excludedIdsValue = $excludedIds ? $this->quote(
+			json_encode($excludedIds, JSON_THROW_ON_ERROR)
+		) : "''";
+
+		$sql = sprintf(
+			'INSERT INTO %s (conversation_uuid, model_uuid, created_at, role, message, tokens_used, '
+			. 'intent, search_query, exclude_query, excluded_ids, ttl) '
+			. 'VALUES (%s, %s, %d, %s, %s, %d, %s, %s, %s, %s, %d)',
+			self::CONVERSATIONS_TABLE,
+			$this->quote($conversationUuid),
+			$this->quote($modelUuid),
+			$currentTime,
+			$this->quote($role),
+			$this->quote($message),
+			$tokensUsed,
+			$intentValue,
+			$searchQueryValue,
+			$excludeQueryValue,
+			$excludedIdsValue,
+			$ttlTime
+		);
+
+		$result = $this->client->sendRequest($sql);
+		if ($result->hasError()) {
+			throw ManticoreSearchClientError::create(
+				'Failed to insert into conversations table: ' . $result->getError()
+			);
+		}
+
+		Buddy::info('└─ Message saved successfully');
+	}
+
+	/**
+	 * Load the messages of one conversation, oldest first, as "role: message" lines.
+	 *
+	 * @param string $conversationUuid
+	 * @param int $limit Maximum number of messages to read
+	 *
+	 * @return string One "{role}: {message}\n" line per stored message; '' when nothing is found
+	 * @throws ManticoreSearchClientError
+	 * @throws ManticoreSearchResponseError
+	 */
+	public function getConversationHistory(string $conversationUuid, int $limit = 100): string {
+		// Debug: Log history retrieval
+		Buddy::info("\n[DEBUG CONVERSATION HISTORY RETRIEVAL]");
+		Buddy::info("├─ Conversation UUID: {$conversationUuid}");
+		Buddy::info("├─ Limit: {$limit}");
+
+		// The UUID comes from the caller's request, so it is quoted the same way saveMessage()
+		// quotes it instead of being interpolated raw into the statement.
+		$sql = /** @lang Manticore */ 'SELECT role, message FROM ' . self::CONVERSATIONS_TABLE . ' ' .
+			'WHERE conversation_uuid = ' . $this->quote($conversationUuid) . ' ' .
+			"ORDER BY created_at ASC LIMIT $limit";
+
+		Buddy::info("├─ SQL: {$sql}");
+
+		$result = $this->client->sendRequest($sql);
+		if ($result->hasError()) {
+			throw ManticoreSearchClientError::create('Failed to retrieve conversation history: ' . $result->getError());
+		}
+
+		$data = $result->getResult();
+		$history = '';
+		if (is_array($data[0])) {
+			$rows = $data[0]['data'];
+			// Count the returned rows (as the filtered variant of this method does), not the
+			// outer result wrapper.
+			Buddy::info('├─ Messages found: ' . sizeof($rows));
+			foreach ($rows as $row) {
+				$role = (string)$row['role'];
+				$message = (string)$row['message'];
+				$history .= "{$role}: {$message}\n";
+			}
+		}
+
+		$historyLength = strlen($history);
+		Buddy::info("├─ History length: {$historyLength} chars");
+		Buddy::info('└─ History preview: ' . substr($history, 0, 150) . ($historyLength > 150 ? '...'
: ''));
+
+		return $history;
+	}
+
+
+	/**
+	 * Get the latest search context that was NOT from a CONTENT_QUESTION intent
+	 *
+	 * Reads the newest 'user' row of the conversation whose intent differs from
+	 * CONTENT_QUESTION and returns its stored search fields as strings.
+	 *
+	 * @param string $conversationUuid
+	 *
+	 * @return array{search_query: string, exclude_query: string, excluded_ids: string}|null
+	 *   null when no matching row exists
+	 * @throws ManticoreSearchClientError
+	 * @throws ManticoreSearchResponseError
+	 */
+	public function getLatestSearchContext(string $conversationUuid): ?array {
+		// Debug: Log search context retrieval
+		Buddy::info("\n[DEBUG SEARCH CONTEXT RETRIEVAL]");
+		Buddy::info("├─ Conversation UUID: {$conversationUuid}");
+
+		// Quote the caller-supplied UUID (same helper saveMessage() uses) rather than
+		// interpolating it raw into the statement.
+		$sql = /** @lang Manticore */ 'SELECT search_query, exclude_query, excluded_ids FROM '
+			. self::CONVERSATIONS_TABLE . ' ' .
+			'WHERE conversation_uuid = ' . $this->quote($conversationUuid) . ' ' .
+			"AND role = 'user' " .
+			"AND intent != 'CONTENT_QUESTION' " .
+			'ORDER BY created_at DESC LIMIT 1';
+
+		Buddy::info("├─ SQL: {$sql}");
+
+		$result = $this->client->sendRequest($sql);
+		if ($result->hasError()) {
+			throw ManticoreSearchClientError::create('Failed to retrieve search context: ' . $result->getError());
+		}
+
+		$data = $result->getResult();
+
+		if (is_array($data[0]) && isset($data[0]['data'])) {
+			$rows = $data[0]['data'];
+
+			if (empty($rows)) {
+				Buddy::info('└─ No search context found');
+				return null;
+			}
+
+			$searchContext = [
+				'search_query' => (string)$rows[0]['search_query'],
+				'exclude_query' => (string)$rows[0]['exclude_query'],
+				'excluded_ids' => (string)$rows[0]['excluded_ids'],
+			];
+
+			// excluded_ids is stored as '' when absent, and could hold any JSON value; only an
+			// array can be counted, everything else is treated as "no excluded ids" for the log.
+			$decodedIds = json_decode($searchContext['excluded_ids'], true);
+			/** @var array $excludedIdsArray */
+			$excludedIdsArray = is_array($decodedIds) ? $decodedIds : [];
+			Buddy::info('├─ Search query: ' . substr($searchContext['search_query'], 0, 50) . '...');
+			$excludePreview = $searchContext['exclude_query']
+				? substr($searchContext['exclude_query'], 0, 50) . '...'
+				: 'none';
+			Buddy::info('├─ Exclude query: ' . $excludePreview);
+			Buddy::info('└─ Excluded IDs count: ' . sizeof($excludedIdsArray));
+
+			return $searchContext;
+		}
+
+		throw ManticoreSearchClientError::create('Manticore returned wrong context structure');
+	}
+
+	/**
+	 * Get conversation history filtered for query generation (excludes CONTENT_QUESTION exchanges)
+	 *
+	 * @param string $conversationUuid
+	 * @param int $limit Maximum number of messages to read
+	 * @return string One "{role}: {message}\n" line per matching message; '' when nothing is found
+	 * @throws ManticoreSearchClientError
+	 * @throws ManticoreSearchResponseError
+	 */
+	public function getConversationHistoryForQueryGeneration(string $conversationUuid, int $limit = 100): string {
+		// Debug: Log filtered history retrieval for query generation
+		Buddy::info("\n[DEBUG FILTERED HISTORY RETRIEVAL]");
+		Buddy::info("├─ Conversation UUID: {$conversationUuid}");
+		Buddy::info("├─ Limit: {$limit}");
+
+		// Quote the caller-supplied UUID instead of interpolating it raw into the statement.
+		$sql = /** @lang Manticore */ 'SELECT role, message FROM ' . self::CONVERSATIONS_TABLE . ' ' .
+			'WHERE conversation_uuid = ' . $this->quote($conversationUuid) . ' ' .
+			"AND intent != 'CONTENT_QUESTION' " .
+			"ORDER BY created_at ASC LIMIT $limit";
+
+		Buddy::info("├─ SQL: {$sql}");
+
+		$result = $this->client->sendRequest($sql);
+		if ($result->hasError()) {
+			throw ManticoreSearchClientError::create('Failed to retrieve conversation history: ' . $result->getError());
+		}
+
+		$data = $result->getResult();
+
+		$history = '';
+
+		if (is_array($data[0])) {
+			$rows = $data[0]['data'];
+
+			Buddy::info('├─ Filtered messages found: ' . sizeof($rows));
+			foreach ($rows as $row) {
+				$role = (string)$row['role'];
+				$message = (string)$row['message'];
+				$history .= "{$role}: {$message}\n";
+			}
+		}
+
+		$historyLength = strlen($history);
+		Buddy::info("├─ Filtered history length: {$historyLength} chars");
+		Buddy::info('└─ Filtered history preview: ' . substr($history, 0, 150) . ($historyLength > 150 ? '...'
: ''));
+
+		return $history;
+	}
+}
diff --git a/src/Plugin/ConversationalRag/DynamicThresholdManager.php b/src/Plugin/ConversationalRag/DynamicThresholdManager.php
new file mode 100644
index 00000000..51356f3e
--- /dev/null
+++ b/src/Plugin/ConversationalRag/DynamicThresholdManager.php
@@ -0,0 +1,225 @@
+ */
+	private static array $expansionState = [];
+
+	/**
+	 * Calculate dynamic threshold based on LLM expansion intent detection
+	 * Based on original calculateDynamicThreshold and detectExpansionIntent
+	 *
+	 * The threshold grows from $baseThreshold towards
+	 * $baseThreshold * (1 + MAX_EXPANSION_PERCENT) in EXPANSION_STEPS equal steps, one step per
+	 * consecutive query the LLM classifies as an expansion request; any other query resets
+	 * the counter.
+	 *
+	 * NOTE(review): the counter is also reset whenever md5() of the full history differs from
+	 * the previous call. If callers pass a history that grows on every turn, the counter can
+	 * presumably never exceed 1 — confirm this is intended.
+	 *
+	 * @param string $userQuery
+	 * @param string $conversationHistory
+	 * @param LLMProviderManager $llmProvider
+	 * @param array $modelConfig
+	 * @param float $baseThreshold
+	 * @return array{threshold: float, expansion_level: int, is_expanded: bool, max_threshold: float,
+	 *   expansion_percent: float, expansion_limit_reached: bool}
+	 */
+	public function calculateDynamicThreshold(
+		string $userQuery,
+		string $conversationHistory,
+		LLMProviderManager $llmProvider,
+		array $modelConfig,
+		float $baseThreshold = 0.8
+	): array {
+		$conversationId = $this->getConversationId($conversationHistory);
+		$currentHash = md5($conversationHistory);
+
+		// Start from zero for an unseen conversation, or when the history text changed
+		// since the previous call for this conversation id
+		if (!isset(self::$expansionState[$conversationId])
+			|| self::$expansionState[$conversationId]['last_conversation_hash'] !== $currentHash
+		) {
+			self::$expansionState[$conversationId] = [
+				'count' => 0,
+				'last_conversation_hash' => $currentHash,
+			];
+		}
+
+		$expansionCount = self::$expansionState[$conversationId]['count'];
+
+		// Calculate maximum threshold based on percentage (original logic)
+		$maxThreshold = $baseThreshold * (1 + self::MAX_EXPANSION_PERCENT);
+
+		// Relative growth over the base in percent; a zero base has no meaningful ratio and
+		// would otherwise raise DivisionByZeroError.
+		$percentOverBase = static fn (float $value): float => $baseThreshold === 0.0
+			? 0.0
+			: round((($value - $baseThreshold) / $baseThreshold) * 100, 1);
+
+		// Check if we've hit maximum expansions
+		if ($expansionCount >= self::MAX_EXPANSIONS) {
+			return [
+				'threshold' => $maxThreshold,
+				'expansion_level' => $expansionCount,
+				'is_expanded' => true,
+				'max_threshold' => $maxThreshold,
+				'expansion_percent' => $percentOverBase($maxThreshold),
+				'expansion_limit_reached' => true,
+			];
+		}
+
+		// Detect if user wants broader search using LLM (from original detectExpansionIntent)
+		$wantsExpansion = $this->detectExpansionIntent($userQuery, $conversationHistory, $llmProvider, $modelConfig);
+
+		Buddy::info("\n[DEBUG DYNAMIC THRESHOLD]");
+		Buddy::info('├─ Wants expansion: ' . ($wantsExpansion ? 'YES' : 'NO'));
+
+		if ($wantsExpansion) {
+			self::$expansionState[$conversationId]['count']++;
+			$expansionCount = self::$expansionState[$conversationId]['count'];
+
+			// Calculate step size: divide expansion range into EXPANSION_STEPS steps (original logic)
+			$expansionRange = $maxThreshold - $baseThreshold;
+			$step = $expansionRange / self::EXPANSION_STEPS;
+
+			// Progressive expansion with calculated step (original logic)
+			$threshold = min($baseThreshold + ($expansionCount * $step), $maxThreshold);
+			$expansionPercent = $percentOverBase($threshold);
+
+			Buddy::info("├─ Expansion level: {$expansionCount} / " . self::MAX_EXPANSIONS);
+			Buddy::info("├─ Base threshold: {$baseThreshold}");
+			Buddy::info("├─ Max threshold: {$maxThreshold} (+" . (self::MAX_EXPANSION_PERCENT * 100) . '%)');
+			Buddy::info("├─ Step size: {$step}");
+			Buddy::info("├─ Calculated threshold: {$threshold}");
+			Buddy::info(
+				'└─ Expansion percent: ' .
+				$expansionPercent . '%'
+			);
+
+			return [
+				'threshold' => $threshold,
+				'expansion_level' => $expansionCount,
+				'is_expanded' => true,
+				'max_threshold' => $maxThreshold,
+				'expansion_percent' => $expansionPercent,
+				// Use the same limit as the early-return check above instead of a literal
+				'expansion_limit_reached' => $expansionCount >= self::MAX_EXPANSIONS,
+			];
+		}
+
+		// Reset on non-expansion queries (original logic)
+		self::$expansionState[$conversationId]['count'] = 0;
+
+		Buddy::info("├─ Using base threshold: {$baseThreshold}");
+		Buddy::info('└─ Expansion count reset to 0');
+
+		return [
+			'threshold' => $baseThreshold,
+			'expansion_level' => 0,
+			'is_expanded' => false,
+			'max_threshold' => $baseThreshold,
+			// float, as declared in the return shape
+			'expansion_percent' => 0.0,
+			'expansion_limit_reached' => false,
+		];
+	}
+
+	/**
+	 * Get conversation ID from history
+	 *
+	 * The id is the md5 of the first 200 bytes of the history text, so two histories that
+	 * share that prefix map to the same id.
+	 *
+	 * @param string $conversationHistory
+	 * @return string
+	 */
+	private function getConversationId(string $conversationHistory): string {
+		// Generate a consistent ID based on conversation content (first 200 chars for consistency)
+		return md5(substr($conversationHistory, 0, 200));
+	}
+
+	/**
+	 * Detect if user wants to broaden their search (expansion intent)
+	 *
+	 * Asks the LLM a YES/NO question built from the history and the current query.
+	 * Returns false without calling the LLM when the history is empty.
+	 *
+	 * @param string $userQuery
+	 * @param string $conversationHistory
+	 * @param LLMProviderManager $llmProvider
+	 * @param array $modelConfig
+	 * @return bool
+	 */
+	private function detectExpansionIntent(
+		string $userQuery,
+		string $conversationHistory,
+		LLMProviderManager $llmProvider,
+		array $modelConfig
+	): bool {
+		try {
+			// CRITICAL: If no conversation history, cannot be expansion (from original)
+			if (empty(trim($conversationHistory))) {
+				Buddy::info("\n[DEBUG EXPANSION CHECK]");
+				Buddy::info('├─ No conversation history');
+				Buddy::info('└─ Expansion: NO (no prior results to expand from)');
+				return false;
+			}
+
+			$historyText = $conversationHistory;
+
+			// Use original expansion prompt
+			$expansionPrompt = "
+Analyze if the user wants to BROADEN their search beyond previously shown results.
+
+EXPANSION CONCEPT:
+- User has seen specific results before and wants MORE options beyond what was shown
+- User wants to discover additional content in the same general area
+- User wants to widen the search scope, not change topics or narrow focus
+
+REQUIREMENTS for expansion:
+1. Previous results must exist in conversation history
+2. User wants additional options (not replacement of what was shown)
+3. User wants broader scope (not more specific criteria)
+
+This is NOT expansion:
+- First requests with no prior results
+- Asking for different genre/topic (that's topic change)
+- Asking for more specific criteria (that's refinement)
+- Questions about shown content (that's clarification)
+
+Conversation history:
+{$historyText}
+
+Current query: {$userQuery}
+
+Does this query request BROADENING beyond previous results?
+Answer: YES or NO";
+
+			$provider = $llmProvider->getConnection('expansion_detector', $modelConfig);
+			$response = $provider->generateResponse($expansionPrompt, ['temperature' => 0.1, 'max_tokens' => 10]);
+
+			if (!$response['success']) {
+				// Best effort: treat a failed LLM call as "no expansion", but leave a trace
+				Buddy::info("\n[DEBUG EXPANSION CHECK]");
+				Buddy::info('└─ LLM request failed: ' . ($response['error'] ?? 'Unknown error'));
+				return false;
+			}
+
+			$result = trim(strtolower($response['content']));
+			// Accept answers such as "yes." or "yes, the user ..." — the model is asked for
+			// YES/NO but is not guaranteed to reply with the bare word.
+			$isExpansion = str_starts_with($result, 'yes');
+
+			Buddy::info("\n[DEBUG EXPANSION CHECK]");
+			Buddy::info('├─ Has conversation history: YES');
+			Buddy::info("├─ LLM response: {$result}");
+			Buddy::info('└─ Expansion: ' . ($isExpansion ? 'YES' : 'NO'));
+
+			return $isExpansion;
+		} catch (Exception $e) {
+			// Fallback to false on error, but record why detection was skipped
+			Buddy::info("\n[DEBUG EXPANSION CHECK]");
+			Buddy::info('└─ Expansion detection failed: ' . $e->getMessage());
+			return false;
+		}
+	}
+
+
+}
diff --git a/src/Plugin/ConversationalRag/Handler.php b/src/Plugin/ConversationalRag/Handler.php
new file mode 100644
index 00000000..763a9b34
--- /dev/null
+++ b/src/Plugin/ConversationalRag/Handler.php
@@ -0,0 +1,888 @@
+llmProviderManager = $llmProviderManager;
+	}
+
+	/**
+	 * Build the task that processes the request and start it
+	 *
+	 * The task closure is static and receives everything it needs as arguments
+	 * (payload, client and the optionally injected provider manager). It creates the
+	 * plugin components, makes sure the tables exist and dispatches on the payload action.
+	 *
+	 * @return Task
+	 */
+	public function run(): Task {
+		$taskFn = static function (
+			Payload $payload,
+			Client $client,
+			?LLMProviderManager $injectedProviderManager
+		): TaskResult {
+			// Initialize components with the client
+			$modelManager = new ModelManager();
+			$providerManager = $injectedProviderManager ?? new LLMProviderManager();
+			$conversationManager = new ConversationManager($client);
+			$intentClassifier = new IntentClassifier();
+			$searchEngine = new SearchEngine();
+
+			// Ensure database tables exist
+			self::initializeTables($modelManager, $conversationManager, $client);
+
+			// Route to appropriate handler based on action
+			return match ($payload->action) {
+				Payload::ACTION_CREATE_MODEL => self::createModel($payload, $modelManager, $client),
+				Payload::ACTION_SHOW_MODELS => self::showModels($modelManager, $client),
+				Payload::ACTION_DESCRIBE_MODEL => self::describeModel($payload, $modelManager, $client),
+				Payload::ACTION_DROP_MODEL => self::dropModel($payload, $modelManager, $client),
+				Payload::ACTION_CONVERSATION => self::handleConversation(
+					$payload, $modelManager, $providerManager,
+					$conversationManager, $intentClassifier, $searchEngine, $client
+				),
+				default => throw QueryParseError::create("Unknown action: {$payload->action}")
+			};
+		};
+
+		return Task::create($taskFn, [$this->payload, $this->manticoreClient, $this->llmProviderManager])->run();
+	}
+
+	/**
+	 * Initialize database tables if they don't exist
+	 *
+	 * @param ModelManager $modelManager
+	 * @param
ConversationManager $conversationManager
+	 * @param Client $client
+	 *
+	 * @return void
+	 * @throws ManticoreSearchClientError
+	 */
+	private static function initializeTables(
+		ModelManager $modelManager,
+		ConversationManager $conversationManager,
+		Client $client
+	): void {
+		$modelManager->initializeTables($client);
+		$conversationManager->initializeTable($client);
+	}
+
+	/**
+	 * Create a new RAG model
+	 *
+	 * Validates the payload parameters, creates the model and returns its uuid as a
+	 * single-row result.
+	 *
+	 * @param Payload $payload
+	 * @param ModelManager $modelManager
+	 * @param Client $client
+	 *
+	 * @return TaskResult
+	 * @throws ManticoreSearchClientError|ManticoreSearchResponseError|QueryParseError
+	 * @throws RandomException
+	 */
+	private static function createModel(
+		Payload $payload,
+		ModelManager $modelManager,
+		Client $client
+	): TaskResult {
+		/** @var array{name: string, llm_provider:string, llm_model: string,
+		 * style_prompt?: string, temperature?: string, max_tokens?: string,
+		 * k_results?: string, similarity_threshold?: string,
+		 * max_document_length?: string} $config */
+		$config = $payload->params;
+
+		self::validateModelConfig($config);
+
+		// Create model
+		$uuid = $modelManager->createModel($client, $config);
+
+		return TaskResult::withRow(['uuid' => $uuid])
+			->column('uuid', Column::String);
+	}
+
+	/**
+	 * Validate model configuration
+	 *
+	 * Runs every individual validator; the first failing one throws.
+	 * similarity_threshold and max_document_length are not checked here.
+	 *
+	 * @param array{llm_provider:string, llm_model: string, style_prompt?: string,
+	 *   temperature?: string, max_tokens?: string, k_results?: string,
+	 *   similarity_threshold?: string, max_document_length?: string} $config
+	 * @return void
+	 * @throws QueryParseError
+	 */
+	private static function validateModelConfig(array $config): void {
+		self::validateRequiredFields($config);
+		self::validateLlmProvider($config);
+		self::validateTemperature($config);
+		self::validateMaxTokens($config);
+		self::validateKResults($config);
+	}
+
+	/**
+	 * Validate required fields
+	 *
+	 * @param array{llm_provider:string, llm_model: string, style_prompt?: string,
+	 *   temperature?: string, max_tokens?: string, k_results?: string,
+	 *   similarity_threshold?: string, max_document_length?: string} $config
+	 *
+	 * @return void
+	 * @throws QueryParseError
+	 */
+	private static function validateRequiredFields(array $config): void {
+		$required = ['llm_provider', 'llm_model'];
+
+		foreach ($required as $field) {
+			if (empty($config[$field])) {
+				throw QueryParseError::create("Required field '{$field}' is missing or empty");
+			}
+		}
+	}
+
+	/**
+	 * Validate LLM provider
+	 *
+	 * @param array{llm_provider:string, llm_model: string, style_prompt?: string,
+	 *   temperature?: string, max_tokens?: string, k_results?: string,
+	 *   similarity_threshold?: string, max_document_length?: string} $config
+	 *
+	 * @return void
+	 * @throws QueryParseError
+	 */
+	private static function validateLlmProvider(array $config): void {
+		$validProviders = ['openai'];
+		// Strict comparison: no type juggling when matching the provider name
+		if (!in_array($config['llm_provider'], $validProviders, true)) {
+			throw QueryParseError::create(
+				"Invalid LLM provider: {$config['llm_provider']}. Only 'openai' is supported."
+			);
+		}
+	}
+
+	/**
+	 * Validate temperature parameter
+	 *
+	 * @param array{llm_provider:string, llm_model: string, style_prompt?: string,
+	 *   temperature?: string, max_tokens?: string, k_results?: string,
+	 *   similarity_threshold?: string, max_document_length?: string} $config
+	 *
+	 * @return void
+	 * @throws QueryParseError
+	 */
+	private static function validateTemperature(array $config): void {
+		if (!isset($config['temperature'])) {
+			return;
+		}
+
+		// A non-numeric string would cast to 0.0 and silently pass the range check
+		$temp = is_numeric($config['temperature']) ? (float)$config['temperature'] : -1.0;
+		if ($temp < 0 || $temp > 2) {
+			throw QueryParseError::create('Temperature must be between 0 and 2');
+		}
+	}
+
+	/**
+	 * Validate max_tokens parameter
+	 *
+	 * @param array{llm_provider:string, llm_model: string, style_prompt?: string,
+	 *   temperature?: string, max_tokens?: string, k_results?: string,
+	 *   similarity_threshold?: string, max_document_length?: string} $config
+	 *
+	 * @return void
+	 * @throws QueryParseError
+	 */
+	private static function validateMaxTokens(array $config): void {
+		if (!isset($config['max_tokens'])) {
+			return;
+		}
+
+		// Reject values like "12abc", which a bare (int) cast would accept as 12
+		$tokens = is_numeric($config['max_tokens']) ? (int)$config['max_tokens'] : 0;
+		if ($tokens < 1 || $tokens > 32768) {
+			throw QueryParseError::create('max_tokens must be between 1 and 32768');
+		}
+	}
+
+	/**
+	 * Validate k_results parameter
+	 *
+	 * @param array{llm_provider:string, llm_model: string, style_prompt?: string,
+	 *   temperature?: string, max_tokens?: string, k_results?: string,
+	 *   similarity_threshold?: string, max_document_length?: string} $config
+	 *
+	 * @return void
+	 * @throws QueryParseError
+	 */
+	private static function validateKResults(array $config): void {
+		if (!isset($config['k_results'])) {
+			return;
+		}
+
+		// Reject values like "5x", which a bare (int) cast would accept as 5
+		$k = is_numeric($config['k_results']) ? (int)$config['k_results'] : 0;
+		if ($k < 1 || $k > 50) {
+			throw QueryParseError::create('k_results must be between 1 and 50');
+		}
+	}
+
+	/**
+	 * Show all RAG models
+	 *
+	 * @param ModelManager $modelManager
+	 * @param Client $client
+	 *
+	 * @return TaskResult
+	 * @throws ManticoreSearchClientError
+	 * @throws ManticoreSearchResponseError
+	 */
+	private static function showModels(ModelManager $modelManager, Client $client): TaskResult {
+
+		$models = $modelManager->getAllModels($client);
+
+		// Expose only the listing columns of each model
+		$data = [];
+		foreach ($models as $model) {
+			$data[] = [
+				'uuid' => $model['uuid'],
+				'name' => $model['name'],
+				'llm_provider' => $model['llm_provider'],
+				'llm_model' => $model['llm_model'],
+				'created_at' => $model['created_at'],
+			];
+		}
+
+		return TaskResult::withData($data)
+			->column('uuid', Column::String)
+			->column('name', Column::String)
+			->column('llm_provider', Column::String)
+			->column('llm_model', Column::String)
+			->column('created_at', Column::String);
+	}
+
+	/**
+	 * Describe a specific RAG model
+	 *
+	 * Returns one property/value row per model field; array-valued fields are flattened
+	 * into "settings.<key>" rows.
+	 *
+	 * @param Payload $payload
+	 * @param ModelManager $modelManager
+	 * @param Client $client
+	 *
+	 * @return TaskResult
+	 * @throws ManticoreSearchClientError|ManticoreSearchResponseError
+	 */
+	private static function describeModel(Payload $payload, ModelManager $modelManager, Client $client): TaskResult {
+
+		$modelNameOrUuid = $payload->params['model_name_or_uuid'];
+		$model = $modelManager->getModelByUuidOrName($client, $modelNameOrUuid);
+
+		$data = [];
+		foreach ($model as $key => $value) {
+			if (is_array($value)) { // Settings key
+				foreach ($value as $setting => $settingValue) {
+					$data[] = [
+						'property' => "settings.{$setting}",
+						'value' => (string)$settingValue,
+					];
+				}
+			} else {
+				$data[] = [
+					'property' => $key,
+					'value' => (string)$value,
+				];
+			}
+		}
+
+
+
+		return TaskResult::withData($data)
+			->column('property', Column::String)
+			->column('value', Column::String);
+	}
+
+	/**
+	 * Drop a RAG model
+	 *
+	 * @param Payload $payload
+	 * @param ModelManager $modelManager
+	 * @param Client $client
+	 * @throws ManticoreSearchClientError|ManticoreSearchResponseError
+	 * @return TaskResult
+	 */
+	private static function dropModel(Payload $payload, ModelManager $modelManager, Client $client): TaskResult {
+		$modelNameOrUuid =
$payload->params['model_name_or_uuid']; + + $modelManager->deleteModelByUuidOrName($client, $modelNameOrUuid); + + return TaskResult::none(); + } + + /** + * Handle conversation (CALL CONVERSATIONAL_RAG) + * + * @param Payload $payload + * @param ModelManager $modelManager + * @param LLMProviderManager $providerManager + * @param ConversationManager $conversationManager + * @param IntentClassifier $intentClassifier + * @param SearchEngine $searchEngine + * @param Client $client + * + * @return TaskResult + * @throws ManticoreSearchClientError|ManticoreSearchResponseError|JsonException + */ + private static function handleConversation( + Payload $payload, + ModelManager $modelManager, + LLMProviderManager $providerManager, + ConversationManager $conversationManager, + IntentClassifier $intentClassifier, + SearchEngine $searchEngine, + Client $client + ): TaskResult { + /** @var array{query:string, table: string, model_uuid: string, + * conversation_uuid: string, content_fields: string} $params */ + $params = self::parseCallRagParams($payload); + $conversationUuid = self::ensureConversationUuid($params); + $model = self::getModel($modelManager, $client, $params['model_uuid']); + + self::logConversationStart($conversationUuid); + $conversationHistory = $conversationManager->getConversationHistory($conversationUuid); + self::logConversationHistory($conversationHistory); + + $intent = self::classifyIntent( + $intentClassifier, $params['query'], $conversationHistory, $providerManager, $model + ); + + $settings = $model['settings']; + [$searchResults, $queries, $excludedIds] = self::performSearch( + $intent, $params, $conversationHistory, $conversationManager, + $conversationUuid, $providerManager, $model, $searchEngine, $client + ); + + self::logPreprocessingResults($params, $intent, $queries); + $context = self::buildContext($searchResults, $settings, $params['content_fields']); + self::logContextBuilding($searchResults, $context, $settings); + $response = 
self::generateResponse( + $model, $params['query'], $context, $conversationHistory, $settings, $providerManager + ); + + if (!$response['success']) { + return TaskResult::withError('LLM request failed: ' . ($response['error'] ?? 'Unknown error')); + } + + $responseText = $response['content']; + $tokensUsed = $response['metadata']['tokens_used'] ?? 0; + + self::saveConversationMessages( + $conversationManager, $conversationUuid, $model['uuid'], $intent, + $params, $queries, $excludedIds, $responseText, $tokensUsed + ); + + return TaskResult::withRow( + [ + 'conversation_uuid' => $conversationUuid, + 'response' => $responseText, + 'sources' => json_encode($searchResults), + ] + )->column('conversation_uuid', Column::String) + ->column('response', Column::String) + ->column('sources', Column::String); + } + + /** + * @param Payload $payload + * + * @return array{query:string, table: string, model_uuid: string, conversation_uuid: string, content_fields: string} + */ + private static function parseCallRagParams(Payload $payload): array { + // Parse CALL CONVERSATIONAL_RAG parameters from payload + return [ + 'query' => $payload->params['query'] ?? '', + 'table' => $payload->params['table'] ?? '', + 'model_uuid' => $payload->params['model_uuid'] ?? '', + 'content_fields' => $payload->params['content_fields'], + 'conversation_uuid' => $payload->params['conversation_uuid'] ?? 
'', + ]; + } + + /** + * Ensure conversation UUID exists + * + * @param array $params + * @return string + */ + private static function ensureConversationUuid(array $params): string { + if (empty($params['conversation_uuid'])) { + $params['conversation_uuid'] = self::generateUuid(); + } + return $params['conversation_uuid']; + } + + /** + * Generate a UUID v4 + * + * @return string + */ + private static function generateUuid(): string { + $data = random_bytes(16); + $data[6] = chr(ord($data[6]) & 0x0f | 0x40); // Version 4 + $data[8] = chr(ord($data[8]) & 0x3f | 0x80); // Variant 10 + return vsprintf('%s%s-%s-%s-%s-%s%s%s', str_split(bin2hex($data), 4)); + } + + /** + * Get model by UUID or name + * + * @param ModelManager $modelManager + * @param Client $client + * @param string $modelUuid + * + * @return array{id:string, uuid:string, name:string,llm_provider:string, + * llm_model:string,style_prompt:string,settings:array{ temperature?: string, + * max_tokens?: string, k_results?: string, similarity_threshold?: string, + * max_document_length?: string},created_at:string,updated_at:string} + * @throws ManticoreSearchClientError|ManticoreSearchResponseError + */ + private static function getModel(ModelManager $modelManager, Client $client, string $modelUuid): array { + return $modelManager->getModelByUuidOrName($client, $modelUuid); + } + + /** + * Log conversation start + * + * @param string $conversationUuid + * @return void + */ + private static function logConversationStart(string $conversationUuid): void { + Buddy::info("\n[DEBUG CONVERSATION FLOW]"); + Buddy::info('├─ Starting conversation processing'); + Buddy::info("├─ Conversation UUID: {$conversationUuid}"); + } + + /** + * Log conversation history + * + * @param string $conversationHistory + * @return void + */ + private static function logConversationHistory(string $conversationHistory): void { + Buddy::info('├─ Retrieved history for intent classification'); + Buddy::info('├─ History length: ' . 
strlen($conversationHistory) . ' chars'); + } + + /** + * Classify intent + * + * @param IntentClassifier $intentClassifier + * @param string $query + * @param string $conversationHistory + * @param LLMProviderManager $providerManager + * @param array{id:string, uuid:string, name:string,llm_provider:string, + * llm_model:string,style_prompt:string,settings:array{ temperature?: string, + * max_tokens?: string, k_results?: string, similarity_threshold?: string, + * max_document_length?: string},created_at:string,updated_at:string} $model + * @return string + */ + private static function classifyIntent( + IntentClassifier $intentClassifier, + string $query, + string $conversationHistory, + LLMProviderManager $providerManager, + array $model + ): string { + $intent = $intentClassifier->classifyIntent( + $query, $conversationHistory, $providerManager, $model + ); + Buddy::info("├─ Intent classified: {$intent}"); + return $intent; + } + + /** + * Perform search based on intent + * + * @param string $intent + * @param array{query:string, table: string, model_uuid: string, + * conversation_uuid: string, content_fields: string} $params + * @param string $conversationHistory + * @param ConversationManager $conversationManager + * @param string $conversationUuid + * @param LLMProviderManager $providerManager + * @param array{id:string, uuid:string, name:string,llm_provider:string, + * llm_model:string,style_prompt:string,settings:array{ temperature?: string, + * max_tokens?: string, k_results?: string, similarity_threshold?: string, + * max_document_length?: string},created_at:string,updated_at:string} $model + * @param SearchEngine $searchEngine + * @param Client $client + * @return array{array>, array{search_query:string, + * exclude_query:string}, array} + */ + private static function performSearch( + string $intent, + array $params, + string $conversationHistory, + ConversationManager $conversationManager, + string $conversationUuid, + LLMProviderManager $providerManager, 
+ array $model, + SearchEngine $searchEngine, + Client $client + ): array { + if ($intent === 'CONTENT_QUESTION') { + return self::handleContentQuestionIntent( + $params, $conversationHistory, $conversationManager, $conversationUuid, + $providerManager, $model, $searchEngine, $client + ); + } + + return self::handleQueryGeneratingIntent( + $intent, $params, $conversationHistory, $conversationManager, + $conversationUuid, $providerManager, $model, $searchEngine, $client + ); + } + + /** + * Handle CONTENT_QUESTION intent + * + * @param array{query:string, table: string, model_uuid: string, + * conversation_uuid: string, content_fields: string} $params + * @param string $conversationHistory + * @param ConversationManager $conversationManager + * @param string $conversationUuid + * @param LLMProviderManager $providerManager + * @param array{id:string, uuid:string, name:string,llm_provider:string, + * llm_model:string,style_prompt:string,settings:array{ temperature?: string, + * max_tokens?: string, k_results?: string, similarity_threshold?: string, + * max_document_length?: string},created_at:string,updated_at:string} $model + * @param SearchEngine $searchEngine + * @param Client $client + * + * @return array{array>, array{search_query: string, + * exclude_query: string}, array} + * @throws ManticoreSearchClientError + * @throws ManticoreSearchResponseError + */ + private static function handleContentQuestionIntent( + array $params, + string $conversationHistory, + ConversationManager $conversationManager, + string $conversationUuid, + LLMProviderManager $providerManager, + array $model, + SearchEngine $searchEngine, + Client $client + ): array { + Buddy::info('├─ Processing CONTENT_QUESTION intent'); + $lastContext = $conversationManager->getLatestSearchContext($conversationUuid); + + if ($lastContext) { + Buddy::info('├─ Found previous search context to reuse'); + $queries = [ + 'search_query' => $lastContext['search_query'], + 'exclude_query' => 
$lastContext['exclude_query'], + ]; + + $thresholdManager = new DynamicThresholdManager(); + $thresholdInfo = $thresholdManager->calculateDynamicThreshold( + $params['query'], $conversationHistory, $providerManager, $model + ); + + $excludedIds = json_decode($lastContext['excluded_ids'], true) ?? []; + if (!is_array($excludedIds)) { + throw ManticoreSearchClientError::create('Excluded IDs must be an array'); + } + + $searchResults = $searchEngine->performSearchWithExcludedIds( + $client, $params['table'], $queries['search_query'], $excludedIds, + $model, $thresholdInfo['threshold'] + ); + Buddy::info('├─ CONTENT_QUESTION performed KNN search with previous query parameters'); + return [$searchResults, $queries, $excludedIds]; + } + + Buddy::info('├─ No previous search context found, falling back to NEW_SEARCH'); + // Fallback to NEW_SEARCH logic + return self::handleQueryGeneratingIntent( + 'NEW_SEARCH', $params, $conversationHistory, $conversationManager, + $conversationUuid, $providerManager, $model, $searchEngine, $client + ); + } + + /** + * Handle query-generating intents + * + * @param string $intent + * @param array{query:string, table: string, model_uuid: string, + * conversation_uuid: string, content_fields: string} $params + * @param string $conversationHistory + * @param ConversationManager $conversationManager + * @param string $conversationUuid + * @param LLMProviderManager $providerManager + * @param array{id:string, uuid:string, name:string,llm_provider:string, + * llm_model:string,style_prompt:string,settings:array{ temperature?: string, + * max_tokens?: string, k_results?: string, similarity_threshold?: string, + * max_document_length?: string},created_at:string,updated_at:string} $model + * @param SearchEngine $searchEngine + * @param Client $client + * @throws ManticoreSearchClientError|ManticoreSearchResponseError + * + * @return array{array>, array{search_query:string, + * exclude_query:string}, array} + */ + private static function 
handleQueryGeneratingIntent( + string $intent, + array $params, + string $conversationHistory, + ConversationManager $conversationManager, + string $conversationUuid, + LLMProviderManager $providerManager, + array $model, + SearchEngine $searchEngine, + Client $client + ): array { + Buddy::info("├─ Processing query-generating intent: {$intent}"); + $cleanHistory = $conversationManager->getConversationHistoryForQueryGeneration($conversationUuid); + + Buddy::info('├─ Using filtered history for query generation'); + Buddy::info('├─ Clean history length: ' . strlen($cleanHistory) . ' chars'); + + $intentClassifierInstance = new IntentClassifier(); + $queries = $intentClassifierInstance->generateQueries( + $params['query'], $intent, $cleanHistory, $providerManager, $model + ); + + $thresholdManager = new DynamicThresholdManager(); + $thresholdInfo = $thresholdManager->calculateDynamicThreshold( + $params['query'], $conversationHistory, $providerManager, $model + ); + + $excludedIds = []; + if (!empty($queries['exclude_query']) && $queries['exclude_query'] !== 'none') { + $excludedIds = $searchEngine->getExcludedIds( + $client, $params['table'], $queries['exclude_query'] + ); + } + + $searchResults = $searchEngine->performSearchWithExcludedIds( + $client, $params['table'], $queries['search_query'], $excludedIds, + $model, $thresholdInfo['threshold'] + ); + + return [$searchResults, $queries, $excludedIds]; + } + + /** + * Log preprocessing results + * + * @param array{query:string, table: string, model_uuid: string, + * conversation_uuid: string, content_fields: string} $params + * @param string $intent + * @param array{search_query:string, exclude_query:string} $queries + * @return void + */ + private static function logPreprocessingResults(array $params, string $intent, array $queries): void { + Buddy::info('[DEBUG PREPROCESSING]'); + Buddy::info("├─ User query: '{$params['query']}'"); + Buddy::info("├─ Intent: $intent"); + Buddy::info("├─ Search query: 
'{$queries['search_query']}'"); + Buddy::info("└─ Exclude query: '{$queries['exclude_query']}'"); + } + + /** + * @param array> $searchResults + * @param array{ temperature?: string, max_tokens?: string, k_results?: string, + * similarity_threshold?: string, max_document_length?: string} $settings + * @param string $contentFields + * + * @return string + */ + private static function buildContext( + array $searchResults, + array $settings, + string $contentFields + ): string { + if (empty($searchResults)) { + return ''; + } + + // Parse content fields (comma-separated) + $fields = array_map('trim', explode(',', $contentFields)); + $maxDocLength = $settings['max_document_length'] ?? 2000; + + // Validate fields exist in first result (for warning) + if (isset($searchResults[0])) { + $availableFields = array_keys($searchResults[0]); + $missingFields = array_diff($fields, $availableFields); + if (!empty($missingFields)) { + Buddy::warning('Content fields not found in search results: ' . implode(', ', $missingFields)); + } + } + + $truncatedDocs = array_map( + function ($doc) use ($fields, $maxDocLength) { + $contentParts = []; + foreach ($fields as $field) { + if (!isset($doc[$field]) || !is_string($doc[$field]) || empty(trim($doc[$field]))) { + continue; + } + + $contentParts[] = $doc[$field]; + } + + // Use comma + space as separator between fields + $content = implode(', ', $contentParts); + $maxLength = (int)$maxDocLength; + return strlen($content) > $maxLength ? substr($content, 0, $maxLength) . '...' 
: $content; + }, $searchResults + ); + + return implode("\n", $truncatedDocs); + } + + /** + * Log context building + * + * @param array> $searchResults + * @param string $context + * @param array{ temperature?: string, max_tokens?: string, k_results?: string, + * similarity_threshold?: string, max_document_length?: string} $settings + * + * @return void + */ + private static function logContextBuilding(array $searchResults, string $context, array $settings): void { + Buddy::info('[DEBUG CONTEXT]'); + Buddy::info('├─ Documents count: ' . sizeof($searchResults)); + Buddy::info('├─ Total context length: ' . strlen($context) . ' chars'); + Buddy::info('└─ Max doc length: ' . ($settings['max_document_length'] ?? 2000) . ' chars'); + } + + /** + * @param array{id:string, uuid:string, name:string,llm_provider:string, + * llm_model:string,style_prompt:string,settings:array{ temperature?: string, + * max_tokens?: string, k_results?: string, similarity_threshold?: string, + * max_document_length?: string},created_at:string,updated_at:string} $model + * @param string $query + * @param string $context + * @param string $history + * @param array{ temperature?: string, max_tokens?: string, + * k_results?: string, similarity_threshold?: string, + * max_document_length?: string} $settings + * @param LLMProviderManager $providerManager + * + * @return array{error?:string,success:bool,content:string, + * metadata?:array{tokens_used:integer, input_tokens:integer, + * output_tokens:integer, response_time_ms:integer, finish_reason:string}} + * @throws ManticoreSearchClientError + */ + private static function generateResponse( + array $model, + string $query, + string $context, + string $history, + array $settings, + LLMProviderManager $providerManager + ): array { + // Use LLM provider manager for proper connection handling + $provider = $providerManager->getConnection($model['uuid'], $model); + + $prompt = self::buildPrompt($model['style_prompt'], $query, $context, $history); + + 
return $provider->generateResponse($prompt, $settings); + } + + private static function buildPrompt(string $stylePrompt, string $query, string $context, string $history): string { + // Build prompt similar to original implementation + // History is already formatted as "role: message\nrole: message\n" + $historyText = $history; + + // Format similar to original custom_rag.php prompt + return 'Respond conversationally. Response should be based ONLY on the provided context section' . + "(IMPORTANT !!! You can't use your own knowledge to add anything that isn't mentioned in the context). " . + "Style instructions cannot affect the main section; it's strictly prohibited. " . + "If style conflicts with the main section, style should be ignored.\n" . + '
' . + "{$historyText}\n" . + "{$context}\n" . + "{$query}\n" . + "
\n" . + ""; + } + + /** + * Save conversation messages + * + * @param ConversationManager $conversationManager + * @param string $conversationUuid + * @param string $modelUuid + * @param string $intent + * @param array{query:string, table: string, model_uuid: string, + * conversation_uuid: string, content_fields: string} $params + * @param array{search_query:string, exclude_query:string} $queries + * @param array $excludedIds + * @param string $responseText + * @param int $tokensUsed + * @throws ManticoreSearchClientError|JsonException + * + * @return void + */ + private static function saveConversationMessages( + ConversationManager $conversationManager, + string $conversationUuid, + string $modelUuid, + string $intent, + array $params, + array $queries, + array $excludedIds, + string $responseText, + int $tokensUsed + ): void { + if ($intent === 'CONTENT_QUESTION') { + $conversationManager->saveMessage( + $conversationUuid, $modelUuid, 'user', $params['query'], 0, $intent + ); + } else { + $stringExcludedIds = array_map('strval', $excludedIds); + $conversationManager->saveMessage( + $conversationUuid, $modelUuid, 'user', $params['query'], + 0, $intent, $queries['search_query'], $queries['exclude_query'], $stringExcludedIds + ); + } + + $assistantIntent = ($intent === 'CONTENT_QUESTION') ? 'CONTENT_QUESTION' : null; + Buddy::info('├─ Saving assistant response'); + Buddy::info('├─ Assistant intent: ' . ($assistantIntent ?? 'none')); + Buddy::info('├─ Response length: ' . strlen($responseText) . 
' chars'); + Buddy::info("├─ Tokens used: {$tokensUsed}"); + + $conversationManager->saveMessage( + $conversationUuid, $modelUuid, 'assistant', $responseText, $tokensUsed, $assistantIntent + ); + + Buddy::info('└─ Conversation processing completed'); + } + + +} diff --git a/src/Plugin/ConversationalRag/IntentClassifier.php b/src/Plugin/ConversationalRag/IntentClassifier.php new file mode 100644 index 00000000..e7a8fc9f --- /dev/null +++ b/src/Plugin/ConversationalRag/IntentClassifier.php @@ -0,0 +1,276 @@ + $modelConfig + * @return string + */ + public function classifyIntent( + string $userQuery, + string $conversationHistory, + LLMProviderManager $llmProvider, + array $modelConfig + ): string { + try { + // Limit history size for LLM context (conversationHistory is already a formatted string) + $conversationHistory = $this->limitConversationHistory($conversationHistory); + $historyText = $conversationHistory; + + $intentPrompt = "Analyze user intent from conversation. + +History: +{$historyText} + +Query: {$userQuery} + +Classify as ONE of: +- REJECTION: User declining shown content (like 'no', 'not interested', 'don't like') +- ALTERNATIVES: User wants more options (like 'what else', 'other options', 'anything else') +- TOPIC_CHANGE: User switching to new topic (like 'I want comedies instead', + 'show me action movies') +- INTEREST: User likes content and wants similar (like 'sounds good, what else like this', + 'tell me more') +- NEW_SEARCH: Fresh search with no prior context +- CONTENT_QUESTION: User asking about previously shown content (like 'what's the cast', + 'who directed it', 'when was it made', 'what's it about') +- NEW_QUESTION: User asking about new topic requiring search (like 'what about action movies', + 'show me comedies', 'tell me about programming') +- CLARIFICATION: User providing additional details or correcting previous query + (like 'no it's from a movie', 'I meant something else') +- UNCLEAR: Cannot determine intent (like gibberish, 
confusing, ambiguous) + +Answer ONLY with one word: REJECTION, ALTERNATIVES, TOPIC_CHANGE, INTEREST, +NEW_SEARCH, CONTENT_QUESTION, NEW_QUESTION, CLARIFICATION, or UNCLEAR"; + + $provider = $llmProvider->getConnection('intent_classifier', $modelConfig); + $response = $provider->generateResponse($intentPrompt, ['temperature' => 0.1, 'max_tokens' => 50]); + + if (!$response['success']) { + throw new ManticoreSearchClientError( + 'Intent classification failed: ' . ($response['error'] ?? 'Unknown error') + ); + } + + $content = (string)$response['content']; + $intent = $this->validateIntent(trim(strtoupper($content))); + + // Debug: Log intent classification + Buddy::info("\n[DEBUG INTENT CLASSIFICATION]"); + Buddy::info("└─ Detected intent: {$intent}"); + + return $intent; + } catch (Exception $e) { + // Fallback to NEW_SEARCH + Buddy::debug("Error intent classification: {$e->getMessage()}"); + return 'NEW_SEARCH'; + } + } + + /** + * Limit conversation history size for LLM context (matches original php_rag implementation) + * + * @param string $history + * @param int $maxExchanges + * @return string + */ + private function limitConversationHistory(string $history, int $maxExchanges = 10): string { + // Split by role markers (matches php_rag Line 57) + $lines = explode("\n", $history); + + // Keep only last N exchanges (2 lines per exchange) (matches php_rag Line 60-63) + $maxLines = $maxExchanges * 2; + if (sizeof($lines) > $maxLines) { + $lines = array_slice($lines, -$maxLines); + } + + return implode("\n", $lines); + } + + /** + * Validate and clean intent from LLM response + * + * @param string $intent + * @return string + */ + private function validateIntent(string $intent): string { + $validIntents = ['REJECTION', + 'ALTERNATIVES', 'TOPIC_CHANGE', + 'INTEREST', 'NEW_SEARCH', 'CONTENT_QUESTION', + 'NEW_QUESTION', 'CLARIFICATION', 'UNCLEAR']; + + // Extract just the intent word if LLM added explanation + foreach ($validIntents as $valid) { + if (stripos($intent, 
$valid) !== false) { + return $valid; + } + } + + // Default fallback + return 'NEW_SEARCH'; + } + + /** + * Generate search and exclude queries based on intent + * + * @param string $userQuery + * @param string $intent + * @param string $conversationHistory + * @param LLMProviderManager $llmProvider + * @param array $modelConfig + * @return array{search_query:string, exclude_query: string, llm_response: string} + */ + public function generateQueries( + string $userQuery, + string $intent, + string $conversationHistory, + LLMProviderManager $llmProvider, + array $modelConfig + ): array { + + $searchQuery = ''; + $excludeQuery = ''; + + try { + // Limit history size for LLM context (conversationHistory is already a formatted string) + $conversationHistory = $this->limitConversationHistory($conversationHistory); + $historyText = $conversationHistory; + + $queryPrompt = "Generate search query based on user request. + +History: +{$historyText} + +Query: {$userQuery} +Intent: {$intent} + +Generate a rich search query separated by commas, sorted by relativity with: +- Content type (movies, TV shows, books, etc.) +- Genre/theme/topic keywords +- Multiple relevant terms + +Answer format: +SEARCH_QUERY: [your query] +EXCLUDE_QUERY: [titles to exclude OR 'none'] + +Rules for EXCLUDE_QUERY - Extract exclusions from BOTH current query AND history: +1. EXPLICIT exclusions in current query (regardless of intent): + - 'I already watched X' → EXCLUDE: X + - 'similar to X but not Y' → EXCLUDE: Y + - 'I like that direction but not Z' → EXCLUDE: Z + - 'tell me about programming but not Python' → EXCLUDE: Python + +2. 
HISTORY-based exclusions (CRITICAL - Read carefully): + - REJECTION/ALTERNATIVES: Include titles previously rejected from history + - TOPIC_CHANGE: Use 'none' (fresh start, ignore history) + - IMPORTANT: Scan history BACKWARDS from most recent message + - STOP extracting exclusions if you encounter a topic change in history + - Only include exclusions from the CURRENT topic context + - Indicators of topic change: 'instead', 'actually', 'now show me', 'switch to', genre changes + +3. If NO exclusions found in query OR history: Use 'none' + +Examples: +- Query: 'I already watched Breaking Bad but I like that direction' + EXCLUDE_QUERY: Breaking Bad + +- Query: 'What else like Game of Thrones? Not The Witcher though' + EXCLUDE_QUERY: The Witcher + +- Query: 'I love this!' (with no exclusions) + EXCLUDE_QUERY: none + +- History: 'crime dramas' → 'I don't like Breaking Bad' → 'Show me comedies instead' → 'What else?' + Query: 'What else?' + Intent: ALTERNATIVES + Analysis: Topic changed to comedies, Breaking Bad rejection was in previous topic + EXCLUDE_QUERY: none (Breaking Bad is from old topic context) + +- History: 'comedies' → 'I don't like The Office' → 'What else?' + Query: 'What else?' + Intent: ALTERNATIVES + Analysis: Same topic (comedies), The Office rejection is current + EXCLUDE_QUERY: The Office (same topic context) + +Answer ONLY in the format above."; + + $provider = $llmProvider->getConnection('query_generator', $modelConfig); + $response = $provider->generateResponse($queryPrompt, ['temperature' => 0.3, 'max_tokens' => 200]); + + if (!$response['success']) { + throw new ManticoreSearchClientError( + 'Query generation failed: ' . ($response['error'] ?? 
'Unknown error') + ); + } + + $lines = explode("\n", trim((string)$response['content'])); + foreach ($lines as $line) { + $line = trim($line); + if (preg_match('/^SEARCH_QUERY:\s*(.+)$/i', $line, $matches)) { + $searchQuery = trim($matches[1]); + } + if (!preg_match('/^EXCLUDE_QUERY:\s*(.+)$/i', $line, $matches)) { + continue; + } + + $excludeQuery = trim($matches[1]); + } + + // Fallback to user query if parsing failed + if (empty($searchQuery)) { + $searchQuery = $userQuery; + } + + // Clean up exclude query + if (empty($excludeQuery) || strtolower($excludeQuery) === 'none') { + $excludeQuery = ''; + } + + return [ + 'search_query' => $searchQuery, + 'exclude_query' => $excludeQuery, + 'llm_response' => $response['content'], + ]; + } catch (Exception $e) { + // Fallback to safe defaults + $searchQuery = $userQuery; + + // Debug: Log query generation + Buddy::info("\n[DEBUG QUERY GENERATION]"); + Buddy::info("├─ User query: '{$userQuery}'"); + Buddy::info("├─ Intent: {$intent}"); + Buddy::info("├─ Generated SEARCH_QUERY: '{$searchQuery}'"); + Buddy::info("└─ Generated EXCLUDE_QUERY: '{$excludeQuery}'"); + + return [ + 'search_query' => $searchQuery, + 'exclude_query' => $excludeQuery, + 'llm_response' => $response['content'] ?? 
$e->getMessage(), + ]; + } + } +} diff --git a/src/Plugin/ConversationalRag/LLMProviderManager.php b/src/Plugin/ConversationalRag/LLMProviderManager.php new file mode 100644 index 00000000..5b043e30 --- /dev/null +++ b/src/Plugin/ConversationalRag/LLMProviderManager.php @@ -0,0 +1,88 @@ + + */ + private array $providers = []; + + /** + * @var array + */ + private array $connections = []; + + /** + * Get configured connection for a model + * + * @param string $modelId + * @param array $modelConfig + * @return BaseProvider + * @throws ManticoreSearchClientError + */ + public function getConnection(string $modelId, array $modelConfig): BaseProvider { + if (!isset($this->connections[$modelId])) { + /** @var string $providerName */ + $providerName = $modelConfig['llm_provider']; + $provider = $this->createProvider($providerName); + $provider->configure($modelConfig); + $this->connections[$modelId] = $provider; + } + + return $this->connections[$modelId]; + } + + /** + * Create provider instance by name + * + * @param string $providerName + * @return BaseProvider + * @throws ManticoreSearchClientError + */ + private function createProvider(string $providerName): BaseProvider { + return match ($providerName) { + 'openai' => new OpenAIProvider(), + default => throw ManticoreSearchClientError::create( + "Unsupported LLM provider: {$providerName}. Only 'openai' is supported." 
+ ) + }; + } + + /** + * Get provider instance by name + * + * @param string $providerName + * @return BaseProvider + * @throws ManticoreSearchClientError + */ + public function getProvider(string $providerName): BaseProvider { + if (!isset($this->providers[$providerName])) { + $this->providers[$providerName] = $this->createProvider($providerName); + } + + return $this->providers[$providerName]; + } + + + + + + +} diff --git a/src/Plugin/ConversationalRag/LLMProviders/BaseProvider.php b/src/Plugin/ConversationalRag/LLMProviders/BaseProvider.php new file mode 100644 index 00000000..460030d1 --- /dev/null +++ b/src/Plugin/ConversationalRag/LLMProviders/BaseProvider.php @@ -0,0 +1,285 @@ + + */ + protected array $config = []; + protected ?object $client = null; + + /** + * Configure provider with model settings + * + * @param array $config + * @return void + */ + public function configure(array $config): void { + $this->config = $config; + $this->client = null; // Reset client to force recreation + } + + /** + * Generate a response from the LLM + * + * @param string $prompt + * @param array{ temperature?: string|float, max_tokens?: string|int, + * k_results?: string|int, similarity_threshold?: string|int, + * max_document_length?: string|int} $options + * + * @return array{error?:string,success:bool,content:string, + * metadata?:array{tokens_used:integer, input_tokens:integer, + * output_tokens:integer, response_time_ms:integer, finish_reason:string}} + */ + abstract public function generateResponse(string $prompt, array $options = []): array; + + /** + * Estimate token count for a text + * + * @param string $text + * @return int + */ + public function estimateTokens(string $text): int { + // Simple estimation: ~4 characters per token + return (int)ceil(strlen($text) / 4); + } + + /** + * Get or create HTTP client + * + * @return object + */ + protected function getClient(): object { + if ($this->client === null) { + $this->client = $this->createClient(); + } + + 
return $this->client; + } + + /** + * Create HTTP client for the provider + * + * @return object + */ + abstract protected function createClient(): object; + + /** + * Merge settings from configuration and overrides + * + * @param array $overrides + * + * @return array + */ + protected function getSettings(array $overrides = []): array { + $settings = []; + + // Extract settings from main config + if (isset($this->config['settings']) && is_string($this->config['settings'])) { + $settings = json_decode($this->config['settings'], true) ?? []; + // Convert string numeric values to proper types + $settings = $this->convertSettingsTypes(is_array($settings) ? $settings : []); + } elseif (isset($this->config['settings']) && is_array($this->config['settings'])) { + $settings = $this->config['settings']; + } + + // Merge direct config values with type conversion + $directSettings = [ + 'temperature' => $this->convertToFloat($this->getConfig('temperature')), + 'max_tokens' => $this->convertToInt($this->getConfig('max_tokens')), + 'top_p' => $this->convertToFloat($this->getConfig('top_p')), + 'frequency_penalty' => $this->convertToFloat($this->getConfig('frequency_penalty')), + 'presence_penalty' => $this->convertToFloat($this->getConfig('presence_penalty')), + ]; + + foreach ($directSettings as $key => $value) { + if ($value === null) { + continue; + } + + $settings[$key] = $value; + } + + // Apply overrides with type conversion + $overrides = $this->convertSettingsTypes($overrides); + return array_merge($settings, $overrides); + } + + /** + * Convert settings array types from strings to proper types + * + * @param array $settings + * @return array + */ + protected function convertSettingsTypes(array $settings): array { + $numericFields = ['temperature', 'max_tokens', 'top_p', 'frequency_penalty', 'presence_penalty', 'k_results']; + + foreach ($numericFields as $field) { + if (!isset($settings[$field]) || !is_string($settings[$field]) || !is_numeric($settings[$field])) { + 
continue; + } + + // Convert to int for integer fields, float for others + if (in_array($field, ['max_tokens', 'k_results'])) { + $settings[$field] = (int)$settings[$field]; + } else { + $settings[$field] = (float)$settings[$field]; + } + } + + return $settings; + } + + /** + * Convert value to float if it's a numeric string + */ + protected function convertToFloat(mixed $value): mixed { + if (is_string($value) && is_numeric($value)) { + return (float)$value; + } + return $value; + } + + /** + * Get configuration value + * + * @param string $key + * @param mixed $default + * @return mixed + */ + protected function getConfig(string $key, mixed $default = null): mixed { + return $this->config[$key] ?? $default; + } + + /** + * Convert value to integer if it's a numeric string + */ + protected function convertToInt(mixed $value): mixed { + if (is_string($value) && is_numeric($value)) { + return (int)$value; + } + return $value; + } + + /** + * Build style prompt + * + * @return string + */ + protected function getStylePrompt(): string { + /** @var string $prompt */ + $prompt = $this->getConfig('style_prompt', ''); + + if (empty($prompt)) { + $prompt = 'You are a helpful AI assistant. 
Answer questions based on the provided context.'; + } + + return $prompt; + } + + /** + * Format error response + * + * @param string $message + * @param Exception|null $exception + * @return array{success:bool, error:string, details: string|null, provider:string} + */ + protected function formatError(string $message, ?Exception $exception = null): array { + return [ + 'success' => false, + 'error' => $message, + 'details' => $exception?->getMessage(), + 'provider' => $this->getName(), + ]; + } + + /** + * Get the provider name + * + * @return string + */ + abstract public function getName(): string; + + /** + * Format success response + * + * @param string $content + * @param array $metadata + * @return array + */ + protected function formatSuccess(string $content, array $metadata = []): array { + return [ + 'success' => true, + 'content' => $content, + 'metadata' => array_merge( + [ + 'provider' => $this->getName(), + 'model' => $this->getConfig('llm_model'), + ], $metadata + ), + ]; + } + + /** + * Get API key for the current provider + * + * @return string + * @throws QueryParseError + */ + protected function getApiKey(): string { + $provider = $this->getConfig('llm_provider'); + if ($provider === null || $provider === '') { + throw new QueryParseError('LLM provider not configured'); + } + return $this->getApiKeyForProvider(is_string($provider) ? 
$provider : ''); + } + + /** + * Get API key for a given provider (consolidated method) + * + * @param string $provider Provider name (e.g., 'openai') + * @return string Actual API key value (e.g., 'sk-proj-abc123...') + * @throws QueryParseError If provider unsupported or env var missing/empty + */ + private function getApiKeyForProvider(string $provider): string { + if (empty($provider)) { + throw new QueryParseError('LLM provider not configured'); + } + + + if (!isset(ModelManager::PROVIDER_ENV_VARS[$provider])) { + $supportedProviders = implode(', ', array_keys(ModelManager::PROVIDER_ENV_VARS)); + throw new QueryParseError( + "Unsupported LLM provider: '$provider'. Supported providers: {$supportedProviders}" + ); + } + + $envVarName = ModelManager::PROVIDER_ENV_VARS[$provider]; + + $actualApiKey = getenv($envVarName); + if (empty($actualApiKey)) { + throw new QueryParseError( + "Environment variable '$envVarName' not found or empty. Please set this variable with your API key." + ); + } + + return $actualApiKey; + } +} diff --git a/src/Plugin/ConversationalRag/LLMProviders/OpenAIProvider.php b/src/Plugin/ConversationalRag/LLMProviders/OpenAIProvider.php new file mode 100644 index 00000000..2b077749 --- /dev/null +++ b/src/Plugin/ConversationalRag/LLMProviders/OpenAIProvider.php @@ -0,0 +1,199 @@ + + */ + public function generateResponse(string $prompt, array $options = []): array { + try { + // Provider handles its own base URL - no user configuration needed + $baseUrl = self::BASE_URL; + + $apiKey = $this->getApiKey(); + $model = $this->getConfig('llm_model', self::DEFAULT_MODEL); + $settings = $this->getSettings($options); + $stylePrompt = $this->getStylePrompt(); + + // Build messages array + $messages = []; + + if (!empty($stylePrompt)) { + $messages[] = ['role' => 'system', 'content' => $stylePrompt]; + } + + $messages[] = ['role' => 'user', 'content' => $prompt]; + + // Prepare request data + $data = [ + 'model' => $model, + 'messages' => $messages, + 
'temperature' => $settings['temperature'] ?? 0.7, + 'max_tokens' => $settings['max_tokens'] ?? 4000, + ]; + + // Optional parameters + if (isset($settings['top_p'])) { + $data['top_p'] = $settings['top_p']; + } + if (isset($settings['frequency_penalty'])) { + $data['frequency_penalty'] = $settings['frequency_penalty']; + } + if (isset($settings['presence_penalty'])) { + $data['presence_penalty'] = $settings['presence_penalty']; + } + + $startTime = microtime(true); + $response = $this->makeRequest($baseUrl, $apiKey, 'chat/completions', $data); + $responseTime = (int)((microtime(true) - $startTime) * 1000); + + if (!$response['success']) { + $error = $response['error'] ?? 'OpenAI API request failed'; + return $this->formatError(is_string($error) ? $error : 'OpenAI API request failed'); + } + + $result = $response['data']; + if (!is_array($result)) { + return $this->formatError('Invalid API response format'); + } + + $content = $result['choices'][0]['message']['content'] ?? ''; + $usage = $result['usage'] ?? []; + + return $this->formatSuccess( + $content, [ + 'tokens_used' => (int)($usage['total_tokens'] ?? 0), + 'input_tokens' => (int)($usage['prompt_tokens'] ?? 0), + 'output_tokens' => (int)($usage['completion_tokens'] ?? 0), + 'response_time_ms' => $responseTime, + 'finish_reason' => $result['choices'][0]['finish_reason'] ?? 
'unknown', + ] + ); + } catch (Exception $e) { + return $this->formatError('OpenAI request failed', $e); + } + } + + /** + * Generate streaming response from OpenAI + * + * @param string $prompt + * @param array{temperature?: string|float, max_tokens?: string|int, + * k_results?: string|int, similarity_threshold?: string|int, + * max_document_length?: string|int} $options + * @param callable|null $callback + * @return array{success: true, content: string, metadata: array{provider: string, + * model: string}}|array{success: false, error: string} + */ + + /** + * Make HTTP request to OpenAI API + * + * @param string $baseUrl + * @param string $apiKey + * @param string $endpoint + * @param array{model:mixed, messages:array, + * temperature: mixed, max_tokens:mixed, top_p?:mixed, frequency_penalty?:mixed, + * presence_penalty?:mixed} $data + * + * @return array + * @throws \JsonException + */ + protected function makeRequest( + string $baseUrl, + string $apiKey, + string $endpoint, + array $data + ): array { + /** @var CurlHandle $curl */ + $curl = $this->getClient(); + + $url = rtrim($baseUrl, '/') . '/' . ltrim($endpoint, '/'); + + /** @var array $curlOptions */ + $curlOptions = [ + CURLOPT_URL => $url, + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => json_encode($data), + CURLOPT_HTTPHEADER => [ + 'Content-Type: application/json', + 'Authorization: Bearer ' . $apiKey, + ], + ]; + curl_setopt_array($curl, $curlOptions); + + $response = (string)curl_exec($curl); + $httpCode = curl_getinfo($curl, CURLINFO_HTTP_CODE); + $error = curl_error($curl); + + if ($error) { + return ['success' => false, 'error' => 'HTTP request failed: ' . $error]; + } + + $response = json_decode($response, true, 512, JSON_THROW_ON_ERROR); + if (JSON_ERROR_NONE !== json_last_error() || !is_array($response)) { + return ['success' => false, 'error' => 'Invalid JSON response']; + } + if ($httpCode !== 200) { + return ['success' => false, 'error' => $response['error']['message'] ?? 
"HTTP {$httpCode} error"]; + } + + return ['success' => true, 'data' => $response]; + } + + /** + * Create HTTP client + * + * @return CurlHandle + */ + protected function createClient(): CurlHandle { + $curl = curl_init(); + + /** @var array $curlOptions */ + $curlOptions = [ + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TIMEOUT => 120, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_SSL_VERIFYPEER => true, + CURLOPT_USERAGENT => 'ManticoreSearch-Buddy-RAG/1.0', + ]; + curl_setopt_array($curl, $curlOptions); + + return $curl; + } +} diff --git a/src/Plugin/ConversationalRag/ModelManager.php b/src/Plugin/ConversationalRag/ModelManager.php new file mode 100644 index 00000000..cee8e220 --- /dev/null +++ b/src/Plugin/ConversationalRag/ModelManager.php @@ -0,0 +1,327 @@ + 'OPENAI_API_KEY', + 'anthropic' => 'ANTHROPIC_API_KEY', + 'grok' => 'GROK_API_KEY', + 'mistral' => 'MISTRAL_API_KEY', + 'ollama' => 'OLLAMA_API_KEY', + ]; + + private bool $tablesInitialized = false; + + /** + * Initialize database tables + * + * @param HTTPClient $client + * + * @return void + * @throws ManticoreSearchClientError + */ + public function initializeTables(HTTPClient $client): void { + if ($this->tablesInitialized) { + return; + } + + $this->createModelsTable($client); + + $this->tablesInitialized = true; + } + + /** + * Create RAG models table + * + * @param HTTPClient $client + * + * @return void + * @throws ManticoreSearchClientError + */ + private function createModelsTable(HTTPClient $client): void { + $sql = /** @lang Manticore */ 'CREATE TABLE IF NOT EXISTS ' . self::MODELS_TABLE . ' ( + uuid string, + name string, + llm_provider text, + llm_model text, + style_prompt text, + settings json, + created_at bigint, + updated_at bigint + )'; + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchClientError::create('Failed to create models table: ' . 
$response->getError()); + } + } + + + /** + * Create a new RAG model + * + * @param HTTPClient $client + * @param array{name: string, llm_provider: string, llm_model: string, style_prompt?:string, + * settings?: string|array, temperature?: string, + * max_tokens?: string, k_results?: string, similarity_threshold?: string, + * max_document_length?: string} $config + * + * @return string Model ID + * @throws ManticoreSearchClientError|ManticoreSearchResponseError|RandomException + */ + public function createModel(HTTPClient $client, array $config): string { + $modelName = $config['name']; + $modelUuid = $this->generateUuid(); + + // Check if the model already exists + if ($this->modelExists($client, $modelName)) { + throw ManticoreSearchClientError::create("RAG model '$modelName' already exists"); + } + + // Prepare settings + $settings = $this->extractSettings($config); + + // Insert model + $currentTime = time(); + $sql = sprintf( + 'INSERT INTO %s (uuid, name, llm_provider, llm_model, style_prompt, settings, created_at, updated_at) '. + 'VALUES (%s, %s, %s, %s, %s, %s, %d, %d)', + self::MODELS_TABLE, + $this->quote($modelUuid), + $this->quote($config['name']), + $this->quote($config['llm_provider']), + $this->quote($config['llm_model']), + $this->quote($config['style_prompt'] ?? ''), + $this->quote($this->encodeSettings($settings)), + $currentTime, + $currentTime + ); + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchClientError::create('Failed to create model: ' . 
$response->getError()); + } + + return $modelUuid; + } + + /** + * Generate a UUID v4 + * + * @return string + * @throws RandomException + */ + private function generateUuid(): string { + $data = random_bytes(16); + $data[6] = chr(ord($data[6]) & 0x0f | 0x40); // Version 4 + $data[8] = chr(ord($data[8]) & 0x3f | 0x80); // Variant 10 + return vsprintf('%s%s-%s-%s-%s-%s%s%s', str_split(bin2hex($data), 4)); + } + + /** + * Check if model exists + * + * @param HTTPClient $client + * @param string $modelName + * + * @return bool + * @throws ManticoreSearchClientError|ManticoreSearchResponseError + */ + private function modelExists(HTTPClient $client, string $modelName): bool { + $sql = sprintf( + /** @lang Manticore */ 'SELECT COUNT(*) as count FROM %s WHERE name = %s', + self::MODELS_TABLE, + $this->quote($modelName) + ); + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchClientError::create('Failed to check model existence: ' . $response->getError()); + } + + /** @var array}> $result */ + $result = $response->getResult(); + $data = $result[0]['data'] ?? []; + + return !empty($data) && ($data[0]['count'] ?? 
0) > 0; + } + + /** + * Extract settings from config into separate array + * + * @param array $config + * @return array + */ + private function extractSettings(array $config): array { + /** @var array $coreFields */ + $coreFields = ['id', 'name', 'llm_provider', 'llm_model', 'llm_api_key', 'style_prompt']; + /** @var array $settings */ + $settings = []; + + foreach ($config as $key => $value) { + if (in_array($key, $coreFields)) { + continue; + } + + // If settings is a JSON string, parse it + if ($key === 'settings' && is_string($value)) { + $parsedSettings = json_decode($value, true); + if (json_last_error() === JSON_ERROR_NONE && is_array($parsedSettings)) { + $settings = array_merge($settings, $parsedSettings); + } else { + $settings[$key] = $value; + } + } elseif ($key === 'settings' && is_array($value)) { + // If settings is already an array, merge it + $settings = array_merge($settings, $value); + } else { + $settings[$key] = $value; + } + } + + return $settings; + } + + /** + * Safely encode settings to JSON string + * + * @param array $settings + * + * @return string + * @throws ManticoreSearchClientError + */ + private function encodeSettings(array $settings): string { + $encoded = json_encode($settings); + if (json_last_error() !== JSON_ERROR_NONE || empty($encoded)) { + throw ManticoreSearchClientError::create('Failed to encode settings to JSON: ' . json_last_error_msg()); + } + return $encoded; + } + + /** + * Get all RAG models + * + * @param HTTPClient $client + * + * @return array + * @throws ManticoreSearchResponseError + * @throws ManticoreSearchClientError + */ + public function getAllModels(HTTPClient $client): array { + $sql = /** @lang manticore */'SELECT id, uuid, name, llm_provider, llm_model, created_at + FROM ' . self::MODELS_TABLE . ' + ORDER BY created_at DESC'; + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchResponseError::create('Failed to get all models: ' . 
$response->getError()); + } + + /** @var array}> $result */ + $result = $response->getResult(); + return $result[0]['data'] ?? []; + } + + /** + * Delete RAG model by UUID or name + * + * @param HTTPClient $client + * @param string $modelUuidOrName + * + * @return void + * @throws ManticoreSearchClientError + * @throws ManticoreSearchResponseError + */ + public function deleteModelByUuidOrName(HTTPClient $client, string $modelUuidOrName): void { + + $model = $this->getModelByUuidOrName($client, $modelUuidOrName); + + // Hard delete: remove the row of the model resolved above, matched by its UUID + $sql = sprintf( + /** @lang Manticore */ 'DELETE FROM %s WHERE uuid = %s', + self::MODELS_TABLE, + $this->quote($model['uuid']) + ); + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchClientError::create('Failed to delete model: ' . $response->getError()); + } + } + + /** + * Clean up conversations for a deleted model + * + * @param HTTPClient $client + * @param string $modelUuid + * + * @return void + */ + + /** + * Get model by name or UUID (returns environment variable names, does not resolve them) + * + * @param HTTPClient $client + * @param string $modelNameOrUuid Model name or UUID + * + * @return array{id:string, uuid:string, name:string,llm_provider:string,llm_model:string, + * style_prompt:string,settings:array{ temperature?: string, max_tokens?: string, k_results?: string, + * similarity_threshold: string, max_document_length: string},created_at:string,updated_at:string} + * @throws ManticoreSearchClientError|ManticoreSearchResponseError + */ + public function getModelByUuidOrName(HTTPClient $client, string $modelNameOrUuid): array { + $sql = /** @lang Manticore */ 'SELECT * FROM ' . self::MODELS_TABLE . + ' WHERE (name = ' . $this->quote($modelNameOrUuid) . ' OR uuid = ' . $this->quote($modelNameOrUuid) .
')'; + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchResponseError::create('Failed to get model by UUID/name: ' . $response->getError()); + } + + $data = $response->getResult(); + if (is_array($data[0]) && !empty($data[0]['data'])) { + $model = $data[0]['data'][0]; + if (!empty($model['settings'])) { + $decoded = json_decode($model['settings'], true); + if (json_last_error() === JSON_ERROR_NONE && is_array($decoded)) { + $model['settings'] = $decoded; + } else { + $model['settings'] = []; + } + } else { + $model['settings'] = []; + } + + return $model; + } + + throw ManticoreSearchClientError::create("RAG model '$modelNameOrUuid' not found"); + } + + +} diff --git a/src/Plugin/ConversationalRag/Payload.php b/src/Plugin/ConversationalRag/Payload.php new file mode 100644 index 00000000..fc4cab9a --- /dev/null +++ b/src/Plugin/ConversationalRag/Payload.php @@ -0,0 +1,361 @@ + + */ +final class Payload extends BasePayload { + public const string ACTION_CREATE_MODEL = 'create_model'; + public const string ACTION_SHOW_MODELS = 'show_models'; + public const string ACTION_DESCRIBE_MODEL = 'describe_model'; + public const string ACTION_DROP_MODEL = 'drop_model'; + public const string ACTION_CONVERSATION = 'conversation'; + + /** @var string */ + public string $action; + + /** @var string */ + public string $query; + + /** @var array */ + public array $params = []; + + /** + * Check if the request matches this plugin + * + * @param Request $request + * @return bool + */ + public static function hasMatch(Request $request): bool { + // Debug: log that our hasMatch is being called + error_log('[ConversationalRag] hasMatch called with payload: ' . substr($request->payload, 0, 100)); + error_log('[ConversationalRag] Request error: ' . ($request->error ?? 
'none')); + + // Check SQL patterns first + if (self::matchesSQL($request)) { + error_log('[ConversationalRag] SQL pattern matched!'); + return true; + } + + + + error_log('[ConversationalRag] No patterns matched'); + return false; + } + + /** + * Check if SQL query matches RAG patterns + * + * @param Request $request + * @return bool + */ + private static function matchesSQL(Request $request): bool { + // Check for error-based matching (ManticoreSearch tried and failed) + if (isset($request->error)) { + $errorBasedPatterns = [ + 'no such built-in procedure CONVERSATIONAL_RAG', + 'syntax error, unexpected tablename, expecting CLUSTER or FUNCTION or PLUGIN or TABLE near \'RAG MODEL', + ]; + + foreach ($errorBasedPatterns as $errorPattern) { + if (str_contains($request->error, $errorPattern)) { + return true; + } + } + } + + // Also check direct payload patterns for CREATE RAG MODEL syntax + $payload = strtoupper(trim($request->payload)); + $patterns = [ + '/^CALL\s+CONVERSATIONAL_RAG\s*\(/i', + '/^CREATE\s+RAG\s+MODEL\s+/i', + '/^SHOW\s+RAG\s+MODELS/i', + '/^DESCRIBE\s+RAG\s+MODEL\s+/i', + '/^DROP\s+RAG\s+MODEL\s+/i', + ]; + + foreach ($patterns as $pattern) { + if (preg_match($pattern, $payload)) { + return true; + } + } + + return false; + } + + + + /** + * Create payload from request + * + * @param Request $request + * @return static + * @throws QueryParseError + */ + public static function fromRequest(Request $request): static { + $payload = new static(); + $payload->query = $request->payload; + + // Parse SQL request only (HTTP API not supported) + $payload->parseSQLRequest($request); + + return $payload; + } + + + + /** + * Parse SQL request + * + * @param Request $request + * @return void + * @throws QueryParseError + */ + private function parseSQLRequest(Request $request): void { + $sql = trim($request->payload); + + // SQL syntax for model management + if (preg_match('/^CREATE\s+RAG\s+MODEL\s+[\'"]?(\w+)[\'"]?\s*\((.*)\)$/si', $sql, $matches)) { + 
$this->action = self::ACTION_CREATE_MODEL; + $this->params = $this->parseCreateModelParams($matches[1], $matches[2]); + } elseif (preg_match('/^SHOW\s+RAG\s+MODELS$/i', $sql)) { + $this->action = self::ACTION_SHOW_MODELS; + } elseif (preg_match('/^DESCRIBE\s+RAG\s+MODEL\s+[\'"]?([^\'"]+)[\'"]?$/i', $sql, $matches)) { + $this->action = self::ACTION_DESCRIBE_MODEL; + $this->params = ['model_name_or_uuid' => $matches[1]]; + } elseif (preg_match('/^DROP\s+RAG\s+MODEL\s+[\'"]?([^\'"]+)[\'"]?$/i', $sql, $matches)) { + $this->action = self::ACTION_DROP_MODEL; + $this->params = ['model_name_or_uuid' => $matches[1]]; + } elseif (preg_match('/^CALL\s+CONVERSATIONAL_RAG\s*\((.*)\)$/si', $sql, $matches)) { + $this->action = self::ACTION_CONVERSATION; + $this->params = $this->parseConversationParams($matches[1]); + } else { + throw new QueryParseError('Invalid RAG query syntax'); + } + } + + + + /** + * Parse CREATE RAG MODEL parameters + * + * @param string $modelName + * @param string $params + * @return array + */ + private function parseCreateModelParams(string $modelName, string $params): array { + // Parse key=value pairs from CREATE RAG MODEL syntax + $config = ['name' => $modelName]; + $paramPairs = $this->parseKeyValueParams($params); + + foreach ($paramPairs as $key => $value) { + if ($key === 'name') { + $config['display_name'] = $this->unquoteString($value); + } else { + $config[$key] = $this->unquoteString($value); + } + } + + if (!isset($config['name'])) { + $config['name'] = $modelName; + } + + return $config; + } + + /** + * Parse key=value parameters; commas inside quoted values or brackets do not separate pairs + * + * @param string $params + * @return array + */ + private function parseKeyValueParams(string $params): array { + $result = []; + $lines = $this->parseCommaSeparatedParams($params); // quote-aware split: values may contain commas + + foreach ($lines as $line) { + if (empty($line)) { + continue; + } + + // Split on first = sign + $parts = explode('=', $line, 2); + if (sizeof($parts) !== 2) { + continue; + } + + $key = trim($parts[0]); + $value =
trim($parts[1]); + $result[$key] = $value; + } + + return $result; + } + + /** + * Remove quotes from string + * + * @param string $str + * @return string + */ + private function unquoteString(string $str): string { + $str = trim($str); + + if ((str_starts_with($str, '"') + && str_ends_with($str, '"')) + || (str_starts_with($str, "'") + && str_ends_with($str, "'"))) { + $unquoted = substr($str, 1, -1); + // Handle SQL escaped quotes + $unquoted = str_replace("\\'", "'", $unquoted); + $unquoted = str_replace('\\"', '"', $unquoted); + return $unquoted; + } + + return $str; + } + + /** + * Parse conversation parameters + * + * @param string $params + * @return array + * @throws QueryParseError + */ + private function parseConversationParams(string $params): array { + $parts = $this->parseCommaSeparatedParams($params); + + $result = [ + 'query' => $this->unquoteString($parts[0] ?? ''), + 'table' => $this->unquoteString($parts[1] ?? ''), + 'model_uuid' => $this->unquoteString($parts[2] ?? ''), + ]; + + if (!isset($parts[3])) { + throw QueryParseError::create('content_fields parameter is required (position 4)'); + } + + $contentFields = trim($this->unquoteString($parts[3])); + if (empty($contentFields)) { + throw QueryParseError::create('content_fields parameter cannot be empty'); + } + + $result['content_fields'] = $contentFields; + + // conversation_uuid is now the 5th parameter and OPTIONAL + if (isset($parts[4])) { + $result['conversation_uuid'] = $this->unquoteString($parts[4]); + } + + return $result; + } + + /** + * Parse comma-separated parameters with quote handling + * + * @param string $params + * @return array + */ + private function parseCommaSeparatedParams(string $params): array { + $result = []; + $current = ''; + $depth = 0; + $inQuotes = false; + $quoteChar = ''; + + $i = 0; + while ($i < strlen($params)) { + $char = $params[$i]; + $charsConsumed = 1; + $skipAppend = false; + + [$inQuotes, $quoteChar, $current, $charsConsumed, $skipAppend] = 
$this->handleCharacterInQuotes( + $char, $inQuotes, $quoteChar, $current, $params, $i + ); + + if (!$inQuotes && $char === ',' && $depth === 0) { + // We found a parameter separator + $result[] = trim($current); + $current = ''; + } elseif (!$inQuotes) { + // Handle other non-quote characters outside quotes + if ($char === '(' || $char === '{' || $char === '[') { + $depth++; + } elseif ($char === ')' || $char === '}' || $char === ']') { + $depth--; + } + $current .= $char; + } elseif (!$skipAppend) { + // We're in quotes and should append this character + $current .= $char; + } + + $i += $charsConsumed; + } + + if (!empty(trim($current))) { + $result[] = trim($current); + } + + return $result; + } + + /** + * Handle character processing when inside quotes + * + * @param string $char + * @param bool $inQuotes + * @param string $quoteChar + * @param string $current + * @param string $params + * @param int $i + * @return array{0: bool, 1: string, 2: string, 3: int, 4: bool} + */ + private function handleCharacterInQuotes( + string $char, + bool $inQuotes, + string $quoteChar, + string $current, + string $params, + int $i + ): array { + $charsConsumed = 1; + $skipAppend = false; + + // Check for escaped characters when in quotes + if ($inQuotes && $char === '\\' && $i + 1 < strlen($params)) { + // Add both the escape and the escaped character + $current .= $char; + $current .= $params[$i + 1]; + $charsConsumed = 2; + $skipAppend = true; + return [$inQuotes, $quoteChar, $current, $charsConsumed, $skipAppend]; + } + + if (!$inQuotes && ($char === '"' || $char === "'")) { + $inQuotes = true; + $quoteChar = $char; + } elseif ($inQuotes && $char === $quoteChar) { + $inQuotes = false; + $quoteChar = ''; + } + + return [$inQuotes, $quoteChar, $current, $charsConsumed, $skipAppend]; + } + + +} diff --git a/src/Plugin/ConversationalRag/README.md b/src/Plugin/ConversationalRag/README.md new file mode 100644 index 00000000..ebb04bab --- /dev/null +++ 
b/src/Plugin/ConversationalRag/README.md @@ -0,0 +1,332 @@ +# ConversationalRag Plugin + +The ConversationalRag plugin enables conversational Retrieval-Augmented Generation (RAG) capabilities in ManticoreSearch. It combines vector search with OpenAI's language models to provide intelligent, context-aware responses based on your data. + +## Features + +- **Conversational AI**: Maintains conversation context with automatic UUID generation +- **Vector Search**: Uses KNN search on float_vector fields to find relevant documents +- **OpenAI Integration**: Supports OpenAI models (gpt-4o, gpt-4o-mini, gpt-3.5-turbo, etc.) +- **Intent Classification**: Detects user intent (NEW_SEARCH, ALTERNATIVES, TOPIC_CHANGE, etc.) +- **Dynamic Thresholds**: Adjusts search similarity thresholds based on conversation flow +- **SQL-based Management**: Native SQL syntax for model and conversation management + +## Quick Start + +### 1. Prerequisites + +- ManticoreSearch with Buddy plugin enabled +- OpenAI API key (set as environment variable `OPENAI_API_KEY`) +- Table with vector embeddings (float_vector field) + +### 2. Environment Setup + +Before starting searchd, you need to set the LLM API key as an environment variable. + +```bash +# Set your OpenAI API key +export OPENAI_API_KEY="sk-proj-your-api-key-here" +``` + +### 3. Create a RAG Model + +```sql +CREATE RAG MODEL 'my_assistant' ( + llm_provider='openai', + llm_model='gpt-4o-mini' +); +``` + +### 4. 
Start a Conversation + +```sql +-- Basic usage with single content field +CALL CONVERSATIONAL_RAG( + 'What is vector search?', + 'documents_table', + 'my_assistant', + 'content' +); + +-- With multiple content fields +CALL CONVERSATIONAL_RAG( + 'What is vector search?', + 'documents_table', + 'my_assistant', + 'title,content,summary' +); +``` + +## Detailed Usage + +### Model Management + +#### Creating Models + +```sql +-- Basic model +CREATE RAG MODEL basic_assistant ( + llm_provider='openai', + llm_model='gpt-4o-mini' +); + +-- Advanced model with custom settings +CREATE RAG MODEL advanced_assistant ( + llm_provider='openai', + llm_model='gpt-4o', + style_prompt='You are an expert database administrator. Provide detailed, technical answers with examples.', + settings='{"temperature":0.3, "max_tokens":4000, "k_results":10, "similarity_threshold": 0.8, "max_document_length":3000}' +); +``` + +#### Viewing Models + +```sql +-- List all models +SHOW RAG MODELS; + +-- Describe specific model +DESCRIBE RAG MODEL my_assistant; +``` + +#### Dropping Models + +```sql +DROP RAG MODEL my_assistant; +``` + +### Conversational Queries + +#### Basic Conversation + +```sql +-- Start new conversation +CALL CONVERSATIONAL_RAG( + 'Explain full-text search in ManticoreSearch', + 'knowledge_base', + 'my_assistant', + 'content' +); +``` + +#### Conversation with UUID Tracking + +The optional `conversation_uuid` argument (5th position; e.g., `conv-12345678-1234-1234-1234-123456789abc`) can be supplied by the caller to track and continue a conversation.
+ +```sql +-- Start conversation with specific UUID +CALL CONVERSATIONAL_RAG( + 'What are the best indexing strategies?', + 'documents', + 'my_assistant', + 'content', + 'conv-12345678-1234-1234-1234-123456789abc' +); + +-- Continue same conversation +CALL CONVERSATIONAL_RAG( + 'Can you give me a practical example?', + 'documents', + 'my_assistant', + 'content', + 'conv-12345678-1234-1234-1234-123456789abc' +); +``` + +#### Content Fields Specification + +You must specify which fields to use for document context. You can specify a single field or multiple fields: + +```sql +-- Use a single field +CALL CONVERSATIONAL_RAG( + 'What are the latest updates?', + 'news', + 'my_assistant', + 'summary' +); + +-- Use multiple fields (comma-separated in output) +CALL CONVERSATIONAL_RAG( + 'Explain this topic', + 'articles', + 'my_assistant', + 'title,content,conclusion' +); + +-- With specific conversation UUID +CALL CONVERSATIONAL_RAG( + 'Continue our discussion', + 'articles', + 'my_assistant', + 'content', + 'conversation-uuid-12345' +); +``` + +#### Parameter Reference + +| Position | Parameter | Required | Default | Description | +|----------|-----------------|----------|---------|--------------------------------------| +| 1 | query | Yes | - | User's question | +| 2 | table | Yes | - | Table to search | +| 3 | model_uuid | Yes | - | RAG model UUID or name | +| 4 | content_fields | Yes | - | Comma-separated field names | +| 5 | conversation_uuid | No | - | Conversation UUID | + +#### Content Field Behavior + +- **Single field**: `'content'` - Uses only the content field +- **Multiple fields**: `'title,content,summary'` - Concatenates all specified fields with a comma and space +- **Missing fields**: Automatically skipped with a warning logged +- **Empty fields**: Fields with empty or whitespace-only content are excluded +- **Output format**: Multiple fields joined as: `"field1, field2, field3"` +- **Required**: The `content_fields` parameter is mandatory; queries without 
it will throw an exception + +## Data Preparation + +### Table Requirements + +Your table must have: +- A vector field (float_vector type) containing embeddings +- Content fields with the actual document content + +```sql +-- Example table schema +CREATE TABLE docs ( + id BIGINT, + content TEXT, + title TEXT, + embedding_vector FLOAT_VECTOR + KNN_TYPE='hnsw' + HNSW_SIMILARITY='cosine' + MODEL_NAME='sentence-transformers/all-MiniLM-L6-v2' + FROM='content,title' +) TYPE='rt'; +``` + +## Configuration Options + +### Model Parameters + +| Parameter | Type | Required | Default | Description | +|---------------|------------|----------|---------|--------------------------------------------------| +| `llm_provider`| string | Yes | - | Must be 'openai' | +| `llm_model` | string | Yes | - | OpenAI model (gpt-4o-mini, gpt-4o, gpt-3.5-turbo)| +| `style_prompt`| string | No | '' | System prompt for the LLM | +| `temperature` | float | No | 0.7 | Creativity level (0.0-2.0) | +| `max_tokens` | int | No | 4000 | Maximum response length (1-32768) | +| `k_results` | int | No | 5 | Number of documents to retrieve (1-50) | +| `settings` | JSON string| No | - | Additional settings as JSON | + +## Advanced Features + +### Intent Classification + +The plugin automatically classifies user intent to optimize search: + +- **NEW_SEARCH**: Fresh search with no prior context +- **ALTERNATIVES**: User wants more options ("what else?") +- **TOPIC_CHANGE**: User switching topics ("show me comedies instead") +- **INTEREST**: User likes content, wants similar items +- **REJECTION**: User doesn't like shown content +- **QUESTION**: User asking about specific content +- **CLARIFICATION**: User providing additional details +- **UNCLEAR**: Cannot determine intent + +### Dynamic Thresholds + +The system automatically adjusts similarity thresholds based on: +- Conversation history +- User intent (expansion requests increase the threshold) +- Previous search results effectiveness + +### Exclusion Handling 
+ +The plugin intelligently excludes previously shown or rejected content: + +```sql +-- Example: User says: "I already watched Breaking Bad but want similar shows" +-- Plugin automatically excludes "Breaking Bad" from results +``` + +## Response Format + +Conversation responses include: + +```json +{ + "conversation_uuid": "generated-or-provided-uuid", + "response": "AI-generated response text", + "sources": "[{\"id\": 1, \"title\": \"Doc Title\", \"content\": \"...\"}, ...]" +} +``` + +## Examples + +### Basic Usage + +```sql +-- Create a model +CREATE RAG MODEL assistant ( + llm_provider='openai', + llm_model='gpt-4o-mini' +); + +-- Ask a question +CALL CONVERSATIONAL_RAG( + 'What is vector search?', + 'docs', + 'assistant', + 'content' +); + +-- Continue conversation +CALL CONVERSATIONAL_RAG( + 'How does it work?', + 'docs', + 'assistant', + 'content', + 'returned-conversation-uuid' +); +``` + +### Content Field Examples + +```sql +-- E-commerce product search with multiple fields +CALL CONVERSATIONAL_RAG( + 'Show me gaming laptops under $2000', + 'products', + 'shopping_assistant', + 'name,description,specifications' +); +-- Output format: "Gaming Laptop XYZ, High-performance laptop for gaming, Intel i7, 16GB RAM, RTX 4060" + +-- News articles with title and summary +CALL CONVERSATIONAL_RAG( + 'What happened in tech news today?', + 'news_articles', + 'news_assistant', + 'headline,summary' +); +-- Output format: "Tech Company Announces New AI Model, Revolutionary breakthrough in natural language processing" + +-- Documentation search with structured content +CALL CONVERSATIONAL_RAG( + 'How do I configure clustering?', + 'documentation', + 'help_assistant', + 'section_title,content,code_examples' +); +-- Output format: "Clustering Setup, Follow these steps to configure..., CREATE TABLE cluster_table..." 
+ +-- Continue a specific conversation with multiple fields +CALL CONVERSATIONAL_RAG( + 'Show me more options', + 'products', + 'shopping_assistant', + 'name,price,rating', + 'conv-shopping-session-123' +); +``` diff --git a/src/Plugin/ConversationalRag/SearchEngine.php b/src/Plugin/ConversationalRag/SearchEngine.php new file mode 100644 index 00000000..942fbfeb --- /dev/null +++ b/src/Plugin/ConversationalRag/SearchEngine.php @@ -0,0 +1,352 @@ +, + * k_results?: string|int, similarity_threshold?: string|float} $modelConfig + * @param float|null $threshold + * + * @return array> + */ + public function performSearch( + HTTPClient $client, + string $table, + string $searchQuery, + string $excludeQuery, + array $modelConfig, + ?float $threshold = null + ): array { + // Get excluded IDs first + $excludedIds = $this->getExcludedIds($client, $table, $excludeQuery); + + // Use optimized search with pre-computed excluded IDs + return $this->performSearchWithExcludedIds( + $client, + $table, + $searchQuery, + $excludedIds, + $modelConfig, + $threshold ?? 
$this->getSimilarityThreshold($modelConfig) + ); + } + + /** + * Get excluded document IDs for a given exclusion query + * + * @param HTTPClient $client + * @param string $table + * @param string $excludeQuery + * + * @return array + * @throws ManticoreSearchResponseError|ManticoreSearchClientError + */ + public function getExcludedIds( + HTTPClient $client, + string $table, + string $excludeQuery + ): array { + if (empty($excludeQuery) || $excludeQuery === 'none') { + return []; + } + + $vectorField = $this->detectVectorField($client, $table); + if (!$vectorField) { + return []; + } + + $excludeEscaped = $this->escapeString($excludeQuery); + $sql = "SELECT id, knn_dist() as knn_dist FROM {$table} + WHERE knn({$vectorField}, 15, '{$excludeEscaped}') + AND knn_dist < 0.75"; + + Buddy::info("\n[DEBUG EXCLUSION QUERY]"); + Buddy::info("├─ Exclude query: '{$excludeQuery}'"); + Buddy::info("├─ Table: {$table}"); + Buddy::info("├─ Vector field: {$vectorField}"); + Buddy::info('├─ Threshold: 0.75'); + Buddy::info("├─ Final SQL: {$sql}"); + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + Buddy::info('└─ Error: ' . $response->getError()); + return []; // Return empty array on error + } + + /** @var array}> $result */ + $result = $response->getResult(); + $excludeResults = $result[0]['data'] ?? []; + + $excludedIds = array_column($excludeResults, 'id'); + Buddy::info('├─ Raw results count: ' . sizeof($excludeResults)); + Buddy::info('└─ Excluded IDs found: [' . implode(', ', $excludedIds) . 
']'); + + return $excludedIds; + } + + /** + * Detect vector field in table + * + * @param HTTPClient $client + * @param string $table + * + * @return string|null + * @throws ManticoreSearchResponseError|ManticoreSearchClientError + */ + private function detectVectorField(HTTPClient $client, string $table): ?string { + + $query = "DESCRIBE {$table}"; + $response = $client->sendRequest($query); + if ($response->hasError()) { + throw ManticoreSearchResponseError::create( + 'Schema detection failed: ' . $response->getError() + ); + } + + /** @var array}> $result */ + $result = $response->getResult(); + $schema = $result[0]['data']; + + // Look for FLOAT_VECTOR fields + foreach ($schema as $field) { + if (str_contains(strtoupper($field['Type']), 'FLOAT_VECTOR')) { + return $field['Field']; + } + } + return null; + } + + /** + * Escape string for SQL safety + * + * @param string $string + * @return string + */ + private function escapeString(string $string): string { + return str_replace("'", "''", $string); + } + + /** + * Perform vector search with pre-computed excluded IDs (optimized for reuse) + * + * @param HTTPClient $client + * @param string $table + * @param string $searchQuery + * @param array $excludedIds + * @param array{llm_provider: string, llm_model: string, settings?: string|array, + * k_results?: string|int, similarity_threshold?: string|float} $modelConfig + * @param float $threshold + * + * @return array> + * @throws ManticoreSearchClientError|ManticoreSearchResponseError + */ + public function performSearchWithExcludedIds( + HTTPClient $client, + string $table, + string $searchQuery, + array $excludedIds, + array $modelConfig, + float $threshold + ): array { + $kResults = $this->getKResults($modelConfig); + $vectorField = $this->detectVectorField($client, $table); + + if (!$vectorField) { + return []; + } + + $searchEscaped = $this->escapeString($searchQuery); + + if (!empty($excludedIds)) { + // Use pre-computed excluded IDs - NO additional KNN 
search needed! + $safeExcludeIds = array_map('intval', $excludedIds); + $excludeList = implode(',', $safeExcludeIds); + $adjustedK = $kResults + sizeof($excludedIds) + 5; + + $sql = "SELECT *, knn_dist() as knn_dist + FROM {$table} + WHERE knn({$vectorField}, {$adjustedK}, '{$searchEscaped}') + AND knn_dist < {$threshold} + AND id NOT IN ({$excludeList}) + LIMIT {$kResults}"; + } else { + $sql = "SELECT *, knn_dist() as knn_dist + FROM {$table} + WHERE knn({$vectorField}, {$kResults}, '{$searchEscaped}') + AND knn_dist < {$threshold}"; + } + + Buddy::info("\n[DEBUG KNN SEARCH]"); + Buddy::info("├─ Search query: '{$searchQuery}'"); + Buddy::info('├─ Excluded IDs: [' . implode(', ', $excludedIds) . ']'); + Buddy::info("├─ k: {$kResults}"); + Buddy::info("├─ Threshold: {$threshold}"); + Buddy::info("├─ Final SQL: {$sql}"); + + $response = $client->sendRequest($sql); + if ($response->hasError()) { + throw ManticoreSearchResponseError::create('Vector search failed: ' . $response->getError()); + } + + /** @var array>}> $responseResult */ + $responseResult = $response->getResult(); + $result = $responseResult[0]['data'] ?? []; + Buddy::info('└─ Results found: ' . sizeof($result)); + + return $this->filterVectorFields($result, $table, $client); + } + + /** + * Get K results from configuration + * + * @param array{llm_provider: string, llm_model: string, + * settings?: string|array, k_results?: string|int} $modelConfig + * @return int + */ + private function getKResults(array $modelConfig): int { + // Check direct config + if (isset($modelConfig['k_results'])) { + return (int)$modelConfig['k_results']; + } + + // Check settings + if (isset($modelConfig['settings'])) { + $settings = is_string($modelConfig['settings']) + ? json_decode($modelConfig['settings'], true) ?? 
[] + : $modelConfig['settings']; + + if (is_array($settings) && isset($settings['k_results'])) { + return (int)$settings['k_results']; + } + } + + return self::DEFAULT_K_RESULTS; + } + + /** + * Remove embedding vector fields from search results (matches original php_rag behavior) + * + * @param array> $results + * @param string $table + * @param HTTPClient $client + * + * @return array> + * @throws ManticoreSearchResponseError|ManticoreSearchClientError + */ + private function filterVectorFields(array $results, string $table, HTTPClient $client): array { + if (empty($results)) { + return $results; + } + + // Get all float_vector fields from table schema + $vectorFields = $this->getVectorFields($client, $table); + + if (empty($vectorFields)) { + return $results; + } + + return array_map( + function ($result) use ($vectorFields) { + foreach ($vectorFields as $field) { + unset($result[$field]); + } + return $result; + }, $results + ); + } + + /** + * Get all float_vector field names from table schema + * + * @param HTTPClient $client + * @param string $table + * + * @return array + * @throws ManticoreSearchResponseError|ManticoreSearchClientError + */ + private function getVectorFields(HTTPClient $client, string $table): array { + + $query = "DESCRIBE $table"; + $response = $client->sendRequest($query); + + if ($response->hasError()) { + throw ManticoreSearchResponseError::create('Vector fields detection failed: ' . 
$response->getError()); + } + + /** @var array}> $result */ + $result = $response->getResult(); + $schema = $result[0]['data']; + + /** @var array $vectorFields */ + $vectorFields = []; + foreach ($schema as $field) { + $fieldType = strtoupper($field['Type']); + // Match any float_vector type + if (!str_contains($fieldType, 'FLOAT_VECTOR')) { + continue; + } + + $vectorFields[] = $field['Field']; + } + + return $vectorFields; + } + + /** + * Get similarity threshold from configuration + * + * @param array{llm_provider: string, llm_model: string, + * settings?: string|array, + * similarity_threshold?: string|float} $modelConfig + * + * @return float + */ + private function getSimilarityThreshold(array $modelConfig): float { + + // Check direct config + if (isset($modelConfig['similarity_threshold'])) { + return (float)$modelConfig['similarity_threshold']; + } + + // Check settings + if (isset($modelConfig['settings'])) { + $settings = is_string($modelConfig['settings']) + ? json_decode($modelConfig['settings'], true) ?? [] + : $modelConfig['settings']; + + if (is_array($settings) && isset($settings['similarity_threshold'])) { + return (float)$settings['similarity_threshold']; + } + } + + return self::DEFAULT_SIMILARITY_THRESHOLD; + } +} diff --git a/src/Plugin/ConversationalRag/SqlEscapeTrait.php b/src/Plugin/ConversationalRag/SqlEscapeTrait.php new file mode 100644 index 00000000..5e2ab8e2 --- /dev/null +++ b/src/Plugin/ConversationalRag/SqlEscapeTrait.php @@ -0,0 +1,37 @@ +sqlEscape($value) . 
"'"; + } + + /** + * Escape string values for SQL queries using backslash escaping for ManticoreSearch + * + * @param string $value + * @return string + */ + protected function sqlEscape(string $value): string { + return str_replace("'", "\\'", $value); + } +} diff --git a/src/Plugin/Insert/QueryParser/SQLInsertParser.php b/src/Plugin/Insert/QueryParser/SQLInsertParser.php index 5f0a63f1..cd800484 100644 --- a/src/Plugin/Insert/QueryParser/SQLInsertParser.php +++ b/src/Plugin/Insert/QueryParser/SQLInsertParser.php @@ -264,7 +264,6 @@ function (&$v) { * @return bool */ protected static function isValidJSONVal(string $val): bool { - /** @phpstan-ignore-next-line */ return json_validate(substr($val, 1, -1)); } diff --git a/src/init.php b/src/init.php index 453fe889..0fcf7496 100644 --- a/src/init.php +++ b/src/init.php @@ -91,6 +91,7 @@ 'manticoresoftware/buddy-plugin-distributed-insert', 'manticoresoftware/buddy-plugin-truncate', 'manticoresoftware/buddy-plugin-metrics', + 'manticoresoftware/buddy-plugin-conversational-rag', ]; // Filtering out the plugins that we don't need if (!empty($opts['enable-plugin'])) { diff --git a/test/Buddy/functional/ConversationManagerIntegrationTest.php b/test/Buddy/functional/ConversationManagerIntegrationTest.php new file mode 100644 index 00000000..bc2bf1bc --- /dev/null +++ b/test/Buddy/functional/ConversationManagerIntegrationTest.php @@ -0,0 +1,355 @@ +client = new Client('http://127.0.0.1:' . 
static::getListenHttpPort()); + $this->conversationManager = new ConversationManager($this->client); + } + + /** + * Test complete conversation flow with real database + */ + public function testCompleteConversationFlowWithRealDatabase(): void { + // Initialize the conversations table + $this->conversationManager->initializeTable($this->client); + + // Save a user message + $this->conversationManager->saveMessage( + 'test-conversation-1', + 'test-model-1', + 'user', + 'What is machine learning?', + 50, + 'NEW_QUESTION', + 'machine learning information', + '', + [] + ); + + // Small delay to ensure different timestamps + sleep(1); + + // Save an assistant response + $this->conversationManager->saveMessage( + 'test-conversation-1', + 'test-model-1', + 'assistant', + 'Machine learning is a subset of artificial intelligence that focuses ' + . 'on algorithms that can learn from data.', + 75, + 'ANSWER', + '', + '', + [] + ); + + // Retrieve conversation history + $history = $this->conversationManager->getConversationHistory('test-conversation-1'); + + $expectedHistory = "user: What is machine learning?\n" + . 'assistant: Machine learning is a subset of artificial intelligence that focuses ' + . 
"on algorithms that can learn from data.\n"; + + $this->assertEquals($expectedHistory, $history); + + // Test search context retrieval + $searchContext = $this->conversationManager->getLatestSearchContext('test-conversation-1'); + $this->assertIsArray($searchContext); + $this->assertEquals('machine learning information', $searchContext['search_query']); + $this->assertEquals('', $searchContext['exclude_query']); + $this->assertEquals('', $searchContext['excluded_ids']); // Database stores empty array as empty string + + // Test filtered history for query generation + $filteredHistory = $this->conversationManager->getConversationHistoryForQueryGeneration('test-conversation-1'); + $this->assertEquals($expectedHistory, $filteredHistory); + } + + /** + * Test conversation with multiple exchanges and search context + */ + public function testMultipleConversationExchangesWithSearchContext(): void { + // Initialize the conversations table + $this->conversationManager->initializeTable($this->client); + + // First exchange with search context + $this->conversationManager->saveMessage( + 'test-conversation-2', + 'test-model-2', + 'user', + 'Show me movies about space', + 30, + 'NEW_SEARCH', + 'movies about space', + 'Star Wars', + ['1', '2', '3'] + ); + + $this->conversationManager->saveMessage( + 'test-conversation-2', + 'test-model-2', + 'assistant', + 'Here are some great space movies: 2001: A Space Odyssey, Interstellar, Gravity...', + 60, + 'ANSWER', + '', + '', + [] + ); + + // Second exchange with different search context + $this->conversationManager->saveMessage( + 'test-conversation-2', + 'test-model-2', + 'user', + 'What about documentaries?', + 25, + 'NEW_SEARCH', + 'documentaries about space', + 'fiction movies', + ['4', '5', '6'] + ); + + $this->conversationManager->saveMessage( + 'test-conversation-2', + 'test-model-2', + 'assistant', + 'Here are some space documentaries: Cosmos, The Farthest, Apollo 13...', + 55, + 'ANSWER', + '', + '', + [] + ); + + // Test 
that we get the latest search context + $searchContext = $this->conversationManager->getLatestSearchContext('test-conversation-2'); + $this->assertNotNull($searchContext); + $this->assertEquals('documentaries about space', $searchContext['search_query']); + $this->assertEquals('fiction movies', $searchContext['exclude_query']); + $this->assertEquals('["4","5","6"]', $searchContext['excluded_ids']); + + // Test complete history + $history = $this->conversationManager->getConversationHistory('test-conversation-2'); + $this->assertStringContainsString('Show me movies about space', $history); + $this->assertStringContainsString('What about documentaries?', $history); + $this->assertStringContainsString('Here are some great space movies', $history); + $this->assertStringContainsString('Here are some space documentaries', $history); + } + + /** + * Test conversation with CONTENT_QUESTION intent filtering + */ + public function testContentQuestionIntentFiltering(): void { + // Initialize the conversations table + $this->conversationManager->initializeTable($this->client); + + // Regular search question + $this->conversationManager->saveMessage( + 'test-conversation-3', + 'test-model-3', + 'user', + 'What are the best sci-fi movies?', + 25, + 'NEW_SEARCH', + 'best sci-fi movies', + '', + [] + ); + + $this->conversationManager->saveMessage( + 'test-conversation-3', + 'test-model-3', + 'assistant', + 'Here are some highly-rated sci-fi movies: Blade Runner 2049, Arrival, The Matrix...', + 50, + 'ANSWER', + '', + '', + [] + ); + + // Content question (should be filtered out in query generation) + $this->conversationManager->saveMessage( + 'test-conversation-3', + 'test-model-3', + 'user', + 'Tell me more about Blade Runner 2049', + 20, + 'CONTENT_QUESTION', + '', + '', + [] + ); + + $this->conversationManager->saveMessage( + 'test-conversation-3', + 'test-model-3', + 'assistant', + 'Blade Runner 2049 is a 2017 science fiction film directed by Denis Villeneuve...', + 45, + 
'ANSWER', + '', + '', + [] + ); + + // Test that complete history includes everything + $this->assertStringContainsString( + 'What are the best sci-fi movies?', + $this->conversationManager->getConversationHistory('test-conversation-3') + ); + $this->assertStringContainsString( + 'Tell me more about Blade Runner 2049', + $this->conversationManager->getConversationHistory('test-conversation-3') + ); + + // Test that filtered history excludes CONTENT_QUESTION + $filteredHistory = $this->conversationManager->getConversationHistoryForQueryGeneration('test-conversation-3'); + $this->assertStringContainsString('What are the best sci-fi movies?', $filteredHistory); + $this->assertStringNotContainsString('Tell me more about Blade Runner 2049', $filteredHistory); + + // Test that search context is not affected by CONTENT_QUESTION + $searchContext = $this->conversationManager->getLatestSearchContext('test-conversation-3'); + $this->assertNotNull($searchContext); + $this->assertEquals('best sci-fi movies', $searchContext['search_query']); + } + + /** + * Test empty conversation handling + */ + public function testEmptyConversationHandling(): void { + // Initialize the conversations table + $this->conversationManager->initializeTable($this->client); + + // Test history for non-existent conversation + $history = $this->conversationManager->getConversationHistory('non-existent-conversation'); + $this->assertEquals('', $history); + + // Test filtered history for non-existent conversation + $this->assertEquals( + '', + $this->conversationManager->getConversationHistoryForQueryGeneration('non-existent-conversation') + ); + + // Test search context for non-existent conversation + $searchContext = $this->conversationManager->getLatestSearchContext('non-existent-conversation'); + $this->assertNull($searchContext); + } + + /** + * Test conversation with special characters and long messages + */ + public function testSpecialCharactersAndLongMessages(): void { + // Initialize the 
conversations table + $this->conversationManager->initializeTable($this->client); + + // Message with special characters + $specialMessage = "This message contains: quotes 'single' and \"double\", " . + "newlines\nand\ttabs, backslashes\\, and unicode: ñáéíóú 🚀"; + + $this->conversationManager->saveMessage( + 'test-conversation-4', + 'test-model-4', + 'user', + $specialMessage, + 100, + 'NEW_SEARCH', + 'special characters test', + '', + [] + ); + + // Retrieve and verify the message is preserved correctly + $history = $this->conversationManager->getConversationHistory('test-conversation-4'); + $this->assertStringContainsString('quotes \'single\' and "double"', $history); + $this->assertStringContainsString('newlines', $history); + $this->assertStringContainsString('tabs', $history); + $this->assertStringContainsString('backslashes', $history); + $this->assertStringContainsString('unicode: ñáéíóú 🚀', $history); + } + + /** + * Test conversation with JSON excluded IDs + */ + public function testJsonExcludedIdsHandling(): void { + // Initialize the conversations table + $this->conversationManager->initializeTable($this->client); + + $excludedIds = ['10', '20', '30', '40', '50']; + $this->conversationManager->saveMessage( + 'test-conversation-5', + 'test-model-5', + 'user', + 'Show me results excluding some items', + 35, + 'NEW_SEARCH', + 'results excluding items', + 'items to exclude', + $excludedIds + ); + + // Test search context retrieval with JSON + $searchContext = $this->conversationManager->getLatestSearchContext('test-conversation-5'); + $this->assertIsArray($searchContext); + $this->assertEquals('results excluding items', $searchContext['search_query']); + $this->assertEquals('items to exclude', $searchContext['exclude_query']); + + // Verify JSON is properly stored and retrieved + $retrievedIds = json_decode($searchContext['excluded_ids'], true); + $this->assertEquals($excludedIds, $retrievedIds); + } + + /** + * Test table creation with real database + */ + 
public function testTableCreationWithRealDatabase(): void { + // This should create the table without errors + $this->conversationManager->initializeTable($this->client); + + // Verify table exists by trying to insert into it + $this->conversationManager->saveMessage( + 'test-conversation-6', + 'test-model-6', + 'user', + 'Test message for table creation', + 10, + 'NEW_SEARCH', + 'table creation test', + '', + [] + ); + + // Verify we can retrieve the message + $history = $this->conversationManager->getConversationHistory('test-conversation-6'); + $this->assertStringContainsString('Test message for table creation', $history); + } +} diff --git a/test/Buddy/functional/ConversationalRagSqlTest.php b/test/Buddy/functional/ConversationalRagSqlTest.php new file mode 100644 index 00000000..b7d574f5 --- /dev/null +++ b/test/Buddy/functional/ConversationalRagSqlTest.php @@ -0,0 +1,363 @@ +assertEmpty($result); // CREATE TABLE via SQL returns empty result + + // Insert some test documents + $result = static::runSqlQuery( + "INSERT INTO {$tableName} (id, title, content) VALUES (1, 'Machine Learning Basics', " + . "'Machine learning is a subset of AI')" + ); + $this->assertEmpty($result); // INSERT via SQL returns empty result + + $result = static::runSqlQuery( + "INSERT INTO {$tableName} (id, title, content) VALUES (2, 'Deep Learning', " + . 
"'Deep learning uses neural networks')" + ); + $this->assertEmpty($result); // INSERT via SQL returns empty result + + // Test RAG model creation (this should create the conversations table) + $result = static::runSqlQuery( + "CREATE RAG MODEL '{$modelName}' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'You are a helpful assistant.', + temperature = 0.7, + max_tokens = 1000, + k_results = 5 + )" + ); + $this->assertNotEmpty($result); // CREATE RAG MODEL returns UUID table + $this->assertStringContainsString('uuid', implode(' ', $result)); // Should contain UUID header + + // Verify conversations table was created + $result = static::runSqlQuery('SHOW TABLES LIKE rag_conversations'); + $this->assertNotEmpty($result); + $this->assertStringContainsString('rag_conversations', implode(' ', $result)); + + // Test conversation call (this should insert into conversations table) + // Note: This might fail due to missing API keys, but we can check SQL structure + $query = "CALL CONVERSATIONAL_RAG('What is machine learning?', '{$tableName}', '{$modelName}',"; + $query .= " 'content', 'test-conv-1')"; + $result = $this->runHttpQuery($query); + + // Even if the call fails due to API issues, we should see the conversations table being used + $errorValue = $result['error'] ?? ''; + if (is_string($errorValue)) { + $error = $errorValue; + } elseif (is_array($errorValue)) { + $error = $errorValue['error']; + } else { + $error = ''; + } + if (!isset($result['error']) || !str_contains($error, 'Failed to insert into conversations table')) { + return; + } + + // This would catch SQL syntax errors like the one we fixed + $this->fail('SQL syntax error in conversation insertion: ' . 
$error); + } + + /** + * Test conversations table structure directly + */ + public function testConversationsTableStructure(): void { + // Use unique names to avoid conflicts + $uniqueId = uniqid(); + $modelName = "structure_test_model_{$uniqueId}"; + + // Create RAG model to ensure conversations table exists + $result = static::runSqlQuery( + "CREATE RAG MODEL '{$modelName}' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'Test assistant', + temperature = 0.5, + max_tokens = 500, + k_results = 3 + )" + ); + $this->assertNotEmpty($result); // CREATE RAG MODEL returns UUID table + $this->assertStringContainsString('uuid', implode(' ', $result)); // Should contain UUID header + + // Check table structure + $result = static::runSqlQuery('DESCRIBE rag_conversations'); + $this->assertNotEmpty($result); + $expectedColumns = [ + 'conversation_uuid', 'model_uuid', 'created_at', 'role', 'message', + 'tokens_used', 'intent', 'search_query', 'exclude_query', 'excluded_ids', 'ttl', + ]; + foreach ($expectedColumns as $column) { + $this->assertStringContainsString($column, implode(' ', $result)); + } + + // Test direct INSERT into conversations table + $result = static::runSqlQuery( + 'INSERT INTO rag_conversations (conversation_uuid, model_uuid, created_at, role, message, ' + . 'tokens_used, intent, exclude_query, excluded_ids, ttl) VALUES ' + . "('test-conv-1', 'test-model-1', " . time() . ", 'user', 'Test message', 50, " + . "'NEW_SEARCH', '', '[]', " . (time() + 86400) . 
')' + ); + $this->assertEmpty($result); // INSERT via SQL returns empty result + + // Verify the data was inserted correctly + $result = static::runSqlQuery( + "SELECT role, message FROM rag_conversations WHERE conversation_uuid = 'test-conv-1'" + ); + $this->assertNotEmpty($result); + $this->assertStringContainsString('user', implode(' ', $result)); + $this->assertStringContainsString('Test message', implode(' ', $result)); + } + + /** + * Test SQL injection prevention and proper escaping + */ + public function testSqlInjectionPrevention(): void { + // Use unique names to avoid conflicts + $uniqueId = uniqid(); + $modelName = "security_test_model_{$uniqueId}"; + $tableName = "test_docs_{$uniqueId}"; + + // Create test table first + static::runSqlQuery("CREATE TABLE {$tableName} (title text, content text, id int)"); + + // Create RAG model + $result = static::runSqlQuery( + "CREATE RAG MODEL '{$modelName}' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'Security test assistant', + temperature = 0.5, + max_tokens = 500, + k_results = 3 + )" + ); + $this->assertNotEmpty($result); // CREATE RAG MODEL returns UUID table + $this->assertStringContainsString('uuid', implode(' ', $result)); // Should contain UUID header + + // Test with potentially dangerous input + $dangerousInputs = [ + "Robert'); DROP TABLE rag_conversations; --", + "' OR '1'='1", + "'; INSERT INTO rag_conversations VALUES ('hack', 'hack', 0, 'hacker', 'pwned', 0, " + . "'' , '', '', '[]', 0); --", + 'CONCAT(char(39), char(32), char(79), char(82), char(32), char(39), char(49), ' + . 'char(39), char(61), char(39), char(49), char(39))', + ]; + + foreach ($dangerousInputs as $input) { + // Try to insert dangerous input + $this->runHttpQuery( + "CALL CONVERSATIONAL_RAG('" . addslashes($input) . "', '{$tableName}', " + . "'{$modelName}', 'content', 'security-conv-" . uniqid() . 
"')" + ); + + // Check if any SQL injection succeeded (table should still exist) + $tableCheck = $this->runSqlQuery('SHOW TABLES LIKE rag_conversations'); + $this->assertNotEmpty( + $tableCheck, + 'Conversations table should still exist after potential SQL injection attempt' + ); + } + } + + /** + * Test conversation history retrieval with SQL + */ + public function testConversationHistoryRetrievalWithSql(): void { + // Use unique names to avoid conflicts + $uniqueId = uniqid(); + $modelName = "history_test_model_{$uniqueId}"; + + // Create RAG model + $result = static::runSqlQuery( + "CREATE RAG MODEL '{$modelName}' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'History test assistant', + temperature = 0.5, + max_tokens = 500, + k_results = 3 + )" + ); + $this->assertNotEmpty($result); // CREATE RAG MODEL returns UUID table + $this->assertStringContainsString('uuid', implode(' ', $result)); // Should contain UUID header + + // Insert test conversation data directly + $currentTime = time(); + $conversationId = 'history-test-conv'; + + // Insert multiple messages + $result = static::runSqlQuery( + 'INSERT INTO rag_conversations (conversation_uuid, model_uuid, created_at, role, message, ' + . 'tokens_used, intent, exclude_query, excluded_ids, ttl) VALUES ' + . "('$conversationId', '{$modelName}', $currentTime, 'user', 'First question', " + . "30, 'NEW_SEARCH', '', '[]', " . ($currentTime + 86400) . ')' + ); + $this->assertEmpty($result); // INSERT via SQL returns empty result + + $result = static::runSqlQuery( + 'INSERT INTO rag_conversations (conversation_uuid, model_uuid, created_at, role, message, ' + . 'tokens_used, intent, exclude_query, excluded_ids, ttl) VALUES ' + . "('$conversationId', '{$modelName}', " . ($currentTime + 60) . ", 'assistant', " + . "'First answer', 50, 'ANSWER', '', '[]', " . ($currentTime + 86400) . 
')' + ); + $this->assertEmpty($result); // INSERT via SQL returns empty result + + $result = static::runSqlQuery( + 'INSERT INTO rag_conversations (conversation_uuid, model_uuid, created_at, role, message, ' + . 'tokens_used, intent, exclude_query, excluded_ids, ttl) VALUES ' + . "('$conversationId', '{$modelName}', " . ($currentTime + 120) . ", 'user', " + . "'Second question', 25, 'CONTENT_QUESTION', '', '[]', " . ($currentTime + 86400) . ')' + ); + $this->assertEmpty($result); // INSERT via SQL returns empty result + + // Test complete history retrieval + $historyResult = $this->runSqlQuery( + 'SELECT role, message FROM rag_conversations WHERE conversation_uuid = ' + . "'$conversationId' ORDER BY created_at ASC" + ); + $this->assertNotEmpty($historyResult); + $this->assertStringContainsString('First question', implode(' ', $historyResult)); + $this->assertStringContainsString('First answer', implode(' ', $historyResult)); + $this->assertStringContainsString('Second question', implode(' ', $historyResult)); + + // Test filtered history (excluding CONTENT_QUESTION) + $filteredResult = $this->runSqlQuery( + 'SELECT role, message FROM rag_conversations WHERE conversation_uuid = ' + . "'$conversationId' AND intent != 'CONTENT_QUESTION' ORDER BY created_at ASC" + ); + $this->assertNotEmpty($filteredResult); + $this->assertStringContainsString('First question', implode(' ', $filteredResult)); + $this->assertStringContainsString('First answer', implode(' ', $filteredResult)); + $this->assertStringNotContainsString('Second question', implode(' ', $filteredResult)); + + // Test search context retrieval (only select attributes, not stored fields) + $contextResult = $this->runSqlQuery( + 'SELECT exclude_query, excluded_ids FROM rag_conversations WHERE ' + . "conversation_uuid = '$conversationId' AND role = 'user' " + . 
"AND intent != 'CONTENT_QUESTION' ORDER BY created_at DESC LIMIT 1" + ); + $this->assertNotEmpty($contextResult); + } + + /** + * Test error handling for malformed SQL + */ + public function testErrorHandlingForMalformedSql(): void { + // Test various malformed SQL scenarios that should be caught + + // Test INSERT with wrong column count + $this->assertQueryResultContainsError( + 'INSERT INTO rag_conversations (conversation_uuid, model_uuid, created_at, role, message, ' + . 'tokens_used, intent, exclude_query, excluded_ids, ttl) ' + . "VALUES ('test', 'test', 0)", // Missing 6 values + 'P01: wrong number of values here near \')\'' + ); + + // Test INSERT with wrong data types (Manticore might be more permissive than expected) + $result = static::runSqlQuery( + 'INSERT INTO rag_conversations (conversation_uuid, model_uuid, created_at, role, message, ' + . 'tokens_used, intent, exclude_query, excluded_ids, ttl) VALUES ' + . "('test', 'test', 'not_a_number', 'user', 'test', 'not_a_number', 'test', " + . 
"'test', 'test', 'not_a_number')" + ); + // This might succeed or fail depending on Manticore's type conversion + // We just check that it doesn't crash the system + $this->assertTrue(is_array($result)); + + // Test SELECT with invalid column names + $this->assertQueryResultContainsError( + 'SELECT invalid_column FROM rag_conversations', + 'table rag_conversations: parse error: unknown column: invalid_column' + ); + } + + /** + * Test concurrent conversation handling + */ + public function testConcurrentConversationHandling(): void { + // Use unique names to avoid conflicts + $uniqueId = uniqid(); + $modelName = "concurrent_test_model_{$uniqueId}"; + + // Create RAG model + $result = static::runSqlQuery( + "CREATE RAG MODEL '{$modelName}' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'Concurrent test assistant', + temperature = 0.5, + max_tokens = 500, + k_results = 3 + )" + ); + $this->assertNotEmpty($result); // CREATE RAG MODEL returns UUID table + $this->assertStringContainsString('uuid', implode(' ', $result)); // Should contain UUID header + + // Insert conversations for different users concurrently + $currentTime = time(); + $conversations = [ + ['conv1', 'user1', 'User 1 message'], + ['conv2', 'user2', 'User 2 message'], + ['conv3', 'user3', 'User 3 message'], + ]; + + foreach ($conversations as [$convId, $user, $message]) { + $result = static::runSqlQuery( + 'INSERT INTO rag_conversations (conversation_uuid, model_uuid, created_at, role, message, ' + . 'tokens_used, intent, exclude_query, excluded_ids, ttl) VALUES ' + . "('$convId', '{$modelName}', $currentTime, '$user', '$message', 30, " + . "'NEW_SEARCH', '', '[]', " . ($currentTime + 86400) . 
')' + ); + $this->assertEmpty($result); // INSERT via SQL returns empty result + } + + // Verify each conversation can be retrieved independently + foreach ($conversations as [$convId, $user, $message]) { + $result = $this->runSqlQuery( + "SELECT role, message FROM rag_conversations WHERE conversation_uuid = '$convId'" + ); + $this->assertNotEmpty($result); + $this->assertStringContainsString($user, implode(' ', $result)); + $this->assertStringContainsString($message, implode(' ', $result)); + } + + // Verify no cross-contamination + $conv1Result = $this->runSqlQuery( + "SELECT COUNT(*) as count FROM rag_conversations WHERE conversation_uuid = 'conv1'" + ); + $conv2Result = $this->runSqlQuery( + "SELECT COUNT(*) as count FROM rag_conversations WHERE conversation_uuid = 'conv2'" + ); + $this->assertStringContainsString('1', implode(' ', $conv1Result)); + $this->assertStringContainsString('1', implode(' ', $conv2Result)); + } +} diff --git a/test/Buddy/functional/ConversationalTest.php b/test/Buddy/functional/ConversationalTest.php new file mode 100644 index 00000000..d8c07f4c --- /dev/null +++ b/test/Buddy/functional/ConversationalTest.php @@ -0,0 +1,356 @@ +assertIsArray($result); + + // Verify the model was created by checking it exists + $this->assertQueryResult( + 'SHOW RAG MODELS', + ['test_model'] + ); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model'"); + } + + + public function testCreateRagModelInvalidProvider(): void { + $this->assertQueryResultContainsError( + "CREATE RAG MODEL 'bad_model' ( + llm_provider = 'invalid_provider', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key' + )", + "Invalid LLM provider: invalid_provider. Only 'openai' is supported." 
+ ); + } + + public function testCreateRagModelInvalidTemperature(): void { + $this->assertQueryResultContainsError( + "CREATE RAG MODEL 'bad_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + temperature = 3.0 + )", + 'Temperature must be between 0 and 2' + ); + } + + public function testCreateRagModelInvalidMaxTokens(): void { + $this->assertQueryResultContainsError( + "CREATE RAG MODEL 'bad_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + max_tokens = 50000 + )", + 'max_tokens must be between 1 and 32768' + ); + } + + public function testCreateRagModelInvalidKResults(): void { + $this->assertQueryResultContainsError( + "CREATE RAG MODEL 'bad_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + k_results = 100 + )", + 'k_results must be between 1 and 50' + ); + } + + public function testShowRagModelsEmpty(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model1'"); + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model2'"); + + $result = static::runSqlQuery('SHOW RAG MODELS'); + $this->assertIsArray($result); + } + + public function testShowRagModelsWithData(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model1'"); + + static::runSqlQuery( + "CREATE RAG MODEL 'test_model1' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key-123456789' + )" + ); + + $this->assertQueryResult( + 'SHOW RAG MODELS', + ['test_model1', 'openai', 'gpt-4'] + ); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model1'"); + } + + public function testDescribeRagModelSuccess(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model'"); + + static::runSqlQuery( + "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'You are a helpful assistant.', + temperature = 0.7, + max_tokens = 1000, + k_results = 5 + )" + ); + + $this->assertQueryResult( + 
"DESCRIBE RAG MODEL 'test_model'", + ['test_model', 'openai', 'gpt-4'] + ); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model'"); + } + + public function testDescribeRagModelNotFound(): void { + $this->assertQueryResultContainsError( + "DESCRIBE RAG MODEL 'non_existent_model'", + "RAG model 'non_existent_model' not found" + ); + } + + public function testDropRagModelSuccess(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'test_model'"); + + static::runSqlQuery( + "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key-123456789' + )" + ); + + $result = static::runSqlQuery("DROP RAG MODEL 'test_model'"); + $this->assertIsArray($result); + } + + public function testDropRagModelNotFound(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'non_existent_model'"); + + $this->assertQueryResultContainsError( + "DROP RAG MODEL 'non_existent_model'", + "RAG model 'non_existent_model' not found" + ); + } + + public function testCreateDuplicateRagModel(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'duplicate_model'"); + + static::runSqlQuery( + "CREATE RAG MODEL 'duplicate_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key-123456789' + )" + ); + + $this->assertQueryResultContainsError( + "CREATE RAG MODEL 'duplicate_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key-123456789' + )", + "RAG model 'duplicate_model' already exists" + ); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'duplicate_model'"); + } + + public function testFullModelLifecycle(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'lifecycle_model'"); + + static::runSqlQuery( + "CREATE RAG MODEL 'lifecycle_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key-123456789', + style_prompt = 'You are a helpful assistant.', + temperature = 0.7, + max_tokens = 1000, + k_results = 5 + )" + ); + + 
$this->assertQueryResult( + 'SHOW RAG MODELS', + ['lifecycle_model'] + ); + + $this->assertQueryResult( + "DESCRIBE RAG MODEL 'lifecycle_model'", + ['lifecycle_model', 'openai', 'gpt-4'] + ); + + static::runSqlQuery("DROP RAG MODEL 'lifecycle_model'"); + + $this->assertQueryResultContainsError( + "DESCRIBE RAG MODEL 'lifecycle_model'", + "RAG model 'lifecycle_model' not found" + ); + } + + public function testCreateModelWithMinimalParameters(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'minimal_model'"); + + $result = static::runSqlQuery( + "CREATE RAG MODEL 'minimal_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key-123456789' + )" + ); + $this->assertIsArray($result); + + // Verify the model was created + $this->assertQueryResult( + 'SHOW RAG MODELS', + ['minimal_model'] + ); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'minimal_model'"); + } + + public function testCreateModelWithAllParameters(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'full_model'"); + + $result = static::runSqlQuery( + "CREATE RAG MODEL 'full_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key-123456789', + style_prompt = 'You are a helpful assistant with extensive knowledge.', + temperature = 1.5, + max_tokens = 2000, + k_results = 10 + )" + ); + $this->assertIsArray($result); + + $this->assertQueryResult( + "DESCRIBE RAG MODEL 'full_model'", + ['full_model', 'You are a helpful assistant with extensive knowledge.'] + ); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'full_model'"); + } + + public function testTemperatureBoundaryValues(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'temp_min'"); + + $result1 = static::runSqlQuery( + "CREATE RAG MODEL 'temp_min' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + temperature = 0 + )" + ); + $this->assertIsArray($result1); + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'temp_min'"); + + 
static::runSqlQuery("DROP RAG MODEL IF EXISTS 'temp_max'"); + + $result2 = static::runSqlQuery( + "CREATE RAG MODEL 'temp_max' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + temperature = 2 + )" + ); + $this->assertIsArray($result2); + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'temp_max'"); + } + + public function testMaxTokensBoundaryValues(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'tokens_min'"); + + $result1 = static::runSqlQuery( + "CREATE RAG MODEL 'tokens_min' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + max_tokens = 1 + )" + ); + $this->assertIsArray($result1); + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'tokens_min'"); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'tokens_max'"); + + $result2 = static::runSqlQuery( + "CREATE RAG MODEL 'tokens_max' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + max_tokens = 32768 + )" + ); + $this->assertIsArray($result2); + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'tokens_max'"); + } + + public function testKResultsBoundaryValues(): void { + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'k_min'"); + + $result1 = static::runSqlQuery( + "CREATE RAG MODEL 'k_min' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + k_results = 1 + )" + ); + $this->assertIsArray($result1); + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'k_min'"); + + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'k_max'"); + + $result2 = static::runSqlQuery( + "CREATE RAG MODEL 'k_max' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + llm_api_key = 'sk-test-key', + k_results = 50 + )" + ); + $this->assertIsArray($result2); + static::runSqlQuery("DROP RAG MODEL IF EXISTS 'k_max'"); + } +} diff --git a/test/Plugin/ConversationalRag/BaseProviderApiKeyTest.php b/test/Plugin/ConversationalRag/BaseProviderApiKeyTest.php new file mode 100644 index 00000000..ecc1588f --- /dev/null +++ 
b/test/Plugin/ConversationalRag/BaseProviderApiKeyTest.php @@ -0,0 +1,169 @@ +baseProvider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + // Test OpenAI provider resolution - simulates CALL RAG with openai provider + $result = $method->invoke($this->baseProvider, 'openai'); + $this->assertEquals('sk-test-key-12345678901234567890123456789012', $result); + + // Test Anthropic provider resolution - simulates CALL RAG with anthropic provider + $result = $method->invoke($this->baseProvider, 'anthropic'); + $this->assertEquals('sk-ant-test123456789012345678901234567890', $result); + } + + /** + * Test API key resolution fails for unsupported providers + * This simulates CALL RAG failing when an invalid provider is specified + */ + public function testApiKeyResolutionUnsupportedProvider(): void { + $reflection = new ReflectionClass($this->baseProvider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + // Simulates CALL RAG failing when model has invalid provider like 'unsupported' + $this->expectException(QueryParseError::class); + $this->expectExceptionMessage("Unsupported LLM provider: 'unsupported'"); + + $method->invoke($this->baseProvider, 'unsupported'); + } + + /** + * Test API key resolution fails when environment variable is missing + * This simulates CALL RAG failing when the required env var isn't set + */ + public function testApiKeyResolutionMissingEnvVar(): void { + $reflection = new ReflectionClass($this->baseProvider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + // Simulates CALL RAG failing when GROK_API_KEY environment variable is not set + $this->expectException(QueryParseError::class); + $this->expectExceptionMessage("Environment variable 'GROK_API_KEY' not found or empty"); + + $method->invoke($this->baseProvider, 'grok'); + } + + /** + * Test API key resolution fails when environment variable is empty + 
* This simulates CALL RAG failing when the env var exists but is empty + */ + public function testApiKeyResolutionEmptyEnvVar(): void { + $reflection = new ReflectionClass($this->baseProvider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + // Simulates CALL RAG failing when GROK_API_KEY exists but is empty + putenv('GROK_API_KEY='); + + try { + $this->expectException(QueryParseError::class); + $this->expectExceptionMessage("Environment variable 'GROK_API_KEY' not found or empty"); + + $method->invoke($this->baseProvider, 'grok'); + } finally { + // Clean up + putenv('GROK_API_KEY'); + } + } + + /** + * Test API key resolution fails when provider is not configured + * This simulates CALL RAG failing when no provider is specified in the model + */ + public function testApiKeyResolutionEmptyProvider(): void { + $reflection = new ReflectionClass($this->baseProvider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + // Simulates CALL RAG failing when provider string is empty + $this->expectException(QueryParseError::class); + $this->expectExceptionMessage('LLM provider not configured'); + + $method->invoke($this->baseProvider, ''); + } + + /** + * Test the getApiKey method works when provider is properly configured + * This simulates the normal CALL RAG flow with valid configuration + */ + public function testGetApiKeyWithValidConfig(): void { + $reflection = new ReflectionClass($this->baseProvider); + $method = $reflection->getMethod('getApiKey'); + $method->setAccessible(true); + + // Simulates normal CALL RAG flow: model configured with openai provider + $this->baseProvider->configure(['llm_provider' => 'openai']); + + $result = $method->invoke($this->baseProvider); + $this->assertEquals('sk-test-key-12345678901234567890123456789012', $result); + } + + /** + * Test getApiKey fails when provider configuration is missing + * This simulates CALL RAG failing when the model 
wasn't configured with a provider + */ + public function testGetApiKeyMissingProviderConfig(): void { + $reflection = new ReflectionClass($this->baseProvider); + $method = $reflection->getMethod('getApiKey'); + $method->setAccessible(true); + + // Simulates CALL RAG failing when model was created without specifying llm_provider + $this->baseProvider->configure([]); + + $this->expectException(QueryParseError::class); + $this->expectExceptionMessage('LLM provider not configured'); + + $method->invoke($this->baseProvider); + } + + protected function setUp(): void { + $this->modelManager = new ModelManager(); + $this->baseProvider = new OpenAIProvider(); + + // Ensure modelManager is properly initialized (PHPStan requirement) + $this->assertInstanceOf(ModelManager::class, $this->modelManager); + + // Set up test environment variables + putenv('OPENAI_API_KEY=sk-test-key-12345678901234567890123456789012'); + putenv('ANTHROPIC_API_KEY=sk-ant-test123456789012345678901234567890'); + putenv('EMPTY_KEY='); + } + + protected function tearDown(): void { + // Clean up environment variables + putenv('OPENAI_API_KEY'); + putenv('ANTHROPIC_API_KEY'); + putenv('EMPTY_KEY'); + } +} diff --git a/test/Plugin/ConversationalRag/ContentFieldsTest.php b/test/Plugin/ConversationalRag/ContentFieldsTest.php new file mode 100644 index 00000000..a8757cd7 --- /dev/null +++ b/test/Plugin/ConversationalRag/ContentFieldsTest.php @@ -0,0 +1,169 @@ + 1, + 'title' => 'Database Basics', + 'content' => 'This is the main content.', + 'summary' => 'A quick overview of databases.', + 'vector_field' => [0.1, 0.2, 0.3], // This should be filtered out + ], + [ + 'id' => 2, + 'title' => 'Advanced Queries', + 'content' => 'Complex SQL queries explained.', + 'summary' => 'Learn advanced database techniques.', + 'vector_field' => [0.4, 0.5, 0.6], + ], + ]; + + $settings = ['max_document_length' => 1000]; + + // Test reflection to access private method + $reflection = new ReflectionClass(Handler::class); + 
$method = $reflection->getMethod('buildContext'); + $method->setAccessible(true); + + // Test single field (backward compatibility) + $context = $method->invoke(null, $searchResults, $settings, 'content'); + $this->assertIsString($context); + $this->assertStringContainsString('This is the main content.', $context); + $this->assertStringContainsString('Complex SQL queries explained.', $context); + + // Test multiple fields with comma separator + $context = $method->invoke(null, $searchResults, $settings, 'title,content'); + $expected = "Database Basics, This is the main content.\nAdvanced Queries, Complex SQL queries explained."; + $this->assertEquals($expected, $context); + + // Test three fields + $context = $method->invoke(null, $searchResults, $settings, 'title,summary,content'); + $expected = "Database Basics, A quick overview of databases., This is the main content.\n" . + 'Advanced Queries, Learn advanced database techniques., Complex SQL queries explained.'; + $this->assertEquals($expected, $context); + + // Test missing field (should skip gracefully) + $context = $method->invoke(null, $searchResults, $settings, 'title,nonexistent,content'); + $expected = "Database Basics, This is the main content.\nAdvanced Queries, Complex SQL queries explained."; + $this->assertEquals($expected, $context); + } + + public function testBuildContextWithEmptyFields(): void { + $searchResults = [ + [ + 'id' => 1, + 'title' => 'Test', + 'content' => '', // Empty content should be skipped + 'summary' => 'Summary text', + ], + ]; + + $settings = []; + + $reflection = new ReflectionClass(Handler::class); + $method = $reflection->getMethod('buildContext'); + $method->setAccessible(true); + + $context = $method->invoke(null, $searchResults, $settings, 'title,content,summary'); + $expected = 'Test, Summary text'; // Empty content should be excluded + $this->assertEquals($expected, $context); + } + + public function testBuildContextWithWhitespaceFields(): void { + $searchResults = [ + 
[ + 'id' => 1, + 'title' => 'Test Title', + 'content' => ' ', // Whitespace-only content should be skipped + 'summary' => 'Valid summary', + ], + ]; + + $settings = []; + + $reflection = new ReflectionClass(Handler::class); + $method = $reflection->getMethod('buildContext'); + $method->setAccessible(true); + + $context = $method->invoke(null, $searchResults, $settings, 'title,content,summary'); + $expected = 'Test Title, Valid summary'; // Whitespace-only content should be excluded + $this->assertEquals($expected, $context); + } + + public function testBuildContextWithEmptyResults(): void { + $searchResults = []; + $settings = []; + + $reflection = new ReflectionClass(Handler::class); + $method = $reflection->getMethod('buildContext'); + $method->setAccessible(true); + + $context = $method->invoke(null, $searchResults, $settings, 'title,content'); + $this->assertEquals('', $context); + } + + public function testBuildContextWithSingleField(): void { + $searchResults = [ + [ + 'id' => 1, + 'content' => 'Single content field', + ], + ]; + + $settings = []; + + $reflection = new ReflectionClass(Handler::class); + $method = $reflection->getMethod('buildContext'); + $method->setAccessible(true); + + // Test explicit single field + $context = $method->invoke(null, $searchResults, $settings, 'content'); + $expected = 'Single content field'; + $this->assertEquals($expected, $context); + } + + public function testBuildContextWithTruncation(): void { + $searchResults = [ + [ + 'id' => 1, + 'title' => 'Short Title', + 'content' => str_repeat('A very long content string. 
', 50), // Create long content + ], + ]; + + $settings = ['max_document_length' => 50]; + + $reflection = new ReflectionClass(Handler::class); + $method = $reflection->getMethod('buildContext'); + $method->setAccessible(true); + + $context = $method->invoke(null, $searchResults, $settings, 'title,content'); + $this->assertIsString($context); + $this->assertStringEndsWith('...', $context); + $this->assertLessThanOrEqual(53, strlen($context)); // Allow for "..." and separator + } +} diff --git a/test/Plugin/ConversationalRag/ConversationHandlerTest.php b/test/Plugin/ConversationalRag/ConversationHandlerTest.php new file mode 100644 index 00000000..feef34a4 --- /dev/null +++ b/test/Plugin/ConversationalRag/ConversationHandlerTest.php @@ -0,0 +1,1022 @@ + + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => 'SHOW RAG MODELS', + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $this->assertInstanceOf(RagHandler::class, $handler); + } + + public function testCreateModelSuccess(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'You are a helpful assistant.', + temperature = 0.7, + max_tokens = 1000, + k_results = 5 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the manticore client - createModel calls sendRequest 4 times (init tables + modelExists check + insert) + $mockClient = $this->createMock(HTTPClient::class); + + // Create mock responses for each call - successful operations return standard result format + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + 
Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $modelExistsResponse = $this->createMock(Response::class); + $modelExistsResponse->method('getResult') + ->willReturn(Struct::fromData([['data' => [['count' => 0]]]])); + + $insertResponse = $this->createMock(Response::class); + $insertResponse->method('getResult') + ->willReturn(Struct::fromData([['total' => 1, 'error' => '', 'warning' => '']])); + + $mockClient->expects($this->exactly(4)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $modelExistsResponse, $insertResponse); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + if (!$task->isSucceed()) { + $error = $task->getError(); + $this->fail('Task failed: ' . $error::class . ' - ' . $error->getResponseError()); + } + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array}> $struct */ + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertArrayHasKey('uuid', $struct[0]['data'][0]); + $this->assertNotEmpty($struct[0]['data'][0]['uuid']); + } + + public function testShowModelsSuccess(): void { + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => 'SHOW RAG MODELS', + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the manticore client - showModels calls sendRequest 3 times (init tables + getAllModels) + $mockClient = $this->createMock(HTTPClient::class); + + // Create mock responses for each call + $initResponse1 = 
$this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $selectResponse = $this->createMock(Response::class); + // getAllModels expects getResult()[0]['data'] to contain the models array + $selectResponse->method('getResult')->willReturn( + Struct::fromData( + [['data' => [ + [ + 'uuid' => 'test-uuid', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'created_at' => '2023-01-01', + ], + ]]] + ) + ); + + $mockClient->expects($this->exactly(3)) // initializeTables (2 calls) + getAllModels (1 call) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $selectResponse); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array}> $struct */ + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertCount(1, $struct[0]['data']); + $this->assertEquals('test-uuid', $struct[0]['data'][0]['uuid']); + $this->assertEquals('test_model', $struct[0]['data'][0]['name']); + } + + public function testDescribeModelSuccess(): void { + $query = "DESCRIBE RAG MODEL 'test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the manticore client - describeModel calls sendRequest 3 times (init tables + 
getModelByUiidOrName) + $mockClient = $this->createMock(HTTPClient::class); + + // Create mock responses for each call + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $selectResponse = $this->createMock(Response::class); + // getModelByUiidOrName expects getResult()[0]['data'][0] to be the model + $selectResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'total' => 1, + 'error' => '', + 'warning' => '', + 'data' => [ + [ + 'uuid' => 'test-uuid', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_api_key' => '', + 'style_prompt' => 'You are a helpful assistant.', + 'settings' => '{"temperature":0.7,"max_tokens":1000,"k_results":5}', + 'created_at' => '2023-01-01 00:00:00', + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->exactly(3)) // initializeTables (2 calls) + getModelByUiidOrName (1 call) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $selectResponse); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array}> $struct */ + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertGreaterThan(0, sizeof($struct[0]['data'])); + // Should have multiple property-value pairs for the model description + $this->assertEquals('uuid', $struct[0]['data'][0]['property']); + $this->assertEquals('test-uuid', $struct[0]['data'][0]['value']); + } + + public function testDropModelSuccess(): void { + $query = 
"DROP RAG MODEL 'test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the manticore client - dropModel calls sendRequest 4 times (init tables + getModel + delete model) + $mockClient = $this->createMock(HTTPClient::class); + + // Create mock responses for each call + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $getModelResponse = $this->createMock(Response::class); + // getModelByUiidOrName expects getResult()[0]['data'][0] to be the model + $getModelResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'total' => 1, + 'error' => '', + 'warning' => '', + 'data' => [ + [ + 'uuid' => 'test-uuid', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_api_key' => '', + 'style_prompt' => 'You are a helpful assistant.', + 'settings' => '{"temperature":0.7,"max_tokens":1000,"k_results":5}', + 'created_at' => '2023-01-01 00:00:00', + ], + ], + ], + ] + ) + ); + + $deleteModelResponse = $this->createMock(Response::class); + $deleteModelResponse->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $mockClient->expects($this->exactly(4)) // initializeTables (2) + getModel (1) + delete model (1) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $getModelResponse, $deleteModelResponse); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + 
$this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array $struct */ + $this->assertCount(1, $struct); + $this->assertEquals(0, $struct[0]['total']); + $this->assertEmpty($struct[0]['error']); + $this->assertEmpty($struct[0]['warning']); + } + + public function testCreateModelValidationMissingProvider(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_model = 'gpt-4', + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString( + "Required field 'llm_provider' is missing or empty", + $error->getResponseError() + ); + } + + public function testCreateModelValidationInvalidProvider(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'anthropic', + llm_model = 'gpt-4', + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + 
$this->assertStringContainsString( + "Invalid LLM provider: anthropic. Only 'openai' is supported.", + $error->getResponseError() + ); + } + + public function testCreateModelValidationTemperatureTooHigh(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + temperature = 5.0 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('Temperature must be between 0 and 2', $error->getResponseError()); + } + + public function testCreateModelValidationMaxTokensTooHigh(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + max_tokens = 100000 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('max_tokens must be between 1 and 32768', $error->getResponseError()); + } + + public function testCreateModelValidationKResultsTooLow(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 
'openai', + llm_model = 'gpt-4', + k_results = 0 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('k_results must be between 1 and 50', $error->getResponseError()); + } + + public function testCreateModelWithEncryptionIntegration(): void { + $query = "CREATE RAG MODEL 'encrypted_test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'You are a helpful assistant.', + temperature = 0.7, + max_tokens = 1000, + k_results = 5 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the manticore client - createModel calls sendRequest 4 times (init tables + modelExists check + insert) + $mockClient = $this->createMock(HTTPClient::class); + + // Create mock responses for each call - successful operations return standard result format + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $modelExistsResponse = $this->createMock(Response::class); + 
$modelExistsResponse->method('getResult')->willReturn(Struct::fromData([['data' => [['count' => 0]]]])); + + $insertResponse = $this->createMock(Response::class); + $insertResponse->method('getResult') + ->willReturn(Struct::fromData([['total' => 1, 'error' => '', 'warning' => '']])); + + $mockClient->expects($this->exactly(4)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $modelExistsResponse, $insertResponse); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + if (!$task->isSucceed()) { + $error = $task->getError(); + $this->fail('Task failed: ' . $error::class . ' - ' . $error->getResponseError()); + } + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array}> $struct */ + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertCount(1, $struct[0]['data']); + $this->assertArrayHasKey('uuid', $struct[0]['data'][0]); + $this->assertIsString($struct[0]['data'][0]['uuid']); + } + + public function testDescribeModelWithApiKeyMasking(): void { + $query = "DESCRIBE RAG MODEL 'encrypted_test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the manticore client - describeModel calls sendRequest 3 times (init tables + getModelByUiidOrName) + $mockClient = $this->createMock(HTTPClient::class); + + // Create mock responses for each call + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + 
$initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $selectResponse = $this->createMock(Response::class); + // getModelByUiidOrName expects getResult()[0]['data'][0] to be the model + $selectResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'total' => 1, + 'error' => '', + 'warning' => '', + 'data' => [ + [ + 'uuid' => 'encrypted-uuid-123', + 'name' => 'encrypted_test_model', + 'llm_provider' => 'openai', + 'llm_api_key' => '', + 'style_prompt' => 'You are a helpful assistant.', + 'settings' => '{"temperature":0.7,"max_tokens":1000,"k_results":5}', + 'created_at' => '2023-01-01 00:00:00', + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->exactly(3)) // initializeTables (2 calls) + getModelByUiidOrName (1 call) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $selectResponse); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array>}> $struct */ + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertGreaterThan(0, sizeof($struct[0]['data'])); + } + + public function testEncryptionKeyFileIntegration(): void { + // Create a temporary key file for testing + $tempKeyFile = sys_get_temp_dir() . '/test_buddy_key_' . uniqid() . 
'.key'; + $testKey = 'integration-test-key-12345'; + file_put_contents($tempKeyFile, $testKey); + + try { + $query = "CREATE RAG MODEL 'keyfile_test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'You are a helpful assistant.', + temperature = 0.7, + max_tokens = 1000, + k_results = 5 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the manticore client + $mockClient = $this->createMock(HTTPClient::class); + + // Create mock responses + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $modelExistsResponse = $this->createMock(Response::class); + $modelExistsResponse->method('getResult')->willReturn(Struct::fromData([['data' => [['count' => 0]]]])); + + $insertResponse = $this->createMock(Response::class); + $insertResponse->method('getResult')->willReturn( + Struct::fromData([['total' => 1, 'error' => '', 'warning' => '']]) + ); + + $mockClient->expects($this->exactly(4)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $modelExistsResponse, $insertResponse); + $handler->setManticoreClient($mockClient); + + try { + echo "About to call handler->run()...\n"; + $task = $handler->run(); + echo "Handler->run() completed\n"; + + $this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + } catch (Exception $e) { + echo 'Exception: ' . $e->getMessage() . "\n"; + echo 'Stack trace: ' . $e->getTraceAsString() . 
"\n"; + throw $e; + } + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array>}> $struct */ + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertGreaterThan(0, sizeof($struct[0]['data'])); + } finally { + // Clean up the temporary key file + if (file_exists($tempKeyFile)) { + unlink($tempKeyFile); + } + } + } + + public function testHandleConversationNewQuestionGeneratesNewContext(): void { + $query = "CALL CONVERSATIONAL_RAG('Show me action movies', 'movies', 'model-uuid', 'content', 'conv-uuid')"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('content', $payload->params['content_fields']); + + $handler = new RagHandler( + $payload, $this->createMockLLMProviderManager( + [ + ['content' => 'NEW_SEARCH', 'success' => true, 'metadata' => []], // classifyIntent response + [ + 'content' => 'SEARCH_QUERY: action movies\nEXCLUDE_QUERY: none', + 'success' => true, + 'metadata' => [], + ], // generateQueries response + ['content' => 'YES', 'success' => true, 'metadata' => []], // detectExpansionIntent response + [ + 'content' => 'Here are some action movies!', + 'metadata' => ['tokens_used' => 120], + 'success' => true, + ], // generateResponse + ] + ) + ); + + // Mock the manticore client with all expected calls + $mockClient = $this->createMock(HTTPClient::class); + + // Expected responses in order: + // 1-2: initializeTables (model and conversation tables) + $initResponse = $this->createMock(Response::class); + $initResponse->method('hasError')->willReturn(false); + + // 3: getModelByUuidOrName + $modelResponse = $this->createMock(Response::class); + $modelResponse->method('hasError')->willReturn(false); + 
$modelResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'model-uuid', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_api_key' => '', + 'style_prompt' => 'You are a helpful assistant.', + 'settings' => '{"temperature":0.7,"max_tokens":1000,"k_results":5}', + 'created_at' => '2023-01-01 00:00:00', + ], + ], + ], + ] + ) + ); + + // 4: getConversationHistory + $historyResponse = $this->createMock(Response::class); + $historyResponse->method('hasError')->willReturn(false); + $historyResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'total' => 0, + 'error' => '', + 'warning' => '', + 'data' => [], + ], + ] + ) + ); + + // 5: saveMessage (user) + $saveUserResponse = $this->createMock(Response::class); + $saveUserResponse->method('hasError')->willReturn(false); + $saveUserResponse->method('getResult')->willReturn( + Struct::fromData( + [ + 'total' => 1, + 'error' => '', + 'warning' => '', + 'data' => [], + ] + ) + ); + + // 6: getConversationHistoryForQueryGeneration + $queryHistoryResponse = $this->createMock(Response::class); + $queryHistoryResponse->method('hasError')->willReturn(false); + $queryHistoryResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'total' => 0, + 'error' => '', + 'warning' => '', + 'data' => [], + ], + ] + ) + ); + + // 7: detectVectorField (DESCRIBE table for main search) + $describeResponse = $this->createMock(Response::class); + $describeResponse->method('hasError')->willReturn(false); + $describeResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'FLOAT_VECTOR(1536)'], + ], + ], + ] + ) + ); + + // 8: getExcludedIds (KNN search for exclusions - but since exclude_query is 'none', this might not be called) + // Since exclude_query is 'none', getExcludedIds returns [] + // So no call 
here + + // 9: detectVectorField (DESCRIBE table for main search) + $describeResponse2 = $this->createMock(Response::class); + $describeResponse2->method('hasError')->willReturn(false); + $describeResponse2->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'FLOAT_VECTOR(1536)'], + ], + ], + ] + ) + ); + + + + // 8: performSearchWithExcludedIds (main KNN search) + $searchResponse = $this->createMock(Response::class); + $searchResponse->method('hasError')->willReturn(false); + $searchResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['id' => 1, 'content' => 'Action movie content...', 'knn_dist' => 0.1], + ], + ], + ] + ) + ); + + // 9: saveMessage (user with context) + $saveUserContextResponse = $this->createMock(Response::class); + $saveUserContextResponse->method('hasError')->willReturn(false); + + // 10: saveMessage (assistant) + $saveAssistantResponse = $this->createMock(Response::class); + $saveAssistantResponse->method('hasError')->willReturn(false); + + $callCounter = 0; + $responses = [ + $initResponse, $initResponse, // initializeTables + $modelResponse, // getModelByUuidOrName + $historyResponse, // getConversationHistory + $saveUserResponse, // saveMessage user initial + $queryHistoryResponse, // getConversationHistoryForQueryGeneration + $describeResponse, // detectVectorField for main search + $searchResponse, // performSearchWithExcludedIds + $saveUserContextResponse, // saveMessage user with context + $saveAssistantResponse, // saveMessage assistant + ]; + + $mockClient->method('sendRequest') + ->willReturnCallback( + function ($sql) use (&$callCounter, $responses) { + echo 'DB Call #' . (++$callCounter) . ': ' . substr($sql, 0, 100) . 
"...\n"; + if ($callCounter > sizeof($responses)) { + echo "ERROR: More calls than expected responses!\n"; + echo 'Available responses: ' . sizeof($responses) . "\n"; + echo 'This is call #' . $callCounter . "\n"; + throw new Exception("Unexpected database call #$callCounter"); + } + $response = $responses[$callCounter - 1]; + echo ' Returning response type: ' . $response::class . "\n"; + return $response; + } + ); + + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + if (!$task->isSucceed()) { + $error = $task->getError(); + $this->fail('Task failed: ' . $error->getMessage()); + } + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array}> $struct */ + + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertArrayHasKey('conversation_uuid', $struct[0]['data'][0]); + $this->assertArrayHasKey('response', $struct[0]['data'][0]); + $this->assertArrayHasKey('sources', $struct[0]['data'][0]); + $this->assertEquals('conv-uuid', $struct[0]['data'][0]['conversation_uuid']); + } + + /** + * Create a mock LLM provider manager with predefined responses + * + * @param array> $responses Array of LLM response arrays + * @return LLMProviderManager + */ + private function createMockLLMProviderManager(array $responses): LLMProviderManager { + $mockProviderManager = $this->createMock(LLMProviderManager::class); + $mockProvider = $this->createMock(BaseProvider::class); + + $mockProviderManager->method('getConnection')->willReturn($mockProvider); + + $callCount = 0; + $mockProvider->method('generateResponse') + ->willReturnCallback( + function ($_prompt, $_options = []) use (&$responses, &$callCount) { + unset($_prompt, $_options); + if ($callCount >= sizeof($responses)) { + throw new \Exception( + 'Too many LLM calls: expected ' . sizeof($responses) . ', got ' . 
($callCount + 1) + ); + } + $result = $responses[$callCount]; + $callCount++; + return $result; + } + ); + + return $mockProviderManager; + } +} diff --git a/test/Plugin/ConversationalRag/ConversationManagerSqlValidationTest.php b/test/Plugin/ConversationalRag/ConversationManagerSqlValidationTest.php new file mode 100644 index 00000000..ef0dcc67 --- /dev/null +++ b/test/Plugin/ConversationalRag/ConversationManagerSqlValidationTest.php @@ -0,0 +1,435 @@ +createMock(HTTPClient::class); + $conversationManager = new ConversationManager($mockClient); + + // Mock successful response + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + return $this->validateInsertSql($sql); + } + ) + ) + ->willReturn($mockResponse); + + $conversationManager->saveMessage( + 'conv-123', + 'model-456', + 'user', + 'Hello, how are you?', + 150 + ); + } + + /** + * Validate INSERT SQL structure and values + */ + private function validateInsertSql(string $sql): bool { + $this->validateBasicSqlStructure($sql); + $this->validateValuesCount($sql); + $this->validateTableNameNotInValues($sql); + $this->validateQuotedStrings($sql); + + return true; + } + + /** + * Validate basic SQL structure + */ + private function validateBasicSqlStructure(string $sql): void { + $this->assertStringStartsWith('INSERT INTO rag_conversations', $sql); + $this->assertStringContainsString( + '(conversation_uuid, model_uuid, created_at, role, message, tokens_used, intent, ' + . 
'search_query, exclude_query, excluded_ids, ttl)', + $sql + ); + $this->assertStringContainsString('VALUES (', $sql); + } + + /** + * Validate that VALUES clause has exactly 11 values + */ + private function validateValuesCount(string $sql): void { + $valuesMatch = []; + if (!preg_match('/VALUES\s*\((.*)\)/', $sql, $valuesMatch)) { + return; + } + + $values = $valuesMatch[1]; + $valueArray = $this->parseValuesFromSql($values); + + $valueCount = sizeof(array_filter($valueArray)); + if ($valueCount !== 11) { + echo '[DEBUG SQL] ' . $sql . "\n"; + echo "[DEBUG VALUE COUNT] $valueCount\n"; + echo '[DEBUG VALUES] ' . print_r($valueArray, true) . "\n"; + } + $this->assertEquals( + 11, + $valueCount, + 'VALUES clause should have exactly 11 values matching the column count' + ); + } + + /** + * Parse values from SQL VALUES clause, handling quoted strings properly + * @return array + */ + private function parseValuesFromSql(string $values): array { + $valueArray = []; + $currentValue = ''; + $inQuotes = false; + $quoteChar = ''; + + for ($i = 0; $i < strlen($values); $i++) { + $char = $values[$i]; + if (!$inQuotes && ($char === "'" || $char === '"')) { + $inQuotes = true; + $quoteChar = $char; + $currentValue .= $char; + } elseif ($inQuotes && $char === $quoteChar) { + $inQuotes = false; + $currentValue .= $char; + } elseif (!$inQuotes && $char === ',') { + $valueArray[] = trim($currentValue); + $currentValue = ''; + } else { + $currentValue .= $char; + } + } + $valueArray[] = trim($currentValue); // Add last value + + return $valueArray; + } + + /** + * Validate that table name doesn't appear in VALUES clause + */ + private function validateTableNameNotInValues(string $sql): void { + $this->assertDoesNotMatchRegularExpression( + '/VALUES\s*\([^)]*rag_conversations[^)]*\)/', + $sql, + 'Table name should not appear in VALUES clause' + ); + } + + /** + * Validate quoted strings in VALUES clause + */ + private function validateQuotedStrings(string $sql): void { + 
$this->assertMatchesRegularExpression( + "/VALUES\s*\('[^']*'/", + $sql, + 'First value should be a quoted string' + ); + } + + + /** + * Test saveMessage with all optional parameters generates valid SQL + */ + public function testSaveMessageWithAllParametersGeneratesValidInsertSql(): void { + $mockClient = $this->createMock(HTTPClient::class); + $conversationManager = new ConversationManager($mockClient); + + // Mock successful response + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + return $this->validateInsertSqlWithAllParameters($sql); + } + ) + ) + ->willReturn($mockResponse); + + $conversationManager->saveMessage( + 'conv-with-all-params', + 'model-with-all-params', + 'user', + 'Message with all parameters', + 100, + 'NEW_SEARCH', + 'search query here', + 'exclude query here', + ['1', '2', '3'] + ); + } + + /** + * Validate INSERT SQL with all parameters + */ + private function validateInsertSqlWithAllParameters(string $sql): bool { + $this->validateBasicSqlStructure($sql); + $this->validateAllFieldsPresent($sql); + $this->validateValuesCount($sql); + + return true; + } + + /** + * Validate that all expected fields are present in SQL + */ + private function validateAllFieldsPresent(string $sql): void { + $expectedFields = [ + 'conversation_uuid', 'model_uuid', 'created_at', 'role', 'message', + 'tokens_used', 'intent', 'search_query', 'exclude_query', 'excluded_ids', 'ttl', + ]; + + foreach ($expectedFields as $field) { + $this->assertStringContainsString($field, $sql); + } + } + + /** + * Test that saveMessage properly handles special characters in SQL + */ + public function testSaveMessageHandlesSpecialCharactersInSql(): void { + $mockClient = $this->createMock(HTTPClient::class); + $conversationManager = new ConversationManager($mockClient); + + // Mock successful response + 
$mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + // Check that special characters are properly escaped + $this->assertStringContainsString("O\\'Reilly", $sql); + $this->assertStringContainsString("line\nbreak", $sql); + $this->assertStringContainsString('quote"test', $sql); + + // Validate that the SQL is still syntactically valid + $this->assertStringStartsWith('INSERT INTO rag_conversations', $sql); + $this->assertStringContainsString('VALUES (', $sql); + + return true; + } + ) + ) + ->willReturn($mockResponse); + + $conversationManager->saveMessage( + 'conv-123', + 'model-456', + 'user', + "O'Reilly's book has a line\nbreak and quote\"test", + 150 + ); + } + + /** + * Test that getConversationHistory generates valid SELECT SQL + */ + public function testGetConversationHistoryGeneratesValidSelectSql(): void { + $mockClient = $this->createMock(HTTPClient::class); + $conversationManager = new ConversationManager($mockClient); + + // Mock response with conversation data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['role' => 'user', 'message' => 'Hello'], + ['role' => 'assistant', 'message' => 'Hi there!'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + // Validate SELECT SQL structure + $this->assertStringStartsWith('SELECT role, message FROM rag_conversations', $sql); + $this->assertStringContainsString("WHERE conversation_uuid = 'conv-123'", $sql); + $this->assertStringContainsString('ORDER BY created_at ASC', $sql); + $this->assertStringContainsString('LIMIT 100', $sql); + + return true; + } + ) + ) + ->willReturn($mockResponse); + + $result 
= $conversationManager->getConversationHistory('conv-123'); + $this->assertEquals("user: Hello\nassistant: Hi there!\n", $result); + } + + /** + * Test that getLatestSearchContext generates valid SELECT SQL with proper filtering + */ + public function testGetLatestSearchContextGeneratesValidSelectSql(): void { + $mockClient = $this->createMock(HTTPClient::class); + $conversationManager = new ConversationManager($mockClient); + + // Mock response with search context data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'search_query' => 'movies about space', + 'exclude_query' => 'Star Wars', + 'excluded_ids' => '[1,2,3]', + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + // Validate SELECT SQL structure for search context + $this->assertStringStartsWith( + 'SELECT search_query, exclude_query, excluded_ids FROM rag_conversations', + $sql + ); + $this->assertStringContainsString("WHERE conversation_uuid = 'conv-123'", $sql); + $this->assertStringContainsString("AND role = 'user'", $sql); + $this->assertStringContainsString("AND intent != 'CONTENT_QUESTION'", $sql); + $this->assertStringContainsString('ORDER BY created_at DESC', $sql); + $this->assertStringContainsString('LIMIT 1', $sql); + + return true; + } + ) + ) + ->willReturn($mockResponse); + + $result = $conversationManager->getLatestSearchContext('conv-123'); + $this->assertIsArray($result); + $this->assertEquals('movies about space', $result['search_query']); + } + + /** + * Test that getConversationHistoryForQueryGeneration generates valid filtered SELECT SQL + */ + public function testGetConversationHistoryForQueryGenerationGeneratesValidSelectSql(): void { + $mockClient = $this->createMock(HTTPClient::class); + $conversationManager = new 
ConversationManager($mockClient); + + // Mock response with filtered conversation data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['role' => 'user', 'message' => 'Show me movies'], + ['role' => 'assistant', 'message' => 'Here are movies...'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + // Validate filtered SELECT SQL structure + $this->assertStringStartsWith('SELECT role, message FROM rag_conversations', $sql); + $this->assertStringContainsString("WHERE conversation_uuid = 'conv-123'", $sql); + $this->assertStringContainsString("AND intent != 'CONTENT_QUESTION'", $sql); + $this->assertStringContainsString('ORDER BY created_at ASC', $sql); + $this->assertStringContainsString('LIMIT 50', $sql); + + return true; + } + ) + ) + ->willReturn($mockResponse); + + $result = $conversationManager->getConversationHistoryForQueryGeneration('conv-123', 50); + $this->assertEquals("user: Show me movies\nassistant: Here are movies...\n", $result); + } + + /** + * Test that initializeTable generates valid CREATE TABLE SQL + */ + public function testInitializeTableGeneratesValidCreateTableSql(): void { + $mockClient = $this->createMock(HTTPClient::class); + $conversationManager = new ConversationManager($mockClient); + + // Mock successful response + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + // Validate CREATE TABLE SQL structure + $this->assertStringStartsWith('CREATE TABLE IF NOT EXISTS rag_conversations', $sql); + $this->assertStringContainsString('conversation_uuid string', $sql); + $this->assertStringContainsString('model_uuid string', $sql); + 
$this->assertStringContainsString('created_at bigint', $sql); + $this->assertStringContainsString('role string', $sql); + $this->assertStringContainsString('message text', $sql); + $this->assertStringContainsString('tokens_used int', $sql); + $this->assertStringContainsString('intent string', $sql); + $this->assertStringContainsString('search_query text', $sql); + $this->assertStringContainsString('exclude_query text', $sql); + $this->assertStringContainsString('excluded_ids text', $sql); + $this->assertStringContainsString('ttl bigint', $sql); + + return true; + } + ) + ) + ->willReturn($mockResponse); + + $conversationManager->initializeTable($mockClient); + } +} diff --git a/test/Plugin/ConversationalRag/ConversationManagerTest.php b/test/Plugin/ConversationalRag/ConversationManagerTest.php new file mode 100644 index 00000000..00bb6f90 --- /dev/null +++ b/test/Plugin/ConversationalRag/ConversationManagerTest.php @@ -0,0 +1,353 @@ +createMock(HTTPClient::class)); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock successful response + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('CREATE TABLE IF NOT EXISTS rag_conversations')) + ->willReturn($mockResponse); + + $conversationManager->initializeTable($mockClient); + } + + public function testInitializeTableAlreadyExists(): void { + $conversationManager = new ConversationManager($this->createMock(HTTPClient::class)); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock successful response (table already exists) + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('CREATE TABLE IF NOT EXISTS rag_conversations')) + 
->willReturn($mockResponse); + + $conversationManager->initializeTable($mockClient); + } + + public function testSaveMessageSuccessful(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock successful response + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + return strpos($sql, 'INSERT INTO rag_conversations') !== false && + strpos($sql, 'conversation_uuid') !== false && + strpos($sql, 'model_uuid') !== false && + strpos($sql, 'role') !== false && + strpos($sql, 'message') !== false; + } + ) + ) + ->willReturn($mockResponse); + + $conversationManager->saveMessage( + 'conv-123', + 'model-456', + 'user', + 'Hello, how are you?', + 150 + ); + } + + public function testGetConversationHistoryOrdered(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock response with conversation data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['role' => 'user', 'message' => 'Hello'], + ['role' => 'assistant', 'message' => 'Hi there!'], + ['role' => 'user', 'message' => 'How are you?'], + ['role' => 'assistant', 'message' => 'I am doing well, thank you!'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('SELECT role, message FROM rag_conversations')) + ->willReturn($mockResponse); + + $result = $conversationManager->getConversationHistory('conv-123'); + + $expected = "user: Hello\nassistant: Hi there!\nuser: How are you?\nassistant: I am doing well, thank you!\n"; + 
$this->assertEquals($expected, $result); + } + + + public function testGetConversationHistoryEmpty(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock response with no data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($mockResponse); + + $result = $conversationManager->getConversationHistory('conv-123'); + + $this->assertEquals('', $result); + } + + public function testGetConversationHistoryWithLimit(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock response with limited data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['role' => 'user', 'message' => 'First message'], + ['role' => 'assistant', 'message' => 'First response'], + ['role' => 'user', 'message' => 'Second message'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('SELECT role, message FROM rag_conversations')) + ->willReturn($mockResponse); + + $result = $conversationManager->getConversationHistory('conv-123', 5); + + $expected = "user: First message\nassistant: First response\nuser: Second message\n"; + $this->assertEquals($expected, $result); + } + + public function testGetLatestSearchContextReturnsCorrectData(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock response with search context 
data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'search_query' => 'movies about space', + 'exclude_query' => 'Star Wars', + 'excluded_ids' => '[1,2,3]', + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + return str_contains($sql, "intent != 'CONTENT_QUESTION'"); + } + ) + ) + ->willReturn($mockResponse); + + $result = $conversationManager->getLatestSearchContext('conv-123'); + + $this->assertIsArray($result); + $this->assertEquals('movies about space', $result['search_query']); + $this->assertEquals('Star Wars', $result['exclude_query']); + $this->assertEquals('[1,2,3]', $result['excluded_ids']); + } + + public function testGetLatestSearchContextNoContextFound(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock response with no data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($mockResponse); + + $result = $conversationManager->getLatestSearchContext('conv-123'); + + $this->assertNull($result); + } + + public function testGetConversationHistoryForQueryGenerationFiltersContentQuestions(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock response with conversation data AFTER SQL filtering (CONTENT_QUESTION records excluded) + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + 
$mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['role' => 'user', 'message' => 'Show me movies about space', 'intent' => 'NEW_SEARCH'], + ['role' => 'assistant', 'message' => 'Here are some space movies...', 'intent' => null], + ['role' => 'user', 'message' => 'Show me more like this', 'intent' => 'INTEREST'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + return strpos($sql, 'intent != \'CONTENT_QUESTION\'') !== false; + } + ) + ) + ->willReturn($mockResponse); + + $result = $conversationManager->getConversationHistoryForQueryGeneration('conv-123'); + + // Should exclude the CONTENT_QUESTION exchange + $expected = "user: Show me movies about space\n" . + "assistant: Here are some space movies...\n" . + "user: Show me more like this\n"; + $this->assertEquals($expected, $result); + } + + public function testSaveMessageWithSearchContextStoresAllFields(): void { + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $conversationManager = new ConversationManager($mockClient); + + // Mock successful response + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with( + $this->callback( + function ($sql) { + return strpos($sql, 'INSERT INTO rag_conversations') !== false && + strpos($sql, 'intent') !== false && + strpos($sql, 'search_query') !== false && + strpos($sql, 'exclude_query') !== false && + strpos($sql, 'excluded_ids') !== false; + } + ) + ) + ->willReturn($mockResponse); + + $conversationManager->saveMessage( + 'conv-123', + 'model-456', + 'user', + 'Show me movies about space', + 150, + 'NEW_SEARCH', + 'movies about space', + 'Star Wars', + ['1', '2', '3'] + ); + } +} diff --git a/test/Plugin/ConversationalRag/ConversationValidationTest.php 
b/test/Plugin/ConversationalRag/ConversationValidationTest.php new file mode 100644 index 00000000..79ea7996 --- /dev/null +++ b/test/Plugin/ConversationalRag/ConversationValidationTest.php @@ -0,0 +1,485 @@ + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed(), 'Task should fail for missing llm_provider'); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error, 'Error should be QueryParseError'); + $this->assertStringContainsString( + "Required field 'llm_provider' is missing or empty", $error->getResponseError() + ); + } + + public function testMissingRequiredFieldLlmModel(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString("Required field 'llm_model' is missing or empty", $error->getResponseError()); + } + + public function testInvalidLlmProvider(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'invalid_provider', + llm_model = 'gpt-4', + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 
'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString( + "Invalid LLM provider: invalid_provider. Only 'openai' is supported.", $error->getResponseError() + ); + } + + public function testTemperatureBelowMinimum(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + temperature = -0.1 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('Temperature must be between 0 and 2', $error->getResponseError()); + } + + public function testTemperatureAboveMaximum(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + temperature = 2.1 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + 
$task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('Temperature must be between 0 and 2', $error->getResponseError()); + } + + public function testMaxTokensBelowMinimum(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + max_tokens = 0 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('max_tokens must be between 1 and 32768', $error->getResponseError()); + } + + public function testMaxTokensAboveMaximum(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + max_tokens = 32769 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('max_tokens must be between 1 and 32768', $error->getResponseError()); + } + + public function testKResultsBelowMinimum(): void 
{ + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + k_results = 0 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('k_results must be between 1 and 50', $error->getResponseError()); + } + + public function testKResultsAboveMaximum(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + k_results = 51 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + + $task = $handler->run(); + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(QueryParseError::class, $error); + $this->assertStringContainsString('k_results must be between 1 and 50', $error->getResponseError()); + } + + public function testValidModelConfiguration(): void { + $query = "CREATE RAG MODEL 'test_model' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + style_prompt = 'You are a helpful assistant.', + temperature = 0.7, + max_tokens = 1000, + k_results = 5 + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => 
'', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('create_model', $payload->action); + $this->assertEquals('test_model', $payload->params['name']); + $this->assertEquals('openai', $payload->params['llm_provider']); + $this->assertEquals('gpt-4', $payload->params['llm_model']); + } + + public function testValidTemperatureEdgeCases(): void { + $query1 = "CREATE RAG MODEL 'test_model1' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + temperature = 0 + )"; + + $payload1 = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query1, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals(0, $payload1->params['temperature']); + + $query2 = "CREATE RAG MODEL 'test_model2' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + temperature = 2 + )"; + + $payload2 = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query2, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals(2, $payload2->params['temperature']); + } + + public function testValidMaxTokensEdgeCases(): void { + $query1 = "CREATE RAG MODEL 'test_model1' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + max_tokens = 1 + )"; + + $payload1 = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query1, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals(1, $payload1->params['max_tokens']); + + $query2 = "CREATE RAG MODEL 'test_model2' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + max_tokens = 32768 + )"; + + $payload2 = RagPayload::fromRequest( + 
Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query2, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals(32768, $payload2->params['max_tokens']); + } + + public function testValidKResultsEdgeCases(): void { + $query1 = "CREATE RAG MODEL 'test_model1' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + k_results = 1 + )"; + + $payload1 = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query1, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals(1, $payload1->params['k_results']); + + $query2 = "CREATE RAG MODEL 'test_model2' ( + llm_provider = 'openai', + llm_model = 'gpt-4', + k_results = 50 + )"; + + $payload2 = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query2, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals(50, $payload2->params['k_results']); + } +} diff --git a/test/Plugin/ConversationalRag/ConversationalPayloadTest.php b/test/Plugin/ConversationalRag/ConversationalPayloadTest.php new file mode 100644 index 00000000..2e9205ae --- /dev/null +++ b/test/Plugin/ConversationalRag/ConversationalPayloadTest.php @@ -0,0 +1,308 @@ + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('create_model', $payload->action); + $this->assertEquals('test_model', $payload->params['name']); + $this->assertEquals('openai', $payload->params['llm_provider']); + $this->assertEquals('gpt-4', $payload->params['llm_model']); + $this->assertEquals('You are a helpful assistant.', 
$payload->params['style_prompt']); + $this->assertEquals(0.7, $payload->params['temperature']); + $this->assertEquals(1000, $payload->params['max_tokens']); + $this->assertEquals(5, $payload->params['k_results']); + } + + public function testSQLShowModelsParsing(): void { + $query = 'SHOW RAG MODELS'; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('show_models', $payload->action); + } + + public function testSQLDescribeModelParsing(): void { + $query = "DESCRIBE RAG MODEL 'sfa-2742-dshd6'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('describe_model', $payload->action); + $this->assertEquals('sfa-2742-dshd6', $payload->params['model_name_or_uuid']); + } + + public function testSQLDropModelParsing(): void { + $query = "DROP RAG MODEL 'test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('drop_model', $payload->action); + $this->assertEquals('test_model', $payload->params['model_name_or_uuid']); + } + + + + public function testHTTPNotSupported(): void { + $this->expectException(QueryParseError::class); + + RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => (string)json_encode( + [ + 'id' => 'test_model', + 'name' => 'Test Model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4o', + ] + ), + 
'format' => RequestFormat::JSON, + 'endpointBundle' => ManticoreEndpoint::Search, + 'path' => '/rag/models', + 'httpMethod' => 'POST', + ] + ) + ); + } + + + + public function testInvalidQueryThrowsException(): void { + $this->expectException(QueryParseError::class); + + RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => 'INVALID RAG QUERY', + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + } + + public function testEscapedQuotesInConversationParams(): void { + $query = "CALL CONVERSATIONAL_RAG('I\\'m like programming, ". + "lets talk about it', 'docs', 'test_model', 'content', 'conversation_1')"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('conversation', $payload->action); + $this->assertEquals("I'm like programming, lets talk about it", $payload->params['query']); + $this->assertEquals('docs', $payload->params['table']); + $this->assertEquals('test_model', $payload->params['model_uuid']); + $this->assertEquals('content', $payload->params['content_fields']); + $this->assertEquals('conversation_1', $payload->params['conversation_uuid']); + } + + public function testConversationParsingWithContentFields(): void { + $query = "CALL CONVERSATIONAL_RAG('test query', 'docs', 'model123', 'title,content')"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('conversation', $payload->action); + $this->assertEquals('test query', $payload->params['query']); + $this->assertEquals('docs', 
$payload->params['table']); + $this->assertEquals('model123', $payload->params['model_uuid']); + $this->assertEquals('title,content', $payload->params['content_fields']); + } + + public function testConversationParsingWithSingleCustomField(): void { + $query = "CALL CONVERSATIONAL_RAG('test query', 'docs', 'model123', 'summary')"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('summary', $payload->params['content_fields']); + } + + public function testMissingContentFieldsThrowsException(): void { + $this->expectException(QueryParseError::class); + + $query = "CALL CONVERSATIONAL_RAG('test query', 'docs', 'model123')"; + + RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + } + + public function testEmptyContentFieldsThrowsException(): void { + $this->expectException(QueryParseError::class); + + $query = "CALL CONVERSATIONAL_RAG('test query', 'docs', 'model123', '')"; + + RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + } + + public function testWhitespaceContentFieldsThrowsException(): void { + $this->expectException(QueryParseError::class); + + $query = "CALL CONVERSATIONAL_RAG('test query', 'docs', 'model123', ' ')"; + + RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + } 
+ + +} diff --git a/test/Plugin/ConversationalRag/ConversationalRagTest.php b/test/Plugin/ConversationalRag/ConversationalRagTest.php new file mode 100644 index 00000000..8924bc90 --- /dev/null +++ b/test/Plugin/ConversationalRag/ConversationalRagTest.php @@ -0,0 +1,478 @@ + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock responses for initializeTables (2 calls) + modelExists (1 call) + createModel (1 call) + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $modelExistsResponse = $this->createMock(Response::class); + $modelExistsResponse->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $insertResponse = $this->createMock(Response::class); + $insertResponse->method('getResult')->willReturn( + Struct::fromData([['total' => 1, 'error' => '', 'warning' => '']]) + ); + + $mockClient->expects($this->exactly(4)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $modelExistsResponse, $insertResponse); + + $handler->setManticoreClient($mockClient); + $task = $handler->run(); + + if (!$task->isSucceed()) { + $error = $task->getError(); + $this->fail('Task failed: ' . $error::class . ' - ' . 
$error->getResponseError()); + } + + $result = $task->getResult(); + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var array}> $struct */ + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + $this->assertCount(1, $struct[0]['data']); + $this->assertArrayHasKey('uuid', $struct[0]['data'][0]); + $this->assertIsString($struct[0]['data'][0]['uuid']); + } + + public function testShowModelsEndToEnd(): void { + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => 'SHOW RAG MODELS', + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock responses for initializeTables (2 calls) + getAllModels (1 call) + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $selectResponse = $this->createMock(Response::class); + $selectResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [], 'total' => 0, 'error' => '', 'warning' => '']]) + ); + + $mockClient->expects($this->exactly(3)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $selectResponse); + + $handler->setManticoreClient($mockClient); + $task = $handler->run(); + + $this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + $struct = (array)$result->getStruct(); + $this->assertIsArray($struct); + /** @var 
array>}> $struct */ + $this->assertCount(1, $struct); + + // For empty results, 'data' key may not be present in TaskResult + if (!isset($struct[0]['data'])) { + // Empty result case - no models found + return; + } + + $data = $struct[0]['data']; + + // Should return array of models, possibly empty if no models exist + $this->assertIsArray($data); + if (empty($data)) { + return; + } + + $this->assertArrayHasKey('uuid', $data[0]); + $this->assertArrayHasKey('name', $data[0]); + $this->assertArrayHasKey('llm_provider', $data[0]); + } + + public function testDescribeModelEndToEnd(): void { + $query = "DESCRIBE RAG MODEL 'functional_test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock responses for initializeTables (2 calls) + getModelByUuidOrName (1 call) + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $selectResponse = $this->createMock(Response::class); + $selectResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'total' => 1, + 'error' => '', + 'warning' => '', + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'functional_test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'llm_api_key' => '', // Empty for security + 'style_prompt' => 'You are a helpful assistant.', + 'temperature' => 0.7, + 'max_tokens' => 1000, + 'k_results' => 5, + ], + ], + ], + ] + ) + ); 
+ + $mockClient->expects($this->exactly(3)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $selectResponse); + + $handler->setManticoreClient($mockClient); + $task = $handler->run(); + + $this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + /** @var array> $struct */ + $struct = $result->getStruct(); + $this->assertCount(1, $struct); + $this->assertArrayHasKey('data', $struct[0]); + + /** @var array> $data */ + $data = $struct[0]['data']; + $this->assertGreaterThan(0, sizeof($data)); + + // Should have property-value pairs for the model description + $this->assertEquals('uuid', $data[0]['property']); + $this->assertIsString($data[0]['value']); + } + + public function testDropModelEndToEnd(): void { + $query = "DROP RAG MODEL 'functional_test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock responses for initializeTables (2) + getModel (1) + delete model (1) + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $getModelResponse = $this->createMock(Response::class); + $getModelResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'total' => 1, + 'error' => '', + 'warning' => '', + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'functional_test_model', + ], + ], + ], + ] 
+ ) + ); + + $deleteModelResponse = $this->createMock(Response::class); + $deleteModelResponse->method('getResult')->willReturn( + Struct::fromData([['total' => 1, 'error' => '', 'warning' => '']]) + ); + + $mockClient->expects($this->exactly(4)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $getModelResponse, $deleteModelResponse); + + $handler->setManticoreClient($mockClient); + $task = $handler->run(); + + $this->assertTrue($task->isSucceed()); + $result = $task->getResult(); + + $this->assertInstanceOf(TaskResult::class, $result); + /** @var array> $struct */ + $struct = $result->getStruct(); + $this->assertCount(1, $struct); + $this->assertEquals(0, $struct[0]['total']); + } + + public function testCreateModelValidationErrorEndToEnd(): void { + $query = "CREATE RAG MODEL 'invalid_model' ( + llm_model = 'gpt-4', + )"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $mockClient = $this->createMock(HTTPClient::class); + $handler->setManticoreClient($mockClient); + $task = $handler->run(); + + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(\Manticoresearch\Buddy\Core\Error\QueryParseError::class, $error); + $this->assertStringContainsString("Required field 'llm_provider' is missing", $error->getResponseError()); + } + + public function testDescribeNonExistentModelEndToEnd(): void { + $query = "DESCRIBE RAG MODEL 'non_existent_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + 
+ // Mock the HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock responses for initializeTables (2 calls) + getModelByUuidOrName (1 call that returns no results) + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + $initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $selectResponse = $this->createMock(Response::class); + $selectResponse->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $mockClient->expects($this->exactly(3)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $selectResponse); + + $handler->setManticoreClient($mockClient); + $task = $handler->run(); + + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(\Manticoresearch\Buddy\Core\Error\ManticoreSearchClientError::class, $error); + } + + public function testDropNonExistentModelEndToEnd(): void { + $query = "DROP RAG MODEL 'non_existent_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + + // Mock the HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock responses for initializeTables (2 calls) + getModel (1 call that returns no results) + $initResponse1 = $this->createMock(Response::class); + $initResponse1->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $initResponse2 = $this->createMock(Response::class); + 
$initResponse2->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $getModelResponse = $this->createMock(Response::class); + $getModelResponse->method('getResult')->willReturn( + Struct::fromData([['total' => 0, 'error' => '', 'warning' => '']]) + ); + + $mockClient->expects($this->exactly(3)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($initResponse1, $initResponse2, $getModelResponse); + + $handler->setManticoreClient($mockClient); + $task = $handler->run(); + + $this->assertFalse($task->isSucceed()); + $error = $task->getError(); + $this->assertInstanceOf(\Manticoresearch\Buddy\Core\Error\ManticoreSearchClientError::class, $error); + } +} diff --git a/test/Plugin/ConversationalRag/DynamicThresholdManagerTest.php b/test/Plugin/ConversationalRag/DynamicThresholdManagerTest.php new file mode 100644 index 00000000..af616b2f --- /dev/null +++ b/test/Plugin/ConversationalRag/DynamicThresholdManagerTest.php @@ -0,0 +1,323 @@ +createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'no', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $result = $thresholdManager->calculateDynamicThreshold( + 'Show me comedies', + '', // No history + $mockProviderManager, + $modelConfig, + 0.8 + ); + + $this->assertEquals(0.8, $result['threshold']); // Should use base threshold + $this->assertEquals(0, $result['expansion_level']); + $this->assertFalse($result['is_expanded']); + $this->assertEquals(0, $result['expansion_percent']); + } + + public function testCalculateDynamicThresholdWithExpansion(): void { + $thresholdManager = new DynamicThresholdManager(); + + // Mock LLM provider that says yes to expansion + $mockProvider = 
$this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'yes', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $result = $thresholdManager->calculateDynamicThreshold( + 'What else do you have?', + "user: Show me comedies\nassistant: I recommend The Office\nuser: What else do you have?", + $mockProviderManager, + $modelConfig, + 0.8 + ); + + $this->assertGreaterThan(0.8, $result['threshold']); // Should be expanded + $this->assertEquals(1, $result['expansion_level']); + $this->assertTrue($result['is_expanded']); + $this->assertGreaterThan(0, $result['expansion_percent']); + } + + public function testCalculateDynamicThresholdMaxExpansion(): void { + $thresholdManager = new DynamicThresholdManager(); + + // Mock LLM provider that always says yes + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'yes', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + // Call multiple times to reach max expansion + for ($i = 0; $i < 6; $i++) { + $result = $thresholdManager->calculateDynamicThreshold( + 'What else?', + "user: Show me comedies\nassistant: I recommend The Office\nuser: What else?", + $mockProviderManager, + $modelConfig, + 0.8 + ); + } + + $this->assertEquals(5, $result['expansion_level']); // Max expansion level + $this->assertTrue($result['expansion_limit_reached']); + $this->assertEquals(0.8 * 1.2, $result['max_threshold']); // 20% expansion + } + + public 
function testDetectExpansionIntentNoHistory(): void { + $thresholdManager = new DynamicThresholdManager(); + + $mockProvider = $this->createMock(BaseProvider::class); + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + // Use reflection to access private method + $reflection = new ReflectionClass($thresholdManager); + $method = $reflection->getMethod('detectExpansionIntent'); + $method->setAccessible(true); + + $result = $method->invoke($thresholdManager, 'Show me movies', '', $mockProviderManager, $modelConfig); + + $this->assertFalse($result); // Should be false with no history + } + + public function testDetectExpansionIntentWithHistory(): void { + $thresholdManager = new DynamicThresholdManager(); + + // Mock LLM provider that says yes + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'yes', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + // Use reflection to access private method + $reflection = new ReflectionClass($thresholdManager); + $method = $reflection->getMethod('detectExpansionIntent'); + $method->setAccessible(true); + + $result = $method->invoke( + $thresholdManager, 'What else do you have?', + "user: Show me comedies\nassistant: I recommend The Office\nuser: What else do you have?", + $mockProviderManager, $modelConfig + ); + + $this->assertTrue($result); + } + + public function testDetectExpansionIntentLLMFailure(): void { + $thresholdManager = new DynamicThresholdManager(); + + // Mock LLM provider that fails + $mockProvider = 
$this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => false, + 'error' => 'API Error', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + // Use reflection to access private method + $reflection = new ReflectionClass($thresholdManager); + $method = $reflection->getMethod('detectExpansionIntent'); + $method->setAccessible(true); + + $result = $method->invoke( + $thresholdManager, 'What else?', + "user: Show me comedies\nassistant: I recommend The Office\nuser: What else?", + $mockProviderManager, $modelConfig + ); + + $this->assertFalse($result); // Should return false on failure + } + + public function testExpansionStateResetOnNewConversation(): void { + $thresholdManager = new DynamicThresholdManager(); + + // Mock LLM provider that says yes + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'yes', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + // First call with one conversation + $result1 = $thresholdManager->calculateDynamicThreshold( + 'What else?', + 'conversation1: user message', + $mockProviderManager, + $modelConfig, + 0.8 + ); + + $this->assertEquals(1, $result1['expansion_level']); + + // Second call with different conversation (should reset) + $result2 = $thresholdManager->calculateDynamicThreshold( + 'What else?', + 'conversation2: different user message', + $mockProviderManager, + $modelConfig, + 0.8 + ); + + $this->assertEquals(1, $result2['expansion_level']); 
// Should be 1, not 2 + } + + public function testExpansionStateResetOnTopicChange(): void { + $thresholdManager = new DynamicThresholdManager(); + + // Mock LLM provider - first says yes, then no (topic change) + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->expects($this->exactly(2)) + ->method('generateResponse') + ->willReturnOnConsecutiveCalls( + ['success' => true, 'content' => 'yes', 'metadata' => []], + ['success' => true, 'content' => 'no', 'metadata' => []] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + // First call - expansion + $result1 = $thresholdManager->calculateDynamicThreshold( + 'What else?', + 'conversation: user wants comedies', + $mockProviderManager, + $modelConfig, + 0.8 + ); + + $this->assertEquals(1, $result1['expansion_level']); + + // Second call - no expansion (topic change) + $result2 = $thresholdManager->calculateDynamicThreshold( + 'Now show me action movies', + 'conversation: user wants comedies, now wants action', + $mockProviderManager, + $modelConfig, + 0.8 + ); + + $this->assertEquals(0, $result2['expansion_level']); // Should reset to 0 + } +} diff --git a/test/Plugin/ConversationalRag/IntegrationTest.php b/test/Plugin/ConversationalRag/IntegrationTest.php new file mode 100644 index 00000000..dfce3de4 --- /dev/null +++ b/test/Plugin/ConversationalRag/IntegrationTest.php @@ -0,0 +1,362 @@ + Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('create_model', $payload->action); + $this->assertEquals('test_model', $payload->params['name']); + $this->assertEquals('openai', $payload->params['llm_provider']); + $this->assertEquals('gpt-4', $payload->params['llm_model']); + 
$this->assertEquals('You are a helpful assistant.', $payload->params['style_prompt']); + $this->assertEquals(0.7, $payload->params['temperature']); + $this->assertEquals(1000, $payload->params['max_tokens']); + $this->assertEquals(5, $payload->params['k_results']); + } + + + + + + public function testHandlerCanBeInstantiatedWithParsedPayload(): void { + $query = 'SHOW RAG MODELS'; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $handler = new RagHandler($payload); + $this->assertInstanceOf(RagHandler::class, $handler); + + // Verify the handler has the correct payload + $this->assertEquals('show_models', $payload->action); + } + + public function testFullShowModelsFlow(): void { + $query = 'SHOW RAG MODELS'; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('show_models', $payload->action); + } + + public function testFullDescribeModelFlow(): void { + $query = "DESCRIBE RAG MODEL 'test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('describe_model', $payload->action); + $this->assertEquals('test_model', $payload->params['model_name_or_uuid']); + } + + public function testFullDropModelFlow(): void { + $query = "DROP RAG MODEL 'test_model'"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => 
RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('drop_model', $payload->action); + $this->assertEquals('test_model', $payload->params['model_name_or_uuid']); + } + + public function testConversationalRagFlowNewSearch(): void { + $query = "CALL CONVERSATIONAL_RAG('What is machine learning?', 'docs', 'test_model', 'content')"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('conversation', $payload->action); + $this->assertEquals('What is machine learning?', $payload->params['query']); + $this->assertEquals('docs', $payload->params['table']); + $this->assertEquals('test_model', $payload->params['model_uuid']); + $this->assertEquals('content', $payload->params['content_fields']); + } + + public function testConversationalRagFlowWithTable(): void { + $query = "CALL CONVERSATIONAL_RAG('Search this table', 'my_table', 'test_model', 'content')"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('conversation', $payload->action); + $this->assertEquals('Search this table', $payload->params['query']); + $this->assertEquals('my_table', $payload->params['table']); + $this->assertEquals('test_model', $payload->params['model_uuid']); + $this->assertEquals('content', $payload->params['content_fields']); + } + + public function testConversationalRagWithMultipleContentFields(): void { + $query = "CALL CONVERSATIONAL_RAG('Find products', 'products', 'test_model', 'title,description,price')"; + + $payload = RagPayload::fromRequest( + Request::fromArray( + [ + 
'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('conversation', $payload->action); + $this->assertEquals('Find products', $payload->params['query']); + $this->assertEquals('products', $payload->params['table']); + $this->assertEquals('test_model', $payload->params['model_uuid']); + $this->assertEquals('title,description,price', $payload->params['content_fields']); + } + + public function testEndToEndModelLifecycle(): void { + // Test complete model lifecycle through payload parsing + + // 1. Create model + $createQuery = "CREATE RAG MODEL 'lifecycle_test' ( + llm_provider = 'openai', + llm_model = 'gpt-4o-mini', + style_prompt = 'You are a test assistant.', + temperature = 0.8, + max_tokens = 1500, + k_results = 3 + )"; + + $createPayload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $createQuery, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('create_model', $createPayload->action); + $this->assertEquals('lifecycle_test', $createPayload->params['name']); + $this->assertEquals('openai', $createPayload->params['llm_provider']); + $this->assertEquals('gpt-4o-mini', $createPayload->params['llm_model']); + $this->assertEquals('You are a test assistant.', $createPayload->params['style_prompt']); + $this->assertEquals(0.8, $createPayload->params['temperature']); + $this->assertEquals(1500, $createPayload->params['max_tokens']); + $this->assertEquals(3, $createPayload->params['k_results']); + + // 2. 
Show models + $showPayload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => 'SHOW RAG MODELS', + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('show_models', $showPayload->action); + + // 3. Describe model + $describePayload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => "DESCRIBE RAG MODEL 'lifecycle_test'", + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('describe_model', $describePayload->action); + $this->assertEquals('lifecycle_test', $describePayload->params['model_name_or_uuid']); + + // 4. Use model in conversation + $ragPayload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => "CALL CONVERSATIONAL_RAG('Test query', 'docs', 'lifecycle_test', 'content')", + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('conversation', $ragPayload->action); + $this->assertEquals('Test query', $ragPayload->params['query']); + $this->assertEquals('lifecycle_test', $ragPayload->params['model_uuid']); + $this->assertEquals('content', $ragPayload->params['content_fields']); + + // 5. 
Drop model + $dropPayload = RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => "DROP RAG MODEL 'lifecycle_test'", + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + + $this->assertEquals('drop_model', $dropPayload->action); + $this->assertEquals('lifecycle_test', $dropPayload->params['model_name_or_uuid']); + } + + public function testErrorHandlingInvalidSyntax(): void { + // Test that invalid syntax is properly rejected during parsing + $query = 'CREATE RAG MODEL invalid syntax here'; + + try { + RagPayload::fromRequest( + Request::fromArray( + [ + 'version' => Buddy::PROTOCOL_VERSION, + 'error' => '', + 'payload' => $query, + 'format' => RequestFormat::SQL, + 'endpointBundle' => ManticoreEndpoint::Sql, + 'path' => '', + ] + ) + ); + // If we get here, the payload was parsed (which might be unexpected) + $this->assertTrue(true); // Test passes if no exception thrown + } catch (Exception $e) { + // If an exception is thrown, that's also acceptable for invalid syntax + $this->assertTrue(true); + } + } +} diff --git a/test/Plugin/ConversationalRag/IntentClassifierTest.php b/test/Plugin/ConversationalRag/IntentClassifierTest.php new file mode 100644 index 00000000..a369902f --- /dev/null +++ b/test/Plugin/ConversationalRag/IntentClassifierTest.php @@ -0,0 +1,330 @@ +createMock(BaseProvider::class); + /** @phpstan-ignore-next-line */ + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'REJECTION', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $intent = $intentClassifier->classifyIntent( + 'I already watched that movie', + "user: I want to watch a comedy\nassistant: I recommend 
The Office\nuser: I already watched that", + $mockProviderManager, + $modelConfig + ); + + $this->assertEquals('REJECTION', $intent); + } + + public function testClassifyIntentAlternatives(): void { + $intentClassifier = new IntentClassifier(); + + // Mock LLM provider + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'ALTERNATIVES', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $intent = $intentClassifier->classifyIntent( + 'What else do you have?', + "user: Show me comedies\nassistant: I recommend The Office\nuser: What else do you have?", + $mockProviderManager, + $modelConfig + ); + + $this->assertEquals('ALTERNATIVES', $intent); + } + + public function testClassifyIntentNewSearch(): void { + $intentClassifier = new IntentClassifier(); + + // Mock LLM provider + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => 'NEW_SEARCH', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $intent = $intentClassifier->classifyIntent( + 'Show me action movies', + '', // No conversation history + $mockProviderManager, + $modelConfig + ); + + $this->assertEquals('NEW_SEARCH', $intent); + } + + public function testClassifyIntentLLMFailure(): void { + $intentClassifier = new IntentClassifier(); + + // Mock LLM provider that fails + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + 
[ + 'success' => false, + 'error' => 'API Error', + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $intent = $intentClassifier->classifyIntent( + 'What movies do you recommend?', + '', + $mockProviderManager, + $modelConfig + ); + + // Should fallback to NEW_SEARCH on failure + $this->assertEquals('NEW_SEARCH', $intent); + } + + public function testGenerateQueriesWithExclusions(): void { + $intentClassifier = new IntentClassifier(); + + // Mock LLM provider + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => "SEARCH_QUERY: action movies\nEXCLUDE_QUERY: comedy movies", + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $result = $intentClassifier->generateQueries( + 'I want action movies but not comedies', + 'NEW_SEARCH', + '', + $mockProviderManager, + $modelConfig + ); + + $this->assertEquals('action movies', $result['search_query']); + $this->assertEquals('comedy movies', $result['exclude_query']); + } + + public function testGenerateQueriesNoExclusions(): void { + $intentClassifier = new IntentClassifier(); + + // Mock LLM provider + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => "SEARCH_QUERY: science fiction movies\nEXCLUDE_QUERY: none", + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + 
$modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $result = $intentClassifier->generateQueries( + 'Show me science fiction movies', + 'NEW_SEARCH', + '', + $mockProviderManager, + $modelConfig + ); + + $this->assertEquals('science fiction movies', $result['search_query']); + $this->assertEquals('', $result['exclude_query']); // Should be empty when 'none' + } + + public function testGenerateQueriesIntentBased(): void { + $intentClassifier = new IntentClassifier(); + + // Mock LLM provider + $mockProvider = $this->createMock(BaseProvider::class); + $mockProvider->method('generateResponse') + ->willReturn( + [ + 'success' => true, + 'content' => "SEARCH_QUERY: similar to Inception\nEXCLUDE_QUERY: Inception", + 'metadata' => [], + ] + ); + + $mockProviderManager = $this->createMock( + LLMProviderManager::class + ); + $mockProviderManager->method('getConnection') + ->willReturn($mockProvider); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + $result = $intentClassifier->generateQueries( + 'I liked Inception, what else?', + 'INTEREST', + "user: Show me Inception\nassistant: Here's Inception\nuser: I liked Inception, what else?", + $mockProviderManager, + $modelConfig + ); + + $this->assertEquals('similar to Inception', $result['search_query']); + $this->assertEquals('Inception', $result['exclude_query']); + } + + public function testLimitConversationHistoryShortHistory(): void { + $intentClassifier = new IntentClassifier(); + + $shortHistory = "user: hello\nassistant: hi\nuser: how are you?\nassistant: good"; + + // Use reflection to access private method + $reflection = new ReflectionClass($intentClassifier); + $method = $reflection->getMethod('limitConversationHistory'); + $method->setAccessible(true); + + $result = $method->invoke($intentClassifier, $shortHistory); + + $this->assertEquals($shortHistory, $result); + } + + public function testLimitConversationHistoryLongHistory(): void { + $intentClassifier = new 
IntentClassifier(); + + // Create history with more than 10 exchanges (20 lines) + $longHistory = ''; + for ($i = 0; $i < 12; $i++) { + $longHistory .= "user: message {$i}\nassistant: response {$i}\n"; + } + + // Use reflection to access private method + $reflection = new ReflectionClass($intentClassifier); + $method = $reflection->getMethod('limitConversationHistory'); + $method->setAccessible(true); + + $result = $method->invoke($intentClassifier, $longHistory); + + $resultString = is_string($result) ? $result : ''; + $lines = explode("\n", trim($resultString)); + + $this->assertGreaterThanOrEqual(18, sizeof($lines)); // Should be limited, at least 9 exchanges (18 lines) + $this->assertLessThanOrEqual(20, sizeof($lines)); // Should not exceed 10 exchanges (20 lines) + $this->assertStringContainsString('message', $resultString); // Should contain messages + } + + public function testValidateIntentValidIntents(): void { + $intentClassifier = new IntentClassifier(); + + $validIntents = [ + 'REJECTION', 'ALTERNATIVES', 'TOPIC_CHANGE', 'INTEREST', 'NEW_SEARCH', + 'CONTENT_QUESTION', 'NEW_QUESTION', 'CLARIFICATION', 'UNCLEAR', + ]; + + // Use reflection to access private method + $reflection = new ReflectionClass($intentClassifier); + $method = $reflection->getMethod('validateIntent'); + $method->setAccessible(true); + + foreach ($validIntents as $intent) { + $result = $method->invoke($intentClassifier, $intent); + $this->assertEquals($intent, $result); + } + + // Test with extra text + $result = $method->invoke($intentClassifier, 'I think this is REJECTION because...'); + $this->assertEquals('REJECTION', $result); + + // Test invalid intent + $result = $method->invoke($intentClassifier, 'INVALID_INTENT'); + $this->assertEquals('NEW_SEARCH', $result); // Should default to NEW_SEARCH + } +} diff --git a/test/Plugin/ConversationalRag/LLMProviderManagerTest.php b/test/Plugin/ConversationalRag/LLMProviderManagerTest.php new file mode 100644 index 00000000..59db9fc0 --- 
/dev/null +++ b/test/Plugin/ConversationalRag/LLMProviderManagerTest.php @@ -0,0 +1,124 @@ + 'openai', + 'llm_model' => 'gpt-4', + 'temperature' => 0.7, + 'max_tokens' => 1000, + ]; + + // Get connection twice + $connection1 = $manager->getConnection($modelId, $modelConfig); + $connection2 = $manager->getConnection($modelId, $modelConfig); + + // Should return the same instance + $this->assertSame($connection1, $connection2); + $this->assertInstanceOf(BaseProvider::class, $connection1); + } + + public function testGetConnectionCreatesNewInstanceForDifferentModel(): void { + $manager = new LLMProviderManager(); + + $modelConfig1 = [ + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'temperature' => 0.7, + ]; + + $modelConfig2 = [ + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-3.5-turbo', + 'temperature' => 0.8, + ]; + + $connection1 = $manager->getConnection('model1', $modelConfig1); + $connection2 = $manager->getConnection('model2', $modelConfig2); + + // Should return different instances + $this->assertNotSame($connection1, $connection2); + $this->assertInstanceOf(BaseProvider::class, $connection1); + $this->assertInstanceOf(BaseProvider::class, $connection2); + } + + public function testGetProviderCachesInstances(): void { + $manager = new LLMProviderManager(); + + // Get provider twice + $provider1 = $manager->getProvider('openai'); + $provider2 = $manager->getProvider('openai'); + + // Should return the same instance + $this->assertSame($provider1, $provider2); + $this->assertInstanceOf(BaseProvider::class, $provider1); + } + + public function testGetProviderOpenAI(): void { + $manager = new LLMProviderManager(); + + $provider = $manager->getProvider('openai'); + + $this->assertInstanceOf(BaseProvider::class, $provider); + $this->assertInstanceOf( + OpenAIProvider::class, + $provider + ); + } + + public function testGetProviderUnsupportedProvider(): void { + $manager = new LLMProviderManager(); + + 
$this->expectException(ManticoreSearchClientError::class); + + $manager->getProvider('unsupported'); + } + + public function testCreateProviderOpenAI(): void { + $manager = new LLMProviderManager(); + + // Use reflection to access private method + $reflection = new ReflectionClass($manager); + $method = $reflection->getMethod('createProvider'); + $method->setAccessible(true); + + $provider = $method->invoke($manager, 'openai'); + + $this->assertInstanceOf(BaseProvider::class, $provider); + $this->assertInstanceOf( + OpenAIProvider::class, + $provider + ); + } + + public function testCreateProviderUnsupportedProvider(): void { + $manager = new LLMProviderManager(); + + // Use reflection to access private method + $reflection = new ReflectionClass($manager); + $method = $reflection->getMethod('createProvider'); + $method->setAccessible(true); + + $this->expectException(ManticoreSearchClientError::class); + + $method->invoke($manager, 'unsupported'); + } +} diff --git a/test/Plugin/ConversationalRag/LLMProviders/BaseProviderTest.php b/test/Plugin/ConversationalRag/LLMProviders/BaseProviderTest.php new file mode 100644 index 00000000..b0b03029 --- /dev/null +++ b/test/Plugin/ConversationalRag/LLMProviders/BaseProviderTest.php @@ -0,0 +1,270 @@ +provider = new TestableBaseProvider(); + } + + protected function tearDown(): void { + // Clean up environment variables + putenv('OPENAI_API_KEY'); + putenv('ANTHROPIC_API_KEY'); + } + + public function testConfigureResetsClient(): void { + $config = ['llm_provider' => 'openai', 'llm_model' => 'gpt-4']; + + // Configure once + $this->provider->configure($config); + + // Get client to initialize it using reflection + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getClient'); + $method->setAccessible(true); + $client1 = $method->invoke($this->provider); + + // Configure again + $this->provider->configure($config); + + // Client should be reset + $client2 = 
$method->invoke($this->provider); + + $this->assertNotSame($client1, $client2); + } + + public function testGetSettingsMergesOverrides(): void { + $config = [ + 'settings' => ['temperature' => 0.5, 'max_tokens' => 500], + 'temperature' => 0.7, + 'top_p' => 0.9, + ]; + + $this->provider->configure($config); + + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getSettings'); + $method->setAccessible(true); + + $overrides = ['temperature' => 0.8, 'frequency_penalty' => 0.1]; + $result = (array)$method->invoke($this->provider, $overrides); + + $this->assertEquals(0.8, $result['temperature']); // Override takes precedence + $this->assertEquals(500, $result['max_tokens']); // From settings + $this->assertEquals(0.9, $result['top_p']); // From config + $this->assertEquals(0.1, $result['frequency_penalty']); // From overrides + } + + public function testGetSettingsFromJsonString(): void { + $config = [ + 'settings' => '{"temperature":0.6,"max_tokens":600}', + 'temperature' => 0.7, // This should override the JSON + ]; + + $this->provider->configure($config); + + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getSettings'); + $method->setAccessible(true); + + $result = (array)$method->invoke($this->provider, []); + + $this->assertEquals(0.7, $result['temperature']); // Direct config overrides JSON + $this->assertEquals(600, $result['max_tokens']); // From JSON + } + + public function testConvertSettingsTypesNumericStrings(): void { + $settings = [ + 'temperature' => '0.7', + 'max_tokens' => '1000', + 'k_results' => '5', + 'top_p' => '0.9', + 'non_numeric' => 'text', + ]; + + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('convertSettingsTypes'); + $method->setAccessible(true); + + $result = 
(array)$method->invoke($this->provider, $settings); + + $this->assertIsFloat($result['temperature']); + $this->assertEquals(0.7, $result['temperature']); + $this->assertIsInt($result['max_tokens']); + $this->assertEquals(1000, $result['max_tokens']); + $this->assertIsInt($result['k_results']); + $this->assertEquals(5, $result['k_results']); + $this->assertIsFloat($result['top_p']); + $this->assertEquals(0.9, $result['top_p']); + $this->assertEquals('text', $result['non_numeric']); // Unchanged + } + + public function testConvertToFloatValidNumeric(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('convertToFloat'); + $method->setAccessible(true); + + $this->assertEquals(3.14, $method->invoke($this->provider, '3.14')); + $this->assertEquals(42.0, $method->invoke($this->provider, '42')); + $this->assertEquals('not_numeric', $method->invoke($this->provider, 'not_numeric')); + $this->assertEquals(3.14, $method->invoke($this->provider, 3.14)); // Already float + } + + public function testConvertToIntValidNumeric(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('convertToInt'); + $method->setAccessible(true); + + $this->assertEquals(42, $method->invoke($this->provider, '42')); + $this->assertEquals(3, $method->invoke($this->provider, '3.14')); // Truncated + $this->assertEquals('not_numeric', $method->invoke($this->provider, 'not_numeric')); + $this->assertEquals(42, $method->invoke($this->provider, 42)); // Already int + } + + public function testGetConfigWithDefault(): void { + $config = ['existing_key' => 'value']; + $this->provider->configure($config); + + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getConfig'); + $method->setAccessible(true); + + $this->assertEquals('value', 
$method->invoke($this->provider, 'existing_key')); + $this->assertEquals('default', $method->invoke($this->provider, 'missing_key', 'default')); + $this->assertNull($method->invoke($this->provider, 'missing_key')); + } + + public function testEstimateTokens(): void { + $this->assertEquals(1, $this->provider->estimateTokens('test')); // 4 chars / 4 = 1 + $this->assertEquals(1, $this->provider->estimateTokens('abcd')); // 4 chars / 4 = 1 + $this->assertEquals(2, $this->provider->estimateTokens('abcde')); // 5 chars / 4 = 1.25, ceil to 2 + $this->assertEquals(3, $this->provider->estimateTokens('abcdefghijk')); // 11 chars / 4 = 2.75, ceil to 3 + } + + public function testGetStylePromptDefault(): void { + $this->provider->configure([]); + + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getStylePrompt'); + $method->setAccessible(true); + + $result = $method->invoke($this->provider); + $this->assertStringContainsString('helpful AI assistant', is_string($result) ? 
$result : ''); + } + + public function testGetStylePromptCustom(): void { + $this->provider->configure(['style_prompt' => 'Custom prompt']); + + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getStylePrompt'); + $method->setAccessible(true); + + $result = $method->invoke($this->provider); + $this->assertEquals('Custom prompt', $result); + } + + public function testFormatError(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('formatError'); + $method->setAccessible(true); + + $exception = new Exception('Test exception'); + $result = $method->invoke($this->provider, 'Test message', $exception); + + $this->assertEquals( + [ + 'success' => false, + 'error' => 'Test message', + 'details' => 'Test exception', + 'provider' => 'test_provider', + ], $result + ); + } + + public function testFormatSuccess(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('formatSuccess'); + $method->setAccessible(true); + + $this->provider->configure(['llm_model' => 'test-model']); + $result = $method->invoke($this->provider, 'Test content', ['tokens' => 100]); + + $this->assertEquals( + [ + 'success' => true, + 'content' => 'Test content', + 'metadata' => [ + 'provider' => 'test_provider', + 'model' => 'test-model', + 'tokens' => 100, + ], + ], $result + ); + } + + public function testGetApiKeyForProviderValid(): void { + putenv('OPENAI_API_KEY=test-key-123'); + + $this->provider->configure(['llm_provider' => 'openai']); + + // Use reflection to access private method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + $result = $method->invoke($this->provider, 'openai'); + $this->assertEquals('test-key-123', $result); + 
} + + public function testGetApiKeyForProviderUnsupportedProvider(): void { + $this->provider->configure(['llm_provider' => 'openai']); + + // Use reflection to access private method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + $this->expectException(QueryParseError::class); + $this->expectExceptionMessage("Unsupported LLM provider: 'unsupported'"); + + $method->invoke($this->provider, 'unsupported'); + } + + public function testGetApiKeyForProviderMissingEnvVar(): void { + $this->provider->configure(['llm_provider' => 'openai']); + + // Use reflection to access private method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('getApiKeyForProvider'); + $method->setAccessible(true); + + $this->expectException(QueryParseError::class); + $this->expectExceptionMessage("Environment variable 'OPENAI_API_KEY' not found or empty"); + + $method->invoke($this->provider, 'openai'); + } +} diff --git a/test/Plugin/ConversationalRag/LLMProviders/OpenAIProviderTest.php b/test/Plugin/ConversationalRag/LLMProviders/OpenAIProviderTest.php new file mode 100644 index 00000000..7885fc1c --- /dev/null +++ b/test/Plugin/ConversationalRag/LLMProviders/OpenAIProviderTest.php @@ -0,0 +1,142 @@ +provider = new OpenAIProvider(); + putenv('OPENAI_API_KEY=test-key-123'); + } + + protected function tearDown(): void { + putenv('OPENAI_API_KEY'); + } + + public function testGetName(): void { + $this->assertEquals('openai', $this->provider->getName()); + } + + public function testGenerateResponseMissingApiKey(): void { + $this->provider->configure(['llm_provider' => 'openai']); + putenv('OPENAI_API_KEY'); // Remove API key + + $result = $this->provider->generateResponse('Test prompt'); + + $this->assertFalse($result['success']); + $this->assertStringContainsString('OpenAI request failed', is_string($result['error']) ? 
$result['error'] : ''); + $details = $result['details'] ?? $result['error']; + $this->assertStringContainsString('not found or empty', is_string($details) ? $details : ''); + } + + public function testGenerateResponseDefaultModel(): void { + $this->provider->configure(['llm_provider' => 'openai']); + + // Create a partial mock to avoid actual HTTP calls + $mockProvider = $this->getMockBuilder(OpenAIProvider::class) + ->onlyMethods(['makeRequest']) + ->getMock(); + + $mockProvider->configure(['llm_provider' => 'openai']); + + $mockResponse = [ + 'choices' => [['message' => ['content' => 'Default model response']]], + 'usage' => ['total_tokens' => 100], + ]; + + $mockProvider->expects($this->once()) + ->method('makeRequest') + ->willReturn(['success' => true, 'data' => $mockResponse]); + + $result = $mockProvider->generateResponse('Test prompt'); + + $this->assertTrue($result['success']); + $this->assertEquals('Default model response', $result['content']); + } + + public function testGenerateResponseCustomSettings(): void { + $mockProvider = $this->getMockBuilder(OpenAIProvider::class) + ->onlyMethods(['makeRequest']) + ->getMock(); + + $mockProvider->configure( + [ + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4o', + 'temperature' => 0.8, + 'max_tokens' => 2000, + 'top_p' => 0.9, + ] + ); + + $mockResponse = [ + 'choices' => [['message' => ['content' => 'Custom settings response']]], + 'usage' => ['total_tokens' => 150], + ]; + + $mockProvider->expects($this->once()) + ->method('makeRequest') + ->willReturn(['success' => true, 'data' => $mockResponse]); + + $result = $mockProvider->generateResponse('Test prompt'); + + $this->assertTrue($result['success']); + $this->assertEquals('Custom settings response', $result['content']); + } + + public function testGenerateResponseMakeRequestFailure(): void { + $mockProvider = $this->getMockBuilder(OpenAIProvider::class) + ->onlyMethods(['makeRequest']) + ->getMock(); + + $mockProvider->configure(['llm_provider' => 
'openai']); + + $mockProvider->expects($this->once()) + ->method('makeRequest') + ->willReturn(['success' => false, 'error' => 'API Error']); + + $result = $mockProvider->generateResponse('Test prompt'); + + $this->assertFalse($result['success']); + $this->assertEquals('API Error', $result['error']); + } + + public function testGenerateResponseExceptionHandling(): void { + $mockProvider = $this->getMockBuilder(OpenAIProvider::class) + ->onlyMethods(['getApiKey']) + ->getMock(); + + $mockProvider->configure(['llm_provider' => 'openai']); + + $mockProvider->expects($this->once()) + ->method('getApiKey') + ->willThrowException(new Exception('API key error')); + + $result = $mockProvider->generateResponse('Test prompt'); + + $this->assertFalse($result['success']); + $this->assertStringContainsString('OpenAI request failed', is_string($result['error']) ? $result['error'] : ''); + } + + public function testCreateClientReturnsCurlHandle(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->provider); + $method = $reflection->getMethod('createClient'); + $method->setAccessible(true); + + $client = $method->invoke($this->provider); + + $this->assertInstanceOf(CurlHandle::class, $client); + } +} diff --git a/test/Plugin/ConversationalRag/ModelManagerTest.php b/test/Plugin/ConversationalRag/ModelManagerTest.php new file mode 100644 index 00000000..8789b4e7 --- /dev/null +++ b/test/Plugin/ConversationalRag/ModelManagerTest.php @@ -0,0 +1,570 @@ +createMock(HTTPClient::class); + + // Mock successful response + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('CREATE TABLE IF NOT EXISTS system.rag_models')) + ->willReturn($mockResponse); + + $modelManager->initializeTables($mockClient); + } + + public function testCreateModelSuccessful(): void { + $modelManager = new 
ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock responses for modelExists check and insert + $modelExistsResponse = $this->createMock(Response::class); + $modelExistsResponse->method('hasError')->willReturn(false); + $modelExistsResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [['count' => 0]]]]) + ); + + $insertResponse = $this->createMock(Response::class); + $insertResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->exactly(2)) + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($modelExistsResponse, $insertResponse); + + $config = [ + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'style_prompt' => 'You are helpful', + 'temperature' => '0.7', + 'max_tokens' => '1000', + 'k_results' => '5', + ]; + + $result = $modelManager->createModel($mockClient, $config); + + $this->assertIsString($result); + $this->assertNotEmpty($result); + } + + public function testCreateModelDuplicateName(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response showing model already exists + $modelExistsResponse = $this->createMock(Response::class); + $modelExistsResponse->method('hasError')->willReturn(false); + $modelExistsResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [['count' => 1]]]]) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($modelExistsResponse); + + $config = [ + 'name' => 'existing_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + ]; + + $this->expectException(ManticoreSearchClientError::class); + + $modelManager->createModel($mockClient, $config); + } + + /** + * @throws ManticoreSearchClientError + * @throws ManticoreSearchResponseError + */ + public function testGetModelByUuidOrNameFoundByName(): void { + $modelManager = new ModelManager(); + + // Mock 
HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with model data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'style_prompt' => 'You are helpful', + 'settings' => '{"temperature":0.7,"max_tokens":1000}', + 'created_at' => 1234567890, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('WHERE (name = \'test_model\' OR uuid = \'test_model\')')) + ->willReturn($mockResponse); + + /** @var array{id:string, uuid:string, name:string,llm_provider:string,llm_model:string, + * style_prompt:string,settings:array{ temperature: string, max_tokens: string, k_results?: string, + * similarity_threshold: string, max_document_length: string},created_at:string,updated_at:string} $result */ + $result = $modelManager->getModelByUuidOrName($mockClient, 'test_model'); + + $this->assertIsArray($result); + $this->assertEquals('test-uuid-123', $result['uuid']); + $this->assertEquals('test_model', $result['name']); + $this->assertEquals('openai', $result['llm_provider']); + // Verify settings were properly parsed from JSON + $this->assertIsArray($result['settings']); + $this->assertNotEmpty($result['settings']['temperature']); + $this->assertNotEmpty($result['settings']['max_tokens']); + $this->assertEquals(0.7, $result['settings']['temperature']); + $this->assertEquals(1000, $result['settings']['max_tokens']); + } + + public function testGetModelByUuidOrNameFoundByUuid(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with model data + $mockResponse = $this->createMock(Response::class); + 
$mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'test_model', + 'llm_provider' => 'openai', + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('WHERE (name = \'test-uuid-123\' OR uuid = \'test-uuid-123\')')) + ->willReturn($mockResponse); + + $result = $modelManager->getModelByUuidOrName($mockClient, 'test-uuid-123'); + + $this->assertIsArray($result); + $this->assertEquals('test-uuid-123', $result['uuid']); + } + + public function testGetModelByUuidOrNameNotFound(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with no data + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData([['data' => []]]) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($mockResponse); + + $this->expectException(ManticoreSearchClientError::class); + $modelManager->getModelByUuidOrName($mockClient, 'nonexistent'); + } + + public function testDeleteModelByUuidOrNameSuccessful(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock getModelByUuidOrName response + $getModelResponse = $this->createMock(Response::class); + $getModelResponse->method('hasError')->willReturn(false); + $getModelResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'test_model', + ], + ], + ], + ] + ) + ); + + // Mock delete response + $deleteResponse = $this->createMock(Response::class); + $deleteResponse->method('hasError')->willReturn(false); + + $mockClient->expects($this->exactly(2)) + 
->method('sendRequest') + ->willReturnOnConsecutiveCalls($getModelResponse, $deleteResponse); + + $modelManager->deleteModelByUuidOrName($mockClient, 'test_model'); + } + + public function testDeleteModelByUuidOrNameModelNotFound(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock getModelByUuidOrName response with no data + $getModelResponse = $this->createMock(Response::class); + $getModelResponse->method('hasError')->willReturn(false); + $getModelResponse->method('getResult')->willReturn( + Struct::fromData([['data' => []]]) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($getModelResponse); + + $this->expectException(ManticoreSearchClientError::class); + + $modelManager->deleteModelByUuidOrName($mockClient, 'nonexistent'); + } + + public function testGetAllModelsSuccessful(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with multiple models + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'id' => 1, + 'uuid' => 'uuid-1', + 'name' => 'model1', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'created_at' => 1234567890, + ], + [ + 'id' => 2, + 'uuid' => 'uuid-2', + 'name' => 'model2', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-3.5-turbo', + 'created_at' => 1234567891, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('SELECT id, uuid, name, llm_provider, llm_model, created_at')) + ->willReturn($mockResponse); + + $result = $modelManager->getAllModels($mockClient); + + $this->assertIsArray($result); + $this->assertCount(2, $result); + $this->assertEquals('uuid-1', $result[0]['uuid']); + 
$this->assertEquals('model1', $result[0]['name']); + $this->assertEquals('uuid-2', $result[1]['uuid']); + $this->assertEquals('model2', $result[1]['name']); + } + + public function testExtractSettingsFromJsonString(): void { + $modelManager = new ModelManager(); + + $config = [ + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'settings' => '{"temperature":0.8,"max_tokens":2000,"k_results":10}', + 'custom_field' => 'custom_value', + ]; + + // Use reflection to access private method + $reflection = new ReflectionClass($modelManager); + $method = $reflection->getMethod('extractSettings'); + $method->setAccessible(true); + + $result = $method->invoke($modelManager, $config); + + $this->assertIsArray($result); + $this->assertEquals(0.8, $result['temperature']); + $this->assertEquals(2000, $result['max_tokens']); + $this->assertEquals(10, $result['k_results']); + $this->assertEquals('custom_value', $result['custom_field']); + } + + public function testExtractSettingsFromArray(): void { + $modelManager = new ModelManager(); + + $config = [ + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'settings' => ['temperature' => 0.9, 'max_tokens' => 1500], + 'k_results' => 8, + ]; + + // Use reflection to access private method + $reflection = new ReflectionClass($modelManager); + $method = $reflection->getMethod('extractSettings'); + $method->setAccessible(true); + + $result = $method->invoke($modelManager, $config); + + $this->assertIsArray($result); + $this->assertEquals(0.9, $result['temperature']); + $this->assertEquals(1500, $result['max_tokens']); + $this->assertEquals(8, $result['k_results']); + } + + public function testModelExistsCaseSensitivity(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response showing model exists + $mockResponse = $this->createMock(Response::class); + 
$mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [['count' => 1]]]]) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with($this->stringContains('WHERE name = \'TestModel\'')) + ->willReturn($mockResponse); + + // Use reflection to access private method + $reflection = new ReflectionClass($modelManager); + $method = $reflection->getMethod('modelExists'); + $method->setAccessible(true); + + $result = $method->invoke($modelManager, $mockClient, 'TestModel'); + + $this->assertTrue($result); + } + + public function testGetModelByUuidOrNameWithNullSettings(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with null settings + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'style_prompt' => 'You are helpful', + 'settings' => null, + 'created_at' => 1234567890, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($mockResponse); + + $result = $modelManager->getModelByUuidOrName($mockClient, 'test_model'); + + // Should be empty array due to empty() check + $this->assertIsArray($result['settings']); + $this->assertEmpty($result['settings']); + } + + public function testGetModelByUuidOrNameWithEmptyStringSettings(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with empty string settings + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + 
Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'settings' => '', + 'created_at' => 1234567890, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($mockResponse); + + $result = $modelManager->getModelByUuidOrName($mockClient, 'test_model'); + + // Should be empty array due to empty() check + $this->assertIsArray($result['settings']); + $this->assertEmpty($result['settings']); + } + + public function testGetModelByUuidOrNameWithInvalidJsonSettings(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with invalid JSON settings + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'test-uuid-123', + 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'settings' => '{invalid json', + 'created_at' => 1234567890, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($mockResponse); + + $result = $modelManager->getModelByUuidOrName($mockClient, 'test_model'); + + // Should be empty array due to JSON decode failure + $this->assertIsArray($result['settings']); + $this->assertEmpty($result['settings']); + } + + public function testGetModelByUuidOrNameWithStringNullSettings(): void { + $modelManager = new ModelManager(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with string 'NULL' settings + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('hasError')->willReturn(false); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'uuid' => 'test-uuid-123', 
+ 'name' => 'test_model', + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'settings' => 'NULL', + 'created_at' => 1234567890, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->willReturn($mockResponse); + + $result = $modelManager->getModelByUuidOrName($mockClient, 'test_model'); + + // Should be empty array - 'NULL' is not empty but JSON decode fails + $this->assertIsArray($result['settings']); + $this->assertEmpty($result['settings']); + } +} diff --git a/test/Plugin/ConversationalRag/SearchEngineTest.php b/test/Plugin/ConversationalRag/SearchEngineTest.php new file mode 100644 index 00000000..6aa75ac1 --- /dev/null +++ b/test/Plugin/ConversationalRag/SearchEngineTest.php @@ -0,0 +1,769 @@ +createMock(HTTPClient::class); + + // Mock response with FLOAT_VECTOR field + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'float_vector(1536)'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with('DESCRIBE test_table') + ->willReturn($mockResponse); + + // Use reflection to access private method + $reflection = new ReflectionClass($searchEngine); + $method = $reflection->getMethod('detectVectorField'); + $method->setAccessible(true); + + $result = $method->invoke($searchEngine, $mockClient, 'test_table'); + + $this->assertEquals('embedding', $result); + } + + public function testDetectVectorFieldWithCommonNames(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with common vector field name + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' 
=> 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'content_embedding', 'Type' => 'float_vector(768)'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with('DESCRIBE test_table') + ->willReturn($mockResponse); + + // Use reflection to access private method + $reflection = new ReflectionClass($searchEngine); + $method = $reflection->getMethod('detectVectorField'); + $method->setAccessible(true); + + $result = $method->invoke($searchEngine, $mockClient, 'test_table'); + + $this->assertEquals('content_embedding', $result); + } + + public function testDetectVectorFieldNoVectorFields(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response without vector fields + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'title', 'Type' => 'string'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with('DESCRIBE test_table') + ->willReturn($mockResponse); + + // Use reflection to access private method + $reflection = new ReflectionClass($searchEngine); + $method = $reflection->getMethod('detectVectorField'); + $method->setAccessible(true); + + $result = $method->invoke($searchEngine, $mockClient, 'test_table'); + + $this->assertNull($result); + } + + public function testPerformVectorSearchSuccessful(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock schema response + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' 
=> 'embedding', 'Type' => 'float_vector(1536)'], + ], + ], + ] + ) + ); + + // Mock exclusion response + $exclusionResponse = $this->createMock(Response::class); + $exclusionResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['id' => 1, 'knn_dist' => 0.1], + ['id' => 2, 'knn_dist' => 0.2], + ], + ], + ] + ) + ); + + // Mock search response + $searchResponse = $this->createMock(Response::class); + $searchResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'id' => 3, + 'content' => 'Test content', + 'embedding' => '[0.1, 0.2, 0.3]', + 'knn_dist' => 0.05, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->exactly(5)) // schema, exclusion, schema, search, schema + ->method('sendRequest') + ->willReturnOnConsecutiveCalls( + $schemaResponse, + $exclusionResponse, + $schemaResponse, + $searchResponse, + $schemaResponse + ); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-3.5-turbo', + 'k_results' => 5, 'settings' => ['similarity_threshold' => 0.8]]; + $result = $searchEngine->performSearch( + $mockClient, + 'test_table', + 'test search query', + 'exclude this', + $modelConfig + ); + + $this->assertIsArray($result); + $this->assertCount(1, $result); + $this->assertEquals(3, $result[0]['id']); + $this->assertEquals('Test content', $result[0]['content']); + $this->assertArrayNotHasKey('embedding', $result[0]); // Should be filtered out + } + + public function testPerformVectorSearchWithExclusions(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock schema response + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'float_vector(1536)'], + ], + ], + ] + ) + ); + + // Mock 
exclusion response with multiple exclusions + $exclusionResponse = $this->createMock(Response::class); + $exclusionResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['id' => 1, 'knn_dist' => 0.1], + ['id' => 2, 'knn_dist' => 0.2], + ['id' => 4, 'knn_dist' => 0.15], + ], + ], + ] + ) + ); + + // Mock search response + $searchResponse = $this->createMock(Response::class); + $searchResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'id' => 3, + 'content' => 'Test content', + 'embedding' => '[0.1, 0.2, 0.3]', + 'knn_dist' => 0.05, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->exactly(5)) // schema, exclusion, schema, search, schema + ->method('sendRequest') + ->willReturnOnConsecutiveCalls( + $schemaResponse, + $exclusionResponse, + $schemaResponse, + $searchResponse, + $schemaResponse + ); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-3.5-turbo', + 'k_results' => 5, 'settings' => ['similarity_threshold' => 0.8]]; + $result = $searchEngine->performSearch( + $mockClient, + 'test_table', + 'test search query', + 'exclude this', + $modelConfig + ); + + $this->assertIsArray($result); + $this->assertCount(1, $result); + } + + public function testPerformVectorSearchNoVectorFields(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock schema response without vector fields + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->exactly(2)) + ->method('sendRequest') + ->with('DESCRIBE test_table') + ->willReturn($schemaResponse); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-3.5-turbo', 'k_results' => 5]; + $result = 
$searchEngine->performSearch( + $mockClient, + 'test_table', + 'test search query', + 'exclude this', + $modelConfig + ); + + $this->assertIsArray($result); + $this->assertEmpty($result); + } + + public function testGetVectorFieldsSuccessful(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with multiple vector fields + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'float_vector(1536)'], + ['Field' => 'title_embedding', 'Type' => 'float_vector(768)'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with('DESCRIBE test_table') + ->willReturn($mockResponse); + + // Use reflection to access private method + $reflection = new ReflectionClass($searchEngine); + $method = $reflection->getMethod('getVectorFields'); + $method->setAccessible(true); + + $result = $method->invoke($searchEngine, $mockClient, 'test_table'); + + $this->assertIsArray($result); + $this->assertCount(2, $result); + $this->assertContains('embedding', $result); + $this->assertContains('title_embedding', $result); + } + + public function testFilterVectorFieldsRemovesEmbeddings(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock response with vector fields + $mockResponse = $this->createMock(Response::class); + $mockResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'embedding', 'Type' => 'float_vector(1536)'], + ], + ], + ] + ) + ); + + $mockClient->expects($this->once()) + ->method('sendRequest') + ->with('DESCRIBE test_table') + ->willReturn($mockResponse); + + $testResults = [ + [ + 'id' => 1, + 'content' => 'Test 
content', + 'embedding' => '[0.1, 0.2, 0.3]', + 'title' => 'Test title', + ], + ]; + + // Use reflection to access private method + $reflection = new ReflectionClass($searchEngine); + $method = $reflection->getMethod('filterVectorFields'); + $method->setAccessible(true); + + $result = $method->invoke($searchEngine, $testResults, 'test_table', $mockClient); + + $this->assertIsArray($result); + $this->assertCount(1, $result); + $this->assertArrayHasKey('id', $result[0]); + $this->assertArrayHasKey('content', $result[0]); + $this->assertArrayHasKey('title', $result[0]); + $this->assertArrayNotHasKey('embedding', $result[0]); + } + + public function testEscapeStringHandlesSpecialChars(): void { + $searchEngine = new SearchEngine(); + + // Use reflection to access private method + $reflection = new ReflectionClass($searchEngine); + $method = $reflection->getMethod('escapeString'); + $method->setAccessible(true); + + $result = $method->invoke($searchEngine, "test's string"); + $this->assertEquals("test''s string", $result); + + $result = $method->invoke($searchEngine, 'normal string'); + $this->assertEquals('normal string', $result); + } + + public function testGetExcludedIdsReturnsCorrectIds(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock schema response + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'float_vector(1536)'], + ], + ], + ] + ) + ); + + // Mock exclusion response + $exclusionResponse = $this->createMock(Response::class); + $exclusionResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['id' => 1, 'knn_dist' => 0.1], + ['id' => 2, 'knn_dist' => 0.2], + ['id' => 5, 'knn_dist' => 0.15], + ], + ], + ] + ) + ); + + 
$mockClient->expects($this->exactly(2)) // schema, exclusion + ->method('sendRequest') + ->willReturnOnConsecutiveCalls($schemaResponse, $exclusionResponse); + + $result = $searchEngine->getExcludedIds( + $mockClient, + 'test_table', + 'exclude Star Wars' + ); + + $this->assertIsArray($result); + $this->assertEquals([1, 2, 5], $result); + } + + public function testGetExcludedIdsNoExclusions(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + $result = $searchEngine->getExcludedIds( + $mockClient, + 'test_table', + 'none' + ); + + $this->assertIsArray($result); + $this->assertEmpty($result); + } + + public function testPerformSearchWithExcludedIdsSkipsExclusionSearch(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock schema response + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'float_vector(1536)'], + ], + ], + ] + ) + ); + + // Mock search response + $searchResponse = $this->createMock(Response::class); + $searchResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + [ + 'id' => 3, + 'content' => 'Test content', + 'embedding' => '[0.1, 0.2, 0.3]', + 'knn_dist' => 0.05, + ], + ], + ], + ] + ) + ); + + $mockClient->expects($this->exactly(3)) // schema, search, schema + ->method('sendRequest') + ->willReturnOnConsecutiveCalls( + $schemaResponse, + $searchResponse, + $schemaResponse + ); + + $modelConfig = ['llm_provider' => 'openai', 'llm_model' => 'gpt-3.5-turbo', + 'k_results' => 5, 'settings' => ['similarity_threshold' => 0.8]]; + $result = $searchEngine->performSearchWithExcludedIds( + $mockClient, + 'test_table', + 'test search query', + [1, 2, 5], 
// pre-computed excluded IDs + $modelConfig, + 0.8 + ); + + $this->assertIsArray($result); + $this->assertCount(1, $result); + $this->assertEquals(3, $result[0]['id']); + $this->assertArrayNotHasKey('embedding', $result[0]); // Should be filtered out + } + + /** + * @throws ManticoreSearchClientError + * @throws ManticoreSearchResponseError + */ + public function testGetExcludedIdsBuildsCorrectSQL(): void { + $searchEngine = new SearchEngine(); + + // Mock HTTP client + $mockClient = $this->createMock(HTTPClient::class); + + // Mock schema response + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData( + [ + [ + 'data' => [ + ['Field' => 'id', 'Type' => 'bigint'], + ['Field' => 'content', 'Type' => 'text'], + ['Field' => 'embedding', 'Type' => 'float_vector(1536)'], + ], + ], + ] + ) + ); + + // Mock exclusion response + $exclusionResponse = $this->createMock(Response::class); + $exclusionResponse->method('getResult')->willReturn( + Struct::fromData([['data' => []]]) + ); + + $mockClient->expects($this->exactly(2)) + ->method('sendRequest') + ->willReturnCallback( + function ($sql) use ($schemaResponse, $exclusionResponse) { + if (str_contains($sql, 'DESCRIBE')) { + return $schemaResponse; + } + // Verify the exclusion SQL matches actual implementation + if (str_contains($sql, 'SELECT id, knn_dist() as knn_dist FROM test_table') + && str_contains($sql, "WHERE knn(embedding, 15, 'exclude query')") + && str_contains($sql, 'AND knn_dist < 0.75') + ) { + return $exclusionResponse; + } + throw new \Exception("Unexpected SQL: $sql"); + } + ); + + $result = $searchEngine->getExcludedIds( + $mockClient, + 'test_table', + 'exclude query' + ); + + $this->assertIsArray($result); + $this->assertEmpty($result); + } + + public function testGetExcludedIdsGeneratesCorrectSqlWithKnnDist(): void { + $searchEngine = new SearchEngine(); + $mockClient = $this->createMock(HTTPClient::class); + + // Mock schema 
response + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('hasError')->willReturn(false); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [['Field' => 'embedding_vector', 'Type' => 'float_vector']]]]) + ); + + // Mock exclusion response + $exclusionResponse = $this->createMock(Response::class); + $exclusionResponse->method('hasError')->willReturn(false); + $exclusionResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [['id' => 1156395647918669832]]]]) + ); + + $actualSql = ''; + $mockClient->expects($this->exactly(2)) + ->method('sendRequest') + ->willReturnCallback( + function ($sql) use ($schemaResponse, $exclusionResponse, &$actualSql) { + if (strpos($sql, 'DESCRIBE') !== false) { + return $schemaResponse; + } + $actualSql = $sql; + return $exclusionResponse; + } + ); + + $result = $searchEngine->getExcludedIds($mockClient, 'docs', 'Stranger Things'); + + // Verify the SQL contains knn_dist() in SELECT clause + $this->assertStringContainsString('SELECT id, knn_dist() as knn_dist FROM docs', $actualSql); + $this->assertStringContainsString("WHERE knn(embedding_vector, 15, 'Stranger Things')", $actualSql); + $this->assertStringContainsString('AND knn_dist < 0.75', $actualSql); + + // Verify results + $this->assertEquals([1156395647918669832], $result); + } + + /** + * @throws ManticoreSearchClientError + * @throws ManticoreSearchResponseError + */ + public function testPerformSearchWithExcludedIdsActuallyExcludes(): void { + $searchEngine = new SearchEngine(); + $mockClient = $this->createMock(HTTPClient::class); + $model = [ + 'llm_provider' => 'openai', + 'llm_model' => 'gpt-4', + 'settings' => ['k_results' => 5], + ]; + + // Mock schema response + $schemaResponse = $this->createMock(Response::class); + $schemaResponse->method('hasError')->willReturn(false); + $schemaResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [['Field' => 'embedding_vector', 
'Type' => 'float_vector']]]]) + ); + + // Mock exclusion response (should find Stranger Things to exclude) + $exclusionResponse = $this->createMock(Response::class); + $exclusionResponse->method('hasError')->willReturn(false); + $exclusionResponse->method('getResult')->willReturn( + Struct::fromData([['data' => [['id' => 1156395647918669832]]]]) + ); + + // Mock search response (should exclude the found ID) + $searchResponse = $this->createMock(Response::class); + $searchResponse->method('hasError')->willReturn(false); + $searchResponse->method('getResult')->willReturn( + Struct::fromData( + [['data' => [ + ['id' => 1001, 'title' => 'Other Show', 'knn_dist' => 0.5], + ['id' => 1002, 'title' => 'Another Show', 'knn_dist' => 0.6], + ]]] + ) + ); + + $sqlQueries = []; + $mockClient->expects($this->exactly(5)) // schema, exclusion, schema, search, schema + ->method('sendRequest') + ->willReturnCallback( + function ($sql) use ($schemaResponse, $exclusionResponse, $searchResponse, &$sqlQueries) { + $sqlQueries[] = $sql; + if (str_contains($sql, 'DESCRIBE')) { + return $schemaResponse; + } + if (str_contains($sql, 'SELECT id, knn_dist() as knn_dist')) { + return $exclusionResponse; + } + if (str_contains($sql, 'id NOT IN (1156395647918669832)')) { + return $searchResponse; + } + throw new \Exception("Unexpected SQL: $sql"); + } + ); + + // First get excluded IDs + $excludedIds = $searchEngine->getExcludedIds($mockClient, 'docs', 'Stranger Things'); + $this->assertEquals([1156395647918669832], $excludedIds); + + $result = $searchEngine->performSearchWithExcludedIds( + $mockClient, 'docs', 'horror shows', $excludedIds, $model, 0.8 + ); + + // Verify exclusion SQL was generated correctly + $exclusionSql = $sqlQueries[1]; // Second query should be exclusion + $this->assertStringContainsString('SELECT id, knn_dist() as knn_dist FROM docs', $exclusionSql); + + // Verify search SQL excludes the found ID + $searchSql = $sqlQueries[3]; // Fourth query should be search + 
$this->assertStringContainsString('id NOT IN (1156395647918669832)', $searchSql); + + // Verify results don't contain the excluded ID + $this->assertCount(2, $result); + $this->assertEquals(1001, $result[0]['id']); + $this->assertEquals(1002, $result[1]['id']); + } +} diff --git a/test/Plugin/ConversationalRag/SqlEscapeTraitTest.php b/test/Plugin/ConversationalRag/SqlEscapeTraitTest.php new file mode 100644 index 00000000..59532699 --- /dev/null +++ b/test/Plugin/ConversationalRag/SqlEscapeTraitTest.php @@ -0,0 +1,105 @@ +testClass); + $method = $reflection->getMethod('sqlEscape'); + $method->setAccessible(true); + + $result = $method->invoke($this->testClass, "Don't worry"); + $this->assertEquals("Don\\'t worry", $result); + } + + public function testSqlEscapeBackslashes(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->testClass); + $method = $reflection->getMethod('sqlEscape'); + $method->setAccessible(true); + + $result = $method->invoke($this->testClass, 'Path\\to\\file'); + $this->assertEquals('Path\\to\\file', $result); // Backslashes are not escaped in SQL + } + + public function testSqlEscapeMultipleSpecialChars(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->testClass); + $method = $reflection->getMethod('sqlEscape'); + $method->setAccessible(true); + + $result = $method->invoke($this->testClass, 'Test!@#$%^&*()'); + $this->assertEquals('Test!@#$%^&*()', $result); // Only single quotes are escaped in SQL + } + + public function testSqlEscapeNoSpecialChars(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->testClass); + $method = $reflection->getMethod('sqlEscape'); + $method->setAccessible(true); + + $result = $method->invoke($this->testClass, 'NormalString123'); + $this->assertEquals('NormalString123', $result); + } + + public function testQuoteWrapsAndEscapes(): void { + // Use reflection to access 
protected method + $reflection = new ReflectionClass($this->testClass); + $method = $reflection->getMethod('quote'); + $method->setAccessible(true); + + $result = $method->invoke($this->testClass, "O'Reilly"); + $this->assertEquals("'O\\'Reilly'", $result); + } + + public function testQuoteEmptyString(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->testClass); + $method = $reflection->getMethod('quote'); + $method->setAccessible(true); + + $result = $method->invoke($this->testClass, ''); + $this->assertEquals("''", $result); + } + + public function testQuoteNumericString(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->testClass); + $method = $reflection->getMethod('quote'); + $method->setAccessible(true); + + $result = $method->invoke($this->testClass, '123'); + $this->assertEquals("'123'", $result); + } + + public function testQuoteSpecialCharsWithSingleQuote(): void { + // Use reflection to access protected method + $reflection = new ReflectionClass($this->testClass); + $method = $reflection->getMethod('quote'); + $method->setAccessible(true); + + // Test SQL escaping - only single quotes are escaped + $specialString = 'Don\'t escape "double quotes" or $special chars'; + $result = $method->invoke($this->testClass, $specialString); + + $expected = "'Don\\'t escape \"double quotes\" or \$special chars'"; + $this->assertEquals($expected, $result); + } + + protected function setUp(): void { + $this->testClass = new SqlEscapeTraitTestClass(); + } +} diff --git a/test/src/LLMProviders/TestableBaseProvider.php b/test/src/LLMProviders/TestableBaseProvider.php new file mode 100644 index 00000000..5bf4ca38 --- /dev/null +++ b/test/src/LLMProviders/TestableBaseProvider.php @@ -0,0 +1,32 @@ + true, 'content' => 'test response']; + } + + protected function createClient(): object { + return (object)['test' => 'client']; + } + + public function getName(): string { + return 
'test_provider'; + } +} diff --git a/test/src/Plugin/ConversationalRag/SqlEscapeTraitTestClass.php b/test/src/Plugin/ConversationalRag/SqlEscapeTraitTestClass.php new file mode 100644 index 00000000..4a862555 --- /dev/null +++ b/test/src/Plugin/ConversationalRag/SqlEscapeTraitTestClass.php @@ -0,0 +1,21 @@ +