diff --git a/lib/Service/ChunkService.php b/lib/Service/ChunkService.php new file mode 100644 index 00000000..33d66ce7 --- /dev/null +++ b/lib/Service/ChunkService.php @@ -0,0 +1,60 @@ +openAiSettingsService->getChunkSize(); + if ($outputChunking) { + $maxTokens = $maxTokens ?? $this->openAiSettingsService->getMaxTokens(); + $chunkSize = min($chunkSize, $maxTokens); + } + + // https://platform.openai.com/tokenizer + // Rough approximation, 1 token is approximately 4 bytes for OpenAI models + // It's safer to have a lower estimate on the max number of tokens, so consider 3 bytes per token instead of 4 (to account for some multibyte characters) + $maxChars = $chunkSize * 3; + + if (!$chunkSize || (mb_strlen($prompt) <= $maxChars)) { + // Chunking is disabled or prompt is short enough to be a single chunk + return [$prompt]; + } + + // Try splitting by paragraph, match as many paragraphs as possible per chunk up to the maximum chunk size + if (preg_match_all("/.{1,{$maxChars}}\n/su", $prompt, $prompts)) { + return $prompts[0]; + } + + // Try splitting by sentence + if (preg_match_all("/.{1,{$maxChars}}[!\.\?\n]/su", $prompt, $prompts)) { + return $prompts[0]; + } + + // Try splitting by word + if (preg_match_all("/.{1,{$maxChars}}\W/su", $prompt, $prompts)) { + return $prompts[0]; + } + + // Split by number of characters in maximum chunk size + return mb_str_split($prompt, $maxChars); + } +} diff --git a/lib/TaskProcessing/ChangeToneProvider.php b/lib/TaskProcessing/ChangeToneProvider.php index e5fc136a..515a712d 100644 --- a/lib/TaskProcessing/ChangeToneProvider.php +++ b/lib/TaskProcessing/ChangeToneProvider.php @@ -11,6 +11,7 @@ use Exception; use OCA\OpenAi\AppInfo\Application; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\IAppConfig; @@ -19,6 +20,7 @@ use OCP\TaskProcessing\ISynchronousProvider; use OCP\TaskProcessing\ShapeDescriptor; use OCP\TaskProcessing\ShapeEnumValue; +use 
OCP\TaskProcessing\TaskTypes\TextToTextChangeTone; use RuntimeException; class ChangeToneProvider implements ISynchronousProvider { @@ -28,6 +30,7 @@ public function __construct( private IAppConfig $appConfig, private OpenAiSettingsService $openAiSettingsService, private IL10N $l, + private ChunkService $chunkService, private ?string $userId, ) { } @@ -42,7 +45,7 @@ public function getName(): string { public function getTaskTypeId(): string { if (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToTextChangeTone')) { - return \OCP\TaskProcessing\TaskTypes\TextToTextChangeTone::ID; + return TextToTextChangeTone::ID; } return ChangeToneTaskType::ID; } @@ -121,7 +124,6 @@ public function process(?string $userId, array $input, callable $reportProgress) } $textInput = $input['input']; $toneInput = $input['tone']; - $prompt = "Reformulate the following text in a $toneInput tone in its original language. Output only the reformulation. Here is the text:" . "\n\n" . $textInput . "\n\n" . 'Do not mention the used language in your reformulation. 
Here is your reformulation in the same language:'; $maxTokens = null; if (isset($input['max_tokens']) && is_int($input['max_tokens'])) { @@ -134,22 +136,33 @@ public function process(?string $userId, array $input, callable $reportProgress) $model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID; } - try { - if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); - $completion = $completion['messages']; - } else { - $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + $chunks = $this->chunkService->chunkSplitPrompt($textInput, true, $maxTokens); + $result = ''; + $increase = 1.0 / (float)count($chunks); + $progress = 0.0; + foreach ($chunks as $textInput) { + $prompt = "Reformulate the following text in a $toneInput tone in its original language. Output only the reformulation. Here is the text:" . "\n\n" . $textInput . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:'; + try { + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); + $completion = $completion['messages']; + } else { + $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + } + } catch (Exception $e) { + throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage()); + } + $progress += $increase; + $reportProgress($progress); + if (count($completion) > 0) { + $result .= array_pop($completion); + continue; } - } catch (Exception $e) { - throw new RuntimeException('OpenAI/LocalAI request failed: ' . 
$e->getMessage()); - } - if (count($completion) > 0) { - $endTime = time(); - $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); - return ['output' => array_pop($completion)]; - } - throw new RuntimeException('No result in OpenAI/LocalAI response.'); + throw new RuntimeException('No result in OpenAI/LocalAI response.'); + } + $endTime = time(); + $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); + return ['output' => $result]; } } diff --git a/lib/TaskProcessing/ContextWriteProvider.php b/lib/TaskProcessing/ContextWriteProvider.php index 2a7fd38f..fabee1a2 100644 --- a/lib/TaskProcessing/ContextWriteProvider.php +++ b/lib/TaskProcessing/ContextWriteProvider.php @@ -11,6 +11,7 @@ use Exception; use OCA\OpenAi\AppInfo\Application; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\IAppConfig; @@ -27,6 +28,7 @@ public function __construct( private OpenAiAPIService $openAiAPIService, private IAppConfig $appConfig, private OpenAiSettingsService $openAiSettingsService, + private ChunkService $chunkService, private IL10N $l, private ?string $userId, ) { @@ -108,18 +110,10 @@ public function process(?string $userId, array $input, callable $reportProgress) ) { throw new RuntimeException('Invalid inputs'); } + $writingStyle = $input['style_input']; $sourceMaterial = $input['source_input']; - $prompt = 'You\'re a professional copywriter tasked with copying an instructed or demonstrated *WRITING STYLE*' - . ' and writing a text on the provided *SOURCE MATERIAL*.' - . " \n*WRITING STYLE*:\n$writingStyle\n\n*SOURCE MATERIAL*:\n\n$sourceMaterial\n\n" - . 'Now write a text in the same style detailed or demonstrated under *WRITING STYLE* using the *SOURCE MATERIAL*' - . ' as source of facts and instruction on what to write about.' - . ' Do not invent any facts or events yourself.' - . 
' Also, use the *WRITING STYLE* as a guide for how to write the text ONLY and not as a source of facts or events.' - . ' Detect the language used in the *SOURCE_MATERIAL*. Make sure to use the same language in your response. Do not mention the language explicitly.'; - $maxTokens = null; if (isset($input['max_tokens']) && is_int($input['max_tokens'])) { $maxTokens = $input['max_tokens']; @@ -131,22 +125,42 @@ public function process(?string $userId, array $input, callable $reportProgress) $model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID; } - try { - if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); - $completion = $completion['messages']; - } else { - $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + $chunks = $this->chunkService->chunkSplitPrompt($sourceMaterial, true, $maxTokens); + $result = ''; + $increase = 1.0 / (float)count($chunks); + $progress = 0.0; + + foreach ($chunks as $sourceMaterial) { + $prompt = 'You\'re a professional copywriter tasked with copying an instructed or demonstrated *WRITING STYLE*' + . ' and writing a text on the provided *SOURCE MATERIAL*.' + . " \n*WRITING STYLE*:\n$writingStyle\n\n*SOURCE MATERIAL*:\n\n$sourceMaterial\n\n" + . 'Now write a text in the same style detailed or demonstrated under *WRITING STYLE* using the *SOURCE MATERIAL*' + . ' as source of facts and instruction on what to write about.' + . ' Do not invent any facts or events yourself.' + . ' Also, use the *WRITING STYLE* as a guide for how to write the text ONLY and not as a source of facts or events.' + . ' Detect the language used in the *SOURCE_MATERIAL*. Make sure to use the same language in your response. 
Do not mention the language explicitly.'; + try { + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); + $completion = $completion['messages']; + } else { + $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + } + } catch (Exception $e) { + throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage()); } - } catch (Exception $e) { - throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage()); - } - if (count($completion) > 0) { - $endTime = time(); - $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); - return ['output' => array_pop($completion)]; + if (count($completion) > 0) { + $result .= array_pop($completion); + $progress += $increase; + $reportProgress($progress); + continue; + } + + throw new RuntimeException('No result in OpenAI/LocalAI response.'); } + $endTime = time(); + $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); + return ['output' => $result]; - throw new RuntimeException('No result in OpenAI/LocalAI response.'); } } diff --git a/lib/TaskProcessing/ProofreadProvider.php b/lib/TaskProcessing/ProofreadProvider.php index f6abe7a0..9e2c6b5f 100644 --- a/lib/TaskProcessing/ProofreadProvider.php +++ b/lib/TaskProcessing/ProofreadProvider.php @@ -11,6 +11,7 @@ use Exception; use OCA\OpenAi\AppInfo\Application; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\IAppConfig; @@ -28,6 +29,7 @@ public function __construct( private IAppConfig $appConfig, private OpenAiSettingsService $openAiSettingsService, private IL10N $l, + private ChunkService $chunkService, private ?string $userId, ) { } @@ -119,23 +121,54 @@ public function process(?string $userId, array $input, 
callable $reportProgress) $model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID; } - try { - if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $textInput, $systemPrompt, null, 1, $maxTokens); - $completion = $completion['messages']; - } else { - $prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $textInput; - $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + $chunks = $this->chunkService->chunkSplitPrompt($textInput, true, $maxTokens); + $result = ''; + $increase = 1.0 / ((float)count($chunks) + 1.0); + $progress = 0.0; + + foreach ($chunks as $textInput) { + try { + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $textInput, $systemPrompt, null, 1, $maxTokens); + $completion = $completion['messages']; + } else { + $prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $textInput; + $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + } + } catch (Exception $e) { + throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage()); } - } catch (Exception $e) { - throw new RuntimeException('OpenAI/LocalAI request failed: ' . 
$e->getMessage()); + if (count($completion) > 0) { + $result .= array_pop($completion); + $progress += $increase; + $reportProgress($progress); + continue; + } + + throw new RuntimeException('No result in OpenAI/LocalAI response.'); } - if (count($completion) > 0) { - $endTime = time(); - $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); - return ['output' => array_pop($completion)]; + if (count($chunks) > 1) { + $systemPrompt = 'Repeat the proofread feedback list. Ensure that no information is lost, but also not duplicated. '; + try { + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $result, $systemPrompt, null, 1, $maxTokens); + $completion = $completion['messages']; + } else { + $prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $result; + $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + } + } catch (Exception $e) { + throw new RuntimeException('OpenAI/LocalAI request failed: ' . 
$e->getMessage()); + } + if (count($completion) > 0) { + $result = array_pop($completion); + } } + $progress += $increase; + $reportProgress($progress); + $endTime = time(); + $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); + return ['output' => $result]; - throw new RuntimeException('No result in OpenAI/LocalAI response.'); } } diff --git a/lib/TaskProcessing/ReformulateProvider.php b/lib/TaskProcessing/ReformulateProvider.php index 1cdfe500..22592bc1 100644 --- a/lib/TaskProcessing/ReformulateProvider.php +++ b/lib/TaskProcessing/ReformulateProvider.php @@ -11,6 +11,7 @@ use Exception; use OCA\OpenAi\AppInfo\Application; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\IAppConfig; @@ -28,6 +29,7 @@ public function __construct( private IAppConfig $appConfig, private OpenAiSettingsService $openAiSettingsService, private IL10N $l, + private ChunkService $chunkService, private ?string $userId, ) { } @@ -106,7 +108,6 @@ public function process(?string $userId, array $input, callable $reportProgress) throw new RuntimeException('Invalid prompt'); } $prompt = $input['input']; - $prompt = 'Reformulate the following text. Use the same language as the original text. Output only the reformulation. Here is the text:' . "\n\n" . $prompt . "\n\n" . 'Do not mention the used language in your reformulation. 
Here is your reformulation in the same language:'; $maxTokens = null; if (isset($input['max_tokens']) && is_int($input['max_tokens'])) { @@ -118,23 +119,35 @@ public function process(?string $userId, array $input, callable $reportProgress) } else { $model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID; } - - try { - if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); - $completion = $completion['messages']; - } else { - $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + $chunks = $this->chunkService->chunkSplitPrompt($prompt, true, $maxTokens); + $result = ''; + $increase = 1.0 / (float)count($chunks); + $progress = 0.0; + + foreach ($chunks as $chunk) { + $prompt = 'Reformulate the following text. Use the same language as the original text. Output only the reformulation. Here is the text:' . "\n\n" . $chunk . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:'; + try { + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); + $completion = $completion['messages']; + } else { + $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + } + } catch (Exception $e) { + throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage()); } - } catch (Exception $e) { - throw new RuntimeException('OpenAI/LocalAI request failed: ' . 
$e->getMessage()); - } - if (count($completion) > 0) { - $endTime = time(); - $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); - return ['output' => array_pop($completion)]; + if (count($completion) > 0) { + $result .= array_pop($completion); + $progress += $increase; + $reportProgress($progress); + continue; + } + + throw new RuntimeException('No result in OpenAI/LocalAI response.'); } - throw new RuntimeException('No result in OpenAI/LocalAI response.'); + $endTime = time(); + $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); + return ['output' => $result]; } } diff --git a/lib/TaskProcessing/SummaryProvider.php b/lib/TaskProcessing/SummaryProvider.php index a0c966e5..a684fba8 100644 --- a/lib/TaskProcessing/SummaryProvider.php +++ b/lib/TaskProcessing/SummaryProvider.php @@ -11,6 +11,7 @@ use Exception; use OCA\OpenAi\AppInfo\Application; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\IAppConfig; @@ -28,6 +29,7 @@ public function __construct( private IAppConfig $appConfig, private OpenAiSettingsService $openAiSettingsService, private IL10N $l, + private ChunkService $chunkService, private ?string $userId, ) { } @@ -117,11 +119,14 @@ public function process(?string $userId, array $input, callable $reportProgress) $model = $input['model']; } - $prompts = self::chunkSplitPrompt($prompt); + $prompts = $this->chunkService->chunkSplitPrompt($prompt); $newNumChunks = count($prompts); - + $progress = 0.0; do { + // Ensure that progress never finishes no matter how many times this loop runs + $increase = (1.0 - $progress) / 2.0 / (float)$newNumChunks; $oldNumChunks = $newNumChunks; + $reportProgress($progress); try { $completions = []; @@ -132,6 +137,8 @@ public function process(?string $userId, array $input, callable $reportProgress) foreach ($prompts as $p) { $completion = $this->openAiAPIService->createChatCompletion($userId, 
$model, $p, $summarySystemPrompt, null, 1, $maxTokens); $completions[] = $completion['messages']; + $progress += $increase; + $reportProgress($progress); } } else { $wrapSummaryPrompt = function (string $p): string { @@ -142,6 +149,8 @@ public function process(?string $userId, array $input, callable $reportProgress) foreach (array_map($wrapSummaryPrompt, $prompts) as $p) { $completions[] = $this->openAiAPIService->createCompletion($userId, $p, 1, $model, $maxTokens); + $progress += $increase; + $reportProgress($progress); } } } catch (Exception $e) { @@ -161,7 +170,7 @@ public function process(?string $userId, array $input, callable $reportProgress) )); $summary = implode(' ', $completionStrings); - $prompts = self::chunkSplitPrompt($summary); + $prompts = $this->chunkService->chunkSplitPrompt($summary); $newNumChunks = count($prompts); } while ($oldNumChunks > $newNumChunks); @@ -170,35 +179,4 @@ public function process(?string $userId, array $input, callable $reportProgress) return ['output' => $summary]; } - private function chunkSplitPrompt(string $prompt): array { - $chunkSize = $this->openAiSettingsService->getChunkSize(); - - // https://platform.openai.com/tokenizer - // Rough approximation, 1 token is approximately 4 bytes for OpenAI models - // It's safer to have a lower estimate on the max number of tokens, so consider 3 bytes per token instead of 4 (to account for some multibyte characters) - $maxChars = $chunkSize * 3; - - if (!$chunkSize || (mb_strlen($prompt) <= $maxChars)) { - // Chunking is disabled or prompt is short enough to be a single chunk - return [$prompt]; - } - - // Try splitting by paragraph, match as many paragraphs as possible per chunk up to the maximum chunk size - if (preg_match_all("/.{1,{$maxChars}}\n/su", $prompt, $prompts)) { - return $prompts[0]; - } - - // Try splitting by sentence - if (preg_match_all("/.{1,{$maxChars}}[!\.\?\n]/su", $prompt, $prompts)) { - return $prompts[0]; - } - - // Try splitting by word - if 
(preg_match_all("/.{1,{$maxChars}}\W/su", $prompt, $prompts)) { - return $prompts[0]; - } - - // Split by number of characters in maximum chunk size - return mb_str_split($prompt, $maxChars); - } } diff --git a/lib/TaskProcessing/TopicsProvider.php b/lib/TaskProcessing/TopicsProvider.php index c2bdf215..0f48ab18 100644 --- a/lib/TaskProcessing/TopicsProvider.php +++ b/lib/TaskProcessing/TopicsProvider.php @@ -11,6 +11,7 @@ use Exception; use OCA\OpenAi\AppInfo\Application; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\IAppConfig; @@ -19,6 +20,7 @@ use OCP\TaskProcessing\ISynchronousProvider; use OCP\TaskProcessing\ShapeDescriptor; use OCP\TaskProcessing\TaskTypes\TextToTextTopics; +use Psr\Log\LoggerInterface; use RuntimeException; class TopicsProvider implements ISynchronousProvider { @@ -28,6 +30,8 @@ public function __construct( private IAppConfig $appConfig, private OpenAiSettingsService $openAiSettingsService, private IL10N $l, + private ChunkService $chunkService, + private LoggerInterface $logger, private ?string $userId, ) { } @@ -106,7 +110,6 @@ public function process(?string $userId, array $input, callable $reportProgress) throw new RuntimeException('Invalid prompt'); } $prompt = $input['input']; - $prompt = 'Extract topics from the following text. Detect the language of the text. Use the same language as the text. Output only the topics, comma separated. Here is the text:' . "\n\n" . 
$prompt; $maxTokens = null; if (isset($input['max_tokens']) && is_int($input['max_tokens'])) { $maxTokens = $input['max_tokens']; @@ -118,23 +121,64 @@ public function process(?string $userId, array $input, callable $reportProgress) } else { $model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID; } + $prompts = $this->chunkService->chunkSplitPrompt($prompt); + $newNumChunks = count($prompts); + $progress = 0.0; + $firstRun = true; + do { + // Make sure to run again if there is more than one chunk after the first run to remove duplicates + $runAgain = $firstRun && $newNumChunks > 1; + $firstRun = false; + + // Ensure that progress never finishes no matter how many times this loop runs + $increase = (1.0 - $progress) / (float)$newNumChunks * 0.9; + $oldNumChunks = $newNumChunks; + $reportProgress($progress); + + try { + $completions = []; + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $topicsSystemPrompt = 'Extract topics from the following text. Detect the language of the text. Use the same language as the text. Output only the topics, comma separated.'; + + foreach ($prompts as $p) { + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $p, $topicsSystemPrompt, null, 1, $maxTokens); + $completions[] = $completion['messages']; + $progress += $increase; + $reportProgress($progress); + } + } else { + $wrapTopicsPrompt = function (string $p): string { + return 'Extract topics from the following text. Detect the language of the text. Use the same language as the text.' + . ' Output only the topics, comma separated. Here is the text:' . "\n\n" . $p . 
"\n"; + }; + + foreach (array_map($wrapTopicsPrompt, $prompts) as $p) { + $completions[] = $this->openAiAPIService->createCompletion($userId, $p, 1, $model, $maxTokens); + $progress += $increase; + $reportProgress($progress); + } + } + } catch (Exception $e) { + throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage()); + } - try { - if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); - $completion = $completion['messages']; - } else { - $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + // Each prompt chunk should return a non-empty array of completions, this will return false if at least one array is empty + $allPromptsHaveCompletions = array_reduce($completions, fn (bool $prev, array $next): bool => $prev && count($next), true); + if (!$allPromptsHaveCompletions) { + throw new RuntimeException('No result in OpenAI/LocalAI response.'); } - } catch (Exception $e) { - throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage()); - } - if (count($completion) > 0) { - $endTime = time(); - $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); - return ['output' => array_pop($completion)]; - } - throw new RuntimeException('No result in OpenAI/LocalAI response.'); + // Take only one completion for each chunk and combine them into a completion + $completionStrings = array_map(fn (array $completions): string => trim(array_pop($completions)), $completions); + $topics = implode(', ', $completionStrings); + + $prompts = $this->chunkService->chunkSplitPrompt($topics); + $this->logger->debug('TopicsProvider topics after merging chunk completions: ' . 
$topics); + $newNumChunks = count($prompts); + } while ($oldNumChunks > $newNumChunks || $runAgain); + + $endTime = time(); + $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); + return ['output' => $topics]; } } diff --git a/lib/TaskProcessing/TranslateProvider.php b/lib/TaskProcessing/TranslateProvider.php index 6eccf89e..f23e5457 100644 --- a/lib/TaskProcessing/TranslateProvider.php +++ b/lib/TaskProcessing/TranslateProvider.php @@ -11,6 +11,7 @@ use Exception; use OCA\OpenAi\AppInfo\Application; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\IAppConfig; @@ -35,6 +36,7 @@ public function __construct( private IFactory $l10nFactory, private ICacheFactory $cacheFactory, private LoggerInterface $logger, + private ChunkService $chunkService, private ?string $userId, ) { } @@ -151,42 +153,60 @@ public function process(?string $userId, array $input, callable $reportProgress) $maxTokens = $input['max_tokens']; } - $cacheKey = ($input['origin_language'] ?? '') . '/' . $input['target_language'] . '/' . md5($inputText); - - $cache = $this->cacheFactory->createDistributed('integration_openai'); - if ($cached = $cache->get($cacheKey)) { - return ['output' => $cached]; - } - + $chunks = $this->chunkService->chunkSplitPrompt($inputText, true, $maxTokens); + $result = ''; + $increase = 1.0 / (float)count($chunks); + $progress = 0.0; try { $coreLanguages = $this->getCoreLanguagesByCode(); $toLanguage = $coreLanguages[$input['target_language']] ?? $input['target_language']; + if ($input['origin_language'] !== 'detect_language') { $fromLanguage = $coreLanguages[$input['origin_language']] ?? $input['origin_language']; - $this->logger->debug('OpenAI translation FROM[' . $fromLanguage . '] TO[' . $toLanguage . ']', ['app' => Application::APP_ID]); - $prompt = 'Translate from ' . $fromLanguage . ' to ' . $toLanguage . ': ' . $inputText; + $promptStart = 'Translate from ' . 
$fromLanguage . ' to ' . $toLanguage . ': '; } else { - $this->logger->debug('OpenAI translation TO[' . $toLanguage . ']', ['app' => Application::APP_ID]); - $prompt = 'Translate to ' . $toLanguage . ': ' . $inputText; + $promptStart = 'Translate to ' . $toLanguage . ': '; } - if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { - $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); - $completion = $completion['messages']; - } else { - $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + foreach ($chunks as $chunk) { + $progress += $increase; + $cacheKey = ($input['origin_language'] ?? '') . '/' . $input['target_language'] . '/' . md5($chunk); + + $cache = $this->cacheFactory->createDistributed('integration_openai'); + if ($cached = $cache->get($cacheKey)) { + $this->logger->debug('Using cached translation', ['cached' => $cached, 'cacheKey' => $cacheKey]); + $result .= $cached; + $reportProgress($progress); + continue; + } + $prompt = $promptStart . 
$chunk; + + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens); + $completion = $completion['messages']; + } else { + $completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens); + } + + $reportProgress($progress); + + if (count($completion) > 0) { + $completion = array_pop($completion); + $result .= $completion; + $cache->set($cacheKey, $completion); + continue; + } + + throw new RuntimeException("Failed translate from {$fromLanguage} to {$toLanguage} for chunk"); } - if (count($completion) > 0) { - $endTime = time(); - $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); - return ['output' => array_pop($completion)]; - } + $endTime = time(); + $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); + return ['output' => $result]; } catch (Exception $e) { throw new RuntimeException("Failed translate from {$fromLanguage} to {$toLanguage}", 0, $e); } - throw new RuntimeException("Failed translate from {$fromLanguage} to {$toLanguage}"); } } diff --git a/tests/unit/Providers/OpenAiProviderTest.php b/tests/unit/Providers/OpenAiProviderTest.php index 1d9d694b..f746d860 100644 --- a/tests/unit/Providers/OpenAiProviderTest.php +++ b/tests/unit/Providers/OpenAiProviderTest.php @@ -14,6 +14,7 @@ use OCA\OpenAi\AppInfo\Application; use OCA\OpenAi\Db\QuotaUsageMapper; +use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCA\OpenAi\TaskProcessing\ChangeToneProvider; @@ -44,6 +45,7 @@ class OpenAiProviderTest extends TestCase { private OpenAiAPIService $openAiApiService; private OpenAiSettingsService $openAiSettingsService; + private ChunkService $chunkService; /** * @var MockObject|IClient */ @@ -64,6 +66,8 @@ protected function setUp(): void { 
$this->openAiSettingsService = \OC::$server->get(OpenAiSettingsService::class); + $this->chunkService = \OC::$server->get(ChunkService::class); + $this->quotaUsageMapper = \OC::$server->get(QuotaUsageMapper::class); // We'll hijack the client service and subsequently iClient to return a mock response from the OpenAI API @@ -298,6 +302,7 @@ public function testChangeToneProvider(): void { \OC::$server->get(IAppConfig::class), $this->openAiSettingsService, $this->createMock(\OCP\IL10N::class), + $this->chunkService, self::TEST_USER1, ); @@ -364,6 +369,7 @@ public function testSummaryProvider(): void { \OC::$server->get(IAppConfig::class), $this->openAiSettingsService, $this->createMock(\OCP\IL10N::class), + $this->chunkService, self::TEST_USER1, ); @@ -432,6 +438,7 @@ public function testProofreadProvider(): void { \OC::$server->get(IAppConfig::class), $this->openAiSettingsService, $this->createMock(\OCP\IL10N::class), + $this->chunkService, self::TEST_USER1, ); @@ -502,6 +509,7 @@ public function testTranslationProvider(): void { \OC::$server->get(\OCP\L10N\IFactory::class), $this->createMock(\OCP\ICacheFactory::class), $this->createMock(\Psr\Log\LoggerInterface::class), + $this->chunkService, self::TEST_USER1, );