Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions lib/Service/ChunkService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?php

/**
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\OpenAi\Service;

/**
* Helper Service to help with chunking
*/
class ChunkService {
	/**
	 * Boundary patterns tried in order of preference on each window of text.
	 * Each one greedily matches the longest prefix of the window that ends on
	 * the wanted delimiter, so a chunk is as large as possible while still
	 * ending on a natural break.
	 */
	private const BOUNDARY_PATTERNS = [
		// End of a paragraph/line
		'/^.+\n/su',
		// End of a sentence
		'/^.+[!.?\n]/su',
		// End of a word (any non-word character)
		'/^.+\W/su',
	];

	public function __construct(
		private OpenAiSettingsService $openAiSettingsService,
	) {
	}

	/**
	 * Split a prompt into chunks that each fit into the configured chunk size.
	 *
	 * The split is lossless: concatenating the returned chunks always yields the
	 * original prompt, and no chunk is longer than the computed character limit.
	 * Each chunk ends, in order of preference, after a newline, after a sentence
	 * terminator, after a non-word character or, if none of those occur in the
	 * window, at the hard character limit.
	 *
	 * @param string $prompt
	 * @param bool $outputChunking If the output is about the same size as the input so output tokens matter. Ex: translate
	 * @param int|null $maxTokens The maximum number of output tokens if specified by the user
	 * @return list<string> The chunks in their original order; a single-element list if no chunking is needed
	 */
	public function chunkSplitPrompt(string $prompt, bool $outputChunking = false, ?int $maxTokens = null): array {
		$chunkSize = $this->openAiSettingsService->getChunkSize();
		if ($outputChunking) {
			$maxTokens = $maxTokens ?? $this->openAiSettingsService->getMaxTokens();
			$chunkSize = min($chunkSize, $maxTokens);
		}

		// https://platform.openai.com/tokenizer
		// Rough approximation, 1 token is approximately 4 bytes for OpenAI models
		// It's safer to have a lower estimate on the max number of tokens, so consider 3 bytes per token instead of 4 (to account for some multibyte characters)
		$maxChars = $chunkSize * 3;

		if ($chunkSize <= 0 || mb_strlen($prompt, 'UTF-8') <= $maxChars) {
			// Chunking is disabled (zero or nonsensical size) or prompt is short enough to be a single chunk
			return [$prompt];
		}

		if (!mb_check_encoding($prompt, 'UTF-8')) {
			// The boundary patterns use the /u modifier and cannot match malformed UTF-8,
			// so only a plain split by number of characters is possible here
			return mb_str_split($prompt, $maxChars, 'UTF-8');
		}

		$chunks = [];
		$offset = 0;
		$totalBytes = strlen($prompt);
		while ($offset < $totalBytes) {
			// A UTF-8 character is at most 4 bytes long, so this byte slice always holds the
			// next $maxChars characters in full; mb_substr then trims it to whole characters.
			// Working on a bounded window keeps every step independent of the prompt length.
			$window = mb_substr(substr($prompt, $offset, $maxChars * 4), 0, $maxChars, 'UTF-8');

			if ($window === '' || $offset + strlen($window) >= $totalBytes) {
				// What is left fits into one chunk (the empty check only guards against a stalled loop)
				$chunks[] = substr($prompt, $offset);
				break;
			}

			$chunk = $this->cutAtBestBoundary($window);
			$chunks[] = $chunk;
			// The chunk is a byte-exact prefix of the remaining text, so advancing by its byte length loses nothing
			$offset += strlen($chunk);
		}

		return $chunks;
	}

	/**
	 * Return the longest prefix of $window that ends on the most preferred
	 * boundary available, or the whole window if it contains no boundary.
	 *
	 * @param string $window Non-empty, valid UTF-8 text of at most the maximum chunk length
	 * @return string A non-empty prefix of $window
	 */
	private function cutAtBestBoundary(string $window): string {
		foreach (self::BOUNDARY_PATTERNS as $pattern) {
			// A failed or errored match (0 or false) just moves on to the next, less strict, boundary
			if (preg_match($pattern, $window, $match) === 1) {
				return $match[0];
			}
		}

		// No natural boundary in this window: cut at the hard character limit
		return $window;
	}
}
47 changes: 30 additions & 17 deletions lib/TaskProcessing/ChangeToneProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

use Exception;
use OCA\OpenAi\AppInfo\Application;
use OCA\OpenAi\Service\ChunkService;
use OCA\OpenAi\Service\OpenAiAPIService;
use OCA\OpenAi\Service\OpenAiSettingsService;
use OCP\IAppConfig;
Expand All @@ -19,6 +20,7 @@
use OCP\TaskProcessing\ISynchronousProvider;
use OCP\TaskProcessing\ShapeDescriptor;
use OCP\TaskProcessing\ShapeEnumValue;
use OCP\TaskProcessing\TaskTypes\TextToTextChangeTone;
use RuntimeException;

class ChangeToneProvider implements ISynchronousProvider {
Expand All @@ -28,6 +30,7 @@ public function __construct(
private IAppConfig $appConfig,
private OpenAiSettingsService $openAiSettingsService,
private IL10N $l,
private ChunkService $chunkService,
private ?string $userId,
) {
}
Expand All @@ -42,7 +45,7 @@ public function getName(): string {

public function getTaskTypeId(): string {
if (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToTextChangeTone')) {
return \OCP\TaskProcessing\TaskTypes\TextToTextChangeTone::ID;
return TextToTextChangeTone::ID;
}
return ChangeToneTaskType::ID;
}
Expand Down Expand Up @@ -121,7 +124,6 @@ public function process(?string $userId, array $input, callable $reportProgress)
}
$textInput = $input['input'];
$toneInput = $input['tone'];
$prompt = "Reformulate the following text in a $toneInput tone in its original language. Output only the reformulation. Here is the text:" . "\n\n" . $textInput . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:';

$maxTokens = null;
if (isset($input['max_tokens']) && is_int($input['max_tokens'])) {
Expand All @@ -134,22 +136,33 @@ public function process(?string $userId, array $input, callable $reportProgress)
$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;
}

try {
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
$completion = $completion['messages'];
} else {
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
$chunks = $this->chunkService->chunkSplitPrompt($textInput, true, $maxTokens);
$result = '';
$increase = 1.0 / (float)count($chunks);
$progress = 0.0;
foreach ($chunks as $textInput) {
$prompt = "Reformulate the following text in a $toneInput tone in its original language. Output only the reformulation. Here is the text:" . "\n\n" . $textInput . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:';
try {
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
$completion = $completion['messages'];
} else {
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
}
} catch (Exception $e) {
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
}
$progress += $increase;
$reportProgress($progress);
if (count($completion) > 0) {
$result .= array_pop($completion);
continue;
}
} catch (Exception $e) {
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
}
if (count($completion) > 0) {
$endTime = time();
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
return ['output' => array_pop($completion)];
}

throw new RuntimeException('No result in OpenAI/LocalAI response.');
throw new RuntimeException('No result in OpenAI/LocalAI response.');
}
$endTime = time();
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
return ['output' => $result];
}
}
60 changes: 37 additions & 23 deletions lib/TaskProcessing/ContextWriteProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

use Exception;
use OCA\OpenAi\AppInfo\Application;
use OCA\OpenAi\Service\ChunkService;
use OCA\OpenAi\Service\OpenAiAPIService;
use OCA\OpenAi\Service\OpenAiSettingsService;
use OCP\IAppConfig;
Expand All @@ -27,6 +28,7 @@ public function __construct(
private OpenAiAPIService $openAiAPIService,
private IAppConfig $appConfig,
private OpenAiSettingsService $openAiSettingsService,
private ChunkService $chunkService,
private IL10N $l,
private ?string $userId,
) {
Expand Down Expand Up @@ -108,18 +110,10 @@ public function process(?string $userId, array $input, callable $reportProgress)
) {
throw new RuntimeException('Invalid inputs');
}

$writingStyle = $input['style_input'];
$sourceMaterial = $input['source_input'];

$prompt = 'You\'re a professional copywriter tasked with copying an instructed or demonstrated *WRITING STYLE*'
. ' and writing a text on the provided *SOURCE MATERIAL*.'
. " \n*WRITING STYLE*:\n$writingStyle\n\n*SOURCE MATERIAL*:\n\n$sourceMaterial\n\n"
. 'Now write a text in the same style detailed or demonstrated under *WRITING STYLE* using the *SOURCE MATERIAL*'
. ' as source of facts and instruction on what to write about.'
. ' Do not invent any facts or events yourself.'
. ' Also, use the *WRITING STYLE* as a guide for how to write the text ONLY and not as a source of facts or events.'
. ' Detect the language used in the *SOURCE_MATERIAL*. Make sure to use the same language in your response. Do not mention the language explicitly.';

$maxTokens = null;
if (isset($input['max_tokens']) && is_int($input['max_tokens'])) {
$maxTokens = $input['max_tokens'];
Expand All @@ -131,22 +125,42 @@ public function process(?string $userId, array $input, callable $reportProgress)
$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;
}

try {
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
$completion = $completion['messages'];
} else {
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
$chunks = $this->chunkService->chunkSplitPrompt($sourceMaterial, true, $maxTokens);
$result = '';
$increase = 1.0 / (float)count($chunks);
$progress = 0.0;

foreach ($chunks as $sourceMaterial) {
$prompt = 'You\'re a professional copywriter tasked with copying an instructed or demonstrated *WRITING STYLE*'
. ' and writing a text on the provided *SOURCE MATERIAL*.'
. " \n*WRITING STYLE*:\n$writingStyle\n\n*SOURCE MATERIAL*:\n\n$sourceMaterial\n\n"
. 'Now write a text in the same style detailed or demonstrated under *WRITING STYLE* using the *SOURCE MATERIAL*'
. ' as source of facts and instruction on what to write about.'
. ' Do not invent any facts or events yourself.'
. ' Also, use the *WRITING STYLE* as a guide for how to write the text ONLY and not as a source of facts or events.'
. ' Detect the language used in the *SOURCE_MATERIAL*. Make sure to use the same language in your response. Do not mention the language explicitly.';
try {
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
$completion = $completion['messages'];
} else {
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
}
} catch (Exception $e) {
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
}
} catch (Exception $e) {
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
}
if (count($completion) > 0) {
$endTime = time();
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
return ['output' => array_pop($completion)];
if (count($completion) > 0) {
$result .= array_pop($completion);
$progress += $increase;
$reportProgress($progress);
continue;
}

throw new RuntimeException('No result in OpenAI/LocalAI response.');
}
$endTime = time();
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
return ['output' => $result];

throw new RuntimeException('No result in OpenAI/LocalAI response.');
}
}
61 changes: 47 additions & 14 deletions lib/TaskProcessing/ProofreadProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

use Exception;
use OCA\OpenAi\AppInfo\Application;
use OCA\OpenAi\Service\ChunkService;
use OCA\OpenAi\Service\OpenAiAPIService;
use OCA\OpenAi\Service\OpenAiSettingsService;
use OCP\IAppConfig;
Expand All @@ -28,6 +29,7 @@ public function __construct(
private IAppConfig $appConfig,
private OpenAiSettingsService $openAiSettingsService,
private IL10N $l,
private ChunkService $chunkService,
private ?string $userId,
) {
}
Expand Down Expand Up @@ -119,23 +121,54 @@ public function process(?string $userId, array $input, callable $reportProgress)
$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;
}

try {
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $textInput, $systemPrompt, null, 1, $maxTokens);
$completion = $completion['messages'];
} else {
$prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $textInput;
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
$chunks = $this->chunkService->chunkSplitPrompt($textInput, true, $maxTokens);
$result = '';
$increase = 1.0 / ((float)count($chunks) + 1.0);
$progress = 0.0;

foreach ($chunks as $textInput) {
try {
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $textInput, $systemPrompt, null, 1, $maxTokens);
$completion = $completion['messages'];
} else {
$prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $textInput;
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
}
} catch (Exception $e) {
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
}
} catch (Exception $e) {
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
if (count($completion) > 0) {
$result .= array_pop($completion);
$progress += $increase;
$reportProgress($progress);
continue;
}

throw new RuntimeException('No result in OpenAI/LocalAI response.');
}
if (count($completion) > 0) {
$endTime = time();
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
return ['output' => array_pop($completion)];
if (count($chunks) > 1) {
$systemPrompt = 'Repeat the proofread feedback list. Ensure that no information is lost, but also not duplicated. ';
try {
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $result, $systemPrompt, null, 1, $maxTokens);
$completion = $completion['messages'];
} else {
$prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $result;
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
}
} catch (Exception $e) {
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
}
if (count($completion) > 0) {
$result = array_pop($completion);
}
}
$progress += $increase;
$reportProgress($progress);
$endTime = time();
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
return ['output' => $result];

throw new RuntimeException('No result in OpenAI/LocalAI response.');
}
}
Loading
Loading