Skip to content

Commit 6b42432

Browse files
committed
feat: add chunking to ChangeTone, ContextWrite, ProofreadProvider, ReformulateProvider, TopicsProvider, and TranslateProvider
Signed-off-by: Lukas Schaefer <[email protected]>
1 parent cbe243c commit 6b42432

File tree

9 files changed

+296
-137
lines changed

9 files changed

+296
-137
lines changed

lib/Service/ChunkService.php

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<?php
2+
3+
/**
4+
* SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
5+
* SPDX-License-Identifier: AGPL-3.0-or-later
6+
*/
7+
8+
namespace OCA\OpenAi\Service;
9+
10+
/**
 * Helper service that splits a long prompt into smaller chunks so each chunk
 * can be sent to the model in a separate request.
 *
 * The split is lossless: concatenating the returned chunks in order yields the
 * original prompt.
 */
class ChunkService {
	/**
	 * Conservative characters-per-token estimate.
	 *
	 * https://platform.openai.com/tokenizer
	 * One token is roughly 4 bytes for OpenAI models. Using 3 keeps the estimate
	 * on the safe (lower) side to account for some multibyte characters.
	 */
	private const CHARS_PER_TOKEN = 3;

	/** A UTF-8 encoded character never takes more than 4 bytes. */
	private const MAX_BYTES_PER_CHAR = 4;

	/**
	 * Boundaries tried in order of preference: paragraph, sentence, word.
	 * Each pattern greedily matches the longest prefix ending with the boundary.
	 * No counted quantifier is used, so the PCRE repeat limit (65535) cannot be hit.
	 */
	private const BOUNDARY_PATTERNS = [
		'/^.*\n/su',
		'/^.*[!.?]/su',
		'/^.*\W/su',
	];

	public function __construct(
		private OpenAiSettingsService $openAiSettingsService,
	) {
	}

	/**
	 * Split a prompt into chunks of at most (chunk size * 3) characters.
	 *
	 * Every chunk is cut at the last paragraph break inside its window; if there
	 * is none, at the last sentence end; if there is none, at the last non-word
	 * character; otherwise the window is cut at the size limit.
	 *
	 * @param string $prompt
	 * @param bool $outputChunking If the output is about the same size as the input so output tokens matter. Ex: translate
	 * @param int|null $maxTokens The maximum number of output tokens if specified by the user
	 * @return list<string> The chunks in original order; a single-element list when chunking
	 *                      is disabled (size <= 0) or the prompt already fits in one chunk
	 */
	public function chunkSplitPrompt(string $prompt, bool $outputChunking = false, ?int $maxTokens = null): array {
		$chunkSize = $this->openAiSettingsService->getChunkSize();
		if ($outputChunking) {
			$maxTokens = $maxTokens ?? $this->openAiSettingsService->getMaxTokens();
			$chunkSize = min($chunkSize, $maxTokens);
		}

		$maxChars = $chunkSize * self::CHARS_PER_TOKEN;

		if ($chunkSize <= 0 || mb_strlen($prompt, 'UTF-8') <= $maxChars) {
			// Chunking is disabled or prompt is short enough to be a single chunk
			return [$prompt];
		}

		$chunks = [];
		$offset = 0;
		$totalBytes = strlen($prompt);
		// Only look at a bounded byte slice per iteration so the work stays linear in the prompt size
		$sliceBytes = ($maxChars + 1) * self::MAX_BYTES_PER_CHAR;

		while ($offset < $totalBytes) {
			$slice = substr($prompt, $offset, $sliceBytes);
			// One extra character tells us whether anything is left after this window
			$probe = mb_substr($slice, 0, $maxChars + 1, 'UTF-8');
			if (mb_strlen($probe, 'UTF-8') <= $maxChars) {
				// The remainder fits in a single chunk
				$chunks[] = substr($prompt, $offset);
				break;
			}

			$window = mb_substr($probe, 0, $maxChars, 'UTF-8');
			$chunk = $this->cutAtBoundary($window);
			if ($chunk === '') {
				// Defensive: never loop without progress, keep the remaining text in one piece
				$chunks[] = substr($prompt, $offset);
				break;
			}

			$chunks[] = $chunk;
			$offset += strlen($chunk);
		}

		return $chunks;
	}

	/**
	 * Return the longest prefix of $window that ends on the most preferred boundary available.
	 *
	 * Falls back to the whole window when it contains no boundary at all or when
	 * the patterns cannot be applied (preg_match() does not return 1, e.g. on malformed UTF-8).
	 */
	private function cutAtBoundary(string $window): string {
		foreach (self::BOUNDARY_PATTERNS as $pattern) {
			if (preg_match($pattern, $window, $match) === 1) {
				return $match[0];
			}
		}

		return $window;
	}
}

lib/TaskProcessing/ChangeToneProvider.php

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
use Exception;
1313
use OCA\OpenAi\AppInfo\Application;
14+
use OCA\OpenAi\Service\ChunkService;
1415
use OCA\OpenAi\Service\OpenAiAPIService;
1516
use OCA\OpenAi\Service\OpenAiSettingsService;
1617
use OCP\IAppConfig;
@@ -19,6 +20,7 @@
1920
use OCP\TaskProcessing\ISynchronousProvider;
2021
use OCP\TaskProcessing\ShapeDescriptor;
2122
use OCP\TaskProcessing\ShapeEnumValue;
23+
use OCP\TaskProcessing\TaskTypes\TextToTextChangeTone;
2224
use RuntimeException;
2325

2426
class ChangeToneProvider implements ISynchronousProvider {
@@ -28,6 +30,7 @@ public function __construct(
2830
private IAppConfig $appConfig,
2931
private OpenAiSettingsService $openAiSettingsService,
3032
private IL10N $l,
33+
private ChunkService $chunkService,
3134
private ?string $userId,
3235
) {
3336
}
@@ -42,7 +45,7 @@ public function getName(): string {
4245

4346
public function getTaskTypeId(): string {
4447
if (class_exists('OCP\\TaskProcessing\\TaskTypes\\TextToTextChangeTone')) {
45-
return \OCP\TaskProcessing\TaskTypes\TextToTextChangeTone::ID;
48+
return TextToTextChangeTone::ID;
4649
}
4750
return ChangeToneTaskType::ID;
4851
}
@@ -121,7 +124,6 @@ public function process(?string $userId, array $input, callable $reportProgress)
121124
}
122125
$textInput = $input['input'];
123126
$toneInput = $input['tone'];
124-
$prompt = "Reformulate the following text in a $toneInput tone in its original language. Output only the reformulation. Here is the text:" . "\n\n" . $textInput . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:';
125127

126128
$maxTokens = null;
127129
if (isset($input['max_tokens']) && is_int($input['max_tokens'])) {
@@ -134,22 +136,33 @@ public function process(?string $userId, array $input, callable $reportProgress)
134136
$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;
135137
}
136138

137-
try {
138-
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
139-
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
140-
$completion = $completion['messages'];
141-
} else {
142-
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
139+
$chunks = $this->chunkService->chunkSplitPrompt($textInput, true, $maxTokens);
140+
$result = '';
141+
$increase = 1.0 / (float)count($chunks);
142+
$progress = 0.0;
143+
foreach ($chunks as $textInput) {
144+
$prompt = "Reformulate the following text in a $toneInput tone in its original language. Output only the reformulation. Here is the text:" . "\n\n" . $textInput . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:';
145+
try {
146+
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
147+
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
148+
$completion = $completion['messages'];
149+
} else {
150+
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
151+
}
152+
} catch (Exception $e) {
153+
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
154+
}
155+
$progress += $increase;
156+
$reportProgress($progress);
157+
if (count($completion) > 0) {
158+
$result .= array_pop($completion);
159+
continue;
143160
}
144-
} catch (Exception $e) {
145-
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
146-
}
147-
if (count($completion) > 0) {
148-
$endTime = time();
149-
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
150-
return ['output' => array_pop($completion)];
151-
}
152161

153-
throw new RuntimeException('No result in OpenAI/LocalAI response.');
162+
throw new RuntimeException('No result in OpenAI/LocalAI response.');
163+
}
164+
$endTime = time();
165+
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
166+
return ['output' => $result];
154167
}
155168
}

lib/TaskProcessing/ContextWriteProvider.php

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
use Exception;
1313
use OCA\OpenAi\AppInfo\Application;
14+
use OCA\OpenAi\Service\ChunkService;
1415
use OCA\OpenAi\Service\OpenAiAPIService;
1516
use OCA\OpenAi\Service\OpenAiSettingsService;
1617
use OCP\IAppConfig;
@@ -27,6 +28,7 @@ public function __construct(
2728
private OpenAiAPIService $openAiAPIService,
2829
private IAppConfig $appConfig,
2930
private OpenAiSettingsService $openAiSettingsService,
31+
private ChunkService $chunkService,
3032
private IL10N $l,
3133
private ?string $userId,
3234
) {
@@ -108,18 +110,10 @@ public function process(?string $userId, array $input, callable $reportProgress)
108110
) {
109111
throw new RuntimeException('Invalid inputs');
110112
}
113+
111114
$writingStyle = $input['style_input'];
112115
$sourceMaterial = $input['source_input'];
113116

114-
$prompt = 'You\'re a professional copywriter tasked with copying an instructed or demonstrated *WRITING STYLE*'
115-
. ' and writing a text on the provided *SOURCE MATERIAL*.'
116-
. " \n*WRITING STYLE*:\n$writingStyle\n\n*SOURCE MATERIAL*:\n\n$sourceMaterial\n\n"
117-
. 'Now write a text in the same style detailed or demonstrated under *WRITING STYLE* using the *SOURCE MATERIAL*'
118-
. ' as source of facts and instruction on what to write about.'
119-
. ' Do not invent any facts or events yourself.'
120-
. ' Also, use the *WRITING STYLE* as a guide for how to write the text ONLY and not as a source of facts or events.'
121-
. ' Detect the language used in the *SOURCE_MATERIAL*. Make sure to use the same language in your response. Do not mention the language explicitly.';
122-
123117
$maxTokens = null;
124118
if (isset($input['max_tokens']) && is_int($input['max_tokens'])) {
125119
$maxTokens = $input['max_tokens'];
@@ -131,22 +125,42 @@ public function process(?string $userId, array $input, callable $reportProgress)
131125
$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;
132126
}
133127

134-
try {
135-
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
136-
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
137-
$completion = $completion['messages'];
138-
} else {
139-
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
128+
$chunks = $this->chunkService->chunkSplitPrompt($sourceMaterial, true, $maxTokens);
129+
$result = '';
130+
$increase = 1.0 / (float)count($chunks);
131+
$progress = 0.0;
132+
133+
foreach ($chunks as $sourceMaterial) {
134+
$prompt = 'You\'re a professional copywriter tasked with copying an instructed or demonstrated *WRITING STYLE*'
135+
. ' and writing a text on the provided *SOURCE MATERIAL*.'
136+
. " \n*WRITING STYLE*:\n$writingStyle\n\n*SOURCE MATERIAL*:\n\n$sourceMaterial\n\n"
137+
. 'Now write a text in the same style detailed or demonstrated under *WRITING STYLE* using the *SOURCE MATERIAL*'
138+
. ' as source of facts and instruction on what to write about.'
139+
. ' Do not invent any facts or events yourself.'
140+
. ' Also, use the *WRITING STYLE* as a guide for how to write the text ONLY and not as a source of facts or events.'
141+
. ' Detect the language used in the *SOURCE_MATERIAL*. Make sure to use the same language in your response. Do not mention the language explicitly.';
142+
try {
143+
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
144+
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
145+
$completion = $completion['messages'];
146+
} else {
147+
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
148+
}
149+
} catch (Exception $e) {
150+
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
140151
}
141-
} catch (Exception $e) {
142-
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
143-
}
144-
if (count($completion) > 0) {
145-
$endTime = time();
146-
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
147-
return ['output' => array_pop($completion)];
152+
if (count($completion) > 0) {
153+
$result .= array_pop($completion);
154+
$progress += $increase;
155+
$reportProgress($progress);
156+
continue;
157+
}
158+
159+
throw new RuntimeException('No result in OpenAI/LocalAI response.');
148160
}
161+
$endTime = time();
162+
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
163+
return ['output' => $result];
149164

150-
throw new RuntimeException('No result in OpenAI/LocalAI response.');
151165
}
152166
}

lib/TaskProcessing/ProofreadProvider.php

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
use Exception;
1313
use OCA\OpenAi\AppInfo\Application;
14+
use OCA\OpenAi\Service\ChunkService;
1415
use OCA\OpenAi\Service\OpenAiAPIService;
1516
use OCA\OpenAi\Service\OpenAiSettingsService;
1617
use OCP\IAppConfig;
@@ -28,6 +29,7 @@ public function __construct(
2829
private IAppConfig $appConfig,
2930
private OpenAiSettingsService $openAiSettingsService,
3031
private IL10N $l,
32+
private ChunkService $chunkService,
3133
private ?string $userId,
3234
) {
3335
}
@@ -119,23 +121,35 @@ public function process(?string $userId, array $input, callable $reportProgress)
119121
$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;
120122
}
121123

122-
try {
123-
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
124-
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $textInput, $systemPrompt, null, 1, $maxTokens);
125-
$completion = $completion['messages'];
126-
} else {
127-
$prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $textInput;
128-
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
124+
$chunks = $this->chunkService->chunkSplitPrompt($textInput, true, $maxTokens);
125+
$result = '';
126+
$increase = 1.0 / (float)count($chunks);
127+
$progress = 0.0;
128+
129+
foreach ($chunks as $textInput) {
130+
try {
131+
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
132+
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $textInput, $systemPrompt, null, 1, $maxTokens);
133+
$completion = $completion['messages'];
134+
} else {
135+
$prompt = $systemPrompt . ' Here is the text:' . "\n\n" . $textInput;
136+
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
137+
}
138+
} catch (Exception $e) {
139+
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
129140
}
130-
} catch (Exception $e) {
131-
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
132-
}
133-
if (count($completion) > 0) {
134-
$endTime = time();
135-
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
136-
return ['output' => array_pop($completion)];
141+
if (count($completion) > 0) {
142+
$result .= array_pop($completion);
143+
$progress += $increase;
144+
$reportProgress($progress);
145+
continue;
146+
}
147+
148+
throw new RuntimeException('No result in OpenAI/LocalAI response.');
137149
}
150+
$endTime = time();
151+
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
152+
return ['output' => $result];
138153

139-
throw new RuntimeException('No result in OpenAI/LocalAI response.');
140154
}
141155
}

lib/TaskProcessing/ReformulateProvider.php

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
use Exception;
1313
use OCA\OpenAi\AppInfo\Application;
14+
use OCA\OpenAi\Service\ChunkService;
1415
use OCA\OpenAi\Service\OpenAiAPIService;
1516
use OCA\OpenAi\Service\OpenAiSettingsService;
1617
use OCP\IAppConfig;
@@ -28,6 +29,7 @@ public function __construct(
2829
private IAppConfig $appConfig,
2930
private OpenAiSettingsService $openAiSettingsService,
3031
private IL10N $l,
32+
private ChunkService $chunkService,
3133
private ?string $userId,
3234
) {
3335
}
@@ -106,7 +108,6 @@ public function process(?string $userId, array $input, callable $reportProgress)
106108
throw new RuntimeException('Invalid prompt');
107109
}
108110
$prompt = $input['input'];
109-
$prompt = 'Reformulate the following text. Use the same language as the original text. Output only the reformulation. Here is the text:' . "\n\n" . $prompt . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:';
110111

111112
$maxTokens = null;
112113
if (isset($input['max_tokens']) && is_int($input['max_tokens'])) {
@@ -118,23 +119,35 @@ public function process(?string $userId, array $input, callable $reportProgress)
118119
} else {
119120
$model = $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID;
120121
}
121-
122-
try {
123-
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
124-
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
125-
$completion = $completion['messages'];
126-
} else {
127-
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
122+
$chunks = $this->chunkService->chunkSplitPrompt($prompt, true, $maxTokens);
123+
$result = '';
124+
$increase = 1.0 / (float)count($chunks);
125+
$progress = 0.0;
126+
127+
foreach ($chunks as $chunk) {
128+
$prompt = 'Reformulate the following text. Use the same language as the original text. Output only the reformulation. Here is the text:' . "\n\n" . $chunk . "\n\n" . 'Do not mention the used language in your reformulation. Here is your reformulation in the same language:';
129+
try {
130+
if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
131+
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
132+
$completion = $completion['messages'];
133+
} else {
134+
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
135+
}
136+
} catch (Exception $e) {
137+
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
128138
}
129-
} catch (Exception $e) {
130-
throw new RuntimeException('OpenAI/LocalAI request failed: ' . $e->getMessage());
131-
}
132-
if (count($completion) > 0) {
133-
$endTime = time();
134-
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
135-
return ['output' => array_pop($completion)];
139+
if (count($completion) > 0) {
140+
$result .= array_pop($completion);
141+
$progress += $increase;
142+
$reportProgress($progress);
143+
continue;
144+
}
145+
146+
throw new RuntimeException('No result in OpenAI/LocalAI response.');
136147
}
137148

138-
throw new RuntimeException('No result in OpenAI/LocalAI response.');
149+
$endTime = time();
150+
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
151+
return ['output' => $result];
139152
}
140153
}

0 commit comments

Comments
 (0)