Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 65 additions & 15 deletions lib/TaskProcessing/TranslateProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,39 @@
use OCP\IL10N;
use OCP\L10N\IFactory;
use OCP\TaskProcessing\EShapeType;
use OCP\TaskProcessing\Exception\ProcessingException;
use OCP\TaskProcessing\Exception\UserFacingProcessingException;
use OCP\TaskProcessing\ISynchronousProvider;
use OCP\TaskProcessing\ShapeDescriptor;
use OCP\TaskProcessing\ShapeEnumValue;
use OCP\TaskProcessing\TaskTypes\TextToTextTranslate;
use Psr\Log\LoggerInterface;
use RuntimeException;

class TranslateProvider implements ISynchronousProvider {

public const SYSTEM_PROMPT = 'You are a translations expert that ONLY outputs a valid JSON with the translated text in the following format: { "translation": "<translated text>" } .';
public const JSON_RESPONSE_FORMAT = [
'response_format' => [
'type' => 'json_schema',
'json_schema' => [
'name' => 'TranslationResponse',
'description' => 'A JSON object containing the translated text',
'strict' => true,
'schema' => [
'type' => 'object',
'properties' => [
'translation' => [
'type' => 'string',
'description' => 'The translated text',
],
],
'required' => [ 'translation' ],
'additionalProperties' => false,
],
],
],
];

public function __construct(
private OpenAiAPIService $openAiAPIService,
private IAppConfig $appConfig,
Expand Down Expand Up @@ -144,7 +168,10 @@ public function process(?string $userId, array $input, callable $reportProgress)
}

if (!isset($input['input']) || !is_string($input['input'])) {
throw new RuntimeException('Invalid input text');
throw new ProcessingException('Invalid input text');
}
if (empty($input['input'])) {
throw new UserFacingProcessingException($this->l->t('Input text cannot be empty'));
}
$inputText = $input['input'];

Expand All @@ -160,13 +187,14 @@ public function process(?string $userId, array $input, callable $reportProgress)
try {
$coreLanguages = $this->getCoreLanguagesByCode();

$fromLanguage = $input['origin_language'];
$toLanguage = $coreLanguages[$input['target_language']] ?? $input['target_language'];

if ($input['origin_language'] !== 'detect_language') {
$fromLanguage = $coreLanguages[$input['origin_language']] ?? $input['origin_language'];
$promptStart = 'Translate from ' . $fromLanguage . ' to ' . $toLanguage . ': ';
$promptStart = 'Translate the following text from ' . $fromLanguage . ' to ' . $toLanguage . ': ';
} else {
$promptStart = 'Translate to ' . $toLanguage . ': ';
$promptStart = 'Translate the following text to ' . $toLanguage . ': ';
}

foreach ($chunks as $chunk) {
Expand All @@ -180,33 +208,55 @@ public function process(?string $userId, array $input, callable $reportProgress)
$reportProgress($progress);
continue;
}
$prompt = $promptStart . $chunk;
$prompt = $promptStart . PHP_EOL . PHP_EOL . $chunk;

if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
$completion = $this->openAiAPIService->createChatCompletion($userId, $model, $prompt, null, null, 1, $maxTokens);
$completion = $completion['messages'];
$completionsObj = $this->openAiAPIService->createChatCompletion(
$userId, $model, $prompt, self::SYSTEM_PROMPT, null, 1, $maxTokens, self::JSON_RESPONSE_FORMAT
);
$completions = $completionsObj['messages'];
} else {
$completion = $this->openAiAPIService->createCompletion($userId, $prompt, 1, $model, $maxTokens);
$completions = $this->openAiAPIService->createCompletion(
$userId, $prompt . PHP_EOL . self::SYSTEM_PROMPT . PHP_EOL . PHP_EOL, 1, $model, $maxTokens
);
}

$reportProgress($progress);

if (count($completion) > 0) {
$completion = array_pop($completion);
$result .= $completion;
$cache->set($cacheKey, $completion);
if (count($completions) === 0) {
$this->logger->error('Empty translation response received for chunk');
continue;
}

throw new RuntimeException("Failed translate from {$fromLanguage} to {$toLanguage} for chunk");
$completion = array_pop($completions);
$decodedCompletion = json_decode($completion, true);
if (
!isset($decodedCompletion['translation'])
|| !is_string($decodedCompletion['translation'])
|| empty($decodedCompletion['translation'])
) {
$this->logger->error('Invalid translation response received for chunk', ['response' => $completion]);
continue;
}
$result .= $decodedCompletion['translation'];
$cache->set($cacheKey, $decodedCompletion['translation']);
continue;
}

$endTime = time();
$this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime);
return ['output' => $result];

if (empty(trim($result))) {
throw new ProcessingException("Empty translation result from {$fromLanguage} to {$toLanguage}");
}
return ['output' => trim($result)];

} catch (Exception $e) {
throw new RuntimeException("Failed translate from {$fromLanguage} to {$toLanguage}", 0, $e);
throw new ProcessingException(
"Failed to translate from {$fromLanguage} to {$toLanguage}: {$e->getMessage()}",
$e->getCode(),
$e,
);
}
}
}
62 changes: 37 additions & 25 deletions tests/unit/Providers/OpenAiProviderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -521,53 +521,65 @@ public function testTranslationProvider(): void {
$inputText = 'This is a test prompt';
$n = 1;
$fromLang = 'Swedish';
$toLang = 'en';
$toLang = 'English';
$aiContent = ['translation' => 'This is a test response.'];

$response = '{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1677652288,
"model": "gpt-3.5-turbo-0613",
"system_fingerprint": "fp_44709d6fcb",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "This is a test response."
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 9,
"completion_tokens": 12,
"total_tokens": 21
}
}';
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1677652288,
"model": "gpt-4.1-mini",
"system_fingerprint": "fp_44709d6fcb",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": ' . json_encode(json_encode($aiContent)) . '
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 9,
"completion_tokens": 12,
"total_tokens": 21
}
}';

$url = self::OPENAI_API_BASE . 'chat/completions';
$prompt = 'Translate the following text from ' . $fromLang . ' to ' . $toLang . ': ' . PHP_EOL . PHP_EOL . $inputText;

$options = ['timeout' => Application::OPENAI_DEFAULT_REQUEST_TIMEOUT, 'headers' => ['User-Agent' => Application::USER_AGENT, 'Authorization' => self::AUTHORIZATION_HEADER, 'Content-Type' => 'application/json']];
$options['body'] = json_encode([
'model' => Application::DEFAULT_COMPLETION_MODEL_ID,
'messages' => [
['role' => 'user', 'content' => 'Translate from ' . $fromLang . ' to English (US): ' . $inputText],
['role' => 'system', 'content' => $translationProvider::SYSTEM_PROMPT],
['role' => 'user', 'content' => $prompt],
],
'n' => $n,
'max_completion_tokens' => Application::DEFAULT_MAX_NUM_OF_TOKENS,
'user' => self::TEST_USER1,
...$translationProvider::JSON_RESPONSE_FORMAT,
]);

$iResponse = $this->createMock(\OCP\Http\Client\IResponse::class);
$iResponse->method('getBody')->willReturn($response);
$iResponse->method('getStatusCode')->willReturn(200);
$iResponse->method('getHeader')->with('Content-Type')->willReturn('application/json');

$this->iClient->expects($this->once())->method('post')->with($url, $options)->willReturn($iResponse);
$this->iClient->expects($this->once())->method('post')->with(
$this->equalTo($url),
$this->callback(function ($revdOptions) use ($options) {
$body = json_decode($revdOptions['body'], true);
$expectedBody = json_decode($options['body'], true);
$this->assertEquals($expectedBody, $body);
return true;
}),
)->willReturn($iResponse);

$result = $translationProvider->process(self::TEST_USER1, ['input' => $inputText, 'origin_language' => $fromLang, 'target_language' => $toLang], fn () => null);
$this->assertEquals(['output' => 'This is a test response.'], $result);
$this->assertEquals(['output' => $aiContent['translation']], $result);

// Check that token usage is logged properly
$usage = $this->quotaUsageMapper->getQuotaUnitsOfUser(self::TEST_USER1, Application::QUOTA_TYPE_TEXT);
Expand Down
Loading