diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index 9087c24d..d8424784 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -79,6 +79,8 @@ class Application extends App implements IBootstrap { public const MODELS_CACHE_KEY = 'models'; public const MODELS_CACHE_TTL = 60 * 30; + public const AUDIO_TO_TEXT_LANGUAGES = [['en', 'English'], ['zh', '中文'], ['de', 'Deutsch'], ['es', 'Español'], ['ru', 'Русский'], ['ko', '한국어'], ['fr', 'Français'], ['ja', '日本語'], ['pt', 'Português'], ['tr', 'Türkçe'], ['pl', 'Polski'], ['ca', 'Català'], ['nl', 'Nederlands'], ['ar', 'العربية'], ['sv', 'Svenska'], ['it', 'Italiano'], ['id', 'Bahasa Indonesia'], ['hi', 'हिन्दी'], ['fi', 'Suomi'], ['vi', 'Tiếng Việt'], ['he', 'עברית'], ['uk', 'Українська'], ['el', 'Ελληνικά'], ['ms', 'Bahasa Melayu'], ['cs', 'Česky'], ['ro', 'Română'], ['da', 'Dansk'], ['hu', 'Magyar'], ['ta', 'தமிழ்'], ['no', 'Norsk (bokmål / riksmål)'], ['th', 'ไทย / Phasa Thai'], ['ur', 'اردو'], ['hr', 'Hrvatski'], ['bg', 'Български'], ['lt', 'Lietuvių'], ['la', 'Latina'], ['mi', 'Māori'], ['ml', 'മലയാളം'], ['cy', 'Cymraeg'], ['sk', 'Slovenčina'], ['te', 'తెలుగు'], ['fa', 'فارسی'], ['lv', 'Latviešu'], ['bn', 'বাংলা'], ['sr', 'Српски'], ['az', 'Azərbaycanca / آذربايجان'], ['sl', 'Slovenščina'], ['kn', 'ಕನ್ನಡ'], ['et', 'Eesti'], ['mk', 'Македонски'], ['br', 'Brezhoneg'], ['eu', 'Euskara'], ['is', 'Íslenska'], ['hy', 'Հայերեն'], ['ne', 'नेपाली'], ['mn', 'Монгол'], ['bs', 'Bosanski'], ['kk', 'Қазақша'], ['sq', 'Shqip'], ['sw', 'Kiswahili'], ['gl', 'Galego'], ['mr', 'मराठी'], ['pa', 'ਪੰਜਾਬੀ / पंजाबी / پنجابي'], ['si', 'සිංහල'], ['km', 'ភាសាខ្មែរ'], ['sn', 'chiShona'], ['yo', 'Yorùbá'], ['so', 'Soomaaliga'], ['af', 'Afrikaans'], ['oc', 'Occitan'], ['ka', 'ქართული'], ['be', 'Беларуская'], ['tg', 'Тоҷикӣ'], ['sd', 'सिनधि'], ['gu', 'ગુજરાતી'], ['am', 'አማርኛ'], ['yi', 'ייִדיש'], ['lo', 'ລາວ / Pha xa lao'], ['uz', 'Ўзбек'], ['fo', 'Føroyskt'], ['ht', 'Krèyol ayisyen'], ['ps', 'پښتو'], 
['tk', 'Туркмен / تركمن'], ['nn', 'Norsk (nynorsk)'], ['mt', 'bil-Malti'], ['sa', 'संस्कृतम्'], ['lb', 'Lëtzebuergesch'], ['my', 'Myanmasa'], ['bo', 'བོད་ཡིག / Bod skad'], ['tl', 'Tagalog'], ['mg', 'Malagasy'], ['as', 'অসমীয়া'], ['tt', 'Tatarça'], ['haw', 'ʻŌlelo Hawaiʻi'], ['ln', 'Lingála'], ['ha', 'هَوُسَ'], ['ba', 'Башҡорт'], ['jw', 'ꦧꦱꦗꦮ'], ['su', 'Basa Sunda'], ['yue', '粤语']]; + private IAppConfig $appConfig; public function __construct(array $urlParams = []) { diff --git a/lib/Service/OpenAiAPIService.php b/lib/Service/OpenAiAPIService.php index cad206a7..b3e7e70a 100644 --- a/lib/Service/OpenAiAPIService.php +++ b/lib/Service/OpenAiAPIService.php @@ -683,6 +683,8 @@ public function transcribeBase64Mp3( * @param string|null $userId * @param File $file * @param bool $translate + * @param string $model + * @param string $language * @return string * @throws Exception */ @@ -691,9 +693,10 @@ public function transcribeFile( File $file, bool $translate = false, string $model = Application::DEFAULT_MODEL_ID, + string $language = 'default', ): string { try { - $transcriptionResponse = $this->transcribe($userId, $file->getContent(), $translate, $model); + $transcriptionResponse = $this->transcribe($userId, $file->getContent(), $translate, $model, $language); } catch (NotPermittedException|LockedException|GenericFileException $e) { $this->logger->warning('Could not read audio file: ' . $file->getPath() . '. Error: ' . 
$e->getMessage(), ['app' => Application::APP_ID]); throw new Exception($this->l10n->t('Could not read audio file.'), Http::STATUS_INTERNAL_SERVER_ERROR); @@ -707,6 +710,7 @@ public function transcribeFile( * @param string $audioFileContent * @param bool $translate * @param string $model + * @param string $language * @return string * @throws Exception */ @@ -715,6 +719,7 @@ public function transcribe( string $audioFileContent, bool $translate = true, string $model = Application::DEFAULT_MODEL_ID, + string $language = 'default', ): string { if ($this->isQuotaExceeded($userId, Application::QUOTA_TYPE_TRANSCRIPTION)) { throw new Exception($this->l10n->t('Audio transcription quota exceeded'), Http::STATUS_TOO_MANY_REQUESTS); @@ -730,6 +735,13 @@ public function transcribe( 'response_format' => 'verbose_json', // Verbose needed for extraction of audio duration ]; + // 'default' resolves to the user's saved language; no language param is sent for 'detect_language' or an empty value + if ($language === 'default') { + $language = $this->openAiSettingsService->getUserSTTLanguage($userId); + } + if ($language !== 'detect_language' && $language !== '') { + $params['language'] = $language; + } $endpoint = $translate ? 
'audio/translations' : 'audio/transcriptions'; $contentType = 'multipart/form-data'; diff --git a/lib/Service/OpenAiSettingsService.php b/lib/Service/OpenAiSettingsService.php index 55bf7b45..8a71647c 100644 --- a/lib/Service/OpenAiSettingsService.php +++ b/lib/Service/OpenAiSettingsService.php @@ -53,6 +53,7 @@ class OpenAiSettingsService { 'api_key' => 'string', 'basic_user' => 'string', 'basic_password' => 'string', + 'stt_language' => 'string', ]; @@ -158,6 +159,14 @@ public function getUserApiKey(?string $userId, bool $fallBackOnAdminValue = fals return $userApiKey ?: $fallBackApiKey; } + /** + * @param string|null $userId + * @return string + */ + public function getUserSTTLanguage(?string $userId): string { + return $this->config->getUserValue($userId, Application::APP_ID, 'stt_language', 'detect_language'); + } + /** * @return string */ @@ -408,7 +417,7 @@ public function getAdminConfig(): array { /** * Get the user config for the settings page - * @return array{api_key: string, basic_password: string, basic_user: string, is_custom_service: bool, use_basic_auth: bool} + * @return array{api_key: string, basic_password: string, basic_user: string, is_custom_service: bool, use_basic_auth: bool, stt_language: string} */ public function getUserConfig(string $userId): array { $isCustomService = $this->getServiceUrl() !== '' && $this->getServiceUrl() !== Application::OPENAI_API_BASE_URL; @@ -418,6 +427,7 @@ public function getUserConfig(string $userId): array { 'basic_password' => $this->getUserBasicPassword($userId, false), 'use_basic_auth' => $this->getUseBasicAuth(), 'is_custom_service' => $isCustomService, + 'stt_language' => $this->getUserSTTLanguage($userId) ]; } @@ -544,6 +554,15 @@ public function setUserApiKey(string $userId, string $apiKey): void { $this->invalidateModelsCache(); } + /** + * @param string $userId + * @param string $language + * @throws PreConditionNotMetException + */ + public function setUserSTTLanguage(string $userId, string 
$language): void { + $this->config->setUserValue($userId, Application::APP_ID, 'stt_language', $language); + } + /** * @param string $defaultCompletionModelId * @return void @@ -886,6 +905,9 @@ public function setUserConfig(string $userId, array $userConfig): void { if (isset($userConfig['basic_password'])) { $this->setUserBasicPassword($userId, $userConfig['basic_password']); } + if (isset($userConfig['stt_language'])) { + $this->setUserSTTLanguage($userId, $userConfig['stt_language']); + } } /** diff --git a/lib/Settings/Personal.php b/lib/Settings/Personal.php index de822ec4..92e1496f 100644 --- a/lib/Settings/Personal.php +++ b/lib/Settings/Personal.php @@ -11,12 +11,14 @@ use OCA\OpenAi\Service\OpenAiSettingsService; use OCP\AppFramework\Http\TemplateResponse; use OCP\AppFramework\Services\IInitialState; +use OCP\IL10N; use OCP\Settings\ISettings; class Personal implements ISettings { public function __construct( private IInitialState $initialStateService, private OpenAiSettingsService $openAiSettingsService, + private IL10N $l, private ?string $userId, ) { } @@ -31,6 +33,25 @@ public function getForm(): TemplateResponse { $userConfig = $this->openAiSettingsService->getUserConfig($this->userId); $userConfig['api_key'] = $userConfig['api_key'] === '' ? '' : 'dummyApiKey'; $userConfig['basic_password'] = $userConfig['basic_password'] === '' ? 
'' : 'dummyPassword'; + $languages = Application::AUDIO_TO_TEXT_LANGUAGES; + array_unshift($languages, ['detect_language', $this->l->t('Detect language')]); + $languages = array_map(static function (array $language) { + return [ + 'value' => $language[0], + 'label' => $language[1], + ]; + }, $languages); + $this->initialStateService->provideInitialState('languages', $languages); + $STTLanguage = $userConfig['stt_language']; + + // Sets the correct value and label for the frontend + $userConfig['stt_language'] = ['value' => '', 'label' => '']; + foreach ($languages as $language) { + if ($language['value'] === $STTLanguage) { + $userConfig['stt_language'] = $language; + break; + } + } $this->initialStateService->provideInitialState('config', $userConfig); return new TemplateResponse(Application::APP_ID, 'personalSettings'); } diff --git a/lib/TaskProcessing/AudioToTextProvider.php b/lib/TaskProcessing/AudioToTextProvider.php index 562bd24b..2e89b269 100644 --- a/lib/TaskProcessing/AudioToTextProvider.php +++ b/lib/TaskProcessing/AudioToTextProvider.php @@ -14,7 +14,11 @@ use OCA\OpenAi\Service\OpenAiAPIService; use OCP\Files\File; use OCP\IAppConfig; +use OCP\IL10N; +use OCP\TaskProcessing\EShapeType; use OCP\TaskProcessing\ISynchronousProvider; +use OCP\TaskProcessing\ShapeDescriptor; +use OCP\TaskProcessing\ShapeEnumValue; use OCP\TaskProcessing\TaskTypes\AudioToText; use Psr\Log\LoggerInterface; use RuntimeException; @@ -25,6 +29,7 @@ public function __construct( private OpenAiAPIService $openAiAPIService, private LoggerInterface $logger, private IAppConfig $appConfig, + private IL10N $l, ) { } @@ -53,15 +58,24 @@ public function getInputShapeDefaults(): array { } public function getOptionalInputShape(): array { - return []; + return ['language' => new ShapeDescriptor( + $this->l->t('Language'), + $this->l->t('The language of the audio file'), + EShapeType::Enum + )]; } public function getOptionalInputShapeEnumValues(): array { - return []; + 
$languageEnumValues = array_map(static function (array $language) { + return new ShapeEnumValue($language[1], $language[0]); + }, Application::AUDIO_TO_TEXT_LANGUAGES); + $detectLanguageEnumValue = new ShapeEnumValue($this->l->t('Detect language'), 'detect_language'); + $defaultLanguageEnumValue = new ShapeEnumValue($this->l->t('Default'), 'default'); + return ['language' => array_merge([$detectLanguageEnumValue, $defaultLanguageEnumValue], $languageEnumValues)]; } public function getOptionalInputShapeDefaults(): array { - return []; + return ['language' => 'default']; } public function getOutputShapeEnumValues(): array { @@ -81,11 +95,15 @@ public function process(?string $userId, array $input, callable $reportProgress) throw new RuntimeException('Invalid input file'); } $inputFile = $input['input']; + $language = $input['language'] ?? 'default'; + if (!is_string($language)) { + throw new RuntimeException('Invalid language'); + } $model = $this->appConfig->getValueString(Application::APP_ID, 'default_stt_model_id', Application::DEFAULT_MODEL_ID) ?: Application::DEFAULT_MODEL_ID; try { - $transcription = $this->openAiAPIService->transcribeFile($userId, $inputFile, false, $model); + $transcription = $this->openAiAPIService->transcribeFile($userId, $inputFile, false, $model, $language); return ['output' => $transcription]; } catch (Exception $e) { $this->logger->warning('OpenAI\'s Whisper transcription failed with: ' . $e->getMessage(), ['exception' => $e]); diff --git a/src/components/PersonalSettings.vue b/src/components/PersonalSettings.vue index 6fd4c7e2..19a83720 100644 --- a/src/components/PersonalSettings.vue +++ b/src/components/PersonalSettings.vue @@ -9,6 +9,14 @@ {{ t('integration_openai', 'OpenAI and LocalAI integration') }}
+

+ {{ t('integration_openai', 'Speech to Text Default Language') }} +

+

{{ t('integration_openai', 'Your administrator defined a custom service address') }} @@ -131,6 +139,7 @@ import OpenAiIcon from './icons/OpenAiIcon.vue' import NcNoteCard from '@nextcloud/vue/components/NcNoteCard' import NcTextField from '@nextcloud/vue/components/NcTextField' +import NcSelect from '@nextcloud/vue/components/NcSelect' import axios from '@nextcloud/axios' import { showError, showSuccess } from '@nextcloud/dialogs' @@ -150,6 +159,7 @@ export default { InformationOutlineIcon, NcNoteCard, NcTextField, + NcSelect, }, props: [], @@ -157,6 +167,7 @@ export default { data() { return { state: loadState('integration_openai', 'config'), + languages: loadState('integration_openai', 'languages'), // to prevent some browsers to fill fields with remembered passwords readonly: true, apiKeyUrl: 'https://platform.openai.com/account/api-keys', @@ -186,6 +197,7 @@ export default { methods: { onInput: debounce(function() { this.saveOptions({ + stt_language: this.state.stt_language.value, }) }, 2000), onSensitiveInput: debounce(async function() {