diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index f5e9fc32bd..39e964502f 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -95,6 +95,12 @@ export const globalSettingsSchema = z.object({ ttsEnabled: z.boolean().optional(), ttsSpeed: z.number().optional(), + ttsProvider: z.enum(["native", "google-cloud", "azure"]).optional(), + ttsVoice: z.string().optional(), + googleCloudTtsApiKey: z.string().optional(), + googleCloudTtsProjectId: z.string().optional(), + azureTtsSubscriptionKey: z.string().optional(), + azureTtsRegion: z.string().optional(), soundEnabled: z.boolean().optional(), soundVolume: z.number().optional(), @@ -255,6 +261,12 @@ export const EVALS_SETTINGS: RooCodeSettings = { ttsEnabled: false, ttsSpeed: 1, + ttsProvider: "native", + ttsVoice: undefined, + googleCloudTtsApiKey: undefined, + googleCloudTtsProjectId: undefined, + azureTtsSubscriptionKey: undefined, + azureTtsRegion: undefined, soundEnabled: false, soundVolume: 0.5, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8a0cb09263..27b69f2183 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -547,6 +547,9 @@ importers: '@aws-sdk/credential-providers': specifier: ^3.848.0 version: 3.848.0 + '@google-cloud/text-to-speech': + specifier: ^6.2.0 + version: 6.2.0 '@google/genai': specifier: ^1.0.0 version: 1.3.0(@modelcontextprotocol/sdk@1.12.0) @@ -646,6 +649,9 @@ importers: mammoth: specifier: ^1.9.1 version: 1.9.1 + microsoft-cognitiveservices-speech-sdk: + specifier: ^1.45.0 + version: 1.45.0 monaco-vscode-textmate-theme-converter: specifier: ^0.1.7 version: 0.1.7(tslib@2.8.1) @@ -1791,12 +1797,30 @@ packages: '@floating-ui/utils@0.2.9': resolution: {integrity: sha512-MDWhGtE+eHw5JW7lq4qhc5yRLS11ERl1c7Z6Xd0a58DozHES6EnNNwUWbMiG4J9Cgj053Bhk8zvlhFYKVhULwg==} + '@google-cloud/text-to-speech@6.2.0': + resolution: {integrity: sha512-CsmGFNZVL0mPrgcrkBPJ5LB8TPEKiPsER0N5h3nAkkPIeyAGGn3YwuRWyQhDYI5/XWLp11LlDEWqL8B14aYtNg==} + engines: {node: '>=18'} + '@google/genai@1.3.0': resolution: {integrity: sha512-rrMzAELX4P902FUpuWy/W3NcQ7L3q/qtCzfCmGVqIce8yWpptTF9hkKsw744tvZpwqhuzD0URibcJA95wd8QFA==} engines: {node: '>=20.0.0'} peerDependencies: '@modelcontextprotocol/sdk': ^1.11.0 + '@grpc/grpc-js@1.13.4': + resolution: {integrity: sha512-GsFaMXCkMqkKIvwCQjCrwH+GHbPKBjhwo/8ZuUkWHqbI73Kky9I+pQltrlT0+MWpedCoosda53lgjYfyEPgxBg==} + engines: {node: '>=12.10.0'} + + '@grpc/proto-loader@0.7.15': + resolution: {integrity: sha512-tMXdRCfYVixjuFK+Hk0Q1s38gV9zDiDJfWL3h1rv4Qc39oILCu1TRTDt7+fGUI8K4G1Fj125Hx/ru3azECWTyQ==} + engines: {node: '>=6'} + hasBin: true + + '@grpc/proto-loader@0.8.0': + resolution: {integrity: sha512-rc1hOQtjIWGxcxpb9aHAfLpIctjEnsDehj0DAiVfBlmT84uvR0uUtN2hEi/ecvWVjXUGf5qPF4qEgiLOx1YIMQ==} + engines: {node: '>=6'} + hasBin: true + '@hookform/resolvers@5.1.1': resolution: {integrity: sha512-J/NVING3LMAEvexJkyTLjruSm7aOFx7QX21pzkiJfMoNG0wl5aFEjLTl7ay7IQb9EWY6AkrBy7tHL2Alijpdcg==} peerDependencies: @@ -1978,6 +2002,9 @@ packages: '@jridgewell/trace-mapping@0.3.25': resolution: {integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==} + '@js-sdsl/ordered-map@4.4.2': + resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==} + '@kwsites/file-exists@1.1.1': resolution: {integrity: sha512-m9/5YGR18lIwxSFDwfE3oA7bWuq9kdau6ugN4H2rJeyhFQZcG9AgSHkQtSD15a8WvTgfz9aikZMrKPHvbpqFiw==} @@ -2352,6 +2379,36 @@ packages: '@polka/url@1.0.0-next.29': resolution: {integrity: sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==} + '@protobufjs/aspromise@1.1.2': + resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} + + '@protobufjs/base64@1.1.2': + resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} + + '@protobufjs/codegen@2.0.4': + resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==} + + '@protobufjs/eventemitter@1.1.0': + resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==} + + '@protobufjs/fetch@1.1.0': + resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==} + + '@protobufjs/float@1.0.2': + resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} + + '@protobufjs/inquire@1.1.0': + resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==} + + '@protobufjs/path@1.1.2': + resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} + + '@protobufjs/pool@1.1.0': + resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} + + '@protobufjs/utf8@1.1.0': + resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==} + '@puppeteer/browsers@2.10.5': resolution: {integrity: sha512-eifa0o+i8dERnngJwKrfp3dEq7ia5XFyoqB17S4gK8GhsQE4/P8nxOfQSE0zQHxzzLo/cmF+7+ywEQ7wK7Fb+w==} engines: {node: '>=18'} @@ -3651,6 +3708,10 @@ packages: peerDependencies: '@testing-library/dom': '>=7.21.4' + '@tootallnate/once@2.0.0': + resolution: {integrity: sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==} + engines: {node: '>= 10'} + '@tootallnate/quickjs-emscripten@0.23.0': resolution: {integrity: sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==} @@ -3943,6 +4004,9 @@ packages: '@types/vscode@1.100.0': resolution: {integrity: sha512-4uNyvzHoraXEeCamR3+fzcBlh7Afs4Ifjs4epINyUX/jvdk0uzLnwiDY35UKDKnkCHP5Nu3dljl2H8lR6s+rQw==} + '@types/webrtc@0.0.37': + resolution: {integrity: sha512-JGAJC/ZZDhcrrmepU4sPLQLIOIAgs5oIK+Ieq90K8fdaNMhfdfqmYatJdgif1NDQtvrSlTOGJDUYHIDunuufOg==} + '@types/ws@8.18.1': resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==} @@ -4150,6 +4214,14 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agent-base@5.1.1: + resolution: {integrity: sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==} + engines: {node: '>= 6.0.0'} + + agent-base@6.0.2: + resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} + engines: {node: '>= 6.0.0'} + agent-base@7.1.3: resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==} engines: {node: '>= 14'} @@ -4355,6 +4427,9 @@ packages: resolution: {integrity: sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==} engines: {node: '>=10.0.0'} + bent@7.3.12: + resolution: {integrity: sha512-T3yrKnVGB63zRuoco/7Ybl7BwwGZR0lceoVG5XmQyMIH9s19SV5m+a8qam4if0zQuAmOQTyPTPmsQBdAorGK3w==} + better-path-resolve@1.0.0: resolution: {integrity: sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g==} engines: {node: '>=4'} @@ -4449,6 +4524,9 @@ packages: resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} engines: {node: '>= 0.8'} + bytesish@0.4.4: + resolution: {integrity: sha512-i4uu6M4zuMUiyfZN4RU2+i9+peJh//pXhd9x1oSe1LBkZ3LEbCoygu8W0bXTukU1Jme2txKuotpCZRaC3FLxcQ==} + c8@9.1.0: resolution: {integrity: sha512-mBWcT5iqNir1zIkzSPyI3NCR9EZCVI3WUD+AVO17MVWTSFNyUueXE82qTeampNtTr+ilN/5Ua3j24LgbCKjDVg==} engines: {node: '>=14.14.0'} @@ -4488,6 +4566,9 @@ packages: caniuse-lite@1.0.30001718: resolution: {integrity: sha512-AflseV1ahcSunK53NfEs9gFWgOEmzr0f+kaMFA4xiLZlr9Hzt7HxcSpIFcnNCUkz6R6dWKa54rUz3HUmI3nVcw==} + caseless@0.12.0: + resolution: {integrity: sha512-4tYFyifaFfGacoiObjJegolkwSU4xQNGbVgUiNYVUxbQ2x2lUsFvY4hVgVzGiIe6WLOPqycWXA40l+PWsxthUw==} + ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} @@ -5296,6 +5377,9 @@ packages: duplexer@0.1.2: resolution: {integrity: sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==} + duplexify@4.1.3: + resolution: {integrity: sha512-M3BmBhwJRZsSx38lZyhE53Csddgzl5R7xGJNk7CVddZD6CcmwMCH8J+7AprIrQKH7TonKxaCjcv27Qmf+sQ+oA==} + eastasianwidth@0.2.0: resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==} @@ -5896,10 +5980,18 @@ packages: resolution: {integrity: sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==} engines: {node: '>=14'} + gaxios@7.1.1: + resolution: {integrity: sha512-Odju3uBUJyVCkW64nLD4wKLhbh93bh6vIg/ZIXkWiLPBrdgtc65+tls/qml+un3pr6JqYVFDZbbmLDQT68rTOQ==} + engines: {node: '>=18'} + gcp-metadata@6.1.1: resolution: {integrity: sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==} engines: {node: '>=14'} + gcp-metadata@7.0.1: + resolution: {integrity: sha512-UcO3kefx6dCcZkgcTGgVOTFb7b1LlQ02hY1omMjjrrBzkajRMCFgYOjs7J71WqnuG1k2b+9ppGL7FsOfhZMQKQ==} + engines: {node: '>=18'} + gel@2.1.0: resolution: {integrity: sha512-HCeRqInCt6BjbMmeghJ6BKeYwOj7WJT5Db6IWWAA3IMUUa7or7zJfTUEkUWCxiOtoXnwnm96sFK9Fr47Yh2hOA==} engines: {node: '>= 18.0.0'} @@ -6009,14 +6101,26 @@ packages: resolution: {integrity: sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==} engines: {node: '>=10'} + google-auth-library@10.2.1: + resolution: {integrity: sha512-HMxFl2NfeHYnaL1HoRIN1XgorKS+6CDaM+z9LSSN+i/nKDDL4KFFEWogMXu7jV4HZQy2MsxpY+wA5XIf3w410A==} + engines: {node: '>=18'} + google-auth-library@9.15.1: resolution: {integrity: sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==} engines: {node: '>=14'} + google-gax@5.0.3: + resolution: {integrity: sha512-DkWybwgkV8HA9aIizNEHEUHd8ho1BzGGQ/YMGDsTt167dQ8pk/oMiwxpUFvh6Ta93m8ZN7KwdWmP3o46HWjV+A==} + engines: {node: '>=18'} + google-logging-utils@0.0.2: resolution: {integrity: sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==} engines: {node: '>=14'} + google-logging-utils@1.1.1: + resolution: {integrity: sha512-rcX58I7nqpu4mbKztFeOAObbomBbHU2oIb/d3tJfF3dizGSApqtSwYJigGCooHdnMyQBIw8BrWyK96w3YXgr6A==} + engines: {node: '>=14'} + gopd@1.2.0: resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} engines: {node: '>= 0.4'} @@ -6035,6 +6139,10 @@ packages: resolution: {integrity: sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==} engines: {node: '>=14.0.0'} + gtoken@8.0.0: + resolution: {integrity: sha512-+CqsMbHPiSTdtSO14O51eMNlrp9N79gmeqmXeouJOhfucAedHw9noVe/n5uJk3tbKE6a+6ZCQg3RPhVhHByAIw==} + engines: {node: '>=18'} + hachure-fill@0.5.2: resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==} @@ -6149,10 +6257,22 @@ packages: resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==} engines: {node: '>= 0.8'} + http-proxy-agent@5.0.0: + resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==} + engines: {node: '>= 6'} + http-proxy-agent@7.0.2: resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} engines: {node: '>= 14'} + https-proxy-agent@4.0.0: + resolution: {integrity: sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==} + engines: {node: '>= 6.0.0'} + + https-proxy-agent@5.0.1: + resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} + engines: {node: '>= 6'} + https-proxy-agent@7.0.6: resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} @@ -6915,6 +7035,9 @@ packages: lodash-es@4.17.21: resolution: {integrity: sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==} + lodash.camelcase@4.3.0: + resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==} + lodash.castarray@4.4.0: resolution: {integrity: sha512-aVx8ztPv7/2ULbArGJ2Y42bG1mEQ5mGjpdvrbJcJFU3TbYybe+QlLS4pst9zV52ymy2in1KpFPiZnAOATxD4+Q==} @@ -7003,6 +7126,9 @@ packages: resolution: {integrity: sha512-9ie8ItPR6tjY5uYJh8K/Zrv/RMZ5VOlOWvtZdEHYSTFKZfIBPQa9tOAEeAWhd+AnIneLJ22w5fjOYtoutpWq5w==} engines: {node: '>=18'} + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + longest-streak@3.1.0: resolution: {integrity: sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==} @@ -7269,6 +7395,9 @@ packages: resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} + microsoft-cognitiveservices-speech-sdk@1.45.0: + resolution: {integrity: sha512-etTSMGxDELxBQtNL8cgq2bwMrE6CjgfC8oIqKH9I9ghFs4/ITyLXy9HZuo0wQItN1zfDH3FhBeR72TmApe6pCQ==} + mime-db@1.52.0: resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} engines: {node: '>= 0.6'} @@ -8021,6 +8150,14 @@ packages: property-information@7.1.0: resolution: {integrity: sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==} + proto3-json-serializer@3.0.1: + resolution: {integrity: sha512-Rug90pDIefARAG9MgaFjd0yR/YP4bN3Fov00kckXMjTZa0x86c4WoWfCQFdSeWi9DvRXjhfLlPDIvODB5LOTfg==} + engines: {node: '>=18'} + + protobufjs@7.5.3: + resolution: {integrity: sha512-sildjKwVqOI2kmFDiXQ6aEB0fjYTafpEvIBs8tOR8qI4spuL9OPROLVu2qZqi/xgCfsHIwVqlaF8JBjWFHnKbw==} + engines: {node: '>=12.0.0'} + proxy-addr@2.0.7: resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==} engines: {node: '>= 0.10'} @@ -8358,6 +8495,10 @@ packages: resolution: {integrity: sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA==} engines: {node: '>=18'} + retry-request@8.0.2: + resolution: {integrity: sha512-JzFPAfklk1kjR1w76f0QOIhoDkNkSqW8wYKT08n9yysTmZfB+RQ2QoXoTAeOi1HD9ZipTyTAZg3c4pM/jeqgSw==} + engines: {node: '>=18'} + retry@0.12.0: resolution: {integrity: sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==} engines: {node: '>= 4'} @@ -8648,6 +8789,7 @@ packages: source-map@0.8.0-beta.0: resolution: {integrity: sha512-2ymg6oRBpebeZi9UUNsgQ89bhx01TcTkmNTGnNO88imTmbSgy4nfujrgVEFKWpMTEGA11EDkTt7mqObTPdigIA==} engines: {node: '>= 8'} + deprecated: The work that was done in this beta branch won't be included in future versions space-separated-tokens@1.1.5: resolution: {integrity: sha512-q/JSVd1Lptzhf5bkYm4ob4iWPjx0KiRe3sRFBNrVqbJkFaBm5vbbowy1mymoPNLRa52+oadOhJ+K49wsSeSjTA==} @@ -8706,9 +8848,15 @@ packages: stream-combiner@0.0.4: resolution: {integrity: sha512-rT00SPnTVyRsaSz5zgSPma/aHSOic5U1prhYdRy5HS2kTZviFpmDgzilbtsJsxiroqACmayynDN/9VzIbX5DOw==} + stream-events@1.0.5: + resolution: {integrity: sha512-E1GUzBSgvct8Jsb3v2X15pjzN1tYebtbLaMg+eBOUOAxgbLoSbT2NS91ckc5lJD1KfLjId+jXJRgo0qnV5Nerg==} + stream-json@1.9.1: resolution: {integrity: sha512-uWkjJ+2Nt/LO9Z/JyKZbMusL8Dkh97uUBTv3AJQ74y07lVahLY4eEFsPsE97pxYBwr8nnjMAIch5eqI0gPShyw==} + stream-shift@1.0.3: + resolution: {integrity: sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==} + streamsearch@1.1.0: resolution: {integrity: sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==} engines: {node: '>=10.0.0'} @@ -8829,6 +8977,9 @@ packages: resolution: {integrity: sha512-X5Z6riticuH5GnhUyzijfDi1SoXas8ODDyN7K8lJeQK+Jfi4dKdoJGL4CXTskY/ATBcN+rz5lROGn1tAUkOX7g==} engines: {node: '>=12.21.0'} + stubs@3.0.0: + resolution: {integrity: sha512-PdHt7hHUJKxvTCgbKX9C1V/ftOcjJQgz8BZwNfV5c4B6dcGqlpelTbJ999jBGZ2jYiPAwcX5dP6oBwVlBlUbxw==} + style-to-js@1.1.16: resolution: {integrity: sha512-/Q6ld50hKYPH3d/r6nr117TZkHR0w0kGGIVfpG9N6D8NymRPM9RqCUv4pRpJ62E5DqOYx2AFpbZMyCPnjQCnOw==} @@ -8932,6 +9083,10 @@ packages: resolution: {integrity: sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==} engines: {node: '>=18'} + teeny-request@10.1.0: + resolution: {integrity: sha512-3ZnLvgWF29jikg1sAQ1g0o+lr5JX6sVgYvfUJazn7ZjJroDBUTWp44/+cFVX0bULjv4vci+rBD+oGVAkWqhUbw==} + engines: {node: '>=18'} + term-size@2.2.1: resolution: {integrity: sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg==} engines: {node: '>=8'} @@ -10967,6 +11122,12 @@ snapshots: '@floating-ui/utils@0.2.9': {} + '@google-cloud/text-to-speech@6.2.0': + dependencies: + google-gax: 5.0.3 + transitivePeerDependencies: + - supports-color + '@google/genai@1.3.0(@modelcontextprotocol/sdk@1.12.0)': dependencies: '@modelcontextprotocol/sdk': 1.12.0 @@ -10980,6 +11141,25 @@ snapshots: - supports-color - utf-8-validate + '@grpc/grpc-js@1.13.4': + dependencies: + '@grpc/proto-loader': 0.7.15 + '@js-sdsl/ordered-map': 4.4.2 + + '@grpc/proto-loader@0.7.15': + dependencies: + lodash.camelcase: 4.3.0 + long: 5.3.2 + protobufjs: 7.5.3 + yargs: 17.7.2 + + '@grpc/proto-loader@0.8.0': + dependencies: + lodash.camelcase: 4.3.0 + long: 5.3.2 + protobufjs: 7.5.3 + yargs: 17.7.2 + '@hookform/resolvers@5.1.1(react-hook-form@7.57.0(react@18.3.1))': dependencies: '@standard-schema/utils': 0.3.0 @@ -11118,7 +11298,7 @@ snapshots: '@jest/schemas': 29.6.3 '@types/istanbul-lib-coverage': 2.0.6 '@types/istanbul-reports': 3.0.4 - '@types/node': 20.17.57 + '@types/node': 20.19.9 '@types/yargs': 17.0.33 chalk: 4.1.2 @@ -11139,6 +11319,8 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.0 + '@js-sdsl/ordered-map@4.4.2': {} + '@kwsites/file-exists@1.1.1': dependencies: debug: 4.4.1(supports-color@8.1.1) @@ -11216,7 +11398,7 @@ snapshots: '@lmstudio/lms-isomorphic@0.4.5': dependencies: - ws: 8.18.2 + ws: 8.18.3 transitivePeerDependencies: - bufferutil - utf-8-validate @@ -11493,6 +11675,29 @@ snapshots: '@polka/url@1.0.0-next.29': {} + '@protobufjs/aspromise@1.1.2': {} + + '@protobufjs/base64@1.1.2': {} + + '@protobufjs/codegen@2.0.4': {} + + '@protobufjs/eventemitter@1.1.0': {} + + '@protobufjs/fetch@1.1.0': + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/inquire': 1.1.0 + + '@protobufjs/float@1.0.2': {} + + '@protobufjs/inquire@1.1.0': {} + + '@protobufjs/path@1.1.2': {} + + '@protobufjs/pool@1.1.0': {} + + '@protobufjs/utf8@1.1.0': {} + '@puppeteer/browsers@2.10.5': dependencies: debug: 4.4.1(supports-color@8.1.1) @@ -12935,6 +13140,8 @@ snapshots: dependencies: '@testing-library/dom': 10.4.0 + '@tootallnate/once@2.0.0': {} + '@tootallnate/quickjs-emscripten@0.23.0': {} '@tybys/wasm-util@0.9.0': @@ -13194,7 +13401,6 @@ snapshots: '@types/node@20.19.9': dependencies: undici-types: 6.21.0 - optional: true '@types/node@22.15.29': dependencies: @@ -13257,6 +13463,8 @@ snapshots: '@types/vscode@1.100.0': {} + '@types/webrtc@0.0.37': {} + '@types/ws@8.18.1': dependencies: '@types/node': 20.19.9 @@ -13270,7 +13478,7 @@ snapshots: '@types/yauzl@2.10.3': dependencies: - '@types/node': 20.17.50 + '@types/node': 20.19.9 optional: true '@typescript-eslint/eslint-plugin@8.32.1(@typescript-eslint/parser@8.32.1(eslint@9.27.0(jiti@2.4.2))(typescript@5.8.3))(eslint@9.27.0(jiti@2.4.2))(typescript@5.8.3)': @@ -13432,7 +13640,7 @@ snapshots: sirv: 3.0.1 tinyglobby: 0.2.14 tinyrainbow: 2.0.0 - vitest: 3.2.4(@types/debug@4.1.12)(@types/node@22.15.29)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) + vitest: 3.2.4(@types/debug@4.1.12)(@types/node@20.17.50)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) '@vitest/utils@3.2.4': dependencies: @@ -13568,6 +13776,14 @@ snapshots: acorn@8.15.0: {} + agent-base@5.1.1: {} + + agent-base@6.0.2: + dependencies: + debug: 4.4.1(supports-color@8.1.1) + transitivePeerDependencies: + - supports-color + agent-base@7.1.3: {} agentkeepalive@4.6.0: @@ -13803,6 +14019,12 @@ snapshots: basic-ftp@5.0.5: {} + bent@7.3.12: + dependencies: + bytesish: 0.4.4 + caseless: 0.12.0 + is-stream: 2.0.1 + better-path-resolve@1.0.0: dependencies: is-windows: 1.0.2 @@ -13904,6 +14126,8 @@ snapshots: bytes@3.1.2: {} + bytesish@0.4.4: {} + c8@9.1.0: dependencies: '@bcoe/v8-coverage': 0.2.3 @@ -13947,6 +14171,8 @@ snapshots: caniuse-lite@1.0.30001718: {} + caseless@0.12.0: {} + ccount@2.0.1: {} chai@5.2.0: @@ -14481,8 +14707,7 @@ snapshots: d3: 7.9.0 lodash-es: 4.17.21 - data-uri-to-buffer@4.0.1: - optional: true + data-uri-to-buffer@4.0.1: {} data-uri-to-buffer@6.0.2: {} @@ -14694,6 +14919,13 @@ snapshots: duplexer@0.1.2: {} + duplexify@4.1.3: + dependencies: + end-of-stream: 1.4.4 + inherits: 2.0.4 + readable-stream: 3.6.2 + stream-shift: 1.0.3 + eastasianwidth@0.2.0: {} easy-stack@1.0.1: {} @@ -15341,7 +15573,6 @@ snapshots: dependencies: node-domexception: 1.0.0 web-streams-polyfill: 3.3.3 - optional: true fflate@0.4.8: {} @@ -15431,7 +15662,6 @@ snapshots: formdata-polyfill@4.0.10: dependencies: fetch-blob: 3.2.0 - optional: true forwarded@0.2.0: {} @@ -15516,6 +15746,14 @@ snapshots: - encoding - supports-color + gaxios@7.1.1: + dependencies: + extend: 3.0.2 + https-proxy-agent: 7.0.6 + node-fetch: 3.3.2 + transitivePeerDependencies: + - supports-color + gcp-metadata@6.1.1: dependencies: gaxios: 6.7.1 @@ -15525,6 +15763,14 @@ snapshots: - encoding - supports-color + gcp-metadata@7.0.1: + dependencies: + gaxios: 7.1.1 + google-logging-utils: 1.1.1 + json-bigint: 1.0.0 + transitivePeerDependencies: + - supports-color + gel@2.1.0: dependencies: '@petamoriken/float16': 3.9.2 @@ -15656,6 +15902,18 @@ snapshots: merge2: 1.4.1 slash: 3.0.0 + google-auth-library@10.2.1: + dependencies: + base64-js: 1.5.1 + ecdsa-sig-formatter: 1.0.11 + gaxios: 7.1.1 + gcp-metadata: 7.0.1 + google-logging-utils: 1.1.1 + gtoken: 8.0.0 + jws: 4.0.0 + transitivePeerDependencies: + - supports-color + google-auth-library@9.15.1: dependencies: base64-js: 1.5.1 @@ -15668,8 +15926,26 @@ snapshots: - encoding - supports-color + google-gax@5.0.3: + dependencies: + '@grpc/grpc-js': 1.13.4 + '@grpc/proto-loader': 0.8.0 + abort-controller: 3.0.0 + duplexify: 4.1.3 + google-auth-library: 10.2.1 + google-logging-utils: 1.1.1 + node-fetch: 3.3.2 + object-hash: 3.0.0 + proto3-json-serializer: 3.0.1 + protobufjs: 7.5.3 + retry-request: 8.0.2 + transitivePeerDependencies: + - supports-color + google-logging-utils@0.0.2: {} + google-logging-utils@1.1.1: {} + gopd@1.2.0: {} graceful-fs@4.2.11: {} @@ -15691,6 +15967,13 @@ snapshots: - encoding - supports-color + gtoken@8.0.0: + dependencies: + gaxios: 7.1.1 + jws: 4.0.0 + transitivePeerDependencies: + - supports-color + hachure-fill@0.5.2: {} harmony-reflect@1.6.2: {} @@ -15864,6 +16147,14 @@ snapshots: statuses: 2.0.1 toidentifier: 1.0.1 + http-proxy-agent@5.0.0: + dependencies: + '@tootallnate/once': 2.0.0 + agent-base: 6.0.2 + debug: 4.4.1(supports-color@8.1.1) + transitivePeerDependencies: + - supports-color + http-proxy-agent@7.0.2: dependencies: agent-base: 7.1.3 @@ -15871,6 +16162,20 @@ snapshots: transitivePeerDependencies: - supports-color + https-proxy-agent@4.0.0: + dependencies: + agent-base: 5.1.1 + debug: 4.4.1(supports-color@8.1.1) + transitivePeerDependencies: + - supports-color + + https-proxy-agent@5.0.1: + dependencies: + agent-base: 6.0.2 + debug: 4.4.1(supports-color@8.1.1) + transitivePeerDependencies: + - supports-color + https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.3 @@ -16631,6 +16936,8 @@ snapshots: lodash-es@4.17.21: {} + lodash.camelcase@4.3.0: {} + lodash.castarray@4.4.0: {} lodash.debounce@4.0.8: {} @@ -16699,6 +17006,8 @@ snapshots: strip-ansi: 7.1.0 wrap-ansi: 9.0.0 + long@5.3.2: {} + longest-streak@3.1.0: {} loose-envify@1.4.0: @@ -17227,6 +17536,19 @@ snapshots: braces: 3.0.3 picomatch: 2.3.1 + microsoft-cognitiveservices-speech-sdk@1.45.0: + dependencies: + '@types/webrtc': 0.0.37 + agent-base: 6.0.2 + bent: 7.3.12 + https-proxy-agent: 4.0.0 + uuid: 9.0.1 + ws: 8.18.3 + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + mime-db@1.52.0: {} mime-db@1.54.0: {} @@ -17432,7 +17754,6 @@ snapshots: data-uri-to-buffer: 4.0.1 fetch-blob: 3.2.0 formdata-polyfill: 4.0.10 - optional: true node-ipc@12.0.0: dependencies: @@ -17992,6 +18313,25 @@ snapshots: property-information@7.1.0: {} + proto3-json-serializer@3.0.1: + dependencies: + protobufjs: 7.5.3 + + protobufjs@7.5.3: + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/base64': 1.1.2 + '@protobufjs/codegen': 2.0.4 + '@protobufjs/eventemitter': 1.1.0 + '@protobufjs/fetch': 1.1.0 + '@protobufjs/float': 1.0.2 + '@protobufjs/inquire': 1.1.0 + '@protobufjs/path': 1.1.2 + '@protobufjs/pool': 1.1.0 + '@protobufjs/utf8': 1.1.0 + '@types/node': 20.19.9 + long: 5.3.2 + proxy-addr@2.0.7: dependencies: forwarded: 0.2.0 @@ -18058,7 +18398,7 @@ snapshots: debug: 4.4.1(supports-color@8.1.1) devtools-protocol: 0.0.1452169 typed-query-selector: 2.12.0 - ws: 8.18.2 + ws: 8.18.3 transitivePeerDependencies: - bare-buffer - bufferutil @@ -18466,6 +18806,13 @@ snapshots: onetime: 7.0.0 signal-exit: 4.1.0 + retry-request@8.0.2: + dependencies: + extend: 3.0.2 + teeny-request: 10.1.0 + transitivePeerDependencies: + - supports-color + retry@0.12.0: {} reusify@1.1.0: {} @@ -18890,10 +19237,16 @@ snapshots: dependencies: duplexer: 0.1.2 + stream-events@1.0.5: + dependencies: + stubs: 3.0.0 + stream-json@1.9.1: dependencies: stream-chain: 2.2.5 + stream-shift@1.0.3: {} + streamsearch@1.1.0: {} streamx@2.22.0: @@ -19023,6 +19376,8 @@ snapshots: strong-type@1.1.0: {} + stubs@3.0.0: {} + style-to-js@1.1.16: dependencies: style-to-object: 1.0.8 @@ -19164,6 +19519,15 @@ snapshots: mkdirp: 3.0.1 yallist: 5.0.0 + teeny-request@10.1.0: + dependencies: + http-proxy-agent: 5.0.0 + https-proxy-agent: 5.0.1 + node-fetch: 3.3.2 + stream-events: 1.0.5 + transitivePeerDependencies: + - supports-color + term-size@2.2.1: {} test-exclude@6.0.0: @@ -19959,8 +20323,7 @@ snapshots: web-namespaces@2.0.1: {} - web-streams-polyfill@3.3.3: - optional: true + web-streams-polyfill@3.3.3: {} web-streams-polyfill@4.0.0-beta.3: {} @@ -20092,8 +20455,7 @@ snapshots: ws@8.18.2: {} - ws@8.18.3: - optional: true + ws@8.18.3: {} xml-name-validator@5.0.0: {} diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 274060a19b..9aa73aa034 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -63,7 +63,7 @@ import type { IndexProgressUpdate } from "../../services/code-index/interfaces/m import { MdmService } from "../../services/mdm/MdmService" import { fileExistsAtPath } from "../../utils/fs" -import { setTtsEnabled, setTtsSpeed } from "../../utils/tts" +import { setTtsEnabled, setTtsSpeed, initializeTts } from "../../utils/tts" import { getWorkspaceGitInfo } from "../../utils/git" import { getWorkspacePath } from "../../utils/path" @@ -532,13 +532,29 @@ export class ClineProvider }, ) - // Initialize tts enabled state - this.getState().then(({ ttsEnabled }) => { - setTtsEnabled(ttsEnabled ?? false) - }) + // Initialize TTS with configuration + this.getState().then(async (state) => { + const { + ttsEnabled, + ttsSpeed, + ttsProvider, + googleCloudTtsApiKey, + googleCloudTtsProjectId, + azureTtsSubscriptionKey, + azureTtsRegion, + } = state + + // Initialize TTS manager with provider configuration + await initializeTts({ + provider: ttsProvider as "native" | "google-cloud" | "azure" | undefined, + googleCloudApiKey: googleCloudTtsApiKey, + googleCloudProjectId: googleCloudTtsProjectId, + azureSubscriptionKey: azureTtsSubscriptionKey, + azureRegion: azureTtsRegion, + }) - // Initialize tts speed state - this.getState().then(({ ttsSpeed }) => { + // Set enabled state and speed + setTtsEnabled(ttsEnabled ?? false) setTtsSpeed(ttsSpeed ?? 1) }) @@ -1567,6 +1583,12 @@ export class ClineProvider soundEnabled, ttsEnabled, ttsSpeed, + ttsProvider, + ttsVoice, + googleCloudTtsApiKey, + googleCloudTtsProjectId, + azureTtsSubscriptionKey, + azureTtsRegion, diffEnabled, enableCheckpoints, taskHistory, @@ -1671,6 +1693,12 @@ export class ClineProvider soundEnabled: soundEnabled ?? false, ttsEnabled: ttsEnabled ?? false, ttsSpeed: ttsSpeed ?? 1.0, + ttsProvider: ttsProvider ?? "native", + ttsVoice: ttsVoice ?? undefined, + googleCloudTtsApiKey: googleCloudTtsApiKey ?? undefined, + googleCloudTtsProjectId: googleCloudTtsProjectId ?? undefined, + azureTtsSubscriptionKey: azureTtsSubscriptionKey ?? undefined, + azureTtsRegion: azureTtsRegion ?? undefined, diffEnabled: diffEnabled ?? true, enableCheckpoints: enableCheckpoints ?? true, shouldShowAnnouncement: @@ -1863,6 +1891,12 @@ export class ClineProvider soundEnabled: stateValues.soundEnabled ?? false, ttsEnabled: stateValues.ttsEnabled ?? false, ttsSpeed: stateValues.ttsSpeed ?? 1.0, + ttsProvider: stateValues.ttsProvider ?? "native", + ttsVoice: stateValues.ttsVoice ?? undefined, + googleCloudTtsApiKey: stateValues.googleCloudTtsApiKey ?? undefined, + googleCloudTtsProjectId: stateValues.googleCloudTtsProjectId ?? undefined, + azureTtsSubscriptionKey: stateValues.azureTtsSubscriptionKey ?? undefined, + azureTtsRegion: stateValues.azureTtsRegion ?? undefined, diffEnabled: stateValues.diffEnabled ?? true, enableCheckpoints: stateValues.enableCheckpoints ?? true, soundVolume: stateValues.soundVolume, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index e2c6d6a475..4e874a1774 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -36,7 +36,7 @@ import { getTheme } from "../../integrations/theme/getTheme" import { discoverChromeHostUrl, tryChromeHostUrl } from "../../services/browser/browserDiscovery" import { searchWorkspaceFiles } from "../../services/search/file-search" import { fileExistsAtPath } from "../../utils/fs" -import { playTts, setTtsEnabled, setTtsSpeed, stopTts } from "../../utils/tts" +import { playTts, setTtsEnabled, setTtsSpeed, stopTts, setTtsProvider, initializeTts } from "../../utils/tts" import { searchCommits } from "../../utils/git" import { exportSettings, importSettingsWithFeedback } from "../config/importExport" import { getOpenAiModels } from "../../api/providers/openai" @@ -330,11 +330,11 @@ export const webviewMessageHandler = async ( await provider.postStateToWebview() break case "allowedMaxRequests": - await updateGlobalState("allowedMaxRequests", message.value) + await updateGlobalState("allowedMaxRequests", Number(message.value)) await provider.postStateToWebview() break case "allowedMaxCost": - await updateGlobalState("allowedMaxCost", message.value) + await updateGlobalState("allowedMaxCost", Number(message.value)) await provider.postStateToWebview() break case "alwaysAllowSubtasks": @@ -353,7 +353,7 @@ export const webviewMessageHandler = async ( await provider.postStateToWebview() break case "autoCondenseContextPercent": - await updateGlobalState("autoCondenseContextPercent", message.value) + await updateGlobalState("autoCondenseContextPercent", Number(message.value)) await provider.postStateToWebview() break case "terminalOperation": @@ -936,21 +936,88 @@ export const webviewMessageHandler = async ( break case "soundVolume": const soundVolume = message.value ?? 0.5 - await updateGlobalState("soundVolume", soundVolume) + await updateGlobalState("soundVolume", Number(soundVolume)) await provider.postStateToWebview() break case "ttsEnabled": const ttsEnabled = message.bool ?? true await updateGlobalState("ttsEnabled", ttsEnabled) - setTtsEnabled(ttsEnabled) // Add this line to update the tts utility + setTtsEnabled(ttsEnabled) await provider.postStateToWebview() break case "ttsSpeed": - const ttsSpeed = message.value ?? 1.0 + const ttsSpeed = Number(message.value ?? 1.0) await updateGlobalState("ttsSpeed", ttsSpeed) setTtsSpeed(ttsSpeed) await provider.postStateToWebview() break + case "ttsProvider": + const ttsProvider = String(message.value) as "native" | "google-cloud" | "azure" + await updateGlobalState("ttsProvider", ttsProvider) + await setTtsProvider(ttsProvider) + await provider.postStateToWebview() + break + case "ttsVoice": + const ttsVoice = String(message.value) + await updateGlobalState("ttsVoice", ttsVoice) + await provider.postStateToWebview() + break + case "googleCloudTtsApiKey": + const googleCloudApiKey = String(message.value) + await updateGlobalState("googleCloudTtsApiKey", googleCloudApiKey) + // Re-initialize TTS with new config + const gcState = await provider.getState() + await initializeTts({ + provider: gcState.ttsProvider as "native" | "google-cloud" | "azure" | undefined, + googleCloudApiKey: googleCloudApiKey, + googleCloudProjectId: gcState.googleCloudTtsProjectId, + azureSubscriptionKey: gcState.azureTtsSubscriptionKey, + azureRegion: gcState.azureTtsRegion, + }) + await provider.postStateToWebview() + break + case "googleCloudTtsProjectId": + const googleCloudProjectId = String(message.value) + await updateGlobalState("googleCloudTtsProjectId", googleCloudProjectId) + // Re-initialize TTS with new config + const gcpState = await provider.getState() + await initializeTts({ + provider: gcpState.ttsProvider as "native" | "google-cloud" | "azure" | undefined, + googleCloudApiKey: gcpState.googleCloudTtsApiKey, + googleCloudProjectId: googleCloudProjectId, + azureSubscriptionKey: gcpState.azureTtsSubscriptionKey, + azureRegion: gcpState.azureTtsRegion, + }) + await provider.postStateToWebview() + break + case "azureTtsSubscriptionKey": + const azureSubscriptionKey = String(message.value) + await updateGlobalState("azureTtsSubscriptionKey", azureSubscriptionKey) + // Re-initialize TTS with new config + const azState = await provider.getState() + await initializeTts({ + provider: azState.ttsProvider as "native" | "google-cloud" | "azure" | undefined, + googleCloudApiKey: azState.googleCloudTtsApiKey, + googleCloudProjectId: azState.googleCloudTtsProjectId, + azureSubscriptionKey: azureSubscriptionKey, + azureRegion: azState.azureTtsRegion, + }) + await provider.postStateToWebview() + break + case "azureTtsRegion": + const azureRegion = String(message.value) + await updateGlobalState("azureTtsRegion", azureRegion) + // Re-initialize TTS with new config + const azrState = await provider.getState() + await initializeTts({ + provider: azrState.ttsProvider as "native" | "google-cloud" | "azure" | undefined, + googleCloudApiKey: azrState.googleCloudTtsApiKey, + googleCloudProjectId: azrState.googleCloudTtsProjectId, + azureSubscriptionKey: azrState.azureTtsSubscriptionKey, + azureRegion: azureRegion, + }) + await provider.postStateToWebview() + break case "playTts": if (message.text) { playTts(message.text, { @@ -1028,7 +1095,7 @@ export const webviewMessageHandler = async ( } break case "fuzzyMatchThreshold": - await updateGlobalState("fuzzyMatchThreshold", message.value) + await updateGlobalState("fuzzyMatchThreshold", Number(message.value)) await provider.postStateToWebview() break case "updateVSCodeSetting": { @@ -1072,11 +1139,11 @@ export const webviewMessageHandler = async ( await provider.postStateToWebview() break case "requestDelaySeconds": - await updateGlobalState("requestDelaySeconds", message.value ?? 5) + await updateGlobalState("requestDelaySeconds", Number(message.value ?? 5)) await provider.postStateToWebview() break case "writeDelayMs": - await updateGlobalState("writeDelayMs", message.value) + await updateGlobalState("writeDelayMs", Number(message.value)) await provider.postStateToWebview() break case "diagnosticsEnabled": @@ -1109,10 +1176,10 @@ export const webviewMessageHandler = async ( } break case "terminalShellIntegrationTimeout": - await updateGlobalState("terminalShellIntegrationTimeout", message.value) + await updateGlobalState("terminalShellIntegrationTimeout", Number(message.value)) await provider.postStateToWebview() if (message.value !== undefined) { - Terminal.setShellIntegrationTimeout(message.value) + Terminal.setShellIntegrationTimeout(Number(message.value)) } break case "terminalShellIntegrationDisabled": @@ -1123,10 +1190,10 @@ export const webviewMessageHandler = async ( } break case "terminalCommandDelay": - await updateGlobalState("terminalCommandDelay", message.value) + await updateGlobalState("terminalCommandDelay", Number(message.value)) await provider.postStateToWebview() if (message.value !== undefined) { - Terminal.setCommandDelay(message.value) + Terminal.setCommandDelay(Number(message.value)) } break case "terminalPowershellCounter": @@ -1242,16 +1309,16 @@ export const webviewMessageHandler = async ( break } case "screenshotQuality": - await updateGlobalState("screenshotQuality", message.value) + await updateGlobalState("screenshotQuality", Number(message.value)) await provider.postStateToWebview() break case "maxOpenTabsContext": - const tabCount = Math.min(Math.max(0, message.value ?? 20), 500) + const tabCount = Math.min(Math.max(0, Number(message.value ?? 20)), 500) await updateGlobalState("maxOpenTabsContext", tabCount) await provider.postStateToWebview() break case "maxWorkspaceFiles": - const fileCount = Math.min(Math.max(0, message.value ?? 200), 500) + const fileCount = Math.min(Math.max(0, Number(message.value ?? 200)), 500) await updateGlobalState("maxWorkspaceFiles", fileCount) await provider.postStateToWebview() break @@ -1260,7 +1327,7 @@ export const webviewMessageHandler = async ( await provider.postStateToWebview() break case "followupAutoApproveTimeoutMs": - await updateGlobalState("followupAutoApproveTimeoutMs", message.value) + await updateGlobalState("followupAutoApproveTimeoutMs", Number(message.value)) await provider.postStateToWebview() break case "browserToolEnabled": @@ -1281,20 +1348,20 @@ export const webviewMessageHandler = async ( await provider.postStateToWebview() break case "maxReadFileLine": - await updateGlobalState("maxReadFileLine", message.value) + await updateGlobalState("maxReadFileLine", Number(message.value)) await provider.postStateToWebview() break case "maxImageFileSize": - await updateGlobalState("maxImageFileSize", message.value) + await updateGlobalState("maxImageFileSize", Number(message.value)) await provider.postStateToWebview() break case "maxTotalImageSize": - await updateGlobalState("maxTotalImageSize", message.value) + await updateGlobalState("maxTotalImageSize", Number(message.value)) await provider.postStateToWebview() break case "maxConcurrentFileReads": const valueToSave = message.value // Capture the value intended for saving - await updateGlobalState("maxConcurrentFileReads", valueToSave) + await updateGlobalState("maxConcurrentFileReads", Number(valueToSave)) await provider.postStateToWebview() break case "includeDiagnosticMessages": @@ -1304,7 +1371,7 @@ export const webviewMessageHandler = async ( await provider.postStateToWebview() break case "maxDiagnosticMessages": - await updateGlobalState("maxDiagnosticMessages", message.value ?? 50) + await updateGlobalState("maxDiagnosticMessages", Number(message.value ?? 50)) await provider.postStateToWebview() break case "setHistoryPreviewCollapsed": // Add the new case handler diff --git a/src/package.json b/src/package.json index 499cd403ce..37d6a87858 100644 --- a/src/package.json +++ b/src/package.json @@ -415,6 +415,7 @@ "@anthropic-ai/vertex-sdk": "^0.7.0", "@aws-sdk/client-bedrock-runtime": "^3.848.0", "@aws-sdk/credential-providers": "^3.848.0", + "@google-cloud/text-to-speech": "^6.2.0", "@google/genai": "^1.0.0", "@lmstudio/sdk": "^1.1.1", "@mistralai/mistralai": "^1.3.6", @@ -448,6 +449,7 @@ "isbinaryfile": "^5.0.2", "lodash.debounce": "^4.0.8", "mammoth": "^1.9.1", + "microsoft-cognitiveservices-speech-sdk": "^1.45.0", "monaco-vscode-textmate-theme-converter": "^0.1.7", "node-cache": "^5.1.2", "node-ipc": "^12.0.0", diff --git a/src/services/tts/TtsManager.ts b/src/services/tts/TtsManager.ts new file mode 100644 index 0000000000..059e5ec7ed --- /dev/null +++ b/src/services/tts/TtsManager.ts @@ -0,0 +1,272 @@ +import * as vscode from "vscode" +import { TtsProvider, TtsVoice, TtsPlayOptions } from "./interfaces/provider" +import { NativeTtsProvider } from "./providers/native" +import { GoogleCloudTtsProvider } from "./providers/google-cloud" +import { AzureTtsProvider } from "./providers/azure" + +export type TtsProviderType = "native" | "google-cloud" | "azure" + +interface TtsManagerConfig { + provider?: TtsProviderType + googleCloud?: { + apiKey?: string + projectId?: string + } + azure?: { + subscriptionKey?: string + region?: string + } +} + +interface QueueItem { + text: string + options: TtsPlayOptions +} + +/** + * Manages TTS providers and handles speech synthesis + */ +export class TtsManager { + private static instance: TtsManager + private providers: Map = new Map() + private activeProvider: TtsProvider | null = null + private activeProviderId: TtsProviderType = "native" + private queue: QueueItem[] = [] + private isProcessing = false + private isEnabled = false + private globalSpeed = 1.0 + + private constructor() { + // Initialize with native provider by default + this.registerProvider(new NativeTtsProvider()) + } + + static getInstance(): TtsManager { + if (!TtsManager.instance) { + TtsManager.instance = new TtsManager() + } + return TtsManager.instance + } + + /** + * Register a TTS provider + */ + private registerProvider(provider: TtsProvider): void { + this.providers.set(provider.id, provider) + } + + /** + * Initialize the TTS manager with configuration + */ + async initialize(config: TtsManagerConfig): Promise { + // Set the active provider + this.activeProviderId = config.provider || "native" + + // Initialize providers based on configuration + if (config.googleCloud?.apiKey) { + const googleProvider = new GoogleCloudTtsProvider() + try { + await googleProvider.initialize(config.googleCloud) + this.registerProvider(googleProvider) + } catch (error) { + console.error("Failed to initialize Google Cloud TTS:", error) + vscode.window.showErrorMessage(`Failed to initialize Google Cloud TTS: ${error}`) + } + } + + if (config.azure?.subscriptionKey && config.azure?.region) { + const azureProvider = new AzureTtsProvider() + try { + await azureProvider.initialize(config.azure) + this.registerProvider(azureProvider) + } catch (error) { + console.error("Failed to initialize Azure TTS:", error) + vscode.window.showErrorMessage(`Failed to initialize Azure TTS: ${error}`) + } + } + + // Set the active provider + await this.setActiveProvider(this.activeProviderId) + } + + /** + * Set the active TTS provider + */ + async setActiveProvider(providerId: TtsProviderType): Promise { + const provider = this.providers.get(providerId) + + if (!provider) { + // Fall back to native provider + this.activeProviderId = "native" + this.activeProvider = this.providers.get("native") || null + + if (providerId !== "native") { + vscode.window.showWarningMessage( + `TTS provider '${providerId}' not available. Falling back to native TTS.`, + ) + } + return + } + + // Initialize the provider if needed + if (!(await provider.isAvailable())) { + try { + await provider.initialize() + } catch (error) { + console.error(`Failed to initialize provider ${providerId}:`, error) + vscode.window.showErrorMessage(`Failed to initialize ${provider.name}: ${error}`) + + // Fall back to native provider + if (providerId !== "native") { + await this.setActiveProvider("native") + } + return + } + } + + this.activeProvider = provider + this.activeProviderId = providerId + } + + /** + * Get the active provider ID + */ + getActiveProviderId(): TtsProviderType { + return this.activeProviderId + } + + /** + * Get available providers + */ + async getAvailableProviders(): Promise> { + const available = [] + + for (const [id, provider] of this.providers) { + if (await provider.isAvailable()) { + available.push({ id, name: provider.name }) + } + } + + return available + } + + /** + * Get voices from the active provider + */ + async getVoices(): Promise { + if (!this.activeProvider) { + return [] + } + + try { + return await this.activeProvider.getVoices() + } catch (error) { + console.error("Failed to get voices:", error) + return [] + } + } + + /** + * Set whether TTS is enabled + */ + setEnabled(enabled: boolean): void { + this.isEnabled = enabled + if (!enabled) { + this.stop() + } + } + + /** + * Set the global speech speed + */ + setSpeed(speed: number): void { + this.globalSpeed = speed + } + + /** + * Speak text using the active provider + */ + async speak(text: string, options: TtsPlayOptions = {}): Promise { + if (!this.isEnabled) { + return + } + + // Add to queue + this.queue.push({ text, options }) + + // Process queue if not already processing + if (!this.isProcessing) { + await this.processQueue() + } + } + + /** + * Process the speech queue + */ + private async processQueue(): Promise { + if (!this.isEnabled || this.isProcessing) { + return + } + + const item = this.queue.shift() + if (!item) { + return + } + + this.isProcessing = true + + try { + if (!this.activeProvider) { + await this.setActiveProvider("native") + } + + if (this.activeProvider) { + // Merge global speed with item options + const mergedOptions: TtsPlayOptions = { + ...item.options, + speed: item.options.speed ?? this.globalSpeed, + } + + await this.activeProvider.speak(item.text, mergedOptions) + } + } catch (error) { + console.error("TTS error:", error) + vscode.window.showErrorMessage(`TTS error: ${error}`) + } finally { + this.isProcessing = false + + // Process next item in queue + if (this.queue.length > 0) { + await this.processQueue() + } + } + } + + /** + * Stop any ongoing speech and clear the queue + */ + stop(): void { + // Clear the queue + this.queue = [] + this.isProcessing = false + + // Stop the active provider + if (this.activeProvider) { + this.activeProvider.stop() + } + } + + /** + * Dispose of all providers and clean up resources + */ + dispose(): void { + this.stop() + + for (const provider of this.providers.values()) { + provider.dispose() + } + + this.providers.clear() + this.activeProvider = null + } +} diff --git a/src/services/tts/interfaces/provider.ts b/src/services/tts/interfaces/provider.ts new file mode 100644 index 0000000000..7b74049396 --- /dev/null +++ b/src/services/tts/interfaces/provider.ts @@ -0,0 +1,67 @@ +/** + * Voice information for TTS providers + */ +export interface TtsVoice { + id: string + name: string + language?: string + gender?: "male" | "female" | "neutral" + premium?: boolean +} + +/** + * Options for TTS playback + */ +export interface TtsPlayOptions { + voice?: string + speed?: number + pitch?: number + volume?: number + onStart?: () => void + onStop?: () => void +} + +/** + * Base interface for all TTS providers + */ +export interface TtsProvider { + /** + * Unique identifier for the provider + */ + readonly id: string + + /** + * Display name for the provider + */ + readonly name: string + + /** + * Initialize the provider with configuration + */ + initialize(config?: Record): Promise + + /** + * Check if the provider is available and configured + */ + isAvailable(): Promise + + /** + * Get available voices from the provider + */ + getVoices(): Promise + + /** + * Speak the given text + */ + speak(text: string, options?: TtsPlayOptions): Promise + + /** + * Stop any ongoing speech + */ + stop(): void + + /** + * Clean up resources + */ + dispose(): void +} diff --git a/src/services/tts/providers/azure.ts b/src/services/tts/providers/azure.ts new file mode 100644 index 0000000000..24a4690642 --- /dev/null +++ b/src/services/tts/providers/azure.ts @@ -0,0 +1,179 @@ +import { TtsProvider, TtsVoice, TtsPlayOptions } from "../interfaces/provider" + +interface AzureConfig { + subscriptionKey?: string + region?: string +} + +/** + * Microsoft Azure Speech Services TTS provider + */ +export class AzureTtsProvider implements TtsProvider { + readonly id = "azure" + readonly name = "Azure Speech Services" + + private speechConfig: any + private synthesizer: any + private config: AzureConfig = {} + private isInitialized = false + private currentSynthesis: any + + async initialize(config?: AzureConfig): Promise { + this.config = config || {} + + if (!this.config.subscriptionKey || !this.config.region) { + throw new Error("Azure subscription key and region are required") + } + + try { + // Dynamic import to avoid loading the SDK until needed + const sdk = await import("microsoft-cognitiveservices-speech-sdk") + + // Create speech config with subscription key and region + this.speechConfig = sdk.SpeechConfig.fromSubscription(this.config.subscriptionKey, this.config.region) + + // Create speech synthesizer + this.synthesizer = new sdk.SpeechSynthesizer(this.speechConfig) + + this.isInitialized = true + } catch (error) { + throw new Error(`Failed to initialize Azure TTS: ${error}`) + } + } + + async isAvailable(): Promise { + return this.isInitialized && !!this.synthesizer + } + + async getVoices(): Promise { + if (!this.isInitialized || !this.synthesizer) { + throw new Error("Azure TTS provider not initialized") + } + + try { + // Get available voices + const result = await this.synthesizer.getVoicesAsync() + + if (result.voices) { + return result.voices.map((voice: any) => ({ + id: voice.shortName, + name: `${voice.localName} (${voice.locale})`, + language: voice.locale, + gender: voice.gender === 0 ? "female" : voice.gender === 1 ? "male" : "neutral", + premium: voice.voiceType === "Neural", + })) + } + + return [] + } catch (error) { + console.error("Failed to fetch Azure voices:", error) + return [] + } + } + + async speak(text: string, options?: TtsPlayOptions): Promise { + if (!this.isInitialized || !this.synthesizer) { + throw new Error("Azure TTS provider not initialized") + } + + return new Promise((resolve, reject) => { + try { + options?.onStart?.() + + // Set voice if specified + if (options?.voice) { + this.speechConfig.speechSynthesisVoiceName = options.voice + } + + // Set speech rate if specified + if (options?.speed !== undefined) { + // Azure uses a percentage format: 0% = normal, -50% = half speed, +100% = double speed + const rate = ((options.speed - 1) * 100).toFixed(0) + const ssml = ` + + ${this.escapeXml(text)} + + ` + + // Use SSML for synthesis + this.currentSynthesis = this.synthesizer.speakSsmlAsync( + ssml, + (result: any) => { + if (result) { + options?.onStop?.() + resolve() + } else { + options?.onStop?.() + reject(new Error("Azure TTS synthesis failed")) + } + this.currentSynthesis = undefined + }, + (error: any) => { + options?.onStop?.() + reject(new Error(`Azure TTS error: ${error}`)) + this.currentSynthesis = undefined + }, + ) + } else { + // Use plain text synthesis + this.currentSynthesis = this.synthesizer.speakTextAsync( + text, + (result: any) => { + if (result) { + options?.onStop?.() + resolve() + } else { + options?.onStop?.() + reject(new Error("Azure TTS synthesis failed")) + } + this.currentSynthesis = undefined + }, + (error: any) => { + options?.onStop?.() + reject(new Error(`Azure TTS error: ${error}`)) + this.currentSynthesis = undefined + }, + ) + } + } catch (error) { + options?.onStop?.() + reject(error) + this.currentSynthesis = undefined + } + }) + } + + stop(): void { + // Stop any ongoing synthesis + if (this.synthesizer) { + try { + this.synthesizer.close() + // Recreate synthesizer for next use + const sdk = require("microsoft-cognitiveservices-speech-sdk") + this.synthesizer = new sdk.SpeechSynthesizer(this.speechConfig) + } catch (error) { + console.error("Error stopping Azure TTS:", error) + } + } + this.currentSynthesis = undefined + } + + dispose(): void { + this.stop() + if (this.synthesizer) { + this.synthesizer.close() + } + this.synthesizer = undefined + this.speechConfig = undefined + this.isInitialized = false + } + + private escapeXml(text: string): string { + return text + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'") + } +} diff --git a/src/services/tts/providers/google-cloud.ts b/src/services/tts/providers/google-cloud.ts new file mode 100644 index 0000000000..d9860b7986 --- /dev/null +++ b/src/services/tts/providers/google-cloud.ts @@ -0,0 +1,138 @@ +import { TtsProvider, TtsVoice, TtsPlayOptions } from "../interfaces/provider" +import * as vscode from "vscode" + +interface GoogleCloudConfig { + apiKey?: string + projectId?: string +} + +/** + * Google Cloud Text-to-Speech provider + */ +export class GoogleCloudTtsProvider implements TtsProvider { + readonly id = "google-cloud" + readonly name = "Google Cloud TTS" + + private client: any + private config: GoogleCloudConfig = {} + private isInitialized = false + private audioPlayer: any + + async initialize(config?: GoogleCloudConfig): Promise { + this.config = config || {} + + if (!this.config.apiKey) { + throw new Error("Google Cloud API key is required") + } + + try { + // Dynamic import to avoid loading the SDK until needed + const { TextToSpeechClient } = await import("@google-cloud/text-to-speech") + + // Create client with API key authentication + this.client = new TextToSpeechClient({ + apiKey: this.config.apiKey, + projectId: this.config.projectId, + }) + + this.isInitialized = true + } catch (error) { + throw new Error(`Failed to initialize Google Cloud TTS: ${error}`) + } + } + + async isAvailable(): Promise { + return this.isInitialized && !!this.client + } + + async getVoices(): Promise { + if (!this.isInitialized || !this.client) { + throw new Error("Google Cloud TTS provider not initialized") + } + + try { + const [response] = await this.client.listVoices({}) + + return ( + response.voices?.map((voice: any) => ({ + id: voice.name, + name: `${voice.name} (${voice.ssmlGender})`, + language: voice.languageCodes?.[0], + gender: voice.ssmlGender?.toLowerCase() as "male" | "female" | "neutral", + })) || [] + ) + } catch (error) { + console.error("Failed to fetch Google Cloud voices:", error) + return [] + } + } + + async speak(text: string, options?: TtsPlayOptions): Promise { + if (!this.isInitialized || !this.client) { + throw new Error("Google Cloud TTS provider not initialized") + } + + try { + options?.onStart?.() + + // Prepare the request + const request = { + input: { text }, + voice: { + languageCode: "en-US", + name: options?.voice || "en-US-Neural2-F", + ssmlGender: "FEMALE" as const, + }, + audioConfig: { + audioEncoding: "MP3" as const, + speakingRate: options?.speed || 1.0, + pitch: options?.pitch || 0, + volumeGainDb: options?.volume ? (options.volume - 1) * 20 : 0, + }, + } + + // Perform the text-to-speech request + const [response] = await this.client.synthesizeSpeech(request) + + if (response.audioContent) { + // Play the audio using the sound-play package + const soundPlay = require("sound-play") + + // Save audio to temporary file + const fs = require("fs") + const path = require("path") + const os = require("os") + + const tempFile = path.join(os.tmpdir(), `tts-${Date.now()}.mp3`) + fs.writeFileSync(tempFile, response.audioContent, "binary") + + // Play the audio file + this.audioPlayer = soundPlay.play(tempFile) + + await this.audioPlayer + + // Clean up temp file + fs.unlinkSync(tempFile) + } + + options?.onStop?.() + } catch (error) { + options?.onStop?.() + throw new Error(`Google Cloud TTS failed: ${error}`) + } + } + + stop(): void { + // Stop any ongoing playback + if (this.audioPlayer && typeof this.audioPlayer.kill === "function") { + this.audioPlayer.kill() + } + this.audioPlayer = undefined + } + + dispose(): void { + this.stop() + this.client = undefined + this.isInitialized = false + } +} diff --git a/src/services/tts/providers/native.ts b/src/services/tts/providers/native.ts new file mode 100644 index 0000000000..64cb40c376 --- /dev/null +++ b/src/services/tts/providers/native.ts @@ -0,0 +1,84 @@ +import { TtsProvider, TtsVoice, TtsPlayOptions } from "../interfaces/provider" + +interface Say { + speak: (text: string, voice?: string, speed?: number, callback?: (err?: string) => void) => void + stop: () => void +} + +/** + * Native TTS provider using the OS's built-in text-to-speech engine + */ +export class NativeTtsProvider implements TtsProvider { + readonly id = "native" + readonly name = "System TTS" + + private sayInstance: Say | undefined + private isInitialized = false + + async initialize(): Promise { + // Native provider doesn't need initialization + this.isInitialized = true + } + + async isAvailable(): Promise { + try { + // Check if the say module can be loaded + require("say") + return true + } catch { + return false + } + } + + async getVoices(): Promise { + // Native provider doesn't expose voice list + // Return a default voice + return [ + { + id: "default", + name: "System Default", + language: "en-US", + }, + ] + } + + async speak(text: string, options?: TtsPlayOptions): Promise { + if (!this.isInitialized) { + throw new Error("Native TTS provider not initialized") + } + + return new Promise((resolve, reject) => { + try { + const say: Say = require("say") + this.sayInstance = say + + options?.onStart?.() + + say.speak(text, undefined, options?.speed ?? 1.0, (err) => { + options?.onStop?.() + + if (err) { + reject(new Error(err)) + } else { + resolve() + } + + this.sayInstance = undefined + }) + } catch (error) { + this.sayInstance = undefined + reject(error) + } + }) + } + + stop(): void { + this.sayInstance?.stop() + this.sayInstance = undefined + } + + dispose(): void { + this.stop() + this.isInitialized = false + } +} diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 3ddd69945c..80cb7c62ef 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -232,6 +232,12 @@ export type ExtensionState = Pick< // | "enableCheckpoints" // Optional in GlobalSettings, required here. | "ttsEnabled" | "ttsSpeed" + | "ttsProvider" + | "ttsVoice" + | "googleCloudTtsApiKey" + | "googleCloudTtsProjectId" + | "azureTtsSubscriptionKey" + | "azureTtsRegion" | "soundEnabled" | "soundVolume" // | "maxOpenTabsContext" // Optional in GlobalSettings, required here. diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index cb8759d851..fb007ec04a 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -93,6 +93,12 @@ export interface WebviewMessage { | "soundEnabled" | "ttsEnabled" | "ttsSpeed" + | "ttsProvider" + | "ttsVoice" + | "googleCloudTtsApiKey" + | "googleCloudTtsProjectId" + | "azureTtsSubscriptionKey" + | "azureTtsRegion" | "soundVolume" | "diffEnabled" | "enableCheckpoints" @@ -220,7 +226,7 @@ export interface WebviewMessage { apiConfiguration?: ProviderSettings images?: string[] bool?: boolean - value?: number + value?: number | string commands?: string[] audioType?: AudioType serverName?: string diff --git a/src/utils/tts.ts b/src/utils/tts.ts index b544960571..af175d9d18 100644 --- a/src/utils/tts.ts +++ b/src/utils/tts.ts @@ -1,81 +1,82 @@ -interface Say { - speak: (text: string, voice?: string, speed?: number, callback?: (err?: string) => void) => void - stop: () => void -} +import { TtsManager } from "../services/tts/TtsManager" type PlayTtsOptions = { onStart?: () => void onStop?: () => void } -type QueueItem = { - message: string - options: PlayTtsOptions -} - -let isTtsEnabled = false - -export const setTtsEnabled = (enabled: boolean) => (isTtsEnabled = enabled) +// Get the singleton TTS manager instance +const ttsManager = TtsManager.getInstance() -let speed = 1.0 - -export const setTtsSpeed = (newSpeed: number) => (speed = newSpeed) +/** + * Enable or disable TTS + */ +export const setTtsEnabled = (enabled: boolean) => { + ttsManager.setEnabled(enabled) +} -let sayInstance: Say | undefined = undefined -let queue: QueueItem[] = [] +/** + * Set the TTS speed + */ +export const setTtsSpeed = (newSpeed: number) => { + ttsManager.setSpeed(newSpeed) +} +/** + * Play text-to-speech + */ export const playTts = async (message: string, options: PlayTtsOptions = {}) => { - if (!isTtsEnabled) { - return - } - - try { - queue.push({ message, options }) - await processQueue() - } catch (error) {} + await ttsManager.speak(message, options) } +/** + * Stop any ongoing TTS playback + */ export const stopTts = () => { - sayInstance?.stop() - sayInstance = undefined - queue = [] + ttsManager.stop() } -const processQueue = async (): Promise => { - if (!isTtsEnabled || sayInstance) { - return - } - - const item = queue.shift() - - if (!item) { - return - } - - try { - const { message: nextUtterance, options } = item - - await new Promise((resolve, reject) => { - const say: Say = require("say") - sayInstance = say - options.onStart?.() - - say.speak(nextUtterance, undefined, speed, (err) => { - options.onStop?.() +/** + * Initialize TTS with configuration + * This should be called when the extension activates + */ +export const initializeTts = async (config?: { + provider?: "native" | "google-cloud" | "azure" + googleCloudApiKey?: string + googleCloudProjectId?: string + azureSubscriptionKey?: string + azureRegion?: string +}) => { + await ttsManager.initialize({ + provider: config?.provider, + googleCloud: { + apiKey: config?.googleCloudApiKey, + projectId: config?.googleCloudProjectId, + }, + azure: { + subscriptionKey: config?.azureSubscriptionKey, + region: config?.azureRegion, + }, + }) +} - if (err) { - reject(new Error(err)) - } else { - resolve() - } +/** + * Get available TTS providers + */ +export const getAvailableTtsProviders = async () => { + return await ttsManager.getAvailableProviders() +} - sayInstance = undefined - }) - }) +/** + * Set the active TTS provider + */ +export const setTtsProvider = async (provider: "native" | "google-cloud" | "azure") => { + await ttsManager.setActiveProvider(provider) +} - await processQueue() - } catch (error: any) { - sayInstance = undefined - await processQueue() - } +/** + * Get voices from the active provider + */ +export const getTtsVoices = async () => { + return await ttsManager.getVoices() } diff --git a/webview-ui/src/components/settings/AzureTtsSettings.tsx b/webview-ui/src/components/settings/AzureTtsSettings.tsx new file mode 100644 index 0000000000..acd3c09b2f --- /dev/null +++ b/webview-ui/src/components/settings/AzureTtsSettings.tsx @@ -0,0 +1,85 @@ +import React from "react" +import { VSCodeTextField, VSCodeButton } from "@vscode/webview-ui-toolkit/react" +import { vscode } from "../../utils/vscode" + +interface AzureTtsSettingsProps { + subscriptionKey?: string + region?: string + onSubscriptionKeyChange: (value: string) => void + onRegionChange: (value: string) => void +} + +export const AzureTtsSettings: React.FC = ({ + subscriptionKey, + region, + onSubscriptionKeyChange, + onRegionChange, +}) => { + const handleTestConnection = async () => { + // Test the connection with the provided credentials + vscode.postMessage({ + type: "playTts", + text: "Testing Azure Speech Services connection.", + }) + } + + return ( +
+

Azure Speech Services Configuration

+ +
+ + onSubscriptionKeyChange(e.target.value)} + className="w-full" + /> + + Your Azure Speech Services subscription key + +
+ +
+ + onRegionChange(e.target.value)} + className="w-full" + /> + + The Azure region where your Speech Services resource is located + +
+ +
+ Test Connection + + window.open("https://docs.microsoft.com/azure/cognitive-services/speech-service/", "_blank") + }> + Documentation + +
+ +
+

To get started:

+
    +
  1. Create a Speech Services resource in Azure Portal
  2. +
  3. Copy your subscription key from the Keys and Endpoint section
  4. +
  5. Note your region (e.g., eastus, westeurope)
  6. +
  7. Enter your credentials above
  8. +
+
+
+ ) +} diff --git a/webview-ui/src/components/settings/GoogleCloudTtsSettings.tsx b/webview-ui/src/components/settings/GoogleCloudTtsSettings.tsx new file mode 100644 index 0000000000..3b5eeefa30 --- /dev/null +++ b/webview-ui/src/components/settings/GoogleCloudTtsSettings.tsx @@ -0,0 +1,82 @@ +import React from "react" +import { VSCodeTextField, VSCodeButton } from "@vscode/webview-ui-toolkit/react" +import { vscode } from "../../utils/vscode" + +interface GoogleCloudTtsSettingsProps { + apiKey?: string + projectId?: string + onApiKeyChange: (value: string) => void + onProjectIdChange: (value: string) => void +} + +export const GoogleCloudTtsSettings: React.FC = ({ + apiKey, + projectId, + onApiKeyChange, + onProjectIdChange, +}) => { + const handleTestConnection = async () => { + // Test the connection with the provided credentials + vscode.postMessage({ + type: "playTts", + text: "Testing Google Cloud Text-to-Speech connection.", + }) + } + + return ( +
+

Google Cloud TTS Configuration

+ +
+ + onApiKeyChange(e.target.value)} + className="w-full" + /> + + Your Google Cloud API key for Text-to-Speech service + +
+ +
+ + onProjectIdChange(e.target.value)} + className="w-full" + /> + + Your Google Cloud project ID (optional for API key authentication) + +
+ +
+ Test Connection + window.open("https://cloud.google.com/text-to-speech/docs/quickstart", "_blank")}> + Documentation + +
+ +
+

To get started:

+
    +
  1. Enable the Text-to-Speech API in your Google Cloud Console
  2. +
  3. Create an API key in the Credentials section
  4. +
  5. Enter your API key above
  6. +
+
+
+ ) +} diff --git a/webview-ui/src/components/settings/NotificationSettings.tsx b/webview-ui/src/components/settings/NotificationSettings.tsx index 9610cabad8..d4708114ce 100644 --- a/webview-ui/src/components/settings/NotificationSettings.tsx +++ b/webview-ui/src/components/settings/NotificationSettings.tsx @@ -1,24 +1,49 @@ import { HTMLAttributes } from "react" import { useAppTranslation } from "@/i18n/TranslationContext" -import { VSCodeCheckbox } from "@vscode/webview-ui-toolkit/react" +import { VSCodeCheckbox, VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react" import { Bell } from "lucide-react" import { SetCachedStateField } from "./types" import { SectionHeader } from "./SectionHeader" import { Section } from "./Section" import { Slider } from "../ui" +import { GoogleCloudTtsSettings } from "./GoogleCloudTtsSettings" +import { AzureTtsSettings } from "./AzureTtsSettings" type NotificationSettingsProps = HTMLAttributes & { ttsEnabled?: boolean ttsSpeed?: number + ttsProvider?: string + ttsVoice?: string + googleCloudTtsApiKey?: string + googleCloudTtsProjectId?: string + azureTtsSubscriptionKey?: string + azureTtsRegion?: string soundEnabled?: boolean soundVolume?: number - setCachedStateField: SetCachedStateField<"ttsEnabled" | "ttsSpeed" | "soundEnabled" | "soundVolume"> + setCachedStateField: SetCachedStateField< + | "ttsEnabled" + | "ttsSpeed" + | "ttsProvider" + | "ttsVoice" + | "googleCloudTtsApiKey" + | "googleCloudTtsProjectId" + | "azureTtsSubscriptionKey" + | "azureTtsRegion" + | "soundEnabled" + | "soundVolume" + > } export const NotificationSettings = ({ ttsEnabled, ttsSpeed, + ttsProvider = "native", + ttsVoice, + googleCloudTtsApiKey, + googleCloudTtsProjectId, + azureTtsSubscriptionKey, + azureTtsRegion, soundEnabled, soundVolume, setCachedStateField, @@ -49,6 +74,18 @@ export const NotificationSettings = ({ {ttsEnabled && (
+
+ + setCachedStateField("ttsProvider", e.target.value)} + className="w-full"> + System TTS + Google Cloud TTS + Azure Speech Services + +
+
+ + {ttsProvider === "google-cloud" && ( + setCachedStateField("googleCloudTtsApiKey", value)} + onProjectIdChange={(value) => setCachedStateField("googleCloudTtsProjectId", value)} + /> + )} + + {ttsProvider === "azure" && ( + + setCachedStateField("azureTtsSubscriptionKey", value) + } + onRegionChange={(value) => setCachedStateField("azureTtsRegion", value)} + /> + )} )} diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 630b59485d..f1d102773f 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -153,6 +153,12 @@ const SettingsView = forwardRef(({ onDone, t soundEnabled, ttsEnabled, ttsSpeed, + ttsProvider, + ttsVoice, + googleCloudTtsApiKey, + googleCloudTtsProjectId, + azureTtsSubscriptionKey, + azureTtsRegion, soundVolume, telemetrySetting, terminalOutputLineLimit, @@ -299,6 +305,12 @@ const SettingsView = forwardRef(({ onDone, t vscode.postMessage({ type: "soundEnabled", bool: soundEnabled }) vscode.postMessage({ type: "ttsEnabled", bool: ttsEnabled }) vscode.postMessage({ type: "ttsSpeed", value: ttsSpeed }) + vscode.postMessage({ type: "ttsProvider", value: ttsProvider }) + vscode.postMessage({ type: "ttsVoice", value: ttsVoice }) + vscode.postMessage({ type: "googleCloudTtsApiKey", value: googleCloudTtsApiKey }) + vscode.postMessage({ type: "googleCloudTtsProjectId", value: googleCloudTtsProjectId }) + vscode.postMessage({ type: "azureTtsSubscriptionKey", value: azureTtsSubscriptionKey }) + vscode.postMessage({ type: "azureTtsRegion", value: azureTtsRegion }) vscode.postMessage({ type: "soundVolume", value: soundVolume }) vscode.postMessage({ type: "diffEnabled", bool: diffEnabled }) vscode.postMessage({ type: "enableCheckpoints", bool: enableCheckpoints }) @@ -659,6 +671,12 @@ const SettingsView = forwardRef(({ onDone, t