From 01a8a3a152208b95d2963b873281494d10f26589 Mon Sep 17 00:00:00 2001 From: simonstratmann <102287615+simonstratmann@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:20:55 +0200 Subject: [PATCH 1/2] Add OpenAI TTS --- lib/tts-providers/openai.js | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 lib/tts-providers/openai.js diff --git a/lib/tts-providers/openai.js b/lib/tts-providers/openai.js new file mode 100644 index 00000000..95e3f13d --- /dev/null +++ b/lib/tts-providers/openai.js @@ -0,0 +1,83 @@ +'use strict'; +const crypto = require('crypto'); +const fs = require('fs'); +const http = require('http'); +const https = require('https'); +const path = require('path'); +const fileDuration = require('../helpers/file-duration'); +const settings = require('../../settings'); +const logger = require('sonos-discovery/lib/helpers/logger'); + +function openai(phrase, language, voice = 'alloy', model = 'tts-1') { + if (!language) { + language = 'en'; + } + + // Construct a filesystem neutral filename + const phraseHash = crypto.createHash('sha1').update(phrase).digest('hex'); + const filename = `openai-${phraseHash}-${language}.mp3`; + const filepath = path.resolve(settings.webroot, 'tts', filename); + + const expectedUri = `/tts/${filename}`; + try { + fs.accessSync(filepath, fs.R_OK); + return fileDuration(filepath) + .then((duration) => { + return { + duration, + uri: expectedUri + }; + }); + } catch (err) { + logger.info(`announce file for phrase "${phrase}" does not seem to exist, downloading from OpenAI TTS`); + } + + return new Promise((resolve, reject) => { + const postData = JSON.stringify({ + model: model, + input: phrase, + voice: voice + }); + const options = { + hostname: 'api.openai.com', + path: '/v1/audio/speech', + method: 'POST', + headers: { + 'Authorization': `Bearer ${settings.openaiKey}`, + 'Content-Type': 'application/json', + 'Content-Length': postData.length + } + }; + + const req = https.request(options, (res) => { + if (res.statusCode >= 200 && res.statusCode < 300) { + const file = fs.createWriteStream(filepath); + res.pipe(file); + file.on('finish', function () { + file.end(); + resolve(expectedUri); + }); + } else { + reject(new Error(`Download from OpenAI TTS failed with status ${res.statusCode}, ${res.statusMessage}`)); + } + }); + + req.on('error', (err) => { + reject(err); + }); + + req.write(postData); + req.end(); + }) + .then(() => { + return fileDuration(filepath); + }) + .then((duration) => { + return { + duration, + uri: expectedUri + }; + }); +} + +module.exports = openai; From ae1b64549d69c86e1cc12d01b894d9e039186022 Mon Sep 17 00:00:00 2001 From: simonstratmann <102287615+simonstratmann@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:25:29 +0200 Subject: [PATCH 2/2] Update README.md --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index bb0b00f9..253de26e 100644 --- a/README.md +++ b/README.md @@ -404,6 +404,7 @@ Experimental support for TTS. Today the following providers are available: * Google (default) * macOS say command * Elevenlabs +* OpenAI It will use the one you configure in settings.json. If you define settings for multiple TTS services, it will not be guaranteed which one it will choose! @@ -653,6 +654,35 @@ Full: } ``` +#### OpenAI + +This REQUIRES a registered API key from OpenAI! See https://platform.openai.com/docs/overview + +You need to add this to a file called settings.json (create if it doesn't exist), like this: + +``` +{ + "openaiKey": "sk-12822720jhskjhs9879879879" +} +``` + +Replace the code above (it is just made up) with the API key you've got after registering. + +Action is: + + /[Room name]/say/[phrase][/[language_code]][/[announce volume]] + /sayall/[phrase][/[language_code]][/[announce volume]] + +Example: + + /Office/say/Hello, dinner is ready + /Office/say/Hej, maten är klar/sv-se + /sayall/Hello, dinner is ready + /Office/say/Hello, dinner is ready/90 + /Office/say/Hej, maten är klar/sv-se/90 + +The language code doesn't matter as OpenAI will determine the language from the text. This may not always be correct but the probability increases with longer texts. + #### Google (default if no other has been configured) Does not require any API keys. Please note that Google has been known in the past to change the requirements for its Text-to-Speech API, and this may stop working in the future. There is also limiations to how many requests one is allowed to do in a specific time period.