diff --git a/README.md b/README.md index bb0b00f9..253de26e 100644 --- a/README.md +++ b/README.md @@ -404,6 +404,7 @@ Experimental support for TTS. Today the following providers are available: * Google (default) * macOS say command * Elevenlabs +* OpenAI It will use the one you configure in settings.json. If you define settings for multiple TTS services, it will not be guaranteed which one it will choose! @@ -653,6 +654,35 @@ Full: } ``` +#### OpenAI + +This REQUIRES a registered API key from OpenAI! See https://platform.openai.com/docs/overview + +You need to add this to a file called settings.json (create if it doesn't exist), like this: + +``` +{ + "openaiKey": "sk-12822720jhskjhs9879879879" +} +``` + +Replace the code above (it is just made up) with the API key you've got after registering. + +Action is: + + /[Room name]/say/[phrase][/[language_code]][/[announce volume]] + /sayall/[phrase][/[language_code]][/[announce volume]] + +Example: + + /Office/say/Hello, dinner is ready + /Office/say/Hej, maten är klar/sv-se + /sayall/Hello, dinner is ready + /Office/say/Hello, dinner is ready/90 + /Office/say/Hej, maten är klar/sv-se/90 + +The language code doesn't matter as OpenAI will determine the language from the text. This may not always be correct but the probability increases with longer texts. + #### Google (default if no other has been configured) Does not require any API keys. Please note that Google has been known in the past to change the requirements for its Text-to-Speech API, and this may stop working in the future. There are also limitations on how many requests one is allowed to do in a specific time period. 
'use strict';
const crypto = require('crypto');
const fs = require('fs');
const http = require('http');
const https = require('https');
const path = require('path');
const fileDuration = require('../helpers/file-duration');
const settings = require('../../settings');
const logger = require('sonos-discovery/lib/helpers/logger');

/**
 * Requests synthesized speech from the OpenAI `/v1/audio/speech` endpoint and
 * stores the response body at `filepath`.
 *
 * The body is first streamed to a temporary sibling file and only renamed to
 * `filepath` once it has been written completely, so an interrupted download
 * never leaves a truncated file where the cache lookup would find it.
 *
 * @param {string} postData - JSON request body, already serialized.
 * @param {string} filepath - Final destination of the audio file.
 * @returns {Promise<void>} Resolves once `filepath` exists; rejects on a
 *   request error, a non-2xx status, an incomplete response or a file error.
 */
function downloadSpeech(postData, filepath) {
  return new Promise((resolve, reject) => {
    const options = {
      hostname: 'api.openai.com',
      path: '/v1/audio/speech',
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${settings.openaiKey}`,
        'Content-Type': 'application/json',
        // Must be the BYTE length: String#length counts UTF-16 code units and
        // is too small as soon as the phrase contains non-ASCII characters.
        'Content-Length': Buffer.byteLength(postData)
      }
    };

    const req = https.request(options, (res) => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // Drain the unread body so the socket is released.
        res.resume();
        reject(new Error(`Download from OpenAI TTS failed with status ${res.statusCode}, ${res.statusMessage}`));
        return;
      }

      // Random suffix keeps concurrent downloads of the same phrase apart.
      const tempPath = `${filepath}.${crypto.randomBytes(6).toString('hex')}.tmp`;
      const file = fs.createWriteStream(tempPath);
      let failed = false;

      const fail = (err) => {
        if (failed) {
          return;
        }
        failed = true;
        file.destroy();
        // Best effort cleanup; the original error is what the caller needs.
        fs.unlink(tempPath, () => reject(err));
      };

      res.on('error', fail);
      res.on('close', () => {
        // 'close' without a complete message means the connection was cut.
        if (!res.complete) {
          fail(new Error('Download from OpenAI TTS was aborted before the response completed'));
        }
      });
      file.on('error', fail);
      file.on('finish', () => {
        fs.rename(tempPath, filepath, (err) => {
          if (err) {
            fail(err);
            return;
          }
          resolve();
        });
      });

      res.pipe(file);
    });

    req.on('error', (err) => {
      reject(err);
    });

    req.write(postData);
    req.end();
  });
}

/**
 * Text-to-speech provider backed by the OpenAI speech API.
 *
 * Generated audio is cached as an mp3 below `<settings.webroot>/tts`; a later
 * call with the same phrase, language, voice and model reuses the cached file
 * instead of contacting OpenAI again.
 *
 * @param {string} phrase - Text to synthesize.
 * @param {string} [language] - Language code; defaults to 'en'. It is only
 *   used in the cache filename and is not sent to OpenAI.
 * @param {string} [voice='alloy'] - Value sent as `voice` in the request.
 * @param {string} [model='tts-1'] - Value sent as `model` in the request.
 * @returns {Promise<{duration: *, uri: string}|undefined>} The web-relative
 *   URI of the audio file and the value reported by `fileDuration` for it, or
 *   `undefined` when no `openaiKey` is configured.
 */
function openai(phrase, language, voice = 'alloy', model = 'tts-1') {
  // Without a key the request could only fail with an authorization error.
  // NOTE(review): resolving with undefined presumably lets the caller fall
  // through to another configured provider - confirm against the caller.
  if (!settings.openaiKey) {
    return Promise.resolve();
  }

  if (!language) {
    language = 'en';
  }

  // Construct a filesystem neutral filename. Voice and model only enter the
  // hash when they differ from the defaults, so files cached for the default
  // voice/model keep their existing names.
  const hash = crypto.createHash('sha1').update(phrase);
  if (voice !== 'alloy' || model !== 'tts-1') {
    hash.update(`\u0000${voice}\u0000${model}`);
  }
  const phraseHash = hash.digest('hex');
  const filename = `openai-${phraseHash}-${language}.mp3`;
  const filepath = path.resolve(settings.webroot, 'tts', filename);
  const expectedUri = `/tts/${filename}`;

  const describe = (duration) => {
    return {
      duration,
      uri: expectedUri
    };
  };

  try {
    fs.accessSync(filepath, fs.constants.R_OK);
    return fileDuration(filepath).then(describe);
  } catch (err) {
    logger.info(`announce file for phrase "${phrase}" does not seem to exist, downloading from OpenAI TTS`);
  }

  const postData = JSON.stringify({
    model: model,
    input: phrase,
    voice: voice
  });

  return downloadSpeech(postData, filepath)
    .then(() => {
      return fileDuration(filepath);
    })
    .then(describe);
}

module.exports = openai;