
Commit be5d766

Updated library with latest OpenAI models (Gpt4o variants) and enhanced image generation capabilities (#373)
1 parent 5d50912 commit be5d766

18 files changed: +445 -88 lines changed

core/src/main/scala/sttp/openai/OpenAI.scala

Lines changed: 23 additions & 12 deletions
@@ -9,10 +9,9 @@ import sttp.openai.requests.admin.{QueryParameters => _, _}
 import sttp.openai.requests.assistants.AssistantsRequestBody.{CreateAssistantBody, ModifyAssistantBody}
 import sttp.openai.requests.assistants.AssistantsResponseData.{AssistantData, DeleteAssistantResponse, ListAssistantsResponse}
 import sttp.openai.requests.audio.AudioResponseData.AudioResponse
-import sttp.openai.requests.audio.RecognitionModel
 import sttp.openai.requests.audio.speech.SpeechRequestBody
-import sttp.openai.requests.audio.transcriptions.TranscriptionConfig
-import sttp.openai.requests.audio.translations.TranslationConfig
+import sttp.openai.requests.audio.transcriptions.{TranscriptionConfig, TranscriptionModel}
+import sttp.openai.requests.audio.translations.{TranslationConfig, TranslationModel}
 import sttp.openai.requests.batch.{QueryParameters => _, _}
 import sttp.openai.requests.completions.CompletionsRequestBody.CompletionsBody
 import sttp.openai.requests.completions.CompletionsResponseData.CompletionsResponse
@@ -181,13 +180,25 @@ class OpenAI(authToken: String, baseUri: Uri = OpenAIUris.OpenAIBaseUri) {
       .post(openAIUris.EditImage)
       .multipartBody {
         import imageEditsConfig._
-        Seq(
-          Some(multipartFile("image", image)),
+        val imageParts = image match {
+          case singleImage :: Nil => Seq(multipartFile("image", singleImage))
+          case _                  => image.map(img => multipartFile("image[]", img))
+        }
+        imageParts ++ Seq(
           Some(multipart("prompt", prompt)),
+          background.map(bg => multipart("background", bg)),
+          inputFidelity.map(fid => multipart("input_fidelity", fid)),
           mask.map(multipartFile("mask", _)),
+          model.map(m => multipart("model", m)),
           n.map(i => multipart("n", i.toString)),
+          outputCompression.map(c => multipart("output_compression", c.toString)),
+          outputFormat.map(f => multipart("output_format", f)),
+          partialImages.map(p => multipart("partial_images", p.toString)),
+          quality.map(q => multipart("quality", q)),
           size.map(s => multipart("size", s.value)),
-          responseFormat.map(format => multipart("response_format", format.value))
+          responseFormat.map(format => multipart("response_format", format.value)),
+          stream.map(s => multipart("stream", s.toString)),
+          user.map(u => multipart("user", u))
         ).flatten
       }
       .response(asJson_parseErrors[ImageResponse])
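
The edit-image request above now accepts a list of input images, sent as repeated image[] parts when more than one file is given, plus the newer edit parameters (background, input_fidelity, model, output_compression, output_format, partial_images, quality, stream, user). A minimal usage sketch with the synchronous client; the imageEdits entry point, the ImageEditsConfig import path, and the field types are assumptions inferred from this diff, not confirmed by it:

import java.io.File
import sttp.openai.OpenAISyncClient
import sttp.openai.requests.images.edit.ImageEditsConfig // import path assumed

val client = OpenAISyncClient("your-api-key") // placeholder key

// Two files take the image[] branch above; a single file is sent as a plain "image" part.
val edited = client.imageEdits(
  ImageEditsConfig(
    image = List(new File("base.png"), new File("overlay.png")),
    prompt = "Blend the overlay into the base scene",
    model = Some("gpt-image-1"), // model id illustrative; the diff only adds the field
    quality = Some("high"),
    outputFormat = Some("png")
  )
)
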
@@ -570,7 +581,7 @@ class OpenAI(authToken: String, baseUri: Uri = OpenAIUris.OpenAIBaseUri) {
    * @param model
    *   ID of the model to use. Only whisper-1 is currently available.
    */
-  def createTranslation(file: File, model: RecognitionModel): Request[Either[OpenAIException, AudioResponse]] =
+  def createTranslation(file: File, model: TranslationModel): Request[Either[OpenAIException, AudioResponse]] =
     openAIAuthRequest
       .post(openAIUris.Translations)
       .multipartBody(
@@ -588,7 +599,7 @@ class OpenAI(authToken: String, baseUri: Uri = OpenAIUris.OpenAIBaseUri) {
    * @param model
    *   ID of the model to use. Only whisper-1 is currently available.
    */
-  def createTranslation(systemPath: String, model: RecognitionModel): Request[Either[OpenAIException, AudioResponse]] =
+  def createTranslation(systemPath: String, model: TranslationModel): Request[Either[OpenAIException, AudioResponse]] =
     openAIAuthRequest
       .post(openAIUris.Translations)
       .multipartBody(
@@ -640,9 +651,9 @@ class OpenAI(authToken: String, baseUri: Uri = OpenAIUris.OpenAIBaseUri) {
    * @param file
    *   The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
    * @param model
-   *   ID of the model to use. Only whisper-1 is currently available.
+   *   ID of the model to use. Whisper-1, gpt-4o-transcribe, gpt-4o-mini-transcribe are currently available.
    */
-  def createTranscription(file: File, model: RecognitionModel): Request[Either[OpenAIException, AudioResponse]] =
+  def createTranscription(file: File, model: TranscriptionModel): Request[Either[OpenAIException, AudioResponse]] =
     openAIAuthRequest
       .post(openAIUris.Transcriptions)
       .multipartBody(
@@ -658,11 +669,11 @@ class OpenAI(authToken: String, baseUri: Uri = OpenAIUris.OpenAIBaseUri) {
    * @param systemPath
    *   The audio systemPath to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
    * @param model
-   *   ID of the model to use. Only whisper-1 is currently available.
+   *   ID of the model to use. Whisper-1, gpt-4o-transcribe, gpt-4o-mini-transcribe are currently available.
    */
   def createTranscription(
       systemPath: String,
-      model: RecognitionModel
+      model: TranscriptionModel
   ): Request[Either[OpenAIException, AudioResponse]] =
     openAIAuthRequest
       .post(openAIUris.Transcriptions)
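
With RecognitionModel removed, transcription requests are built with the new TranscriptionModel type. A short sketch in the request-building style of this file, assuming sttp client4's DefaultSyncBackend (file path and key are placeholders):

import java.io.File
import sttp.client4.DefaultSyncBackend
import sttp.openai.OpenAI
import sttp.openai.requests.audio.transcriptions.TranscriptionModel

val openAI = new OpenAI("your-api-key") // placeholder key
val backend = DefaultSyncBackend()

// createTranscription now takes a TranscriptionModel, e.g. one of the new gpt-4o variants
val transcription = openAI
  .createTranscription(new File("meeting.mp3"), TranscriptionModel.Gpt4oTranscribe)
  .send(backend)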

core/src/main/scala/sttp/openai/OpenAISyncClient.scala

Lines changed: 6 additions & 7 deletions
@@ -7,9 +7,8 @@ import sttp.openai.requests.admin.{QueryParameters => _, _}
 import sttp.openai.requests.assistants.AssistantsRequestBody.{CreateAssistantBody, ModifyAssistantBody}
 import sttp.openai.requests.assistants.AssistantsResponseData.{AssistantData, DeleteAssistantResponse, ListAssistantsResponse}
 import sttp.openai.requests.audio.AudioResponseData.AudioResponse
-import sttp.openai.requests.audio.RecognitionModel
-import sttp.openai.requests.audio.transcriptions.TranscriptionConfig
-import sttp.openai.requests.audio.translations.TranslationConfig
+import sttp.openai.requests.audio.transcriptions.{TranscriptionConfig, TranscriptionModel}
+import sttp.openai.requests.audio.translations.{TranslationConfig, TranslationModel}
 import sttp.openai.requests.batch.{BatchRequestBody, BatchResponse, ListBatchResponse}
 import sttp.openai.requests.completions.CompletionsRequestBody.CompletionsBody
 import sttp.openai.requests.completions.CompletionsResponseData.CompletionsResponse
@@ -375,7 +374,7 @@ class OpenAISyncClient private (
    * @param model
    *   ID of the model to use. Only whisper-1 is currently available.
    */
-  def createTranslation(file: File, model: RecognitionModel): AudioResponse =
+  def createTranslation(file: File, model: TranslationModel): AudioResponse =
     sendOrThrow(openAI.createTranslation(file, model))
 
   /** Translates audio into English text.
@@ -387,7 +386,7 @@ class OpenAISyncClient private (
    * @param model
    *   ID of the model to use. Only whisper-1 is currently available.
    */
-  def createTranslation(systemPath: String, model: RecognitionModel): AudioResponse =
+  def createTranslation(systemPath: String, model: TranslationModel): AudioResponse =
     sendOrThrow(openAI.createTranslation(systemPath, model))
 
   /** Translates audio into English text.
@@ -419,7 +418,7 @@ class OpenAISyncClient private (
    * @param model
    *   ID of the model to use. Only whisper-1 is currently available.
    */
-  def createTranscription(file: File, model: RecognitionModel): AudioResponse =
+  def createTranscription(file: File, model: TranscriptionModel): AudioResponse =
     sendOrThrow(openAI.createTranscription(file, model))
 
   /** Transcribes audio into the input language.
@@ -431,7 +430,7 @@ class OpenAISyncClient private (
    * @param model
    *   ID of the model to use. Only whisper-1 is currently available.
    */
-  def createTranscription(systemPath: String, model: RecognitionModel): AudioResponse =
+  def createTranscription(systemPath: String, model: TranscriptionModel): AudioResponse =
     sendOrThrow(openAI.createTranscription(systemPath, model))
 
   /** Transcribes audio into the input language.
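
The synchronous client mirrors the same signature change; a minimal sketch (key and file paths are placeholders):

import java.io.File
import sttp.openai.OpenAISyncClient
import sttp.openai.requests.audio.transcriptions.TranscriptionModel
import sttp.openai.requests.audio.translations.TranslationModel

val client = OpenAISyncClient("your-api-key")

// Transcription accepts the new gpt-4o models...
val transcript = client.createTranscription(new File("talk.mp3"), TranscriptionModel.Gpt4oMiniTranscribe)

// ...while translation still takes whisper-1, now typed as a TranslationModel.
val english = client.createTranslation(new File("interview.mp3"), TranslationModel.Whisper1)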

core/src/main/scala/sttp/openai/requests/audio/RecognitionModel.scala

Lines changed: 0 additions & 18 deletions
This file was deleted.

core/src/main/scala/sttp/openai/requests/audio/speech/SpeechRequestBody.scala

Lines changed: 2 additions & 0 deletions
@@ -38,6 +38,8 @@ object SpeechModel {
 
   case object TTS1 extends SpeechModel("tts-1")
   case object TTS1HD extends SpeechModel("tts-1-hd")
+  case object GPT4oMiniTTS extends SpeechModel("gpt-4o-mini-tts")
+
   case class CustomSpeechModel(customValue: String) extends SpeechModel(customValue)
 }

core/src/main/scala/sttp/openai/requests/audio/transcriptions/TranscriptionConfig.scala

Lines changed: 2 additions & 3 deletions
@@ -1,6 +1,5 @@
 package sttp.openai.requests.audio.transcriptions
 
-import sttp.openai.requests.audio.RecognitionModel
 import sttp.openai.requests.images.ResponseFormat
 
 import java.io.File
@@ -23,7 +22,7 @@ import java.nio.file.Paths
    */
 case class TranscriptionConfig(
     file: File,
-    model: RecognitionModel,
+    model: TranscriptionModel,
     prompt: Option[String] = None,
     responseFormat: Option[ResponseFormat] = None,
     temperature: Option[Float] = None,
@@ -33,7 +32,7 @@ case class TranscriptionConfig(
 object TranscriptionConfig {
   def createTranscriptionConfigWithSystemPaths(
       systemPathImage: String,
-      model: RecognitionModel,
+      model: TranscriptionModel,
       prompt: Option[String] = None,
       responseFormat: Option[ResponseFormat] = None,
       temperature: Option[Float] = None,
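
TranscriptionConfig now carries the dedicated model type; the optional fields shown here all default to None, so a minimal config needs only file and model:

import java.io.File
import sttp.openai.requests.audio.transcriptions.{TranscriptionConfig, TranscriptionModel}

// Config for the transcription entry points that accept a TranscriptionConfig
val config = TranscriptionConfig(
  file = new File("lecture.wav"),
  model = TranscriptionModel.Whisper1,
  prompt = Some("Vocabulary: sttp, tapir, uPickle")
)
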
core/src/main/scala/sttp/openai/requests/audio/transcriptions/TranscriptionModel.scala

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+package sttp.openai.requests.audio.transcriptions
+
+import sttp.openai.json.SnakePickle
+
+sealed abstract class TranscriptionModel(val value: String)
+
+object TranscriptionModel {
+  case object Whisper1 extends TranscriptionModel("whisper-1")
+  case object Gpt4oTranscribe extends TranscriptionModel("gpt-4o-transcribe")
+  case object Gpt4oMiniTranscribe extends TranscriptionModel("gpt-4o-mini-transcribe")
+
+  /** Use only as a workaround if the API supports a model that's not yet predefined as a case object. Otherwise, a custom model
+   * would be rejected. See [[https://platform.openai.com/docs/api-reference/audio/createTranscription]] for the current list of
+   * supported models.
+   */
+  case class Custom(customModel: String) extends TranscriptionModel(customModel)
+
+  implicit val ModelW: SnakePickle.Writer[TranscriptionModel] = SnakePickle
+    .writer[ujson.Value]
+    .comap(_.value)
+}
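
Custom remains an escape hatch for model ids not yet covered by the enum, and the SnakePickle writer serializes any TranscriptionModel to its raw string value:

import sttp.openai.json.SnakePickle
import sttp.openai.requests.audio.transcriptions.TranscriptionModel

// Predefined and custom models both serialize to their wire-format strings
val known  = SnakePickle.write[TranscriptionModel](TranscriptionModel.Gpt4oTranscribe)       // yields the JSON string "gpt-4o-transcribe"
val custom = SnakePickle.write[TranscriptionModel](TranscriptionModel.Custom("my-model-id")) // id is a placeholder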

core/src/main/scala/sttp/openai/requests/audio/translations/TranslationConfig.scala

Lines changed: 1 addition & 2 deletions
@@ -1,6 +1,5 @@
 package sttp.openai.requests.audio.translations
 
-import sttp.openai.requests.audio.RecognitionModel
 import sttp.openai.requests.images.ResponseFormat
 
 import java.io.File
@@ -22,7 +21,7 @@ import java.io.File
    */
 case class TranslationConfig(
     file: File,
-    model: RecognitionModel,
+    model: TranslationModel,
     prompt: Option[String] = None,
     responseFormat: Option[ResponseFormat] = None,
     temperature: Option[Float] = None,
core/src/main/scala/sttp/openai/requests/audio/translations/TranslationModel.scala

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+package sttp.openai.requests.audio.translations
+
+import sttp.openai.json.SnakePickle
+
+sealed abstract class TranslationModel(val value: String)
+
+object TranslationModel {
+  case object Whisper1 extends TranslationModel("whisper-1")
+
+  /** Use only as a workaround if the API supports a model that's not yet predefined as a case object. Otherwise, a custom model
+   * would be rejected. See [[https://platform.openai.com/docs/api-reference/audio/createTranslation]] for the current list of
+   * supported models.
+   */
+  case class Custom(customModel: String) extends TranslationModel(customModel)
+
+  implicit val ModelW: SnakePickle.Writer[TranslationModel] = SnakePickle
+    .writer[ujson.Value]
+    .comap(_.value)
+}

core/src/main/scala/sttp/openai/requests/completions/chat/ChatChunkRequestResponseData.scala

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 package sttp.openai.requests.completions.chat
 
-import sttp.openai.json.SnakePickle
 import sttp.model.sse.ServerSentEvent
+import sttp.openai.json.SnakePickle
 
 object ChatChunkRequestResponseData {
 

core/src/main/scala/sttp/openai/requests/completions/chat/message/Tool.scala

Lines changed: 1 addition & 1 deletion
@@ -3,8 +3,8 @@ package sttp.openai.requests.completions.chat.message
 import sttp.apispec.Schema
 import sttp.openai.json.SnakePickle
 import sttp.openai.requests.completions.chat.SchemaSupport
-import sttp.tapir.{Schema => TSchema}
 import sttp.tapir.docs.apispec.schema.TapirSchemaToJsonSchema
+import sttp.tapir.{Schema => TSchema}
 import ujson._
 
 sealed trait Tool
