Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# Component registration for the application sources.
#
# NOTE(review): this text is a diff hunk whose +/- markers were lost, so the
# duplicated set(COMMON_SRC ...) and SRCS lines below are old/new pairs, not
# two live statements. Going by the hunk summary (5 additions, 3 deletions),
# the second line of each pair looks like the post-change version - confirm
# against the real file before editing.
#
# COMMON_SRC lists the sources compiled for every target; the later variant
# also adds "media.cpp" to it.
set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp")
set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp" "media.cpp")

# Linux target: registers the common sources (the later variant adds
# "platform_linux.cpp") and links pulse, pulse-simple and -lbsd.
if(IDF_TARGET STREQUAL linux)
idf_component_register(
SRCS ${COMMON_SRC}
SRCS ${COMMON_SRC} "platform_linux.cpp"
REQUIRES peer esp-libopus esp_http_client)
target_link_libraries(${COMPONENT_LIB} PRIVATE pulse pulse-simple)
target_link_libraries(${COMPONENT_LIB} PRIVATE "-lbsd")
# Any other target: registers the common sources plus "wifi.cpp" (and, in the
# later variant, "platform_esp32s3.cpp") and additionally requires driver,
# esp_wifi, nvs_flash and esp_psram.
else()
idf_component_register(
SRCS ${COMMON_SRC} "wifi.cpp" "media.cpp"
SRCS ${COMMON_SRC} "platform_esp32s3.cpp" "wifi.cpp"
REQUIRES driver esp_wifi nvs_flash peer esp_psram esp-libopus esp_http_client)
endif()

Expand Down
11 changes: 4 additions & 7 deletions src/http.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <string.h>

#include "main.h"
#include "platform.h"

#ifndef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
Expand Down Expand Up @@ -34,9 +35,7 @@ esp_err_t oai_http_event_handler(esp_http_client_event_t *evt) {
ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_DATA, len=%d", evt->data_len);
if (esp_http_client_is_chunked_response(evt->client)) {
ESP_LOGE(LOG_TAG, "Chunked HTTP response not supported");
#ifndef LINUX_BUILD
esp_restart();
#endif
oai_platform_restart();
}

if (output_len == 0 && evt->user_data) {
Expand Down Expand Up @@ -71,7 +70,7 @@ esp_err_t oai_http_event_handler(esp_http_client_event_t *evt) {

void oai_http_request(char *offer, char *answer) {
esp_http_client_config_t config;
memset(&config, 0, sizeof(esp_http_client_config_t));
memset(&config, 0, sizeof(config));

config.url = OPENAI_REALTIMEAPI;
config.event_handler = oai_http_event_handler;
Expand All @@ -88,9 +87,7 @@ void oai_http_request(char *offer, char *answer) {
esp_err_t err = esp_http_client_perform(client);
if (err != ESP_OK || esp_http_client_get_status_code(client) != 201) {
ESP_LOGE(LOG_TAG, "Error perform http request %s", esp_err_to_name(err));
#ifndef LINUX_BUILD
esp_restart();
#endif
oai_platform_restart();
}

esp_http_client_cleanup(client);
Expand Down
26 changes: 8 additions & 18 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,19 @@
#include <esp_log.h>
#include <peer.h>

// Program entry point.
//
// NOTE(review): this is a diff hunk whose +/- markers were lost; two versions
// of the entry point are interleaved below and the text does not compile as
// shown. One version has a separate `extern "C" app_main` (with NVS setup) and
// a reduced `int main` for LINUX_BUILD; the other defines a single body behind
// the MAIN macro. Confirm which lines are live before editing.
#ifndef LINUX_BUILD
#include "nvs_flash.h"
#include "platform.h"

// app_main variant: initialises NVS first. If nvs_flash_init() reports
// ESP_ERR_NVS_NO_FREE_PAGES or ESP_ERR_NVS_NEW_VERSION_FOUND, the partition is
// erased and initialisation is attempted once more; the final result is
// passed to ESP_ERROR_CHECK.
extern "C" void app_main(void) {
esp_err_t ret = nvs_flash_init();
if (ret == ESP_ERR_NVS_NO_FREE_PAGES ||
ret == ESP_ERR_NVS_NEW_VERSION_FOUND) {
ESP_ERROR_CHECK(nvs_flash_erase());
ret = nvs_flash_init();
}
ESP_ERROR_CHECK(ret);
// MAIN expands to the entry-point signature: `extern "C" void app_main(void)`
// unless LINUX_BUILD is defined, otherwise `int main(void)`.
#ifndef LINUX_BUILD
#define MAIN extern "C" void app_main(void)
#else
#define MAIN int main(void)
#endif

// Shared start-up sequence: default event loop, peer library, audio capture,
// Opus decoder, Wi-Fi, then the WebRTC session.
// NOTE(review): the MAIN variant shows no nvs_flash_init() and no call to
// oai_platform_init() (declared in platform.h) - verify that NVS is still set
// up somewhere before oai_wifi() runs. It also calls oai_wifi() on every
// target, while the CMake hunk lists "wifi.cpp" only for the non-linux build -
// confirm the Linux build provides that symbol.
MAIN {
ESP_ERROR_CHECK(esp_event_loop_create_default());
peer_init();
oai_init_audio_capture();
oai_platform_init_audio_capture();
oai_init_audio_decoder();
oai_wifi();
oai_webrtc();
}
#else
// LINUX_BUILD-only variant: event loop, peer library and WebRTC, with no
// audio or Wi-Fi initialisation.
int main(void) {
ESP_ERROR_CHECK(esp_event_loop_create_default());
peer_init();
oai_webrtc();
}
#endif
5 changes: 5 additions & 0 deletions src/main.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#ifndef _MAIN_H_
#define _MAIN_H_

#include <peer.h>

#define LOG_TAG "realtimeapi-sdk"
Expand All @@ -11,3 +14,5 @@ void oai_send_audio(PeerConnection *peer_connection);
void oai_audio_decode(uint8_t *data, size_t size);
void oai_webrtc();
void oai_http_request(char *offer, char *answer);

#endif
117 changes: 21 additions & 96 deletions src/media.cpp
Original file line number Diff line number Diff line change
@@ -1,143 +1,68 @@
#include <driver/i2s.h>
#include <opus.h>
#include <stdio.h>

#include "main.h"
#include "platform.h"

#define OPUS_OUT_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode
#define SAMPLE_RATE 8000
#define BUFFER_SAMPLES 320

#define MCLK_PIN 0
#define DAC_BCLK_PIN 15
#define DAC_LRCLK_PIN 16
#define DAC_DATA_PIN 17
#define ADC_BCLK_PIN 38
#define ADC_LRCLK_PIN 39
#define ADC_DATA_PIN 40

#define OPUS_ENCODER_BITRATE 30000
#define OPUS_ENCODER_COMPLEXITY 0

// Installs and pins two I2S peripherals: I2S_NUM_0 as a master transmitter
// (audio output) and I2S_NUM_1 as a master receiver (audio input). Both use
// SAMPLE_RATE, 16-bit samples, 8 DMA buffers of BUFFER_SAMPLES each and the
// APLL clock. On any driver or pin failure a message is printed and the
// function returns early without undoing earlier steps.
//
// NOTE(review): this is a diff hunk whose +/- markers were lost. The
// kCaptureFrameSize/kPlaybackFrameSize constants and the static
// opus_decoder/opus_encoder pointers that appear inside the body below are
// presumably file-scope declarations from the other side of the diff, not
// statements of this function - confirm against the real file.
void oai_init_audio_capture() {
// Output (TX) configuration: stereo frames, TX descriptors auto-cleared.
i2s_config_t i2s_config_out = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX),
.sample_rate = SAMPLE_RATE,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
.communication_format = I2S_COMM_FORMAT_I2S_MSB,
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 8,
.dma_buf_len = BUFFER_SAMPLES,
.use_apll = 1,
.tx_desc_auto_clear = true,
};
if (i2s_driver_install(I2S_NUM_0, &i2s_config_out, 0, NULL) != ESP_OK) {
printf("Failed to configure I2S driver for audio output");
return;
}
// Samples per frame: sample rate * 20 / 1000 (i.e. 20 ms of audio if the
// rate is in Hz; 160 for a rate of 8000).
const auto kCaptureFrameSize = kCaptureSampleRate * 20 / 1000;
const auto kPlaybackFrameSize = kPlaybackSampleRate * 20 / 1000;

// Output pins: data-out only, no data-in line on this port.
i2s_pin_config_t pin_config_out = {
.mck_io_num = MCLK_PIN,
.bck_io_num = DAC_BCLK_PIN,
.ws_io_num = DAC_LRCLK_PIN,
.data_out_num = DAC_DATA_PIN,
.data_in_num = I2S_PIN_NO_CHANGE,
};
if (i2s_set_pin(I2S_NUM_0, &pin_config_out) != ESP_OK) {
printf("Failed to set I2S pins for audio output");
return;
}
i2s_zero_dma_buffer(I2S_NUM_0);
// Codec handles; both start as NULL.
static OpusDecoder *opus_decoder = NULL;
static OpusEncoder *opus_encoder = NULL;

// Input (RX) configuration: left channel only.
i2s_config_t i2s_config_in = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = SAMPLE_RATE,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = I2S_COMM_FORMAT_I2S_MSB,
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 8,
.dma_buf_len = BUFFER_SAMPLES,
.use_apll = 1,
};
if (i2s_driver_install(I2S_NUM_1, &i2s_config_in, 0, NULL) != ESP_OK) {
printf("Failed to configure I2S driver for audio input");
return;
}

// Input pins: data-in only; shares MCLK_PIN with the output port.
i2s_pin_config_t pin_config_in = {
.mck_io_num = MCLK_PIN,
.bck_io_num = ADC_BCLK_PIN,
.ws_io_num = ADC_LRCLK_PIN,
.data_out_num = I2S_PIN_NO_CHANGE,
.data_in_num = ADC_DATA_PIN,
};
if (i2s_set_pin(I2S_NUM_1, &pin_config_in) != ESP_OK) {
printf("Failed to set I2S pins for audio input");
return;
}
}

// Codec buffers and Opus decoder set-up.
//
// NOTE(review): this is a diff hunk whose +/- markers were lost. The pointer
// declarations and the static arrays below are two versions of the same
// storage (heap-allocated vs. statically sized from the frame-size constants);
// `output_buffer` cannot be declared both ways in one file. Confirm which
// lines are live.
opus_int16 *output_buffer = NULL;
OpusDecoder *opus_decoder = NULL;
// Static variant: one frame of interleaved samples per buffer
// (frame size * channel count elements).
static opus_int16 output_buffer[kPlaybackFrameSize * kPlaybackChannelCount];
static opus_int16 input_buffer[kCaptureFrameSize * kCaptureChannelCount];
static uint8_t encoder_output_buffer[OPUS_OUT_BUFFER_SIZE];

// Creates the global Opus decoder. If the error code is not OPUS_OK a message
// is printed and the function returns, leaving opus_decoder as whatever
// opus_decoder_create returned.
void oai_init_audio_decoder() {
int decoder_error = 0;
opus_decoder = opus_decoder_create(SAMPLE_RATE, 2, &decoder_error);
opus_decoder = opus_decoder_create(kPlaybackSampleRate, kPlaybackChannelCount,
&decoder_error);
if (decoder_error != OPUS_OK) {
printf("Failed to create OPUS decoder");
return;
}

// NOTE(review): heap variant allocates BUFFER_SAMPLES int16 elements in total
// while the decoder above is created with 2 channels, and the malloc result
// is not checked - verify the buffer is large enough for what
// oai_audio_decode asks opus_decode to produce.
output_buffer = (opus_int16 *)malloc(BUFFER_SAMPLES * sizeof(opus_int16));
}

// Decodes one Opus packet (`data`, `size` bytes) into output_buffer and, when
// opus_decode returns a positive value, hands the whole output buffer to the
// audio output.
//
// NOTE(review): this is a diff hunk whose +/- markers were lost; the two
// `decoded_size` declarations and the two write calls are old/new pairs, not
// consecutive statements.
//
// NOTE(review): the Opus API documents the fifth opus_decode argument as the
// available space in samples *per channel*. The second variant passes
// sizeof(output_buffer), which is a byte count and therefore larger than the
// per-channel capacity of the array; the first variant passes BUFFER_SAMPLES
// against a buffer of BUFFER_SAMPLES total elements for a 2-channel decoder.
// Either could let the decoder write past the buffer for long packets -
// confirm and consider passing the per-channel frame size instead.
//
// NOTE(review): both write calls send a fixed length regardless of
// decoded_size, so a short decode would replay stale samples - verify intent.
void oai_audio_decode(uint8_t *data, size_t size) {
int decoded_size =
opus_decode(opus_decoder, data, size, output_buffer, BUFFER_SAMPLES, 0);
int decoded_size = opus_decode(opus_decoder, data, size, output_buffer,
sizeof(output_buffer), 0);

// Non-positive results (errors or empty decodes) are dropped silently.
if (decoded_size > 0) {
size_t bytes_written = 0;
i2s_write(I2S_NUM_0, output_buffer, BUFFER_SAMPLES * sizeof(opus_int16),
&bytes_written, portMAX_DELAY);
oai_platform_audio_write((char *)output_buffer, sizeof(output_buffer),
&bytes_written);
}
}

// Encoder state and Opus encoder set-up.
//
// NOTE(review): this is a diff hunk whose +/- markers were lost; the paired
// opus_encoder_create calls are old/new versions of one statement, and the
// pointer globals / malloc calls presumably belong to the version that does
// not use the static buffers. Confirm which lines are live.
OpusEncoder *opus_encoder = NULL;
opus_int16 *encoder_input_buffer = NULL;
uint8_t *encoder_output_buffer = NULL;

// Creates the global Opus encoder in OPUS_APPLICATION_VOIP mode, then sets
// bitrate (OPUS_ENCODER_BITRATE), complexity (OPUS_ENCODER_COMPLEXITY) and the
// voice signal hint. On a creation or init error a message is printed and the
// function returns before any of the settings are applied.
void oai_init_audio_encoder() {
int encoder_error;
opus_encoder = opus_encoder_create(SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP,
&encoder_error);
opus_encoder = opus_encoder_create(kCaptureSampleRate, kCaptureChannelCount,
OPUS_APPLICATION_VOIP, &encoder_error);
if (encoder_error != OPUS_OK) {
printf("Failed to create OPUS encoder");
return;
}

// NOTE(review): opus_encoder_create is documented as allocating *and*
// initialising the state, so this second init looks redundant - confirm.
if (opus_encoder_init(opus_encoder, SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP) !=
OPUS_OK) {
printf("Failed to initialize OPUS encoder");
return;
}

// Return values of the ctl calls are not checked.
opus_encoder_ctl(opus_encoder, OPUS_SET_BITRATE(OPUS_ENCODER_BITRATE));
opus_encoder_ctl(opus_encoder, OPUS_SET_COMPLEXITY(OPUS_ENCODER_COMPLEXITY));
opus_encoder_ctl(opus_encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
// NOTE(review): malloc(BUFFER_SAMPLES) is a size in bytes, i.e. room for
// BUFFER_SAMPLES / 2 opus_int16 values, and neither allocation is checked.
encoder_input_buffer = (opus_int16 *)malloc(BUFFER_SAMPLES);
encoder_output_buffer = (uint8_t *)malloc(OPUS_OUT_BUFFER_SIZE);
}

void oai_send_audio(PeerConnection *peer_connection) {
size_t bytes_read = 0;

i2s_read(I2S_NUM_1, encoder_input_buffer, BUFFER_SAMPLES, &bytes_read,
portMAX_DELAY);
oai_platform_audio_read((char *)input_buffer, sizeof(input_buffer),
&bytes_read);

auto encoded_size =
opus_encode(opus_encoder, encoder_input_buffer, BUFFER_SAMPLES / 2,
encoder_output_buffer, OPUS_OUT_BUFFER_SIZE);
opus_encode(opus_encoder, input_buffer, kCaptureFrameSize,
encoder_output_buffer, sizeof(encoder_output_buffer));

peer_connection_send_audio(peer_connection, encoder_output_buffer,
encoded_size);
Expand Down
5 changes: 5 additions & 0 deletions src/media.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Public interface of the media module.
#pragma once

#include "peer_connection.h"

// Sends audio over `peer_connection`.
// NOTE(review): main.h appears to declare this same function as well -
// consider keeping a single declaration.
void oai_send_audio(PeerConnection *peer_connection);
19 changes: 19 additions & 0 deletions src/platform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Platform abstraction: audio format constants plus the functions each target
// platform is expected to implement.
// NOTE(review): `inline constexpr` variables require C++17 - confirm the
// toolchain is configured for it.
#pragma once

#include <stdlib.h>

#include "peer_connection.h"

// Audio format shared by the codec and the platform audio back end.
// Sample rates are presumably in Hz - confirm against the platform code.
inline constexpr int kCaptureSampleRate = 8000;
inline constexpr int kCaptureChannelCount = 1;
inline constexpr int kPlaybackSampleRate = 8000;
inline constexpr int kPlaybackChannelCount = 2;

// Platform-wide initialisation hook.
// NOTE(review): no caller is visible in this change - verify it is invoked.
void oai_platform_init(void);
// Called from the HTTP code after an unrecoverable request/response error.
void oai_platform_restart(void);
// Called once from the entry point before the Opus decoder is created.
void oai_platform_init_audio_capture(void);
// Plays `output_buffer_size` bytes from `output_buffer`; `bytes_written` is an
// out-parameter (callers pass a size_t initialised to 0).
void oai_platform_audio_write(char *output_buffer, size_t output_buffer_size,
size_t *bytes_written);
// Fills `input_buffer` with up to `input_buffer_size` bytes of captured audio;
// `bytes_read` is an out-parameter (callers pass a size_t initialised to 0).
void oai_platform_audio_read(char *input_buffer, size_t input_buffer_size,
size_t *bytes_read);
// NOTE(review): presumably starts or runs the loop that sends captured audio
// over `peer_connection`; no definition or caller is visible here - confirm.
void oai_platform_send_audio_task(PeerConnection *peer_connection);
Loading
Loading