Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 71 additions & 26 deletions ProjectVG.Application/Models/Chat/ChatSegment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,25 @@

namespace ProjectVG.Application.Models.Chat
{
public record ChatSegment
public sealed class ChatSegment : IDisposable
{

public string Content { get; init; } = string.Empty;
public string Content { get; private set; } = string.Empty;

public int Order { get; init; }
public int Order { get; private set; }

public string? Emotion { get; init; }
public string? Emotion { get; private set; }

public List<string>? Actions { get; init; }
public List<string>? Actions { get; private set; }

public byte[]? AudioData { get; init; }
public string? AudioContentType { get; init; }
public float? AudioLength { get; init; }
public byte[]? AudioData { get; private set; }
public string? AudioContentType { get; private set; }
public float? AudioLength { get; private set; }

// 스트림 기반 음성 데이터 처리를 위한 새로운 프로퍼티
public IMemoryOwner<byte>? AudioMemoryOwner { get; init; }
public int AudioDataSize { get; init; }
// LOH 방지를 위한 ArrayPool 기반 메모리 관리
internal IMemoryOwner<byte>? AudioMemoryOwner { get; private set; }
internal int AudioDataSize { get; private set; }
private bool _disposed;



Expand All @@ -30,14 +31,13 @@ public record ChatSegment
public bool HasEmotion => !string.IsNullOrEmpty(Emotion);
public bool HasActions => Actions != null && Actions.Any();

/// <summary>
/// 메모리 효율적인 방식으로 음성 데이터에 접근합니다
/// </summary>
public ReadOnlySpan<byte> GetAudioSpan()
{
if (AudioMemoryOwner != null && AudioDataSize > 0)
{
return AudioMemoryOwner.Memory.Span.Slice(0, AudioDataSize);
var memory = AudioMemoryOwner.Memory;
var safeSize = Math.Min(AudioDataSize, memory.Length);
return memory.Span.Slice(0, safeSize);
}
if (AudioData != null)
{
Expand All @@ -48,6 +48,8 @@ public ReadOnlySpan<byte> GetAudioSpan()



private ChatSegment() { }

public static ChatSegment Create(string content, string? emotion = null, List<string>? actions = null, int order = 0)
{
return new ChatSegment
Expand All @@ -69,36 +71,80 @@ public static ChatSegment CreateAction(string action, int order = 0)
return Create("", null, new List<string> { action }, order);
}

/// <summary>
/// Creates a new segment that copies this segment's content, order, emotion and
/// actions and carries the supplied array-backed audio payload.
/// </summary>
/// <remarks>
/// The <c>Actions</c> list is shared by reference with this instance, not cloned.
/// The new segment's <c>AudioMemoryOwner</c> is left unset; any pooled buffer held
/// by this instance stays with this instance.
/// </remarks>
/// <param name="audioData">Audio bytes stored on the new segment.</param>
/// <param name="audioContentType">Content type stored alongside the audio.</param>
/// <param name="audioLength">Audio length stored on the new segment (presumably seconds; confirm against the TTS client).</param>
/// <returns>A new <see cref="ChatSegment"/>; this instance is not modified.</returns>
public ChatSegment WithAudioData(byte[] audioData, string audioContentType, float audioLength)
{
    // ChatSegment is a class, so the copy is spelled out with an object
    // initializer; the record-only "this with" form is not available.
    return new ChatSegment
    {
        Content = this.Content,
        Order = this.Order,
        Emotion = this.Emotion,
        Actions = this.Actions,
        AudioData = audioData,
        AudioContentType = audioContentType,
        AudioLength = audioLength
    };
}

/// <summary>
/// Creates a new segment that holds the supplied pooled audio buffer, releasing
/// any pooled buffer this instance was holding.
/// </summary>
/// <remarks>
/// Caution: this method mutates the receiver. Its <c>AudioMemoryOwner</c> is
/// disposed and cleared and its <c>AudioDataSize</c> is reset to 0. Use
/// <see cref="AttachAudioMemory"/> when the original must stay untouched.
/// The <c>Actions</c> list is shared by reference with the new segment.
/// </remarks>
/// <param name="audioMemoryOwner">Owner of the buffer containing the audio bytes; stored on the new segment.</param>
/// <param name="audioDataSize">Number of valid bytes at the start of the buffer.</param>
/// <param name="audioContentType">Content type stored alongside the audio.</param>
/// <param name="audioLength">Audio length stored on the new segment (presumably seconds; confirm against the TTS client).</param>
/// <returns>A new <see cref="ChatSegment"/> whose <c>AudioData</c> array is null.</returns>
/// <exception cref="ArgumentNullException"><paramref name="audioMemoryOwner"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="audioDataSize"/> is negative or larger than the owner's buffer.
/// </exception>
public ChatSegment WithAudioMemory(IMemoryOwner<byte> audioMemoryOwner, int audioDataSize, string audioContentType, float audioLength)
{
    if (audioMemoryOwner is null)
    {
        throw new ArgumentNullException(nameof(audioMemoryOwner));
    }

    if (audioDataSize < 0 || audioDataSize > audioMemoryOwner.Memory.Length)
    {
        throw new ArgumentOutOfRangeException(
            nameof(audioDataSize),
            audioDataSize,
            $"audioDataSize는 0 이상 {audioMemoryOwner.Memory.Length} 이하여야 합니다.");
    }

    // Release the buffer this instance was holding, unless the caller passed that
    // very same owner back in: disposing it then would hand the new segment an
    // already-released buffer.
    if (!object.ReferenceEquals(this.AudioMemoryOwner, audioMemoryOwner))
    {
        this.AudioMemoryOwner?.Dispose();
    }

    // Either way this instance no longer refers to a pooled buffer.
    this.AudioMemoryOwner = null;
    this.AudioDataSize = 0;

    return new ChatSegment
    {
        Content = this.Content,
        Order = this.Order,
        Emotion = this.Emotion,
        Actions = this.Actions,
        AudioMemoryOwner = audioMemoryOwner,
        AudioDataSize = audioDataSize,
        AudioContentType = audioContentType,
        AudioLength = audioLength,
        // Keep the array form empty so the audio is not stored twice.
        AudioData = null
    };
}

/// <summary>
/// Builds a new segment that holds the supplied pooled audio buffer while leaving
/// this instance unchanged.
/// </summary>
/// <remarks>
/// Content, order, emotion and actions are copied from this instance (the
/// <c>Actions</c> list by reference). The new segment's <c>AudioData</c> array is null.
/// </remarks>
/// <param name="audioMemoryOwner">Owner of the buffer containing the audio bytes; stored on the new segment.</param>
/// <param name="audioDataSize">Number of valid bytes at the start of the buffer.</param>
/// <param name="audioContentType">Content type stored alongside the audio.</param>
/// <param name="audioLength">Audio length stored on the new segment (presumably seconds; confirm against the TTS client).</param>
/// <returns>A new <see cref="ChatSegment"/> referencing <paramref name="audioMemoryOwner"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="audioMemoryOwner"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="audioDataSize"/> is negative or larger than the owner's buffer.
/// </exception>
public ChatSegment AttachAudioMemory(IMemoryOwner<byte> audioMemoryOwner, int audioDataSize, string audioContentType, float audioLength)
{
    if (audioMemoryOwner is null)
    {
        throw new ArgumentNullException(nameof(audioMemoryOwner));
    }

    // The size must describe a prefix of the rented buffer.
    bool sizeFitsBuffer = audioDataSize >= 0 && audioDataSize <= audioMemoryOwner.Memory.Length;
    if (!sizeFitsBuffer)
    {
        throw new ArgumentOutOfRangeException(
            nameof(audioDataSize),
            audioDataSize,
            $"audioDataSize는 0 이상 {audioMemoryOwner.Memory.Length} 이하여야 합니다.");
    }

    var withAudio = new ChatSegment
    {
        // Carried over from this instance.
        Content = this.Content,
        Order = this.Order,
        Emotion = this.Emotion,
        Actions = this.Actions,

        // Audio payload: pooled buffer only, no array copy.
        AudioData = null,
        AudioMemoryOwner = audioMemoryOwner,
        AudioDataSize = audioDataSize,
        AudioContentType = audioContentType,
        AudioLength = audioLength
    };

    return withAudio;
}

// 필요시만 사용 - LOH 위험 있음
public byte[]? GetAudioDataAsArray()
{
if (AudioData != null)
Expand All @@ -115,12 +161,11 @@ public ChatSegment WithAudioMemory(IMemoryOwner<byte> audioMemoryOwner, int audi
return null;
}

/// <summary>
/// Releases the pooled audio buffer held by this segment, if any.
/// </summary>
/// <remarks>
/// Only the first call has an effect; later calls return immediately.
/// </remarks>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    AudioMemoryOwner?.Dispose();

    // Drop the reference and size as well, so members that test
    // "AudioMemoryOwner != null && AudioDataSize > 0" (e.g. GetAudioSpan)
    // cannot read from a buffer that has already been released.
    AudioMemoryOwner = null;
    AudioDataSize = 0;

    _disposed = true;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,14 @@ public async Task ProcessAsync(ChatProcessContext context)
var processedCount = 0;

foreach (var (idx, ttsResult) in ttsResults.OrderBy(x => x.idx)) {
if (ttsResult.Success == true && ttsResult.AudioData != null) {
if (ttsResult.Success == true && ttsResult.AudioMemoryOwner != null) {
var segment = context.Segments?[idx];
if (segment != null && context.Segments != null) {
context.Segments[idx] = segment.WithAudioData(ttsResult.AudioData, ttsResult.ContentType!, ttsResult.AudioLength ?? 0f);
context.Segments[idx] = segment.WithAudioMemory(
ttsResult.AudioMemoryOwner,
ttsResult.AudioDataSize,
ttsResult.ContentType!,
ttsResult.AudioLength ?? 0f);
}

if (ttsResult.AudioLength.HasValue) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
using System.Text.Json.Serialization;
using System.Buffers;

namespace ProjectVG.Infrastructure.Integrations.TextToSpeechClient.Models
{
/// <summary>
/// TTS API 응답 모델 - IMemoryOwner 기반 메모리 관리
/// </summary>
public class TextToSpeechResponse
{
/// <summary>
Expand All @@ -17,11 +21,24 @@ public class TextToSpeechResponse
public string? ErrorMessage { get; set; }

/// <summary>
/// 오디오 데이터 (바이트 배열)
/// 오디오 데이터 (바이트 배열) - 레거시 호환성용
/// </summary>
[JsonIgnore]
public byte[]? AudioData { get; set; }

/// <summary>
/// ArrayPool 기반 오디오 메모리 소유자 (LOH 방지)
/// 주의: ChatSegment로 이전하지 않을 경우 직접 Dispose() 필요
/// </summary>
[JsonIgnore]
public IMemoryOwner<byte>? AudioMemoryOwner { get; set; }

/// <summary>
/// 실제 오디오 데이터 크기
/// </summary>
[JsonIgnore]
public int AudioDataSize { get; set; }
Comment on lines +33 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

IMemoryOwner/크기 세터 접근 축소로 오·남용 방지

소비자가 임의로 교체하면 이중 Dispose/누수 가능성이 있습니다. 동일 어셈블리에서만 설정되도록 setter를 internal로 축소하세요.

-        public IMemoryOwner<byte>? AudioMemoryOwner { get; set; }
+        public IMemoryOwner<byte>? AudioMemoryOwner { get; internal set; }

-        public int AudioDataSize { get; set; }
+        public int AudioDataSize { get; internal set; }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
[JsonIgnore]
public IMemoryOwner<byte>? AudioMemoryOwner { get; set; }
/// <summary>
/// 실제 오디오 데이터 크기
/// </summary>
[JsonIgnore]
public int AudioDataSize { get; set; }
[JsonIgnore]
public IMemoryOwner<byte>? AudioMemoryOwner { get; internal set; }
/// <summary>
/// 실제 오디오 데이터 크기
/// </summary>
[JsonIgnore]
public int AudioDataSize { get; internal set; }
🤖 Prompt for AI Agents
In
ProjectVG.Infrastructure/Integrations/TextToSpeechClient/Models/TextToSpeechResponse.cs
around lines 33 to 40, the public setters for AudioMemoryOwner and AudioDataSize
should be restricted to prevent external replacement and potential
double-dispose/leak; change both properties so only code within the same
assembly can set them (make their setters internal) while keeping their getters
public.


/// <summary>
/// 오디오 길이 (초)
/// </summary>
Expand All @@ -39,5 +56,20 @@ public class TextToSpeechResponse
/// </summary>
[JsonIgnore]
public int StatusCode { get; set; } = 200;

/// <summary>
/// Hands the audio memory owner and its data size to the caller and clears both
/// on this response.
/// </summary>
/// <param name="owner">Receives the current <c>AudioMemoryOwner</c>, or null when none is set.</param>
/// <param name="size">Receives the current <c>AudioDataSize</c>.</param>
/// <returns><c>true</c> when a non-null owner was handed over; otherwise <c>false</c>.</returns>
public bool TryTakeAudioOwner(out IMemoryOwner<byte>? owner, out int size)
{
    // Snapshot the current state before clearing it.
    IMemoryOwner<byte>? takenOwner = AudioMemoryOwner;
    int takenSize = AudioDataSize;

    // After the hand-over this response must no longer reference the buffer,
    // so it cannot be released a second time through this object.
    AudioDataSize = 0;
    AudioMemoryOwner = null;

    owner = takenOwner;
    size = takenSize;
    return takenOwner != null;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,10 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
return voiceResponse;
}

// 스트림 기반으로 음성 데이터 읽기 (LOH 방지)
voiceResponse.AudioData = await ReadAudioDataWithPoolAsync(response.Content);
// ArrayPool 기반으로 음성 데이터 읽기 (LOH 방지)
var (memoryOwner, dataSize) = await ReadAudioDataWithPoolAsync(response.Content);
voiceResponse.AudioMemoryOwner = memoryOwner;
voiceResponse.AudioDataSize = dataSize;
voiceResponse.ContentType = response.Content.Headers.ContentType?.ToString();

if (response.Headers.Contains("X-Audio-Length"))
Expand All @@ -64,7 +66,7 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
}

_logger.LogDebug("[TTS][Response] 오디오 길이: {AudioLength:F2}초, ContentType: {ContentType}, 바이트: {Length}, 소요시간: {Elapsed}ms",
voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioData?.Length ?? 0, elapsed);
voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioDataSize, elapsed);

return voiceResponse;
}
Expand All @@ -82,44 +84,63 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
/// <summary>
/// ArrayPool을 사용하여 스트림 기반으로 음성 데이터를 읽습니다 (LOH 할당 방지)
/// </summary>
private async Task<byte[]?> ReadAudioDataWithPoolAsync(HttpContent content)
private async Task<(IMemoryOwner<byte>?, int)> ReadAudioDataWithPoolAsync(HttpContent content)
{
const int chunkSize = 32768; // 32KB 청크 크기
byte[]? buffer = null;
MemoryStream? memoryStream = null;
byte[]? readBuffer = null;
IMemoryOwner<byte>? owner = null;

try
{
buffer = _arrayPool.Rent(chunkSize);
memoryStream = new MemoryStream();

readBuffer = _arrayPool.Rent(chunkSize);
using var stream = await content.ReadAsStreamAsync();
int bytesRead;

// 청크 단위로 데이터 읽어서 MemoryStream에 복사
while ((bytesRead = await stream.ReadAsync(buffer, 0, chunkSize)) > 0)
// 초기 버퍼 렌트(증분 확장 전략)
owner = MemoryPool<byte>.Shared.Rent(chunkSize);
int total = 0;
while (true)
{
// 여유 공간 없으면 확장
if (total == owner.Memory.Length)
{
var newOwner = MemoryPool<byte>.Shared.Rent(Math.Min(owner.Memory.Length * 2, int.MaxValue));
owner.Memory.Span.Slice(0, total).CopyTo(newOwner.Memory.Span);
owner.Dispose();
owner = newOwner;
}
Comment on lines +106 to +110
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

버퍼 확장 시 int 오버플로 가능성 — 안전한 배수 계산 필요

owner.Memory.Length*2에서 오버플로가 발생하면 음수 크기로 Rent 호출될 수 있습니다.

-                        var newOwner = MemoryPool<byte>.Shared.Rent(Math.Min(owner.Memory.Length * 2, int.MaxValue));
+                        int cur = owner.Memory.Length;
+                        int next = (cur > (int.MaxValue / 2)) ? int.MaxValue : cur * 2;
+                        var newOwner = MemoryPool<byte>.Shared.Rent(next);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
var newOwner = MemoryPool<byte>.Shared.Rent(Math.Min(owner.Memory.Length * 2, int.MaxValue));
owner.Memory.Span.Slice(0, total).CopyTo(newOwner.Memory.Span);
owner.Dispose();
owner = newOwner;
}
int cur = owner.Memory.Length;
int next = (cur > (int.MaxValue / 2)) ? int.MaxValue : cur * 2;
var newOwner = MemoryPool<byte>.Shared.Rent(next);
owner.Memory.Span.Slice(0, total).CopyTo(newOwner.Memory.Span);
owner.Dispose();
owner = newOwner;
}
🤖 Prompt for AI Agents
In
ProjectVG.Infrastructure/Integrations/TextToSpeechClient/TextToSpeechClient.cs
around lines 106 to 110, the expression owner.Memory.Length * 2 can overflow
causing Rent to be called with a negative size; compute the grown buffer size
safely by using either a checked long multiplication or an overflow-safe branch:
determine newSize = owner.Memory.Length >= int.MaxValue/2 ? int.MaxValue :
owner.Memory.Length * 2, then ensure newSize is at least total (newSize =
Math.Max(newSize, total)) and finally cast to int before calling
MemoryPool<byte>.Shared.Rent(newSize); replace the existing multiplication with
this safe size calculation and keep the subsequent copy, dispose and
reassignment logic.


int toRead = Math.Min(chunkSize, owner.Memory.Length - total);
int bytesRead = await stream.ReadAsync(readBuffer, 0, toRead);
if (bytesRead == 0) break;
readBuffer.AsSpan(0, bytesRead).CopyTo(owner.Memory.Span.Slice(total));
total += bytesRead;
}

if (total == 0)
{
await memoryStream.WriteAsync(buffer, 0, bytesRead);
owner.Dispose();
_logger.LogDebug("[TTS][ArrayPool] 비어있는 오디오 스트림");
return (null, 0);
}

var result = memoryStream.ToArray();
_logger.LogDebug("[TTS][ArrayPool] 음성 데이터 읽기 완료: {Size} bytes, 청크 크기: {ChunkSize}",
result.Length, chunkSize);
total, chunkSize);

return result;
return (owner, total);
}
catch (Exception ex)
{
_logger.LogError(ex, "[TTS][ArrayPool] 음성 데이터 읽기 실패");
return null;
owner?.Dispose();
return (null, 0);
}
finally
{
if (buffer != null)
if (readBuffer != null)
{
_arrayPool.Return(buffer);
_arrayPool.Return(readBuffer);
}
memoryStream?.Dispose();
// owner는 정상 경로에서 호출자에게 반환됨. 예외 시 위에서 Dispose 처리.
}
}

Expand Down
Loading
Loading