ProjectVG · ImGdevel · Sep 13, 2025 · Sep 13, 2025 · Sep 14, 2025 · Sep 14, 2025
diff --git a/ProjectVG.Application/Models/Chat/ChatSegment.cs b/ProjectVG.Application/Models/Chat/ChatSegment.cs
@@ -3,24 +3,25 @@
 
 namespace ProjectVG.Application.Models.Chat
 {
-    public record ChatSegment
+    public sealed class ChatSegment : IDisposable
     {
 
-        public string Content { get; init; } = string.Empty;
+        public string Content { get; private set; } = string.Empty;
 
-        public int Order { get; init; }
+        public int Order { get; private set; }
 
-        public string? Emotion { get; init; }
+        public string? Emotion { get; private set; }
 
-        public List<string>? Actions { get; init; }
+        public List<string>? Actions { get; private set; }
 
-        public byte[]? AudioData { get; init; }
-        public string? AudioContentType { get; init; }
-        public float? AudioLength { get; init; }
+        public byte[]? AudioData { get; private set; }
+        public string? AudioContentType { get; private set; }
+        public float? AudioLength { get; private set; }
 
-        // 스트림 기반 음성 데이터 처리를 위한 새로운 프로퍼티
-        public IMemoryOwner<byte>? AudioMemoryOwner { get; init; }
-        public int AudioDataSize { get; init; }
+        // LOH 방지를 위한 ArrayPool 기반 메모리 관리
+        internal IMemoryOwner<byte>? AudioMemoryOwner { get; private set; }
+        internal int AudioDataSize { get; private set; }
+        private bool _disposed;
 
 
 
@@ -30,14 +31,13 @@ public record ChatSegment
         public bool HasEmotion => !string.IsNullOrEmpty(Emotion);
         public bool HasActions => Actions != null && Actions.Any();
 
-        /// <summary>
-        /// 메모리 효율적인 방식으로 음성 데이터에 접근합니다
-        /// </summary>
         public ReadOnlySpan<byte> GetAudioSpan()
         {
             if (AudioMemoryOwner != null && AudioDataSize > 0)
             {
-                return AudioMemoryOwner.Memory.Span.Slice(0, AudioDataSize);
+                var memory = AudioMemoryOwner.Memory;
+                var safeSize = Math.Min(AudioDataSize, memory.Length);
+                return memory.Span.Slice(0, safeSize);
             }
             if (AudioData != null)
             {
@@ -48,6 +48,8 @@ public ReadOnlySpan<byte> GetAudioSpan()
 
 
 
+        private ChatSegment() { }
+
         public static ChatSegment Create(string content, string? emotion = null, List<string>? actions = null, int order = 0)
         {
             return new ChatSegment
@@ -69,36 +71,80 @@ public static ChatSegment CreateAction(string action, int order = 0)
             return Create("", null, new List<string> { action }, order);
         }
 
-        // Method to add audio data (returns new record instance)
         public ChatSegment WithAudioData(byte[] audioData, string audioContentType, float audioLength)
         {
+            // Builds a new segment that copies Content, Order, Emotion and Actions from this
+            // instance (Actions is the same list reference, not a copy) and attaches the
+            // supplied byte-array audio. AudioMemoryOwner and AudioDataSize are not set on
+            // the result, and this instance is not modified.
-            return this with
+            return new ChatSegment
             {
+                Content = this.Content,
+                Order = this.Order,
+                Emotion = this.Emotion,
+                Actions = this.Actions,
                 AudioData = audioData,
                 AudioContentType = audioContentType,
                 AudioLength = audioLength
             };
         }
 
-        /// <summary>
-        /// 메모리 효율적인 방식으로 음성 데이터를 추가합니다 (LOH 방지)
-        /// </summary>
+        /// <summary>
+        /// Creates a new segment that holds <paramref name="audioMemoryOwner"/> as its audio buffer,
+        /// copying Content, Order, Emotion and Actions from this instance.
+        /// Caution: the AudioMemoryOwner held by the original instance is released and cleared,
+        /// and its AudioDataSize is reset to 0.
+        /// </summary>
+        /// <param name="audioMemoryOwner">Owner of the audio buffer; must not be null.</param>
+        /// <param name="audioDataSize">Number of valid bytes; must be between 0 and the owner's memory length.</param>
+        /// <param name="audioContentType">Content type stored on the new segment.</param>
+        /// <param name="audioLength">Audio length stored on the new segment.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="audioMemoryOwner"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="audioDataSize"/> is negative or larger than the owner's memory length.</exception>
         public ChatSegment WithAudioMemory(IMemoryOwner<byte> audioMemoryOwner, int audioDataSize, string audioContentType, float audioLength)
         {
-            return this with
+            if (audioMemoryOwner is null)
+                throw new ArgumentNullException(nameof(audioMemoryOwner));
+
+            if (audioDataSize < 0 || audioDataSize > audioMemoryOwner.Memory.Length)
+                throw new ArgumentOutOfRangeException(
+                    nameof(audioDataSize),
+                    audioDataSize,
+                    $"audioDataSize는 0 이상 {audioMemoryOwner.Memory.Length} 이하여야 합니다.");
+
+            // Release the previous owner and clear this instance's audio state.
+            // When the caller passes the very owner this instance already holds, it must not
+            // be disposed here: the new segment would otherwise wrap an already-released buffer.
+            if (!ReferenceEquals(this.AudioMemoryOwner, audioMemoryOwner))
+                this.AudioMemoryOwner?.Dispose();
+            this.AudioMemoryOwner = null;
+            this.AudioDataSize = 0;
+
+            return new ChatSegment
             {
+                Content = this.Content,
+                Order = this.Order,
+                Emotion = this.Emotion,
+                Actions = this.Actions,
                 AudioMemoryOwner = audioMemoryOwner,
                 AudioDataSize = audioDataSize,
                 AudioContentType = audioContentType,
                 AudioLength = audioLength,
-                // 기존 AudioData는 null로 설정하여 중복 저장 방지
                 AudioData = null
             };
         }
 
         /// <summary>
-        /// 음성 데이터를 배열로 변환합니다 (필요한 경우에만 사용)
+        /// Builds a new segment with the given audio memory attached; the original instance is left unchanged.
         /// </summary>
+        public ChatSegment AttachAudioMemory(IMemoryOwner<byte> audioMemoryOwner, int audioDataSize, string audioContentType, float audioLength)
+        {
+            if (audioMemoryOwner is null)
+            {
+                throw new ArgumentNullException(nameof(audioMemoryOwner));
+            }
+
+            // The declared size has to fit inside the buffer exposed by the owner.
+            var capacity = audioMemoryOwner.Memory.Length;
+            var sizeFits = audioDataSize >= 0 && audioDataSize <= capacity;
+            if (!sizeFits)
+            {
+                throw new ArgumentOutOfRangeException(
+                    nameof(audioDataSize),
+                    audioDataSize,
+                    $"audioDataSize는 0 이상 {capacity} 이하여야 합니다.");
+            }
+
+            // Carry over the text-related state and swap in the pooled audio buffer.
+            var attached = new ChatSegment
+            {
+                Content = this.Content,
+                Order = this.Order,
+                Emotion = this.Emotion,
+                Actions = this.Actions,
+                AudioMemoryOwner = audioMemoryOwner,
+                AudioDataSize = audioDataSize,
+                AudioContentType = audioContentType,
+                AudioLength = audioLength,
+                AudioData = null
+            };
+
+            return attached;
+        }
+
+        // 필요시만 사용 - LOH 위험 있음
+
+        // 필요시만 사용 - LOH 위험 있음
         public byte[]? GetAudioDataAsArray()
         {
             if (AudioData != null)
@@ -115,12 +161,11 @@ public ChatSegment WithAudioMemory(IMemoryOwner<byte> audioMemoryOwner, int audi
             return null;
         }
 
-        /// <summary>
-        /// 리소스 해제 (IMemoryOwner 해제)
-        /// </summary>
         public void Dispose()
         {
+            // Repeated calls are no-ops.
+            if (_disposed) return;
             AudioMemoryOwner?.Dispose();
+            // Drop the released owner and its size so GetAudioSpan no longer takes the
+            // AudioMemoryOwner branch and reads memory that has been handed back.
+            AudioMemoryOwner = null;
+            AudioDataSize = 0;
+            _disposed = true;
         }
     }
 }
diff --git a/ProjectVG.Application/Services/Chat/Processors/ChatTTSProcessor.cs b/ProjectVG.Application/Services/Chat/Processors/ChatTTSProcessor.cs
@@ -46,10 +46,14 @@ public async Task ProcessAsync(ChatProcessContext context)
             var processedCount = 0;
 
             foreach (var (idx, ttsResult) in ttsResults.OrderBy(x => x.idx)) {
-                if (ttsResult.Success == true && ttsResult.AudioData != null) {
+                if (ttsResult.Success == true && ttsResult.AudioMemoryOwner != null) {
                     var segment = context.Segments?[idx];
                     if (segment != null && context.Segments != null) {
-                        context.Segments[idx] = segment.WithAudioData(ttsResult.AudioData, ttsResult.ContentType!, ttsResult.AudioLength ?? 0f);
+                        context.Segments[idx] = segment.WithAudioMemory(
+                            ttsResult.AudioMemoryOwner,
+                            ttsResult.AudioDataSize,
+                            ttsResult.ContentType!,
+                            ttsResult.AudioLength ?? 0f);
                     }
 
                     if (ttsResult.AudioLength.HasValue) {

diff --git a/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/Models/TextToSpeechResponse.cs b/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/Models/TextToSpeechResponse.cs
@@ -1,7 +1,11 @@
 using System.Text.Json.Serialization;
+using System.Buffers;
 
 namespace ProjectVG.Infrastructure.Integrations.TextToSpeechClient.Models
 {
+    /// <summary>
+    /// TTS API 응답 모델 - IMemoryOwner 기반 메모리 관리
+    /// </summary>
     public class TextToSpeechResponse
     {
         /// <summary>
@@ -17,11 +21,24 @@ public class TextToSpeechResponse
         public string? ErrorMessage { get; set; }
 
         /// <summary>
-        /// 오디오 데이터 (바이트 배열)
+        /// 오디오 데이터 (바이트 배열) - 레거시 호환성용
         /// </summary>
         [JsonIgnore]
         public byte[]? AudioData { get; set; }
 
+        /// <summary>
+        /// ArrayPool 기반 오디오 메모리 소유자 (LOH 방지)
+        /// 주의: ChatSegment로 이전하지 않을 경우 직접 Dispose() 필요
+        /// </summary>
+        [JsonIgnore]
+        public IMemoryOwner<byte>? AudioMemoryOwner { get; set; }
+
+        /// <summary>
+        /// 실제 오디오 데이터 크기
+        /// </summary>
+        [JsonIgnore]
+        public int AudioDataSize { get; set; }
-        [JsonIgnore]
-        public IMemoryOwner<byte>? AudioMemoryOwner { get; set; }
-
-        /// <summary>
-        /// 실제 오디오 데이터 크기
-        /// </summary>
-        [JsonIgnore]
-        public int AudioDataSize { get; set; }
+        [JsonIgnore]
+        public IMemoryOwner<byte>? AudioMemoryOwner { get; internal set; }
+
+        /// <summary>
+        /// 실제 오디오 데이터 크기
+        /// </summary>
+        [JsonIgnore]
+        public int AudioDataSize { get; internal set; }
-        [JsonIgnore]
-        public IMemoryOwner<byte>? AudioMemoryOwner { get; set; }
-
-        /// <summary>
-        /// 실제 오디오 데이터 크기
-        /// </summary>
-        [JsonIgnore]
-        public int AudioDataSize { get; set; }
+        [JsonIgnore]
+        public IMemoryOwner<byte>? AudioMemoryOwner { get; internal set; }
+
+        /// <summary>
+        /// 실제 오디오 데이터 크기
+        /// </summary>
+        [JsonIgnore]
+        public int AudioDataSize { get; internal set; }
+
         /// <summary>
         /// 오디오 길이 (초)
         /// </summary>
@@ -39,5 +56,20 @@ public class TextToSpeechResponse
         /// </summary>
         [JsonIgnore]
         public int StatusCode { get; set; } = 200;
+
+        /// <summary>
+        /// Hands the audio memory owner and its data size over to the caller.
+        /// </summary>
+        public bool TryTakeAudioOwner(out IMemoryOwner<byte>? owner, out int size)
+        {
+            var taken = AudioMemoryOwner;
+            var takenSize = AudioDataSize;
+
+            // Once handed over, forget the owner on this object so it cannot be released twice.
+            AudioDataSize = 0;
+            AudioMemoryOwner = null;
+
+            owner = taken;
+            size = takenSize;
+            return taken != null;
+        }
     }
 } 
diff --git a/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/TextToSpeechClient.cs b/ProjectVG.Infrastructure/Integrations/TextToSpeechClient/TextToSpeechClient.cs
@@ -50,8 +50,10 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
                     return voiceResponse;
                 }
 
-                // 스트림 기반으로 음성 데이터 읽기 (LOH 방지)
-                voiceResponse.AudioData = await ReadAudioDataWithPoolAsync(response.Content);
+                // ArrayPool 기반으로 음성 데이터 읽기 (LOH 방지)
+                var (memoryOwner, dataSize) = await ReadAudioDataWithPoolAsync(response.Content);
+                voiceResponse.AudioMemoryOwner = memoryOwner;
+                voiceResponse.AudioDataSize = dataSize;
                 voiceResponse.ContentType = response.Content.Headers.ContentType?.ToString();
 
                 if (response.Headers.Contains("X-Audio-Length"))
@@ -64,7 +66,7 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
                 }
 
                 _logger.LogDebug("[TTS][Response] 오디오 길이: {AudioLength:F2}초, ContentType: {ContentType}, 바이트: {Length}, 소요시간: {Elapsed}ms",
-                    voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioData?.Length ?? 0, elapsed);
+                    voiceResponse.AudioLength, voiceResponse.ContentType, voiceResponse.AudioDataSize, elapsed);
 
                 return voiceResponse;
             }
@@ -82,44 +84,63 @@ public async Task<TextToSpeechResponse> TextToSpeechAsync(TextToSpeechRequest re
         /// <summary>
         /// ArrayPool을 사용하여 스트림 기반으로 음성 데이터를 읽습니다 (LOH 할당 방지)
         /// </summary>
+        /// <returns>
+        /// The pooled memory owner together with the number of bytes read, or (null, 0) when the
+        /// stream is empty or reading fails. On success the caller receives the owner and is
+        /// expected to dispose it.
+        /// </returns>
-        private async Task<byte[]?> ReadAudioDataWithPoolAsync(HttpContent content)
+        private async Task<(IMemoryOwner<byte>?, int)> ReadAudioDataWithPoolAsync(HttpContent content)
         {
             const int chunkSize = 32768; // 32KB 청크 크기
-            byte[]? buffer = null;
-            MemoryStream? memoryStream = null;
+            byte[]? readBuffer = null;
+            IMemoryOwner<byte>? owner = null;
 
             try
             {
-                buffer = _arrayPool.Rent(chunkSize);
-                memoryStream = new MemoryStream();
-
+                readBuffer = _arrayPool.Rent(chunkSize);
                 using var stream = await content.ReadAsStreamAsync();
-                int bytesRead;
 
-                // 청크 단위로 데이터 읽어서 MemoryStream에 복사
-                while ((bytesRead = await stream.ReadAsync(buffer, 0, chunkSize)) > 0)
+                // Rent the initial buffer; it is grown incrementally as data arrives.
+                owner = MemoryPool<byte>.Shared.Rent(chunkSize);
+                int total = 0;
+                while (true)
+                {
+                    // Grow when there is no free space left.
+                    if (total == owner.Memory.Length)
+                    {
+                        // Double the capacity, clamped to the pool's maximum. The comparison is done
+                        // before multiplying so cur * 2 cannot overflow int.
+                        int cur = owner.Memory.Length;
+                        int max = MemoryPool<byte>.Shared.MaxBufferSize;
+                        if (cur >= max)
+                        {
+                            // Cannot grow any further; without this guard the loop would keep
+                            // renting same-sized buffers. The catch block below handles the failure.
+                            throw new InvalidOperationException("TTS audio stream exceeds the maximum pooled buffer size.");
+                        }
+                        int next = (cur > (max / 2)) ? max : cur * 2;
+                        var newOwner = MemoryPool<byte>.Shared.Rent(next);
+                        owner.Memory.Span.Slice(0, total).CopyTo(newOwner.Memory.Span);
+                        owner.Dispose();
+                        owner = newOwner;
+                    }
+
+                    // Never read more than the space that remains in the destination buffer.
+                    int toRead = Math.Min(chunkSize, owner.Memory.Length - total);
+                    int bytesRead = await stream.ReadAsync(readBuffer, 0, toRead);
+                    if (bytesRead == 0) break;
+                    readBuffer.AsSpan(0, bytesRead).CopyTo(owner.Memory.Span.Slice(total));
+                    total += bytesRead;
+                }
+
+                if (total == 0)
                 {
-                    await memoryStream.WriteAsync(buffer, 0, bytesRead);
+                    owner.Dispose();
+                    _logger.LogDebug("[TTS][ArrayPool] 비어있는 오디오 스트림");
+                    return (null, 0);
                 }
 
-                var result = memoryStream.ToArray();
                 _logger.LogDebug("[TTS][ArrayPool] 음성 데이터 읽기 완료: {Size} bytes, 청크 크기: {ChunkSize}",
-                    result.Length, chunkSize);
+                    total, chunkSize);
 
-                return result;
+                return (owner, total);
             }
             catch (Exception ex)
             {
                 _logger.LogError(ex, "[TTS][ArrayPool] 음성 데이터 읽기 실패");
-                return null;
+                owner?.Dispose();
+                return (null, 0);
             }
             finally
             {
-                if (buffer != null)
+                if (readBuffer != null)
                 {
-                    _arrayPool.Return(buffer);
+                    _arrayPool.Return(readBuffer);
                 }
-                memoryStream?.Dispose();
+                // On the success path the owner is returned to the caller; on failure it is disposed in the catch block above.
             }
         }