Skip to content

Commit 8fece95

Browse files
committed
feat(talkscriber): add initial implementation and changelog for Talkscriber STT plugin
1 parent 17e4fea commit 8fece95

File tree

7 files changed

+46
-26
lines changed

7 files changed

+46
-26
lines changed

livekit-agents/pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,6 @@ tavus = ["livekit-plugins-tavus>=1.2.15"]
9999
turn-detector = ["livekit-plugins-turn-detector>=1.2.15"]
100100
ultravox = ["livekit-plugins-ultravox>=1.2.15"]
101101
upliftai = ["livekit-plugins-upliftai>=1.2.15"]
102-
talkscriber = ["livekit-plugins-talkscriber>=0.1.0"]
103102

104103

105104
[project.urls]
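(file header not captured in this extract; presumably livekit-plugins/livekit-plugins-talkscriber/CHANGELOG.md — confirm against the commit)

Lines changed: 4 additions & 0 deletions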
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Changelog
2+
3+
## [0.1.0] - Initial release
4+
- Initial implementation of Talkscriber STT plugin for LiveKit

livekit-plugins/livekit-plugins-talkscriber/livekit/plugins/talkscriber/__init__.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,23 @@
1+
from livekit.agents import Plugin
2+
3+
from .log import logger
14
from .stt import STT, SpeechStream
25
from .tts import TTS as TTSClass, ChunkedStream, SynthesizeStream, configure_for_server
36
from .version import __version__
47

5-
__all__ = ["STT", "SpeechStream", "TTS", "ChunkedStream", "SynthesizeStream", "configure_for_server", "__version__"]
8+
__all__ = [
9+
"STT",
10+
"SpeechStream",
11+
"TTS",
12+
"ChunkedStream",
13+
"SynthesizeStream",
14+
"configure_for_server",
15+
"__version__",
16+
]
617

718
# Re-export TTS with the expected name
819
TTS = TTSClass
920

10-
from livekit.agents import Plugin
11-
from .log import logger
12-
1321

1422
class TalkscriberPlugin(Plugin):
1523
def __init__(self):
@@ -25,4 +33,4 @@ def __init__(self):
2533
__pdoc__ = {}
2634

2735
for n in NOT_IN_ALL:
28-
__pdoc__[n] = False
36+
__pdoc__[n] = False

livekit-plugins/livekit-plugins-talkscriber/livekit/plugins/talkscriber/log.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
import logging
22

3-
logger = logging.getLogger("livekit.plugins.talkscriber")
3+
logger = logging.getLogger("livekit.plugins.talkscriber")

livekit-plugins/livekit-plugins-talkscriber/livekit/plugins/talkscriber/stt.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616

1717
import asyncio
1818
import json
19-
import logging
2019
import os
2120
import uuid
2221
from dataclasses import dataclass
2322

2423
import aiohttp
2524
import numpy as np
25+
2626
from livekit import rtc
2727
from livekit.agents import (
2828
DEFAULT_API_CONNECT_OPTIONS,
@@ -34,10 +34,18 @@
3434
)
3535
from livekit.agents.utils import AudioBuffer
3636

37-
# Talkscriber WebSocket API endpoint
38-
BASE_URL = "wss://api.talkscriber.com:9090"
37+
from .log import logger
38+
39+
# Talkscriber STT WebSocket API endpoint
40+
# Support environment variables for flexible deployment
41+
# Default to Talkscriber API server as per reference implementation
42+
STT_SERVER_HOST = os.environ.get("STT_SERVER_HOST", "api.talkscriber.com")
43+
STT_SERVER_PORT = int(os.environ.get("STT_SERVER_PORT", "9090"))
44+
STT_SERVER_USE_SSL = os.environ.get("STT_SERVER_USE_SSL", "true").lower() == "true"
3945

40-
logger = logging.getLogger("livekit.plugins.talkscriber")
46+
# Build URL based on environment
47+
_protocol = "wss" if STT_SERVER_USE_SSL else "ws"
48+
BASE_URL = f"{_protocol}://{STT_SERVER_HOST}:{STT_SERVER_PORT}"
4149

4250

4351
@dataclass
@@ -138,7 +146,7 @@ def stream(
138146
*,
139147
language: str | None = None,
140148
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
141-
) -> "SpeechStream":
149+
) -> SpeechStream:
142150
return SpeechStream(
143151
stt=self,
144152
opts=self._sanitize_options(language=language),
@@ -445,7 +453,7 @@ async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
445453

446454
except Exception as e:
447455
logger.error(f"Failed to connect to Talkscriber: {e}")
448-
raise APIConnectionError(f"Failed to connect to Talkscriber: {e}")
456+
raise APIConnectionError(f"Failed to connect to Talkscriber: {e}") from e
449457

450458
def _process_stream_event(self, data: dict) -> None:
451459
"""Process incoming messages from Talkscriber WebSocket."""

livekit-plugins/livekit-plugins-talkscriber/livekit/plugins/talkscriber/tts.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55
import os
66
import weakref
77
from dataclasses import dataclass
8-
from typing import Optional
9-
from urllib.parse import urlencode
108

119
import aiohttp
10+
1211
from livekit.agents import (
1312
APIConnectionError,
1413
APIConnectOptions,
@@ -19,8 +18,7 @@
1918
utils,
2019
)
2120

22-
# from .log import logger
23-
from loguru import logger
21+
from .log import logger
2422

2523
# Talkscriber TTS WebSocket API endpoint
2624
# Support environment variables for flexible deployment
@@ -64,9 +62,7 @@ def __init__(
6462
base_url: str = BASE_URL,
6563
base_rest_url: str = BASE_REST_URL,
6664
use_streaming: bool = True,
67-
word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
68-
ignore_punctuation=False
69-
),
65+
word_tokenizer: tokenize.WordTokenizer | None = None,
7066
http_session: aiohttp.ClientSession | None = None,
7167
audio_buffer_size: int = 10,
7268
) -> None:
@@ -95,6 +91,10 @@ def __init__(
9591
voice = model
9692
logger.info(f"Using model parameter '{model}' as voice")
9793

94+
# Initialize word_tokenizer if not provided
95+
if word_tokenizer is None:
96+
word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)
97+
9898
super().__init__(
9999
capabilities=tts.TTSCapabilities(streaming=use_streaming),
100100
sample_rate=sample_rate,
@@ -225,8 +225,8 @@ def synthesize(
225225
self,
226226
text: str,
227227
*,
228-
conn_options: Optional[APIConnectOptions] = None,
229-
) -> "ChunkedStream":
228+
conn_options: APIConnectOptions | None = None,
229+
) -> ChunkedStream:
230230
# Use default conn_options if not provided
231231
if conn_options is None:
232232
conn_options = APIConnectOptions()
@@ -241,7 +241,7 @@ def synthesize(
241241
session=self._ensure_session(),
242242
)
243243

244-
def stream(self, *, conn_options: Optional[APIConnectOptions] = None) -> "SynthesizeStream":
244+
def stream(self, *, conn_options: APIConnectOptions | None = None) -> SynthesizeStream:
245245
if not self._use_streaming:
246246
raise ValueError("Streaming is disabled. Use synthesize() for chunked synthesis.")
247247

@@ -338,8 +338,8 @@ async def _run(self, output_emitter: tts.AudioEmitter) -> None:
338338
error_body = None
339339
try:
340340
error_body = await res.json()
341-
except:
342-
pass
341+
except Exception as e:
342+
logger.debug(f"Failed to parse error response as JSON: {e}")
343343

344344
raise APIStatusError(
345345
message=res.reason or "Unknown error occurred.",
@@ -500,6 +500,7 @@ async def _accumulate_and_process():
500500
await audio_task
501501

502502
except Exception as e:
503+
logger.error(f"Error in _accumulate_and_process: {e}")
503504
audio_task.cancel()
504505
raise
505506


livekit-plugins/livekit-plugins-talkscriber/livekit/plugins/talkscriber/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.1.0"
1+
__version__ = "0.1.0"

0 commit comments

Comments
 (0)