Skip to content

Commit f5131c8

Browse files
committed
update eg
1 parent fcdabed commit f5131c8

File tree

3 files changed

+74
-11
lines changed

3 files changed

+74
-11
lines changed

examples/rt/async/speaker_id/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,8 @@ The SDK requires an API key to be set as an environment variable before it can b
88

99
- Install Speechmatics RT SDK: `pip install speechmatics-rt`
1010
- Export Speechmatics API key: `export SPEECHMATICS_API_KEY=YOUR-API-KEY`
11+
12+
## Usage
13+
14+
- Generate speaker IDs: `python generate.py` - this will generate a `speakers.json` file
15+
- Transcribe audio: `python transcribe.py` - this will use the `speakers.json` file to perform speaker identification on a conversation
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import asyncio
2+
import logging
3+
import os
4+
import json
5+
6+
from speechmatics.rt import (
7+
AsyncClient,
8+
OperatingPoint,
9+
TranscriptionConfig,
10+
ServerMessageType,
11+
)
12+
13+
14+
logging.basicConfig(level=logging.INFO)
15+
16+
17+
speakers: list[dict] = []
18+
19+
20+
async def generate_ids(voice_file: str) -> None:
    """Collect speaker identifiers for a single voice sample.

    Transcribes ``voice_file`` with speaker diarization enabled and
    ``get_speakers=True``. When a SPEAKERS_RESULT message arrives, its first
    speaker entry is labelled with ``voice_file`` and appended to the
    module-level ``speakers`` list.

    Args:
        voice_file: File name of the sample, resolved against the
            module-level ``voices_folder``.

    Note:
        ``voices_folder`` is only assigned in the ``__main__`` section, so
        this coroutine raises ``NameError`` (caught and printed below) if it
        is called from an importing module that has not set it.

    Exceptions raised while opening or transcribing the file are printed
    rather than re-raised, so the caller always regains control.
    """
    transcription_config = TranscriptionConfig(
        operating_point=OperatingPoint.ENHANCED,
        diarization="speaker",
    )

    # Initialize client with API key from environment
    async with AsyncClient() as client:
        try:
            @client.on(ServerMessageType.SPEAKERS_RESULT)
            def handle_speakers_result(msg):
                new_speakers = msg.get("speakers") or []
                if not new_speakers:
                    # An empty or missing list would make the [0] lookup
                    # below raise IndexError inside the callback.
                    logging.warning(
                        "No speakers returned for %s; skipping", voice_file
                    )
                    return
                # Only the first entry is kept; it is relabelled with the
                # source file name so the identifiers can be traced back.
                new_speakers[0]["label"] = voice_file
                speakers.append(new_speakers[0])

            # Transcribe audio file
            with open(os.path.join(voices_folder, voice_file), "rb") as audio_file:
                await client.transcribe(
                    audio_file,
                    transcription_config=transcription_config,
                    get_speakers=True,
                )
        except Exception as e:
            print(f"Transcription error: {e}")
46+
47+
48+
if __name__ == "__main__":
    # Kept as a module-level name on purpose: generate_ids() joins each file
    # name onto voices_folder when opening the sample.
    voices_folder = "./examples/rt/async/speaker_id/voices"

    # Keep regular files only; sub-directories in the folder are ignored.
    voice_files = []
    for entry in os.listdir(voices_folder):
        if os.path.isfile(os.path.join(voices_folder, entry)):
            voice_files.append(entry)

    # One event loop run per sample; each run may append to `speakers`.
    for sample in voice_files:
        asyncio.run(generate_ids(sample))

    # Write everything collected to speakers.json in the current directory.
    with open('./speakers.json', 'w') as out:
        json.dump(speakers, out)

examples/rt/async/speaker_id/main.py renamed to examples/rt/async/speaker_id/transcribe.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import asyncio
22
import logging
3+
import json
34

4-
from speechmatics.rt import ServerMessageType
5+
from speechmatics.rt import SpeakerIdentifier
6+
from speechmatics.rt import SpeakerDiarizationConfig
57
from speechmatics.rt import (
68
AsyncClient,
7-
AudioEncoding,
8-
AudioFormat,
99
OperatingPoint,
1010
TranscriptionConfig,
11+
ServerMessageType
1112
)
1213

1314

@@ -17,29 +18,30 @@
1718
async def main() -> None:
1819
"""Run async transcription example."""
1920

21+
with open('./speakers.json') as f:
22+
speaker_identifiers = [SpeakerIdentifier(**s) for s in json.load(f)]
23+
2024
transcription_config = TranscriptionConfig(
21-
max_delay=0.8,
2225
operating_point=OperatingPoint.ENHANCED,
2326
diarization="speaker",
27+
max_delay=4,
28+
speaker_diarization_config=SpeakerDiarizationConfig(
29+
speakers=speaker_identifiers,
30+
)
2431
)
2532

2633
# Initialize client with API key from environment
2734
async with AsyncClient() as client:
2835
try:
2936
@client.on(ServerMessageType.ADD_TRANSCRIPT)
3037
def handle_finals(msg):
31-
print(f"Final: {msg['metadata']['transcript']}")
32-
33-
@client.on(ServerMessageType.SPEAKERS_RESULT)
34-
def handle_speakers_result(msg):
35-
print(msg)
38+
print(f"Final: {msg['metadata']['speaker']} {msg['metadata']['transcript']}")
3639

3740
# Transcribe audio file
38-
with open("./examples/example.wav", "rb") as audio_file:
41+
with open("./examples/conversation.wav", "rb") as audio_file:
3942
await client.transcribe(
4043
audio_file,
4144
transcription_config=transcription_config,
42-
get_speakers=True,
4345
)
4446
except Exception as e:
4547
print(f"Transcription error: {e}")

0 commit comments

Comments
 (0)