From f168fd0de9d281657d26463d0e026749660b1ba8 Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Wed, 15 Oct 2025 15:49:07 +0100 Subject: [PATCH 1/4] add speaker ID function --- examples/rt/async/speaker_id/README.md | 11 ++++++ examples/rt/async/speaker_id/main.py | 50 +++++++++++++++++++++++++ sdk/batch/README.md | 2 + sdk/rt/README.md | 2 + sdk/rt/speechmatics/rt/_async_client.py | 5 +++ sdk/rt/speechmatics/rt/_base_client.py | 32 ++++++++++++++++ 6 files changed, 102 insertions(+) create mode 100644 examples/rt/async/speaker_id/README.md create mode 100644 examples/rt/async/speaker_id/main.py diff --git a/examples/rt/async/speaker_id/README.md b/examples/rt/async/speaker_id/README.md new file mode 100644 index 00000000..86301429 --- /dev/null +++ b/examples/rt/async/speaker_id/README.md @@ -0,0 +1,11 @@ +# Live Real-Time Example + +This example demonstrates how to use the Speechmatics Python SDK to transcribe audio from a microphone in real-time. It requires `pyaudio` to be installed for the example to work correctly. + +The SDK requires an API key to be set as an environment variable before it can be used. 
You can obtain an API key by signing up for a Speechmatics account at https://portal.speechmatics.com/dashboard + +## Prerequisites + +- Install Speechmatics RT SDK: `pip install speechmatics-rt` +- Export Speechmatics API key: `export SPEECHMATICS_API_KEY=YOUR-API-KEY` +- Install `pyaudio`: `pip install pyaudio` diff --git a/examples/rt/async/speaker_id/main.py b/examples/rt/async/speaker_id/main.py new file mode 100644 index 00000000..f31d085e --- /dev/null +++ b/examples/rt/async/speaker_id/main.py @@ -0,0 +1,50 @@ +import asyncio +import logging + +from speechmatics.rt import ServerMessageType +from speechmatics.rt import ( + AsyncClient, + AudioEncoding, + AudioFormat, + OperatingPoint, + TranscriptionConfig, +) + + +logging.basicConfig(level=logging.INFO) + + +async def main() -> None: + """Run async transcription example.""" + + transcription_config = TranscriptionConfig( + max_delay=0.8, + enable_partials=True, + operating_point=OperatingPoint.ENHANCED, + diarization="speaker", + ) + + # Initialize client with API key from environment + async with AsyncClient() as client: + try: + @client.on(ServerMessageType.ADD_TRANSCRIPT) + def handle_finals(msg): + print(f"Final: {msg['metadata']['transcript']}") + + @client.on(ServerMessageType.SPEAKERS_RESULT) + def handle_speakers_result(msg): + print(msg) + + # Transcribe audio file + with open("./examples/example.wav", "rb") as audio_file: + await client.transcribe( + audio_file, + transcription_config=transcription_config, + get_speakers=True, + ) + except Exception as e: + print(f"Transcription error: {e}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/sdk/batch/README.md b/sdk/batch/README.md index a3160020..ae7193db 100644 --- a/sdk/batch/README.md +++ b/sdk/batch/README.md @@ -25,6 +25,8 @@ pip install speechmatics-batch ### Quick Start +To run transcription, you'll need an audio file. 
You can find an example file [here](https://github.com/speechmatics/speechmatics-python-sdk/blob/main/examples/example.wav). + ```python import asyncio from speechmatics.batch import AsyncClient diff --git a/sdk/rt/README.md b/sdk/rt/README.md index c509c3a6..93976895 100644 --- a/sdk/rt/README.md +++ b/sdk/rt/README.md @@ -24,6 +24,8 @@ pip install speechmatics-rt ``` ## Quick Start +To run transcription, you'll need an audio file. You can find an example file [here](https://github.com/speechmatics/speechmatics-python-sdk/blob/main/examples/example.wav). + ```python import asyncio from speechmatics.rt import AsyncClient, ServerMessageType diff --git a/sdk/rt/speechmatics/rt/_async_client.py b/sdk/rt/speechmatics/rt/_async_client.py index 3566ce31..1d453111 100644 --- a/sdk/rt/speechmatics/rt/_async_client.py +++ b/sdk/rt/speechmatics/rt/_async_client.py @@ -171,6 +171,7 @@ async def transcribe( audio_events_config: Optional[AudioEventsConfig] = None, ws_headers: Optional[dict] = None, timeout: Optional[float] = None, + get_speakers: Optional[bool] = False, ) -> None: """ Transcribe a single audio stream in real-time. @@ -193,6 +194,7 @@ async def transcribe( ws_headers: Additional headers to include in the WebSocket handshake. timeout: Maximum time in seconds to wait for transcription completion. Default None. + get_speakers: Send a speaker identifier event at the end of the session. Raises: AudioError: If source is invalid or cannot be read. 
@@ -233,6 +235,9 @@ async def transcribe( ws_headers=ws_headers, ) + if get_speakers: + await self.send_message({"message": "GetSpeakers", "final": True}) + try: await asyncio.wait_for( self._audio_producer(source, audio_format.chunk_size), diff --git a/sdk/rt/speechmatics/rt/_base_client.py b/sdk/rt/speechmatics/rt/_base_client.py index 0ac6d085..dd93a962 100644 --- a/sdk/rt/speechmatics/rt/_base_client.py +++ b/sdk/rt/speechmatics/rt/_base_client.py @@ -18,7 +18,9 @@ from ._models import AudioEventsConfig from ._models import AudioFormat from ._models import ConnectionConfig +from ._models import ServerMessageType from ._models import SessionInfo +from ._models import SpeakerIdentifier from ._models import TranscriptionConfig from ._models import TranslationConfig from ._transport import Transport @@ -149,6 +151,36 @@ async def send_message(self, message: dict[str, Any]) -> None: self._closed_evt.set() raise + async def get_speakers(self, final=False) -> list[SpeakerIdentifier]: + """ + Get the list of speakers in the current session. + This method returns as soon as a SPEAKERS_RESULT message is received. + Multiple requests to the method may therefore cause a race condition in which the same + SPEAKERS_RESULT message is received by multiple requests. This should not cause any issues, + but will result in redundant SPEAKERS_RESULT events. 
+
+        Args:
+            final: Whether to wait until the end of the session to return speaker IDs (default: False)
+
+        Returns:
+            List of SpeakerIdentifier objects
+        """
+        try:
+            await self.send_message({"message": "GetSpeakers", "final": final})
+            speaker_evt = asyncio.Event()
+            speaker_identifiers: list[SpeakerIdentifier] = []
+            self.once(
+                ServerMessageType.SPEAKERS_RESULT,
+                lambda msg: (speaker_identifiers.extend(msg.get("speakers", [])), speaker_evt.set()),
+            )
+            await speaker_evt.wait()
+            return speaker_identifiers
+        except asyncio.TimeoutError:
+            raise TransportError("Timeout waiting for SPEAKERS_RESULT")
+        except Exception:
+            self._closed_evt.set()
+            raise
+
     async def _recv_loop(self) -> None:
         """
         Background task that continuously receives and dispatches server messages.

From ebb5fed01edb5d7130c59eda1e347c01605613c1 Mon Sep 17 00:00:00 2001
From: Tudor Evans
Date: Wed, 15 Oct 2025 15:50:40 +0100
Subject: [PATCH 2/4] update README

---
 examples/rt/async/speaker_id/README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/rt/async/speaker_id/README.md b/examples/rt/async/speaker_id/README.md
index 86301429..1b070c5c 100644
--- a/examples/rt/async/speaker_id/README.md
+++ b/examples/rt/async/speaker_id/README.md
@@ -1,6 +1,6 @@
-# Live Real-Time Example
+# Live Real-Time Speaker ID Example
 
-This example demonstrates how to use the Speechmatics Python SDK to transcribe audio from a microphone in real-time. It requires `pyaudio` to be installed for the example to work correctly.
+This example demonstrates how to use the Speechmatics Python SDK to perform speaker ID in real-time.
 
 The SDK requires an API key to be set as an environment variable before it can be used. 
You can obtain an API key by signing up for a Speechmatics account at https://portal.speechmatics.com/dashboard @@ -8,4 +8,3 @@ The SDK requires an API key to be set as an environment variable before it can b - Install Speechmatics RT SDK: `pip install speechmatics-rt` - Export Speechmatics API key: `export SPEECHMATICS_API_KEY=YOUR-API-KEY` -- Install `pyaudio`: `pip install pyaudio` From fcdabed852a1c338bb7452f868e420a40c317c0e Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Fri, 17 Oct 2025 11:09:34 +0100 Subject: [PATCH 3/4] remove partials --- examples/rt/async/speaker_id/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/rt/async/speaker_id/main.py b/examples/rt/async/speaker_id/main.py index f31d085e..13dfeba9 100644 --- a/examples/rt/async/speaker_id/main.py +++ b/examples/rt/async/speaker_id/main.py @@ -19,7 +19,6 @@ async def main() -> None: transcription_config = TranscriptionConfig( max_delay=0.8, - enable_partials=True, operating_point=OperatingPoint.ENHANCED, diarization="speaker", ) From f5131c8b0c5a7f08e1d9e383869dd7047786b374 Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Mon, 20 Oct 2025 15:59:40 +0100 Subject: [PATCH 4/4] update eg --- examples/rt/async/speaker_id/README.md | 5 ++ examples/rt/async/speaker_id/generate.py | 56 +++++++++++++++++++ .../speaker_id/{main.py => transcribe.py} | 24 ++++---- 3 files changed, 74 insertions(+), 11 deletions(-) create mode 100644 examples/rt/async/speaker_id/generate.py rename examples/rt/async/speaker_id/{main.py => transcribe.py} (61%) diff --git a/examples/rt/async/speaker_id/README.md b/examples/rt/async/speaker_id/README.md index 1b070c5c..0ae32fed 100644 --- a/examples/rt/async/speaker_id/README.md +++ b/examples/rt/async/speaker_id/README.md @@ -8,3 +8,8 @@ The SDK requires an API key to be set as an environment variable before it can b - Install Speechmatics RT SDK: `pip install speechmatics-rt` - Export Speechmatics API key: `export SPEECHMATICS_API_KEY=YOUR-API-KEY` + +## Usage + 
+- Generate speaker IDs: `python generate.py` - this will generate a `speakers.json` file +- Transcribe audio: `python transcribe.py` - this will use the `speakers.json` file to perform speaker ID on a conversation diff --git a/examples/rt/async/speaker_id/generate.py b/examples/rt/async/speaker_id/generate.py new file mode 100644 index 00000000..a7604494 --- /dev/null +++ b/examples/rt/async/speaker_id/generate.py @@ -0,0 +1,56 @@ +import asyncio +import logging +import os +import json + +from speechmatics.rt import ( + AsyncClient, + OperatingPoint, + TranscriptionConfig, + ServerMessageType, +) + + +logging.basicConfig(level=logging.INFO) + + +speakers: list[dict] = [] + + +async def generate_ids(voice_file: str) -> None: + """Run async transcription example.""" + + transcription_config = TranscriptionConfig( + operating_point=OperatingPoint.ENHANCED, + diarization="speaker", + ) + + # Initialize client with API key from environment + async with AsyncClient() as client: + try: + @client.on(ServerMessageType.SPEAKERS_RESULT) + def handle_speakers_result(msg): + new_speakers = msg.get('speakers', []) + new_speakers[0]["label"] = voice_file + speakers.append(new_speakers[0]) + + # Transcribe audio file + with open(os.path.join(voices_folder, voice_file), "rb") as audio_file: + await client.transcribe( + audio_file, + transcription_config=transcription_config, + get_speakers=True, + ) + except Exception as e: + print(f"Transcription error: {e}") + + +if __name__ == "__main__": + voices_folder = "./examples/rt/async/speaker_id/voices" + voice_files = [f for f in os.listdir(voices_folder) if os.path.isfile(os.path.join(voices_folder, f))] + + for voice_file in voice_files: + asyncio.run(generate_ids(voice_file)) + + with open('./speakers.json', 'w') as f: + json.dump(speakers, f) diff --git a/examples/rt/async/speaker_id/main.py b/examples/rt/async/speaker_id/transcribe.py similarity index 61% rename from examples/rt/async/speaker_id/main.py rename to 
examples/rt/async/speaker_id/transcribe.py index 13dfeba9..179db3f9 100644 --- a/examples/rt/async/speaker_id/main.py +++ b/examples/rt/async/speaker_id/transcribe.py @@ -1,13 +1,14 @@ import asyncio import logging +import json -from speechmatics.rt import ServerMessageType +from speechmatics.rt import SpeakerIdentifier +from speechmatics.rt import SpeakerDiarizationConfig from speechmatics.rt import ( AsyncClient, - AudioEncoding, - AudioFormat, OperatingPoint, TranscriptionConfig, + ServerMessageType ) @@ -17,10 +18,16 @@ async def main() -> None: """Run async transcription example.""" + with open('./speakers.json') as f: + speaker_identifiers = [SpeakerIdentifier(**s) for s in json.load(f)] + transcription_config = TranscriptionConfig( - max_delay=0.8, operating_point=OperatingPoint.ENHANCED, diarization="speaker", + max_delay=4, + speaker_diarization_config=SpeakerDiarizationConfig( + speakers=speaker_identifiers, + ) ) # Initialize client with API key from environment @@ -28,18 +35,13 @@ async def main() -> None: try: @client.on(ServerMessageType.ADD_TRANSCRIPT) def handle_finals(msg): - print(f"Final: {msg['metadata']['transcript']}") - - @client.on(ServerMessageType.SPEAKERS_RESULT) - def handle_speakers_result(msg): - print(msg) + print(f"Final: {msg['metadata']['speaker']} {msg['metadata']['transcript']}") # Transcribe audio file - with open("./examples/example.wav", "rb") as audio_file: + with open("./examples/conversation.wav", "rb") as audio_file: await client.transcribe( audio_file, transcription_config=transcription_config, - get_speakers=True, ) except Exception as e: print(f"Transcription error: {e}")