Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions examples/nodejs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,10 @@ pnpm run:batch

```
pnpm run:real-time-file
```

### Get speakers (real-time)

```
pnpm run:speaker-id
```
Binary file modified examples/nodejs/example.wav
Binary file not shown.
3 changes: 2 additions & 1 deletion examples/nodejs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"type": "module",
"scripts": {
"run:batch": "node --import tsx/esm batch-example.ts",
"run:real-time-file": "node --import tsx/esm real-time-file-example.ts"
"run:real-time-file": "node --import tsx/esm real-time-file-example.ts",
"run:speaker-id": "node --import tsx/esm speaker-id-example.ts"
},
"keywords": [],
"author": "",
Expand Down
58 changes: 58 additions & 0 deletions examples/nodejs/speaker-id-example.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/**
 * Demonstrates the speaker ID feature of the real-time-client package in NodeJS.
 *
 * Connects to the real-time API, streams a local file as if in real time, and
 * prints the speakers detected during the session.
 *
 * A Speechmatics API key is required; one can be generated from the
 * Speechmatics Portal: https://portal.speechmatics.com/api-keys
 *
 * NOTE: This script runs as an ES Module via tsx, which allows top-level await.
 * The library also works with CommonJS, but there this code would need to be
 * wrapped in an async function.
 */
import { RealtimeClient } from '@speechmatics/real-time-client';
import fs from 'node:fs';
import dotenv from 'dotenv';
import { createSpeechmaticsJWT } from '@speechmatics/auth';

dotenv.config();

const apiKey = process.env.API_KEY;
if (!apiKey) {
  throw new Error('Please set the API_KEY environment variable');
}

const client = new RealtimeClient();

// Short-lived JWT used to authenticate the real-time session.
const jwt = await createSpeechmaticsJWT({
  type: 'rt',
  apiKey,
  ttl: 60, // 1 minute
});

// Read the file in small chunks so the server is not flooded with audio.
const audioStream = fs.createReadStream('./example.wav', {
  highWaterMark: 4096, // avoid sending too much data at once
});

await client.start(jwt, {
  transcription_config: {
    language: 'en',
    operating_point: 'enhanced',
  },
});

// Forward each chunk of the file to the recognition session.
audioStream.on('data', (chunk) => {
  client.sendAudio(chunk);
});

// Close out the session once the whole file has been read.
audioStream.on('end', () => {
  // Send a stop message to the server when we're done sending audio.
  // `noTimeout` is set because we stream faster than real-time, so we should
  // wait for all the data to be processed before closing the connection.
  client.stopRecognition({ noTimeout: true });
});

// Wait for the speakers to become available.
// With final = true, the speakers are only returned when the session finishes.
const speakers = await client.getSpeakers(true);
console.log(speakers);
2 changes: 1 addition & 1 deletion packages/real-time-client-react/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@speechmatics/real-time-client-react",
"version": "2.0.2",
"version": "3.0.0",
"description": "React hooks for interacting with the Speechmatics Real-Time API",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,40 @@ export function useRealtimeTranscription() {
[client],
);

const setRecognitionConfig = useCallback<
RealtimeClient['setRecognitionConfig']
>(
(config) => {
client.setRecognitionConfig(config);
},
[client],
);

const getSpeakers = useCallback<RealtimeClient['getSpeakers']>(
(final?: boolean) => {
return client.getSpeakers(final);
},
[client],
);

return useMemo(
() => ({
sessionId,
socketState,
startTranscription,
stopTranscription,
sendAudio,
setRecognitionConfig,
getSpeakers,
}),
[sessionId, socketState, startTranscription, stopTranscription, sendAudio],
[
sessionId,
socketState,
startTranscription,
stopTranscription,
sendAudio,
setRecognitionConfig,
getSpeakers,
],
);
}
12 changes: 12 additions & 0 deletions packages/real-time-client/models/AddChannelAudio.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/**
 * Message carrying a chunk of base64-encoded audio for a named channel.
 * The `channel` field ties the audio to one input of a multichannel session
 * (see the `channel` fields on transcript messages).
 */
interface AddChannelAudio {
  message: 'AddChannelAudio';
  /**
   * The channel identifier to which the audio belongs.
   */
  channel: string;
  /**
   * The audio data in base64 format.
   */
  data: string;
}
export type { AddChannelAudio };
10 changes: 10 additions & 0 deletions packages/real-time-client/models/AddPartialTranscript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,15 @@ interface AddPartialTranscript {
format?: string;
metadata: RecognitionMetadata;
results: RecognitionResult[];
/**
* The channel identifier to which the audio belongs. This field is only seen in multichannel.
*
* :::note
*
* This field is only available in [preview mode](https://docs.speechmatics.com/private/preview-mode).
*
* :::
*/
channel?: string;
}
export type { AddPartialTranscript };
3 changes: 3 additions & 0 deletions packages/real-time-client/models/AddPartialTranslation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ interface AddPartialTranslation {
* Speechmatics JSON output format version number.
*/
format?: string;
/**
* Language translation relates to given as an ISO language code.
*/
language: string;
results: TranslatedSentence[];
}
Expand Down
10 changes: 10 additions & 0 deletions packages/real-time-client/models/AddTranscript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,15 @@ interface AddTranscript {
format?: string;
metadata: RecognitionMetadata;
results: RecognitionResult[];
/**
* The channel identifier to which the audio belongs. This field is only seen in multichannel.
*
* :::note
*
* This field is only available in [preview mode](https://docs.speechmatics.com/private/preview-mode).
*
* :::
*/
channel?: string;
}
export type { AddTranscript };
3 changes: 3 additions & 0 deletions packages/real-time-client/models/AddTranslation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ interface AddTranslation {
* Speechmatics JSON output format version number.
*/
format?: string;
/**
* Language translation relates to given as an ISO language code.
*/
language: string;
results: TranslatedSentence[];
}
Expand Down
3 changes: 3 additions & 0 deletions packages/real-time-client/models/AudioEventEndData.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
/**
 * Payload describing the end of a detected audio event.
 */
interface AudioEventEndData {
  /**
   * The type of audio event that has started or ended. See our list of [supported Audio Event types](https://docs.speechmatics.com/speech-to-text/features/audio-events#supported-audio-events).
   */
  type: string;
  /**
   * The time (in seconds) of the audio corresponding to the end of the audio event.
   */
  end_time: number;
}
Expand Down
9 changes: 9 additions & 0 deletions packages/real-time-client/models/AudioEventStartData.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
/**
 * Payload describing the start of a detected audio event.
 */
interface AudioEventStartData {
  /**
   * The type of audio event that has started or ended. See our list of [supported Audio Event types](https://docs.speechmatics.com/speech-to-text/features/audio-events#supported-audio-events).
   */
  type: string;
  /**
   * The time (in seconds) of the audio corresponding to the beginning of the audio event.
   */
  start_time: number;
  /**
   * A confidence score assigned to the audio event. Ranges from 0.0 (least confident) to 1.0 (most confident).
   */
  confidence: number;
}
export type { AudioEventStartData };
6 changes: 6 additions & 0 deletions packages/real-time-client/models/AudioEventsConfig.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
/**
 * Contains configuration for [Audio Events](https://docs.speechmatics.com/speech-to-text/features/audio-events)
 */
interface AudioEventsConfig {
  /**
   * List of [Audio Event types](https://docs.speechmatics.com/speech-to-text/features/audio-events#supported-audio-events) to enable.
   * NOTE(review): behavior when this list is omitted is not shown here — presumably a default set applies; confirm against the API docs.
   */
  types?: string[];
}
export type { AudioEventsConfig };
3 changes: 3 additions & 0 deletions packages/real-time-client/models/AudioFilteringConfig.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
/**
 * Puts a lower limit on the volume of processed audio by using the `volume_threshold` setting. See [Audio Filtering](https://docs.speechmatics.com/speech-to-text/features/audio-filtering).
 */
interface AudioFilteringConfig {
  /**
   * Minimum volume for audio to be processed; audio below this threshold is filtered out.
   */
  volume_threshold?: number;
}
Expand Down
4 changes: 0 additions & 4 deletions packages/real-time-client/models/AudioFormatFile.ts

This file was deleted.

7 changes: 0 additions & 7 deletions packages/real-time-client/models/AudioFormatRaw.ts

This file was deleted.

6 changes: 6 additions & 0 deletions packages/real-time-client/models/ChannelAudioAdded.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Server acknowledgement for channel audio. Pairs a sequence number with the
 * channel the audio was added to (counterpart of `AddChannelAudio`).
 */
interface ChannelAudioAdded {
  message: 'ChannelAudioAdded';
  // Sequence number of the acknowledged audio message — presumably a running
  // count per channel (cf. `EndOfChannel.last_seq_no`); confirm against API docs.
  seq_no: number;
  // Identifier of the channel the audio was added to.
  channel: string;
}
export type { ChannelAudioAdded };
2 changes: 1 addition & 1 deletion packages/real-time-client/models/ConversationConfig.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
 * This mode will detect when a speaker has stopped talking. The `end_of_utterance_silence_trigger` is the time in seconds after which the server will assume that the speaker has finished speaking, and will emit an `EndOfUtterance` message. A value of 0 disables the feature.
 */
interface ConversationConfig {
  /**
   * Silence duration (in seconds) after which the server emits an `EndOfUtterance` message. A value of 0 disables the feature.
   */
  end_of_utterance_silence_trigger?: number;
}
Expand Down
3 changes: 3 additions & 0 deletions packages/real-time-client/models/DiarizationConfig.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
/**
 * Set to `speaker` to apply [Speaker Diarization](https://docs.speechmatics.com/speech-to-text/features/diarization) to the audio; `none` leaves diarization off.
 */
type DiarizationConfig = 'none' | 'speaker';
export type { DiarizationConfig };
3 changes: 3 additions & 0 deletions packages/real-time-client/models/DirectionEnum.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
/**
 * Text direction for display: `ltr` for words that should be displayed left-to-right, `rtl` for right-to-left.
 */
type DirectionEnum = 'ltr' | 'rtl';
export type { DirectionEnum };
9 changes: 9 additions & 0 deletions packages/real-time-client/models/EndOfChannel.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Signals that no more audio will be sent for a given channel — presumably the
 * per-channel counterpart of an end-of-stream message; confirm against API docs.
 */
interface EndOfChannel {
  message: 'EndOfChannel';
  /**
   * The channel identifier to which the audio belongs.
   */
  channel: string;
  // Sequence number of the final audio message sent on this channel
  // (cf. `ChannelAudioAdded.seq_no`).
  last_seq_no: number;
}
export type { EndOfChannel };
4 changes: 4 additions & 0 deletions packages/real-time-client/models/EndOfUtterance.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,9 @@ import type { EndOfUtteranceMetadata } from './EndOfUtteranceMetadata';
/**
 * Server message emitted when the end of an utterance is detected
 * (configured via `ConversationConfig.end_of_utterance_silence_trigger`).
 */
interface EndOfUtterance {
  message: 'EndOfUtterance';
  metadata: EndOfUtteranceMetadata;
  /**
   * The channel identifier to which the EndOfUtterance message belongs. This field is only seen in multichannel.
   */
  channel?: string;
}
export type { EndOfUtterance };
6 changes: 6 additions & 0 deletions packages/real-time-client/models/EndOfUtteranceMetadata.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
/**
 * Timing information attached to an `EndOfUtterance` message.
 * NOTE(review): the original docs carried the identical comment on both fields;
 * the start/end wording below is inferred from the field names — confirm
 * against the API docs.
 */
interface EndOfUtteranceMetadata {
  /**
   * The time (in seconds) at which the detected end of utterance begins.
   */
  start_time?: number;
  /**
   * The time (in seconds) at which the detected end of utterance ends.
   */
  end_time?: number;
}
export type { EndOfUtteranceMetadata };
30 changes: 30 additions & 0 deletions packages/real-time-client/models/ErrorType.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,36 @@
import type { ErrorTypeEnum } from './ErrorTypeEnum';
interface ErrorType {
message: 'Error';
/**
* The following are the possible error types:
*
* | Error Type | Description |
* | --- | --- |
* | `invalid_message` | The message received was not understood. |
* | `invalid_model` | Unable to use the model for the recognition. This can happen if the language is not supported at all, or is not available for the user. |
* | `invalid_config` | The config received contains some wrong or unsupported fields, or too many translation target languages were requested. |
* | `invalid_audio_type` | Audio type is not supported, is deprecated, or the `audio_type` is malformed. |
* | `invalid_output_format` | Output format is not supported, is deprecated, or the `output_format` is malformed. |
* | `not_authorised` | User was not recognised, or the API key provided is not valid. |
* | `insufficient_funds` | User doesn't have enough credits or any other reason preventing the user to be charged for the job properly. |
* | `not_allowed` | User is not allowed to use this message (is not allowed to perform the action the message would invoke). |
* | `job_error` | Unable to do any work on this job, the server might have timed out etc. |
* | `data_error` | Unable to accept the data specified - usually because there is too much data being sent at once |
* | `buffer_error` | Unable to fit the data in a corresponding buffer. This can happen for clients sending the input data faster than real-time. |
* | `protocol_error` | Message received was syntactically correct, but could not be accepted due to protocol limitations. This is usually caused by messages sent in the wrong order. |
* | `quota_exceeded` | Maximum number of concurrent connections allowed for the contract has been reached |
* | `timelimit_exceeded` | Usage quota for the contract has been reached |
* | `idle_timeout` | Idle duration limit was reached (no audio data sent within the last hour), a closing handshake with code 1008 follows this in-band error. |
* | `session_timeout` | Max session duration was reached (maximum session duration of 48 hours), a closing handshake with code 1008 follows this in-band error. |
* | `session_transfer` | An error while transferring session to another backend with the reason: Session transfer failed. This may occur when moving sessions due to backend maintenance operations or migration from a faulty backend. |
* | `unknown_error` | An error that did not fit any of the types above. |
*
* :::info
*
* `invalid_message`, `protocol_error` and `unknown_error` can be triggered as a response to any type of messages.
*
* :::
*/
type: ErrorTypeEnum;
reason: string;
code?: number;
Expand Down
36 changes: 35 additions & 1 deletion packages/real-time-client/models/ErrorTypeEnum.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,50 @@
/**
 * The following are the possible error types:
 *
 * | Error Type | Description |
 * | --- | --- |
 * | `invalid_message` | The message received was not understood. |
 * | `invalid_model` | Unable to use the model for the recognition. This can happen if the language is not supported at all, or is not available for the user. |
 * | `invalid_config` | The config received contains some wrong or unsupported fields, or too many translation target languages were requested. |
 * | `invalid_audio_type` | Audio type is not supported, is deprecated, or the `audio_type` is malformed. |
 * | `invalid_output_format` | Output format is not supported, is deprecated, or the `output_format` is malformed. |
 * | `not_authorised` | User was not recognised, or the API key provided is not valid. |
 * | `insufficient_funds` | User doesn't have enough credits or any other reason preventing the user to be charged for the job properly. |
 * | `not_allowed` | User is not allowed to use this message (is not allowed to perform the action the message would invoke). |
 * | `job_error` | Unable to do any work on this job, the server might have timed out etc. |
 * | `data_error` | Unable to accept the data specified - usually because there is too much data being sent at once |
 * | `buffer_error` | Unable to fit the data in a corresponding buffer. This can happen for clients sending the input data faster than real-time. |
 * | `protocol_error` | Message received was syntactically correct, but could not be accepted due to protocol limitations. This is usually caused by messages sent in the wrong order. |
 * | `quota_exceeded` | Maximum number of concurrent connections allowed for the contract has been reached |
 * | `timelimit_exceeded` | Usage quota for the contract has been reached |
 * | `idle_timeout` | Idle duration limit was reached (no audio data sent within the last hour), a closing handshake with code 1008 follows this in-band error. |
 * | `session_timeout` | Max session duration was reached (maximum session duration of 48 hours), a closing handshake with code 1008 follows this in-band error. |
 * | `session_transfer` | An error while transferring session to another backend with the reason: Session transfer failed. This may occur when moving sessions due to backend maintenance operations or migration from a faulty backend. |
 * | `unknown_error` | An error that did not fit any of the types above. |
 *
 * :::info
 *
 * `invalid_message`, `protocol_error` and `unknown_error` can be triggered as a response to any type of messages.
 *
 * :::
 */
// Bug fix: the union previously listed 'timelimit_exceeded' twice; the
// duplicate member has been removed. Member order now matches the table above.
type ErrorTypeEnum =
  | 'invalid_message'
  | 'invalid_model'
  | 'invalid_config'
  | 'invalid_audio_type'
  | 'invalid_output_format'
  | 'not_authorised'
  | 'insufficient_funds'
  | 'not_allowed'
  | 'job_error'
  | 'data_error'
  | 'buffer_error'
  | 'protocol_error'
  | 'quota_exceeded'
  | 'timelimit_exceeded'
  | 'idle_timeout'
  | 'session_timeout'
  | 'session_transfer'
  | 'unknown_error';
export type { ErrorTypeEnum };
Loading