Refactor project structure and enhance functionality #330

Open · wants to merge 3 commits into master

4 changes: 2 additions & 2 deletions README.md
@@ -12,7 +12,7 @@ This repository consists of examples of consuming [Twilio Media Streams](https:/
### Node.js

* [Basic](node/basic/README.md)
* [Realtime Transcription](node/realtime-transcriptions/README.md)
* [Realtime Transcription](node/realtime-transcriptions/README.md) UPDATED 2025
* [Realtime Keyword Detection](node/keyword-detection/README.md)
* [Google Dialogflow Integration](node/dialogflow-integration)
* [Amazon Transcribe Integration](https://github.com/TwilioDevEd/talkin-cedric)
@@ -30,4 +30,4 @@ This repository consists of examples of consuming [Twilio Media Streams](https:/

## Contributing

This project welcomes contributions from the community.
This project welcomes contributions from the community.
81 changes: 25 additions & 56 deletions node/realtime-transcriptions/README.md
@@ -1,69 +1,38 @@
# RealTimeTranscription Demo
# Media Streams Demos

This demo is a server application consuming audio from Twilio Media Streams and using Google Cloud Speech to perform realtime transcriptions.
This repository consists of examples of consuming [Twilio Media Streams](https://www.twilio.com/docs/voice/tutorials/consume-real-time-media-stream-using-websockets-python-and-flask) via WebSockets.

## App server setup
## By programming language

### Enable Google Cloud Speech API
### Python

https://console.cloud.google.com/launcher/details/google/speech.googleapis.com
* [Basic](python/basic/README.md)
* [Realtime Transcription](python/realtime-transcriptions/README.md)

* Select a Project
* Enable or Manage
* Choose Credentials
* Create a new Credential or make sure you have the JSON
* Copy JSON and save as `google_creds.json` in the root of this project
### Node.js

### Installation
* [Basic](node/basic/README.md)
* [Realtime Transcription](node/realtime-transcriptions/README.md) –
- Updated to use `express`, `ws`, and native ES modules
- Integrated with Google Cloud Speech-to-Text
- Provides real-time transcription of Twilio audio stream
- Uses `/twiml` POST route to return static TwiML XML
* [Realtime Keyword Detection](node/keyword-detection/README.md)
* [Google Dialogflow Integration](node/dialogflow-integration)
* [Amazon Transcribe Integration](https://github.com/TwilioDevEd/talkin-cedric)

**Requires Node >= v12.1.0**
### Java

Run `npm install`
* [Basic](java/basic/README.md)
* [Realtime Transcription](java/realtime-transcriptions/README.md)
* [Save Audio](java/save-audio/README.md)

#### npm dependencies (defined in the `package.json`):
### Ruby/Rails

* dotenv
* httpdispatcher
* websocket
* @google-cloud/speech
* [Realtime Transcription - Ruby Service](ruby/standalone-ruby/README.md)
* [Realtime Transcription - Rails Controller](ruby/rails-controller/README.md)

#### Running the server
## Contributing

Start with `node ./server.js`
This project welcomes contributions from the community. Please submit a pull request with a clear description of your changes, and ensure that updates to documentation and examples remain consistent across programming languages.

#### Useful pointers

https://cloud.google.com/nodejs/docs/reference/speech/2.2.x/v1.SpeechClient#properties

https://google-cloud-python.readthedocs.io/en/0.32.0/speech/gapic/api.html

https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/speech/cloud-client/transcribe_streaming_mic.py

## Setup

You can set up your environment to run the demo by using the CLI (BETA) or the Console.

### Configure using the CLI

1. Find available phone number
`twilio api:core:available-phone-numbers:local:list --country-code="US" --voice-enabled --properties="phoneNumber"`

2. Purchase the phone number (where `+123456789` is a number you found)
`twilio api:core:incoming-phone-numbers:create --phone-number="+123456789"`

3. Start ngrok
`ngrok http 8080`

4. Edit the `templates/streams` file to replace `<ngrok url>` with your ngrok host.

5. Make the call where `+123456789` is the Twilio number you bought and `+198765432` is your phone number and `abcdef.ngrok.io` is your ngrok host.
`twilio api:core:calls:create --from="+123456789" --to="+198765432" --url="https://abcdef.ngrok.io/twiml"`

### Configure using the Console

1. Access the [Twilio console](https://www.twilio.com/console/voice/numbers) to get a `<TWILIO-PHONE-NUMBER>`.
2. Run the server (listening on port 8080)
3. Use ngrok to make the server publicly available: `ngrok http 8080`
4. Edit the streams.xml file in the `templates` directory and add your ngrok URL as `wss://<ngrok url>`
5. Run the curl command in order to make the proper call
`curl -XPOST https://api.twilio.com/2010-04-01/Accounts/<ACCOUNT-SID>/Calls.json -d "Url=http://<ngrok url>/twiml" -d "To=<PHONE-NUMBER>" -d "From=<TWILIO-PHONE-NUMBER>" -u <ACCOUNT-SID>:<AUTH-TOKEN>`
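
The removed CLI and Console instructions both end by creating the call through the REST API (via the `twilio` CLI or curl). For a Node-only workflow, a minimal sketch with the official `twilio` helper library would start the same call against the `/twiml` route; note that the library is not among this PR's dependencies and the environment variable names are illustrative.

```js
// Hypothetical call trigger using the official `twilio` helper library
// (not a dependency of this PR; install it separately).
// Assumes TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN are set in the environment.
import twilio from 'twilio';

const client = twilio(process.env.TWILIO_ACCOUNT_SID, process.env.TWILIO_AUTH_TOKEN);

// Point the call at the /twiml route exposed through your ngrok tunnel.
const call = await client.calls.create({
  url: 'https://<ngrok url>/twiml', // your public ngrok host
  to: '+198765432',                 // your phone number
  from: '+123456789',               // your Twilio number
});

console.log('Started call', call.sid);
```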
48 changes: 25 additions & 23 deletions node/realtime-transcriptions/package.json
@@ -1,25 +1,27 @@
{
"name": "realtime-transcriptions",
"version": "0.0.1",
"description": "Real Time Transcriptions with Twilio",
"main": "server.js",
"scripts": {
"start": "node server.js"
},
"repository": {
"type": "git",
"url": ""
},
"author": "",
"license": "MIT",
"bugs": {
"url": ""
},
"homepage": "",
"dependencies": {
"dotenv": "^4.0.0",
"httpdispatcher": "^2.1.2",
"websocket": "^1.0.28",
"@google-cloud/speech": "^1.5.0"
}
"name": "realtime-transcriptions",
"version": "1.0.0",
"description": "Real-Time Transcriptions with Twilio Media Streams and Google Speech-to-Text",
"type": "module",
"main": "server.js",
"scripts": {
"start": "node server.js"
},
"repository": {
"type": "git",
"url": "https://github.com/your-org/realtime-transcriptions"
},
"author": "Your Name",
"license": "MIT",
"bugs": {
"url": "https://github.com/your-org/realtime-transcriptions/issues"
},
"homepage": "https://github.com/your-org/realtime-transcriptions#readme",
"dependencies": {
"@google-cloud/speech": "^5.6.0",
"axios": "^1.6.0",
"dotenv": "^16.3.1",
"express": "^4.19.2",
"ws": "^8.13.0"
}
}
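
Besides the dependency refresh, the notable change here is `"type": "module"`, which makes Node treat every `.js` file in this package as an ES module. A small illustrative sketch of what that switch implies for the entry point (standard Node ESM behavior, not code from this PR):

```js
// With "type": "module" in package.json, server.js is loaded as an ES module:
import express from 'express';          // ESM import syntax is required
// const express = require('express');  // require() is not defined in ES modules

// CommonJS globals such as __dirname are also gone and must be rebuilt:
import path from 'path';
import { fileURLToPath } from 'url';
const __dirname = path.dirname(fileURLToPath(import.meta.url));
```

This is the same pattern the updated `server.js` below follows.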
99 changes: 44 additions & 55 deletions node/realtime-transcriptions/server.js
@@ -1,97 +1,86 @@
"use strict";
require('dotenv').load();
import express from 'express';
import { createServer } from 'http';
import { WebSocketServer } from 'ws';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
import TranscriptionService from './transcription-service.js';

const fs = require('fs');
const path = require('path');
const http = require('http');
const HttpDispatcher = require('httpdispatcher');
const WebSocketServer = require('websocket').server;
const TranscriptionService = require('./transcription-service');
dotenv.config();

const dispatcher = new HttpDispatcher();
const wsserver = http.createServer(handleRequest);
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const HTTP_SERVER_PORT = 8080;
const HTTP_SERVER_PORT = process.env.PORT || 8080;

function log(message, ...args) {
console.log(new Date(), message, ...args);
console.log(new Date().toISOString(), message, ...args);
}

const mediaws = new WebSocketServer({
httpServer: wsserver,
autoAcceptConnections: true,
});


function handleRequest(request, response){
try {
dispatcher.dispatch(request, response);
} catch(err) {
console.error(err);
}
}

dispatcher.onPost('/twiml', function(req,res) {
log('POST TwiML');

var filePath = path.join(__dirname+'/templates', 'streams.xml');
var stat = fs.statSync(filePath);
const app = express();
const httpServer = createServer(app);
const mediaws = new WebSocketServer({ server: httpServer });

res.writeHead(200, {
'Content-Type': 'text/xml',
'Content-Length': stat.size
});

var readStream = fs.createReadStream(filePath);
readStream.pipe(res);
// Serve static XML TwiML template
app.post('/twiml', (req, res) => {
const filePath = path.join(__dirname, 'templates', 'streams.xml');
res.setHeader('Content-Type', 'text/xml');
fs.createReadStream(filePath).pipe(res);
});

mediaws.on('connect', function(connection) {
// Handle Media Stream connections
mediaws.on('connection', (connection) => {
log('Media WS: Connection accepted');
new MediaStreamHandler(connection);
});

// MediaStreamHandler class
class MediaStreamHandler {
constructor(connection) {
this.metaData = null;
this.trackHandlers = {};

connection.on('message', this.processMessage.bind(this));
connection.on('close', this.close.bind(this));
}

processMessage(message){
if (message.type === 'utf8') {
const data = JSON.parse(message.utf8Data);
if (data.event === "start") {
processMessage(message) {
try {
const data = JSON.parse(message);

if (data.event === 'start') {
this.metaData = data.start;
}
if (data.event !== "media") {
return;
}

if (data.event !== 'media') return;

const track = data.media.track;
if (this.trackHandlers[track] === undefined) {
if (!this.trackHandlers[track]) {
const service = new TranscriptionService();
service.on('transcription', (transcription) => {
log(`Transcription (${track}): ${transcription}`);
log(`Transcription (${track}):`, transcription);
});
this.trackHandlers[track] = service;
}

this.trackHandlers[track].send(data.media.payload);
} else if (message.type === 'binary') {
log('Media WS: binary message received (not supported)');
} catch (err) {
log('Failed to parse message:', err);
}
}

close(){
log('Media WS: closed');
close() {
log('Media WS: Connection closed');

for (let track of Object.keys(this.trackHandlers)) {
log(`Closing ${track} handler`);
for (const track of Object.keys(this.trackHandlers)) {
log(`Closing handler for track: ${track}`);
this.trackHandlers[track].close();
}
}
}

wsserver.listen(HTTP_SERVER_PORT, function(){
console.log("Server listening on: http://localhost:%s", HTTP_SERVER_PORT);
httpServer.listen(HTTP_SERVER_PORT, () => {
console.log(`Server listening on http://localhost:${HTTP_SERVER_PORT}`);
});
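
`server.js` imports `./transcription-service.js`, which is not included in this diff. For context, a minimal sketch of what that module could look like, assuming Google Cloud streaming recognition over Twilio's mulaw/8000 audio and credentials supplied via `GOOGLE_APPLICATION_CREDENTIALS`; the class shape matches how `server.js` uses it (`send`, `close`, and a `transcription` event), but the implementation details are illustrative:

```js
// Hypothetical transcription-service.js (not part of this diff).
// Assumes GOOGLE_APPLICATION_CREDENTIALS points at a service-account JSON file.
import { EventEmitter } from 'events';
import speech from '@google-cloud/speech';

export default class TranscriptionService extends EventEmitter {
  constructor() {
    super();
    this.client = new speech.SpeechClient();
    this.stream = null;
  }

  // Twilio Media Streams deliver base64-encoded mulaw audio at 8000 Hz.
  send(payload) {
    this.getStream().write(Buffer.from(payload, 'base64'));
  }

  getStream() {
    if (!this.stream) {
      this.stream = this.client
        .streamingRecognize({
          config: {
            encoding: 'MULAW',
            sampleRateHertz: 8000,
            languageCode: 'en-US',
          },
          interimResults: true,
        })
        .on('error', (err) => console.error('Speech API error:', err))
        .on('data', (data) => {
          const result = data.results[0];
          if (result && result.alternatives[0]) {
            this.emit('transcription', result.alternatives[0].transcript);
          }
        });
    }
    return this.stream;
  }

  close() {
    if (this.stream) {
      this.stream.destroy();
      this.stream = null;
    }
  }
}
```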
2 changes: 1 addition & 1 deletion node/realtime-transcriptions/templates/streams.xml
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Start>
<Stream url="wss://<ngrok url>/"></Stream>
<Stream url="wss://e6e321efa75d.ngrok.app/"></Stream>
</Start>
<Pause length="40"/>
</Response>
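
The updated template hardcodes one specific ngrok host in the `<Stream>` URL, so it must be edited by hand whenever the tunnel changes. If the host should come from the environment instead, the `/twiml` route could build the same document with the `twilio` helper's `VoiceResponse` rather than serving the static file. This is a sketch only, assuming the `twilio` package is added, `WSS_HOST` holds the public host, and `app` is the Express instance from `server.js`:

```js
// Hypothetical dynamic replacement for templates/streams.xml.
// Assumes the `twilio` package is installed and WSS_HOST is set, e.g. "abcdef.ngrok.app".
import twilio from 'twilio';

app.post('/twiml', (req, res) => {
  const response = new twilio.twiml.VoiceResponse();
  response.start().stream({ url: `wss://${process.env.WSS_HOST}/` });
  response.pause({ length: 40 });
  res.type('text/xml').send(response.toString());
});
```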