Commit 8dcd054

Updated Instagram Reel Captioner Cookbook (groq#69)
1 parent 2bbf2e0 commit 8dcd054

File tree

5 files changed: +110 −12 lines
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
+import os
+from groq import Groq
+import datetime
+from moviepy import *
+from moviepy.video.tools.subtitles import SubtitlesClip
+from moviepy.video.io.VideoFileClip import VideoFileClip
+from dotenv import load_dotenv
+load_dotenv()
+
+GROQ_API_KEY = os.environ["GROQ_API_KEY"]
+client = Groq(api_key=GROQ_API_KEY)
+
+
+def convert_mp4_to_mp3(mp4_filepath, mp3_file):
+    """
+    Converts an MP4 file to MP3.
+
+    Args:
+        mp4_filepath: Path to the input MP4 file.
+        mp3_file: Path to save the output MP3 file.
+    """
+    video_clip = VideoFileClip(mp4_filepath)
+
+    # Extract audio from video
+    video_clip.audio.write_audiofile(mp3_file)
+    print(f"Extracted audio to {mp3_file}")
+    video_clip.close()
+
+# Step 1: Transcribe Audio
+def transcribe_audio(mp3_file):
+
+    # Open the audio file
+    with open(mp3_file, "rb") as file:
+        # Create a transcription of the audio file
+        transcription = client.audio.transcriptions.create(
+            file=(mp3_file, file.read()),  # Required audio file
+            model="whisper-large-v3-turbo",  # Required model to use for transcription
+            timestamp_granularities=["word"],
+            response_format="verbose_json",  # Optional
+            language="en",  # Optional
+            temperature=0.0  # Optional
+        )
+        # Print the transcribed word segments
+        print(transcription.words)
+        return transcription.words
+
+def add_subtitles(verbose_json, width, fontsize):
+    text_clips = []
+
+    for segment in verbose_json:
+        text_clips.append(
+            TextClip(text=segment["word"],
+                     font_size=fontsize,
+                     stroke_width=5,
+                     stroke_color="black",
+                     font="./Roboto-Condensed-Bold.otf",
+                     color="white",
+                     size=(width, None),
+                     method="caption",
+                     text_align="center",
+                     margin=(30, 0)
+                     )
+            .with_start(segment["start"])
+            .with_end(segment["end"])
+            .with_position("center")
+        )
+    return text_clips
+
+# Run the Process
+video_file = "../input.mp4"
+output_file = "output_with_subtitles.mp4"
+
+# Load the video as a VideoFileClip
+original_clip = VideoFileClip(video_file)
+width = original_clip.w
+print(width)
+
+mp3_file = "../output.mp3"
+convert_mp4_to_mp3(video_file, mp3_file)
+segments = transcribe_audio(mp3_file)
+text_clip_list = add_subtitles(segments, width, fontsize=40)
+
+# Create a CompositeVideoClip that we write to a file
+final_clip = CompositeVideoClip([original_clip] + text_clip_list)
+
+final_clip.write_videofile(output_file, codec="libx264")  # Mac users may want to add audio_codec="aac" here
+print("Subtitled video saved as:", output_file)
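The timing logic in `add_subtitles` operates on plain dicts, so it can be sketched and checked without MoviePy or a Groq API key. A minimal sketch — `subtitle_plan` is a hypothetical helper, and the sample words are taken from the example JSON in the accompanying notebook:

```python
def subtitle_plan(word_segments):
    """Map word-level segments to the (text, start, end) triples that
    add_subtitles turns into positioned TextClips."""
    return [(seg["word"], seg["start"], seg["end"]) for seg in word_segments]

# Sample word segments, copied from the notebook's example JSON
words = [
    {"word": "This", "start": 0.1, "end": 0.28},
    {"word": "month", "start": 0.28, "end": 0.56},
    {"word": "I", "start": 0.56, "end": 0.78},
]
plan = subtitle_plan(words)
print(plan[0])  # ('This', 0.1, 0.28)
```

Each triple corresponds to one `TextClip` shown from `start` to `end`, which is what produces the word-by-word caption effect.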
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+GROQ_API_KEY="groq-api-key-value"
5.52 MB
Binary file not shown.
-12.3 MB
Binary file not shown.

tutorials/instagram-reel-subtitler/subtitler-tutorial.ipynb

Lines changed: 22 additions & 12 deletions
@@ -11,14 +11,10 @@
 "# Groq Whisper Instagram Reel Subtitler\n",
 "This guide will walk you through creating an automated subtitle generator for Instagram Reels using Groq Whisper. The script extracts audio from a video, transcribes it using Groq's Whisper API, and overlays word-by-word subtitles onto the video.\n",
 "\n",
-"Example video output:\n",
+"Example video output: [example_video_output.mp4](example_video_output.mp4)\n",
 "\n",
-"<video controls width=\"300\" height=\"auto\" src=\"final.mp4\" title=\"Example final video\"></video>\n",
-"\n",
-"## How It Works\n",
-"\n",
-"Technologies Used\n",
-"- Groq Whisper: AI-powered speech-to-text transcription.\n",
+"## Technologies Used\n",
+"- [Groq Whisper Large V3 Turbo](https://console.groq.com/docs/speech-to-text): AI-powered speech-to-text transcription with word-level timestamps.\n",
 "- MoviePy: Handles video and subtitle overlaying.\n",
 "- Python OS Module: Manages file paths.\n",
 "\n",
@@ -124,7 +120,7 @@
 "    with open(mp3_file, \"rb\") as file:\n",
 "        transcription = client.audio.transcriptions.create(\n",
 "            file=(mp3_file, file.read()),\n",
-"            model=\"whisper-large-v3-turbo\",\n",
+"            model=\"whisper-large-v3-turbo\", # Alternatively, use \"distil-whisper-large-v3-en\" for a faster, lower-cost English-only option\n",
 "            timestamp_granularities=[\"word\"], # Word-level timestamps\n",
 "            response_format=\"verbose_json\",\n",
 "            language=\"en\",\n",
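The hunk above only swaps a comment on the `model` argument, but the choice between the two models is easy to isolate: the keyword arguments passed to `client.audio.transcriptions.create` are an ordinary dict. A hedged sketch — `transcription_kwargs` is a hypothetical helper (not part of the cookbook), assembling the same parameters the notebook uses so they can be inspected without an API key:

```python
def transcription_kwargs(mp3_file, audio_bytes, english_only=False):
    """Build the keyword arguments for a Groq transcription request,
    choosing between the two models mentioned in the notebook."""
    model = "distil-whisper-large-v3-en" if english_only else "whisper-large-v3-turbo"
    return dict(
        file=(mp3_file, audio_bytes),       # (filename, raw bytes) tuple
        model=model,
        timestamp_granularities=["word"],   # word-level timestamps
        response_format="verbose_json",
        language="en",
        temperature=0.0,
    )

kwargs = transcription_kwargs("clip.mp3", b"", english_only=True)
print(kwargs["model"])  # distil-whisper-large-v3-en
```

The real call is then `client.audio.transcriptions.create(**kwargs)` with the actual audio bytes in place of the empty placeholder.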
@@ -145,7 +141,18 @@
 "metadata": {},
 "source": [
 "# Step 5: Overlay Subtitle Clips\n",
-"From the previous function, we'll recieve a JSON file that contains timestamped segments of words. With these word segments, we'll loop through them and create TextClips to be put into the video at the correct time."
+"From the previous function, we'll receive a JSON object that contains timestamped word segments. We'll loop through these segments and create TextClips placed into the video at the correct times.\n",
+"\n",
+"Example of the JSON we'll iterate through:\n",
+"```\n",
+"[\n",
+"    {'word': 'This', 'start': 0.1, 'end': 0.28},\n",
+"    {'word': 'month', 'start': 0.28, 'end': 0.56},\n",
+"    {'word': 'I', 'start': 0.56, 'end': 0.78},\n",
+"    {'word': 'traveled', 'start': 0.78, 'end': 1.12},\n",
+"    {'word': 'to', 'start': 1.12, 'end': 1.38},\n",
+"...\n",
+"```"
 ]
 },
 {
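The word segments shown in that example JSON are ordinary Python dicts, so they can be sanity-checked before any TextClips are built. A minimal sketch, using the sample values from the notebook cell:

```python
# Sample word segments, copied from the notebook's example JSON
words = [
    {"word": "This", "start": 0.1, "end": 0.28},
    {"word": "month", "start": 0.28, "end": 0.56},
    {"word": "I", "start": 0.56, "end": 0.78},
    {"word": "traveled", "start": 0.78, "end": 1.12},
    {"word": "to", "start": 1.12, "end": 1.38},
]
# Each word should end after it starts...
assert all(w["start"] < w["end"] for w in words)
# ...and consecutive words should not overlap on screen.
assert all(a["end"] <= b["start"] for a, b in zip(words, words[1:]))
sentence = " ".join(w["word"] for w in words)
print(sentence)  # This month I traveled to
```

Because each segment's `end` meets the next segment's `start`, exactly one word is on screen at any moment, which is what the per-word `with_start`/`with_end` calls rely on.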
@@ -231,10 +238,13 @@
 "```\n",
 "\n",
 "## Troubleshooting errors:\n",
-"- Make sure to have a video file ready before running the script\n",
-"- Make sure the path to the file is correct\n",
-"- Make sure you have a Groq API key in your .env file"
+"- On macOS, playing audio within VS Code versus opening the video in Finder involves different audio encodings. Adding `audio_codec=\"aac\"` to the output line, `final_clip.write_videofile(\"final.mp4\", codec=\"libx264\", audio_codec=\"aac\")`, lets you hear the audio on playback in Finder; without it, the audio is only audible from within VS Code."
 ]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": []
 }
 ],
 "metadata": {
