n4ze3m · groxaxo · Aug 9, 2025 · Aug 10, 2025 · Aug 10, 2025 · Aug 14, 2025
diff --git a/README.md b/README.md
@@ -41,6 +41,8 @@ Checkout the Demo (v1.0.0):
 
 - **Chat With Webpage**: You can chat with the webpage and ask questions about the content.
 
+- **Text-to-Speech (Kokoro)**: Select text, then choose "Speak selection (Kokoro)" from the context menu to hear it; choose "Stop speaking (Kokoro)" to stop playback. Works on Chrome, Edge, and Firefox. The first run downloads the model, so playback may take a moment to start.
+
 want more features? Create an issue and let me know.
 
 ### Manual Installation
@@ -103,6 +105,15 @@ Default Keyboard Shortcut: `Ctrl+Shift+L`
 
 Note: You can change the keyboard shortcuts from the extension settings on the Chrome Extension Management page.
 
+### Kokoro TTS
+
+- Select text on a page → right-click → "Speak selection (Kokoro)".
+- To stop playback, choose "Stop speaking (Kokoro)".
+
+Notes:
+- On first use, the TTS model is downloaded; playback may take a moment.
+- Optional: WebGPU can improve performance in Firefox. In `about:config`, set `dom.webgpu.enabled` to `true` and restart the browser. If WebGPU is unavailable, the extension automatically falls back to WASM.
+
 ## Keyboard Shortcuts
 
 Page Assist supports various keyboard shortcuts to enhance your productivity:
@@ -134,6 +145,19 @@ You can run the extension in development mode to make changes and test them.
 bun dev
 ```
 
+For browser-specific development:
+
+```bash
+# Auto-loads in Firefox
+bun run dev:firefox
+
+# Auto-loads in Microsoft Edge
+bun run dev:edge
+
+# Using npm instead of Bun
+npm run dev:firefox
+```
+
 This will start a development server and watch for changes in the source files. You can load the extension in your browser and test the changes.
 
 ## Browser Support

diff --git a/package.json b/package.json
@@ -73,7 +73,8 @@
     "turndown": "^7.1.3",
     "unist-util-visit": "^5.0.0",
     "yt-transcript": "^0.0.2",
-    "zustand": "^4.5.0"
+    "zustand": "^4.5.0",
+    "kokoro-js": "^1.0.0"
   },
   "devDependencies": {
     "@plasmohq/prettier-plugin-sort-imports": "4.0.1",

diff --git a/src/entries-firefox/background.ts b/src/entries-firefox/background.ts
@@ -74,6 +74,18 @@ export default defineBackground({
           title: browser.i18n.getMessage("contextCustom"),
           contexts: ["selection"]
         })
+
+        // Kokoro TTS context menus (Firefox)
+        browser.contextMenus.create({
+          id: "kokoro-speak",
+          title: "Speak selection (Kokoro)",
+          contexts: ["selection"]
+        })
+        browser.contextMenus.create({
+          id: "kokoro-stop",
+          title: "Stop speaking (Kokoro)",
+          contexts: ["page", "selection"]
+        })
 
       } catch (error) {
         console.error("Error in initLogic:", error)
@@ -133,7 +145,7 @@ export default defineBackground({
     }
 
 
-    browser.contextMenus.onClicked.addListener((info, tab) => {
+    browser.contextMenus.onClicked.addListener(async (info, tab) => {
       if (info.menuItemId === "open-side-panel-pa") {
         browser.sidebarAction.toggle()
       } else if (info.menuItemId === "open-web-ui-pa") {
@@ -195,6 +207,29 @@ export default defineBackground({
             text: info.selectionText
           })
         }, isCopilotRunning ? 0 : 5000)
+      } else if (info.menuItemId === "kokoro-speak") {
+        if (tab?.id) {
+          try {
+            await browser.tabs.sendMessage(tab.id, {
+              type: "kokoro_tts_speak",
+              from: "background",
+              text: info.selectionText
+            })
+          } catch (e) {
+            console.error("Failed to send kokoro_tts_speak:", e)
+          }
+        }
+      } else if (info.menuItemId === "kokoro-stop") {
+        if (tab?.id) {
+          try {
+            await browser.tabs.sendMessage(tab.id, {
+              type: "kokoro_tts_stop",
+              from: "background"
+            })
+          } catch (e) {
+            console.error("Failed to send kokoro_tts_stop:", e)
+          }
+        }
       }
     })
 

diff --git a/src/entries-firefox/content/tts.ts b/src/entries-firefox/content/tts.ts
@@ -0,0 +1 @@
+import "../../entries/content/tts"
diff --git a/src/entries-firefox/tts.content.ts b/src/entries-firefox/tts.content.ts
@@ -0,0 +1,7 @@
+export default defineContentScript({ // Firefox entrypoint for the Kokoro TTS content script
+  registration: "runtime", // NOTE(review): presumably needs explicit injection; the visible Firefox background changes only send messages - confirm it is injected
+  matches: ["<all_urls>"],
+  async main() {
+    await import("../entries/content/tts") // shared TTS module; it registers the runtime message listener
+  }
+})
diff --git a/src/entries/background.ts b/src/entries/background.ts
@@ -78,6 +78,17 @@ export default defineBackground({
           title: browser.i18n.getMessage("contextCustom"),
           contexts: ["selection"]
         })
+        // Kokoro TTS context menus
+        browser.contextMenus.create({
+          id: "kokoro-speak",
+          title: "Speak selection (Kokoro)",
+          contexts: ["selection"]
+        })
+        browser.contextMenus.create({
+          id: "kokoro-stop",
+          title: "Stop speaking (Kokoro)",
+          contexts: ["page", "selection"]
+        })
       } catch (error) {
         console.error("Error in initLogic:", error)
       }
@@ -215,6 +226,56 @@ export default defineBackground({
           },
           isCopilotRunning ? 0 : 5000
         )
+      } else if (info.menuItemId === "kokoro-speak") {
+        // Ask content script to speak; if not present, inject and retry
+        if (tab?.id) {
+          let sent = false
+          try {
+            await browser.tabs.sendMessage(tab.id, {
+              type: "kokoro_tts_speak",
+              from: "background",
+              text: info.selectionText
+            })
+            sent = true
+          } catch (e) {
+            // likely no receiver; inject then retry
+          }
+          if (!sent) {
+            try {
+              if ((chrome as any)?.scripting?.executeScript) {
+                await chrome.scripting.executeScript({
+                  target: { tabId: tab.id },
+                  files: ["content-scripts/tts.js"]
+                })
+              } else {
+                // Firefox MV2 fallback
+                await (browser.tabs as any).executeScript(tab.id, { file: "content-scripts/tts.js" })
+              }
+            } catch (e) {
+              // ignore injection errors; the script may already be present
+            }
+            try {
+              await browser.tabs.sendMessage(tab.id, {
+                type: "kokoro_tts_speak",
+                from: "background",
+                text: info.selectionText
+              })
+            } catch (e) {
+              console.error("Failed to send kokoro_tts_speak after inject:", e)
+            }
+          }
+        }
+      } else if (info.menuItemId === "kokoro-stop") {
+        if (tab?.id) {
+          try {
+            await browser.tabs.sendMessage(tab.id, {
+              type: "kokoro_tts_stop",
+              from: "background"
+            })
+          } catch (e) {
+            console.error("Failed to send kokoro_tts_stop:", e)
+          }
+        }
       }
     })
 

diff --git a/src/entries/content/tts.ts b/src/entries/content/tts.ts
@@ -0,0 +1,112 @@
+import { browser } from "wxt/browser"
+import type { KokoroTTS } from "kokoro-js"
+
+// Kokoro TTS content script: lazy-loads the model and plays audio on demand
+// in response to messages sent by the background script.
+
+const KOKORO_MODEL_ID = "onnx-community/Kokoro-82M-v1.0-ONNX"
+
+// Shared model load (pending or resolved); reset to null when loading fails.
+let kokoroPromise: Promise<KokoroTTS> | null = null
+// Audio element of the utterance that is currently playing, if any.
+let currentAudio: HTMLAudioElement | null = null
+// Object URL backing currentAudio, tracked so it is always revoked.
+let currentUrl: string | null = null
+// Bumped by every speak/stop request so superseded generations can be dropped.
+let latestRequest = 0
+
+/**
+ * Returns the shared Kokoro model, starting the load on first use.
+ *
+ * WebGPU (fp32) is attempted when navigator.gpu exists; otherwise, or if that
+ * attempt throws, the WASM (q8) backend is used. A failed load is not cached,
+ * so the next call starts a fresh attempt.
+ */
+async function loadKokoro(): Promise<KokoroTTS> {
+  if (!kokoroPromise) {
+    kokoroPromise = (async () => {
+      const kokoro = await import("kokoro-js")
+      if ((navigator as any).gpu) {
+        try {
+          return await kokoro.KokoroTTS.from_pretrained(KOKORO_MODEL_ID, { dtype: "fp32", device: "webgpu" })
+        } catch (e) {
+          console.warn("[Page Assist] Kokoro WebGPU init failed; falling back to WASM:", e)
+        }
+      }
+      return await kokoro.KokoroTTS.from_pretrained(KOKORO_MODEL_ID, { dtype: "q8", device: "wasm" })
+    })().catch((e: unknown) => {
+      // Drop the rejected promise so a later request can retry the load.
+      kokoroPromise = null
+      throw e
+    })
+  }
+  return kokoroPromise
+}
+
+/** Pauses the current utterance, if any, and releases its object URL. */
+function stopPlayback(): void {
+  if (currentAudio) {
+    currentAudio.pause()
+    currentAudio = null
+  }
+  if (currentUrl) {
+    URL.revokeObjectURL(currentUrl)
+    currentUrl = null
+  }
+}
+
+/**
+ * Synthesizes text with the "af_heart" voice and plays it, replacing any
+ * utterance that is already playing.
+ *
+ * Resolves once playback has started, or without playing if a newer speak or
+ * stop request arrived while the model was loading or generating. Rejects if
+ * loading, generation or playback fails.
+ */
+async function speakWithKokoro(text: string): Promise<void> {
+  const request = ++latestRequest
+  const tts = await loadKokoro()
+  const raw = await tts.generate(text, { voice: "af_heart" })
+  // Superseded while loading or generating: discard the result.
+  if (request !== latestRequest) return
+
+  stopPlayback()
+
+  const url = URL.createObjectURL(raw.toBlob())
+  const audio = new Audio(url)
+  currentAudio = audio
+  currentUrl = url
+  audio.addEventListener("ended", () => {
+    // If this element was already replaced or stopped, its URL is gone too.
+    if (currentAudio === audio) stopPlayback()
+  })
+  try {
+    await audio.play()
+  } catch (err) {
+    // A stop/replace during a pending play() rejects it; that is not an error.
+    if (currentAudio !== audio) return
+    stopPlayback()
+    throw err
+  }
+}
+
+// Deliberately not async: an async listener returns a promise for every
+// message, which on promise-based runtimes can take over the response for
+// messages meant for other listeners.
+browser.runtime.onMessage.addListener((message) => {
+  if (message?.type === "kokoro_tts_speak") {
+    const text = (message.text || window.getSelection()?.toString() || "").trim()
+    if (!text) return Promise.resolve()
+    return speakWithKokoro(text).catch((err) => {
+      console.error("[Page Assist] Kokoro TTS error:", err)
+    })
+  }
+  if (message?.type === "kokoro_tts_stop") {
+    // Also invalidates any generation in flight so it never starts playing.
+    latestRequest++
+    stopPlayback()
+    return Promise.resolve()
+  }
+  // Unrelated message: return nothing so other listeners can answer it.
+  return undefined
+})
diff --git a/src/entries/tts.content.ts b/src/entries/tts.content.ts
@@ -0,0 +1,10 @@
+export default defineContentScript({ // Kokoro TTS content-script entrypoint
+  // Not registered in the manifest; the background script injects it on demand.
+  registration: "runtime",
+  // WXT still requires `matches`, even though injection is done by file path.
+  matches: ["<all_urls>"],
+  async main() {
+    // Load the TTS module, which registers the runtime message listener.
+    await import("./content/tts")
+  }
+})
diff --git a/src/libs/get-html.ts b/src/libs/get-html.ts
@@ -39,7 +39,13 @@ export const getDataFromCurrentTab = async () => {
         })
 
         if (data.length > 0) {
-          resolve(data[0].result)
+          resolve(
+            data[0].result as {
+              url: string
+              content: string
+              type: string
+            }
+          )
         }
       })
     } else {
@@ -54,7 +60,13 @@ export const getDataFromCurrentTab = async () => {
             })
 
             if (data.length > 0) {
-              resolve(data[0].result)
+              resolve(
+                data[0].result as {
+                  url: string
+                  content: string
+                  type: string
+                }
+              )
             }
           } catch (e) {
             console.error("error", e)

diff --git a/src/libs/get-tab-contents.ts b/src/libs/get-tab-contents.ts
@@ -3,6 +3,7 @@ import { isAmazonURL, parseAmazonWebsite } from "@/parser/amazon"
 import { defaultExtractContent } from "@/parser/default"
 import { isTwitterProfile, isTwitterTimeline, parseTweetProfile, parseTwitterTimeline } from "@/parser/twitter"
 import { isWikipedia, parseWikipedia } from "@/parser/wiki"
+import { isYoutubeLink } from "@/utils/is-youtube"
 import { getMaxContextSize } from "@/services/kb"
 import { YtTranscript } from "yt-transcript"
 import { processPDFFromURL } from "./pdf"
@@ -64,7 +65,12 @@ export const getTabContents = async (documents: ChatDocuments) => {
                     isPDF: document.contentType === 'application/pdf'
                 })
             })
-            const content = pageContent[0].result
+            const content = pageContent[0].result as {
+                html: string
+                title: string
+                url: string
+                isPDF: boolean
+            }
             const header = formatDocumentHeader(doc.title, doc.url)
             let extractedContent = ""
 
@@ -74,7 +80,7 @@ export const getTabContents = async (documents: ChatDocuments) => {
                     extractedContent = formatTranscriptText(transcript)
                 }
             } else if (isWikipedia(doc.url)) {
-                extractedContent = parseWikipedia(content)
+                extractedContent = parseWikipedia(content.html)
             } else if (isAmazonURL(doc.url)) {
                 extractedContent = parseAmazonWebsite(content.html)
             } else if (isTwitterProfile(doc.url)) {

diff --git a/src/types/kokoro-js.d.ts b/src/types/kokoro-js.d.ts
@@ -0,0 +1,16 @@
+declare module "kokoro-js" { // ambient type declarations for the kokoro-js package
+  export class KokoroTTS {
+    static from_pretrained(model: string, options?: any): Promise<KokoroTTS> // called with { dtype, device } by the TTS content script
+    generate(text: string, options?: any): Promise<RawAudio> // called with { voice } by the TTS content script
+  }
+  export interface RawAudio {
+    /** PCM float32 samples. NOTE(review): upstream may name this field `audio` - confirm against the installed version. */
+    data?: Float32Array
+    /** Sample rate in Hz. */
+    sampling_rate?: number
+    toWav(): ArrayBuffer
+    toBlob(): Blob // the TTS content script turns this into an object URL for playback
+    save?(path: string): Promise<void>
+  }
+  export function speak(text: string): Promise<void> // NOTE(review): not referenced in the visible code - confirm the package really exports it
+}
diff --git a/wxt.config.ts b/wxt.config.ts
@@ -59,7 +59,6 @@ export default defineConfig({
     description: "__MSG_extDescription__",
     default_locale: "en",
     action: {},
-    author: "n4ze3m",
     browser_specific_settings:
       process.env.TARGET === "firefox"
         ? {
@@ -87,14 +86,22 @@ export default defineConfig({
       }
     },
     content_security_policy:
-      process.env.TARGET !== "firefox" ?
-        {
+      process.env.TARGET !== "firefox"
+        ? {
           extension_pages:
             "script-src 'self' 'wasm-unsafe-eval'; object-src 'self';"
-        } :  "script-src 'self' 'wasm-unsafe-eval' blob:; object-src 'self'; worker-src 'self' blob:;",
+        }
+        : {
+          extension_pages:
+            "script-src 'self' 'wasm-unsafe-eval' blob:; object-src 'self'; worker-src 'self' blob:;"
+        },
     permissions:
       process.env.TARGET === "firefox"
         ? firefoxMV2Permissions
-        : chromeMV3Permissions
+        : chromeMV3Permissions,
+    // WXT discovers content scripts from *.content.ts entrypoints (e.g.
+    // src/entries/**/tts.content.ts). The TTS script uses `registration: "runtime"`,
+    // so it is injected on demand from the background, not loaded on every page.
+    // https://wxt.dev/guide/entrypoints/content-scripts.html
   }
 }) as any