Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ Checkout the Demo (v1.0.0):

- **Chat With Webpage**: You can chat with the webpage and ask questions about the content.

- **Text-to-Speech (Kokoro)**: Select text and use the context menu "Speak selection (Kokoro)" to hear it. Use "Stop speaking (Kokoro)" to stop playback. Works on Chrome, Edge, and Firefox. First run may download the model and take a moment.

Want more features? Create an issue and let me know.

### Manual Installation
Expand Down Expand Up @@ -103,6 +105,15 @@ Default Keyboard Shortcut: `Ctrl+Shift+L`

Note: You can change the keyboard shortcuts from the extension settings on the Chrome Extension Management page.

### Kokoro TTS

- Select text on a page → right-click → "Speak selection (Kokoro)".
- To stop playback, choose "Stop speaking (Kokoro)".

Notes:
- On first use, the TTS model is downloaded, so the first playback may take a moment to start.
- Firefox performance can improve with WebGPU (optional). In `about:config`, set `dom.webgpu.enabled` to `true` and restart Firefox. The extension automatically falls back to WASM if WebGPU is unavailable.

## Keyboard Shortcuts

Page Assist supports various keyboard shortcuts to enhance your productivity:
Expand Down Expand Up @@ -134,6 +145,19 @@ You can run the extension in development mode to make changes and test them.
bun dev
```

For browser-specific development:

```bash
# Auto-loads in Firefox
bun run dev:firefox

# Auto-loads in Microsoft Edge
bun run dev:edge

# Using npm instead of Bun
npm run dev:firefox
```

This will start a development server and watch for changes in the source files. You can load the extension in your browser and test the changes.

## Browser Support
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
"turndown": "^7.1.3",
"unist-util-visit": "^5.0.0",
"yt-transcript": "^0.0.2",
"zustand": "^4.5.0"
"zustand": "^4.5.0",
"kokoro-js": "^1.0.0"
},
"devDependencies": {
"@plasmohq/prettier-plugin-sort-imports": "4.0.1",
Expand Down
37 changes: 36 additions & 1 deletion src/entries-firefox/background.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ export default defineBackground({
title: browser.i18n.getMessage("contextCustom"),
contexts: ["selection"]
})

// Kokoro TTS context menus (Firefox)
browser.contextMenus.create({
id: "kokoro-speak",
title: "Speak selection (Kokoro)",
contexts: ["selection"]
})
browser.contextMenus.create({
id: "kokoro-stop",
title: "Stop speaking (Kokoro)",
contexts: ["page", "selection"]
})

} catch (error) {
console.error("Error in initLogic:", error)
Expand Down Expand Up @@ -133,7 +145,7 @@ export default defineBackground({
}


browser.contextMenus.onClicked.addListener((info, tab) => {
browser.contextMenus.onClicked.addListener(async (info, tab) => {
if (info.menuItemId === "open-side-panel-pa") {
browser.sidebarAction.toggle()
} else if (info.menuItemId === "open-web-ui-pa") {
Expand Down Expand Up @@ -195,6 +207,29 @@ export default defineBackground({
text: info.selectionText
})
}, isCopilotRunning ? 0 : 5000)
} else if (info.menuItemId === "kokoro-speak") {
if (tab?.id) {
try {
await browser.tabs.sendMessage(tab.id, {
type: "kokoro_tts_speak",
from: "background",
text: info.selectionText
})
} catch (e) {
console.error("Failed to send kokoro_tts_speak:", e)
}
}
} else if (info.menuItemId === "kokoro-stop") {
if (tab?.id) {
try {
await browser.tabs.sendMessage(tab.id, {
type: "kokoro_tts_stop",
from: "background"
})
} catch (e) {
console.error("Failed to send kokoro_tts_stop:", e)
}
}
}
})

Expand Down
1 change: 1 addition & 0 deletions src/entries-firefox/content/tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import "../../entries/content/tts"
7 changes: 7 additions & 0 deletions src/entries-firefox/tts.content.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Firefox entrypoint for the Kokoro TTS content script.
// It is declared with runtime registration and pulls in the shared TTS module,
// which installs the message listener as an import side effect.
// NOTE(review): the Firefox context-menu handler visible in this change only
// sends messages and does not inject this script; confirm it is registered or
// injected somewhere before relying on it.
export default defineContentScript({
  matches: ["<all_urls>"],
  registration: "runtime",
  main: async () => {
    await import("../entries/content/tts")
  }
})
61 changes: 61 additions & 0 deletions src/entries/background.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,17 @@ export default defineBackground({
title: browser.i18n.getMessage("contextCustom"),
contexts: ["selection"]
})
// Kokoro TTS context menus
browser.contextMenus.create({
id: "kokoro-speak",
title: "Speak selection (Kokoro)",
contexts: ["selection"]
})
browser.contextMenus.create({
id: "kokoro-stop",
title: "Stop speaking (Kokoro)",
contexts: ["page", "selection"]
})
} catch (error) {
console.error("Error in initLogic:", error)
}
Expand Down Expand Up @@ -215,6 +226,56 @@ export default defineBackground({
},
isCopilotRunning ? 0 : 5000
)
} else if (info.menuItemId === "kokoro-speak") {
// Ask content script to speak; if not present, inject and retry
if (tab?.id) {
let sent = false
try {
await browser.tabs.sendMessage(tab.id, {
type: "kokoro_tts_speak",
from: "background",
text: info.selectionText
})
sent = true
} catch (e) {
// likely no receiver; inject then retry
}
if (!sent) {
try {
if ((chrome as any)?.scripting?.executeScript) {
await chrome.scripting.executeScript({
target: { tabId: tab.id },
files: ["content-scripts/tts.js"]
})
} else {
// Firefox MV2 fallback
await (browser.tabs as any).executeScript(tab.id, { file: "content-scripts/tts.js" })
}
} catch (e) {
// ignore injection errors; the script may already be present
}
try {
await browser.tabs.sendMessage(tab.id, {
type: "kokoro_tts_speak",
from: "background",
text: info.selectionText
})
} catch (e) {
console.error("Failed to send kokoro_tts_speak after inject:", e)
}
}
}
} else if (info.menuItemId === "kokoro-stop") {
if (tab?.id) {
try {
await browser.tabs.sendMessage(tab.id, {
type: "kokoro_tts_stop",
from: "background"
})
} catch (e) {
console.error("Failed to send kokoro_tts_stop:", e)
}
}
}
})

Expand Down
58 changes: 58 additions & 0 deletions src/entries/content/tts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { browser } from "wxt/browser"

// Kokoro TTS: lazy-load model and play audio on demand via messages from background.
// Shared promise for the model load, assigned by loadKokoro() so that callers reuse one load.
let kokoroPromise: Promise<any> | null = null
// Audio element tracked so that a later speak or stop request can pause it.
let currentAudio: HTMLAudioElement | null = null

/**
 * Lazily loads the Kokoro TTS model and shares a single load between callers.
 *
 * WebGPU with fp32 weights is attempted when `navigator.gpu` is exposed;
 * otherwise the WASM backend with q8 weights is used. If the WebGPU attempt
 * throws, the WASM/q8 configuration is tried once as a fallback.
 *
 * When the load ultimately fails, the cached promise is cleared so that the
 * next call starts a fresh attempt instead of returning the same rejection
 * for the lifetime of the page.
 *
 * @returns The instance resolved by `KokoroTTS.from_pretrained`.
 * @throws The error of the last load attempt when no backend could be loaded.
 */
async function loadKokoro() {
  if (!kokoroPromise) {
    const pending = (async () => {
      const { KokoroTTS } = await import("kokoro-js")
      const modelId = "onnx-community/Kokoro-82M-v1.0-ONNX"
      const device = ((navigator as any).gpu ? "webgpu" : "wasm") as "webgpu" | "wasm"
      const dtype = device === "webgpu" ? ("fp32" as const) : ("q8" as const)
      try {
        return await KokoroTTS.from_pretrained(modelId, { dtype, device })
      } catch (e) {
        // Retrying the identical WASM configuration would be pointless.
        if (device === "wasm") throw e
        console.warn("[Page Assist] Kokoro WebGPU load failed, falling back to WASM:", e)
        return await KokoroTTS.from_pretrained(modelId, { dtype: "q8", device: "wasm" })
      }
    })()
    kokoroPromise = pending
    // Drop a failed load from the cache so a later request can retry.
    // Callers awaiting `pending` still observe the original rejection.
    pending.catch(() => {
      if (kokoroPromise === pending) kokoroPromise = null
    })
  }
  return kokoroPromise
}

/**
 * Synthesizes `text` with the Kokoro model and plays it, replacing any audio
 * that is currently tracked.
 *
 * The generated audio is exposed through a blob URL. That URL is revoked when
 * playback ends, when the element reports an error, when `play()` rejects, or
 * when a later call replaces the element, so it is not leaked on failure.
 *
 * @param text Text to speak (the voice is fixed to "af_heart").
 * @throws Errors from model loading, generation, or `HTMLMediaElement.play()`.
 */
async function speakWithKokoro(text: string) {
  const tts = await loadKokoro()
  const raw = await tts.generate(text, { voice: "af_heart" })
  const url = URL.createObjectURL(raw.toBlob())

  // Stop whatever was playing before and free its blob URL.
  const previous = currentAudio
  if (previous) {
    try { previous.pause() } catch {}
    if (previous.src) URL.revokeObjectURL(previous.src)
  }

  const audio = new Audio(url)
  currentAudio = audio

  // Frees the blob URL and forgets the element, unless a newer one replaced it.
  const release = () => {
    URL.revokeObjectURL(url)
    if (currentAudio === audio) currentAudio = null
  }
  audio.addEventListener("ended", release, { once: true })
  audio.addEventListener("error", release, { once: true })

  try {
    await audio.play()
  } catch (err) {
    // play() can reject (for example when blocked by autoplay policy); do not leak the URL.
    release()
    throw err
  }
}

/**
 * Handles Kokoro TTS requests sent by the background script.
 *
 * - "kokoro_tts_speak": speaks `message.text`, or the page's current selection
 *   when no text was sent. Failures are logged rather than propagated.
 * - "kokoro_tts_stop": pauses the tracked audio element, if any.
 *
 * The listener is deliberately not `async`. An async listener returns a Promise
 * for every message, which claims the response even for messages meant for
 * other listeners. Unrelated messages return `undefined` here so that other
 * listeners can answer them.
 */
browser.runtime.onMessage.addListener((message) => {
  if (message?.type === "kokoro_tts_speak") {
    const text = (message.text || window.getSelection()?.toString() || "").trim()
    if (!text) return Promise.resolve()
    return speakWithKokoro(text).catch((err) => {
      console.error("[Page Assist] Kokoro TTS error:", err)
    })
  }

  if (message?.type === "kokoro_tts_stop") {
    if (currentAudio) {
      try { currentAudio.pause() } catch {}
    }
    return Promise.resolve()
  }

  // Not a Kokoro message: do not claim the response.
  return undefined
})
10 changes: 10 additions & 0 deletions src/entries/tts.content.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Kokoro TTS content-script entrypoint.
// It is kept out of the manifest ("runtime" registration) because the
// background script injects it on demand by file path.
export default defineContentScript({
  // WXT still requires a match pattern even for on-demand injection.
  matches: ["<all_urls>"],
  registration: "runtime",
  // Importing the module is what installs the TTS message listeners.
  main: async () => {
    await import("./content/tts")
  }
})
16 changes: 14 additions & 2 deletions src/libs/get-html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,13 @@ export const getDataFromCurrentTab = async () => {
})

if (data.length > 0) {
resolve(data[0].result)
resolve(
data[0].result as {
url: string
content: string
type: string
}
)
}
})
} else {
Expand All @@ -54,7 +60,13 @@ export const getDataFromCurrentTab = async () => {
})

if (data.length > 0) {
resolve(data[0].result)
resolve(
data[0].result as {
url: string
content: string
type: string
}
)
}
} catch (e) {
console.error("error", e)
Expand Down
10 changes: 8 additions & 2 deletions src/libs/get-tab-contents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { isAmazonURL, parseAmazonWebsite } from "@/parser/amazon"
import { defaultExtractContent } from "@/parser/default"
import { isTwitterProfile, isTwitterTimeline, parseTweetProfile, parseTwitterTimeline } from "@/parser/twitter"
import { isWikipedia, parseWikipedia } from "@/parser/wiki"
import { isYoutubeLink } from "@/utils/is-youtube"
import { getMaxContextSize } from "@/services/kb"
import { YtTranscript } from "yt-transcript"
import { processPDFFromURL } from "./pdf"
Expand Down Expand Up @@ -64,7 +65,12 @@ export const getTabContents = async (documents: ChatDocuments) => {
isPDF: document.contentType === 'application/pdf'
})
})
const content = pageContent[0].result
const content = pageContent[0].result as {
html: string
title: string
url: string
isPDF: boolean
}
const header = formatDocumentHeader(doc.title, doc.url)
let extractedContent = ""

Expand All @@ -74,7 +80,7 @@ export const getTabContents = async (documents: ChatDocuments) => {
extractedContent = formatTranscriptText(transcript)
}
} else if (isWikipedia(doc.url)) {
extractedContent = parseWikipedia(content)
extractedContent = parseWikipedia(content.html)
} else if (isAmazonURL(doc.url)) {
extractedContent = parseAmazonWebsite(content.html)
} else if (isTwitterProfile(doc.url)) {
Expand Down
16 changes: 16 additions & 0 deletions src/types/kokoro-js.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
 * Hand-written ambient typings for the "kokoro-js" package.
 * NOTE(review): confirm every member below against the installed package
 * version; if the package ships its own types, this file may be redundant.
 */
declare module "kokoro-js" {
/** Text-to-speech model handle. */
export class KokoroTTS {
/** Resolves to a model instance for the given model id; `options` is left untyped here. */
static from_pretrained(model: string, options?: any): Promise<KokoroTTS>
/** Resolves to the audio generated for `text`; `options` is left untyped here. */
generate(text: string, options?: any): Promise<RawAudio>
}
/** Audio object returned by `KokoroTTS.generate`. */
export interface RawAudio {
/** PCM float32 samples */
data?: Float32Array
/** sample rate in Hz */
sampling_rate?: number
/** Returns the audio as an ArrayBuffer (presumably WAV-encoded; verify). */
toWav(): ArrayBuffer
/** Returns the audio as a Blob. */
toBlob(): Blob
/** Optional: saves the audio to `path`. */
save?(path: string): Promise<void>
}
/** NOTE(review): confirm that the package actually exports `speak`. */
export function speak(text: string): Promise<void>
}
17 changes: 12 additions & 5 deletions wxt.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ export default defineConfig({
description: "__MSG_extDescription__",
default_locale: "en",
action: {},
author: "n4ze3m",
browser_specific_settings:
process.env.TARGET === "firefox"
? {
Expand Down Expand Up @@ -87,14 +86,22 @@ export default defineConfig({
}
},
content_security_policy:
process.env.TARGET !== "firefox" ?
{
process.env.TARGET !== "firefox"
? {
extension_pages:
"script-src 'self' 'wasm-unsafe-eval'; object-src 'self';"
} : "script-src 'self' 'wasm-unsafe-eval' blob:; object-src 'self'; worker-src 'self' blob:;",
}
: {
extension_pages:
"script-src 'self' 'wasm-unsafe-eval' blob:; object-src 'self'; worker-src 'self' blob:;"
},
permissions:
process.env.TARGET === "firefox"
? firefoxMV2Permissions
: chromeMV3Permissions
: chromeMV3Permissions,
// content_scripts are auto-registered by WXT via *.content.ts entrypoints
// (e.g. src/entries/**/tts.content.ts). We inject TTS on-demand from
// background to avoid loading it on every page.
// https://wxt.dev/guide/entrypoints/content-scripts.html
}
}) as any