
Commit f9aad93

Merge pull request #1 from run-llama/clelia/adding-document-chat
Adding document chat and moving to a multi-page app
2 parents 7ce3d12 + e9ac959 commit f9aad93


7 files changed: +175 -14 lines changed

.env.example

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+OPENAI_API_KEY="sk-***"
+LLAMACLOUD_API_KEY="llx-***"
+ELEVENLABS_API_KEY="sk_***"
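These placeholders are read from the environment at runtime (`utils.py` below calls `os.getenv("LLAMACLOUD_API_KEY")`, for instance). A minimal sketch of loading them, assuming `python-dotenv` is among the project's dependencies:

```python
# Minimal sketch, assuming python-dotenv is installed (e.g. uv add python-dotenv)
import os

from dotenv import load_dotenv

load_dotenv()  # reads the .env file created from .env.example

openai_key = os.getenv("OPENAI_API_KEY")
llamacloud_key = os.getenv("LLAMACLOUD_API_KEY")
elevenlabs_key = os.getenv("ELEVENLABS_API_KEY")
```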

LICENSE

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 The MIT License

-Copyright (c) Clelia Astra Bertelli
+Copyright (c) Jerry Liu

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 66 additions & 2 deletions
@@ -1,3 +1,67 @@
-# NotebookLM clone
+# NotebookLlaMa🦙

-This project is aimed at producing a fully open-source, LlamaCloud-backed alternative to NotebookLM.
+## A fluffy and open-source alternative to NotebookLM!
+
+This project is aimed at producing a fully open-source, [**LlamaCloud**](https://cloud.llamaindex.ai)-backed alternative to NotebookLM.
+
+### Get it up and running!
+
+Get the GitHub repository:
+
+```bash
+git clone https://github.com/run-llama/notebooklm-clone
+```
+
+Install dependencies:
+
+```bash
+cd notebooklm-clone/
+uv sync
+```
+
+Modify the `.env.example` file with your API keys:
+
+- `OPENAI_API_KEY`: find it [on OpenAI Platform](https://platform.openai.com/api-keys)
+- `ELEVENLABS_API_KEY`: find it [on ElevenLabs Settings](https://elevenlabs.io/app/settings/api-keys)
+- `LLAMACLOUD_API_KEY`: find it [on LlamaCloud Dashboard](https://cloud.llamaindex.ai/)
+
+Rename the file to `.env`:
+
+```bash
+mv .env.example .env
+```
+
+Now, you will have to execute the following scripts:
+
+```bash
+uv run tools/create_llama_extract_agent.py
+uv run tools/create_llama_cloud_index.py
+```
+
+And you're ready to set up the app!
+
+Run the **MCP** server:
+
+```bash
+uv run src/notebooklm_clone/server.py
+```
+
+Now, launch the Streamlit app:
+
+```bash
+streamlit run src/notebooklm_clone/Home.py
+```
+
+> [!IMPORTANT]
+>
+> _You might need to install `ffmpeg` if you do not have it installed already_
+
+And start exploring the app at `http://localhost:8751/`.
+
+### Contributing
+
+Contribute to this project following the [guidelines](./CONTRIBUTING.md).
+
+### License
+
+This project is provided under an [MIT License](LICENSE).

src/notebooklm_clone/main.py renamed to src/notebooklm_clone/Home.py

Lines changed: 6 additions & 3 deletions
@@ -53,16 +53,19 @@ def sync_create_podcast(file_content: str):

 # Display the network
 st.set_page_config(
-    page_title="NotebookLlaMa",
-    page_icon="🦙",
+    page_title="NotebookLlaMa - Home",
+    page_icon="🏠",
     layout="wide",
     menu_items={
         "Get Help": "https://github.com/run-llama/notebooklm-clone/discussions/categories/general",
         "Report a bug": "https://github.com/run-llama/notebooklm-clone/issues/",
         "About": "An OSS alternative to NotebookLM that runs with the power of a fluffy Llama!",
     },
 )
-st.title("NotebookLlaMa🦙")
+st.sidebar.header("Home🏠")
+st.sidebar.info("To switch to the Document Chat, select it from above!🔺")
+st.markdown("---")
+st.markdown("## NotebookLlaMa - Home🦙")

 file_input = st.file_uploader(
     label="Upload your source PDF file!", accept_multiple_files=False
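The sidebar switcher added above relies on Streamlit's multipage convention: with `Home.py` as the entrypoint, any script placed in a `pages/` directory beside it appears automatically in the sidebar. A sketch of the assumed layout (the chat page's filename is not shown in this commit, so the name below is hypothetical; the page itself appears in the new-file diff further down):

```
src/notebooklm_clone/
├── Home.py               # entrypoint: streamlit run src/notebooklm_clone/Home.py
└── pages/
    └── Document_Chat.py  # hypothetical name for the new chat page shown below
```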

src/notebooklm_clone/audio.py

Lines changed: 3 additions & 1 deletion
@@ -57,10 +57,11 @@ def validate_conversation(self) -> Self:


 class PodcastGenerator(BaseModel):
-    model_config: ConfigDict = ConfigDict(arbitrary_types_allowed=True)
     llm: StructuredLLM
     client: AsyncElevenLabs

+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
     @model_validator(mode="after")
     def validate_podcast(self) -> Self:
         try:
@@ -69,6 +70,7 @@ def validate_podcast(self) -> Self:
             raise ValueError(
                 f"The output class of the structured LLM must be {MultiTurnConversation.__qualname__}, your LLM has output class: {self.llm.output_cls.__qualname__}"
             )
+        return self

     async def _conversation_script(self, file_transcript: str) -> MultiTurnConversation:
         response = await self.llm.achat(
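The two changes above are Pydantic v2 fixes: `model_config` should be a plain class attribute, since annotating it (`model_config: ConfigDict = ...`) makes Pydantic try to treat it as a field and reject the model; and the added `return self` is needed because an `after`-mode `model_validator` is expected to hand back the instance. A minimal standalone sketch of the corrected pattern (class names here are hypothetical stand-ins):

```python
# Standalone sketch of the corrected Pydantic v2 pattern; Dummy is a
# hypothetical stand-in for non-Pydantic types like StructuredLLM.
from pydantic import BaseModel, ConfigDict, model_validator
from typing_extensions import Self


class Dummy:  # an arbitrary, non-Pydantic type
    pass


class Example(BaseModel):
    # plain assignment, no annotation: Pydantic picks it up as config
    model_config = ConfigDict(arbitrary_types_allowed=True)

    dep: Dummy

    @model_validator(mode="after")
    def check(self) -> Self:
        # ... validation logic ...
        return self  # after-validators must return the instance


Example(dep=Dummy())  # validates without errors
```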
Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+import streamlit as st
+import asyncio
+
+from llama_index.tools.mcp import BasicMCPClient
+
+MCP_CLIENT = BasicMCPClient(command_or_url="http://localhost:8000/mcp")
+
+
+async def chat(inpt: str):
+    result = await MCP_CLIENT.call_tool(
+        tool_name="query_index_tool", arguments={"question": inpt}
+    )
+    return result.content[0].text
+
+
+def sync_chat(inpt: str):
+    return asyncio.run(chat(inpt))
+
+
+# Chat Interface
+st.set_page_config(page_title="NotebookLlaMa - Document Chat", page_icon="🗣")
+
+st.sidebar.header("Document Chat🗣")
+st.sidebar.info("To switch to the Home page, select it from above!🔺")
+st.markdown("---")
+st.markdown("## NotebookLlaMa - Document Chat🗣")
+
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+# Display chat messages from history on app rerun
+for i, message in enumerate(st.session_state.messages):
+    with st.chat_message(message["role"]):
+        if message["role"] == "assistant" and "sources" in message:
+            # Display the main response
+            st.markdown(message["content"])
+            # Add toggle for sources
+            with st.expander("Sources"):
+                st.markdown(message["sources"])
+        else:
+            st.markdown(message["content"])
+
+# React to user input
+if prompt := st.chat_input("Ask a question about your document"):
+    # Display user message in chat message container
+    st.chat_message("user").markdown(prompt)
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Get bot response
+    with st.chat_message("assistant"):
+        with st.spinner("Thinking..."):
+            try:
+                response = sync_chat(prompt)
+
+                # Split response and sources if they exist
+                # Assuming your response format includes sources somehow
+                # You might need to modify this based on your actual response format
+                if "## Sources" in response:
+                    parts = response.split("## Sources", 1)
+                    main_response = parts[0].strip()
+                    sources = "## Sources" + parts[1].strip()
+                else:
+                    main_response = response
+                    sources = None
+
+                st.markdown(main_response)
+
+                # Add toggle for sources if they exist
+                if sources:
+                    with st.expander("Sources"):
+                        st.markdown(sources)
+                    # Add to history with sources
+                    st.session_state.messages.append(
+                        {
+                            "role": "assistant",
+                            "content": main_response,
+                            "sources": sources,
+                        }
+                    )
+                else:
+                    # Add to history without sources
+                    st.session_state.messages.append(
+                        {"role": "assistant", "content": main_response}
+                    )
+
+            except Exception as e:
+                error_msg = f"Error: {str(e)}"
+                st.markdown(error_msg)
+                st.session_state.messages.append(
+                    {"role": "assistant", "content": error_msg}
+                )
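The `"## Sources"` split in this page depends on the exact markdown heading that `query_index` in `utils.py` produces, which is why the `##Sources` → `## Sources` fix in the last diff of this commit matters. A small illustration with a made-up response string:

```python
# Illustration only: a made-up response shaped like query_index's output
response = (
    "## Answer\n\nThe document describes an open-source NotebookLM clone.\n\n"
    "## Sources\n\n- first retrieved chunk\n- second retrieved chunk"
)

parts = response.split("## Sources", 1)
main_response = parts[0].strip()           # the "## Answer" section
sources = "## Sources" + parts[1].strip()  # re-joined without a newline, mirroring the page's code
print(main_response)
print(sources)
```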

src/notebooklm_clone/utils.py

Lines changed: 3 additions & 7 deletions
@@ -6,8 +6,6 @@

 from pydantic import BaseModel, Field, model_validator
 from llama_index.core.llms import ChatMessage
-from llama_index.core.query_engine.multistep_query_engine import MultiStepQueryEngine
-from llama_index.core.indices.query.query_transform import StepDecomposeQueryTransform
 from llama_cloud_services import LlamaExtract, LlamaParse
 from llama_cloud_services.extract import SourceText
 from llama_cloud.client import AsyncLlamaCloud
@@ -90,11 +88,9 @@ class MindMapCreationFailedWarning(Warning):
 )
 PARSER = LlamaParse(api_key=os.getenv("LLAMACLOUD_API_KEY"), result_type="markdown")
 PIPELINE_ID = os.getenv("LLAMACLOUD_PIPELINE_ID")
-qe = LlamaCloudIndex(
+QE = LlamaCloudIndex(
     api_key=os.getenv("LLAMACLOUD_API_KEY"), pipeline_id=PIPELINE_ID
 ).as_query_engine(llm=LLM)
-step_decompose = StepDecomposeQueryTransform(llm=LLM)
-MS_QE = MultiStepQueryEngine(query_engine=qe, query_transform=step_decompose)
 LLM_STRUCT = LLM.as_structured_llm(MindMap)
@@ -157,13 +153,13 @@ async def get_mind_map(summary: str, highlights: List[str]) -> Union[str, None]:


 async def query_index(question: str) -> Union[str, None]:
-    response = await MS_QE.aquery(question)
+    response = await QE.aquery(question)
     if not response.response:
         return None
     sources = [node.text for node in response.source_nodes]
     return (
         "## Answer\n\n"
         + response.response
-        + "\n\n##Sources\n\n- "
+        + "\n\n## Sources\n\n- "
         + "\n- ".join(sources)
     )
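This change drops the multi-step query decomposition and sends questions straight to the LlamaCloud index's query engine. A minimal sketch of the simplified path (the `LlamaCloudIndex` import location and the standalone use of `aquery` are assumptions based on this diff; the env variables match the ones set up earlier):

```python
# Minimal sketch; the import path is an assumption based on this diff's usage
import asyncio
import os

from llama_index.indices.managed.llama_cloud import LlamaCloudIndex

QE = LlamaCloudIndex(
    api_key=os.getenv("LLAMACLOUD_API_KEY"),
    pipeline_id=os.getenv("LLAMACLOUD_PIPELINE_ID"),
).as_query_engine()


async def main() -> None:
    response = await QE.aquery("What is this document about?")
    if response.response:
        print(response.response)
        for node in response.source_nodes:
            print("-", node.text[:80])  # preview each retrieved source chunk


asyncio.run(main())
```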
