diff --git a/.gitignore b/.gitignore
index e78dd3224..99ed58ee8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ fly.toml
 __pycache__
 storage
 venv
+node_modules
 credentials.json
 token.json
 *.chainlit
diff --git a/langchain-azure-agent/.env.example b/langchain-azure-agent/.env.example
index d4bf74409..4eab16d58 100644
--- a/langchain-azure-agent/.env.example
+++ b/langchain-azure-agent/.env.example
@@ -1,5 +1,5 @@
 AZURE_OPENAI_API_VERSION=
-AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=gpt-4o
+AZURE_OPENAI_CHAT_DEPLOYMENT_NAME=gpt-4.1
 AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME=text-embedding-3-large
 AZURE_OPENAI_ENDPOINT=
 AZURE_OPENAI_API_KEY=
@@ -11,10 +11,7 @@ OAUTH_AZURE_AD_TENANT_ID=
 CHAINLIT_AUTH_SECRET=
 OAUTH_AZURE_AD_ENABLE_SINGLE_TENANT=true
 CHAINLIT_URL=http://localhost:8000/
-PTS_STOCKBALANCE_API_KEY=
 AZURE_SEARCH_APP_ID=
 AZURE_SEARCH_APP_SECRET=
 DOCUMENT_INTELLIGENCE_ENDPOINT=
-DOCUMENT_INTELLIGENCE_API_KEY=
-BING_SEARCH_API_KEY=
-BING_SEARCH_ENDPOINT=
\ No newline at end of file
+DOCUMENT_INTELLIGENCE_API_KEY=
\ No newline at end of file
diff --git a/langchain-azure-agent/README.md b/langchain-azure-agent/README.md
index 936476e8e..e8be90f5b 100644
--- a/langchain-azure-agent/README.md
+++ b/langchain-azure-agent/README.md
@@ -1,14 +1,11 @@
-# Chat Application with ToolCallingAgent
+# Chainlit application with a ReAct agent
 
-This repository contains a Chat Application powered by AI models and tools for interacting with users, processing files, and retrieving relevant information.
+This repository contains a Chainlit application powered by a LangGraph ReAct agent. The two main challenges in building an LLM chat app are sandboxed Python code execution and performant document loading; better solutions for both may emerge in the future.
 
 ## Features
 
-- **AI Agent Integration**: Supports AI tools for RAG (Retrieve-then-Answer Generation) search, web search, and file search.
-- **Azure AI Integration**: Utilizes Azure AI Document Intelligence for file processing.
-- **Memory Management**: Includes conversation summary buffer memory for maintaining context.
-- **File Upload Support**: Processes various file types like `.pdf`, `.xlsx`, `.docx`, `.pptx`, `.txt`, images, and more.
-- **Custom Handlers**: Implements handlers for streaming AI responses and OAuth integration.
+- **AI Agent Integration**: Supports AI tools for retrieval-augmented generation (RAG), sandboxed Python code execution, and uploaded-file search.
+- **Azure AI Integration**: Utilizes Azure AI Document Intelligence as well as local parsers for file processing.
+- **Dynamic Context Handling**: Adjusts context dynamically based on user input and uploaded files.
 
 ## Supported File Types
 
@@ -27,25 +24,27 @@ The application supports the following file types for upload and processing:
 - `.heif`
 - `.html`
 
-## Setup Instructions
+These types are handled differently for performance reasons. While Document Intelligence can also handle text types (parsing the layout as well!), it does so at a significant cost to performance. Similarly, Unstructured is an alternative parser for PDF and other file types, but it is roughly twice as slow on large PDFs and produces a very large Docker image (15 GB+), which can be challenging to host. The main constraint in document parsing is not open-source code but compute (GPU resources for parsing documents).
 
-Add the following configuration to `.chainlit/config.toml`:
-```toml
-cot = "tool_call"
-```
+The approach taken here is similar to that of Open WebUI and AnythingLLM: use different parsers for different file formats.
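+
+As a rough sketch of the idea (illustrative only; the real implementation is in `services/document_loader.py`, which also routes image formats to Document Intelligence when it is configured), a loader is picked per MIME type:
+
+```python
+from langchain_community.document_loaders import (
+    BSHTMLLoader,
+    Docx2txtLoader,
+    PyPDFLoader,
+    TextLoader,
+)
+
+def pick_loader(mime: str, path: str):
+    # Route each format to the cheapest parser that handles it well.
+    if mime == "text/html":
+        return BSHTMLLoader(path, open_encoding="utf-8")
+    if mime == "text/plain":
+        return TextLoader(path, autodetect_encoding=True)
+    if mime == "application/pdf":
+        return PyPDFLoader(path, extract_images=False, mode="single")
+    if mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+        return Docx2txtLoader(path)
+    raise ValueError(f"Unsupported file type: {mime}")
+```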
+
+## Python code sandbox
+
+The langchain_sandbox library is used here, since it doesn't require hosting any additional resources.
 
 ## Folder Structure
 
-- `tools`: Contains tools for RAG search, web search, and uploaded file search.
+- `tools`: Contains tools for RAG search and uploaded-file search.
 - `services`: Includes Azure integration services.
-- `handlers`: Implements custom callback handlers for streaming and OAuth.
 - `app.py`: Main application file.
 
 ## Usage
 
-1. Start the chat application.
-2. Upload files or send messages.
-3. Interact with the AI-powered agent for information retrieval, summarization, and dynamic responses.
+1. Create a virtual environment: `python -m venv .venv`
+2. Activate the virtual environment (Windows): `.venv\Scripts\activate`
+3. Install the dependencies: `pip install -r requirements.txt`
+4. Install Deno (required by the Python sandbox): https://docs.deno.com/runtime/getting_started/installation/
+5. Run the app from the virtual environment: `chainlit run app.py`
 
 ## Extending the Application
 
diff --git a/langchain-azure-agent/app.py b/langchain-azure-agent/app.py
index 27c4e3a3b..993160c5f 100644
--- a/langchain-azure-agent/app.py
+++ b/langchain-azure-agent/app.py
@@ -1,17 +1,19 @@
-from datetime import datetime
-import os
+from datetime import datetime, timezone
 from typing import Dict, Optional
+from uuid import uuid4
 
-from langchain.agents import AgentExecutor, create_tool_calling_agent
-from langchain.memory import ConversationSummaryBufferMemory
-from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.runnables import RunnableConfig
 from langchain.callbacks.base import BaseCallbackHandler
-from langchain_community.document_loaders import AzureAIDocumentIntelligenceLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_core.messages import HumanMessage, AIMessage
+from langchain_core.messages.utils import trim_messages, count_tokens_approximately
+from langchain_sandbox import PyodideSandboxTool
+from langgraph.prebuilt import create_react_agent
+from langgraph.checkpoint.memory import InMemorySaver, CheckpointMetadata, Checkpoint, ChannelVersions
+
+from services.document_loader import AsyncLoader
 
 from tools.rag_search import rag_search
-from tools.web_search import web_search
-from tools.uploaded_files_search import uploaded_files_search
+from tools.file_search import file_search
 
 from services.azure_services import AzureServices
 
@@ -19,18 +21,14 @@
 import chainlit as cl
 import mimetypes
 
-
 # Add all supported mimetypes so the app functions on app services
 mimetypes.add_type(
-    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx"
-)
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ".xlsx")
 mimetypes.add_type(
-    "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx"
-)
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document", ".docx")
 mimetypes.add_type("application/pdf", ".pdf")
 mimetypes.add_type(
-    "application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx"
-)
+    "application/vnd.openxmlformats-officedocument.presentationml.presentation", ".pptx")
 mimetypes.add_type("text/plain", ".txt")
 mimetypes.add_type("image/jpeg", ".jpeg")
 mimetypes.add_type("image/png", ".png")
@@ -39,17 +37,28 @@
 mimetypes.add_type("image/heif", ".heif")
 mimetypes.add_type("text/html", ".html")
 
+checkpointer = InMemorySaver()
 
-text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
-    model_name="gpt-4o",
-    chunk_size=5000,
-    chunk_overlap=500,
+python_code_sandbox = PyodideSandboxTool(
+    # Allow Pyodide to fetch and install Python packages that
+    # might be required by the executed code.
+    allow_net=True
 )
 
-azure_services = AzureServices()
 
-DOCUMENT_INTELLIGENCE_ENDPOINT = os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT")
-DOCUMENT_INTELLIGENCE_API_KEY = os.getenv("DOCUMENT_INTELLIGENCE_API_KEY")
+# This function is called every time before the node that calls the LLM
+def pre_model_hook(state):
+    trimmed_messages = trim_messages(
+        state["messages"],
+        strategy="last",
+        token_counter=count_tokens_approximately,
+        max_tokens=10000,
+        start_on="human",
+        end_on=("human", "tool"),
+    )
+    # Return the trimmed history under `llm_input_messages` so the agent's
+    # stored conversation state is left untouched.
+    return {"llm_input_messages": trimmed_messages}
 
 
 # Callback handler for handling streaming responses from the language model
@@ -66,25 +75,21 @@ class StreamHandler(BaseCallbackHandler):
     """
 
     def __init__(self):
-        self.msg = None
+        self.msg = cl.Message(content="")
 
     async def on_llm_new_token(self, token: str, **kwargs):
-        if not token:
-            return
-
-        if self.msg is None:
-            self.msg = cl.Message(content="", author="Assistant")
-
         await self.msg.stream_token(token)
 
     async def on_llm_end(self, response: str, **kwargs):
-        if self.msg:
-            await self.msg.send()
-            self.msg = None
+        await self.msg.send()
+        self.msg = cl.Message(content="")
+
+
+azure_services = AzureServices()
 
 
 # Function to setup the runnable environment for the chat application
-async def setup_runnable(memory: ConversationSummaryBufferMemory):
+async def setup_runnable():
     """
     Sets up the runnable environment for the chat application.
     """
@@ -92,36 +97,23 @@
 
     # Create the prompt for the agent
     # Add knowledge of current date to the prompt
-    system_prompt = (
-        "You are a helpful assistant. The current date is "
-        + datetime.now().date().strftime("%A, %Y-%m-%d")
-    )
+    system_prompt = "You are a helpful assistant. 
The current date is " + \ + datetime.now().date().strftime('%A, %Y-%m-%d') - # Create the chat prompt template, the ordering of the placeholders is important, taken from: https://smith.langchain.com/hub/hwchase17/openai-tools-agent - prompt = ChatPromptTemplate.from_messages( - [ - ("system", system_prompt), - MessagesPlaceholder(variable_name="chat_history"), - ("user", "{input}"), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ] - ) - - agent_tools = [rag_search, web_search] + tools = [rag_search, python_code_sandbox] if cl.user_session.get("uploaded_files") is True: - agent_tools = [uploaded_files_search] - - # Create the OpenAI Tools agent using the specified model, tools, and prompt - agent = create_tool_calling_agent(azure_services.model, agent_tools, prompt) - - # Create an agent executor by passing in the agent and tools - agent_executor = AgentExecutor( - agent=agent, tools=agent_tools, memory=memory, max_iterations=5 + tools = [file_search] + + agent = create_react_agent( + model=azure_services.model, + tools=tools, + pre_model_hook=pre_model_hook, + prompt=system_prompt, + checkpointer=checkpointer, ) - # Set the agent executor in the user session - cl.user_session.set("agent_executor", agent_executor) + cl.user_session.set("agent", agent) # Handler for the main chat start event @@ -134,13 +126,7 @@ async def start_chat(): cl.user_session.set("current_thread", None) cl.user_session.set("uploaded_files", False) - conversation_summary_memory = ConversationSummaryBufferMemory( - llm=azure_services.model, - max_token_limit=4000, - memory_key="chat_history", - return_messages=True, - ) - await setup_runnable(conversation_summary_memory) + await setup_runnable() # Handler for the main message event @@ -152,30 +138,25 @@ async def chat(message: cl.Message): It handles both regular messages and file uploads. """ cl.user_session.set("current_thread", message.thread_id) + # If the message contains file elements, start the file loading process if message.elements: try: await file_loader(message) - except Exception as e: + except Exception: await cl.Message( author="System", content="An error occurred while reading the file. Please try again.", ).send() - # Get the agent executor from the user session - agent_executor: AgentExecutor = cl.user_session.get("agent_executor") - - # Invoke the agent with the user message as input - try: - await agent_executor.ainvoke( - {"input": message.content}, - {"callbacks": [cl.AsyncLangchainCallbackHandler(), StreamHandler()]}, - ) - except Exception as e: - await cl.Message( - author="System", - content="An error occurred while processing the message. 
Please try again.", - ).send() + agent = cl.user_session.get("agent") + await agent.ainvoke( + {"messages": [{"role": "user", "content": message.content}]}, + config=RunnableConfig( + configurable={"thread_id": cl.user_session.get("current_thread")}, + callbacks=[cl.LangchainCallbackHandler(), StreamHandler()] + ), + ) # Function to handle file loading @@ -190,42 +171,37 @@ async def file_loader(message: cl.Message): documents = [] for element in message.elements: - loader = AzureAIDocumentIntelligenceLoader( - api_endpoint=DOCUMENT_INTELLIGENCE_ENDPOINT, - api_key=DOCUMENT_INTELLIGENCE_API_KEY, - file_path=element.path, - api_model="prebuilt-layout", - mode="markdown", - ) + loader = AsyncLoader() - docs = await cl.make_async(loader.load)() - - split_docs = await text_splitter.atransform_documents(docs) - - for doc in split_docs: - doc.metadata["thread_id"] = message.thread_id - doc.metadata["title"] = element.name - documents.append(doc) + documents.extend(await loader.aload( + file_name=element.name, + file_mime=element.mime, + file_path=element.path + )) # If there is only a single document or chunk, directly insert that chunk into the chat history. The tool to search in uploaded files has a description that tells it to not use this tool if the information it needs is already present in the context. ChatGPT also uses this strategy. if len(documents) == 1: - single_doc = documents[0] - # Insert the document's content into the chat history as a "context" field - conversation_summary_memory = cl.user_session.get("agent_executor").memory + messages = [HumanMessage( + content=f"File uploaded: title={documents[0].metadata.get('title', None)}, page_content={documents[0].page_content}")] - conversation_summary_memory.chat_memory.add_ai_message( - f"context: page_content={single_doc.page_content}, " - f"title={single_doc.metadata.get('title', None)}" - ) + await write_checkpoint(cl.user_session.get("current_thread"), messages) - await setup_runnable(conversation_summary_memory) + # Otherwise, add only the title, and vectorize the documents else: - await azure_services.uploaded_files_vector_store.aadd_documents(documents) + messages = [HumanMessage( + content=( + f"File uploaded: title={documents[0].metadata.get('title', None)}" + ) + )] + + await azure_services.uploaded_files_vector_store.aadd_documents(documents=documents) + await write_checkpoint(cl.user_session.get("current_thread"), messages) cl.user_session.set("uploaded_files", True) - await setup_runnable(cl.user_session.get("agent_executor").memory) + await setup_runnable() + await cl.Message( author="System", content="Done reading and memorizing files.", @@ -239,30 +215,14 @@ async def on_chat_resume(thread: ThreadDict): It initializes the ConversationSummaryBufferMemory with the history from the previous session. 
""" - cl.user_session.set("current_thread", thread["id"]) - - # Create a new ConversationSummaryBufferMemory with the specified parameters - conversation_summary_memory = ConversationSummaryBufferMemory( - llm=azure_services.model, - max_token_limit=4000, - memory_key="chat_history", - return_messages=True, - ) - - # Retrieve the root messages from the thread - root_messages = [m for m in thread["steps"] if m["parentId"] is None] - # Iterate over the root messages - for message in root_messages: - # Check the type of the message - if message["type"] == "USER_MESSAGE": - # Add user message to the chat memory - conversation_summary_memory.chat_memory.add_user_message(message["output"]) - else: - # Add AI message to the chat memory - conversation_summary_memory.chat_memory.add_ai_message(message["output"]) + messages = [ + (HumanMessage if s["type"] == "USER_MESSAGE" else AIMessage)( + content=s["output"]) + for s in thread["steps"] + ] - # Call the setup_runnable function to continue the chat application - await setup_runnable(conversation_summary_memory) + await write_checkpoint(thread["id"], messages) + await setup_runnable() @cl.oauth_callback @@ -288,3 +248,50 @@ async def on_chat_end(): It sets the current_thread value in the user session to None. """ cl.user_session.set("current_thread", None) + + +async def write_checkpoint( + thread_id: str, + messages: list[AIMessage | HumanMessage], + checkpoint_ns: str = "", +) -> str: + """ + Persist `messages` to a checkpoint. + + Returns the generated checkpoint-id. + """ + checkpoint_id = str(uuid4()) + + checkpoint = Checkpoint( + { + "v": 4, + "ts": datetime.now(timezone.utc).isoformat(), + "id": checkpoint_id, + "channel_versions": {"messages": "0"}, + "versions_seen": {}, + "channel_values": {"messages": list(messages)}, + } + ) + + metadata = CheckpointMetadata( + source="import", + step=len(messages) - 1, + parents={}, + ) + + cfg = RunnableConfig( + configurable={ + "thread_id": thread_id, + "checkpoint_ns": checkpoint_ns, + "checkpoint_id": checkpoint_id, + } + ) + + await checkpointer.aput( + cfg, + checkpoint, + metadata, + new_versions=ChannelVersions({"messages": "0"}), + ) + + return checkpoint_id diff --git a/langchain-azure-agent/chainlit.md b/langchain-azure-agent/chainlit.md new file mode 100644 index 000000000..4507ac467 --- /dev/null +++ b/langchain-azure-agent/chainlit.md @@ -0,0 +1,14 @@ +# Welcome to Chainlit! πŸš€πŸ€– + +Hi there, Developer! πŸ‘‹ We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs. + +## Useful Links πŸ”— + +- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) πŸ“š +- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! πŸ’¬ + +We can't wait to see what you create with Chainlit! Happy coding! πŸ’»πŸ˜Š + +## Welcome screen + +To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty. 
diff --git a/langchain-azure-agent/requirements.txt b/langchain-azure-agent/requirements.txt
index a0cc30101..bf641e882 100644
--- a/langchain-azure-agent/requirements.txt
+++ b/langchain-azure-agent/requirements.txt
@@ -1,14 +1,24 @@
-azure-core==1.29.7
-azure-search-documents==11.6.0b4
-chainlit==1.2.0
-langchain==0.3.1
-langchain-community==0.3.1
-langchain-openai==0.2.1
-langchain-unstructured==0.1.5
-requests==2.31.0
-aiofiles==23.2.1
-aiohttp==3.9.3
-aiohttp_retry==2.8.3
+azure-core==1.34.0
+azure-search-documents==11.5.3
+chainlit==2.6.9
+langchain==0.3.27
+langchain-community==0.3.27
+langchain-openai==0.3.30
+requests==2.32.4
+aiofiles==24.1.0
+aiohttp==3.12.13
+aiohttp_retry==2.9.1
 async-timeout==4.0.3
-azure-identity==1.17.1
-azure-ai-documentintelligence==1.0.0b1
\ No newline at end of file
+azure-identity==1.23.0
+azure-ai-documentintelligence==1.0.2
+langgraph==0.6.5
+langgraph-checkpoint==2.1.1
+pypdf==6.0.0
+docx2txt==0.9
+chardet==5.2.0
+beautifulsoup4==4.13.4
+unstructured==0.18.13
+networkx==3.5
+pandas==2.3.1
+python-pptx==1.0.2
+langchain_sandbox==0.0.6
\ No newline at end of file
diff --git a/langchain-azure-agent/services/azure_services.py b/langchain-azure-agent/services/azure_services.py
index 749aa5592..e1cf71356 100644
--- a/langchain-azure-agent/services/azure_services.py
+++ b/langchain-azure-agent/services/azure_services.py
@@ -17,20 +17,18 @@ class AzureServices:
     def __init__(self):
         # Azure Search service constants
         self.azure_search_service_endpoint = os.environ.get(
-            "AZURE_SEARCH_SERVICE_ENDPOINT"
-        )
-        self.azure_search_api_key = os.environ.get("AZURE_SEARCH_API_KEY")
+            'AZURE_SEARCH_SERVICE_ENDPOINT')
+        self.azure_search_api_key = os.environ.get('AZURE_SEARCH_API_KEY')
 
         # Azure OpenAI service constants
         self.azure_openai_chat_deployment_name = os.environ.get(
-            "AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"
-        )
+            'AZURE_OPENAI_CHAT_DEPLOYMENT_NAME')
         self.azure_openai_embeddings_deployment_name = os.environ.get(
-            "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"
-        )
-        self.azure_openai_api_version = os.environ.get("AZURE_OPENAI_API_VERSION")
-        self.azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
-        self.azure_openai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
+            'AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME')
+        self.azure_openai_api_version = os.environ.get(
+            'AZURE_OPENAI_API_VERSION')
+        self.azure_openai_api_key = os.environ.get('AZURE_OPENAI_API_KEY')
+        self.azure_openai_endpoint = os.environ.get('AZURE_OPENAI_ENDPOINT')
 
         # Initialize the Azure Chat OpenAI model
         self.model = AzureChatOpenAI(
@@ -38,11 +36,11 @@ def __init__(self):
             openai_api_version=self.azure_openai_api_version,
             azure_endpoint=self.azure_openai_endpoint,
             api_key=self.azure_openai_api_key,
-            model_name="gpt-4o",
-            temperature=0,
-            streaming=True,
+            streaming=True
         )
 
+        self.final_model = self.model.with_config(tags=["final_node"])
+
         # Initialize the Azure OpenAI Embeddings model
         self.embeddings = AzureOpenAIEmbeddings(
             azure_deployment=self.azure_openai_embeddings_deployment_name,
@@ -50,35 +48,30 @@ def __init__(self):
             azure_endpoint=self.azure_openai_endpoint,
             api_key=self.azure_openai_api_key,
             model="text-embedding-3-large",
+
         )
 
         # Define fields for user-upload index
         self.uploaded_files_fields = [
-            SimpleField(
-                name="id", type=SearchFieldDataType.String, key=True, filterable=True
-            ),
+            SimpleField(name="id", type=SearchFieldDataType.String,
+                        key=True, filterable=True),
             SearchableField(
-                name="content", type=SearchFieldDataType.String, searchable=True
-            ),
+                name="content",
type=SearchFieldDataType.String, searchable=True), SearchField( name="content_vector", - type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + type=SearchFieldDataType.Collection( + SearchFieldDataType.Single), searchable=True, - vector_search_dimensions=len(self.embeddings.embed_query("Text")), + vector_search_dimensions=len( + self.embeddings.embed_query("Text")), vector_search_profile_name="myHnswProfile", ), + SearchableField(name="metadata", + type=SearchFieldDataType.String, searchable=True), SearchableField( - name="metadata", type=SearchFieldDataType.String, searchable=True - ), - SearchableField( - name="title", type=SearchFieldDataType.String, searchable=True - ), - SimpleField( - name="thread_id", - type=SearchFieldDataType.String, - filterable=True, - searchable=True, - ), + name="title", type=SearchFieldDataType.String, searchable=True), + SimpleField(name="thread_id", type=SearchFieldDataType.String, + filterable=True, searchable=True) ] # Initialize Azure Search vector store for user uploads @@ -86,32 +79,31 @@ def __init__(self): azure_search_endpoint=self.azure_search_service_endpoint, azure_search_key=self.azure_search_api_key, index_name="uploaded-files-idx", - embedding_function=self.embeddings.embed_query, + embedding_function=self.embeddings, fields=self.uploaded_files_fields, ) # Define fields for RAG index self.rag_idx_fields = [ - SimpleField( - name="id", type=SearchFieldDataType.String, key=True, filterable=True - ), + SimpleField(name="id", type=SearchFieldDataType.String, + key=True, filterable=True), SearchableField( - name="content", type=SearchFieldDataType.String, searchable=True - ), + name="content", type=SearchFieldDataType.String, searchable=True), SearchField( name="content_vector", - type=SearchFieldDataType.Collection(SearchFieldDataType.Single), + type=SearchFieldDataType.Collection( + SearchFieldDataType.Single), searchable=True, - vector_search_dimensions=len(self.embeddings.embed_query("Text")), + vector_search_dimensions=len( + self.embeddings.embed_query("Text")), vector_search_profile_name="myHnswProfile", ), + SearchableField(name="metadata", + type=SearchFieldDataType.String, searchable=True), SearchableField( - name="metadata", type=SearchFieldDataType.String, searchable=True - ), - SearchableField( - name="title", type=SearchFieldDataType.String, searchable=True - ), - SimpleField(name="url", type=SearchFieldDataType.String, filterable=True), + name="title", type=SearchFieldDataType.String, searchable=True), + SimpleField(name="url", type=SearchFieldDataType.String, + filterable=True) ] # Initialize Azure Search vector store for RAG index @@ -119,6 +111,6 @@ def __init__(self): azure_search_endpoint=self.azure_search_service_endpoint, azure_search_key=self.azure_search_api_key, index_name="rag-idx", - embedding_function=self.embeddings.embed_query, - fields=self.rag_idx_fields, + embedding_function=self.embeddings, + fields=self.rag_idx_fields ) diff --git a/langchain-azure-agent/services/document_loader.py b/langchain-azure-agent/services/document_loader.py new file mode 100644 index 000000000..61121fde5 --- /dev/null +++ b/langchain-azure-agent/services/document_loader.py @@ -0,0 +1,86 @@ +import os +import chainlit as cl +from typing import List + +from langchain_text_splitters import RecursiveCharacterTextSplitter +from langchain_core.documents import Document +from langchain_community.document_loaders import ( + AzureAIDocumentIntelligenceLoader, + BSHTMLLoader, + PyPDFLoader, + TextLoader, + UnstructuredExcelLoader, + 
UnstructuredPowerPointLoader,
+    Docx2txtLoader,
+)
+
+
+class AsyncLoader:
+    def __init__(self):
+        self.text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+            model_name="gpt-4o",
+            chunk_size=50000,
+            chunk_overlap=5000,
+        )
+        self.document_intelligence_endpoint = os.getenv(
+            "DOCUMENT_INTELLIGENCE_ENDPOINT")
+        self.document_intelligence_api_key = os.getenv(
+            "DOCUMENT_INTELLIGENCE_API_KEY")
+        self.api_model = "prebuilt-layout"
+        self.mode = "markdown"
+
+    async def aload(
+        self, file_name: str, file_mime: str, file_path: str
+    ) -> List[Document]:
+        documents = []
+        loader = self._get_loader(file_mime, file_path)
+        docs = await cl.make_async(loader.load)()
+        split_docs = await self.text_splitter.atransform_documents(docs)
+
+        for doc in split_docs:
+            doc.metadata["thread_id"] = cl.user_session.get("current_thread")
+            doc.metadata["title"] = file_name
+            documents.append(doc)
+
+        return documents
+
+    def _get_loader(self, file_mime: str, file_path: str):
+
+        if file_mime in {"image/jpeg", "image/png", "image/bmp", "image/tiff"}:
+            ep = self.document_intelligence_endpoint
+            key = self.document_intelligence_api_key
+
+            if ep and key:
+                return AzureAIDocumentIntelligenceLoader(
+                    api_endpoint=ep,
+                    api_key=key,
+                    file_path=file_path,
+                    api_model=self.api_model,
+                    mode=self.mode,
+                )
+
+            # Neither present → local mode doesn't support images
+            if not ep and not key:
+                raise ValueError(
+                    "Images are not supported when using the local version.")
+
+            # Exactly one missing → be explicit which one
+            if not ep:
+                raise ValueError(
+                    "Azure Document Intelligence endpoint is missing.")
+            if not key:
+                raise ValueError(
+                    "Azure Document Intelligence API key is missing.")
+
+        if file_mime == "text/html":
+            return BSHTMLLoader(file_path, open_encoding="utf-8")
+        if file_mime == "text/plain":
+            return TextLoader(file_path, autodetect_encoding=True)
+        if file_mime == "application/pdf":
+            return PyPDFLoader(file_path, extract_images=False, mode="single")
+        if file_mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+            return Docx2txtLoader(file_path)
+        if file_mime == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            return UnstructuredExcelLoader(file_path)
+        if file_mime == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
+            return UnstructuredPowerPointLoader(file_path)
+
+        # No matching parser: fail fast instead of returning None,
+        # which would crash later in aload()
+        raise ValueError(f"Unsupported file type: {file_mime}")
diff --git a/langchain-azure-agent/tools/file_search.py b/langchain-azure-agent/tools/file_search.py
new file mode 100644
index 000000000..65701f789
--- /dev/null
+++ b/langchain-azure-agent/tools/file_search.py
@@ -0,0 +1,47 @@
+import chainlit as cl
+from langchain.tools import tool
+from services.azure_services import AzureServices
+from pydantic import BaseModel, Field
+
+
+azure_services = AzureServices()
+
+
+class SearchInput(BaseModel):
+    query: str = Field(
+        description="Provide a semantic search query. Avoid overly broad or vague queries. Example of good queries: 'Key findings from the 2023 financial report.', 'Summary of climate change policies in Document X.' Avoid: Single-word queries or overly general searches, like 'report.'"
+    )
+
+
+@tool("file_search", args_schema=SearchInput)
+async def file_search(query: str) -> list[dict] | dict:
+    """
+Use this tool, the uploaded files search tool, to retrieve relevant information from uploaded files.
+
+Relevant parts of uploaded documents will be included in the conversation when needed. Use this tool only if they lack the details required to fulfill the user's request.
+ +Think carefully about how the information you find relates to the user's request. Respond as soon as you find information that clearly answers the request. + +You should only issue multiple queries when the user's question needs to be decomposed to find different facts. In other scenarios, prefer providing a single, well-designed query. + +Always provide references using markdown footnotes. For example: + +Revenue increased by 10% in 2023. [^1] + +Then, list the footnote references at the bottom of the document: + +[^1]: 2023 Financial Report, Page 5 + """ + try: + result = await azure_services.uploaded_files_vector_store.asimilarity_search( + query=query, k=5, search_type="similarity", filters="thread_id eq '{}'".format(cl.user_session.get('current_thread'))) + + return [ + { + "page_content": doc.page_content, + "title": doc.metadata["title"] + } + for doc in result + ] + except Exception: + return {"response": "An error occurred during the search.", "instructions": "Notify user of the error and provide guidance on alternative steps."} diff --git a/langchain-azure-agent/tools/rag_search.py b/langchain-azure-agent/tools/rag_search.py index fad3305e9..fd46e9448 100644 --- a/langchain-azure-agent/tools/rag_search.py +++ b/langchain-azure-agent/tools/rag_search.py @@ -12,38 +12,34 @@ class SearchInput(BaseModel): ) -@tool("rag-search-tool", args_schema=SearchInput) +@tool("rag_search", args_schema=SearchInput) async def rag_search(query: str) -> str: """ - Use this tool, the rag search tool, to access a large collection of documents that can be searched using semantic search. +Use this tool, the rag search tool, to access a large collection of documents that can be searched using semantic search. - Think carefully about how the information you find relates to the user's request. Respond as soon as you find information that clearly answers the request. +Think carefully about how the information you find relates to the user's request. Respond as soon as you find information that clearly answers the request. - You should only issue multiple queries when the user's question needs to be decomposed to find different facts. In other scenarios, prefer providing a single, well-designed query. +You should only issue multiple queries when the user's question needs to be decomposed to find different facts. In other scenarios, prefer providing a single, well-designed query. - Always provide references using markdown footnotes. For example: +Always provide references using markdown footnotes. For example: - Revenue increased by 10% in 2023. [^1] +Revenue increased by 10% in 2023. 
[^1] - Then, list the footnote references at the bottom of the document: +Then, list the footnote references at the bottom of the document: - [^1]: [2023 Financial Report, Page 5](https://example.com/report.pdf) +[^1]: [2023 Financial Report, Page 5](https://example.com/report.pdf) """ try: result = await azure_services.rag_vector_store.asimilarity_search( - query=query, k=5, search_type="similarity" - ) + query=query, k=5, search_type="similarity") return [ { "page_content": doc.page_content, "url": doc.metadata["url"], - "title": doc.metadata["title"], + "title": doc.metadata["title"] } for doc in result ] except Exception: - return { - "response": "An error occurred during the search.", - "instructions": "Notify user of the error and provide guidance on alternative steps.", - } + return {"response": "An error occurred during the search.", "instructions": "Notify user of the error and provide guidance on alternative steps."} diff --git a/langchain-azure-agent/tools/uploaded_files_search.py b/langchain-azure-agent/tools/uploaded_files_search.py deleted file mode 100644 index 41fbd6d3c..000000000 --- a/langchain-azure-agent/tools/uploaded_files_search.py +++ /dev/null @@ -1,51 +0,0 @@ -import chainlit as cl -from langchain.tools import tool -from services.azure_services import AzureServices -from pydantic import BaseModel, Field - - -azure_services = AzureServices() - - -class SearchInput(BaseModel): - query: str = Field( - description="Provide a semantic search query. Avoid overly broad or vague queries. Example of good queries: 'Key findings from the 2023 financial report.', 'Summary of climate change policies in Document X.' Avoid: Single-word queries or overly general searches, like 'report.'" - ) - - -@tool("uploaded-files-search-tool", args_schema=SearchInput) -async def uploaded_files_search(query: str) -> str: - """ - Use this tool, the uploaded files search tool, to retrieve relevant information from uploaded files. - - Relevant parts of uploaded documents will be included in the conversation when needed. Use this tool only if they lack the details required to fulfill the user's request. - - Think carefully about how the information you find relates to the user's request. Respond as soon as you find information that clearly answers the request. - - You should only issue multiple queries when the user's question needs to be decomposed to find different facts. In other scenarios, prefer providing a single, well-designed query. - - Always provide references using markdown footnotes. For example: - - Revenue increased by 10% in 2023. 
[^1] - - Then, list the footnote references at the bottom of the document: - - [^1]: 2023 Financial Report, Page 5 - """ - try: - result = await azure_services.uploaded_files_vector_store.asimilarity_search( - query=query, - k=5, - search_type="similarity", - filters="thread_id eq '{}'".format(cl.user_session.get("current_thread")), - ) - - return [ - {"page_content": doc.page_content, "title": doc.metadata["title"]} - for doc in result - ] - except Exception: - return { - "response": "An error occurred during the search.", - "instructions": "Notify user of the error and provide guidance on alternative steps.", - } diff --git a/langchain-azure-agent/tools/web_search.py b/langchain-azure-agent/tools/web_search.py deleted file mode 100644 index 128689b3c..000000000 --- a/langchain-azure-agent/tools/web_search.py +++ /dev/null @@ -1,60 +0,0 @@ -import os -import requests -from langchain.tools import tool -from pydantic import BaseModel, Field - -# Add your Bing Search V7 subscription key and endpoint to your environment variables. -BING_SEARCH_API_KEY = os.getenv("BING_SEARCH_API_KEY") -BING_ENDPOINT = os.getenv("BING_SEARCH_ENDPOINT") + "v7.0/search" - - -class SearchInput(BaseModel): - query: str = Field( - description="Provide a concise keyword-based query. Examples: 'financial report 2023,' 'AI trends 2024.' Avoid vague terms like 'report' or full sentences." - ) - mkt: str = Field( - default="en-US", - description="Specify the target market and language (e.g., 'en-US' for US English, 'fr-FR' for French in France). Defaults to 'en-US.'", - ) - - -@tool("web-search-tool", args_schema=SearchInput) -async def web_search(query: str, mkt: str = "en-US") -> str: - """ - Use this tool, the web search tool to search for information on the web via the Bing Search API. - - If no results are found an empty list is returned. - - Example response: - 1. **[The Benefits of Renewable Energy](https://example.com/renewable-energy)** - *Source*: [example.com](https://example.com/renewable-energy) - *Snippet*: Renewable energy reduces carbon emissions and promotes sustainability. Key technologies include solar, wind, and hydro power. - """ - params = {"q": query, "mkt": mkt, "count": 5} - headers = {"Ocp-Apim-Subscription-Key": BING_SEARCH_API_KEY} - - # Call the API - try: - response = requests.get(BING_ENDPOINT, headers=headers, params=params) - response.raise_for_status() - - search_results = response.json() - - # Get the webpages from the search results - result = search_results.get("webPages", {}).get("value", []) - - return [ - { - "title": webpage.get("name"), - "url": webpage.get("url"), - "source": webpage.get("displayUrl"), - "snippet": webpage.get("snippet"), - } - for webpage in result - ] - - except Exception: - return { - "response": "An error occurred during the search.", - "instructions": "Notify user of the error and provide guidance on alternative steps.", - }
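The Python sandbox introduced in `app.py` can also be exercised on its own. A minimal sketch, assuming Deno is installed per the README's setup steps, and assuming the tool accepts the Python source under a `"code"` input key:

```python
import asyncio

from langchain_sandbox import PyodideSandboxTool

async def main() -> None:
    # allow_net=True lets Pyodide fetch packages, mirroring the app.py configuration.
    sandbox = PyodideSandboxTool(allow_net=True)
    # The "code" key is assumed here to match the tool's argument schema.
    result = await sandbox.ainvoke({"code": "print(sum(range(10)))"})
    print(result)  # stdout captured by the sandbox

asyncio.run(main())
```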