Merge pull request #10 from Anush008/qdrant-rag

recchris · web-flow · commit 91701f458228 · 2024-10-04T15:23:39.000-04:00
feat: RBR using Qdrant
diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@ The Rule-based Retrieval package is a Python package that enables you to create
 
 - Python 3.10 or higher
 - OpenAI API key
-- Pinecone or Milvus API key
+- Pinecone, Milvus or Qdrant credentials
 
 ### Install from PyPI
 
@@ -87,6 +87,7 @@ Check out the `examples/` directory for sample scripts demonstrating how to use
 
 - [Milvus](docs/milvus.md) 
 - [Pinecone](docs/pinecone.md)
+- [Qdrant](docs/qdrant.md)
 
 # Contributing
 
diff --git a/examples/qdrant/create_collection.py b/examples/qdrant/create_collection.py
@@ -0,0 +1,32 @@
+"""Example of creating a Qdrant collection and uploading documents to it."""
+
+import logging
+
+from openai import OpenAI
+from qdrant_client import QdrantClient
+
+from src.whyhow_rbr.rag_qdrant import Client
+
+# Parameters
+collection_name = "<collection_name>"  # Replace with your collection name
+pdfs = (
+    []
+)  # Replace with the paths to your PDFs, e.g. ["path/to/pdf1.pdf", "path/to/pdf2.pdf"]
+logging_level = logging.INFO
+
+# Logging
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+)
+logger = logging.getLogger("create_index")
+logger.setLevel(logging_level)
+
+
+client = Client(
+    OpenAI(),  # Set OPENAI_API_KEY environment variable
+    QdrantClient(url="http://localhost:6333"),
+)
+
+client.create_collection(collection_name)
+client.upload_documents(collection_name, documents=pdfs)
diff --git a/examples/qdrant/query.py b/examples/qdrant/query.py
@@ -0,0 +1,57 @@
+"""Example demonstrating how to perform RAG."""
+
+import logging
+
+from openai import OpenAI
+from qdrant_client import QdrantClient
+
+from src.whyhow_rbr.rag_qdrant import Client, Rule
+
+# Parameters
+collection_name = "<collection_name>"
+question = ""  # Replace with your question
+logging_level = logging.INFO  # Set to logging.DEBUG for more verbosity
+top_k = 5
+
+# Logging
+logging.basicConfig(
+    level=logging.WARNING,
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+)
+logger = logging.getLogger("querying")
+logger.setLevel(logging_level)
+logging.getLogger("whyhow_rbr").setLevel(logging_level)
+
+
+client = Client(
+    OpenAI(),  # Set OPENAI_API_KEY environment variable
+    QdrantClient(url="http://localhost:6333"),
+)
+
+rules = [
+    Rule(
+        # Replace with your filename
+        filename="name/of/pdf_1.pdf",
+        page_numbers=[2],
+        keywords=["keyword1", "keyword2"],
+    ),
+    Rule(
+        # Replace with your filename
+        filename="name/of/pdf_1.pdf",
+        page_numbers=[1],
+        keywords=[],
+    ),
+]
+
+result = client.query(
+    question=question,
+    collection_name=collection_name,
+    rules=rules,
+    top_k=top_k,
+    process_rules_separately=False,
+    keyword_trigger=False,
+)
+answer = result["answer"]
+
+
+logger.info(f"Answer: {answer}")
diff --git a/pyproject.toml b/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "pydantic>1",
     "pypdf",
     "tiktoken",
+    "qdrant-client"
 ]
 dynamic = ["version"]
 
diff --git a/src/whyhow_rbr/embedding.py b/src/whyhow_rbr/embedding.py
@@ -27,7 +27,7 @@ def generate_embeddings(
         List of embeddings for each chunk.
 
     """
-    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model=model)  # type: ignore[call-arg]
+    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model=model)  # type: ignore
     embeddings_array = embeddings.embed_documents(chunks)
 
     return embeddings_array
diff --git a/src/whyhow_rbr/rag_qdrant.py b/src/whyhow_rbr/rag_qdrant.py
diff --git a/tests/test_qdrant_rag.py b/tests/test_qdrant_rag.py

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@ dependencies = [`
`20`	`20`	`"pydantic>1",`
`21`	`21`	`"pypdf",`
`22`	`22`	`"tiktoken",`
	`23`	`+ "qdrant-client"`
`23`	`24`	`]`
`24`	`25`	`dynamic = ["version"]`
`25`	`26`