Skip to content

Commit 91701f4

Browse files
authored
Merge pull request #10 from Anush008/qdrant-rag
feat: RBR using Qdrant
2 parents 1911788 + 9ca7ac8 commit 91701f4

File tree

7 files changed

+1113
-2
lines changed

7 files changed

+1113
-2
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ The Rule-based Retrieval package is a Python package that enables you to create
1515

1616
- Python 3.10 or higher
1717
- OpenAI API key
18-
- Pinecone or Milvus API key
18+
- Pinecone, Milvus or Qdrant credentials
1919

2020
### Install from PyPI
2121

@@ -87,6 +87,7 @@ Check out the `examples/` directory for sample scripts demonstrating how to use
8787

8888
- [Milvus](docs/milvus.md)
8989
- [Pinecone](docs/pinecone.md)
90+
- [Qdrant](docs/qdrant.md)
9091

9192
# Contributing
9293

examples/qdrant/create_collection.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Example of creating a Qdrant collection and uploading documents to it."""
2+
3+
import logging
4+
5+
from openai import OpenAI
6+
from qdrant_client import QdrantClient
7+
8+
from src.whyhow_rbr.rag_qdrant import Client
9+
10+
# Parameters
11+
collection_name = "<collection_name>" # Replace with your collection name
12+
pdfs = (
13+
[]
14+
) # Replace with the paths to your PDFs, e.g. ["path/to/pdf1.pdf", "path/to/pdf2.pdf"]
15+
logging_level = logging.INFO
16+
17+
# Logging
18+
logging.basicConfig(
19+
level=logging.WARNING,
20+
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
21+
)
22+
logger = logging.getLogger("create_index")
23+
logger.setLevel(logging_level)
24+
25+
26+
client = Client(
27+
OpenAI(), # Set OPENAI_API_KEY environment variable
28+
QdrantClient(url="http://localhost:6333"),
29+
)
30+
31+
client.create_collection(collection_name)
32+
client.upload_documents(collection_name, documents=pdfs)

examples/qdrant/query.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""Example demonstrating how to perform RAG."""
2+
3+
import logging
4+
5+
from openai import OpenAI
6+
from qdrant_client import QdrantClient
7+
8+
from src.whyhow_rbr.rag_qdrant import Client, Rule
9+
10+
# Parameters
11+
collection_name = "<collection_name>"
12+
question = "" # Replace with your question
13+
logging_level = logging.INFO # Set to logging.DEBUG for more verbosity
14+
top_k = 5
15+
16+
# Logging
17+
logging.basicConfig(
18+
level=logging.WARNING,
19+
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
20+
)
21+
logger = logging.getLogger("querying")
22+
logger.setLevel(logging_level)
23+
logging.getLogger("whyhow_rbr").setLevel(logging_level)
24+
25+
26+
client = Client(
27+
OpenAI(), # Set OPENAI_API_KEY environment variable
28+
QdrantClient(url="http://localhost:6333"),
29+
)
30+
31+
rules = [
32+
Rule(
33+
# Replace with your filename
34+
filename="name/of/pdf_1.pdf",
35+
page_numbers=[2],
36+
keywords=["keyword1", "keyword2"],
37+
),
38+
Rule(
39+
# Replace with your filename
40+
filename="name/of/pdf_1.pdf",
41+
page_numbers=[1],
42+
keywords=[],
43+
),
44+
]
45+
46+
result = client.query(
47+
question=question,
48+
collection_name=collection_name,
49+
rules=rules,
50+
top_k=top_k,
51+
process_rules_separately=False,
52+
keyword_trigger=False,
53+
)
54+
answer = result["answer"]
55+
56+
57+
logger.info(f"Answer: {answer}")

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies = [
2020
"pydantic>1",
2121
"pypdf",
2222
"tiktoken",
23+
"qdrant-client"
2324
]
2425
dynamic = ["version"]
2526

src/whyhow_rbr/embedding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def generate_embeddings(
2727
List of embeddings for each chunk.
2828
2929
"""
30-
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model=model) # type: ignore[call-arg]
30+
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model=model) # type: ignore
3131
embeddings_array = embeddings.embed_documents(chunks)
3232

3333
return embeddings_array

0 commit comments

Comments
 (0)