Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 53 additions & 25 deletions financial_documents/01_PDF_Loader_for_Neo4j_GraphRAG.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,23 @@
"outputs": [],
"source": [
"# --- Imports and Environment Setup ---\n",
"from neo4j import GraphDatabase\n",
"# from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline\n",
"import nest_asyncio\n",
"from dotenv import load_dotenv\n",
"import os\n",
"import time\n",
"import csv\n",
"import json\n",
"import sys\n",
"sys.path.append('./financial_documents') # Add the folder to the Python path\n",
"from JSONWriter import JSONWriter\n",
"# Neo4j imports\n",
"from neo4j import GraphDatabase\n",
"from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter\n",
"from neo4j_graphrag.experimental.components.types import Neo4jGraph\n",
"from neo4j_graphrag.experimental.components.resolver import SinglePropertyExactMatchResolver\n",
"from neo4j_graphrag.llm import OpenAILLM\n",
"from neo4j_graphrag.embeddings import OpenAIEmbeddings\n",
"from neo4j_graphrag.generation.prompts import ERExtractionTemplate\n",
"from dotenv import load_dotenv\n",
"import os\n",
"import time\n",
"import csv"
"from neo4j_graphrag.indexes import create_vector_index"
]
},
{
Expand Down Expand Up @@ -96,8 +101,6 @@
"outputs": [],
"source": [
"# --- Load Environment Variables ---\n",
"import neo4j\n",
"\n",
"load_dotenv()\n",
"NEO4J_URI = os.getenv(\"NEO4J_URI\")\n",
"NEO4J_USER = os.getenv(\"NEO4J_USERNAME\")\n",
Expand Down Expand Up @@ -356,10 +359,10 @@
},
"outputs": [],
"source": [
"# FOR WORKSHOP - PDF to JSON\n",
"# DO NOT RUN LIVE IN WORKSHOP - PDF to JSON\n",
"\n",
"# import os\n",
"# from JSONWriter import JSONWriter\n",
"# from JSONWriter import JsonWriter\n",
"# from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline\n",
"\n",
"# async def run_pipeline_on_file(file_path):\n",
Expand Down Expand Up @@ -466,15 +469,13 @@
"metadata": {},
"outputs": [],
"source": [
"from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter\n",
"from neo4j_graphrag.experimental.components.types import Neo4jGraph\n",
"\n",
"# --- Initialize and Run the Pipeline ---\n",
"try:\n",
" writer = Neo4jWriter(driver)\n",
" graph = Neo4jGraph(nodes=[], relationships=[])\n",
" resolver = SinglePropertyExactMatchResolver(driver)\n",
"except Exception as e:\n",
" print(\"Pipeline initialization failed:\", e)\n",
" print(\"Pipeline step initialization failed:\", e)\n",
" raise"
]
},
Expand All @@ -486,8 +487,9 @@
"outputs": [],
"source": [
"# --- Async Pipeline Run Example ---\n",
"async def run_pipeline_on_json(neo4j_graph, writer):\n",
" await writer.run(neo4j_graph)"
"async def run_pipeline_on_json(neo4j_graph):\n",
" await writer.run(neo4j_graph)\n",
" await resolver.run()"
]
},
{
Expand All @@ -497,8 +499,6 @@
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"def load_json_as_neo4jgraph(json_path):\n",
" with open(json_path, \"r\", encoding=\"utf-8\") as f:\n",
" data = json.load(f)\n",
Expand All @@ -515,8 +515,6 @@
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()\n",
"\n",
"# Example usage:\n",
Expand All @@ -525,8 +523,7 @@
" print(json_file)\n",
" try:\n",
" neo4j_graph = load_json_as_neo4jgraph(json_file)\n",
"\n",
" await run_pipeline_on_json(neo4j_graph, writer)\n",
" await run_pipeline_on_json(neo4j_graph)\n",
" time.sleep(21)\n",
" except Exception as e:\n",
" print(f\"Error processing {json_file}: {str(e)}\")\n",
Expand Down Expand Up @@ -562,11 +559,42 @@
"outputs": [],
"source": [
"# --- Create Vector Index in Neo4j ---\n",
"from neo4j_graphrag.indexes import create_vector_index\n",
"\n",
"create_vector_index(driver, name=\"chunkEmbeddings\", label=\"Chunk\",\n",
" embedding_property=\"embedding\", dimensions=1536, similarity_fn=\"cosine\")"
]
},
{
"cell_type": "markdown",
"id": "33502e7a",
"metadata": {},
"source": [
"## Explore! Use the Query or Explore tools\n",
"\n",
"Sample query:\n",
"\n",
"```\n",
"MATCH (c:Company {name: \"AMAZON\"})-[rel]-(other)\n",
"RETURN * LIMIT 50;\n",
"```\n",
"\n",
"<div align=\"center\">\n",
" <img src=\"Images-Diagrams/sample-query-output.png\" width=\"400\" />\n",
"</div>\n",
"\n",
"Feel free to experiment by substituting one of the other company names below, or by writing more queries of your own!\n",
"\n",
"```\n",
"{'AMAZON',\n",
" 'AMERICAN INTL GROUP',\n",
" 'APPLE INC',\n",
" 'INTEL CORP',\n",
" 'MCDONALDS CORP',\n",
" 'MICROSOFT CORP',\n",
" 'NVIDIA CORPORATION',\n",
" 'PAYPAL',\n",
" 'PG&E CORP'}\n",
"```"
]
}
],
"metadata": {
Expand Down
72 changes: 67 additions & 5 deletions financial_documents/02_Load_Structured_Data_Neo4j_GraphRAG.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,71 @@
"<div align=\"center\">\n",
" <img src=\"Images-Diagrams/14.png\" width=\"400\" />\n",
"</div>\n",
" \n",
"5. Once the files are connected, you’ll see that the data model has check marks for each entity and relationship. Click **Run Import** in the upper right-hand corner. \n",
"\n",
"5. Verify mappings.\n",
"\n",
"**AssetManager**\n",
"\n",
"Label: `AssetManager` \n",
"Table: `Asset_Manager_Holdings.csv` \n",
"Properties: `managerName` \n",
"ID(key): `managerName`\n",
"\n",
"**Company**\n",
"\n",
"Label: `Company` \n",
"Table: `Company_Filings.csv` \n",
"Properties: `name`, `ticker` \n",
"ID(key): `name`\n",
"\n",
"**Document**\n",
"\n",
"Label: `Document` \n",
"Table: `Company_Filings.csv` \n",
"Properties: `path` *(this must match exactly* — *read below)* \n",
"ID(key): `path`\n",
"\n",
"**OWNS (relationship)**\n",
"\n",
"Relationship Type: `OWNS` \n",
"Table: `Asset_Manager_Holdings.csv`\n",
"\n",
"Node ID Mapping:\n",
"\n",
"From: Node - `AssetManager` \n",
" * ID: `managerName`\n",
" * ID column: `managerName`\n",
"\n",
"To: Node - `Company`\n",
" * ID: `name`\n",
" * ID column: `companyName` \n",
"\n",
"Properties: `shares`\n",
"\n",
"**FILED (relationship)** - connects the structured and the unstructured data in this GraphRAG application.\n",
"\n",
"Relationship Type: `FILED` \n",
"Table: `Company_Filings.csv`\n",
"\n",
"Node ID Mapping:\n",
"\n",
"From: Node - `Company`\n",
" * ID: `name`\n",
" * ID column: `name`\n",
"\n",
"To: Node - `Document`\n",
" * ID: `path`\n",
" * ID column: `path_Windows` or<br>\n",
" &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`path_Mac_ix`\n",
" \n",
"6. Once the files are connected, you’ll see that the data model has check marks for each entity and relationship. Click **Run Import** in the upper right-hand corner."
]
},
{
"cell_type": "markdown",
"id": "08e789e9",
"metadata": {},
"source": [
"### Mapping Your Data to Graph Structures\n",
"\n",
"To get you started, we’ve given you a full, completed data model for this exercise. When working with your own data, you’ll create these data model maps yourself. \n",
Expand Down Expand Up @@ -202,7 +264,7 @@
"- **From:**\n",
" - Node: `Company`\n",
" - ID: `name`\n",
" - ID column: `companyName`\n",
" - ID column: `name`\n",
"- **To:**\n",
" - Node: `Document`\n",
" - ID: `path`\n",
Expand All @@ -221,7 +283,7 @@
"\n",
"Now that your unstructured and structured data is loaded, you can use the [**Explore**](https://neo4j.com/docs/aura/preview/explore/introduction/) and [**Query**](https://neo4j.com/docs/aura/query/introduction/) functions to refine your graph structure and data to accurately represent your business domain. Use **Explore** to visualize and navigate your graph with Neo4j Bloom and **Query** to investigate the graph.\n",
"\n",
"For a detailed walkthrough of graph data modeling, see [The Developer’s Guide: How to Build a Knowledge Graph](https://neo4j.com/whitepapers/developers-guide-how-to-build-knowledge-graph/).\n"
"For a detailed walkthrough of graph data modeling, see [The Developer’s Guide: How to Build a Knowledge Graph](https://neo4j.com/whitepapers/developers-guide-how-to-build-knowledge-graph/)."
]
},
{
Expand Down Expand Up @@ -265,7 +327,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
Loading
Loading