Skip to content

Commit e9ecdf3

Browse files
authored
Fix json load to add entity res, minor cleanup (#56)
1 parent f7b0bd1 commit e9ecdf3

File tree

4 files changed

+157
-101
lines changed

4 files changed

+157
-101
lines changed

financial_documents/01_PDF_Loader_for_Neo4j_GraphRAG.ipynb

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,23 @@
5757
"outputs": [],
5858
"source": [
5959
"# --- Imports and Environment Setup ---\n",
60-
"from neo4j import GraphDatabase\n",
61-
"# from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline\n",
60+
"import nest_asyncio\n",
61+
"from dotenv import load_dotenv\n",
62+
"import os\n",
63+
"import time\n",
64+
"import csv\n",
65+
"import json\n",
6266
"import sys\n",
6367
"sys.path.append('./financial_documents') # Add the folder to the Python path\n",
64-
"from JSONWriter import JSONWriter\n",
68+
"# Neo4j imports\n",
69+
"from neo4j import GraphDatabase\n",
70+
"from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter\n",
71+
"from neo4j_graphrag.experimental.components.types import Neo4jGraph\n",
72+
"from neo4j_graphrag.experimental.components.resolver import SinglePropertyExactMatchResolver\n",
6573
"from neo4j_graphrag.llm import OpenAILLM\n",
6674
"from neo4j_graphrag.embeddings import OpenAIEmbeddings\n",
6775
"from neo4j_graphrag.generation.prompts import ERExtractionTemplate\n",
68-
"from dotenv import load_dotenv\n",
69-
"import os\n",
70-
"import time\n",
71-
"import csv"
76+
"from neo4j_graphrag.indexes import create_vector_index"
7277
]
7378
},
7479
{
@@ -96,8 +101,6 @@
96101
"outputs": [],
97102
"source": [
98103
"# --- Load Environment Variables ---\n",
99-
"import neo4j\n",
100-
"\n",
101104
"load_dotenv()\n",
102105
"NEO4J_URI = os.getenv(\"NEO4J_URI\")\n",
103106
"NEO4J_USER = os.getenv(\"NEO4J_USERNAME\")\n",
@@ -356,10 +359,10 @@
356359
},
357360
"outputs": [],
358361
"source": [
359-
"# FOR WORKSHOP - PDF to JSON\n",
362+
"# DO NOT RUN LIVE IN WORKSHOP - PDF to JSON\n",
360363
"\n",
361364
"# import os\n",
362-
"# from JSONWriter import JSONWriter\n",
365+
"# from JSONWriter import JsonWriter\n",
363366
"# from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline\n",
364367
"\n",
365368
"# async def run_pipeline_on_file(file_path):\n",
@@ -466,15 +469,13 @@
466469
"metadata": {},
467470
"outputs": [],
468471
"source": [
469-
"from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter\n",
470-
"from neo4j_graphrag.experimental.components.types import Neo4jGraph\n",
471-
"\n",
472472
"# --- Initialize and Run the Pipeline ---\n",
473473
"try:\n",
474474
" writer = Neo4jWriter(driver)\n",
475475
" graph = Neo4jGraph(nodes=[], relationships=[])\n",
476+
" resolver = SinglePropertyExactMatchResolver(driver)\n",
476477
"except Exception as e:\n",
477-
" print(\"Pipeline initialization failed:\", e)\n",
478+
" print(\"Pipeline step initialization failed:\", e)\n",
478479
" raise"
479480
]
480481
},
@@ -486,8 +487,9 @@
486487
"outputs": [],
487488
"source": [
488489
"# --- Async Pipeline Run Example ---\n",
489-
"async def run_pipeline_on_json(neo4j_graph, writer):\n",
490-
" await writer.run(neo4j_graph)"
490+
"async def run_pipeline_on_json(neo4j_graph):\n",
491+
" await writer.run(neo4j_graph)\n",
492+
" await resolver.run()"
491493
]
492494
},
493495
{
@@ -497,8 +499,6 @@
497499
"metadata": {},
498500
"outputs": [],
499501
"source": [
500-
"import json\n",
501-
"\n",
502502
"def load_json_as_neo4jgraph(json_path):\n",
503503
" with open(json_path, \"r\", encoding=\"utf-8\") as f:\n",
504504
" data = json.load(f)\n",
@@ -515,8 +515,6 @@
515515
"metadata": {},
516516
"outputs": [],
517517
"source": [
518-
"import nest_asyncio\n",
519-
"\n",
520518
"nest_asyncio.apply()\n",
521519
"\n",
522520
"# Example usage:\n",
@@ -525,8 +523,7 @@
525523
" print(json_file)\n",
526524
" try:\n",
527525
" neo4j_graph = load_json_as_neo4jgraph(json_file)\n",
528-
"\n",
529-
" await run_pipeline_on_json(neo4j_graph, writer)\n",
526+
" await run_pipeline_on_json(neo4j_graph)\n",
530527
" time.sleep(21)\n",
531528
" except Exception as e:\n",
532529
" print(f\"Error processing {json_file}: {str(e)}\")\n",
@@ -562,11 +559,42 @@
562559
"outputs": [],
563560
"source": [
564561
"# --- Create Vector Index in Neo4j ---\n",
565-
"from neo4j_graphrag.indexes import create_vector_index\n",
566-
"\n",
567562
"create_vector_index(driver, name=\"chunkEmbeddings\", label=\"Chunk\",\n",
568563
" embedding_property=\"embedding\", dimensions=1536, similarity_fn=\"cosine\")"
569564
]
565+
},
566+
{
567+
"cell_type": "markdown",
568+
"id": "33502e7a",
569+
"metadata": {},
570+
"source": [
571+
"## Explore! Use the Query or Explore tools\n",
572+
"\n",
573+
"Sample query:\n",
574+
"\n",
575+
"```\n",
576+
"MATCH (c:Company {name: \"AMAZON\"})-[rel]-(other)\n",
577+
"RETURN * LIMIT 50;\n",
578+
"```\n",
579+
"\n",
580+
"<div align=\"center\">\n",
581+
" <img src=\"Images-Diagrams/sample-query-output.png\" width=\"400\" />\n",
582+
"</div>\n",
583+
"\n",
584+
"Feel free to play around with substituting other company names or with more queries!\n",
585+
"\n",
586+
"```\n",
587+
"{'AMAZON',\n",
588+
" 'AMERICAN INTL GROUP',\n",
589+
" 'APPLE INC',\n",
590+
" 'INTEL CORP',\n",
591+
" 'MCDONALDS CORP',\n",
592+
" 'MICROSOFT CORP',\n",
593+
" 'NVIDIA CORPORATION',\n",
594+
" 'PAYPAL',\n",
595+
" 'PG&E CORP'}\n",
596+
"```"
597+
]
570598
}
571599
],
572600
"metadata": {

financial_documents/02_Load_Structured_Data_Neo4j_GraphRAG.ipynb

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,71 @@
5050
"<div align=\"center\">\n",
5151
" <img src=\"Images-Diagrams/14.png\" width=\"400\" />\n",
5252
"</div>\n",
53-
" \n",
54-
"5. Once the files are connected, you’ll see that the data model has check marks for each entity and relationship. Click **Run Import** in the upper right-hand corner. \n",
5553
"\n",
54+
"5. Verify mappings.\n",
55+
"\n",
56+
"**AssetManager**\n",
57+
"\n",
58+
"Label: `AssetManager` \n",
59+
"Table: `Asset_Manager_Holdings.csv` \n",
60+
"Properties: `managerName` \n",
61+
"ID(key): `managerName`\n",
62+
"\n",
63+
"**Company**\n",
64+
"\n",
65+
"Label: `Company` \n",
66+
"Table: `Company_Filings.csv` \n",
67+
"Properties: `name`, `ticker` \n",
68+
"ID(key): `name`\n",
69+
"\n",
70+
"**Document**\n",
71+
"\n",
72+
"Label: `Document` \n",
73+
"Table: `Company_Filings.csv` \n",
74+
"Properties: `path` *(this must match exactly* — *read below)* \n",
75+
"ID(key): `path`\n",
76+
"\n",
77+
"**OWNS (relationship)**\n",
78+
"\n",
79+
"Relationship Type: `OWNS` \n",
80+
"Table: `Asset_Manager_Holdings.csv`\n",
81+
"\n",
82+
"Node ID Mapping:\n",
83+
"\n",
84+
"From: Node - `AssetManager` \n",
85+
" * ID: `managerName`\n",
86+
" * ID column: `managerName`\n",
87+
"\n",
88+
"To: Node - `Company`\n",
89+
" * ID: `name`\n",
90+
" * ID column: `companyName` \n",
91+
"\n",
92+
"Properties: `shares`\n",
93+
"\n",
94+
"**FILED (relationship)** - connects the structured and the unstructured data in this GraphRAG application.\n",
95+
"\n",
96+
"Relationship Type: `FILED` \n",
97+
"Table: `Company_Filings.csv`\n",
98+
"\n",
99+
"Node ID Mapping:\n",
100+
"\n",
101+
"From: Node - `Company`\n",
102+
" * ID: `name`\n",
103+
" * ID column: `name`\n",
104+
"\n",
105+
"To: Node - `Document`\n",
106+
" * ID: `path`\n",
107+
" * ID column: `path_Windows` or<br>\n",
108+
" &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`path_Mac_ix`\n",
109+
" \n",
110+
"6. Once the files are connected, you’ll see that the data model has check marks for each entity and relationship. Click **Run Import** in the upper right-hand corner."
111+
]
112+
},
113+
{
114+
"cell_type": "markdown",
115+
"id": "08e789e9",
116+
"metadata": {},
117+
"source": [
56118
"### Mapping Your Data to Graph Structures\n",
57119
"\n",
58120
"To get you started, we’ve given you a full, completed data model for this exercise. When working with your own data, you’ll create these data model maps yourself. \n",
@@ -202,7 +264,7 @@
202264
"- **From:**\n",
203265
" - Node: `Company`\n",
204266
" - ID: `name`\n",
205-
" - ID column: `companyName`\n",
267+
" - ID column: `name`\n",
206268
"- **To:**\n",
207269
" - Node: `Document`\n",
208270
" - ID: `path`\n",
@@ -221,7 +283,7 @@
221283
"\n",
222284
"Now that your unstructured and structured data is loaded, you can use the [**Explore**](https://neo4j.com/docs/aura/preview/explore/introduction/) and [**Query**](https://neo4j.com/docs/aura/query/introduction/) functions to refine your graph structure and data to accurately represent your business domain. Use **Explore** to visualize and navigate your graph with Neo4j Bloom and **Query** to investigate the graph.\n",
223285
"\n",
224-
"For a detailed walkthrough of graph data modeling, see [The Developer’s Guide: How to Build a Knowledge Graph](https://neo4j.com/whitepapers/developers-guide-how-to-build-knowledge-graph/).\n"
286+
"For a detailed walkthrough of graph data modeling, see [The Developer’s Guide: How to Build a Knowledge Graph](https://neo4j.com/whitepapers/developers-guide-how-to-build-knowledge-graph/)."
225287
]
226288
},
227289
{
@@ -265,7 +327,7 @@
265327
"name": "python",
266328
"nbconvert_exporter": "python",
267329
"pygments_lexer": "ipython3",
268-
"version": "3.10.11"
330+
"version": "3.12.3"
269331
}
270332
},
271333
"nbformat": 4,

0 commit comments

Comments
 (0)