|
57 | 57 | "outputs": [], |
58 | 58 | "source": [ |
59 | 59 | "# --- Imports and Environment Setup ---\n", |
60 | | - "from neo4j import GraphDatabase\n", |
61 | | - "# from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline\n", |
| 60 | + "import nest_asyncio\n", |
| 61 | + "from dotenv import load_dotenv\n", |
| 62 | + "import os\n", |
| 63 | + "import time\n", |
| 64 | + "import csv\n", |
| 65 | + "import json\n", |
62 | 66 | "import sys\n", |
63 | 67 | "sys.path.append('./financial_documents') # Add the folder to the Python path\n", |
64 | | - "from JSONWriter import JSONWriter\n", |
| 68 | + "# Neo4j imports\n", |
| 69 | + "from neo4j import GraphDatabase\n", |
| 70 | + "from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter\n", |
| 71 | + "from neo4j_graphrag.experimental.components.types import Neo4jGraph\n", |
| 72 | + "from neo4j_graphrag.experimental.components.resolver import SinglePropertyExactMatchResolver\n", |
65 | 73 | "from neo4j_graphrag.llm import OpenAILLM\n", |
66 | 74 | "from neo4j_graphrag.embeddings import OpenAIEmbeddings\n", |
67 | 75 | "from neo4j_graphrag.generation.prompts import ERExtractionTemplate\n", |
68 | | - "from dotenv import load_dotenv\n", |
69 | | - "import os\n", |
70 | | - "import time\n", |
71 | | - "import csv" |
| 76 | + "from neo4j_graphrag.indexes import create_vector_index" |
72 | 77 | ] |
73 | 78 | }, |
74 | 79 | { |
|
96 | 101 | "outputs": [], |
97 | 102 | "source": [ |
98 | 103 | "# --- Load Environment Variables ---\n", |
99 | | - "import neo4j\n", |
100 | | - "\n", |
101 | 104 | "load_dotenv()\n", |
102 | 105 | "NEO4J_URI = os.getenv(\"NEO4J_URI\")\n", |
103 | 106 | "NEO4J_USER = os.getenv(\"NEO4J_USERNAME\")\n", |
|
356 | 359 | }, |
357 | 360 | "outputs": [], |
358 | 361 | "source": [ |
359 | | - "# FOR WORKSHOP - PDF to JSON\n", |
| 362 | + "# DO NOT RUN LIVE IN WORKSHOP - PDF to JSON\n", |
360 | 363 | "\n", |
361 | 364 | "# import os\n", |
362 | | - "# from JSONWriter import JSONWriter\n", |
| 365 | + "# from JSONWriter import JsonWriter\n", |
363 | 366 | "# from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline\n", |
364 | 367 | "\n", |
365 | 368 | "# async def run_pipeline_on_file(file_path):\n", |
|
466 | 469 | "metadata": {}, |
467 | 470 | "outputs": [], |
468 | 471 | "source": [ |
469 | | - "from neo4j_graphrag.experimental.components.kg_writer import Neo4jWriter\n", |
470 | | - "from neo4j_graphrag.experimental.components.types import Neo4jGraph\n", |
471 | | - "\n", |
472 | 472 | "# --- Initialize and Run the Pipeline ---\n", |
473 | 473 | "try:\n", |
474 | 474 | " writer = Neo4jWriter(driver)\n", |
475 | 475 | " graph = Neo4jGraph(nodes=[], relationships=[])\n", |
| 476 | + " resolver = SinglePropertyExactMatchResolver(driver)\n", |
476 | 477 | "except Exception as e:\n", |
477 | | - " print(\"Pipeline initialization failed:\", e)\n", |
| 478 | + " print(\"Pipeline step initialization failed:\", e)\n", |
478 | 479 | " raise" |
479 | 480 | ] |
480 | 481 | }, |
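`SinglePropertyExactMatchResolver` is the new step in this cell: after the writer loads a graph, it merges extracted entity nodes that share a label and an identical value of a single property ("name" by default, as I read the current neo4j-graphrag API), which deduplicates entities extracted from different chunks or documents. A minimal sketch of running it on its own:

```python
# Sketch: merge duplicate entities that agree on the default "name" property.
resolver = SinglePropertyExactMatchResolver(driver)
stats = await resolver.run()   # async; returns resolution statistics
print(stats)
```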
|
486 | 487 | "outputs": [], |
487 | 488 | "source": [ |
488 | 489 | "# --- Async Pipeline Run Example ---\n", |
489 | | - "async def run_pipeline_on_json(neo4j_graph, writer):\n", |
490 | | - " await writer.run(neo4j_graph)" |
| 490 | + "async def run_pipeline_on_json(neo4j_graph):\n", |
| 491 | + " await writer.run(neo4j_graph)\n", |
| 492 | + " await resolver.run()" |
491 | 493 | ] |
492 | 494 | }, |
493 | 495 | { |
|
497 | 499 | "metadata": {}, |
498 | 500 | "outputs": [], |
499 | 501 | "source": [ |
500 | | - "import json\n", |
501 | | - "\n", |
502 | 502 | "def load_json_as_neo4jgraph(json_path):\n", |
503 | 503 | " with open(json_path, \"r\", encoding=\"utf-8\") as f:\n", |
504 | 504 | " data = json.load(f)\n", |
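The rest of this helper is cut off by the diff. Assuming the JSON files were serialized from `Neo4jGraph` objects (top-level `nodes` and `relationships` keys), the conversion back can be a one-liner via pydantic validation:

```python
def load_json_as_neo4jgraph(json_path):
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    # Assumption: the file has the Neo4jGraph shape
    # ({"nodes": [...], "relationships": [...]}), so pydantic can rebuild it directly.
    return Neo4jGraph.model_validate(data)
```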
|
515 | 515 | "metadata": {}, |
516 | 516 | "outputs": [], |
517 | 517 | "source": [ |
518 | | - "import nest_asyncio\n", |
519 | | - "\n", |
520 | 518 | "nest_asyncio.apply()\n", |
521 | 519 | "\n", |
522 | 520 | "# Example usage:\n", |
|
525 | 523 | " print(json_file)\n", |
526 | 524 | " try:\n", |
527 | 525 | " neo4j_graph = load_json_as_neo4jgraph(json_file)\n", |
528 | | - "\n", |
529 | | - " await run_pipeline_on_json(neo4j_graph, writer)\n", |
| 526 | + " await run_pipeline_on_json(neo4j_graph)\n", |
530 | 527 | " time.sleep(21)\n", |
531 | 528 | " except Exception as e:\n", |
532 | 529 | " print(f\"Error processing {json_file}: {str(e)}\")\n", |
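`json_files` is defined outside the lines shown. A hypothetical definition, assuming the pipeline's JSON outputs sit in the same `./financial_documents` folder referenced earlier:

```python
import glob

# Assumption: one JSON file per processed filing, alongside the source PDFs.
json_files = sorted(glob.glob("./financial_documents/*.json"))
```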
|
562 | 559 | "outputs": [], |
563 | 560 | "source": [ |
564 | 561 | "# --- Create Vector Index in Neo4j ---\n", |
565 | | - "from neo4j_graphrag.indexes import create_vector_index\n", |
566 | | - "\n", |
567 | 562 | "create_vector_index(driver, name=\"chunkEmbeddings\", label=\"Chunk\",\n", |
568 | 563 | " embedding_property=\"embedding\", dimensions=1536, similarity_fn=\"cosine\")" |
569 | 564 | ] |
| 565 | + }, |
| 566 | + { |
| 567 | + "cell_type": "markdown", |
| 568 | + "id": "33502e7a", |
| 569 | + "metadata": {}, |
| 570 | + "source": [ |
| 571 | + "## Explore! Query or Explore tools\n", |
| 572 | + "\n", |
| 573 | + "Sample query:\n", |
| 574 | + "\n", |
| 575 | + "```\n", |
| 576 | + "MATCH (c:Company {name: \"AMAZON\"})-[rel]-(other)\n", |
| 577 | + "RETURN * LIMIT 50;\n", |
| 578 | + "```\n", |
| 579 | + "\n", |
| 580 | + "<div align=\"center\">\n", |
| 581 | + " <img src=\"Images-Diagrams/sample-query-output.png\" width=\"400\" />\n", |
| 582 | + "</div>\n", |
| 583 | + "\n", |
| 584 | + "Feel free to play around with substituting other company names or with more queries!\n", |
| 585 | + "\n", |
| 586 | + "```\n", |
| 587 | + "{'AMAZON',\n", |
| 588 | + " 'AMERICAN INTL GROUP',\n", |
| 589 | + " 'APPLE INC',\n", |
| 590 | + " 'INTEL CORP',\n", |
| 591 | + " 'MCDONALDS CORP',\n", |
| 592 | + " 'MICROSOFT CORP',\n", |
| 593 | + " 'NVIDIA CORPORATION',\n", |
| 594 | + " 'PAYPAL',\n", |
| 595 | + " 'PG&E CORP'}\n", |
| 596 | + "```" |
| 597 | + ] |
570 | 598 | } |
571 | 599 | ], |
572 | 600 | "metadata": { |
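Once the `chunkEmbeddings` index created above is populated, it can be queried for retrieval. A sketch of a typical follow-up with neo4j-graphrag's `VectorRetriever` (not part of the committed notebook; the embedding model is an assumption and must match whatever produced the 1536-dimensional chunk embeddings):

```python
from neo4j_graphrag.retrievers import VectorRetriever

# Assumption: chunks were embedded with a 1536-dim OpenAI model such as text-embedding-3-small.
embedder = OpenAIEmbeddings(model="text-embedding-3-small")
retriever = VectorRetriever(driver, index_name="chunkEmbeddings", embedder=embedder)

results = retriever.search(query_text="What does AMAZON hold?", top_k=5)
for item in results.items:
    print(item.content)
```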
|