Skip to content

Commit 14ad37a

Browse files
committed
tweaks to NeoAccess.import_json_dump()
New optional argument extended_validation. Additional pytests. Updated pyproject.toml
1 parent db98625 commit 14ad37a

File tree

4 files changed

+190
-68
lines changed

4 files changed

+190
-68
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ https://brainannex.org/guide.php
44

55

66
This library used to be distributed together with the web app "Brain Annex";
7-
but, as of version 4.0.3, it's being independently released.
7+
but, starting with version 4.0.3, it's being independently released.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ keywords = [
3535
Homepage = "https://brainannex.org/guide.php"
3636
"Home-page" = "https://brainannex.org/guide.php"
3737
"Bug Tracker" = "https://github.com/BrainAnnex/brain-annex/issues"
38-
Source = "https://github.com/BrainAnnex/brain-annex/tree/main/BrainAnnex/modules/neo_access"
38+
Source = "https://github.com/BrainAnnex/neoaccess"
3939
Documentation = "https://brainannex.org/guide.php"

src/neoaccess/neoaccess.py

Lines changed: 103 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,7 @@ def get_nodes(self, match: Union[int, dict],
630630
:param match: EITHER an integer with a Neo4j node id,
631631
OR a dictionary of data to identify a node, or set of nodes, as returned by match()
632632
633-
:param return_internal_id: Flag indicating whether to also include the Neo4j internal node ID in the returned data
633+
:param return_internal_id: Flag indicating whether to also include the Neo4j internal node ID in the returned data
634634
(using "internal_id" as its key in the returned dictionary)
635635
:param return_labels: Flag indicating whether to also include the Neo4j label names in the returned data
636636
(using "neo4j_labels" as its key in the returned dictionary)
@@ -2319,7 +2319,7 @@ def drop_all_indexes(self, including_constraints=True) -> None:
23192319
if self.apoc:
23202320
self.query("call apoc.schema.assert({},{})")
23212321
else:
2322-
self.drop_all_constraints() # TODO: it doesn't work in version 5.5 of the database
2322+
self.drop_all_constraints() # TODO: it doesn't work in version 5.5 of the Neo4j database
23232323

23242324
indexes = self.get_indexes()
23252325
for name in indexes['name']:
@@ -2815,85 +2815,122 @@ def list_importer(self, l: list, labels, level) -> [int]:
28152815

28162816

28172817

2818-
2819-
def import_json_dump(self, json_str: str) -> str:
2818+
def import_json_dump(self, json_str: str, extended_validation = True) -> str:
28202819
"""
2821-
Used to import data from a database dump done with export_dbase_json() or export_nodes_rels_json()
2822-
Import nodes and/or relationships into the database, as directed by the given data dump in JSON form.
2823-
Note: the id's of the nodes need to be shifted,
2820+
Used to import data from a database dump that was done with export_dbase_json() or export_nodes_rels_json().
2821+
2822+
Import nodes and relationships into the database, as specified in the JSON code
2823+
that was created by the earlier data dump.
2824+
2825+
IMPORTANT: the internal id's of the nodes need to be shifted,
28242826
because one cannot force the Neo4j internal id's to be any particular value...
28252827
and, besides (if one is importing into an existing database), particular id's may already be taken.
2826-
:param json_str: A JSON string with the format specified under export_dbase_json()
2827-
:return: A status message with import details if successful, or raise an Exception if not
2828+
2829+
:param json_str: A JSON string with the format specified under export_dbase_json()
2830+
:param extended_validation: If True, an attempt is made to avoid partial imports,
2831+
by running extended validations prior to importing
2832+
(it will make a first pass thru the data, and hence take longer)
2833+
2834+
:return: A status message with import details if successful;
2835+
or raise an Exception if not.
2836+
If an error does occur during import then the import is aborted -
2837+
and the number of imported nodes & relationships is returned in the Exception raised.
28282838
"""
28292839

28302840
try:
2831-
json_list = json.loads(json_str) # Turn the string (representing a JSON list) into a list
2841+
json_list = json.loads(json_str) # Turn the string (which represents a JSON list) into a list
28322842
except Exception as ex:
2833-
raise Exception(f"Incorrectly-formatted JSON string. {ex}")
2843+
raise Exception(f"import_json_dump(): incorrectly-formatted JSON string. {ex}")
28342844

28352845
if self.debug:
28362846
print("json_list: ", json_list)
28372847

2838-
assert type(json_list) == list, "The JSON string does not represent the expected list"
2848+
assert type(json_list) == list, \
2849+
"import_json_dump(): the JSON string does not represent a list"
2850+
28392851

28402852
id_shifting = {} # To map the Neo4j internal ID's specified in the JSON data dump
28412853
# into the ID's of newly-created nodes
28422854

2843-
# Do an initial pass for correctness, to try to avoid partial imports
2844-
for i, item in enumerate(json_list):
2845-
# We use item.get(key_name) to handle without error situation where the key is missing
2846-
if (item.get("type") != "node") and (item.get("type") != "relationship"):
2847-
raise Exception(f"Item in list index {i} must have a 'type' of either 'node' or 'relationship'. Nothing imported. Item: {item}")
2848-
2849-
if item["type"] == "node":
2850-
if "id" not in item:
2851-
raise Exception(f"Item in list index {i} is marked as 'node' but it lacks an 'id'. Nothing imported. Item: {item}")
2852-
2853-
elif item["type"] == "relationship":
2854-
if "label" not in item:
2855-
raise Exception(f"Item in list index {i} is marked as 'relationship' but lacks a 'label'. Nothing imported. Item: {item}")
2856-
if "start" not in item:
2857-
raise Exception(f"Item in list index {i} is marked as 'relationship' but lacks a 'start' value. Nothing imported. Item: {item}")
2858-
if "end" not in item:
2859-
raise Exception(f"Item in list index {i} is marked as 'relationship' but lacks a 'end' value. Nothing imported. Item: {item}")
2860-
if "id" not in item["start"]:
2861-
raise Exception(f"Item in list index {i} is marked as 'relationship' but its 'start' value lacks an 'id'. Nothing imported. Item: {item}")
2862-
if "id" not in item["end"]:
2863-
raise Exception(f"Item in list index {i} is marked as 'relationship' but its 'end' value lacks an 'id'. Nothing imported. Item: {item}")
2864-
2865-
2866-
# First, process all the nodes, and in the process create the id_shifting map
2855+
if extended_validation:
2856+
# Do an initial pass for correctness, to help avoid partial imports.
2857+
# TODO: maybe also check the validity of the start and end nodes of relationships
2858+
for i, item in enumerate(json_list):
2859+
assert type(item) == dict, \
2860+
f"import_json_dump(): Item in list index {i} should be a dict, but instead it's of type {type(item)}. Nothing imported. Item: {item}"
2861+
# We use item.get(key_name) so that a missing key is handled without raising an error
2862+
if (item.get("type") != "node") and (item.get("type") != "relationship"):
2863+
raise Exception(f"import_json_dump(): Item in list index {i} must be a dict with a 'type' key, "
2864+
f"whose value is either 'node' or 'relationship'. Nothing imported. Item: {item}")
2865+
2866+
if item["type"] == "node":
2867+
if "id" not in item:
2868+
raise Exception(f"import_json_dump(): Item in list index {i} is marked as 'node' but it lacks an 'id'. Nothing imported. Item: {item}")
2869+
try:
2870+
int(item["id"])
2871+
except ValueError:
2872+
raise Exception(f"import_json_dump(): Item in list index {i} has an 'id' key whose value ({item['id']}) doesn't correspond to an integer. "
2873+
f"Nothing imported. Item: {item}")
2874+
2875+
elif item["type"] == "relationship":
2876+
if "label" not in item:
2877+
raise Exception(f"import_json_dump(): Item in list index {i} is marked as 'relationship' but lacks a 'label'. Nothing imported. Item: {item}")
2878+
if "start" not in item:
2879+
raise Exception(f"import_json_dump(): Item in list index {i} is marked as 'relationship' but lacks a 'start' value. Nothing imported. Item: {item}")
2880+
if "end" not in item:
2881+
raise Exception(f"import_json_dump(): Item in list index {i} is marked as 'relationship' but lacks a 'end' value. Nothing imported. Item: {item}")
2882+
if "id" not in item["start"]:
2883+
raise Exception(f"import_json_dump(): Item in list index {i} is marked as 'relationship' but its 'start' value lacks an 'id'. Nothing imported. Item: {item}")
2884+
if "id" not in item["end"]:
2885+
raise Exception(f"import_json_dump(): Item in list index {i} is marked as 'relationship' but its 'end' value lacks an 'id'. Nothing imported. Item: {item}")
2886+
2887+
2888+
# First, process all the node data, and create the nodes; while doing that, generate the id_shifting map
28672889
num_nodes_imported = 0
2868-
for item in json_list:
2869-
if item["type"] == "node":
2870-
#print("ADDING NODE: ", item)
2871-
#print(f' Creating node with label `{item["labels"][0]}` and properties {item["properties"]}')
2872-
old_id = int(item["id"])
2873-
new_id = self.create_node(item["labels"][0], item["properties"]) # TODO: Only the 1st label is used for now
2874-
id_shifting[old_id] = new_id
2875-
num_nodes_imported += 1
2890+
try:
2891+
for item in json_list:
2892+
if item["type"] == "node":
2893+
#print("ADDING NODE: ", item)
2894+
#print(f' Creating node with labels `{item["labels"]}` and properties {item["properties"]}')
2895+
old_id = int(item["id"])
2896+
new_id = self.create_node(item["labels"], item["properties"]) # Note: any number of labels can be imported
2897+
id_shifting[old_id] = new_id
2898+
num_nodes_imported += 1
2899+
except Exception as ex:
2900+
raise Exception(f"import_json_dump(): the import process was INTERRUPTED "
2901+
f"after importing {num_nodes_imported} node(s) and 0 relationship(s). Reason: " + str(ex))
2902+
28762903

28772904
#print("id_shifting map:", id_shifting)
28782905

28792906
# Then process all the relationships, linking to the correct (newly-created) nodes by using the id_shifting map
2907+
# (note: item types that are neither "node" nor "relationship" are currently being ignored during the import)
28802908
num_rels_imported = 0
2881-
for item in json_list:
2882-
if item["type"] == "relationship":
2883-
#print("ADDING RELATIONSHIP: ", item)
2884-
rel_name = item["label"]
2885-
#rel_props = item["properties"]
2886-
rel_props = item.get("properties") # Also works if no "properties" is present (relationships may lack it)
2887-
2888-
start_id_original = int(item["start"]["id"])
2889-
end_id_original = int(item["end"]["id"])
2890-
2891-
start_id_shifted = id_shifting[start_id_original]
2892-
end_id_shifted = id_shifting[end_id_original]
2893-
#print(f' Creating relationship named `{rel_name}` from node {start_id_shifted} to node {end_id_shifted}, with properties {rel_props}')
2894-
2895-
self.link_nodes_by_ids(start_id_shifted, end_id_shifted, rel_name, rel_props)
2896-
num_rels_imported += 1
2909+
try:
2910+
for item in json_list:
2911+
if item["type"] == "relationship":
2912+
#print("ADDING RELATIONSHIP: ", item)
2913+
rel_name = item["label"]
2914+
#rel_props = item["properties"]
2915+
rel_props = item.get("properties") # Also works if no "properties" is present (relationships may lack it)
2916+
2917+
start_id_original = int(item["start"]["id"])
2918+
end_id_original = int(item["end"]["id"])
2919+
2920+
if start_id_original not in id_shifting:
2921+
raise Exception(f"cannot add a relationship `{rel_name}` starting at node with id {start_id_original}, because no node with that id was imported")
2922+
if end_id_original not in id_shifting:
2923+
raise Exception(f"cannot add a relationship `{rel_name}` ending at node with id {start_id_original}, because no node with that id was imported")
2924+
2925+
start_id_shifted = id_shifting[start_id_original]
2926+
end_id_shifted = id_shifting[end_id_original]
2927+
2928+
#print(f' Creating relationship named `{rel_name}` from node {start_id_shifted} to node {end_id_shifted}, with properties {rel_props}')
2929+
self.link_nodes_by_ids(start_id_shifted, end_id_shifted, rel_name, rel_props)
2930+
num_rels_imported += 1
2931+
except Exception as ex:
2932+
raise Exception(f"import_json_dump(): the import process was INTERRUPTED "
2933+
f"after importing {num_nodes_imported} node(s) and {num_rels_imported} relationship(s). Reason: " + str(ex))
28972934

28982935

28992936
return f"Successful import of {num_nodes_imported} node(s) and {num_rels_imported} relationship(s)"
@@ -3002,4 +3039,9 @@ def indent_chooser(self, level: int) -> str:
30023039

30033040

30043041
def _debug_local(self) -> str:
3005-
return "local"
3042+
"""
3043+
Used to test the switch from a local to a remote repository, for debugging
3044+
3045+
:return:
3046+
"""
3047+
return "remote"

tests/test_neoaccess_import_export.py

Lines changed: 85 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -286,19 +286,99 @@ def test_import_json_data(db):
286286
# Incorrectly-formatted JSON string. Expecting value: line 1 column 1 (char 0)
287287
with pytest.raises(Exception):
288288
assert db.import_json_dump('{"a": "this is good JSON, but not a list!"}') # This ought to raise an Exception:
289-
# "The JSON string does not represent the expected list"
290-
# TODO: extend
289+
# "The JSON string does not represent a list"
290+
with pytest.raises(Exception): # This ought to raise an Exception:
291+
assert db.import_json_dump('[1, 2, 3]') # Item in list index 0 should be a dict, but instead it's of type <class 'int'>. Item: 1
292+
293+
with pytest.raises(Exception):
294+
assert db.import_json_dump('[{"bad_type":"node"}]') # The dict in 0-th element lacks a "type" key
295+
296+
with pytest.raises(Exception):
297+
assert db.import_json_dump('[{"type":"bad_value"}]') # The dict in 0-th element doesn't have an acceptable value
298+
# for the "type" key
299+
300+
with pytest.raises(Exception):
301+
assert db.import_json_dump('[{"type":"node"}]') # Missing "id" key
302+
303+
with pytest.raises(Exception):
304+
assert db.import_json_dump('[{"type":"node", "id": "NOT_INTEGER"}]') # Non-integer id
305+
306+
with pytest.raises(Exception):
307+
json = '[{"id":"4","type":"relationship","label":"is_friends_with","start":{"id":"123","labels":["User"]},"end":{"id":"456","labels":["Person", "Client"]}}]'
308+
db.import_json_dump(json) # Trying to add a relationship between non-existing nodes
309+
291310

292311
# Now, test actual imports
293312

294-
# Completely clear the database
295-
db.empty_dbase()
313+
db.empty_dbase() # Completely clear the database
296314

315+
# Import a 1st node
297316
json = '[{"type":"node","id":"123","labels":["User"],"properties":{"name":"Eve"}}]'
298317
details = db.import_json_dump(json)
299318
assert details == "Successful import of 1 node(s) and 0 relationship(s)"
300319
match = db.match(labels="User", properties={"name": "Eve"})
301320
retrieved_records = db.get_nodes(match)
302321
assert len(retrieved_records) == 1
322+
match_all = db.match()
323+
retrieved_records = db.get_nodes(match_all)
324+
assert len(retrieved_records) == 1
325+
326+
# Import a 2nd node
327+
json = '[{"type":"node","id":"456","labels":["Person", "Client"],"properties":{"name":"Adam"}}]'
328+
details = db.import_json_dump(json)
329+
assert details == "Successful import of 1 node(s) and 0 relationship(s)"
330+
match = db.match(labels=["Person", "Client"], properties={"name": "Adam"})
331+
retrieved_records = db.get_nodes(match)
332+
assert len(retrieved_records) == 1
333+
retrieved_records = db.get_nodes(match_all)
334+
assert len(retrieved_records) == 2
335+
336+
337+
# Starting with an empty database, re-import the 2 earlier nodes, but this time both at once
338+
db.empty_dbase() # Completely clear the database
339+
340+
json = '[{"type":"node","id":"123","labels":["User"],"properties":{"name":"Eve"}},\n' \
341+
'{"type":"node","id":"456","labels":["Person", "Client"],"properties":{"name":"Adam"}}]'
342+
details = db.import_json_dump(json)
343+
assert details == "Successful import of 2 node(s) and 0 relationship(s)"
344+
345+
match = db.match(labels="User", properties={"name": "Eve"})
346+
retrieved_records = db.get_nodes(match)
347+
assert len(retrieved_records) == 1
348+
349+
match = db.match(labels=["Person", "Client"], properties={"name": "Adam"})
350+
retrieved_records = db.get_nodes(match)
351+
assert len(retrieved_records) == 1
352+
353+
retrieved_records = db.get_nodes(match_all)
354+
assert len(retrieved_records) == 2
355+
356+
357+
# Starting with an empty database, re-import the 2 earlier nodes, but this time both at once - and with a relationship between them
358+
db.empty_dbase() # Completely clear the database
359+
360+
json = '[{"type":"node","id":"123","labels":["User"],"properties":{"name":"Eve"}},\n' \
361+
'{"type":"node","id":"456","labels":["Person", "Client"],"properties":{"name":"Adam"}},\n' \
362+
'{"id":"7","type":"relationship","label":"is_friends_with","start":{"id":"123","labels":["User"]},"end":{"id":"456","labels":["Person", "Client"]}}]'
363+
details = db.import_json_dump(json)
364+
assert details == "Successful import of 2 node(s) and 1 relationship(s)"
365+
366+
match = db.match(labels="User", properties={"name": "Eve"})
367+
retrieved_records = db.get_nodes(match, return_internal_id=True)
368+
assert len(retrieved_records) == 1
369+
id_eve = retrieved_records[0]["internal_id"]
370+
371+
match = db.match(labels=["Person", "Client"], properties={"name": "Adam"})
372+
retrieved_records = db.get_nodes(match, return_internal_id=True)
373+
assert len(retrieved_records) == 1
374+
id_adam = retrieved_records[0]["internal_id"]
375+
376+
retrieved_records = db.get_nodes(match_all)
377+
assert len(retrieved_records) == 2
303378

304-
# TODO: extend
379+
q = '''
380+
MATCH (eve :User {name: "Eve"}) - [:is_friends_with] -> (adam :Person:Client {name: "Adam"})
381+
RETURN count(eve) AS num_eve, count(adam) AS num_adam, id(eve) AS id_eve, id(adam) AS id_adam
382+
'''
383+
result = db.query(q, single_row=True)
384+
assert result == {'num_eve': 1, 'num_adam': 1, 'id_eve': id_eve, 'id_adam': id_adam}

0 commit comments

Comments
 (0)