diff --git a/.gitignore b/.gitignore index 579fec53..a0d05c07 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,3 @@ hadoop progress.csv logs/ store_lock -import/ diff --git a/CONTRIBUTING.adoc b/CONTRIBUTING.adoc index 0dd357eb..3f170c7d 100644 --- a/CONTRIBUTING.adoc +++ b/CONTRIBUTING.adoc @@ -53,6 +53,23 @@ If there are multiple commits, and the PR is fine to merge online, use *Squash and Merge* semantics. Using the issue id in the summary line will allow us to keep track of commits belonging together. +=== Branching Policy + +We do have a branch for each major version of Neo4j, e.g. `3.5`, `4.0` and `4.1`. +As a contributor you're asked to use the newest possible branch for your PR. + +When your PR is accepted and merged it's the responsibility of the maintainers who merged it to cherry-pick those changes to any newer branch. +Once cherry-picking is done, the maintainers should mark that PR with the `cherry-picked` label. + +Please indicate in your PR message text if your PR needs a different behaviour - e.g. if the feature you're fixing has been removed in a newer branch or APIs have changed too much and you have a separate PR for the newer branch. + +EXAMPLE: + +1. You're fixing a bug being reported for 3.5.x.x. +2. You're choosing the 3.5 branch as base for your PR branch. +3. Once you're done you send a PR. +4. When a maintainer merges that PR, they also take care to cherry-pick it to 4.0 and any more recent branches. + === Handling pull requests Be polite. @@ -84,4 +101,4 @@ curl $PULL_REQUEST_URL.patch | git am --ignore-whitespace Make sure to push changes to a PR to the original remote branch. This will cause the pull request UI in GitHub show and link those commits. -This guideline document is based on the https://github.com/spring-projects/spring-data-build/blob/master/CONTRIBUTING.adoc[spring-data guidelines], thanks @olivergierke. 
\ No newline at end of file +This guideline document is based on the https://github.com/spring-projects/spring-data-build/blob/master/CONTRIBUTING.adoc[spring-data guidelines], thanks @olivergierke. diff --git a/build.gradle b/build.gradle index 21d06a97..d56fcc1f 100644 --- a/build.gradle +++ b/build.gradle @@ -1,20 +1,25 @@ +import org.gradle.api.internal.artifacts.DefaultExcludeRule + plugins { id 'java' - id 'com.github.johnrengelman.shadow' version '4.0.2' + id 'com.github.johnrengelman.shadow' version '4.0.3' id "com.bmuschko.nexus" version "2.3.1" // id "me.champeau.gradle.jmh" version "0.4.8" id 'maven-publish' id 'antlr' + id "org.sonarqube" version "2.7" + id "org.jetbrains.kotlin.jvm" version "1.3.71" } + + java { sourceCompatibility = JavaVersion.VERSION_11 targetCompatibility = JavaVersion.VERSION_11 } group = 'org.neo4j.procedure' -//version = '4.0.0.1-SNAPSHOT' -version = '4.0.0.0' +version = '4.0.0.11' archivesBaseName = 'apoc' description = """neo4j-apoc-procedures""" @@ -26,7 +31,7 @@ jar { ext { // NB: due to version.json generation by parsing this file, the next line must not have any if/then/else logic - neo4jVersion = "4.0.0" + neo4jVersion = "4.0.3" // instead we apply the override logic here neo4jVersionEffective = project.hasProperty("neo4jVersionOverride") ? 
project.getProperty("neo4jVersionOverride") : neo4jVersion testContainersVersion = '1.12.2' @@ -50,7 +55,7 @@ configurations { } shadowJar { - classifier = 'all' + archiveClassifier.set('all') } compileJava { @@ -68,20 +73,25 @@ compileJava { // "-ADocumentation.ExportGrouping=SINGLE,PACKAGE"] } +compileKotlin { + kotlinOptions.jvmTarget = "1.8" +} + generateGrammarSource { arguments += ["-package", "apoc.custom"] } dependencies { - + compileOnly "org.jetbrains.kotlin:kotlin-stdlib" + testCompile "org.jetbrains.kotlin:kotlin-stdlib" // apt 'net.biville.florent:neo4j-sproc-compiler:1.2' // temporarily disabled until byte[] is supported by sproc compiler apt group: 'org.neo4j', name: 'neo4j', version: neo4jVersionEffective - compile group: 'commons-codec', name: 'commons-codec', version: '1.9' + compile group: 'commons-codec', name: 'commons-codec', version: '1.14' compileOnly group: 'com.sun.mail', name: 'javax.mail', version: '1.6.0' testCompile group: 'com.sun.mail', name: 'javax.mail', version: '1.6.0' compile group: 'com.jayway.jsonpath', name: 'json-path', version: '2.4.0' compile group: 'org.hdrhistogram', name: 'HdrHistogram', version: '2.1.9' - compile group: 'org.neo4j.driver', name: 'neo4j-java-driver', version: '4.0.0-beta03' + compile group: 'org.neo4j.driver', name: 'neo4j-java-driver', version: '4.0.1' compile group: 'com.novell.ldap', name: 'jldap', version: '2009-10-07' antlr "org.antlr:antlr4:4.7.2", { @@ -148,8 +158,19 @@ dependencies { compileOnly group: 'org.neo4j', name: 'neo4j', version: neo4jVersionEffective compileOnly group: 'org.codehaus.jackson', name: 'jackson-mapper-asl', version: '1.9.7' + + compileOnly group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: '2.10.3' + compileOnly 'org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.3.71' + + testCompile group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: '2.10.3' + testCompile 'org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.3.71' + + + compileOnly 
group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.11.683' + testImplementation group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.11.683' + testCompile group: 'org.codehaus.jackson', name: 'jackson-mapper-asl', version: '1.9.7' - compile group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.9.7' + compile group: 'com.fasterxml.jackson.core', name: 'jackson-databind', version: '2.10.3' compile group: 'com.opencsv', name: 'opencsv', version: '4.2' compileOnly group: 'org.ow2.asm', name: 'asm', version: '5.0.2' compile group: 'com.github.javafaker', name: 'javafaker', version: '0.10' @@ -203,7 +224,7 @@ test { //exclude '**/CypherProceduresClusterTest.class'//, '**/AtomicTest.class' // neo4jDockerImage system property is used in TestContainerUtil - systemProperties 'user.language' : 'en' , 'user.country ' : 'US', 'neo4jDockerImage' : 'neo4j:4.0.0-rc01-enterprise' + systemProperties 'user.language' : 'en' , 'user.country ' : 'US', 'neo4jDockerImage' : "neo4j:${neo4jVersion}-enterprise" /* testLogging { @@ -370,8 +391,6 @@ task myJavadocJar(type: Jar) { publishing { - - repositories { maven { name = 'pipeline' @@ -379,36 +398,62 @@ publishing { } } publications { - full(MavenPublication) { - pom { - name = "neo4j-apoc-procedure" - description = "A collection of useful Neo4j Procedures" - groupId = groupId - artifactId = archivesBaseName - version = version - scm { url = "http://github.com/neo4j-contrib/neo4j-apoc-procedures" } - url = 'http://github.com/neo4j-contrib/neo4j-apoc-procedures' - - licenses { - license { - name ='The Apache Software License, Version 2.0' - url ='http://www.apache.org/licenses/LICENSE-2.0.txt' - comments ="""Note that this license is for the project itself, and not for its dependencies.""" - distribution= "repo" + shadow(MavenPublication) { publication -> + project.shadow.component(publication) + artifact(mySourcesJar) + artifact(myJavadocJar) + artifact(jar) + + pom.withXml { + def 
root = asNode() + root.appendNode("name", "neo4j-apoc-procedure") + root.appendNode("description", "A collection of useful Neo4j Procedures") + root.appendNode("url", "http://github.com/neo4j-contrib/neo4j-apoc-procedures") + + def scmNode = root.appendNode("scm") + scmNode.appendNode("url", "http://github.com/neo4j-contrib/neo4j-apoc-procedures") + + def licensesNode = root.appendNode("licenses") + def licenseNode = licensesNode.appendNode("license") + licenseNode.appendNode("name", 'The Apache Software License, Version 2.0') + licenseNode.appendNode("url", 'http://www.apache.org/licenses/LICENSE-2.0.txt') + licenseNode.appendNode("comments", 'Note that this license is for the project itself, and not for its dependencies.') + licenseNode.appendNode("distribution", 'repo') + + def developersNode = root.appendNode("developers") + def developerNode = developersNode.appendNode("developer") + developerNode.appendNode("id", 'michaelhunger') + developerNode.appendNode("name", 'Michael Hunger') + developerNode.appendNode("email", 'michael.hunger@neo4j.com') + + def dependenciesNode = root.get("dependencies")[0] + + configurations.default.allDependencies.forEach { + def dependency = dependenciesNode.appendNode('dependency') + dependency.appendNode('groupId', it.group) + dependency.appendNode('artifactId', it.name) + dependency.appendNode('version', it.version) + dependency.appendNode('scope', "compile") + + def myExcludeRules = it.excludeRules + if (myExcludeRules.size() == 0) { + myExcludeRules = new HashSet<>() } - } - developers { - developer { - id ='michaelhunger' - name= 'Michael Hunger' - email= 'michael.hunger@neo4j.com' + myExcludeRules.add(new DefaultExcludeRule("org.slf4j", "slf4j-nop")) + + // for exclusions + if (myExcludeRules.size() > 0) { + def exclusions = dependency.appendNode('exclusions') + myExcludeRules.each { ExcludeRule ex -> + def exclusion = exclusions.appendNode('exclusion') + exclusion.appendNode('artifactId', ex.module ? 
ex.module : "*") + exclusion.appendNode('groupId', ex.group) + } } + } - from(components.java) - artifact(mySourcesJar) - artifact(myJavadocJar) } } diff --git a/docs/asciidoc/_algorithms.adoc b/docs/asciidoc/_algorithms.adoc index 454a4744..d74ad763 100644 --- a/docs/asciidoc/_algorithms.adoc +++ b/docs/asciidoc/_algorithms.adoc @@ -10,9 +10,6 @@ include::algorithms/deprecated.adoc[] * <> * <> -* <> -* <> -* <> include::algorithms/path-finding-procedures.adoc[leveloffset=2] include::algorithms/similarity.adoc[leveloffset=2] diff --git a/docs/asciidoc/_nlp.adoc b/docs/asciidoc/_nlp.adoc new file mode 100644 index 00000000..45aa07a8 --- /dev/null +++ b/docs/asciidoc/_nlp.adoc @@ -0,0 +1,17 @@ +[[nlp]] +== Natural Language Processing + +[abstract] +-- +This chapter describes procedures that can be used for added Natural Language Processing functionality to graph applications. +-- + +The procedures described in this chapter act as wrappers around cloud based Natural Language APIs. + +This section includes: + +* <> +* <> + +include::nlp/gcp.adoc[] +include::nlp/aws.adoc[] \ No newline at end of file diff --git a/docs/asciidoc/_path_finding.adoc b/docs/asciidoc/_path_finding.adoc index 8981bd6c..c68acbeb 100644 --- a/docs/asciidoc/_path_finding.adoc +++ b/docs/asciidoc/_path_finding.adoc @@ -6,18 +6,25 @@ This chapter describes advanced graph querying procedures in the APOC library. -- +The procedures in this chapter enable the querying of graph data when pure Cypher isn't enough. 
For more information on how to use these procedures, see: * <> -* <> -* <> +** <> +** <> +** <> +** <> +** <> * <> * <> * <> * <> * <> +include::path-finding/expand-overview.adoc[leveloffset=1] include::path-finding/expand.adoc[leveloffset=1] +include::path-finding/expand-config.adoc[leveloffset=1] +include::path-finding/expand-subgraph-nodes.adoc[leveloffset=1] include::path-finding/expand-subgraph.adoc[leveloffset=1] include::path-finding/expand-spanning-tree.adoc[leveloffset=1] include::path-finding/neighborhood-search.adoc[leveloffset=1] diff --git a/docs/asciidoc/algorithms/algo.adoc b/docs/asciidoc/algorithms/algo.adoc index 24c8dad1..9eb330f6 100644 --- a/docs/asciidoc/algorithms/algo.adoc +++ b/docs/asciidoc/algorithms/algo.adoc @@ -1,29 +1,11 @@ [[community-detection]] -== Deprecated: Community Detection via Label Propagation +== Removed: Community Detection via Label Propagation [abstract] -- This section describes the Label Propagation community detection algorithm. -- -include::deprecated.adoc[] +This algorithm has been deprecated by the https://neo4j.com/docs/graph-data-science/current/[Graph Data Science Library^], which has more performant and battle hardened versions of the algorithms. -APOC includes a simple procedure for label propagation. -It may be used to detect communities or solve other graph partitioning problems. -The following example shows how it may be used. - -The example call with scan all nodes 25 times. -During a scan the procedure will look at all outgoing relationships of type :X for each node n. -For each of these relationships, it will compute a weight and use that as a vote for the other node's 'partition' property value. -Finally, n.partition is set to the property value that acquired the most votes. - -Weights are computed by multiplying the relationship weight with the weight of the other nodes. -Both weights are taken from the 'weight' property; if no such property is found, the weight is assumed to be 1.0. 
-Similarly, if no 'weight' property key was specified, all weights are assumed to be 1.0. - -[source,cypher] ----- -CALL apoc.algo.community(25,null,'partition','X','OUTGOING','weight',10000) ----- - -The second argument is a list of label names and may be used to restrict which nodes are scanned. +For documentation of the Label Propagation algorithm, see https://neo4j.com/docs/graph-data-science/current/algorithms/label-propagation/ diff --git a/docs/asciidoc/algorithms/centrality.adoc b/docs/asciidoc/algorithms/centrality.adoc index 1765d53b..ca7a2844 100644 --- a/docs/asciidoc/algorithms/centrality.adoc +++ b/docs/asciidoc/algorithms/centrality.adoc @@ -1,73 +1,12 @@ [[centrality]] -= Deprecated: Centrality Algorithms += Removed: Centrality Algorithms [abstract] -- This section describes centrality algorithms in the APOC library. -- -include::deprecated.adoc[] +These algorithms have been deprecated by the https://neo4j.com/docs/graph-data-science/current/[Graph Data Science Library^], which has more performant and battle hardened versions of the algorithms. -== Setup - -Let's create some test data to run the Centrality algorithms on. - -[source,cypher] ----- -// create 100 nodes -FOREACH (id IN range(0,1000) | CREATE (:Node {id:id})) - -// over the cross product (1M) create 100.000 relationships -MATCH (n1:Node),(n2:Node) WITH n1,n2 LIMIT 1000000 WHERE rand() < 0.1 - -CREATE (n1)-[:TYPE]->(n2) ----- - -== Closeness Centrality Procedure - -Centrality is an indicator of a node's influence in a graph. In graphs there is a natural distance metric between pairs of nodes, defined by the length of their shortest paths. -For both algorithms below we can measure based upon the direction of the relationship, whereby the 3rd argument represents the direction -and can be of value BOTH, INCOMING, OUTGOING. - -Closeness Centrality defines the farness of a node as the sum of its distances from all other nodes, and its closeness as the reciprocal of farness. 
- -The more central a node is the lower its total distance from all other nodes. - -Complexity: This procedure uses a BFS shortest path algorithm. With BFS the complexes becomes `O(n * m)` -Caution: Due to the complexity of this algorithm it is recommended to run it on only the nodes you are interested in. - -[source,cypher] ----- -MATCH (node:Node) -WHERE node.id %2 = 0 -WITH collect(node) AS nodes -CALL apoc.algo.closeness(['TYPE'],nodes,'INCOMING') YIELD node, score -RETURN node, score -ORDER BY score DESC ----- - - -== Betweenness Centrality Procedure - -The procedure will compute betweenness centrality as defined by Linton C. Freeman (1977) using the algorithm by Ulrik Brandes (2001). -Centrality is an indicator of a node's influence in a graph. - -Betweenness Centrality is equal to the number of shortest paths from all nodes to all others that pass through that node. - -High centrality suggests a large influence on the transfer of items through the graph. - -Centrality is applicable to numerous domains, including: social networks, biology, transport and scientific cooperation. - -Complexity: This procedure uses a BFS shortest path algorithm. With BFS the complexes becomes O(n * m) -Caution: Due to the complexity of this algorithm it is recommended to run it on only the nodes you are interested in. 
- -[source,cypher] ----- -MATCH (node:Node) -WHERE node.id %2 = 0 -WITH collect(node) AS nodes -CALL apoc.algo.betweenness(['TYPE'],nodes,'BOTH') YIELD node, score -RETURN node, score -ORDER BY score DESC ----- +For documentation of the Centrality algorithms, see https://neo4j.com/docs/graph-data-science/current/algorithms/centrality/ diff --git a/docs/asciidoc/algorithms/deprecated.adoc b/docs/asciidoc/algorithms/deprecated.adoc index a2f5890d..c0772561 100644 --- a/docs/asciidoc/algorithms/deprecated.adoc +++ b/docs/asciidoc/algorithms/deprecated.adoc @@ -1,3 +1,6 @@ [WARNING] -Graph Algorithms (similarity, centrality and clustering) in APOC are deprecated and about to be removed. -Please use the algorithms in the https://r.neo4j.com/algo[Graph Algorithms Library] instead. \ No newline at end of file +==== +The similarity algorithms in APOC are deprecated and will be removed in the APOC 4.1 release. + +They have been deprecated by the https://neo4j.com/docs/graph-data-science/current/[Graph Data Science Library^], which has more performant and battle hardened versions of the algorithms. +==== \ No newline at end of file diff --git a/docs/asciidoc/algorithms/pagerank.adoc b/docs/asciidoc/algorithms/pagerank.adoc index 70d2c20a..9bfe28df 100644 --- a/docs/asciidoc/algorithms/pagerank.adoc +++ b/docs/asciidoc/algorithms/pagerank.adoc @@ -1,71 +1,12 @@ [[pagerank]] -= Deprecated: PageRank Algorithm += Removed: PageRank Algorithm [abstract] -- This section describes the PageRank centrality algorithm. -- -include::deprecated.adoc[] +This algorithm has been deprecated by the https://neo4j.com/docs/graph-data-science/current/[Graph Data Science Library^], which has more performant and battle hardened versions of the algorithms. -== Setup +For documentation of the Page Rank algorithm, see https://neo4j.com/docs/graph-data-science/current/algorithms/page-rank/ -Let's create some test data to run the PageRank algorithm on. 
- -[source,cypher] ----- -// create 100 nodes -FOREACH (id IN range(0,1000) | CREATE (:Node {id:id})) - -// over the cross product (1M) create 100.000 relationships -MATCH (n1:Node),(n2:Node) WITH n1,n2 LIMIT 1000000 WHERE rand() < 0.1 - -CREATE (n1)-[:TYPE_1]->(n2) ----- - -== PageRank Procedure - -PageRank is an algorithm used by Google Search to rank websites in their search engine results. - -It is a way of measuring the importance of nodes in a graph. - -PageRank counts the number and quality of relationships to a node to approximate the importance of that node. - -PageRank assumes that more important nodes likely have more relationships. - -Caution: `nodes` specifies the nodes for which a PageRank score will be projected, but the procedure will _always_ compute the PageRank algorithm on the _entire_ graph. At present, there is no way to filter/reduce the number of elements that PageRank computes over. - -A future version of this procedure will provide the option of computing PageRank on a subset of the graph. 
- -[source,cypher] ----- -MATCH (node:Node) -WHERE node.id %2 = 0 -WITH collect(node) AS nodes -// compute over relationships of all types -CALL apoc.algo.pageRank(nodes) YIELD node, score -RETURN node, score -ORDER BY score DESC ----- - -[source,cypher] ----- -MATCH (node:Node) -WHERE node.id %2 = 0 -WITH collect(node) AS nodes -// only compute over relationships of types TYPE_1 or TYPE_2 -CALL apoc.algo.pageRankWithConfig(nodes,{types:'TYPE_1|TYPE_2'}) YIELD node, score -RETURN node, score -ORDER BY score DESC ----- - -[source,cypher] ----- -MATCH (node:Node) -WHERE node.id %2 = 0 -WITH collect(node) AS nodes -// peroform 10 page rank iterations, computing only over relationships of type TYPE_1 -CALL apoc.algo.pageRankWithConfig(nodes,{iterations:10,types:'TYPE_1'}) YIELD node, score -RETURN node, score -ORDER BY score DESC ----- \ No newline at end of file diff --git a/docs/asciidoc/algorithms/similarity.adoc b/docs/asciidoc/algorithms/similarity.adoc index 78baad4f..e5844f8f 100644 --- a/docs/asciidoc/algorithms/similarity.adoc +++ b/docs/asciidoc/algorithms/similarity.adoc @@ -6,6 +6,8 @@ This section describes similarity algorithms in the APOC library. -- +include::deprecated.adoc[] + [cols="3m,3"] |=== | apoc.algo.cosineSimilarity([vector1], [vector2]) | Compute cosine similarity @@ -15,20 +17,5 @@ This section describes similarity algorithms in the APOC library. 
[cols="3m,3"] |=== -| apoc.algo.betweenness(['TYPE',...],nodes,BOTH) YIELD node, score | calculate betweenness centrality for given nodes -| apoc.algo.closeness(['TYPE',...],nodes, INCOMING) YIELD node, score | calculate closeness centrality for given nodes | apoc.algo.cover(nodeIds) YIELD rel | return relationships between this set of nodes -|=== - -[cols="3m,3"] -|=== -| apoc.algo.pageRank(nodes) YIELD node, score | calculates page rank for given nodes -| apoc.algo.pageRankWithConfig(nodes,{iterations:_,types:_}) YIELD node, score | calculates page rank for given nodes -|=== - -[cols="3m,3"] -|=== -| apoc.algo.community(times,labels,partitionKey,type,direction,weightKey,batchSize) | simple label propagation kernel -| apoc.algo.cliques(minSize) YIELD clique | search the graph and return all maximal cliques at least at large as the minimum size argument. -| apoc.algo.cliquesWithNode(startNode, minSize) YIELD clique | search the graph and return all maximal cliques that are at least as large than the minimum size argument and contain this node |=== \ No newline at end of file diff --git a/docs/asciidoc/config.adoc b/docs/asciidoc/config.adoc index 4da35022..3787d315 100644 --- a/docs/asciidoc/config.adoc +++ b/docs/asciidoc/config.adoc @@ -33,6 +33,8 @@ All boolean options have default value set to **false**. This means that they ar procedures | apoc.es..uri=es-url-with-credentials | store es-urls under a key to be used by elasticsearch procedures | apoc.export.file.enabled=false/true | Enable writing local files to disk +| apoc.http.timeout.connect= (default 10000) | Sets a specified timeout value, in milliseconds, to be used when communicating with a URI. If the timeout expires before the connection can be established, a Neo.ClientError.Procedure.ProcedureCallFailed exception is raised. A timeout of zero is interpreted as an infinite timeout. +| apoc.http.timeout.read= (default 60000) | Sets the read timeout to a specified timeout, in milliseconds. 
A non-zero value specifies the timeout when reading from a connection established to a resource. If the timeout expires before there is data available for read, a Neo.ClientError.Procedure.ProcedureCallFailed exception is raised. A timeout of zero is interpreted as an infinite timeout. | apoc.import.file.enabled=false/true | Enable reading local files from disk | apoc.import.file.use_neo4j_config=true/false (default `true`) | the procedures check whether file system access is allowed and possibly constrained to a specific directory by reading the two configuration parameters `dbms.security.allow_csv_import_from_file_urls` and `dbms.directories.import` respectively | apoc.initializer.cypher | a cypher statment to be executed once the database is started @@ -51,6 +53,7 @@ apoc.spatial.geocode..= | apoc.ttl.limit= (default 1000) | Maximum number of nodes being deleted in one background transaction | apoc.uuid.enabled=false/true (default false) | global switch to enable uuid handlers + //public static final String APOC_JSON_ZIP_URL = "apoc.json.zip.url"; //public static final String APOC_JSON_SIMPLE_JSON_URL = "apoc.json.simpleJson.url"; diff --git a/docs/asciidoc/data/exportJSON/MapNode.json b/docs/asciidoc/data/exportJSON/MapNode.json index 849a1588..2c7294d2 100644 --- a/docs/asciidoc/data/exportJSON/MapNode.json +++ b/docs/asciidoc/data/exportJSON/MapNode.json @@ -1 +1 @@ -{"u":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]},"d":{"name":"Jim","age":42},"r":{"since":1993}} \ No newline at end of file +{"u":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]},"d":{"name":"Jim","age":42},"r":{"bffSince":"P5M1DT12H","since":1993}} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/MapPath.json 
b/docs/asciidoc/data/exportJSON/MapPath.json index 5b3e1fc9..cdffd54a 100644 --- a/docs/asciidoc/data/exportJSON/MapPath.json +++ b/docs/asciidoc/data/exportJSON/MapPath.json @@ -1,2 +1,2 @@ -{"map":{"key":{"length":1,"rels":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}],"nodes":[{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}]}},"name":"Kate"} -{"map":{"key":{"length":1,"rels":[{"id":"1","type":"relationship","label":"KNOWS","properties":{"since":1850},"start":{"id":"3","labels":["User"]},"end":{"id":"4","labels":["User"]}}],"nodes":[{"type":"node","id":"3","labels":["User"],"properties":{"name":"Mike","age":78,"male":true}},{"type":"node","id":"4","labels":["User"],"properties":{"name":"John","age":18}}]}},"name":"Kate"} +{"map":{"key":{"length":1,"rels":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}],"nodes":[{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}]}},"name":"Kate"} 
+{"map":{"key":{"length":1,"rels":[{"id":"1","type":"relationship","label":"KNOWS","properties":{"since":1850},"start":{"id":"3","labels":["User"]},"end":{"id":"4","labels":["User"]}}],"nodes":[{"type":"node","id":"3","labels":["User"],"properties":{"name":"Mike","age":78,"male":true}},{"type":"node","id":"4","labels":["User"],"properties":{"name":"John","age":18}}]}},"name":"Kate"} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/MapRel.json b/docs/asciidoc/data/exportJSON/MapRel.json index bb73df49..d2e3184c 100644 --- a/docs/asciidoc/data/exportJSON/MapRel.json +++ b/docs/asciidoc/data/exportJSON/MapRel.json @@ -1 +1 @@ -{"rel":{"since":1993}} \ No newline at end of file +{"rel":{"bffSince":"P5M1DT12H","since":1993}} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/all.json b/docs/asciidoc/data/exportJSON/all.json index a9197773..276924ae 100644 --- a/docs/asciidoc/data/exportJSON/all.json +++ b/docs/asciidoc/data/exportJSON/all.json @@ -1,4 +1,4 @@ {"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}} {"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}} {"type":"node","id":"2","labels":["User"],"properties":{"age":12}} -{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}} \ No newline at end of file +{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993,"bffSince":"P5M1DT12H"},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/data.json b/docs/asciidoc/data/exportJSON/data.json index a9197773..23dd8248 100644 --- a/docs/asciidoc/data/exportJSON/data.json +++ b/docs/asciidoc/data/exportJSON/data.json @@ -1,4 +1,4 @@ 
{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}} {"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}} {"type":"node","id":"2","labels":["User"],"properties":{"age":12}} -{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}} \ No newline at end of file +{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/graph.json b/docs/asciidoc/data/exportJSON/graph.json index a9197773..23dd8248 100644 --- a/docs/asciidoc/data/exportJSON/graph.json +++ b/docs/asciidoc/data/exportJSON/graph.json @@ -1,4 +1,4 @@ {"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}} {"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}} {"type":"node","id":"2","labels":["User"],"properties":{"age":12}} -{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}} \ No newline at end of file +{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/listPath.json b/docs/asciidoc/data/exportJSON/listPath.json index b42bf0b7..28c30e1e 100644 --- a/docs/asciidoc/data/exportJSON/listPath.json +++ b/docs/asciidoc/data/exportJSON/listPath.json @@ -1 +1 @@ 
-{"list":[{"length":1,"rels":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}],"nodes":[{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}]}]} \ No newline at end of file +{"list":[{"length":1,"rels":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}],"nodes":[{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}]}]} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/listRel.json b/docs/asciidoc/data/exportJSON/listRel.json index 8681e0ea..1e0bdac4 100644 --- a/docs/asciidoc/data/exportJSON/listRel.json +++ b/docs/asciidoc/data/exportJSON/listRel.json @@ -1 +1 @@ -{"list":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}]} \ No newline at end of file +{"list":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}]} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/query_nodes_path.json b/docs/asciidoc/data/exportJSON/query_nodes_path.json index 56e3aff9..ff1f160d 100644 --- a/docs/asciidoc/data/exportJSON/query_nodes_path.json +++ b/docs/asciidoc/data/exportJSON/query_nodes_path.json @@ -1 
+1 @@ -{"u":{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},"rel":{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}},"u2":{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}},"p":{"length":1,"rels":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}],"nodes":[{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}]},"u.name":"Adam"} \ No newline at end of file 
+{"u":{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},"rel":{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}},"u2":{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}},"p":{"length":1,"rels":[{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"]},"end":{"id":"1","labels":["User"]}}],"nodes":[{"type":"node","id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},{"type":"node","id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}]},"u.name":"Adam"} \ No newline at end of file diff --git a/docs/asciidoc/data/exportJSON/writeNodeProperties.json b/docs/asciidoc/data/exportJSON/writeNodeProperties.json index 1e02a9e9..6f55bf21 100644 --- a/docs/asciidoc/data/exportJSON/writeNodeProperties.json +++ b/docs/asciidoc/data/exportJSON/writeNodeProperties.json @@ -1 +1 @@ -{"rel":{"id":"0","type":"relationship","label":"KNOWS","properties":{"since":1993},"start":{"id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},"end":{"id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}}} \ No newline at end of file 
+{"rel":{"id":"0","type":"relationship","label":"KNOWS","properties":{"bffSince":"P5M1DT12H","since":1993},"start":{"id":"0","labels":["User"],"properties":{"born":"2015-07-04T19:32:24","name":"Adam","place":{"crs":"wgs-84","latitude":33.46789,"longitude":13.1,"height":null},"age":42,"male":true,"kids":["Sam","Anna","Grace"]}},"end":{"id":"1","labels":["User"],"properties":{"name":"Jim","age":42}}}} \ No newline at end of file diff --git a/docs/asciidoc/export/enableFileExport.adoc b/docs/asciidoc/export/enableFileExport.adoc index a8948bfd..8397e05a 100644 --- a/docs/asciidoc/export/enableFileExport.adoc +++ b/docs/asciidoc/export/enableFileExport.adoc @@ -10,7 +10,7 @@ apoc.export.file.enabled=true If we try to use any of the export procedures without having first set this property, we'll get the following error message: |=== -| Failed to invoke procedure `apoc.export.csv.all`: Caused by: java.lang.RuntimeException: Export to files not enabled, please set apoc.export.file.enabled=true in your neo4j.conf +| Failed to invoke procedure: Caused by: java.lang.RuntimeException: Export to files not enabled, please set apoc.export.file.enabled=true in your neo4j.conf |=== Export files are written to the `import` directory, which is defined by the `dbms.directories.import` property. diff --git a/docs/asciidoc/graph-updates/ttl.adoc b/docs/asciidoc/graph-updates/ttl.adoc index ccebde7b..9f3782b0 100644 --- a/docs/asciidoc/graph-updates/ttl.adoc +++ b/docs/asciidoc/graph-updates/ttl.adoc @@ -3,69 +3,179 @@ [abstract] -- -This section describes procedures that can be used to remove nodes from the database once a time limit has been reached. +This section describes procedures that can be used to remove nodes from the database once a time or time limit has been reached. -- Some nodes are not meant to live forever. That's why with APOC you can specify a time by when they are removed from the database, by utilizing a schema index and an additional label. 
-A few convenience procedures help with that. +A few procedures help with that. -ifdef::backend-html5[] -++++ - -++++ -endif::[] +This section includes: -Enable TTL with setting in `apoc.conf` : `apoc.ttl.enabled=true` +* <> +* <> +//* <> +* <> + ** <> + ** <> +* <> -There are some convenience procedures to expire nodes. +[[ttl-available-procedures]] +== Available Procedures -You can also do it yourself by running +The table below describes the available procedures: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.ttl.csv[] +|=== + +[[ttl-config-parameters]] +== Configuration and Parameters + +For configuration, you will need to enable time-to-live functionality with the following settings in `apoc.conf`: + +.apoc.conf +[source,properties] +---- +apoc.ttl.enabled=true + +# Optional: controls the repeat frequency +# apoc.ttl.schedule=5 +---- + +In the available procedures listed above, there are several parameters with specific values. +The table below outlines values and formats for the valid parameters. + +[options="header"] +|=== +| Parameter | Description | Possible Values | Examples +| `node` | The entity or entities to add the label and property of time-to-live (previous selection statement needed) | Any node or group of nodes fitting desired criteria | `n`, `person`, `group` +| `epochTime` | The datetime value of when the node(s) should expire | Any value in epoch seconds or millisecond format | `1540944000`, `1582209630000` +| `time-unit` | Measurement of units for input value | `ms, s, m, h, d` (long forms: `millis, milliseconds, seconds, minutes, hours, days`) | `milliseconds`, `h` +|=== + +//[[ttl-handson-video]] +//== Time-To-Live Hands-On +// +//ifdef::backend-html5[] +//++++ +// +//++++ +//endif::[] + +[[ttl-examples]] +== Examples: Time-To-Live + +This section includes examples showing how to use the time-to-live procedures. 
+These examples are based on a movies dataset, which can be imported by running the following Cypher query: + +include::../export/createExportGraph.adoc[] + +The Neo4j Browser visualization below shows the imported graph: + +image::play-movies.png[title="Movies Graph Visualization"] + +[[ttl-expireAt]] +=== Expire node(s) at specified time + +The `apoc.ttl.expireAtInstant` procedure deletes a node or group of nodes after the datetime specified. + +To remove a single node or set of nodes, we can use a selection query prior to calling the procedure that defines which nodes we want to apply the time-to-live label and property. +We then call the procedure and pass in the selected node(s), the future datetime at which we want the nodes to be removed, and the specificity of the datetime (seconds, milliseconds, etc). [source,cypher] ---- -SET n:TTL -SET n.ttl = timestamp() + 3600 +MATCH (movie:Movie)<-[produced:PRODUCED]-(person:Person) +CALL apoc.ttl.expireAtInstant(person,1585176720,'s') +RETURN movie, produced, person ---- -[cols="1m,5"] +.Results +[opts="header"] |=== -| CALL apoc.date.expire.in(node,time,'time-unit') | expire node in given time-delta by setting :TTL label and `ttl` property -| CALL apoc.date.expire(node,time,'time-unit') | expire node at given time by setting :TTL label and `ttl` property +| "movie" | "produced" | "person" +| {"title":"The Matrix","tagline":"Welcome to the Real World","released":1999} | {} | {"name":"Joel Silver","ttl":1585176720000,"born":1952} |=== -Optionally set `apoc.ttl.schedule=5` as repeat frequency. +After the point in time specified (in this case, after `2020-03-25 17:52:00`), the node(s) will be expired and deleted from the graph. +Running the statement below will return no results for our example graph. 
+
+[source,cypher]
+----
+MATCH (movie:Movie)<-[produced:PRODUCED]-(person:Person)
+RETURN movie, produced, person
+----
-== Process
+[[ttl-expireIn]]
+=== Expire node(s) after specified time period
-30s after startup an index is created:
+The `apoc.ttl.expireAfterTimeLength` procedure deletes a node or group of nodes after the length of time specified.
+Just as with the similar procedure above, we can use a selection query prior to calling the procedure that defines which nodes we want to apply the time-to-live label and property.
+We then call the procedure and pass in the selected node(s), the time delta from current time at which we want the nodes to be removed, and the specificity of the time amount (seconds, milliseconds, etc). [source,cypher] ---- -CREATE INDEX ON :TTL(ttl) +MATCH (movie:Movie)<-[produced:PRODUCED]-(person:Person) +CALL apoc.ttl.expireAfterTimeLength(person,120,'s') +RETURN movie, produced, person ---- -At startup a statement is scheduled to run every 60s (or configure in `apoc.conf` - `apoc.ttl.schedule=120`) +.Results +[opts="header"] +|=== +| "movie" | "produced" | "person" +| {"title":"The Matrix","tagline":"Welcome to the Real World","released":1999} | {} | {"name":"Joel Silver","ttl":120000,"born":1952} +|=== + +After the length of time specified has passed (in this case, after `120 seconds`), the node(s) will be expired and deleted from the graph. +Running the statement below will return no results for our example graph. [source,cypher] ---- -MATCH (t:TTL) where t.ttl < timestamp() WITH t LIMIT 1000 DETACH DELETE t +MATCH (movie:Movie)<-[produced:PRODUCED]-(person:Person) +RETURN movie, produced, person ---- -The `ttl` property holds the *time when the node is expired in milliseconds since epoch*.
+[[ttl-process]] +== Manual Process: How TTL Works -You can expire your nodes by setting the :TTL label and the ttl property: +You can also do the time-to-live process manually by running the following steps: +* Set the `:TTL` label and `ttl` property on the node(s) you want to expire. [source,cypher] ---- -MATCH (n:Foo) WHERE n.bar SET n:TTL, n.ttl = timestamp() + 10000; +SET n:TTL +SET n.ttl = timestamp() + 3600 ---- -There is also a procedure that does the same: +The `ttl` property holds the *time when the node is expired in milliseconds since epoch*. + +* Create an index on the time-to-live label and property. [source,cypher] ---- -CALL apoc.date.expire(node,time,'time-unit'); -CALL apoc.date.expire(n,100,'s'); +CREATE INDEX ON :TTL(ttl) +---- + +When using the procedure, the index is created 30 seconds after startup. + +* Remove node(s) that have passed the expiration time or length of time + +[source,cypher] +---- +MATCH (t:TTL) where t.ttl < timestamp() WITH t LIMIT 1000 DETACH DELETE t +---- + +When using the procedure, the deletion statement to remove nodes past expiration will run every 60 seconds. +You can also configure the schedule by adding the following setting in `apoc.conf`: + + +.apoc.conf +[source,properties] ---- +# Optional: controls the repeat frequency +apoc.ttl.schedule=120 +---- \ No newline at end of file diff --git a/docs/asciidoc/import/enableFileImport.adoc b/docs/asciidoc/import/enableFileImport.adoc new file mode 100644 index 00000000..a75964b1 --- /dev/null +++ b/docs/asciidoc/import/enableFileImport.adoc @@ -0,0 +1,35 @@ +By default importing from the file system is disabled. 
+We can enable it by setting the following property in `apoc.conf`: + +.apoc.conf +[source,properties] +---- +apoc.import.file.enabled=true +---- + +If we try to use any of the import procedures without having first set this property, we'll get the following error message: + +|=== +| Failed to invoke procedure: Caused by: java.lang.RuntimeException: Import from files not enabled, please set apoc.import.file.enabled=true in your apoc.conf +|=== + +Import files are read from the `import` directory, which is defined by the `dbms.directories.import` property. +This means that any file path that we provide is relative to this directory. +If we try to read from an absolute path, such as `/tmp/filename`, we'll get an error message similar to the following one: + +|=== +| Failed to invoke procedure: Caused by: java.lang.RuntimeException: Can't read url or key file:/path/to/neo4j/import/tmp/filename as json: /path/to/neo4j//import/tmp/filename (No such file or directory) +|=== + +We can enable reading files from anywhere on the file system by setting the following property in `apoc.conf`: + +.apoc.conf +[source,properties] +---- +apoc.import.file.use_neo4j_config=false +---- + +[WARNING] +==== +Neo4j will now be able to read from anywhere on the file system, so be sure that this is your intention before setting this property. +==== \ No newline at end of file diff --git a/docs/asciidoc/import/graphml.adoc b/docs/asciidoc/import/graphml.adoc index 14ae330d..8c62d0be 100644 --- a/docs/asciidoc/import/graphml.adoc +++ b/docs/asciidoc/import/graphml.adoc @@ -1,5 +1,5 @@ [[graphml-import]] -= GraphML Import += Import GraphML [abstract] -- @@ -8,11 +8,241 @@ This chapter describes a procedure that can be using to load data from GraphML i GraphML is used by other tools, like Gephi and CytoScape, to read graph data. 
-// tag::export.graphml[] +This section includes: -`YIELD file, source, format, nodes, relationships, properties, time` +* <> +* <> +* <> + ** <> + ** <> -[separator=¦,opts=header,cols="1,1m,5"] +[[import-graphml-available-procedures]] +== Available Procedures + +The table below describes the available procedures: + +[separator=¦,opts=header] +|=== +include::../../../build/generated-documentation/apoc.import.csv[lines=1;3] +|=== + +[[import-graphml-config]] +== Configuration parameters + +The procedure supports the following config parameters: + +.configuration options +[options=header] +|=== +| param | default | description +| readLabels | false | Creates node labels based on the value in the `labels` property of `node` elements +| defaultRelationshipType | RELATED | The default relationship type to use if none is specified in the GraphML file +| storeNodeIds | false | store the `id` property of `node` elements +| batchSize | 20000 | The number of elements to process per transaction +|=== + + +[[import-graphml-examples]] +== Examples + +This section includes examples showing how to use the import GraphML procedure. + +[[import-graphml-simple]] +=== Import simple GraphML file + +The `simple.graphml` file contains a graph representation from the http://graphml.graphdrawing.org/primer/graphml-primer.html[GraphML primer^].
+ +image::apoc.import.graphml.simple-diagram.png[] + +.simple.graphml +[source,xml] +---- + + + + + + + + + + + + + + + + + + + + + + + + + + + + +---- + + +.The following imports a graph based on `simple.graphml` +[source,cypher] +---- +CALL apoc.import.graphml("http://graphml.graphdrawing.org/primer/simple.graphml", {}) +---- + +If we run this query, we'll see the following output: + +.Results +[opts="header"] +|=== +| file | source | format | nodes | relationships | properties | time | rows | batchSize | batches | done | data +| "http://graphml.graphdrawing.org/primer/simple.graphml" | "file" | "graphml" | 11 | 12 | 0 | 618 | 0 | -1 | 0 | TRUE | NULL +|=== + +We could also copy `simple.graphml` into Neo4j's `import` directory, and import the file from there. +If we take that approach, we'll need to add the following entry to `apoc.conf`: + +For reading from files we'll have to enable the following config option: + +.apoc.conf +[source,properties] +---- +apoc.import.file.enabled=true +---- + +We can then run the import procedure in the following way: + +.The following imports a graph based on `simple.graphml` +[source,cypher] +---- +CALL apoc.import.graphml("file://simple.graphml", {}) +---- + +The Neo4j Browser visualization below shows the imported graph: + +image::apoc.import.graphml.simple.png[title="Simple Graph Visualization"] + +[[import-graphml-apoc]] +== Import GraphML file created by Export GraphML procedures + +`movies.graphml` contains a subset of Neo4j's movies graph, and was generated by the <>. 
+ +.movies.graphml +[source,xml] +---- + + + + + + + + + + + +:MovieThe MatrixWelcome to the Real World1999 +:Person1964Keanu Reeves +:Person1967Carrie-Anne Moss +:Person1961Laurence Fishburne +:Person1960Hugo Weaving +:Person1967Lilly Wachowski +:Person1965Lana Wachowski +:Person1952Joel Silver +ACTED_IN["Neo"] +ACTED_IN["Trinity"] +ACTED_IN["Morpheus"] +ACTED_IN["Agent Smith"] +DIRECTED +DIRECTED +PRODUCED + + +---- + + +.The following imports a graph based on `movies.graphml` +[source,cypher] +---- +CALL apoc.import.graphml("movies.graphml", {}) +---- + +If we run this query, we'll see the following output: + +.Results +[opts="header"] +|=== +| file | source | format | nodes | relationships | properties | time | rows | batchSize | batches | done | data +| "movies.graphml" | "file" | "graphml" | 8 | 7 | 36 | 23 | 0 | -1 | 0 | TRUE | NULL +|=== + +We can run the following query to see the imported graph: + +[source,cypher] +---- +MATCH p=()-->() +RETURN p +---- + +.Results +[opts="header"] +|=== +| p +| ({name: "Laurence Fishburne", born: "1961", labels: ":Person"})-[:ACTED_IN {roles: "[\"Morpheus\"]", label: "ACTED_IN"}]->({tagline: "Welcome to the Real World", title: "The Matrix", released: "1999", + labels: ":Movie"}) + | ({name: "Carrie-Anne Moss", born: "1967", labels: ":Person"})-[:ACTED_IN {roles: "[\"Trinity\"]", label: "ACTED_IN"}]->({tagline: "Welcome to the Real World", title: "The Matrix", released: "1999", la +bels: ":Movie"}) | ({name: "Lana Wachowski", born: "1965", labels: ":Person"})-[:DIRECTED {label: "DIRECTED"}]->({tagline: "Welcome to the Real World", title: "The Matrix", released: "1999", labels: ":Movie"}) + + | ({name: "Joel Silver", born: "1952", labels: ":Person"})-[:PRODUCED {label: "PRODUCED"}]->({tagline: "Welcome to the Real World", title: "The Matrix", released: "1999", labels: ":Movie"}) + + | ({name: "Lilly Wachowski", born: "1967", labels: ":Person"})-[:DIRECTED {label: "DIRECTED"}]->({tagline: "Welcome to the Real World", title: 
"The Matrix", released: "1999", labels: ":Movie"}) + + | ({name: "Keanu Reeves", born: "1964"})-[:ACTED_IN {roles: "[\"Neo\"]", label: "ACTED_IN"}]->({tagline: "Welcome to the Real World", title: "The Matrix", released: "1999", labels: ": +Movie"}) +| ({name: "Hugo Weaving", born: "1960", labels: ":Person"})-[:ACTED_IN {roles: "[\"Agent Smith\"]", label: "ACTED_IN"}]->({tagline: "Welcome to the Real World", title: "The Matrix", released: "1999", la +bels: ":Movie"}) +|=== + +The labels defined in the GraphML file have been added to the `labels` property on each node, rather than being added as a node label. +We can set the config property `readLabels: true` to import native labels: + +.The following imports a graph based on `movies.graphml` and stores node labels +[source,cypher] +---- +CALL apoc.import.graphml("movies.graphml", {readLabels: true}) +---- + +.Results +[opts="header"] +|=== +| file | source | format | nodes | relationships | properties | time | rows | batchSize | batches | done | data +| "movies.graphml" | "file" | "graphml" | 8 | 7 | 21 | 23 | 0 | -1 | 0 | TRUE | NULL +|=== + +And now let's re-run the query to see the imported graph: + +[source,cypher] +---- +MATCH p=()-->() +RETURN p; +---- + +.Results +[opts="header"] +|=== +| p +| (:Person {name: "Lilly Wachowski", born: "1967"})-[:DIRECTED]->(:Movie {tagline: "Welcome to the Real World", title: "The Matrix", released: "1999"}) +| (:Person {name: "Carrie-Anne Moss", born: "1967"})-[:ACTED_IN {roles: "[\"Trinity\"]"}]->(:Movie {tagline: "Welcome to the Real World", title: "The Matrix", released: "1999"}) +| (:Person {name: "Hugo Weaving", born: "1960"})-[:ACTED_IN {roles: "[\"Agent Smith\"]"}]->(:Movie {tagline: "Welcome to the Real World", title: "The Matrix", released: "1999"}) +| (:Person {name: "Laurence Fishburne", born: "1961"})-[:ACTED_IN {roles: "[\"Morpheus\"]"}]->(:Movie {tagline: "Welcome to the Real World", title: "The Matrix", released: "1999"}) +| (:Person {name:
"Keanu Reeves", born: "1964"})-[:ACTED_IN {roles: "[\"Neo\"]"}]->(:Movie {tagline: "Welcome to the Real World", title: "The Matrix", released: "1999"}) +| (:Person {name: "Joel Silver", born: "1952"})-[:PRODUCED]->(:Movie {tagline: "Welcome to the Real World", title: "The Matrix", released: "1999"}) +| (:Person {name: "Lana Wachowski", born: "1965"})-[:DIRECTED]->(:Movie {tagline: "Welcome to the Real World", title: "The Matrix", released: "1999"}) |=== -include::../../../build/generated-documentation/apoc.export.graphml.csv[lines=1;2] -|=== \ No newline at end of file diff --git a/docs/asciidoc/import/jsonpath.adoc b/docs/asciidoc/import/jsonpath.adoc new file mode 100644 index 00000000..41dd18b5 --- /dev/null +++ b/docs/asciidoc/import/jsonpath.adoc @@ -0,0 +1,33 @@ +Using JSON paths gives you a condensed way to read and process sub-documents and sub-values from nested JSON structures. +This is especially helpful if you need to skip over unwinding higher-level parent objects in order to access more nested data, or if you need to manipulate values in those substructures. + +Rather than passing in a large JSON file and using Cypher to unwind each object and access what you need, you can pass in the file and provide the JSON path to the substructures you need, resulting in shorter statements for nested JSON. +The JSON path format follows the https://github.com/jayway/JsonPath#operators[Java implementation by Jayway^] of http://goessner.net/articles/JsonPath/[Stefan Gössner's JSONPath^], providing a consistent syntax for the paths. + +Many of the `apoc.convert.*Json*` procedures and functions, as well as the `apoc.load.json` procedure, now accept a json path as last argument. +Note that these functions are meant to stream arrays (of values or objects) and maps, not a single value. +If a single item containing a single value is specified as the path, the function must try to wrap it and will not return expected results. 
+ +There is also the `apoc.json.path(json,path)` function that takes a JSON string (not map or list) and retrieves values from the json path provided as the second argument. +Note: if the JSON is not already in string format, you can use the `apoc.convert.toJson` function to convert it. + +More examples can be found at the links provided above, but let us look at an example of the syntax for JSON paths. +The syntax shown below pulls the `items` array from the StackOverflow API of Neo4j questions and retrieves the array of `tags` from the first object in the item list. + +`$.items[0].tags` + +All of the operators and options for specifying JSON paths are included in the next table. + +.Operators +[options="header",cols="2m,4a,2",subs=attributes] +|=== +| Operator | Description | Example +| $ | The root element to query. This starts all path expressions. | `$` - retrieve all data in parent object +| @ | The current node being processed by a filter predicate. | `$.items[?(@.answer_count > 0)]` - retrieve the item if it has an answer_count greater than 0 +| * | Wildcard. Available anywhere a name or numeric are required. | `$.items[\*]` - retrieve all items in array +| .. | Deep scan. Available anywhere a name is required. | `$..tags[\*]` - find substructure named tags and pull all the values +| . | Dot-notated child | `$.items[0:1].owner.user_id` - retrieve user_id for the first item (in the owner object) +| [ (,)] | Array index or indexes | `$.items[0,-1]` - retrieve first and last item in array +| [start:end] | Array slice operator | `$.items[0:5]` - retrieve the first through fifth items in the array +| [?()] | Filter expression. Expression must evaluate to a boolean value. 
| `$.items[?(@.is_answered == true)]` - retrieve items where the is_answered field is true +|=== diff --git a/docs/asciidoc/import/loadjson.adoc b/docs/asciidoc/import/loadjson.adoc index 5e068ba4..5ad9ce31 100644 --- a/docs/asciidoc/import/loadjson.adoc +++ b/docs/asciidoc/import/loadjson.adoc @@ -6,12 +6,10 @@ This section describes procedures that can be used to import JSON data from web APIs or files. -- -== Load JSON - Web APIs are a huge opportunity to access and integrate data from any sources with your graph. Most of them provide the data is JSON format. -With `apoc.load.json` we can retrieve data from URLs and turn it into map value(s) for Cypher to consume. +The Load JSON procedures retrieve data from URLs or maps and turn it into map value(s) for Cypher to consume. Cypher has support for deconstructing nested documents with dot syntax, slices, `UNWIND` etc. so it is easy to turn nested data into graphs. Sources with multiple JSON objects (JSONL,JSON Lines) in a stream, like the https://dev.twitter.com/streaming/overview/processing[streaming Twitter format] or the Yelp Kaggle dataset, are also supported, @@ -22,97 +20,264 @@ ifdef::backend-html5[] ++++ endif::[] -== Json-Path +This section includes: + +* <> + ** <> + ** <> + ** <> + ** <> + +* <> +* <> +* <> + ** <> + ** <> + ** <> + ** <> + ** <> + ** <> + +[[load-json-available-procedures]] +== Procedure Overview + +The table below describes the available procedures: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.load.json.csv[] +include::../../../build/generated-documentation/apoc.load.jsonParams.csv[lines=2:] +include::../../../build/generated-documentation/apoc.load.jsonArray.csv[lines=2:] +include::../../../build/generated-documentation/apoc.import.json.csv[lines=2:] +|=== -Most of the `apoc.load.json` and `apoc.convert.*Json` procedures and functions now accept a json-path as last argument. 
+[[load-json-available-procedures-apoc.load.json]] +=== `apoc.load.json` -The json-path uses the https://github.com/jayway/JsonPath#operators[Java implementation by Jayway] of http://goessner.net/articles/JsonPath/[Stefan Gössners JSON-Path] +This procedure takes a file or HTTP URL and parses the JSON into a map data structure. -Here is some syntax, there are more examples at the links above. +[separator=¦,opts=header,cols="1m"] +|=== +include::../../../build/generated-documentation/apoc.load.json-lite.csv[] +|=== -`$.store.book[0].title` +It supports the following config parameter: -.Operators -[options="header",cols="2m,4a"] +.Config +[opts=header] |=== -| Operator | Description -| $ | The root element to query. This starts all path expressions. -| @ | The current node being processed by a filter predicate. -| * | Wildcard. Available anywhere a name or numeric are required. -| .. | Deep scan. Available anywhere a name is required. -| . | Dot-notated child -| ['' (,'')] | Bracket-notated child or children -| [ (,)] | Array index or indexes -| [start:end] | Array slice operator -| [?()] | Filter expression. Expression must evaluate to a boolean value. +| name | type | default | description +| failOnError | boolean | true | fail if error encountered while parsing JSON |=== -If used, this path is applied to the json and can be used to extract sub-documents and -values before handing the result to Cypher, resulting in shorter statements with complex nested JSON. +[[load-json-available-procedures-apoc.load.jsonParams]] +=== `apoc.load.jsonParams` + +This procedure takes a file or HTTP URL and parses the JSON into a map data structure. +It is a more configurable version of <> that enables processing of endpoints that require HTTP headers or JSON payloads. + +[separator=¦,opts=header,cols="1m"] +|=== +include::../../../build/generated-documentation/apoc.load.jsonParams-lite.csv[] +|=== -There is also a direct `apoc.json.path(json,path)` function. 
+It supports the following config parameter: -To simplify the JSON URL syntax, you can configure aliases in `conf/apoc.conf`: +.Config +[opts=header] +|=== +| name | type | default | description +| failOnError | boolean | true | fail if error encountered while parsing JSON +|=== +[[load-json-available-procedures-apoc.load.jsonArray]] +=== `apoc.load.jsonArray` + +This procedure takes a file or HTTP URL containing a JSON array, and parses it into a stream of maps. + +[separator=¦,opts=header,cols="1m"] +|=== +include::../../../build/generated-documentation/apoc.load.jsonArray-lite.csv[] +|=== + +[[load-json-available-procedures-apoc.import.json]] +=== `apoc.import.json` + +This procedure can be used to import JSON files created by the <> procedures. + +[separator=¦,opts=header,cols="1m"] +|=== +include::../../../build/generated-documentation/apoc.import.json-lite.csv[] +|=== + +It supports the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| unwindBatchSize | Long | `5000` | the batch size of the unwind +| txBatchSize | Long | `5000` | the batch size of the transaction +| importIdName | String | `neo4jImportId` | the name of the "id" field used for the import; it refers to the "id" field in the root object of the json. +| nodePropertyMappings | Map | `{}` | The mapping label/property name/property type for Custom Neo4j types (point date). + +i.e. `{ User: { born: 'Point', dateOfBirth: 'Datetime' } }` +| relPropertyMappings | Map | `{}` | The mapping rel type/property name/property type for Custom Neo4j types (point date). + +i.e. `{ KNOWS: { since: 'Datetime' } }` +|=== + +`nodePropertyMappings` and `relPropertyMappings` support the following Neo4j types: + +`Point`, `Localdate`, `Localtime`, `Localdatetime`, `Duration`, `offsettime`, and `Zoneddatetime`.
+ + +[[load-json-file-import]] +== Importing from a file + +include::enableFileImport.adoc[] + +[[load-json-json-path]] +== JSON-Path + +include::jsonpath.adoc[] + +[[load-json-examples]] +== Examples + +The following section contains examples showing how to import data from various JSON sources. + +[[load-json-examples-local]] +=== Import from local file + +`person.json` contains a JSON document representing a person and their children. + +.person.json +[source,json] ---- -apoc.json.myJson.url=https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf +{ + "name":"Michael", + "age": 41, + "children": ["Selina","Rana","Selma"] +} ---- +We'll place this file into the `import` directory of our Neo4j instance. +Let's now write a query using the `apoc.load.json` procedure to explore this file. + +.The following query processes `person.json` and returns the content as Cypher data structures +[source,cypher] +---- +CALL apoc.load.json("file:///person.json") +YIELD value +RETURN value; ---- -CALL apoc.load.json('https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf') -becomes +.Results +[options="header"] +|=== +| value +| {name: "Michael", children: ["Selina", "Rana", "Selma"], age: 41} +|=== -CALL apoc.load.json('myJson') ----- +We get back a map that looks almost the same as the JSON document. +We can now extend that query to create a graph based on this JSON file. +We'll create a `Person` node for Michael and each of his children, and a `CHILD_OF` relationship from each child to the Michael node. 
-The 3rd value in the `apoc.json..url=` effectively defines an alias to be used in `apoc.load.json('',....` +.The following creates a graph based on `person.json` +[source,cypher] +---- +CALL apoc.load.json("file:///person.json") +YIELD value +MERGE (p:Person {name: value.name}) +SET p.age = value.age +WITH p, value +UNWIND value.children AS child +MERGE (c:Person {name: child}) +MERGE (c)-[:CHILD_OF]->(p); +---- -== Load JSON StackOverflow Example +The Neo4j Browser visualization below shows the imported graph: -There have been articles before about http://neo4j.com/blog/cypher-load-json-from-url/[loading JSON from Web-APIs like StackOverflow]. +image::apoc.load.json.local.file.svg[] -With `apoc.load.json` it's now very easy to load JSON data from any file or URL. +[[load-json-examples-stackoverflow]] +=== Import from StackOverflow API -If the result is a JSON object is returned as a singular map. -Otherwise if it was an array is turned into a stream of maps. +`apoc.load.json` enables loading JSON data from any file or URL. +If the result is a JSON object, it is returned as a singular map. +If the result is an array, it is turned into a stream of maps. -The URL for retrieving the last questions and answers of the http://stackoverflow.com/questions/tagged/neo4j[neo4j tag] is this: +StackOverflow provides several APIs, including one for retrieving recent questions and answers. +The URL for retrieving the last questions and answers for the http://stackoverflow.com/questions/tagged/neo4j[neo4j tag^] is: +[source,text] +---- https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf +---- + +[NOTE] +==== -Now it can be used from within Cypher directly, let's first introspect the data that is returned. 
+Since this is a rather long URL string, we can simplify the syntax by configuring aliases in `conf/apoc.conf`: -.JSON data from StackOverflow +.apoc.conf +[source,text] +---- +apoc.json.myJson.url=https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf +---- + +The third value in the `apoc.json..url=` effectively defines the variable to be used in `apoc.load.json('',....`. +With this, the massive JSON url string below can be aliased to a shorter string. + +.Original call with full json url string [source,cypher] ---- -WITH "https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" AS url -CALL apoc.load.json(url) YIELD value -UNWIND value.items AS item -RETURN item.title, item.owner, item.creation_date, keys(item) +CALL apoc.load.json('https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf') ---- -image::apoc.load.json.so.png[scaledwidth="100%"] +.New call with aliased string with full string in apoc.conf +[source,cypher] +---- +CALL apoc.load.json('myJson') +---- +==== +Let's introspect the data that is returned from this end point. 
-.Question authors from StackOverflow using json-path +.The following finds the 5 most recent questions with the `neo4j` tag on StackOverflow [source,cypher] ---- WITH "https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" AS url -CALL apoc.load.json(url,'$.items.owner.name') YIELD value -RETURN name, count(*); +CALL apoc.load.json(url) YIELD value +UNWIND value.items AS item +RETURN item.title, item.owner, item.creation_date, keys(item) +LIMIT 5; ---- -Combined with the cypher query from the original blog post it's easy to create the full Neo4j graph of those entities. -We filter the original poster last, b/c deleted users have no `user_id` anymore. +.Results +[options="header", cols="2,3,2,2"] +|=== +| item.title | item.owner | item.creation_date | keys(item) +| "Cypher patten for getting self related nodes" | {profile_image: "https://lh3.googleusercontent.com/-1FWbhuaEBiQ/AAAAAAAAAAI/AAAAAAAAAIA/tLM_mEb-8MY/photo.jpg?sz=128", user_type: "registered", user_id: 5730203, link: "https://stackoverflow.com/users/5730203/asif-ali", reputation: 1148, display_name: "Asif Ali", accept_rate: 90} | 1586944991 | ["owner", "comment_count", "link", "last_activity_date", "creation_date", "answer_count", "title", "question_id", "tags", "share_link", "score", "down_vote_count", "body_markdown", "favorite_count", "is_answered", "delete_vote_count", "close_vote_count", "view_count", "up_vote_count"] +| "Problem connecting .NET Client to Neo4j Desktop version 4" | {profile_image: "https://www.gravatar.com/avatar/a3fac35d600d1d462d8fc12f3926074c?s=128&d=identicon&r=PG&f=1", user_type: "registered", user_id: 2853912, link: "https://stackoverflow.com/users/2853912/user2853912", reputation: 21, display_name: "user2853912"} | 1586938954 | ["owner", "comment_count", "link", "last_activity_date", "creation_date", "answer_count", "title", "question_id", "tags", "share_link", "score", 
"down_vote_count", "body_markdown", "favorite_count", "is_answered", "delete_vote_count", "close_vote_count", "view_count", "up_vote_count"] +| "What kind of graph algorithm does Neo4j use?" | {profile_image: "https://www.gravatar.com/avatar/736024b862a229111d4b3119875753b0?s=128&d=identicon&r=PG&f=1", user_type: "registered", user_id: 4402081, link: "https://stackoverflow.com/users/4402081/mariappan", reputation: 7, display_name: "Mariappan"} | 1586901300 | ["owner", "comment_count", "answers", "link", "last_activity_date", "creation_date", "answer_count", "title", "question_id", "tags", "share_link", "score", "down_vote_count", "body_markdown", "favorite_count", "is_answered", "delete_vote_count", "close_vote_count", "view_count", "up_vote_count"] +| "Import json file to Neo4j" | {profile_image: "https://lh3.googleusercontent.com/-PWDC85Kp2ig/AAAAAAAAAAI/AAAAAAAAAAA/AB6qoq3nhmVZl-_0VDKESOG5MsyHvXnw_A/mo/photo.jpg?sz=128", user_type: "registered", user_id: 9964138, link: "https://stackoverflow.com/users/9964138/jo%c3%a3o-costa", reputation: 23, display_name: "João Costa"} | 1586897574 | ["owner", "comment_count", "answers", "link", "last_activity_date", "creation_date", "answer_count", "title", "question_id", "tags", "share_link", "score", "down_vote_count", "body_markdown", "favorite_count", "is_answered", "delete_vote_count", "close_vote_count", "view_count", "up_vote_count"] +| "Difference between Neo4j Graph Algorithms and Graph Data Science" | {profile_image: "https://i.stack.imgur.com/2rLPZ.jpg?s=128&g=1", user_type: "registered", user_id: 3297954, link: "https://stackoverflow.com/users/3297954/rotten", reputation: 1295, display_name: "rotten", accept_rate: 75} | 1586872077 | ["owner", "comment_count", "answers", "link", "last_activity_date", "creation_date", "answer_count", "title", "question_id", "tags", "share_link", "score", "down_vote_count", "body_markdown", "favorite_count", "is_answered", "delete_vote_count", "close_vote_count", "view_count", 
"up_vote_count"] +|=== + +Let's now create a Neo4j graph based on those entities. -.Graph data created via loading JSON from StackOverflow +.The following creates a graph based on data from the StackOverflow API [source,cypher] ---- WITH "https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" AS url CALL apoc.load.json(url) YIELD value UNWIND value.items AS q -MERGE (question:Question {id:q.question_id}) ON CREATE - SET question.title = q.title, question.share_link = q.share_link, question.favorite_count = q.favorite_count +MERGE (question:Question {id:q.question_id}) +ON CREATE SET question.title = q.title, + question.share_link = q.share_link, + question.favorite_count = q.favorite_count FOREACH (tagName IN q.tags | MERGE (tag:Tag {name:tagName}) MERGE (question)-[:TAGGED]->(tag)) FOREACH (a IN q.answers | @@ -120,78 +285,175 @@ FOREACH (a IN q.answers | MERGE (answerer:User {id:a.owner.user_id}) ON CREATE SET answerer.display_name = a.owner.display_name MERGE (answer)<-[:PROVIDED]-(answerer) ) + WITH * WHERE NOT q.owner.user_id IS NULL MERGE (owner:User {id:q.owner.user_id}) ON CREATE SET owner.display_name = q.owner.display_name MERGE (owner)-[:ASKED]->(question) ---- -image::apoc.load.json-so-result.png[scaledwidth="100%"] +The Neo4j Browser visualization below shows the imported graph: + +image::apoc-load-json-so.svg[width="1000px"] + +[[load-json-path-examples-stackoverflow]] +=== Use JSON Path and Import from StackOverflow API + +We can narrow down the data that we sift through and import using the JSON path syntax. +This will allow us to specify substructures to import and ignore the rest of the data. +For this example, we only want to import answers and the members posting those answers. 
+ +.Find StackOverflow answers using JSON path (only retrieve sample of 5) +[source,cypher] +---- +WITH "https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" AS url +CALL apoc.load.json(url,'$.items[?(@.answer_count>0)].answers[*]') YIELD value +RETURN value LIMIT 5; +---- + +Notice that we are only looking at StackOverflow questions that have an answer count greater than 0. +That means we are only passing along the question JSON objects that have answers, as the rest do not pertain to our use case. +With this in mind, let us import those with this statement: -== Load JSON from Twitter (with additional parameters) +[source,cypher] +---- +WITH "https://api.stackexchange.com/2.2/questions?pagesize=100&order=desc&sort=creation&tagged=neo4j&site=stackoverflow&filter=!5-i6Zw8Y)4W7vpy91PMYsKM-k9yzEsSC1_Uxlf" AS url +CALL apoc.load.json(url,'$.items[?(@.answer_count>0)].answers[*]') YIELD value +MERGE (a:Answer {id: value.answer_id}) + ON CREATE SET a.accepted = value.is_accepted, + a.shareLink = value.share_link, + a.lastActivityDate = value.last_activity_date, + a.creationDate = value.creation_date, + a.title = value.title, + a.score = value.score +MERGE (q:Question {id: value.question_id}) +MERGE (a)-[rel:POSTED_TO]->(q) +WITH a as answer, value.owner as value +MERGE (u:User {userId: value.user_id}) + ON CREATE SET u.displayName = value.display_name, + u.userType = value.user_type, + u.reputation = value.reputation, + u.userLink = value.link +MERGE (u)-[rel2:SUBMITTED]->(answer) +RETURN count(answer) +---- -With `apoc.load.jsonParams` you can send additional headers or payload with your JSON GET request, e.g. for the Twitter API: +This imports around 78 answers to our graph. +We can then explore this graph to find out which users submitted the most answers, have the highest ratings, and more. 
-.Configure Bearer and Twitter Search Url token in `apoc.conf` +[[load-json-examples-twitter]] +=== Import from Twitter API (with additional parameters) +With `apoc.load.jsonParams` we can send additional headers or payload with our JSON GET request, e.g. for the Twitter API: + +Let's first configure the Bearer and Twitter Search URL token in `apoc.conf`: + +.apoc.conf ---- apoc.static.twitter.bearer=XXXX apoc.static.twitter.url=https://api.twitter.com/1.1/search/tweets.json?count=100&result_type=recent&lang=en&q= ---- -These values can then be retrieved using the functions described in <>. +These values can then be retrieved using the functions described in -.Twitter Search via Cypher +static-values>>. + +.The following queries the Twitter API and returns the results [source,cypher] ---- -CALL apoc.static.getAll("twitter") yield value AS twitter -CALL apoc.load.jsonParams(twitter.url + "oscon+OR+neo4j+OR+%23oscon+OR+%40neo4j",{Authorization:"Bearer "+twitter.bearer},null) yield value +WITH apoc.static.getAll("twitter") AS twitter +CALL apoc.load.jsonParams( + twitter.url + "oscon+OR+neo4j+OR+%23oscon+OR+%40neo4j", + {Authorization:"Bearer "+twitter.bearer}, + null // payload +) +YIELD value UNWIND value.statuses as status + WITH status, status.user as u, status.entities as e -RETURN status.id, status.text, u.screen_name, [t IN e.hashtags | t.text] as tags, e.symbols, [m IN e.user_mentions | m.screen_name] as mentions, [u IN e.urls | u.expanded_url] as urls +RETURN status.id, status.text, u.screen_name, + [t IN e.hashtags | t.text] as tags, + e.symbols, + [m IN e.user_mentions | m.screen_name] as mentions, + [u IN e.urls | u.expanded_url] as urls; ---- -== GeoCoding Example -Example for reverse geocoding and determining the route from one to another location. 
+[[load-json-examples-import-json]] +=== Import JSON file created by Export JSON procedures -[source,cypher] ----- -WITH - "21 rue Paul Bellamy 44000 NANTES FRANCE" AS fromAddr, - "125 rue du docteur guichard 49000 ANGERS FRANCE" AS toAddr +The `apoc.import.json` procedure can be used to import JSON files created by the `apoc.export.json.*` procedures. +This procedure supports the following config parameters: -call apoc.load.json("http://www.yournavigation.org/transport.php?url=http://nominatim.openstreetmap.org/search&format=json&q=" + replace(fromAddr, ' ', '%20')) YIELD value AS from +.Config parameters +[opts=header] +|=== +| name | default | description +| unwindBatchSize | `5000` | the batch size of the unwind +| txBatchSize | `5000` | the batch size of the transacttion +| importIdName | `neo4jImportId` | the name of the "id" field into the used for the import it refers to the "id" field into the root object of the json. +| nodePropertyMappings | `{}` | The mapping label/property name/property type for Custom Neo4j types (point date). I.e. { User: { born: 'Point', dateOfBirth: 'Datetime' } } +| relPropertyMappings | `{}` | The mapping rel type/property name/property type for Custom Neo4j types (point date). I.e. { KNOWS: { since: 'Datetime' } } +|=== -WITH from, toAddr LIMIT 1 +`nodePropertyMappings` and `relPropertyMappings` support the following Neo4j types: -call apoc.load.json("http://www.yournavigation.org/transport.php?url=http://nominatim.openstreetmap.org/search&format=json&q=" + replace(toAddr, ' ', '%20')) YIELD value AS to +* Point +* Localdate +* Localtime +* Localdatetime +* Duration +* offsettime +* Zoneddatetime -CALL apoc.load.json("https://router.project-osrm.org/viaroute?instructions=true&alt=true&z=17&loc=" + from.lat + "," + from.lon + "&loc=" + to.lat + "," + to.lon ) YIELD value AS doc -UNWIND doc.route_instructions as instruction +`all.json` contains a subset of Neo4j's movies graph, and was generated by the <>. 
-RETURN instruction +.all.json +[source,json] +---- +include::../data/exportJSON/all.json[leveloffset] ---- -== POST JSON values - -You can also perform a POST request by executing a query like the following: +We can import this file using `apoc.import.json`. [source,cypher] ---- -CALL apoc.load.jsonParams("https://neo4j.com/docs/search/", - {method: "POST"}, - apoc.convert.toJson({query: "pagecache", version: "3.5"})) +CALL apoc.import.json("file:///all.json") ---- -You will receive the following response: +.Results +[opts=header] +|=== +| file | source | format | nodes | relationships | properties | time | rows | batchSize | batches | done | data +| "file:///all.json" | "file" | "json" | 3 | 1 | 15 | 105 | 4 | -1 | 0 | TRUE | NULL +|=== + +[[load-json-examples-post-json]] +=== POST a query to the neo4j.com search API -[source,json] +We can perform a POST request to a JSON endpoint by setting the config parameter `method` to `POST`. +We'll also use the `apoc.convert.toJson` function to construct a JSON payload from a Cypher map. + +.The following makes a POST request to neo4j's search API +[source,cypher] ---- -{ - "description": "This section describes the Neo4j Admin tool.", - "title": "12.1. Neo4j Admin - Chapter 12. Tools", - "uri": "https://neo4j.com/docs/operations-manual/3.5/tools/neo4j-admin/", - "weight": 0.02548018842935562 -} +CALL apoc.load.jsonParams( + "https://neo4j.com/docs/search/", + {method: "POST"}, + apoc.convert.toJson({query: "subquery", version: "4.0"}) +) ---- + + +.Results +[options="header"] +|=== +| value +| {description: "The CALL {} clause evaluates a subquery that returns some values.", weight: 0.6460227966308594, title: "3.16. CALL {} (subquery) - Chapter 3. Clauses", uri: "https://neo4j.com/docs/cypher-manual/4.0/clauses/call-subquery/"} +| {description: "This section provides examples of queries and Cypher commands that can be used with Neo4j Fabric.", weight: 0.05099273845553398, title: "7.3. Queries - Chapter 7. 
Fabric", uri: "https://neo4j.com/docs/operations-manual/4.0/fabric/queries/"} +| {description: "WHERE adds constraints to the patterns in a MATCH or OPTIONAL MATCH clause or filters the results of a WITH clause.", weight: 0.03291567042469978, title: "3.6. WHERE - Chapter 3. Clauses", uri: "https://neo4j.com/docs/cypher-manual/4.0/clauses/where/"} +| {description: "This appendix contains the recommended style when writing Cypher queries.", weight: 0.031550146639347076, title: "Appendix A. Cypher styleguide - The Neo4j Cypher Manual v4.0", uri: "https://neo4j.com/docs/cypher-manual/4.0/styleguide/"} +| {description: "This section contains information on all the clauses in the Cypher query language.", weight: 0.02944066934287548, title: "Chapter 3. Clauses - The Neo4j Cypher Manual v4.0", uri: "https://neo4j.com/docs/cypher-manual/4.0/clauses/"} +| {description: "", weight: 0.01821548491716385, title: "2.3. Expressions - Chapter 2. Syntax", uri: "https://neo4j.com/docs/cypher-manual/4.0/syntax/expressions/"} + +|=== \ No newline at end of file diff --git a/docs/asciidoc/import/loadxml.adoc b/docs/asciidoc/import/loadxml.adoc index 550e2c41..7f67eccc 100644 --- a/docs/asciidoc/import/loadxml.adoc +++ b/docs/asciidoc/import/loadxml.adoc @@ -6,23 +6,159 @@ This section describes procedures that can be used to import data from XML files. -- -Many existing (enterprise) applications, endpoints and files use XML as data exchange format. +Many existing enterprise applications, endpoints, and files use XML as data exchange format. +The Load XML procedures allow us to process these files. -We can process these files using `apoc.load.xml`. -It takes a file or HTTP URL and parses the XML into a map data structure. +This section includes: -NOTE: in previous releases we've had `apoc.load.xmlSimple`. 
This is now deprecated and hsa been superseeded by `apoc.load.xml(url, [xPath], [config], true)`.Simple XML Format +* <> + ** <> + ** <> + ** <> +* <> +* <> + ** <> + ** <> + ** <> + ** <> + ** <> -See the following usage-examples for the procedures. +[[load-xml-available-procedures]] +== Procedure and Function Overview -== Example File +The table below describes the available procedures and functions: -"How do you access XML doc attributes in children fields ?" +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.load.xml.csv[] +include::../../../build/generated-documentation/apoc.xml.parse.csv[lines=2:] +include::../../../build/generated-documentation/apoc.import.xml.csv[lines=2:] +|=== + +[[load-xml-available-procedures-apoc.load.xml]] +=== `apoc.load.xml` + +This procedure takes a file or HTTP URL and parses the XML into a map data structure. + +[separator=¦,opts=header,cols="1m"] +|=== +include::../../../build/generated-documentation/apoc.load.xml-lite.csv[] +|=== + +The map is created using the following rules: + +* in simple mode, each type of children has its own entry in the parent map. +* the element-type as key is prefixed with `_` to prevent collisions with attributes. +* if there is a single element, the entry will just have that element as value, not a collection. +* if there is more than one element, there will be a list of values. +* each child will still have its `_type` field to discern them. + +This procedure supports the following config parameters: + +.Config +[opts=header] +|=== +| name | type | default | description +| failOnError | boolean | true | fail if error encountered while parsing XML +| headers | Map | {} | HTTP headers to be used when querying XML document +|=== + +[[load-xml-available-procedures-apoc.xml.parse]] +=== `apoc.xml.parse` + +If our dataset contains nodes with XML as property values, they can be parsed into maps with the `apoc.xml.parse` function. 
+ +[separator=¦,opts=header,cols="1m"] +|=== +include::../../../build/generated-documentation/apoc.xml.parse-lite.csv[] +|=== + + +This function supports the following config parameter: + +.Config +[opts=header] +|=== +| name | type | default | description +| failOnError | boolean | true | fail if error encountered while parsing XML +|=== + +.The following parses an XML string into a Cypher map +[source,cypher] +---- +WITH '
' AS xmlString +RETURN apoc.xml.parse(xmlString) AS value +---- + +.Results +[options="header"] +|=== +| value +| {_type: "table", _children: [{_type: "tr", _children: [{_type: "td", _children: [{_type: "img", src: "pix/logo-tl.gif"}]}]}]} +|=== -(Thanks Nicolas Rouyer) +[[load-xml-available-procedures-apoc.import.xml]] +=== `apoc.import.xml` -For example, if my XML file is the example https://msdn.microsoft.com/en-us/library/ms762271%28v=vs.85%29.aspx[book.xml provided by Microsoft]. +If we don't want to do any transformation of the XML before creating a graph structure, we can create a 1:1 mapping of XML into the graph using the `apoc.import.xml` procedure. +[separator=¦,opts=header,cols="1m"] +|=== +include::../../../build/generated-documentation/apoc.import.xml-lite.csv[] +|=== + +This procedure will return a node representing the XML document containing nodes and relationships underneath mapping to the XML structure. + +The following mapping rules are applied: + +[options="header"] +|=== +| xml | label | properties +| document | XmlDocument | _xmlVersion, _xmlEncoding +| processing instruction | XmlProcessingInstruction | _piData, _piTarget +| Element/Tag | XmlTag | _name +| Attribute | n/a | property in the XmlTag node +| Text | XmlWord | for each word a separate node is created +|=== + +The nodes for the XML document are connected: + +[options="header"] +|=== +| relationship type | description +| :IS_CHILD_OF | pointing to a nested xml element +| :FIRST_CHILD_OF | pointing to the first child +| :NEXT_SIBLING | pointing to the next xml element on the same nesting level +| :NEXT | produces a linear chain through the full document +| :NEXT_WORD | only produced if config map has `createNextWordRelationships:true`. Connects words in XML to a text flow. 
+|=== + +This procedure supports the following config parameters: + +.Config +[options="header"] +|=== +| config option | default value | description +| connectCharacters | false | if `true` the xml text elements are child nodes of their tags, interconnected by relationships of type `relType` (see below) +| filterLeadingWhitespace | false | if `true` leading whitespace is skipped for each line +| delimiter | `\s` (regex whitespace) | if given, split text elements with the delimiter into separate nodes +| label | XmlCharacter | label to use for text element representation +| relType | `NE` | relationship type to be used for connecting the text elements into one linked list +| charactersForTag | {} | map of tagname -> string. For the given tag names an additional text element is added containing the value as `text` property. Useful e.g. for `` tags in TEI-XML to be represented as ` `. +|=== + + +[[load-xml-file-import]] +== Importing from a file + +include::enableFileImport.adoc[] + +[[load-xml-examples]] +== Examples + +The examples in this section are based on the Microsoft https://msdn.microsoft.com/en-us/library/ms762271%28v=vs.85%29.aspx[book.xml^] file. + +.book.xml [source,xml] ---- @@ -46,212 +182,271 @@ For example, if my XML file is the example https://msdn.microsoft.com/en-us/libr ... ---- -We have the file here, https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml[on GitHub]. - -== Simple XML Format - -In a simpler XML representation, each type of children gets it's own entry within the parent map. -The element-type as key is prefixed with "_" to prevent collisions with attributes. +This file can be downloaded from https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml[GitHub]. -If there is a single element, then the entry will just have that element as value, not a collection. 
-If there is more than one element there will be a list of values. +[[load-xml-examples-local-file]] +=== Import from local file -Each child will still have its `_type` field to discern them. +The `books.xml` file described below contains the first two books from the Microsoft Books XML file. +We'll use the smaller file in this section to simplify our examples. -Here is the example file from above loaded with `apoc.load.xmlSimple` - -[source,cypher,subs=attributes] ----- -call apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml", '', {}, true) ----- - -[source,javascript] +.books.xml +[source,xml] ---- -{_type: "catalog", _book: [ - {_type: "book", id: "bk101", - _author: [{_type: "author", _text: "Gambardella, Matthew"},{_type: author, _text: "Arciniegas, Fabio"}], - _title: {_type: "title", _text: "XML Developer's Guide"}, - _genre: {_type: "genre", _text: "Computer"}, - _price: {_type: "price", _text: "44.95"}, - _publish_date: {_type: "publish_date", _text: "2000-10-01"}, - _description: {_type: description, _text: An in-depth look at creating applications .... + + + + Gambardella, Matthew + Arciniegas, Fabio + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. 
+ + ---- -=== Simple XML Examples - -.Example 1 -[source,cypher] ----- -WITH "https://maps.googleapis.com/maps/api/directions/xml?origin=Mertens%20en%20Torfsstraat%2046,%202018%20Antwerpen&destination=Rubensstraat%2010,%202300%20Turnhout&sensor=false&mode=bicycling&alternatives=false&key=AIzaSyAPPIXGudOyHD_KAa2f_1l_QVNbsd_pMQs" AS url -CALL apoc.load.xmlSimple(url) -YIELD value -RETURN value._route._leg._distance._value, keys(value), keys(value._route), keys(value._route._leg), keys(value._route._leg._distance._value) ----- -image::apoc.load.xmlSimple.ex1.png[scaledwidth="100%"] +We'll place this file into the `import` directory of our Neo4j instance. +Let's now write a query using the `apoc.load.xml` procedure to explore this file. -.Example 2 +.The following query processes `books.xml` and returns the content as Cypher data structures [source,cypher] ---- -WITH "https://maps.googleapis.com/maps/api/directions/xml?origin=Mertens%20en%20Torfsstraat%2046,%202018%20Antwerpen&destination=Rubensstraat%2010,%202300%20Turnhout&sensor=false&mode=bicycling&alternatives=false&key=AIzaSyAPPIXGudOyHD_KAa2f_1l_QVNbsd_pMQs" AS url -CALL apoc.load.xmlSimple(url) +CALL apoc.load.xml("file:///books.xml") YIELD value -UNWIND keys(value) AS key -RETURN key, apoc.meta.type(value[key]); +RETURN value ---- -image::apoc.load.xmlSimple.ex2.png[scaledwidth="100%"] - +.Results +[options="header"] +|=== +| value +| {_type: "catalog", _children: [{_type: "book", _children: [{_type: "author", _text: "Gambardella, Matthew"}, {_type: "author", _text: "Arciniegas, Fabio"}, {_type: "title", _text: "XML Developer's Guide"}, {_type: "genre", _text: "Computer"}, {_type: "price", _text: "44.95"}, {_type: "publish_date", _text: "2000-10-01"}, {_type: "description", _text: "An in-depth look at creating applications with XML."}], id: "bk101"}, {_type: "book", _children: [{_type: "author", _text: "Ralls, Kim"}, {_type: "title", _text: "Midnight Rain"}, {_type: "genre", _text: "Fantasy"}, {_type: "price", 
_text: "5.95"}, {_type: "publish_date", _text: "2000-12-16"}, {_type: "description", _text: "A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world."}], id: "bk102"}]} +|=== -=== HTTP Headers - -You can provide a map of HTTP headers to the config property. +We get back a map representing the XML structure. +Every time an XML element is nested inside another one, it is accessible via the `.children` property. +We can write the following query to get a better understanding of what our file contains. +.The following query processes `book.xml` and parses the results to pull out the title, description, genre, and authors [source,cypher] ---- -WITH { `X-API-KEY`: 'abc123' } as headers, -WITH "https://myapi.com/api/v1/" AS url -CALL apoc.load.xml(url, '', { headers: headers }) +CALL apoc.load.xml("file:///books.xml") YIELD value -UNWIND keys(value) AS key -RETURN key, apoc.meta.type(value[key]); +UNWIND value._children AS book +RETURN book.id AS bookId, + [item in book._children WHERE item._type = "title"][0] AS title, + [item in book._children WHERE item._type = "description"][0] AS description, + [item in book._children WHERE item._type = "author"] AS authors, + [item in book._children WHERE item._type = "genre"][0] AS genre; ---- -== xPath - -It's possible to define a xPath (optional) to selecting nodes from the XML document. 
- -=== xPath Example +.Results +[options="header"] +|=== +| bookId | title | description | authors | genre +| "bk101" | {_type: "title", _text: "XML Developer's Guide"} | {_type: "description", _text: "An in-depth look at creating applications with XML."} | [{_type: "author", _text: "Gambardella, Matthew"}, {_type: "author", _text: "Arciniegas, Fabio"}] | {_type: "genre", _text: "Computer"} +| "bk102" | {_type: "title", _text: "Midnight Rain"} | {_type: "description", _text: "A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world."} | [{_type: "author", _text: "Ralls, Kim"}] | {_type: "genre", _text: "Fantasy"} +|=== -From the Microsoft's book.xml file we can get only the books that have as `genre` Computer +Let's now create a graph of books and their metadata, authors, and genres. -[source,cypher,subs=attributes] ----- -CALL apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml", '/catalog/book[genre=\"Computer\"]') -YIELD value as book -WITH book.id as id, [attr IN book._children WHERE attr._type IN ['title','price'] | attr._text] as pairs -RETURN id, pairs[0] as title, pairs[1] as price +.The following query processes `book.xml` and parses the results to pull out the title, description, genre, and authors +[source,cypher] ---- +CALL apoc.load.xml("file:///books.xml") +YIELD value +UNWIND value._children AS book -image::apoc.load.xml.xpath.png[scaledwidth="100%"] +WITH book.id AS bookId, + [item in book._children WHERE item._type = "title"][0] AS title, + [item in book._children WHERE item._type = "description"][0] AS description, + [item in book._children WHERE item._type = "author"] AS authors, + [item in book._children WHERE item._type = "genre"][0] AS genre -In this case we return only `id`, `title` and `prize` but we can return any other elements +MERGE (b:Book {id: bookId}) +SET b.title = title._text, b.description = 
description._text -We can also return just a single specific element. -For example the `author` of the book with `id = bg102` +MERGE (g:Genre {name: genre._text}) +MERGE (b)-[:HAS_GENRE]->(g) -[source,cypher,subs=attributes] ----- -call apoc.load.xml('https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml', '/catalog/book[@id="bk102"]/author') -YIELD value as result -WITH result._text as author -RETURN author +WITH b, authors +UNWIND authors AS author +MERGE (a:Author {name:author._text}) +MERGE (a)-[:WROTE]->(b); ---- -image::apoc.load.xml.xpath2.png[scaledwidth="100%"] +The Neo4j Browser visualization below shows the imported graph: +image::apoc.load.xml.local.books.svg[] +[[load-xml-examples-simple-xml-format]] +=== Import from GitHub -== Load XML and Introspect +We can also process XML files from HTTP or HTTPS URIs. +Let's start by processing the `books.xml` file hosted on GitHub. -Let's just load it and see what it looks like. -It's returned as value map with nested `_type` and `_children` fields, per group of elements. -Attributes are turned into map-entries. -And each element into their own little map with `_type`, attributes and `_children` if applicable. +This time we'll pass in `true` as the 4th argument of the procedure. +This means that the XML will be parsed in simple mode. 
+.The following query loads the books.xml file from GitHub using simple mode [source,cypher,subs=attributes] ---- -CALL apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml") +WITH "https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml" AS uri +CALL apoc.load.xml(uri, '', {}, true) +YIELD value +RETURN value; ---- -[source,javascript] ----- -{_type: catalog, _children: [ - {_type: book, id: bk101, _children: [ - {_type: author, _text: Gambardella, Matthew}, - {_type: title, _text: XML Developer's Guide}, - {_type: genre, _text: Computer}, - {_type: price, _text: 44.95}, - {_type: publish_date, _text: 2000-10-01}, - {_type: description, _text: An in-depth look at creating applications .... ----- +.Results +[options="header"] +|=== +| value +|{_type: "catalog", _catalog: [{_type: "book", _book: [{_type: "author", _text: "Gambardella, Matthew"}, {_type: "author", _text: "Arciniegas, Fabio"}, {_type: "title", _text: "XML Developer's Guide"}, {_type: "genre", _text: "Computer"}, {_type: "price", _text: "44.95"}, {_type: "publish_date", _text: "2000-10-01"}, {_type: "description", _text: "An in-depth look at creating applications with XML."}], id: "bk101"}, {_type: "book", _book: [{_type: "author", _text: "Ralls, Kim"}, {_type: "title", _text: "Midnight Rain"}, {_type: "genre", _text: "Fantasy"}, {_type: "price", _text: "5.95"}, {_type: "publish_date", _text: "2000-12-16"}, {_type: "description", _text: "A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world."}], id: "bk102"}, {_type: "book", _book: [{_type: "author", _text: "Corets, Eva"}, {_type: "title", _text: "Maeve Ascendant"}, {_type: "genre", _text: "Fantasy"}, {_type: "price", _text: "5.95"}, {_type: "publish_date", _text: "2000-11-17"}, {_type: "description", _text: "After the collapse of a nanotechnology society in 
England, the young survivors lay the foundation for a new society."}], id: "bk103"}, {_type: "book", _book: [{_type: "author", _text: "Corets, Eva"}, {_type: "title", _text: "Oberon's Legacy"}, {_type: "genre", _text: "Fantasy"}, {_type: "price", _text: "5.95"}, {_type: "publish_date", _text: "2001-03-10"}, {_type: "description", _text: "In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant."}], id: "bk104"}, {_type: "book", _book: [{_type: "author", _text: "Corets, Eva"}, {_type: "title", _text: "The Sundered Grail"}, {_type: "genre", _text: "Fantasy"}, {_type: "price", _text: "5.95"}, {_type: "publish_date", _text: "2001-09-10"}, {_type: "description", _text: "The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy."}], id: "bk105"}, {_type: "book", _book: [{_type: "author", _text: "Randall, Cynthia"}, {_type: "title", _text: "Lover Birds"}, {_type: "genre", _text: "Romance"}, {_type: "price", _text: "4.95"}, {_type: "publish_date", _text: "2000-09-02"}, {_type: "description", _text: "When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled."}], id: "bk106"}, {_type: "book", _book: [{_type: "author", _text: "Thurman, Paula"}, {_type: "title", _text: "Splish Splash"}, {_type: "genre", _text: "Romance"}, {_type: "price", _text: "4.95"}, {_type: "publish_date", _text: "2000-11-02"}, {_type: "description", _text: "A deep sea diver finds true love twenty thousand leagues beneath the sea."}], id: "bk107"}, {_type: "book", _book: [{_type: "author", _text: "Knorr, Stefan"}, {_type: "title", _text: "Creepy Crawlies"}, {_type: "genre", _text: "Horror"}, {_type: "price", _text: "4.95"}, {_type: "publish_date", _text: "2000-12-06"}, {_type: "description", _text: "An anthology of horror stories about roaches, centipedes, scorpions and other insects."}], id: "bk108"}, {_type: "book", _book: 
[{_type: "author", _text: "Kress, Peter"}, {_type: "title", _text: "Paradox Lost"}, {_type: "genre", _text: "Science Fiction"}, {_type: "price", _text: "6.95"}, {_type: "publish_date", _text: "2000-11-02"}, {_type: "description", _text: "After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum."}], id: "bk109"}, {_type: "book", _book: [{_type: "author", _text: "O'Brien, Tim"}, {_type: "title", _text: "Microsoft .NET: The Programming Bible"}, {_type: "genre", _text: "Computer"}, {_type: "price", _text: "36.95"}, {_type: "publish_date", _text: "2000-12-09"}, {_type: "description", _text: "Microsoft's .NET initiative is explored in detail in this deep programmer's reference."}], id: "bk110"}, {_type: "book", _book: [{_type: "author", _text: "O'Brien, Tim"}, {_type: "title", _text: "MSXML3: A Comprehensive Guide"}, {_type: "genre", _text: "Computer"}, {_type: "price", _text: "36.95"}, {_type: "publish_date", _text: "2000-12-01"}, {_type: "description", _text: "The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more."}], id: "bk111"}, {_type: "book", _book: [{_type: "author", _text: "Galos, Mike"}, {_type: "title", _text: "Visual Studio 7: A Comprehensive Guide"}, {_type: "genre", _text: "Computer"}, {_type: "price", _text: "49.95"}, {_type: "publish_date", _text: "2001-04-16"}, {_type: "description", _text: "Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment."}], id: "bk112"}]} +|=== -=== For each book, how do I access book id ? +We again get back back a map representing the XML structure, but the structure is different than when we don't use simple mode. +This time nested XML elements are accessible via a property of the element name prefixed with an `_`. -You can access attributes per element directly. 
+We can write the following query to get a better understanding of what our file contains. -[source,cypher,subs=attributes] +.The following query processes `book.xml` and parses the results to pull out the title, description, genre, and authors +[source,cypher] ---- -CALL apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml") -YIELD value as catalog -UNWIND catalog._children as book -RETURN book.id +WITH "https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/4.0/src/test/resources/xml/books.xml" AS uri +CALL apoc.load.xml(uri, '', {}, true) +YIELD value +UNWIND value._catalog AS catalog +RETURN catalog.id AS bookId, + [item in catalog._book WHERE item._type = "title"][0] AS title, + [item in catalog._book WHERE item._type = "description"][0] AS description, + [item in catalog._book WHERE item._type = "author"] AS authors, + [item in catalog._book WHERE item._type = "genre"][0] AS genre; ---- .Results -[opts="header",cols="1"] +[options="header"] |=== -| book.id -| bk101 -| bk102 +| bookId | title | description | authors | genre +| "bk101" | {_type: "title", _text: "XML Developer's Guide"} | {_type: "description", _text: "An in-depth look at creating applications with XML."} | [{_type: "author", _text: "Gambardella, Matthew"}, {_type: "author", _text: "Arciniegas, Fabio"}] | {_type: "genre", _text: "Computer"} +| "bk102" | {_type: "title", _text: "Midnight Rain"} | {_type: "description", _text: "A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world."} | [{_type: "author", _text: "Ralls, Kim"}] | {_type: "genre", _text: "Fantasy"} +| "bk103" | {_type: "title", _text: "Maeve Ascendant"} | {_type: "description", _text: "After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society."} | [{_type: "author", _text: "Corets, Eva"}] | {_type: "genre", _text: "Fantasy"} +| 
"bk104" | {_type: "title", _text: "Oberon's Legacy"} | {_type: "description", _text: "In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant."} | [{_type: "author", _text: "Corets, Eva"}] | {_type: "genre", _text: "Fantasy"} +| "bk105" | {_type: "title", _text: "The Sundered Grail"} | {_type: "description", _text: "The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy."} | [{_type: "author", _text: "Corets, Eva"}] | {_type: "genre", _text: "Fantasy"} +| "bk106" | {_type: "title", _text: "Lover Birds"} | {_type: "description", _text: "When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled."} | [{_type: "author", _text: "Randall, Cynthia"}] | {_type: "genre", _text: "Romance"} +| "bk107" | {_type: "title", _text: "Splish Splash"} | {_type: "description", _text: "A deep sea diver finds true love twenty thousand leagues beneath the sea."} | [{_type: "author", _text: "Thurman, Paula"}] | {_type: "genre", _text: "Romance"} +| "bk108" | {_type: "title", _text: "Creepy Crawlies"} | {_type: "description", _text: "An anthology of horror stories about roaches, centipedes, scorpions and other insects."} | [{_type: "author", _text: "Knorr, Stefan"}] | {_type: "genre", _text: "Horror"} +| "bk109" | {_type: "title", _text: "Paradox Lost"} | {_type: "description", _text: "After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum."} | [{_type: "author", _text: "Kress, Peter"}] | {_type: "genre", _text: "Science Fiction"} +| "bk110" | {_type: "title", _text: "Microsoft .NET: The Programming Bible"} | {_type: "description", _text: "Microsoft's .NET initiative is explored in detail in this deep programmer's reference."} | [{_type: "author", _text: "O'Brien, Tim"}] | {_type: "genre", _text: "Computer"} +| "bk111" | {_type: "title", _text: 
"MSXML3: A Comprehensive Guide"} | {_type: "description", _text: "The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more."} | [{_type: "author", _text: "O'Brien, Tim"}] | {_type: "genre", _text: "Computer"} +| "bk112" | {_type: "title", _text: "Visual Studio 7: A Comprehensive Guide"} | {_type: "description", _text: "Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment."} | [{_type: "author", _text: "Galos, Mike"}] | {_type: "genre", _text: "Computer"} |=== +Rather than just returning that data, we can create a graph of books and their metadata, authors, and genres. -=== For each book, how do I access book author and title ? +.The following query processes `book.xml` and parses the results to pull out the title, description, genre, and authors +[source,cypher] +---- +WITH "https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/4.0/src/test/resources/xml/books.xml" AS uri +CALL apoc.load.xml(uri, '', {}, true) +YIELD value +UNWIND value._catalog AS catalog +WITH catalog.id AS bookId, + [item in catalog._book WHERE item._type = "title"][0] AS title, + [item in catalog._book WHERE item._type = "description"][0] AS description, + [item in catalog._book WHERE item._type = "author"] AS authors, + [item in catalog._book WHERE item._type = "genre"][0] AS genre -==== Filter into collection +MERGE (b:Book {id: bookId}) +SET b.title = title._text, b.description = description._text -You have to filter over the sub-elements in the `_childrens` array in this case. 
+MERGE (g:Genre {name: genre._text})
+MERGE (b)-[:HAS_GENRE]->(g)
+
+WITH b, authors
+UNWIND authors AS author
+MERGE (a:Author {name:author._text})
+MERGE (a)-[:WROTE]->(b);
+----
+
+The Neo4j Browser visualization below shows the imported graph:
+
+image::apoc.load.xml.all.books.svg[]
+
+[[load-xml-examples-xpath]]
+=== xPath expressions
+
+We can also provide an xPath expression to select nodes from an XML document.
+If we only want to return books that have the `Computer` genre, we could write the following query:
 
 [source,cypher,subs=attributes]
 ----
-CALL apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml")
-YIELD value as catalog
-UNWIND catalog._children as book
-RETURN book.id, [attr IN book._children WHERE attr._type IN ['author','title'] | [attr._type, attr._text]] as pairs
+CALL apoc.load.xml(
+  "https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml",
+  '/catalog/book[genre=\"Computer\"]'
+)
+YIELD value as book
+WITH book.id as id, [attr IN book._children WHERE attr._type IN ['title','price'] | attr._text] as pairs
+RETURN id, pairs[0] as title, pairs[1] as price;
 ----
 
 .Results
-[opts="header",cols="1,1"]
+[options="header"]
 |===
-| book.id | pairs
-| bk101 | \[[author, Gambardella, Matthew], [title, XML Developer's Guide]]
-| book.id | \[[author, Ralls, Kim], [title, Midnight Rain]]
+| id | title | price
+| "bk101" | "XML Developer's Guide" | "44.95"
+| "bk110" | "Microsoft .NET: The Programming Bible" | "36.95"
+| "bk111" | "MSXML3: A Comprehensive Guide" | "36.95"
+| "bk112" | "Visual Studio 7: A Comprehensive Guide" | "49.95"
 |===
 
+In this case we return only `id`, `title` and `price`, but we can return any other elements.
 
-==== How do I return collection elements?
-
-This is not too nice, we could also just have returned the values and then grabbed them out of the list, but that relies on element-order.
+We can also return just a single specific element.
+For example, the following query returns the `author` of the book with `id = bk102`:
 
 [source,cypher,subs=attributes]
 ----
-CALL apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml")
-YIELD value as catalog
-UNWIND catalog._children as book
-WITH book.id as id, [attr IN book._children WHERE attr._type IN ['author','title'] | attr._text] as pairs
-RETURN id, pairs[0] as author, pairs[1] as title
+CALL apoc.load.xml(
+  'https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml',
+  '/catalog/book[@id="bk102"]/author'
+)
+YIELD value as result
+WITH result._text as author
+RETURN author;
 ----
 
 .Results
-[opts="header",cols="1,1,1"]
+[options="header"]
 |===
-| id | author | title
-| bk101 | Gambardella, Matthew | XML Developer's Guide
-| bk102 | Ralls, Kim | Midnight Rain
+| author
+| "Ralls, Kim"
 |===
 
-== Extracting Datastructures
-
-=== Turn Pairs into Map
+[[load-xml-examples-extracting-datastructures]]
+=== Extracting data structures
 
-So better is to turn them into a map with `apoc.map.fromPairs`
+We can turn values into a map using the `apoc.map.fromPairs` function.
[source,cypher,subs=attributes] ---- -call apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml") yield value as catalog +call apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml") +yield value as catalog UNWIND catalog._children as book WITH book.id as id, [attr IN book._children WHERE attr._type IN ['author','title'] | [attr._type, attr._text]] as pairs -CALL apoc.map.fromPairs(pairs) yield value +WITH id, apoc.map.fromPairs(pairs) AS value RETURN id, value ---- @@ -259,105 +454,74 @@ RETURN id, value [opts="header",cols="1,1"] |=== | id | value -| bk101 | {author: Gambardella, Matthew, title: XML Developer's Guide} -| bk102 | {author: Ralls, Kim, title: Midnight Rain} -| bk103 | {author: Corets, Eva, title: Maeve Ascendant} +| "bk101" | {title: "XML Developer's Guide", author: "Arciniegas, Fabio"} +| "bk102" | {title: "Midnight Rain", author: "Ralls, Kim"} +| "bk103" | {title: "Maeve Ascendant", author: "Corets, Eva"} +| "bk104" | {title: "Oberon's Legacy", author: "Corets, Eva"} +| "bk105" | {title: "The Sundered Grail", author: "Corets, Eva"} +| "bk106" | {title: "Lover Birds", author: "Randall, Cynthia"} +| "bk107" | {title: "Splish Splash", author: "Thurman, Paula"} +| "bk108" | {title: "Creepy Crawlies", author: "Knorr, Stefan"} +| "bk109" | {title: "Paradox Lost", author: "Kress, Peter"} +| "bk110" | {title: "Microsoft .NET: The Programming Bible", author: "O'Brien, Tim"} +| "bk111" | {title: "MSXML3: A Comprehensive Guide", author: "O'Brien, Tim"} +| "bk112" | {title: "Visual Studio 7: A Comprehensive Guide", author: "Galos, Mike"} |=== -==== Return individual Columns - And now we can cleanly access the attributes from the map. 
[source,cypher,subs=attributes] ---- -call apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml") yield value as catalog +call apoc.load.xml("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml") +yield value as catalog UNWIND catalog._children as book WITH book.id as id, [attr IN book._children WHERE attr._type IN ['author','title'] | [attr._type, attr._text]] as pairs -CALL apoc.map.fromPairs(pairs) yield value -RETURN id, value.author, value.title +WITH id, apoc.map.fromPairs(pairs) AS value +RETURN id, value.title, value.author ---- .Results [opts="header",cols="1,1,1"] |=== -| id | value.author | value.title -| bk101 | Gambardella, Matthew | XML Developer's Guide -| bk102 | Ralls, Kim | Midnight Rain -| bk103 | Corets, Eva | Maeve Ascendant -|=== - -== import xml directly - -In case you don't want to transform your xml (like you do with `apoc.load.xml/apoc.load.xmlSimple` before you create nodes and relationships and you want to have a 1:1 mapping of xml into the graph you can use `apoc.xml.import`. - -=== usage +| id | value.title | value.author +| "bk101" | "XML Developer's Guide" | "Arciniegas, Fabio" +| "bk102" | "Midnight Rain" | "Ralls, Kim" +| "bk103" | "Maeve Ascendant" | "Corets, Eva" +| "bk104" | "Oberon's Legacy" | "Corets, Eva" +| "bk105" | "The Sundered Grail" | "Corets, Eva" +| "bk106" | "Lover Birds" | "Randall, Cynthia" +| "bk107" | "Splish Splash" | "Thurman, Paula" +| "bk108" | "Creepy Crawlies" | "Knorr, Stefan" +| "bk109" | "Paradox Lost" | "Kress, Peter" +| "bk110" | "Microsoft .NET: The Programming Bible" | "O'Brien, Tim" +| "bk111" | "MSXML3: A Comprehensive Guide" | "O'Brien, Tim" +| "bk112" | "Visual Studio 7: A Comprehensive Guide" | "Galos, Mike" -[source,cypher] ----- -CALL apoc.xml.import(, ?) 
YIELD node ----- - -The procedure will return a node representing the xml document containing nodes/rels underneath mapping to the xml structure. The following mapping rules are applied: - -[options="header"] -|=== -| xml | label | properties -| document | XmlDocument | _xmlVersion, _xmlEncoding -| processing instruction | XmlProcessingInstruction | _piData, _piTarget -| Element/Tag | XmlTag | _name -| Attribute | n/a | property in the XmlTag node -| Text | XmlWord | for each word a separate node is created |=== -The nodes for the xml document are connected: +[[load-xml-examples-import-xml-directly]] +=== Import XML directly -[options="header"] -|=== -| relationship type | description -| :IS_CHILD_OF | pointing to a nested xml element -| :FIRST_CHILD_OF | pointing to the first child -| :NEXT_SIBLING | pointing to the next xml element on the same nesting level -| :NEXT | produces a linear chain through the full document -| :NEXT_WORD | only produced if config map has `createNextWordRelationships:true`. Connects words in xml to a text flow. -|=== - -The following options are available for the `config` map: - -[options="header"] -|=== -| config option | default value | description -| connectCharacters | false | if `true` the xml text elements are child nodes of their tags, interconnected by relationships of type `relType` (see below) -| filterLeadingWhitespace | false | if `true` leading whitespace is skipped for each line -| delimiter | `\s` (regex whitespace) | if given, split text elements with the delimiter into separate nodes -| label | XmlCharacter | label to use for text element representation -| relType | `NE` | relationship type to be used for connecting the text elements into one linked list -| charactersForTag | {} | map of tagname -> string. For the given tag names an additional text element is added containing the value as `text` property. Useful e.g. for `` tags in TEI-XML to be represented as ` `. 
-|=== +We can write the following query to create a graph structure of the Microsoft books XML file. -=== example - +.The following creates a graph structure based on the contents of `books.xml` [source,cypher,subs=attributes] ---- -CALL -apoc.xml.import("https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml",{createNextWordRelationships: -true}) +CALL apoc.import.xml( + "https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/xml/books.xml", + {relType:'NEXT_WORD', label:'XmlWord'} +) YIELD node RETURN node; - -CALL apoc.xml.import('https://seafile.rlp.net/f/6282a26504cc4f079ab9/?dl=1', {connectCharacters: true, charactersForTag:{lb:' '}, filterLeadingWhitespace: true}) yield node -return node; - ---- -=== Helper Function `apoc.xml.parse` - -In case you have in your dataset nodes with property values XML string you can parse them into Maps -with the `apoc.xml.parse` function. +[options="header"] +|=== +| node +| (:XmlDocument {_xmlVersion: "1.0", _xmlEncoding: "UTF-8", url: "https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/4.0/src/test/resources/xml/books.xml"}) +|=== -Following an example of how to use it: +The Neo4j Browser visualization below shows the imported graph: -``` -WITH '
' AS xmlString -RETURN apoc.xml.parse(xmlString) AS value -``` \ No newline at end of file +image::apoc.import.xml.svg[width="800px"] \ No newline at end of file diff --git a/docs/asciidoc/index.adoc b/docs/asciidoc/index.adoc index 11508c07..d45520b8 100644 --- a/docs/asciidoc/index.adoc +++ b/docs/asciidoc/index.adoc @@ -33,6 +33,7 @@ The guide covers the following areas: * <> -- A detailed guide to procedures that can be used to compare graphs. * <> -- A detailed guide to procedures that can be used for Cypher scripting. * <> -- A detailed guide to procedures that can be used to create virtual nodes and relationships. +* <> -- A detailed guide to procedures that can be used to add Natural Language Processing functionality to graph applications. * <> -- A detailed guide to procedures that can be used for background job management. * <> -- A detailed guide to procedures that can be used to introspect the database. * <> -- A detailed guide to operational procedures. @@ -145,6 +146,8 @@ include::_cypher_execution.adoc[] include::_virtual.adoc[] +include::_nlp.adoc[] + include::_job_management.adoc[] include::_database_introspection.adoc[] diff --git a/docs/asciidoc/introspection/meta.adoc b/docs/asciidoc/introspection/meta.adoc index 05acc050..98c2c3bb 100644 --- a/docs/asciidoc/introspection/meta.adoc +++ b/docs/asciidoc/introspection/meta.adoc @@ -14,6 +14,8 @@ image::apoc.meta.graph.jpg[scaledwidth="100%"] | CALL apoc.meta.data | examines a subset of the graph to provide a tabular meta information | CALL apoc.meta.schema | examines a subset of the graph to provide a map-like meta information | CALL apoc.meta.stats yield labelCount, relTypeCount, propertyKeyCount, nodeCount, relCount, labels, relTypes, stats | returns the information stored in the transactional database statistics +| CALL apoc.meta.nodeTypeProperties({includeLabels:[label,...],includeRels:[rel-type,...],excludeLabels:[label,,...],excludeRels:[rel-type,...]}) | replaces built-in function for node 
property schema to provide a sample-based result for high performance - used by the Neo4J BI Connector +| CALL apoc.meta.relTypeProperties({includeLabels:[label,...],includeRels:[rel-type,...],excludeLabels:[label,...],excludeRels:[rel-type,...]}) | replaces built-in function for relationship property schema to provide a sample-based result for high performance - used by the Neo4J BI Connector |=== .Functions @@ -49,4 +51,4 @@ endif::[] ---- MATCH (n:Person) RETURN apoc.meta.isType(n.age,"INTEGER") as ageType ----- \ No newline at end of file +---- diff --git a/docs/asciidoc/nlp/aws.adoc b/docs/asciidoc/nlp/aws.adoc new file mode 100644 index 00000000..4daa4907 --- /dev/null +++ b/docs/asciidoc/nlp/aws.adoc @@ -0,0 +1,516 @@ +[[nlp-aws]] +=== Amazon Web Services (AWS) + +[abstract] +-- +This chapter describes procedures that wrap Amazon Web Services Comprehend Natural Language API. +-- + +The Amazon Web Services (AWS) https://docs.aws.amazon.com/comprehend/index.html[Comprehend Natural Language API^] uses machine learning to find insights and relationships in text. +The procedures in this chapter act as a wrapper around calls to this API to extract entities from text stored as node properties. 
+ +Each procedure has two modes: + +* Stream - returns a map constructed from the JSON returned from the API +* Graph - creates a graph or virtual graph based on the values returned by the API + +This section includes the following: + +* <> +* <> +* <> +* <> +* <> +* <> +* <> + ** <> + ** <> + ** <> + +[[nlp-aws-overview]] +==== Procedure Overview + +The procedures are described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.nlp.aws.entities.csv[] +include::../../../build/generated-documentation/apoc.nlp.aws.keyPhrases.csv[lines=2;3] +include::../../../build/generated-documentation/apoc.nlp.aws.sentiment.csv[lines=2;3] +|=== + +[[nlp-aws-dependencies]] +==== Install Dependencies + +include::nlp-dependencies.adoc[] + +[[nlp-aws-api-key]] +==== Setting up API Key and Secret + +We can generate an Access Key and Secret by following the instructions at https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html[docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html^]. +Once we've done that, we can populate and execute the following commands to create parameters that contains these details. + +.The following define the `apiKey` and `apiSecret` parameters +[source,cypher] +---- +:param apiKey => (""); +:param apiSecret => (""); +---- + +[[nlp-aws-entities]] +==== Entity Extraction + +The entity extraction procedures (`apoc.nlp.aws.entities.*`) are wrappers around the https://docs.aws.amazon.com/comprehend/latest/dg/how-entities.html[Detect Entities^] operations of the AWS Comprehend Natural Language API. +This API method finds entities in the text, which are defined as a textual reference to the unique name of a real-world object such as people, places, and commercial items, and to precise references to measures such as dates and quantities. 
+ +The procedures are described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.nlp.aws.entities.csv[] +|=== + +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| key | String | null | AWS Access Control Key +| secret | String | null | AWS Access Control Secret +| nodeProperty | String | text | The property on the provided node that contains the unstructured text to be analyzed +|=== + +In addition, `apoc.nlp.aws.entities.graph` supports the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| write | Boolean | false | persist the graph of entities +| relationshipType | String | ENTITY | relationship type for relationships from source node to entity nodes +|=== + +.Streaming mode +[source,cypher] +---- +CALL apoc.nlp.aws.entities.stream(source:Node or List, { + key: String, + secret: String, + nodeProperty: String +}) +YIELD value +---- + +.Graph mode +[source,cypher] +---- +CALL apoc.nlp.aws.entities.graph(source:Node or List, { + key: String, + secret: String, + nodeProperty: String, + relationshipType: String, + write: Boolean +}) +YIELD graph +---- + +[[nlp-aws-key-phrases]] +==== Key Phrases + +The key phrase procedures (`apoc.nlp.aws.keyPhrases.*`) are wrappers around the https://docs.aws.amazon.com/comprehend/latest/dg/how-key-phrases.html[Detect Key Phrases^] operations of the AWS Comprehend Natural Language API. +A key phrase is a string containing a noun phrase that describes a particular thing. +It generally consists of a noun and the modifiers that distinguish it. 
+
+The procedures are described below:
+
+[separator=¦,opts=header,cols="1,1m,1m,5"]
+|===
+include::../../../build/generated-documentation/apoc.nlp.aws.keyPhrases.csv[]
+|===
+
+The procedures support the following config parameters:
+
+.Config parameters
+[opts=header]
+|===
+| name | type | default | description
+| key | String | null | AWS Access Control Key
+| secret | String | null | AWS Access Control Secret
+| nodeProperty | String | text | The property on the provided node that contains the unstructured text to be analyzed
+|===
+
+In addition, `apoc.nlp.aws.keyPhrases.graph` supports the following config parameters:
+
+.Config parameters
+[opts=header]
+|===
+| name | type | default | description
+| write | Boolean | false | persist the graph of key phrases
+| relationshipType | String | ENTITY | relationship type for relationships from source node to entity nodes
+|===
+
+.Streaming mode
+[source,cypher]
+----
+CALL apoc.nlp.aws.keyPhrases.stream(source:Node or List, {
+  key: String,
+  secret: String,
+  nodeProperty: String
+})
+YIELD value
+----
+
+.Graph mode
+[source,cypher]
+----
+CALL apoc.nlp.aws.keyPhrases.graph(source:Node or List, {
+  key: String,
+  secret: String,
+  nodeProperty: String,
+  relationshipType: String,
+  write: Boolean
+})
+YIELD graph
+----
+
+[[nlp-aws-sentiment]]
+==== Sentiment
+
+The sentiment procedures (`apoc.nlp.aws.sentiment.*`) are wrappers around the https://docs.aws.amazon.com/comprehend/latest/dg/how-sentiment.html[Determine Sentiment^] operations of the AWS Comprehend Natural Language API.
+You can determine if the sentiment is positive, negative, neutral, or mixed.
+ +The procedures are described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.nlp.aws.sentiment.csv[] +|=== + +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| key | String | null | AWS Access Control Key +| secret | String | null | AWS Access Control Secret +| nodeProperty | String | text | The property on the provided node that contains the unstructured text to be analyzed +|=== + +In addition, `apoc.nlp.aws.keyPhrases.graph` supports the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| write | Boolean | false | persist the graph of sentiment +|=== + +.Streaming mode +[source,cypher] +---- +CALL apoc.nlp.aws.sentiment.stream(source:Node or List, { + key: String, + secret: String, + nodeProperty: String +}) +YIELD value +---- + +.Graph mode +[source,cypher] +---- +CALL apoc.nlp.aws.sentiment.graph(source:Node or List, { + key: String, + secret: String, + nodeProperty: String, + relationshipType: String, + write: Boolean +}) +YIELD graph +---- + + +[[nlp-aws-examples]] +==== Examples + +The examples in this section are based on the following sample graph: + +[source,cypher] +---- +CREATE (:Article { + uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/", + body: "These days I’m rarely more than a few feet away from my Nintendo Switch and I play board games, card games and role playing games with friends at least once or twice a week. I’ve even organised lunch-time Mario Kart 8 tournaments between the Neo4j European offices!" +}); + +CREATE (:Article { + uri: "https://en.wikipedia.org/wiki/Nintendo_Switch", + body: "The Nintendo Switch is a video game console developed by Nintendo, released worldwide in most regions on March 3, 2017. It is a hybrid console that can be used as a home console and portable device. 
The Nintendo Switch was unveiled on October 20, 2016. Nintendo offers a Joy-Con Wheel, a small steering wheel-like unit that a Joy-Con can slot into, allowing it to be used for racing games such as Mario Kart 8." +}); +---- + +[[nlp-aws-examples-entities]] +===== Entity Extraction + +Let's start by extracting the entities from the Article node. +The text that we want to analyze is stored in the `body` property of the node, so we'll need to specify that via the `nodeProperty` configuration parameter. + +.The following streams the entities for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.aws.entities.stream(a, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body" +}) +YIELD value +UNWIND value.entities AS entity +RETURN entity; +---- + +.Results +[opts="header"] +|=== +| entity +| {score: 0.780032217502594, endOffset: 71, text: "Nintendo Switch", type: "COMMERCIAL_ITEM", beginOffset: 56} +| {score: 0.8155304193496704, endOffset: 151, text: "at least", type: "QUANTITY", beginOffset: 143} +| {score: 0.7507548332214355, endOffset: 156, text: "once", type: "QUANTITY", beginOffset: 152} +| {score: 0.8760746717453003, endOffset: 172, text: "twice a week", type: "QUANTITY", beginOffset: 160} +| {score: 0.9944096803665161, endOffset: 217, text: "Mario Kart 8", type: "TITLE", beginOffset: 205} +| {score: 0.9946564435958862, endOffset: 247, text: "Neo4j", type: "ORGANIZATION", beginOffset: 242} +| {score: 0.6274040937423706, endOffset: 256, text: "European", type: "LOCATION", beginOffset: 248} +|=== + +We get back 7 different entities. +We could then apply a Cypher statement that creates one node per entity and an `ENTITY` relationship from each of those nodes back to the `Article` node. 
+ +.The following streams the entities for the Pokemon article and then creates nodes for each entity +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.aws.entities.stream(a, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body" +}) +YIELD value +UNWIND value.entities AS entity +MERGE (e:Entity {name: entity.text}) +SET e.type = entity.type +MERGE (a)-[:ENTITY]->(e) +---- + +Alternatively we can use the graph mode to automatically create the entity graph. +As well as having the `Entity` label, each entity node will have another label based on the value of the `type` property. +By default a virtual graph is returned. + +.The following returns a virtual graph of entities for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.aws.entities.graph(a, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body", + relationshipType: "ENTITY" +}) +YIELD graph AS g +RETURN g; +---- + +We can see a Neo4j Browser visualization of the virtual graph in <>. + +[[apoc.nlp.aws.entities.graph.svg]] +image::apoc.nlp.aws.entities.graph.svg[title="Pokemon entities graph"] + +We can compute the entities for multiple nodes by passing a list of nodes to the procedure. + +.The following returns a virtual graph of entities for the Pokemon and Nintendo Switch articles +[source,cypher] +---- +MATCH (a:Article) +WITH collect(a) AS articles +CALL apoc.nlp.aws.entities.graph(articles, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body", + relationshipType: "ENTITY" +}) +YIELD graph AS g +RETURN g +---- + +We can see a Neo4j Browser visualization of the virtual graph in <>. 
+ +[[apoc.nlp.aws.entities-multiple.graph.svg]] +image::apoc.nlp.aws.entities-multiple.graph.svg[title="Pokemon and Nintendo Switch entities graph"] + +If we're happy with this graph and would like to persist it in Neo4j, we can do this by specifying the `write: true` configuration. + +.The following creates a `HAS_ENTITY` relationship from the article to each entity +[source,cypher] +---- +MATCH (a:Article) +WITH collect(a) AS articles +CALL apoc.nlp.aws.entities.graph(articles, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body", + relationshipType: "HAS_ENTITY", + write: true +}) +YIELD graph AS g +RETURN g; +---- + +We can then write a query to return the entities that have been created. + +.The following returns articles and their entities +[source,cypher] +---- +MATCH (article:Article) +RETURN article.uri AS article, + [(article)-[:HAS_ENTITY]->(e:Entity) | e.text] AS entities; +---- + +.Results +[opts="header"] +|=== +| article | entities +| "https://en.wikipedia.org/wiki/Nintendo_Switch" | ["October 20, 2016", "Mario Kart 8", "March 3, 2017", "Nintendo Switch", "Nintendo"] +| "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/" | ["Mario Kart 8", "European", "once", "Neo4j", "Nintendo Switch", "at least", "twice a week"] +|=== + +[[nlp-aws-examples-key-phrases]] +===== Key Phrases + +Let's now extract the key phrases from the Article node. +The text that we want to analyze is stored in the `body` property of the node, so we'll need to specify that via the `nodeProperty` configuration parameter. 
+ +.The following streams the key phrases for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.aws.keyPhrases.stream(a, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body" +}) +YIELD value +UNWIND value.keyPhrases AS keyPhrase +RETURN keyPhrase; +---- + +.Results +[opts="header"] +|=== +| keyPhrase +| {score: 0.9999966621398926, endOffset: 10, text: "These days", beginOffset: 0} +| {score: 0.9867414236068726, endOffset: 42, text: "more than a few feet", beginOffset: 22} +| {score: 0.9999999403953552, endOffset: 71, text: "my Nintendo Switch", beginOffset: 53} +| {score: 0.9999997019767761, endOffset: 94, text: "board games", beginOffset: 83} +| {score: 0.9999964237213135, endOffset: 106, text: "card games", beginOffset: 96} +| {score: 0.9998161792755127, endOffset: 129, text: "role playing games", beginOffset: 111} +| {score: 1.0, endOffset: 142, text: "friends", beginOffset: 135} +| {score: 0.8642383217811584, endOffset: 172, text: "a week", beginOffset: 166} +| {score: 0.9999430179595947, endOffset: 215, text: "lunch-time Mario Kart", beginOffset: 194} +| {score: 0.9983567595481873, endOffset: 229, text: "8 tournaments", beginOffset: 216} +| {score: 0.999997615814209, endOffset: 264, text: "the Neo4j European offices", beginOffset: 238} +|=== + +Alternatively we can use the graph mode to automatically create a key phrase graph. +One node with the `Keyphrase` label will be created for each key phrase extracted. + +By default a virtual graph is returned, but the graph can be persisted by specifying the `write: true` configuration. 
+
+.The following returns a graph of key phrases for the Pokemon article
+[source,cypher]
+----
+MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"})
+CALL apoc.nlp.aws.keyPhrases.graph(a, {
+  key: $apiKey,
+  secret: $apiSecret,
+  nodeProperty: "body",
+  relationshipType: "KEY_PHRASE",
+  write: true
+})
+YIELD graph AS g
+RETURN g;
+----
+
+We can see a Neo4j Browser visualization of the virtual graph in <<apoc.nlp.aws.keyPhrases.graph.svg>>.
+
+[[apoc.nlp.aws.keyPhrases.graph.svg]]
+image::apoc.nlp.aws.keyPhrases.graph.svg[title="Pokemon key phrases graph"]
+
+We can then write a query to return the key phrases that have been created.
+
+.The following returns articles and their key phrases
+[source,cypher]
+----
+MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"})
+RETURN a.uri AS article,
+       [(a)-[:KEY_PHRASE]->(k:Keyphrase) | k.text] AS keyPhrases;
+----
+
+.Results
+[opts="header"]
+|===
+| article | keyPhrases
+| "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/" | ["the Neo4j European offices", "a week", "friends", "8 tournaments", "lunch-time Mario Kart", "card games", "board games", "role playing games", "my Nintendo Switch", "more than a few feet", "These days"]
+|===
+
+[[nlp-aws-examples-sentiment]]
+===== Sentiment
+
+Let's now extract the sentiment of the Article node.
+The text that we want to analyze is stored in the `body` property of the node, so we'll need to specify that via the `nodeProperty` configuration parameter. 
+ +.The following streams the key phrases for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.aws.sentiment.stream(a, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body" +}) +YIELD value +RETURN value; +---- + +.Results +[opts="header"] +|=== +| value +| {index: 0, sentiment: "POSITIVE", sentimentScore: {neutral: 0.33138760924339294, negative: 0.0026062370743602514, mixed: 3.5950531582784606E-6, positive: 0.6660025119781494}} +|=== + +Alternatively we can use the graph mode to automatically store the sentiment and its score. + +By default a virtual graph is returned, but the graph can be persisted by specifying the `write: true` configuration. +The sentiment is stored in the `sentiment` property and the score for that sentiment in the `sentimentScore` property. + +.The following returns a graph with the sentiment for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.aws.sentiment.graph(a, { + key: $apiKey, + secret: $apiSecret, + nodeProperty: "body", + write: true +}) +YIELD graph AS g +UNWIND g.nodes AS node +RETURN node {.uri, .sentiment, .sentimentScore} AS node; +---- + +.Results +[opts="header"] +|=== +| node +| {sentiment: "Positive", sentimentScore: 0.6660025119781494, uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"} +|=== \ No newline at end of file diff --git a/docs/asciidoc/nlp/gcp.adoc b/docs/asciidoc/nlp/gcp.adoc new file mode 100644 index 00000000..62020c7a --- /dev/null +++ b/docs/asciidoc/nlp/gcp.adoc @@ -0,0 +1,387 @@ +[[nlp-gcp]] +=== Google Cloud Platform (GCP) + +[abstract] +-- +This chapter describes procedures that wrap Google Cloud Platform's Natural Language API. +-- + +Google Cloud Platform's https://cloud.google.com/natural-language[Natural Language API^] lets users derive insights from unstructured text using Google machine learning. 
+The procedures in this chapter act as a wrapper around calls to this API to extract entities, categories, or sentiment from text stored as node properties. + +Each procedure has two modes: + +* Stream - returns a map constructed from the JSON returned from the API +* Graph - creates a graph or virtual graph based on the values returned by the API + +This section includes the following: + +* <> +* <> +* <> +* <> +* <> +* <> + ** <> + ** <> + +[[nlp-gcp-overview]] +==== Procedure Overview + +The procedures are described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.nlp.gcp.entities.csv[] +include::../../../build/generated-documentation/apoc.nlp.gcp.classify.csv[lines=2..3] +|=== + +[[nlp-gcp-dependencies]] +==== Install Dependencies + +include::nlp-dependencies.adoc[] + + +[[nlp-gcp-api-key]] +==== Setting up API Key + +We can generate an API Key that has access to the Cloud Natural Language API by going to https://console.cloud.google.com/apis/credentials[console.cloud.google.com/apis/credentials^]. +Once we've created a key, we can populate and execute the following command to create a parameter that contains these details. + +.The following defines the `apiKey` parameter +[source,cypher] +---- +:param apiKey => ("") +---- + +[[nlp-gcp-entities]] +==== Entity Extraction + +The entity extraction procedures (`apoc.nlp.gcp.entities.*`) are wrappers around the https://cloud.google.com/natural-language/docs/reference/rest/v1/documents/analyzeEntities[`documents.analyzeEntities`^] method of the Google Natural Language API. +This API method finds named entities (currently proper names and common nouns) in the text along with entity types, salience, mentions for each entity, and other properties. 
+ +The procedures are described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.nlp.gcp.entities.csv[] +|=== + +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| key | String | null | API Key for Google Natural Language API +| nodeProperty | String | text | The property on the provided node that contains the unstructured text to be analyzed +|=== + +In addition, `apoc.nlp.gcp.entities.graph` supports the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| write | Boolean | false | persist the graph of entities +| relationshipType | String | ENTITY | relationship type for relationships from source node to entity nodes +|=== + +.Streaming mode +[source,cypher] +---- +CALL apoc.nlp.gcp.entities.stream(sourceNode:Node, { + key: String, + nodeProperty: String +}) +YIELD value +---- + +.Graph mode +[source,cypher] +---- +CALL apoc.nlp.gcp.entities.graph(sourceNode:Node, { + key: String, + nodeProperty: String, + relationshipType: String, + write: Boolean +}) +YIELD graph +---- + + +[[nlp-gcp-classify]] +==== Classification + +The entity extraction procedures (`apoc.nlp.gcp.classify.*`) are wrappers around the https://cloud.google.com/natural-language/docs/reference/rest/v1/documents/classifyText[`documents.classifyText`^] method of the Google Natural Language API. +This API method classifies a document into categories. 
+ +The procedures are described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.nlp.gcp.classify.csv[] +|=== + +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| key | String | null | API Key for Google Natural Language API +| nodeProperty | String | text | The property on the provided node that contains the unstructured text to be analyzed +|=== + +In addition, `apoc.nlp.gcp.classify.graph` supports the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| write | Boolean | false | persist the graph of entities +| relationshipType | String | CATEGORY | relationship type for relationships from source node to entity nodes +|=== + +.Streaming mode +[source,cypher] +---- +CALL apoc.nlp.gcp.classify.stream(sourceNode:Node, { + key: String, + nodeProperty: String +}) +YIELD value +---- + +.Graph mode +[source,cypher] +---- +CALL apoc.nlp.gcp.classify.graph(sourceNode:Node, { + key: String, + nodeProperty: String, + relationshipType: String, + write: Boolean +}) +YIELD graph +---- + + +[[nlp-gcp-examples]] +==== Examples + +The examples in this section are based on the following sample graph: + +[source,cypher] +---- +CREATE (:Article { + uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/", + body: "These days I’m rarely more than a few feet away from my Nintendo Switch and I play board games, card games and role playing games with friends at least once or twice a week. I’ve even organised lunch-time Mario Kart 8 tournaments between the Neo4j European offices!" +}); +---- + +[[nlp-gcp-examples-entities]] +===== Entity Extraction + +Let's start by extracting the entities from the Article node. +The text that we want to analyze is stored in the `body` property of the node, so we'll need to specify that via the `nodeProperty` configuration parameter. 
+ +.The following streams the entities for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.entities.stream(a, { + key: $apiKey, + nodeProperty: "body" +}) +YIELD value +UNWIND value.entities AS entity +RETURN entity; +---- + +.Results +[opts="header"] +|=== +| entity +| {name: "card games", salience: 0.17967656, metadata: {}, type: "CONSUMER_GOOD", mentions: [{type: "COMMON", text: {content: "card games", beginOffset: -1}}]} +| {name: "role playing games", salience: 0.16441391, metadata: {}, type: "OTHER", mentions: [{type: "COMMON", text: {content: "role playing games", beginOffset: -1}}]} +| {name: "Switch", salience: 0.143287, metadata: {}, type: "OTHER", mentions: [{type: "COMMON", text: {content: "Switch", beginOffset: -1}}]} +| {name: "friends", salience: 0.13336793, metadata: {}, type: "PERSON", mentions: [{type: "COMMON", text: {content: "friends", beginOffset: -1}}]} +| {name: "Nintendo", salience: 0.12601112, metadata: {mid: "/g/1ymzszlpz"}, type: "ORGANIZATION", mentions: [{type: "PROPER", text: {content: "Nintendo", beginOffset: -1}}]} +| {name: "board games", salience: 0.08861496, metadata: {}, type: "CONSUMER_GOOD", mentions: [{type: "COMMON", text: {content: "board games", beginOffset: -1}}]} +| {name: "tournaments", salience: 0.0603245, metadata: {}, type: "EVENT", mentions: [{type: "COMMON", text: {content: "tournaments", beginOffset: -1}}]} +| {name: "offices", salience: 0.034420907, metadata: {}, type: "LOCATION", mentions: [{type: "COMMON", text: {content: "offices", beginOffset: -1}}]} +| {name: "Mario Kart 8", salience: 0.029095741, metadata: {wikipedia_url: "https://en.wikipedia.org/wiki/Mario_Kart_8", mid: "/m/0119mf7q"}, type: "PERSON", mentions: [{type: "PROPER", text: {content: "Mario Kart 8", beginOffset: -1}}]} +| {name: "European", salience: 0.020393685, metadata: {mid: "/m/02j9z", wikipedia_url: "https://en.wikipedia.org/wiki/Europe"}, type: 
"LOCATION", mentions: [{type: "PROPER", text: {content: "European", beginOffset: -1}}]} +| {name: "Neo4j", salience: 0.020393685, metadata: {mid: "/m/0b76t3s", wikipedia_url: "https://en.wikipedia.org/wiki/Neo4j"}, type: "ORGANIZATION", mentions: [{type: "PROPER", text: {content: "Neo4j", beginOffset: -1}}]} +| {name: "8", salience: 0, metadata: {value: "8"}, type: "NUMBER", mentions: [{type: "TYPE_UNKNOWN", text: {content: "8", beginOffset: -1}}]} +|=== + +We get back 12 different entities. +We could then apply a Cypher statement that creates one node per entity and an `ENTITY` relationship from each of those nodes back to the `Article` node. + +.The following streams the entities for the Pokemon article and then creates nodes for each entity +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.entities.stream(a, { + key: $apiKey, + nodeProperty: "body" +}) +YIELD value +UNWIND value.entities AS entity +MERGE (e:Entity {name: entity.name}) +SET e.type = entity.type +MERGE (a)-[:ENTITY]->(e) +---- + +Alternatively we can use the graph mode to automatically create the entity graph. +As well as having the `Entity` label, each entity node will have another label based on the value of the `type` property. +By default a virtual graph is returned. + +.The following returns a virtual graph of entities for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.entities.graph(a, { + key: $apiKey, + nodeProperty: "body", + relationshipType: "ENTITY" +}) +YIELD graph AS g +RETURN g; +---- + +We can see a Neo4j Browser visualization of the virtual graph in <>. + +[[apoc.nlp.gcp.entities.graph.svg]] +image::apoc.nlp.gcp.entities.graph.svg[title="Pokemon entities graph"] + +If we're happy with this graph and would like to persist it in Neo4j, we can do this by specifying the `write: true` configuration. 
+ +.The following creates a `HAS_ENTITY` relationship from the article to each entity +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.entities.graph(a, { + key: $apiKey, + nodeProperty: "body", + relationshipType: "HAS_ENTITY", + write: true +}) +YIELD graph AS g +RETURN g; +---- + +We can then write a query to return the entities that have been created. + +.The following returns articles and their entities +[source,cypher] +---- +MATCH (article:Article) +RETURN article.uri AS article, + [(article)-[:HAS_ENTITY]->(e) | e.name] AS entities; +---- + +.Results +[opts="header"] +|=== +| article | entities +| "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/" | ["tournaments", "Neo4j", "Switch", "card games", "board games", "Mario Kart 8", "Nintendo", "friends", "8", "offices", "European", "role playing games"] +|=== + +[[nlp-gcp-examples-classify]] +===== Classification + +Now let's extract categories from the Article node. +The text that we want to analyze is stored in the `body` property of the node, so we'll need to specify that via the `nodeProperty` configuration parameter. + +.The following streams the categories for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.classify.stream(a, { + key: $apiKey, + nodeProperty: "body" +}) +YIELD value +UNWIND value.categories AS category +RETURN category; +---- + +.Results +[opts="header"] +|=== +| category +| {name: "/Games", confidence: 0.91} +|=== + +We get back only one category +We could then apply a Cypher statement that creates one node per category and a `CATEGORY` relationship from each of those nodes back to the `Article` node. 
+ +.The following streams the categories for the Pokemon article and then creates nodes for each category +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.classify.stream(a, { + key: $apiKey, + nodeProperty: "body" +}) +YIELD value +UNWIND value.categories AS category +MERGE (c:Category {name: category.name}) +MERGE (a)-[:CATEGORY]->(c) +---- + + +Alternatively we can use the graph mode to automatically create the category graph. +As well as having the `Category` label, each category node will have another label based on the value of the `type` property. +By default a virtual graph is returned. + +.The following returns a virtual graph of categories for the Pokemon article +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.classify.graph(a, { + key: $apiKey, + nodeProperty: "body", + relationshipType: "CATEGORY" +}) +YIELD graph AS g +RETURN g; +---- + +We can see a Neo4j Browser visualization of the virtual graph in <>. + +[[apoc.nlp.gcp.classify.graph.svg]] +image::apoc.nlp.gcp.classify.graph.svg[title="Pokemon categories graph"] + +.The following creates a `HAS_CATEGORY` relationship from the article to each entity +[source,cypher] +---- +MATCH (a:Article {uri: "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/"}) +CALL apoc.nlp.gcp.classify.graph(a, { + key: $apiKey, + nodeProperty: "body", + relationshipType: "HAS_CATEGORY", + write: true +}) +YIELD graph AS g +RETURN g; +---- + +We can then write a query to return the entities that have been created. 
+
+.The following returns articles and their categories
+[source,cypher]
+----
+MATCH (article:Article)
+RETURN article.uri AS article,
+       [(article)-[:HAS_CATEGORY]->(c) | c.name] AS categories;
+----
+
+.Results
+[opts="header"]
+|===
+| article | categories
+| "https://neo4j.com/blog/pokegraph-gotta-graph-em-all/" | ["/Games"]
+|===
\ No newline at end of file
diff --git a/docs/asciidoc/nlp/nlp-dependencies.adoc b/docs/asciidoc/nlp/nlp-dependencies.adoc
new file mode 100644
index 00000000..85d6b2e8
--- /dev/null
+++ b/docs/asciidoc/nlp/nlp-dependencies.adoc
@@ -0,0 +1,4 @@
+The NLP procedures have dependencies on Kotlin and client libraries that are not included in the APOC Library.
+
+These dependencies are included in https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/{apoc-release}/apoc-nlp-dependencies-{apoc-release}.jar[apoc-nlp-dependencies-{apoc-release}.jar^], which can be downloaded from the https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/tag/{apoc-release}[releases page^].
+Once that file is downloaded, it should be placed in the `plugins` directory and the Neo4j Server restarted.
\ No newline at end of file
diff --git a/docs/asciidoc/path-finding/config-params.adoc b/docs/asciidoc/path-finding/config-params.adoc
new file mode 100644
index 00000000..e4746d9e
--- /dev/null
+++ b/docs/asciidoc/path-finding/config-params.adoc
@@ -0,0 +1,35 @@
+The procedures support the following config parameters:
+
+.Config parameters
+[opts=header]
+|===
+| name | type | default | description
+| minLevel | Long | -1 | the minimum number of hops in the traversal. Must be 0 or 1 if specified
+| maxLevel | Long | -1 | the maximum number of hops in the traversal
+| relationshipFilter | String | null | the relationship types and directions to traverse. +
+See <>.
+| labelFilter | String | null | the node labels to traverse. +
+See <>. 
+| beginSequenceAtStart | Boolean | true | starts matching sequences of node labels and/or relationship types (defined in `relationshipFilter`, `labelFilter`, or `sequences`) one node away from the start node.
+| bfs | Boolean | true | use Breadth First Search when traversing. Uses Depth First Search if set to `false`
+| filterStartNode | Boolean | false | whether the `labelFilter` and `sequence` apply to the start node of the expansion.
+| limit | Long | -1 | limit the number of paths returned. When using `bfs:true`, this has the effect of returning paths to the `n` nearest nodes with labels in the termination or end node filter, where `n` is the limit given.
+| optional | Boolean | false | is path expansion optional?
+If set to `true`, a `null` value is yielded whenever the expansion would normally eliminate rows due to no results.
+| endNodes | List<Node> | null | only these nodes can end returned paths, and expansion will continue past these nodes, if possible.
+| terminatorNodes | List<Node> | null | Only these nodes can end returned paths, and expansion won't continue past these nodes.
+| whiteListNodes | List<Node> | null | Only these nodes are allowed in the expansion (though endNodes and terminatorNodes will also be allowed, if present).
+| blackListNodes | List<Node> | null | None of the paths returned will include these nodes.
+|===
+
+It also has the following fixed parameter:
+
+.Config parameters
+[opts=header]
+|===
+| name | type | default | description
+| uniqueness | String | NODE_GLOBAL | the strategy to use when expanding relationships in a traversal.
+`NODE_GLOBAL` means that a node cannot be traversed more than once.
+This is what the legacy traversal framework does. 
+|=== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/expand-config-sample-graph.adoc b/docs/asciidoc/path-finding/expand-config-sample-graph.adoc new file mode 100644 index 00000000..669dcfe2 --- /dev/null +++ b/docs/asciidoc/path-finding/expand-config-sample-graph.adoc @@ -0,0 +1,46 @@ +[source,cypher] +---- +MERGE (mark:Person:DevRel {name: "Mark"}) +MERGE (lju:Person:DevRel {name: "Lju"}) +MERGE (praveena:Person:Engineering {name: "Praveena"}) +MERGE (zhen:Person:Engineering {name: "Zhen"}) +MERGE (martin:Person:Engineering {name: "Martin"}) +MERGE (joe:Person:Field {name: "Joe"}) +MERGE (stefan:Person:Field {name: "Stefan"}) +MERGE (alicia:Person:Product {name: "Alicia"}) +MERGE (jake:Person:Product {name: "Jake"}) +MERGE (john:Person:Product {name: "John"}) +MERGE (jonny:Person:Sales {name: "Jonny"}) +MERGE (anthony:Person:Sales {name: "Anthony"}) +MERGE (rik:Person:Sales {name: "Rik"}) + +MERGE (zhen)-[:KNOWS]-(stefan) +MERGE (zhen)-[:KNOWS]-(lju) +MERGE (zhen)-[:KNOWS]-(praveena) +MERGE (zhen)-[:KNOWS]-(martin) +MERGE (mark)-[:KNOWS]-(jake) +MERGE (alicia)-[:KNOWS]-(jake) +MERGE (jonny)-[:KNOWS]-(anthony) +MERGE (john)-[:KNOWS]-(rik) + +MERGE (alicia)-[:FOLLOWS]->(joe) +MERGE (joe)-[:FOLLOWS]->(mark) +MERGE (joe)-[:FOLLOWS]->(praveena) +MERGE (joe)-[:FOLLOWS]->(zhen) +MERGE (mark)-[:FOLLOWS]->(stefan) +MERGE (stefan)-[:FOLLOWS]->(joe) +MERGE (praveena)-[:FOLLOWS]->(joe) +MERGE (lju)-[:FOLLOWS]->(jake) +MERGE (alicia)-[:FOLLOWS]->(jonny) +MERGE (zhen)-[:FOLLOWS]->(john) +MERGE (anthony)-[:FOLLOWS]->(joe) +---- + +The Neo4j Browser visualization below shows the sample graph: + +image::apoc.path.expandConfig.svg[title="Sample Graph"] + +The `KNOWS` relationship type is considered to be bidirectional, where if Zhen knows Stefan, we can imply that Stefan knows Zhen. +When using the `KNOWS` relationship we will ignore the direction. + +The `FOLLOWS` relationship has a direction, so we will specify a direction when we use it. 
diff --git a/docs/asciidoc/path-finding/expand-config.adoc b/docs/asciidoc/path-finding/expand-config.adoc new file mode 100644 index 00000000..216921de --- /dev/null +++ b/docs/asciidoc/path-finding/expand-config.adoc @@ -0,0 +1,896 @@ +[[path-expander-paths-config]] +== Expand paths with config + +[abstract] +-- +This section describes a procedure that can be used to expand the paths of variable length path traversals, while providing configuration options. +-- + +The expand paths with config procedure enables powerful variable length path traversals with fine grained control over the traversals. +For a more basic version of the algorithm where fine grained control over traversals isn't required, see <>. + +This section includes: + +* <> +* <> +* <> +* <> + ** <> + ** <> + ** <> + ** <> + ** <> + ** <> + ** <> + +[[path-expander-paths-config-overview]] +=== Procedure Overview + +The procedure is described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.path.expandConfig.csv[] +|=== + +[[path-expander-paths-config-config]] +=== Configuration parameters +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| minLevel | Long | -1 | the minimum number of hops in the traversal +| maxLevel | Long | -1 | the maximum number of hops in the traversal +| relationshipFilter | String | null | the relationship types and directions to traverse. + +See <>. +| labelFilter | String | null | the node labels to traverse. + +See <>. +| sequence | String | null | comma-separated alternating label and relationship filters, for each step in a repeating sequence. +If present, `labelFilter`, and `relationshipFilter` are ignored, as this takes priority. + +See <>. 
+| beginSequenceAtStart | Boolean | true | starts matching sequences of node labels and/or relationship types (defined in `relationshipFilter`, `labelFilter`, or `sequences`) one node away from the start node. +| uniqueness | String | RELATIONSHIP_PATH | the strategy to use when expanding relationships in a traversal. + +See <>. +| bfs | Boolean | true | use Breadth First Search when traversing. Uses Depth First Search if set to `false` +| filterStartNode | Boolean | false | whether the `labelFilter` and `sequence` apply to the start node of the expansion. +| limit | Long | -1 | limit the number of paths returned. When using `bfs:true`, this has the effect of returning paths to the `n` nearest nodes with labels in the termination or end node filter, where `n` is the limit given. +| optional | Boolean | false | is path expansion optional? +If set to `true`, a `null` value is yielded whenever the expansion would normally eliminate rows due to no results. +| endNodes | List | null | only these nodes can end returned paths, and expansion will continue past these nodes, if possible. +| terminatorNodes | List | null | Only these nodes can end returned paths, and expansion won't continue past these nodes. +| whitelistNodes | List | null | Only these nodes are allowed in the expansion (though endNodes and terminatorNodes will also be allowed, if present). +| blacklistNodes | List | null | None of the paths returned will include these nodes. 
+|=== + +[[path-expander-paths-config-config-relationship-filters]] +==== Relationship Filters + +The syntax for relationship filters is described below: + +include::relationship-filter.adoc[] + +[[path-expander-paths-config-config-label-filters]] +==== Label Filters + +The syntax for label filters is described below: + +include::label-filter.adoc[] + +*Label filter operator precedence and behavior* + +include::label-filter-precedence.adoc[] + +[[path-expander-paths-config-config-uniqueness]] +==== Uniqueness + +include::uniqueness-expand.adoc[] + +[[path-expander-paths-config-config-sequences]] +=== Specifying Sequences of node labels and relationship types + +include::sequences.adoc[] +include::sequence-tips.adoc[] + +[[path-expander-paths-config-examples]] +=== Examples + +The examples in this section are based on the following sample graph: + +include::expand-config-sample-graph.adoc[] + +[[path-expander-paths-config-examples-label-relationship-filters]] +==== Relationship Type and Node Label filters + +Let's start by expanding paths from the Praveena node. +We only want to consider the `KNOWS` relationship type, so we'll specify that as the `relationshipFilter` parameter. 
+ +.The following returns the paths to people that Praveena `KNOWS` from 1 to 2 hops +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "KNOWS", + minLevel: 1, + maxLevel: 2 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 1 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 2 +|=== + +Praveena only has a direct `KNOWS` relationship to Zhen, but Zhen has `KNOWS` relationships to 3 other people, which means they're 2 hops away from Praveena. + +We can also provide a node label filter to restrict the nodes that are returned. +If we want to only return paths where every node has the `Engineering` label, we'll provide the value `+Engineering` to the `labelFilter` parameter. 
+ +.The following returns paths containing only `Engineering` people that Praveena `KNOWS` from 1 to 2 hops +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "KNOWS", + labelFilter: "+Engineering", + minLevel: 1, + maxLevel: 2 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 1 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 2 +|=== + +We lose the paths that ended with Lju and Stefan because neither of those nodes had the `Engineering` label. + +We can specify multiple relationship types. +The following query starts from the Alicia node, and then expands the `FOLLOWS` and `KNOWS` relationships: + +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"}) | 1 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: 
"Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"}) | 2 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) 
| 3 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +|=== + +This query returns 19 paths; Alicia is very well connected! + +We can see a Neo4j Browser visualization of the returned paths in <<viz-apoc-path-config-alicia>>. + +[[viz-apoc-path-config-alicia]] +image::apoc.path.expandConfig.alicia.svg[title="Paths from Alicia"] + +We can also specify traversal termination criteria using label filters. +If we wanted to terminate a traversal as soon as the traversal encounters a node containing the `Engineering` label, we can use the `/Engineering` node filter. + +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, terminating as soon as a node with the `Engineering` label is reached +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: "/Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +|=== + +We're now down to only two paths. +But this query doesn't capture all of the paths from Alicia that end in a node with the `Engineering` label.
+We can use the `>Engineering` node filter to define a traversal that: + +* only returns paths that terminate at nodes with the `Engineering` label +* continues expansion to end nodes after that, looking for more paths that end with the `Engineering` label + +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, where paths end with a node with the `Engineering` label +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: ">Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 3 +|=== + +Our query now also returns paths going through Praveena and Zhen, one going to Martin, and two others going back to Zhen and Praveena! + +[[path-expander-paths-config-examples-end-nodes-terminator-nodes]] +==== Terminator Nodes and End Nodes + +As well as specifying terminator and end labels for traversals, we can also specify terminator and end nodes.
+ +Let's build on the previous query that found people that Alicia `KNOWS` or `FOLLOWS`. +We want any returned paths to stop as soon as the Joe node is encountered, which we can do by passing the Joe node to the `terminatorNodes` parameter. + +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, terminating as soon as Joe is reached +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (joe:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + terminatorNodes: [joe] +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +|=== + +Alicia `FOLLOWS` Joe, but there's also another path that goes via Jonny and Anthony. + +The terminator nodes approach doesn't necessarily find all the paths that exist between Alicia and Joe. +There might be other paths that go through the Joe node twice. +We can find these paths by passing the Joe node to the `endNodes` parameter. +If we use this parameter, all returned paths will end at the Joe node, but expansion will continue past this node to try and find other paths that end at Joe. 
+ + +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, where paths end when they reach Joe +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (joe:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + endNodes: [joe] +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +|=== + +We've got the two paths we got with the terminator nodes approach, from Alicia to Joe, and from Alicia to Jonny to Anthony to Joe. +But we've also got an extra path that goes from Alicia to Joe to Praveena to Joe. + +[[path-expander-paths-config-examples-whitelist-blacklist]] +==== Whitelist Nodes and Blacklist Nodes + +Whitelist and blacklist nodes can also be specified. + +Let's build on the previous query that found people that Alicia `KNOWS` or `FOLLOWS`. +We want any returned paths to only include the nodes Mark, Joe, Zhen, and Praveena, which we can do by passing these nodes to the parameter `whitelistNodes`.
+ +.The following returns paths from Alicia following the `FOLLOWS` or `KNOWS` relationship types from 1 to 3 hops, only including paths that contain Mark, Joe, Zhen, and Praveena +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (whitelist:Person) +WHERE whitelist.name IN ["Mark", "Joe", "Zhen", "Praveena"] +WITH p, collect(whitelist) AS whitelistNodes +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + whitelistNodes: whitelistNodes +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 3 +|=== + +Out of the whitelist, the only person with a direct connection to Alicia is Joe, so all paths go through him. +We then go from Joe to the others, and then between each other for the paths of 3 hops. + +We can see a Neo4j Browser visualization of the returned paths in <<viz-apoc-path-config-whitelist>>.
+ +[[viz-apoc-path-config-whitelist]] +image::apoc.path.expandConfig.whitelist.svg[title="Paths from Alicia to Mark, Joe, Zhen, and Praveena"] + +A blacklist is used to exclude nodes from the returned paths. +If we want to exclude paths that contain Joe, we can do this by passing the Joe node to the `blacklistNodes` parameter. + +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, excluding paths that include Joe +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (joe:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + blacklistNodes: [joe] +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"}) | 1 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"}) | 2 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +|=== + +This returns a very small set of paths since Joe was a very pivotal node in connecting Alicia to the rest of the graph. + +We can see a Neo4j Browser visualization of the returned paths in <<viz-apoc-path-config-blacklist>>.
+ +[[viz-apoc-path-config-blacklist]] +image::apoc.path.expandConfig.blacklist.svg[title="Paths from Alicia that don't include Joe"] + +[[path-expander-paths-config-examples-bfs-dfs]] +==== Breadth First Search and Depth First Search + +We can control whether the traversal uses the Breadth First Search (BFS), by specifying `bfs: true`, or Depth First Search algorithm (DFS), by specifying `bfs: false`. +This is often combined with the `limit` parameter to find the nearest nodes based on the chosen algorithm. + +.The following returns 10 paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, using BFS +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (joe:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 5, + bfs: true, + limit: 10 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"}) | 1 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"}) | 2 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering 
{name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 3 +|=== + +From these results we can see that paths are completely expanded at each level before going onto the next one. +For example, we first expand from: + +* Alicia `->` Joe +* Alicia `->` Jonny +* Alicia `->` Jake + +Before then following relationships from those nodes. +And once it's expanded everything at level 2, it will then explore level 3. + +[[viz-apoc-path-config-alicia-bfs]] +image::apoc.path.expandConfig.alicia.bfs.svg[title="Paths from Alicia using Breadth First Search"] + +If we use the Depth First Search algorithm, the traversal will go as far as it can (up to the `maxLevel` of hops) down a particular path, before going back up and exploring other ones. + +.The following returns 10 paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, using DFS +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (joe:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + bfs: false, + limit: 10 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: 
"Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 3 +|=== + +Now we have a different set of paths returned. +We don't even see the paths from Alicia to Jonny or Alicia to Jake because our limit of 10 paths is completely taken up with paths going through Joe. + +We can see a Neo4j Browser visualization of the returned paths in <<viz-apoc-path-config-alicia-dfs>>. + +[[viz-apoc-path-config-alicia-dfs]] +image::apoc.path.expandConfig.alicia.dfs.svg[title="Paths from Alicia using Depth First Search"] + +[[path-expander-paths-config-examples-uniqueness]] +==== Uniqueness + +We can specify the uniqueness strategy to be used by the traversal through the `uniqueness` parameter. +See <<path-expander-paths-config-config-uniqueness>> for a list of valid strategies. +The default value is `RELATIONSHIP_PATH`. + +In this section we're going to write queries that start from Joe and traverse the `FOLLOWS` relationship.
+ +.The following returns the nodes in paths starting from Joe and traversing the `FOLLOWS` relationship type from 1 to 3 hops +[source,cypher] +---- +MATCH (p:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>", + minLevel: 1, + maxLevel: 3, + uniqueness: "RELATIONSHIP_PATH" // default +}) +YIELD path +RETURN [node in nodes(path) | node.name] AS nodes, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| nodes | hops +| ["Joe", "Zhen"] | 1 +| ["Joe", "Praveena"] | 1 +| ["Joe", "Mark"] | 1 +| ["Joe", "Zhen", "John"] | 2 +| ["Joe", "Praveena", "Joe"] | 2 +| ["Joe", "Mark", "Stefan"] | 2 +| ["Joe", "Praveena", "Joe", "Zhen"] | 3 +| ["Joe", "Praveena", "Joe", "Mark"] | 3 +| ["Joe", "Mark", "Stefan", "Joe"] | 3 +|=== + +Several of the paths returned contain the Joe node twice. +If we want to ensure that the nodes in a path are unique, we can use the `NODE_PATH` strategy. + +.The following returns the nodes in paths starting from Joe and traversing the `FOLLOWS` relationship type from 1 to 3 hops, using the `NODE_PATH` strategy +[source,cypher] +---- +MATCH (p:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>", + minLevel: 1, + maxLevel: 3, + uniqueness: "NODE_PATH" +}) +YIELD path +RETURN [node in nodes(path) | node.name] AS nodes, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| nodes | hops +| ["Joe", "Zhen"] | 1 +| ["Joe", "Praveena"] | 1 +| ["Joe", "Mark"] | 1 +| ["Joe", "Zhen", "John"] | 2 +| ["Joe", "Mark", "Stefan"] | 2 +|=== + +The paths returned now have unique lists of nodes. + +[[path-expander-paths-config-examples-sequence-rel-types]] +==== Sequences of relationship types + +Sequences of relationship types can be specified by comma separating the values passed to `relationshipFilter`. 
+ +For example, if we want to start from the Joe node and traverse a sequence of the `FOLLOWS` relationship in the outgoing direction and the `KNOWS` relationship in either direction, we can specify the relationship filter `FOLLOWS>,KNOWS`. + +.The following returns the paths of 1 to 4 hops from Joe where the relationship types alternate between `FOLLOWS` and `KNOWS` +[source,cypher] +---- +MATCH (p:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>,KNOWS", + beginSequenceAtStart: true, + minLevel: 1, + maxLevel: 4 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 1 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 1 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 1 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: 
"Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"})-[:FOLLOWS]->(:Person:Product {name: "Jake"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"})-[:FOLLOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"}) | 4 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"})-[:FOLLOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:Product {name: "Alicia"}) | 4 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"})-[:KNOWS]->(:Person:Sales {name: "Rik"}) | 4 +|=== + +The `minLevel` and `maxLevel` values refer to the number of relationships in the path. +Using a `minLevel` of 1 means that paths one hop from Joe with the `FOLLOWS` relationship type will be returned. +If we want to ensure that the relationship type sequence defined in this `relationshipFilter` is matched at least once, we need to use a `minLevel` of `2` since there are two relationship types in the filter. 
+ +.The following returns the paths of 2 to 4 hops from Joe where the relationship types alternate between `FOLLOWS` and `KNOWS` +[source,cypher] +---- +MATCH (p:Person {name: "Joe"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "FOLLOWS>,KNOWS", + beginSequenceAtStart: true, + minLevel: 2, + maxLevel: 4 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 2 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"})-[:FOLLOWS]->(:Person:Product {name: "Jake"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"}) | 3 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering 
{name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"})-[:FOLLOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"}) | 4 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"})-[:FOLLOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:Product {name: "Alicia"}) | 4 +| (:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"})-[:KNOWS]->(:Person:Sales {name: "Rik"}) | 4 +|=== + +This config can also be used in combination with `beginSequenceAtStart: false`, which means that the sequence will start one hop away from the starting node. +If we use this config, it means that the first relationship type defined in `relationshipFilter` will only apply to the starting node. + +.The following returns the paths of 3 to 5 hops from Jake where the relationship types alternate between `FOLLOWS` and `KNOWS`, after first following `KNOWS` relationships from Jake +[source,cypher] +---- +MATCH (p:Person {name: "Jake"}) +CALL apoc.path.expandConfig(p, { + relationshipFilter: "KNOWS,FOLLOWS>,KNOWS", + beginSequenceAtStart: false, + minLevel: 3, + maxLevel: 7 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 3 +| (:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"}) | 3 +| (:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"}) | 4 +| (:Person:Product {name: 
"Jake"})<-[:KNOWS]-(:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 4 +| (:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"})-[:KNOWS]->(:Person:Sales {name: "Rik"}) | 5 +|=== + +[[path-expander-paths-config-examples-sequence-node-labels]] +==== Sequences of node labels + +Sequences of node labels can be specified by comma separating values passed to `labelFilter`. +This is usually used in combination with `beginSequenceAtStart: false`, which means that sequences will start one hop away from the starting node. + +For example, if we start from the Praveena node and want to return the paths that contain alternating `Field` and `DevRel` nodes, we can specify a label filter of `"+Field,+DevRel"`. + +.The following returns the paths of 1 to 4 hops from Praveena where the nodes alternate between having the `Field` and `DevRel` labels. 
+[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expandConfig(p, { + labelFilter: "+Field,+DevRel", + beginSequenceAtStart: false, + minLevel: 1, + maxLevel: 4 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"}) | 1 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +|=== + +The `minLevel` and `maxLevel` values refer to the number of relationships in the path. +Using a `minLevel` of 1 means that paths where the node one hop from Praveena has the `Field` label will be returned. +If we want to ensure that the label sequence defined in this `labelFilter` is matched at least once, we need to use a `minLevel` of `2`. + +.The following returns the paths of 2 to 4 hops from Praveena where the nodes alternate between having the `Field` and `DevRel` labels. 
+[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expandConfig(p, { + labelFilter: "+Field,+DevRel", + beginSequenceAtStart: false, + minLevel: 2, + maxLevel: 4 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +|=== + +The paths that only contain a relationship from Praveena to Joe have now been filtered out. + +But what if we don't want to specify multiple labels exist, but instead want to find paths where a node doesn't have a label? +To find paths that contain alternating `Field` and not `Field` nodes, we can specify a label filter of `"+Field,-Field"`. 
+ +.The following returns the paths of 1 to 4 hops from Praveena where the nodes alternate between having the `Field` label and not having the `Field` label +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expandConfig(p, { + labelFilter: "+Field,-Field", + beginSequenceAtStart: false, + minLevel: 2, + maxLevel: 4 +}) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})<-[:FOLLOWS]-(:Person:Sales {name: "Anthony"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})<-[:FOLLOWS]-(:Person:Product {name: "Alicia"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})<-[:FOLLOWS]-(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})<-[:FOLLOWS]-(:Person:Sales {name: "Anthony"}) | 2 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})<-[:FOLLOWS]-(:Person:Product {name: "Alicia"}) | 2 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: 
"Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"})<-[:FOLLOWS]-(:Person:DevRel {name: "Mark"}) | 4 +| (:Person:Engineering {name: "Praveena"})<-[:FOLLOWS]-(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 4 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"})<-[:FOLLOWS]-(:Person:DevRel {name: "Mark"}) | 4 +| (:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 4 +|=== + +We've got a lot more paths, with path lengths between 2 and 4 hops. +These paths have the following labels: + +* 2 hops - `Field` -> Not `Field` +* 3 hops - `Field` -> Not `Field` -> `Field` +* 4 hops - `Field` -> Not `Field` -> `Field` -> Not `Field` + +These paths are a bit difficult to read, so we can simplify the output by using the `nodes` function to just return the nodes. 
+We'll also filter the results so that we only return paths that match the complete `+Field,-Field` label filter. +We can do this by only returning paths of even length: + +.The following returns nodes of paths of 1 to 4 hops from Praveena where the nodes alternate between having the `Field` label and not having the `Field` label +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expandConfig(p, { + labelFilter: "+Field,-Field", + beginSequenceAtStart: false, + minLevel: 2, + maxLevel: 4 +}) +YIELD path +WHERE length(path) % 2 = 0 + +// Remove the Praveena node from the returned path +RETURN nodes(path)[1..] AS nodes, length(path) AS hops + +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| nodes | hops +| [(:Person:Field {name: "Joe"}), (:Person:Sales {name: "Anthony"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Engineering {name: "Zhen"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Product {name: "Alicia"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: "Mark"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Engineering {name: "Praveena"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Sales {name: "Anthony"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Engineering {name: "Zhen"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Product {name: "Alicia"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Engineering {name: "Praveena"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: "Mark"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:Engineering {name: "Zhen"}), (:Person:Field {name: "Stefan"}), (:Person:DevRel {name: "Mark"})] | 4 +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: "Mark"}), (:Person:Field {name: "Stefan"}), (:Person:Engineering {name: "Zhen"})] | 4 +| [(:Person:Field {name: "Joe"}), (:Person:Engineering {name: "Zhen"}), (:Person:Field {name: "Stefan"}), (:Person:DevRel {name: "Mark"})] | 4 +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: 
"Mark"}), (:Person:Field {name: "Stefan"}), (:Person:Engineering {name: "Zhen"})] | 4 +|=== + +The `\*` character can be used as a wildcard in a node sequence to indicate that any label can appear in that position. +If we want to match a sequence of nodes with any label followed by one with the `DevRel` label, we can specify the label filter `*,+DevRel` + +.The following returns nodes of paths of 2 to 4 hops from Praveena where the nodes alternate between having any label and the `DevRel` label +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expandConfig(p, { + labelFilter: "*,+DevRel", + beginSequenceAtStart: false, + minLevel: 2, + maxLevel: 4 +}) +YIELD path +WHERE length(path) % 2 = 0 + +// Remove the Praveena node from the returned path +RETURN nodes(path)[1..] AS nodes, length(path) AS hops + +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| nodes | hops +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: "Mark"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: "Mark"})] | 2 +| [(:Person:Engineering {name: "Zhen"}), (:Person:DevRel {name: "Lju"})] | 2 +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: "Mark"}), (:Person:Product {name: "Jake"}), (:Person:DevRel {name: "Lju"})] | 4 +| [(:Person:Field {name: "Joe"}), (:Person:DevRel {name: "Mark"}), (:Person:Product {name: "Jake"}), (:Person:DevRel {name: "Lju"})] | 4 +| [(:Person:Engineering {name: "Zhen"}), (:Person:DevRel {name: "Lju"}), (:Person:Product {name: "Jake"}), (:Person:DevRel {name: "Mark"})] | 4 +|=== + diff --git a/docs/asciidoc/path-finding/expand-overview.adoc b/docs/asciidoc/path-finding/expand-overview.adoc new file mode 100644 index 00000000..0242caee --- /dev/null +++ b/docs/asciidoc/path-finding/expand-overview.adoc @@ -0,0 +1,27 @@ +[[path-expander]] +== Path Expander Overview + +[abstract] +-- +This section describes procedures that can be used to do variable length path traversals. 
+-- + +The Cypher query language supports https://neo4j.com/docs/cypher-manual/current/syntax/patterns/#cypher-pattern-varlength[variable-length pattern matching^], but path expansion is limited to relationship types. +The path expander procedures enable more powerful variable length path traversals, where users can specify the following: + +* the direction of the relationship per relationship type. +* a list of label names which act as a "whitelist" or a "blacklist". +* end nodes for the expansion. + +This functionality is supported by five procedures: + +[options="header"] +|=== +| Procedure | Description | Documentation +| `apoc.path.expand()` | expands paths using Cypher's default expansion modes (bfs and 'RELATIONSHIP_PATH' uniqueness). | <> +| `apoc.path.expandConfig()` | expands paths with more flexible configuration of parameters and expansion modes. | <> +|`apoc.path.subgraphNodes()` | expands to nodes of a subgraph. | <> +| `apoc.path.subgraphAll()` | expands to nodes of a subgraph and also returns all relationships in the subgraph. | <> +| `apoc.path.spanningTree()` | expands to paths collectively forming a spanning tree. 
| <> + +|=== diff --git a/docs/asciidoc/path-finding/expand-sample-graph.adoc b/docs/asciidoc/path-finding/expand-sample-graph.adoc new file mode 100644 index 00000000..fe3423db --- /dev/null +++ b/docs/asciidoc/path-finding/expand-sample-graph.adoc @@ -0,0 +1,36 @@ +[source,cypher] +---- +MERGE (mark:Person:DevRel {name: "Mark"}) +MERGE (praveena:Person:Engineering {name: "Praveena"}) +MERGE (joe:Person:Field {name: "Joe"}) +MERGE (lju:Person:DevRel {name: "Lju"}) +MERGE (zhen:Person:Engineering {name: "Zhen"}) +MERGE (stefan:Person:Field {name: "Stefan"}) +MERGE (alicia:Person:Product {name: "Alicia"}) +MERGE (martin:Person:Engineering {name: "Martin"}) +MERGE (jake:Person:Product {name: "Jake"}) + +MERGE (zhen)-[:KNOWS]-(stefan) +MERGE (zhen)-[:KNOWS]-(lju) +MERGE (zhen)-[:KNOWS]-(praveena) +MERGE (zhen)-[:KNOWS]-(martin) +MERGE (mark)-[:KNOWS]-(jake) +MERGE (alicia)-[:KNOWS]-(jake) + +MERGE (alicia)-[:FOLLOWS]->(joe) +MERGE (joe)-[:FOLLOWS]->(mark) +MERGE (joe)-[:FOLLOWS]->(praveena) +MERGE (joe)-[:FOLLOWS]->(zhen) +MERGE (mark)-[:FOLLOWS]->(stefan) +MERGE (stefan)-[:FOLLOWS]->(joe) +MERGE (praveena)-[:FOLLOWS]->(joe) +---- + +The Neo4j Browser visualization below shows the sample graph: + +image::apoc.path.expand.svg[] + +The `KNOWS` relationship type is considered to be bidirectional, where if Zhen knows Stefan, we can imply that Stefan knows Zhen. +When using the `KNOWS` relationship we will ignore the direction. + +The `FOLLOWS` relationship has a direction, so we will specify a direction when we use it. diff --git a/docs/asciidoc/path-finding/expand-spanning-tree.adoc b/docs/asciidoc/path-finding/expand-spanning-tree.adoc index dced8b20..ad541529 100644 --- a/docs/asciidoc/path-finding/expand-spanning-tree.adoc +++ b/docs/asciidoc/path-finding/expand-spanning-tree.adoc @@ -6,22 +6,358 @@ This section describes a procedure that finds a spanning tree that starts from a set of start nodes. 
-- +Expands a spanning tree reachable from start node following relationships to max-level adhering to the label filters. +The paths returned collectively form a spanning tree. + +This procedure has the same behaviour as <> with the config `uniqueness: "NODE_GLOBAL"`. + + +* <> +* <> +* <> + ** <> + ** <> + ** <> + ** <> + +[[expand-spanning-tree-overview]] +=== Procedure Overview + +The procedure is described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.path.spanningTree.csv[] +|=== + +[[expand-spanning-tree-config]] +=== Configuration parameters + +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| minLevel | Long | -1 | the minimum number of hops in the traversal. Must be 0 or 1 if specified +| maxLevel | Long | -1 | the maximum number of hops in the traversal +| relationshipFilter | String | null | the relationship types and directions to traverse. + +See <>. +| labelFilter | String | null | the node labels to traverse. + +See <>. +| beginSequenceAtStart | Boolean | true | starts matching sequences of node labels and/or relationship types (defined in `relationshipFilter`, `labelFilter`, or `sequences`) one node away from the start node. +| bfs | Boolean | true | use Breadth First Search when traversing. Uses Depth First Search if set to `false` +| filterStartNode | Boolean | false | whether the `labelFilter` and `sequence` apply to the start node of the expansion. +| limit | Long | -1 | limit the number of paths returned. When using `bfs:true`, this has the effect of returning paths to the `n` nearest nodes with labels in the termination or end node filter, where `n` is the limit given. +| optional | Boolean | false | changes the behavior of the procedure when no results are found. If set to `true`, a `null` value is yielded whenever the expansion would normally eliminate rows due to no results.
+| endNodes | List | null | only these nodes can end returned paths, and expansion will continue past these nodes, if possible. +| terminatorNodes | List | null | Only these nodes can end returned paths, and expansion won't continue past these nodes. +| whiteListNodes | List | null | Only these nodes are allowed in the expansion (though endNodes and terminatorNodes will also be allowed, if present). +| blackListNodes | List | null | None of the paths returned will include these nodes. +|=== + +It also has the following fixed parameter: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| uniqueness | String | NODE_GLOBAL | the strategy to use when expanding relationships in a traversal. +`NODE_GLOBAL` means that a node cannot be traversed more than once. +This is what the legacy traversal framework does. +|=== + + +[[expand-spanning-tree-relationship-filters]] +==== Relationship Filters + +The syntax for relationship filters is described below: + +include::relationship-filter.adoc[] + +[[expand-spanning-tree-label-filters]] +==== Label Filters + +The syntax for label filters is described below: + +include::label-filter.adoc[] + +*Label filter operator precedence and behavior* + +include::label-filter-precedence.adoc[] + +[[expand-spanning-tree-examples]] +=== Examples + +The examples in this section are based on the following sample graph: + +include::expand-config-sample-graph.adoc[] + +[[expand-spanning-tree-examples-label-relationship-filters]] +==== Relationship Type and Node Label filters + +Let's start by expanding paths from the Praveena node. +We only want to consider the `KNOWS` relationship type, so we'll specify that as the `relationshipFilter` parameter. 
+ +.The following returns the spanning tree starting from Praveena and traversing the `KNOWS` relationship type for 1 to 2 hops +[source,cypher] ---- -apoc.path.spanningTree(startNode Node/list, {minLevel, maxLevel, relationshipFilter, labelFilter, bfs:true, filterStartNode:true, limit:-1, optional:false}) yield path +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.spanningTree(p, { + relationshipFilter: "KNOWS", + minLevel: 1, + maxLevel: 2 +}) +YIELD path +RETURN path; ---- -Expand a spanning tree reachable from start node following relationships to max-level adhering to the label filters. -The paths returned collectively form a spanning tree. +We can see a Neo4j Browser visualization of the spanning tree in <>. + +[[viz-apoc-path-subtree-praveena]] +image::apoc.path.subtree.praveena.svg[title="Spanning tree from Praveena"] + +The spanning tree contains 4 nodes apart from Praveena. +Praveena only has a direct `KNOWS` relationship to Zhen, but Zhen has `KNOWS` relationships to 3 other people, which means they're also included in the spanning tree. + +We can also provide a node label filter to restrict the nodes that are returned. +If we want to only return paths where every node has the `Engineering` label, we'll provide the value `+Engineering` to the `labelFilter` parameter. + +.The following returns the spanning tree starting from Praveena and traversing the `KNOWS` relationship type for 1 to 2 hops, only including `Engineering` nodes +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.spanningTree(p, { + relationshipFilter: "KNOWS", + labelFilter: "+Engineering", + minLevel: 1, + maxLevel: 2 +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the spanning tree in <>.
+ +[[viz-apoc-path-subtree-praveena-eng]] +image::apoc.path.subtree.praveena-eng.svg[title="Spanning tree from Praveena to engineering nodes"] + +We lose Lju and Stefan from the spanning tree because neither of those nodes had the `Engineering` label. -Accepts the same `config` values as in `expandConfig()`, though `uniqueness` is not configurable and `minLevel`, if present, must be 0 or 1. +We can specify multiple relationship types. +The following query starts from the Alicia node, and then expands the `FOLLOWS` and `KNOWS` relationships: -.Example +.The following returns the spanning tree starting from Alicia and traversing the `FOLLOWS` or `KNOWS` relationship type for 1 to 3 hops +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3 +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the spanning tree in <>. + +[[viz-apoc-path-subtree-alicia]] +image::apoc.path.subtree.alicia.svg[title="Spanning tree from Alicia"] + +This query returns paths to 11 of the 12 people in the graph, which indicates that Alicia is very well connected. -Expand a spanning tree of all contiguous :User nodes: +We can also specify traversal termination criteria using label filters. +If we wanted to terminate a traversal as soon as the traversal encounters a node containing the `Engineering` label, we can use the `/Engineering` node filter. 
+.The following returns the spanning tree starting from Alicia and traversing the `FOLLOWS` or `KNOWS` relationship type for 1 to 3 hops, terminating as soon as a node with the `Engineering` label is reached [source,cypher] ---- -MATCH (user:User) WHERE user.id = 460 -CALL apoc.path.spanningTree(user, {labelFilter:'+User'}) YIELD path +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: "/Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD path RETURN path; ----- \ No newline at end of file +---- + +We can see a Neo4j Browser visualization of the spanning tree in <>. + +[[viz-apoc-path-subtree-alicia-eng]] +image::apoc.path.subtree.alicia-eng.svg[title="Spanning tree from Alicia terminating at `Engineering` nodes"] + +Our spanning tree has been reduced to only 3 other nodes apart from Alicia. +But this query doesn't capture the complete spanning tree from Alicia containing nodes with the `Engineering` label. +We can use the `>Engineering` node filter to define a traversal that: + +* only returns paths that terminate at nodes with the `Engineering` label +* continues expansion to end nodes after that, looking for more paths that end with the `Engineering` label + +.The following returns the spanning tree starting from Alicia and traversing the `FOLLOWS` or `KNOWS` relationship type for 1 to 3 hops, where paths end with a node with the `Engineering` label + +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: ">Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the spanning tree in <>. + +[[viz-apoc-path-subtree-alicia-eng-end]] +image::apoc.path.subtree.alicia-eng-end.svg[title="Spanning tree from Alicia to `Engineering` nodes"] + +The spanning tree now also reaches Martin, via a relationship from Zhen. 
+ +[[expand-spanning-tree-examples-end-nodes-terminator-nodes]] +==== Terminator Nodes and End Nodes + +As well as specifying terminator and end labels for traversals, we can also specify terminator and end nodes. + +Let's build on the previous query that found people that Alicia `KNOWS` or `FOLLOWS`. +We want the returned spanning tree to stop as soon as the Mark, Joe, Zhen, or Rik nodes are reached. +We can do that by passing those nodes to the `terminatorNodes` parameter. + +.The following returns the spanning tree of people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, terminating as soon as Mark, Joe, Zhen, or Rik nodes are reached +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (terminator:Person) +WHERE terminator.name IN ["Mark", "Joe", "Zhen", "Rik"] +WITH p, collect(terminator) AS terminatorNodes +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + terminatorNodes: terminatorNodes +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the spanning tree in <>. + +[[viz-apoc-path-subtree-alicia-terminator]] +image::apoc.path.subtree.alicia-terminator.svg[title="Spanning tree from Alicia, terminating at Mark, Joe, Zhen, or Rik"] + +Mark and Joe are included in the spanning tree, but Rik and Zhen can't be reached. +This could be because there is no path to Zhen and Rik that doesn't go through Mark and Joe, or it could mean that there's no path based on the other traversal criteria. + +We can find out whether Mark, Joe, Zhen, or Rik are reachable by passing these nodes to the `endNodes` parameter.
+ +.The following returns the spanning tree of people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, ending as soon as Mark, Joe, Zhen, or Rik nodes are reached +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (end:Person) +WHERE end.name IN ["Mark", "Joe", "Zhen", "Rik"] +WITH p, collect(end) AS endNodes +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + endNodes: endNodes +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the returned spanning tree in <>. + +[[viz-apoc-path-subtree-alicia-end]] +image::apoc.path.subtree.alicia-end.svg[title="Spanning tree from Alicia, ending at Mark, Joe, Zhen, or Rik"] + +Our spanning tree now includes Joe, Mark, and Zhen, but Rik is still unreachable. + +[[expand-spanning-tree-examples-whitelist-blacklist]] +==== Whitelist Nodes and Blacklist Nodes + +Whitelist and blacklist nodes can also be specified. + +Let's build on the previous query that found people that Alicia `KNOWS` or `FOLLOWS`. +We want any returned paths to only include the nodes Mark, Jonny, and Zhen, which we can do by passing these nodes to the parameter `whitelistNodes`. + +.The following returns the spanning tree reachable by the `FOLLOWS` or `KNOWS` relationship types at 1 to 3 hops from Alicia, where the paths to those nodes must only include Mark, Jonny, or Zhen +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (whitelist:Person) +WHERE whitelist.name IN ["Jonny", "Mark", "Zhen"] +WITH p, collect(whitelist) AS whitelistNodes +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + whitelistNodes: whitelistNodes +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the returned spanning tree in <>.
+ +[[viz-apoc-path-spanningTree-alicia-whitelist]] +image::apoc.path.spanningTree.alicia-whitelist.svg[title="Spanning Tree from Alicia where paths to nodes include Mark, Jonny, or Zhen"] + +Only Jonny can be reached. +We can therefore infer that Mark and Zhen are only reachable via another node that wasn't included in the whitelist. + +A blacklist is used to exclude nodes from the paths that lead to reachable nodes. +If we want to return nodes that are reachable without going through Joe, we can do this by passing the Joe node to the `blacklistNodes` parameter. + +.The following returns the spanning tree reachable by the `FOLLOWS` or `KNOWS` relationship types at 1 to 3 hops from Alicia, where the paths to those nodes do not go through Joe +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (joe:Person {name: "Joe"}) +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + blacklistNodes: [joe] +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the returned spanning tree in <>. + +[[viz-apoc-path-spanningTree-alicia-blacklist-joe]] +image::apoc.path.spanningTree.alicia-blacklist-joe.svg[title="Spanning tree from Alicia where paths to nodes can't go via Joe"] + +[[expand-spanning-tree-examples-sequence-rel-types]] +==== Sequences of relationship types + +Sequences of relationship types can be specified by comma separating the values passed to `relationshipFilter`. + +For example, if we want to start from the Joe node and traverse a sequence of the `FOLLOWS` relationship in the outgoing direction and the `KNOWS` relationship in either direction, we can specify the relationship filter `FOLLOWS>,KNOWS`.
+ +.The following returns the reachable nodes by following the `FOLLOWS` and `KNOWS` relationship types alternately from Joe +[source,cypher] +---- +MATCH (p:Person {name: "Joe"}) +CALL apoc.path.spanningTree(p, { + relationshipFilter: "FOLLOWS>,KNOWS", + beginSequenceAtStart: true, + minLevel: 1, + maxLevel: 4 +}) +YIELD path +RETURN path; +---- + +We can see a Neo4j Browser visualization of the returned spanning tree in <>. + +[[viz-apoc-path-spanningTree-joe-rel-sequence]] +image::apoc.path.spanningTree.joe-sequence.svg[title="Spanning tree from Joe via alternate `FOLLOWS` and `KNOWS` relationship types"] \ No newline at end of file diff --git a/docs/asciidoc/path-finding/expand-subgraph-nodes.adoc b/docs/asciidoc/path-finding/expand-subgraph-nodes.adoc new file mode 100644 index 00000000..afaf1ba7 --- /dev/null +++ b/docs/asciidoc/path-finding/expand-subgraph-nodes.adoc @@ -0,0 +1,412 @@ +[[expand-subgraph-nodes]] +== Expand to nodes in a subgraph + +[abstract] +-- +This section describes a procedure that expands a subgraph from a set of start nodes, and returns all nodes within the subgraph. +-- + +This procedure expands to subgraph nodes reachable from the start node following relationships to max-level adhering to the label filters. +It allows fine grained control over the traversals that expand the subgraph. + +This section includes: + +* <> +* <> +* <> + ** <> + ** <> + ** <> + ** <> + +[[expand-subgraph-nodes-overview]] +=== Procedure Overview + +The procedure is described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.path.subgraphNodes.csv[] +|=== + +[[expand-subgraph-nodes-config]] +=== Configuration parameters + +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| minLevel | Long | -1 | the minimum number of hops in the traversal. 
Must be 0 or 1 if specified +| maxLevel | Long | -1 | the maximum number of hops in the traversal +| relationshipFilter | String | null | the relationship types and directions to traverse. + +See <>. +| labelFilter | String | null | the node labels to traverse. + +See <>. +| beginSequenceAtStart | Boolean | true | starts matching sequences of node labels and/or relationship types (defined in `relationshipFilter`, `labelFilter`, or `sequences`) one node away from the start node. +| bfs | Boolean | true | use Breadth First Search when traversing. Uses Depth First Search if set to `false` +| filterStartNode | Boolean | false | whether the `labelFilter` and `sequence` apply to the start node of the expansion. +| limit | Long | -1 | limit the number of paths returned. When using `bfs:true`, this has the effect of returning paths to the `n` nearest nodes with labels in the termination or end node filter, where `n` is the limit given. +| optional | Boolean | false | changes the behavior of the procedure when no results are found. If set to `true`, a `null` value is yielded whenever the expansion would normally eliminate rows due to no results. +| endNodes | List | null | only these nodes can end returned paths, and expansion will continue past these nodes, if possible. +| terminatorNodes | List | null | Only these nodes can end returned paths, and expansion won't continue past these nodes. +| whiteListNodes | List | null | Only these nodes are allowed in the expansion (though endNodes and terminatorNodes will also be allowed, if present). +| blackListNodes | List | null | None of the paths returned will include these nodes. +|=== + +It also has the following fixed parameter: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| uniqueness | String | NODE_GLOBAL | the strategy to use when expanding relationships in a traversal. +`NODE_GLOBAL` means that a node cannot be traversed more than once. +This is what the legacy traversal framework does.
+|=== + + +[[expand-subgraph-nodes-relationship-filters]] +==== Relationship Filters + +The syntax for relationship filters is described below: + +include::relationship-filter.adoc[] + +[[expand-subgraph-nodes-label-filters]] +==== Label Filters + +The syntax for label filters is described below: + +include::label-filter.adoc[] + +*Label filter operator precedence and behavior* + +include::label-filter-precedence.adoc[] + +[[expand-subgraph-nodes-examples]] +=== Examples + +The examples in this section are based on the following sample graph: + +include::expand-config-sample-graph.adoc[] + +[[expand-subgraph-nodes-examples-label-relationship-filters]] +==== Relationship Type and Node Label filters + +Let's start by expanding paths from the Praveena node. +We only want to consider the `KNOWS` relationship type, so we'll specify that as the `relationshipFilter` parameter. + +.The following returns the people reachable by the `KNOWS` relationship at 1 to 2 hops from Praveena +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.subgraphNodes(p, { + relationshipFilter: "KNOWS", + minLevel: 1, + maxLevel: 2 +}) +YIELD node +RETURN node; +---- + +.Results +[opts="header"] +|=== +| node +| (:Person:Engineering {name: "Zhen"}) +| (:Person:Engineering {name: "Martin"}) +| (:Person:DevRel {name: "Lju"}) +| (:Person:Field {name: "Stefan"}) +|=== + +4 people are reachable from Praveena. + +We can also provide a node label filter to restrict the nodes that are returned. +If we want to only return paths where every node has the `Engineering` label, we'll provide the value `+Engineering` to the `labelFilter` parameter. 
+ +.The following returns the `Engineering` people reachable by the `KNOWS` relationship at 1 to 2 hops from Praveena +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.subgraphNodes(p, { + relationshipFilter: "KNOWS", + labelFilter: "+Engineering", + minLevel: 1, + maxLevel: 2 +}) +YIELD node +RETURN node; +---- + +.Results +[opts="header"] +|=== +| node +| (:Person:Engineering {name: "Zhen"}) +| (:Person:Engineering {name: "Martin"}) +|=== + +We lose Lju and Stefan because those nodes don't have the `Engineering` label. + +We can specify multiple relationship types. +The following query starts from the Alicia node, and then expands the `FOLLOWS` and `KNOWS` relationships: + +[[expand-subgraph-nodes-examples-label-relationship-filters-alicia-all]] +.The following returns the people reachable by the `FOLLOWS` or `KNOWS` relationships at 1 to 3 hops from Alicia +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.subgraphNodes(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3 +}) +YIELD node +RETURN node; +---- + +.Results +[opts="header"] +|=== +| node +| (:Person:Sales {name: "Jonny"}) +| (:Person:Field {name: "Joe"}) +| (:Person:Product {name: "Jake"}) +| (:Person:Sales {name: "Anthony"}) +| (:Person:Engineering {name: "Praveena"}) +| (:Person:DevRel {name: "Mark"}) +| (:Person:Engineering {name: "Zhen"}) +| (:Person:Field {name: "Stefan"}) +| (:Person:Product {name: "John"}) +| (:Person:Engineering {name: "Martin"}) +| (:Person:DevRel {name: "Lju"}) +|=== + +This list includes all but one of the people in our graph, which means that Alicia is very well connected. + +We can also specify traversal termination criteria using label filters. +If we wanted to terminate a traversal as soon as the traversal encounters a node containing the `Engineering` label, we can use the `/Engineering` node filter. 
+ +.The following returns the people reachable by the `FOLLOWS` or `KNOWS` relationships at 1 to 3 hops from Alicia, terminating as soon as a node with the `Engineering` label is reached +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.subgraphNodes(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: "/Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD node +RETURN node; +---- + +.Results +[opts="header"] +|=== +| node +| (:Person:Engineering {name: "Zhen"}) +| (:Person:Engineering {name: "Praveena"}) +|=== + +We're now down to only 2 people - Zhen and Praveena. +But this query doesn't capture all of the paths from Alicia that end in a node with the `Engineering` label. +We can use the `>Engineering` node filter to define a traversal that: + +* only returns nodes that have the `Engineering` label +* continues expansion to end nodes after that, looking for more nodes that have the `Engineering` label + +.The following returns `Engineering` people reachable by the `FOLLOWS` or `KNOWS` relationships at 1 to 3 hops from Alicia +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.subgraphNodes(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: ">Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD node +RETURN node; +---- + +.Results +[opts="header"] +|=== +| node +| (:Person:Engineering {name: "Zhen"}) +| (:Person:Engineering {name: "Praveena"}) +| (:Person:Engineering {name: "Martin"}) +|=== + +Our query now also returns Martin, who must have been reachable via either Zhen or Praveena. + +[[expand-subgraph-nodes-examples-end-nodes-terminator-nodes]] +==== Terminator Nodes and End Nodes + +As well as specifying terminator and end labels for traversals, we can also specify terminator and end nodes. +For this procedure, these parameters both behave the same way - the procedure will determine whether any of the nodes provided as terminator or end nodes are reachable from the start node. 
+
+Let's build on the previous query that found people that Alicia `KNOWS` or `FOLLOWS`.
+We want to know whether there's a way to get from Alicia to Joe, which we can do by passing the Joe node to the `terminatorNodes` parameter.
+
+.The following returns the terminator nodes reachable by the `FOLLOWS` or `KNOWS` relationships at 1 to 3 hops from Alicia
+[source,cypher]
+----
+MATCH (p:Person {name: "Alicia"})
+MATCH (joe:Person {name: "Joe"})
+CALL apoc.path.subgraphNodes(p, {
+    relationshipFilter: "FOLLOWS>|KNOWS",
+    minLevel: 1,
+    maxLevel: 3,
+    terminatorNodes: [joe]
+})
+YIELD node
+RETURN node;
+----
+
+.Results
+[opts="header"]
+|===
+| node
+| (:Person:Field {name: "Joe"})
+|===
+
+We do indeed have a path from Alicia to Joe.
+
+And we know from <<expand-subgraph-nodes-examples-label-relationship-filters-alicia-all>> that Alicia can actually reach all other nodes in the graph using the `KNOWS` or `FOLLOWS` relationships.
+But what if we want to determine whether Mark, Joe, Zhen, and Praveena are reachable using only the `KNOWS` relationship?
+
+.The following returns the end nodes reachable by the `KNOWS` relationships at 1 to 3 hops from Alicia
+[source,cypher]
+----
+MATCH (p:Person {name: "Alicia"})
+MATCH (end:Person)
+WHERE end.name IN ["Mark", "Joe", "Zhen", "Praveena"]
+WITH p, collect(end) AS endNodes
+CALL apoc.path.subgraphNodes(p, {
+    relationshipFilter: "KNOWS",
+    minLevel: 1,
+    maxLevel: 3,
+    endNodes: endNodes
+})
+YIELD node
+RETURN node;
+----
+
+.Results
+[opts="header"]
+|===
+| node
+| (:Person:DevRel {name: "Mark"})
+|===
+
+Only Mark is reachable!
+
+[[expand-subgraph-nodes-examples-whitelist-blacklist]]
+==== Whitelist Nodes and Blacklist Nodes
+
+Whitelist and blacklist nodes can also be specified.
+
+Let's build on the query that found people that Alicia `KNOWS` or `FOLLOWS`.
+We want to find the nodes reachable via paths that only include Jonny, Mark, or Zhen.
+We can do this by passing those nodes to the parameter `whitelistNodes`.
+
+.The following returns nodes reachable by the `FOLLOWS` or `KNOWS` relationship types at 1 to 3 hops from Alicia, where the paths to those nodes must only include Mark, Jonny, or Zhen
+[source,cypher]
+----
+MATCH (p:Person {name: "Alicia"})
+MATCH (whitelist:Person)
+WHERE whitelist.name IN ["Jonny", "Mark", "Zhen"]
+WITH p, collect(whitelist) AS whitelistNodes
+CALL apoc.path.subgraphNodes(p, {
+    relationshipFilter: "FOLLOWS>|KNOWS",
+    minLevel: 1,
+    maxLevel: 3,
+    whitelistNodes: whitelistNodes
+})
+YIELD node
+RETURN node;
+----
+
+.Results
+[opts="header"]
+|===
+| node
+| (:Person:Sales {name: "Jonny"})
+|===
+
+Only Jonny can be reached.
+We can therefore infer that Mark and Zhen are only reachable via another node that wasn't included in the whitelist.
+
+A blacklist is used to exclude nodes from the paths that lead to reachable nodes.
+If we want to return nodes that are reachable without going through Joe, we can do this by passing the Joe node to the `blacklistNodes` parameter.
+
+.The following returns nodes reachable by the `FOLLOWS` or `KNOWS` relationship types at 1 to 3 hops from Alicia, where the paths to those nodes do not go through Joe
+[source,cypher]
+----
+MATCH (p:Person {name: "Alicia"})
+MATCH (joe:Person {name: "Joe"})
+CALL apoc.path.subgraphNodes(p, {
+    relationshipFilter: "FOLLOWS>|KNOWS",
+    minLevel: 1,
+    maxLevel: 3,
+    blacklistNodes: [joe]
+})
+YIELD node
+RETURN node;
+----
+
+.Results
+[opts="header"]
+|===
+| node
+| (:Person:Sales {name: "Jonny"})
+| (:Person:Product {name: "Jake"})
+| (:Person:Sales {name: "Anthony"})
+| (:Person:DevRel {name: "Mark"})
+| (:Person:Field {name: "Stefan"})
+|===
+
+Only 5 nodes are reachable without going through the Joe node.
+If we remember back to <<expand-subgraph-nodes-examples-label-relationship-filters-alicia-all>>, 11 nodes were reachable when we didn't specify a blacklist.
+This indicates that Joe is an important connector in this graph.
+ + +[[expand-subgraph-nodes-examples-sequence-rel-types]] +==== Sequences of relationship types + +Sequences of relationship types can be specified by comma separating the values passed to `relationshipFilter`. + +For example, if we want to start from the Joe node and traverse a sequence of the `FOLLOWS` relationship in the outgoing direction and the `KNOWS` relationship in either direction, we can specify the relationship filter `FOLLOWS>,KNOWS`. + +.The following returns the reachable nodes by following the `FOLLOWS` and `KNOWS` relationship types alternately from Joe +[source,cypher] +---- +MATCH (p:Person {name: "Joe"}) +CALL apoc.path.subgraphNodes(p, { + relationshipFilter: "FOLLOWS>,KNOWS", + beginSequenceAtStart: true, + minLevel: 1, + maxLevel: 4 +}) +YIELD node +RETURN node; +---- + +.Results +[opts="header"] +|=== +| node +| (:Person:Engineering {name: "Praveena"}) +| (:Person:DevRel {name: "Mark"}) +| (:Person:Engineering {name: "Zhen"}) +| (:Person:Product {name: "Jake"}) +| (:Person:Engineering {name: "Martin"}) +| (:Person:DevRel {name: "Lju"}) +| (:Person:Field {name: "Stefan"}) +|=== diff --git a/docs/asciidoc/path-finding/expand-subgraph.adoc b/docs/asciidoc/path-finding/expand-subgraph.adoc index 3b88b8bf..8bc2ae42 100644 --- a/docs/asciidoc/path-finding/expand-subgraph.adoc +++ b/docs/asciidoc/path-finding/expand-subgraph.adoc @@ -6,24 +6,355 @@ This section describes a procedure that expands a subgraph from a set of start nodes, and returns all nodes and relationships within the subgraph. -- +The expand to subgraph procedure expands to subgraph nodes reachable from the start node following relationships to max-level adhering to the label filters. +Returns the collection of nodes in the subgraph, and the collection of relationships between all subgraph nodes. +It allows fine grained control over the traversals that expand these subgraphs. 
+ +This section includes: + +* <> +* <> +* <> + ** <> + ** <> + ** <> + ** <> + +[[expand-subgraph-overview]] +=== Procedure Overview + +The procedure is described below: + +[separator=¦,opts=header,cols="1,1m,1m,5"] +|=== +include::../../../build/generated-documentation/apoc.path.subgraphAll.csv[] +|=== + +[[expand-subgraph-config]] +=== Configuration parameters + +The procedures support the following config parameters: + +.Config parameters +[opts=header] +|=== +| name | type | default | description +| minLevel | Long | -1 | the minimum number of hops in the traversal. Must be 0 or 1 if specified +| maxLevel | Long | -1 | the maximum number of hops in the traversal +| relationshipFilter | String | null | the relationship types and directions to traverse. + +See <>. +| labelFilter | String | null | the node labels to traverse. + +See <>. +| beginSequenceAtStart | Boolean | true | starts matching sequences of node labels and/or relationship types (defined in `relationshipFilter`, `labelFilter`, or `sequences`) one node away from the start node. +| bfs | Boolean | true | use Breadth First Search when traversing. Uses Depth First Search if set to `false` +| filterStartNode | Boolean | false | whether the `labelFilter` and `sequence` apply to the start node of the expansion. +| limit | Long | -1 | limit the number of paths returned. When using `bfs:true`, this has the effect of returning paths to the `n` nearest nodes with labels in the termination or end node filter, where `n` is the limit given. +If set to `true`, a `null` value is yielded whenever the expansion would normally eliminate rows due to no results. +| endNodes | List | null | only these nodes can end returned paths, and expansion will continue past these nodes, if possible. +| terminatorNodes | List | null | Only these nodes can end returned paths, and expansion won't continue past these nodes. 
+| whitelistNodes | List | null | Only these nodes are allowed in the expansion (though endNodes and terminatorNodes will also be allowed, if present).
+| blacklistNodes | List | null | None of the paths returned will include these nodes.
+|===
+
+It also has the following fixed parameter:
+
+.Config parameters
+[opts=header]
+|===
+| name | type | default | description
+| uniqueness | String | NODE_GLOBAL | the strategy to use when expanding relationships in a traversal.
+`NODE_GLOBAL` means that a node cannot be traversed more than once.
+This is what the legacy traversal framework does.
+|===
+
+[[expand-subgraph-relationship-filters]]
+==== Relationship Filters
+
+The syntax for relationship filters is described below:
+
+include::relationship-filter.adoc[]
+
+[[expand-subgraph-label-filters]]
+==== Label Filters
+
+The syntax for label filters is described below:
+
+include::label-filter.adoc[]
+
+*Label filter operator precedence and behavior*
+
+include::label-filter-precedence.adoc[]
+
+
+[[expand-subgraph-examples]]
+=== Examples
+
+The examples in this section are based on the following sample graph:
+
+include::expand-config-sample-graph.adoc[]
+
+[[expand-subgraph-examples-label-relationship-filters]]
+==== Relationship Type and Node Label filters
+
+Let's start by expanding paths from the Praveena node.
+We only want to consider the `KNOWS` relationship type, so we'll specify that as the `relationshipFilter` parameter.
+ +.The following returns the subgraph reachable by the `KNOWS` relationship at 1 to 2 hops from Praveena +[source,cypher] ---- -apoc.path.subgraphAll(startNode Node/list, {minLevel, maxLevel, relationshipFilter, labelFilter, bfs:true, filterStartNode:true, limit:-1}) yield nodes, relationships +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "KNOWS", + minLevel: 1, + maxLevel: 2 +}) +YIELD nodes, relationships +RETURN nodes, relationships; ---- -Expand to subgraph nodes reachable from the start node following relationships to max-level adhering to the label filters. -Returns the collection of nodes in the subgraph, and the collection of relationships between all subgraph nodes. +We can see a Neo4j Browser visualization of the returned subgraph in <>. + +[[viz-apoc-path-subgraphAll-praveena]] +image::apoc.path.subgraphAll.praveena.svg[title="Subgraph from Praveena"] + + +We can also provide a node label filter to restrict the nodes that are returned. +If we want to only return paths where every node has the `Engineering` label, we'll provide the value `+Engineering` to the `labelFilter` parameter. + +.The following returns the subgraph o `Engineering` people reachable by the `KNOWS` relationship at 1 to 2 hops from Praveena +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "KNOWS", + labelFilter: "+Engineering", + minLevel: 1, + maxLevel: 2 +}) +YIELD nodes, relationships +RETURN nodes, relationships; +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. + +[[viz-apoc-path-subgraphAll-praveena-engineering]] +image::apoc.path.subgraphAll.praveena-engineering.svg[title="Subgraph of `Engineering` nodes from Praveena"] + +We lose Lju and Stefan because those nodes don't have the `Engineering` label. + +We can specify multiple relationship types. 
+The following query starts from the Alicia node, and then expands the `FOLLOWS` and `KNOWS` relationships: + +.The following returns the subgraph of people reachable by the `FOLLOWS` or `KNOWS` relationships at 1 to 3 hops from Alicia +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3 +}) +YIELD nodes, relationships +RETURN nodes, relationships; +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. + +[[viz-apoc-path-subgraphAll-alicia]] +image::apoc.path.subgraphAll.alicia.svg[title="Subgraph from Alicia"] + +This subgraph includes all but one of the people in our graph, which means that Alicia is very well connected. + +We can also specify traversal termination criteria using label filters. +If we wanted to terminate a traversal as soon as the traversal encounters a node containing the `Engineering` label, we can use the `/Engineering` node filter. + +.The following returns the subgraph reachable by the `FOLLOWS` or `KNOWS` relationships at 1 to 3 hops from Alicia, terminating as soon as a node with the `Engineering` label is reached +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: "/Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD nodes, relationships +RETURN nodes, relationships; +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. + +[[viz-apoc-path-subgraphAll-alicia-engineering]] +image::apoc.path.subgraphAll.alicia-engineering.svg[title="Subgraph from Alicia terminating at `Engineering` nodes"] + +We're now down to only 2 people - Zhen and Praveena. +But this query doesn't capture all of the paths from Alicia that end in a node with the `Engineering` label. 
+We can use the `>Engineering` node filter to define a traversal that: + +* only returns paths that terminate at nodes with the `Engineering` label +* continues expansion to end nodes after that, looking for more paths that end with the `Engineering` label + +.The following returns the subgraph of `Engineering` people reachable by the `FOLLOWS` or `KNOWS` relationships at 1 to 3 hops from Alicia +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + labelFilter: ">Engineering", + minLevel: 1, + maxLevel: 3 +}) +YIELD nodes, relationships +RETURN nodes, relationships; +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. + +[[viz-apoc-path-subgraphAll-alicia-engineering-end]] +image::apoc.path.subgraphAll.alicia-engineering-end.svg[title="Subgraph from Alicia ending at `Engineering` nodes"] + +Our subgraph now also includes Martin, who is reached via a relationship from Zhen. -Accepts the same `config` values as in `expandConfig()`, though `uniqueness` is not configurable and `minLevel`, if present, must be 0 or 1. +[[expand-subgraph-examples-end-nodes-terminator-nodes]] +==== Terminator Nodes and End Nodes -The `optional` config value isn't needed, as empty lists are yielded if there are no results, so rows are never eliminated. +As well as specifying terminator and end labels for traversals, we can also specify terminator and end nodes. -.Example +Let's build on the previous query that found people that Alicia `KNOWS` or `FOLLOWS`. +We want the returned subgraph to stop as soon as the Mark, Joe, Zhen, or Praveena nodes are reached. +We can do that by passing those nodes to the `terminatorNodes` parameter. 
-Expand to local subgraph (and all its relationships) within 4 traversals: +.The following returns the subgraph of people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, terminating as soon as Mark, Joe, Zhen, or Rik nodes are reached +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (terminator:Person) +WHERE terminator.name IN ["Mark", "Joe", "Zhen", "Rik"] +WITH p, collect(terminator) AS terminatorNodes +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + terminatorNodes: terminatorNodes +}) +YIELD nodes, relationships +RETURN nodes, relationships; +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. + +[[viz-apoc-path-subgraphAll-alicia-terminator]] +image::apoc.path.subgraphAll.alicia-terminator.svg[title="Subgraph from Alicia terminating at Mark, Joe, Zhen, or Rik"] + +We have paths to Mark and Joe, but Zhen and Rik can't be reached +This could be because there is no path to Zhen and Rik that doesn't go through Mark and Joe, or it could mean that there's no path based on the other traversal criteria. +We can find out whether Mark, Joe, Zhen, or Rik are reachable by passing these nodes to the `endNodes` parameter. + +.The following returns the subgraph of people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, ending as soon as Mark, Joe, Zhen, or Rik nodes are reached [source,cypher] ---- -MATCH (user:User) WHERE user.id = 460 -CALL apoc.path.subgraphAll(user, {maxLevel:4}) YIELD nodes, relationships +MATCH (p:Person {name: "Alicia"}) +MATCH (end:Person) +WHERE end.name IN ["Mark", "Joe", "Zhen", "Rik"] +WITH p, collect(end) AS endNodes +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + endNodes: endNodes +}) +YIELD nodes, relationships RETURN nodes, relationships; ----- \ No newline at end of file +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. 
+
+[[viz-apoc-path-subgraphAll-alicia-end]]
+image::apoc.path.subgraphAll.alicia-end.svg[title="Subgraph from Alicia ending at Mark, Joe, Zhen, or Rik"]
+
+We can now reach Joe, Mark, and Zhen, but Rik is still unreachable.
+
+[[expand-subgraph-examples-whitelist-blacklist]]
+==== Whitelist Nodes and Blacklist Nodes
+
+Whitelist and blacklist nodes can also be specified.
+
+Let's build on the previous query that found people that Alicia `KNOWS` or `FOLLOWS`.
+We want any returned paths to only include the nodes Jonny, Mark, and Zhen, which we can do by passing these nodes to the parameter `whitelistNodes`.
+
+.The following returns nodes reachable by the `FOLLOWS` or `KNOWS` relationship types at 1 to 3 hops from Alicia, where the paths to those nodes must only include Mark, Jonny, or Zhen
+[source,cypher]
+----
+MATCH (p:Person {name: "Alicia"})
+MATCH (whitelist:Person)
+WHERE whitelist.name IN ["Jonny", "Mark", "Zhen"]
+WITH p, collect(whitelist) AS whitelistNodes
+CALL apoc.path.subgraphAll(p, {
+    relationshipFilter: "FOLLOWS>|KNOWS",
+    minLevel: 1,
+    maxLevel: 3,
+    whitelistNodes: whitelistNodes
+})
+YIELD nodes, relationships
+RETURN nodes, relationships;
+----
+
+We can see a Neo4j Browser visualization of the returned subgraph in <<viz-apoc-path-subgraphAll-alicia-whitelist>>.
+
+[[viz-apoc-path-subgraphAll-alicia-whitelist]]
+image::apoc.path.subgraphAll.alicia-whitelist.svg[title="Subgraph from Alicia where paths to nodes include Mark, Jonny, or Zhen"]
+
+Only Jonny can be reached.
+We can therefore infer that Mark and Zhen are only reachable via another node that wasn't included in the whitelist.
+
+A blacklist is used to exclude nodes from the paths that lead to reachable nodes.
+If we want to return nodes that are reachable without going through Joe, we can do this by passing the Joe node to the `blacklistNodes` parameter.
+ +.The following returns nodes reachable by the `FOLLOWS` or `KNOWS` relationship types at 1 to 3 hops from Alicia, where the paths to those nodes do not go through Joe +[source,cypher] +---- +MATCH (p:Person {name: "Alicia"}) +MATCH (joe:Person {name: "Joe"}) +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "FOLLOWS>|KNOWS", + minLevel: 1, + maxLevel: 3, + blacklistNodes: [joe] +}) +YIELD nodes, relationships +RETURN nodes, relationships; +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. + +[[viz-apoc-path-subgraphAll-alicia-blacklist-joe]] +image::apoc.path.subgraphAll.alicia-blacklist-joe.svg[title="Subgraph from Alicia where paths to nodes can't go via Joe"] + +[[expand-subgraph-examples-sequence-rel-types]] +==== Sequences of relationship types + +Sequences of relationship types can be specified by comma separating the values passed to `relationshipFilter`. + +For example, if we want to start from the Joe node and traverse a sequence of the `FOLLOWS` relationship in the outgoing direction and the `KNOWS` relationship in either direction, we can specify the relationship filter `FOLLOWS>,KNOWS`. + +.The following returns the reachable nodes by following the `FOLLOWS` and `KNOWS` relationship types alternately from Joe +[source,cypher] +---- +MATCH (p:Person {name: "Joe"}) +CALL apoc.path.subgraphAll(p, { + relationshipFilter: "FOLLOWS>,KNOWS", + beginSequenceAtStart: true, + minLevel: 1, + maxLevel: 4 +}) +YIELD nodes, relationships +RETURN nodes, relationships; +---- + +We can see a Neo4j Browser visualization of the returned subgraph in <>. 
+ +[[viz-apoc-path-subgraphAll-joe-rel-sequence]] +image::apoc.path.subgraphAll.joe-sequence.svg[title="Subgraph from Joe via alternate `FOLLOWS` and `KNOWS` relationship types"] \ No newline at end of file diff --git a/docs/asciidoc/path-finding/expand.adoc b/docs/asciidoc/path-finding/expand.adoc index 9ba663f7..d0212134 100644 --- a/docs/asciidoc/path-finding/expand.adoc +++ b/docs/asciidoc/path-finding/expand.adoc @@ -1,542 +1,197 @@ -[[path-expander]] -== Path Expander +[[path-expander-paths]] +== Expand paths [abstract] -- -This section describes procedures that can be used to do variable length path traversals. +This section describes a procedure that can be used to expand the paths of variable length path traversals. -- -Thanks to Andrew Bowman (@inversefalcon) and Kees Vegter (@keesvegter) +The expand paths procedure is the most basic of the <>. +This procedure enables path traversals based on relationship filters and node filters. +See <> if more control is required over the traversal. -The apoc.path.expand procedure makes it possible to do variable length path traversals where you can specify the direction of the relationship per relationship type and a list of Label names which act as a "whitelist" or a "blacklist" or define end nodes for the expansion. -The procedure will return a list of Paths in a variable name called "path". 
+This section includes the following: -[cols="1m,5"] -|=== -| call apoc.path.expand(startNode \|Node, relationshipFilter, labelFilter, minDepth, maxDepth ) yield path as | expand from given nodes(s) taking the provided restrictions into account -|=== - -Variations allow more configurable expansions, and expansions for more specific use cases: - -[cols="1m,5"] -|=== -| call apoc.path.expandConfig(startNode Node/list, {minLevel, maxLevel, relationshipFilter, labelFilter, bfs:true, uniqueness:'RELATIONSHIP_PATH', filterStartNode:true, limit, optional:false, endNodes, terminatorNodes, sequence, beginSequenceAtStart:true}) yield path | expand from given nodes(s) taking the provided restrictions into account -| call apoc.path.subgraphNodes(startNode Node/list, {minLevel, maxLevel, relationshipFilter, labelFilter, bfs:true, filterStartNode:true, limit, optional:false, endNodes, terminatorNodes, sequence, beginSequenceAtStart:true}) yield node | expand a subgraph from given nodes(s) taking the provided restrictions into account; returns all nodes in the subgraph -| call apoc.path.subgraphAll(startNode Node/list, {minLevel, maxLevel, relationshipFilter, labelFilter, bfs:true, filterStartNode:true, limit, endNodes, terminatorNodes, sequence, beginSequenceAtStart:true}) yield nodes, relationships | expand a subgraph from given nodes(s) taking the provided restrictions into account; returns the collection of subgraph nodes, and the collection of all relationships within the subgraph -| call apoc.path.spanningTree(startNode Node/list, {minLevel, maxLevel, relationshipFilter, labelFilter, bfs:true, filterStartNode:true, limit, optional:false, endNodes, terminatorNodes, sequence, beginSequenceAtStart:true}) yield path | expand a spanning tree from given nodes(s) taking the provided restrictions into account; the paths returned collectively form a spanning tree -|=== - -=== Relationship Filter - -Syntax: `[<]RELATIONSHIP_TYPE1[>]|[<]RELATIONSHIP_TYPE2[>]|...` - 
-[opts=header,cols="m,m,a"] -|=== -| input | type | direction -| LIKES> | LIKES | OUTGOING -| | any type | OUTGOING -| < | any type | INCOMING -|=== - -=== Label Filter - -Syntax: `[+-/>]LABEL1|LABEL2|*|...` - - -[opts=header,cols="m,a"] -|=== -| input | result -| -Foe | blacklist filter - No node in the path will have a label in the blacklist. -| +Friend | whitelist filter - All nodes in the path must have a label in the whitelist (exempting termination and end nodes, if using those filters). -If no whitelist operator is present, all labels are considered whitelisted. -| /Friend | termination filter - Only return paths up to a node of the given labels, and stop further expansion beyond it. -Termination nodes do not have to respect the whitelist. Termination filtering takes precedence over end node filtering. -| >Friend | end node filter - Only return paths up to a node of the given labels, but continue expansion to match on end nodes beyond it. -End nodes do not have to respect the whitelist to be returned, but expansion beyond them is only allowed if the node has a label in the whitelist. -|=== - -.Syntax Changes - -As of APOC 3.1.3.x multiple label filter operations are allowed. -In prior versions, only one type of operation is allowed in the label filter (`+` or `-` or `/` or `>`, never more than one). - -With APOC 3.2.x.x, label filters will no longer apply to starting nodes of the expansion by default, but this can be toggled with the `filterStartNode` config parameter. - -With the APOC releases in January 2018, some behavior has changed in the label filters: - -[opts=header,cols="m,a"] -|=== -| filter | changed behavior -| No filter | Now indicates the label is whitelisted, same as if it were prefixed with `+`. -Previously, a label without a filter symbol reused the previously used symbol. -| `>` (end node filter) | The label is additionally whitelisted, so expansion will always continue beyond an end node (unless prevented by the blacklist). 
-Previously, expansion would only continue if allowed by the whitelist and not disallowed by the blacklist. -This also applies at a depth below `minLevel`, allowing expansion to continue. -| `/` (termination filter) | When at depth below `minLevel`, expansion is allowed to continue and no pruning will take place (unless prevented by the blacklist). -Previously, expansion would only continue if allowed by the whitelist and not disallowed by the blacklist. -| All filters | `*` is allowed as a standin for all labels. -Additionally, compound labels are supported (like `Person:Manager`), and only apply to nodes with all of those labels present (order agnositic). -|=== - - -=== Sequences - -Introduced in the February 2018 APOC releases, path expander procedures can expand on repeating sequences of labels, relationship types, or both. - -If only using label sequences, just use the `labelFilter`, but use commas to separate the filtering for each step in the repeating sequence. - -If only using relationship sequences, just use the `relationshipFilter`, but use commas to separate the filtering for each step of the repeating sequence. - -If using sequences of both relationships and labels, use the `sequence` parameter. - -[opts=header,cols="a, m,a,m,a"] -|=== -| Usage | config param | description | syntax | explanation -| label sequences only | labelFilter | Same syntax and filters, but uses commas (`,`) to separate the filters for each step in the sequence. | - labelFilter:'Post\|-Blocked,Reply,>Admin' | Start node must be a :Post node that isn't :Blocked, next node must be a :Reply, and the next must be an :Admin, then repeat if able. Only paths ending with the `:Admin` node in that position of the sequence will be returned. -| relationship sequences only | relationshipFilter | Same syntax, but uses commas (`,`) to separate the filters for each relationship traversal in the sequence. 
| -relationshipFilter:'NEXT>,\|REPLIED>' | Expansion will first expand `NEXT>` from the start node, then `` or `REPLIED>`, then repeat if able. -| sequences of both labels and relationships | sequence | A string of comma-separated alternating label and relationship filters, for each step in a repeating sequence. The sequence should begin with a label filter, and end with a relationship filter. If present, `labelFilter`, and `relationshipFilter` are ignored, as this takes priority. | -sequence:'Post\|-Blocked, NEXT>, Reply, Admin, POSTED>\|REPLIED>' | Combines the behaviors above. -|=== +* <> +* <> +* <> +[[path-expander-paths-procedure-overview]] +=== Procedure Overview -==== Starting the sequence at one-off from the start node +The procedure is described below: -There are some uses cases where the sequence does not begin at the start node, but at one node distant. - -A new config parameter, `beginSequenceAtStart`, can toggle this behavior. - -Default value is `true`. - -If set to `false`, this changes the expected values for `labelFilter`, `relationshipFilter`, and `sequence` as noted below: - - -[opts=header,cols="m,a,m,a"] +[separator=¦,opts=header,cols="1,1m,1m,5"] |=== -| sequence | altered behavior | example | explanation -| labelFilter | The start node is not considered part of the sequence. The sequence begins one node off from the start node. | -beginSequenceAtStart:false, labelFilter:'Post\|-Blocked,Reply,>Admin' | The next node(s) out from the start node begins the sequence (and must be a :Post node that isn't :Blocked), and only paths ending with `Admin` nodes returned. -| relationshipFilter | The first relationship filter in the sequence string will not be considered part of the repeating sequence, and will only be used for the first relationship from the start node to the node that will be the actual start of the sequence. 
| -beginSequenceAtStart:false, relationshipFilter:'FIRST>,NEXT>,\|REPLIED>' | `FIRST>` will be traversed just from the start node to the node that will be the start of the repeating `NEXT>,\|REPLIED>` sequence. -| sequence | Combines the above two behaviors. | -beginSequenceAtStart:false, sequence:'FIRST>, Post\|-Blocked, NEXT>, Reply, Admin, POSTED>\|REPLIED>' | Combines the behaviors above. +include::../../../build/generated-documentation/apoc.path.expand.csv[] |=== -.Sequence tips +[[path-expander-paths-parameter-syntax]] +=== Parameter Syntax -Label filtering in sequences work together with the `endNodes`+`terminatorNodes`, though inclusion of a node must be unanimous. +This procedure takes the following parameters: -Remember that `filterStartNode` defaults to `false` for APOC 3.2.x.x and newer. If you want the start node filtered according to the first step in the sequence, you may need to set this explicitly to `true`. +* `start` - a list of nodes or node ids +* `relationshipFilter` - the relationship types to be expanded +* `labelFilter` - the node labels to be expanded +* `minLevel` - the minimum number of hops in our traversal +* `maxLevel` - the maximum number of hops in our traversal -If you need to limit the number of times a sequence repeats, this can be done with the `maxLevel` config param (multiply the number of iterations with the size of the nodes in the sequence). +*Relationship Filters* -As paths are important when expanding sequences, we recommend avoiding `apoc.path.subgraphNodes()`, `apoc.path.subgraphAll()`, and `apoc.path.spanningTree()` when using sequences, -as the configurations that make these efficient at matching to distinct nodes may interfere with sequence pathfinding. +The syntax for relationship filters is described below: +include::relationship-filter.adoc[] -=== Uniqueness +*Label Filters* -Uniqueness of nodes and relationships guides the expansion and the returned results. 
-Uniqueness is only configurable using `expandConfig()`. +The syntax for label filters is described below: -`subgraphNodes()`, `subgraphAll()`, and `spanningTree()` all use 'NODE_GLOBAL' uniqueness. +include::label-filter.adoc[] -[opts=header,cols="m,a"] -|=== -| value | description -| RELATIONSHIP_PATH | For each returned node there's a (relationship wise) unique path from the start node to it. This is Cypher's default expansion mode. -| NODE_GLOBAL | A node cannot be traversed more than once. This is what the legacy traversal framework does. -| NODE_LEVEL | Entities on the same level are guaranteed to be unique. -| NODE_PATH | For each returned node there's a unique path from the start node to it. -| NODE_RECENT | This is like NODE_GLOBAL, but only guarantees uniqueness among the most recent visited nodes, with a configurable count. Traversing a huge graph is quite memory intensive in that it keeps track of all the nodes it has visited. -For huge graphs a traverser can hog all the memory in the JVM, causing OutOfMemoryError. Together with this Uniqueness you can supply a count, which is the number of most recent visited nodes. This can cause a node to be visited more than once, but scales infinitely. -| RELATIONSHIP_GLOBAL | A relationship cannot be traversed more than once, whereas nodes can. -| RELATIONSHIP_LEVEL | Entities on the same level are guaranteed to be unique. -| RELATIONSHIP_RECENT | Same as for NODE_RECENT, but for relationships. -| NONE | No restriction (the user will have to manage it) -|=== - - -=== Node Filters -While label filters use labels to allow whitelisting, blacklisting, and restrictions on which kind of nodes can end or terminate expansion, -you can also filter based upon actual nodes. +[[path-expander-paths-examples]] +=== Examples -Each of these config parameter accepts a list of nodes, or a list of node ids. 
+The examples in this section are based on the following sample graph: -[opts=header,cols="m,a,a"] -|=== -| config parameter | description | added in -| endNodes | Only these nodes can end returned paths, and expansion will continue past these nodes, if possible. | Winter 2018 APOC releases. -| terminatorNodes | Only these nodes can end returned paths, and expansion won't continue past these nodes. | Winter 2018 APOC releases. -| whitelistNodes | Only these nodes are allowed in the expansion (though endNodes and terminatorNodes will also be allowed, if present). | Spring 2018 APOC releases. -| blacklistNodes | None of the paths returned will include these nodes. | Spring 2018 APOC releases. -|=== +include::expand-sample-graph.adoc[] -==== Updated behavior for Fall 2019 releases - -The node filters behavior will now more closely match those of the label filters, with respect to the `filterStartNode` and `minLevel` config options. - -1. None of the filters will apply to the start node in any way when `filterStartNode=false`. - -2. The `endNodes` and `terminatorNodes` filters will not apply when evaluating nodes below the configured `minLevel`. -(`blacklistNodes` and `whitelistNodes` will continue to apply in all cases, excepting the above mentioned case of the start node when `filterStartNode=false`) - - -=== Expand paths - -Expand from start node following the given relationships from min to max-level adhering to the label filters. 
Several variations exist: - -`apoc.path.expand()` expands paths using Cypher's default expansion modes (bfs and 'RELATIONSHIP_PATH' uniqueness) - -`apoc.path.expandConfig()` allows more flexible configuration of parameters and expansion modes - -`apoc.path.subgraphNodes()` expands to nodes of a subgraph - -`apoc.path.subgraphAll()` expands to nodes of a subgraph and also returns all relationships in the subgraph - -`apoc.path.spanningTree()` expands to paths collectively forming a spanning tree - -=== Expand +Let's start by expanding paths from the Praveena node. +We only want to consider the `KNOWS` relationship type, so we'll specify that as the relationship filter. +.The following returns the paths to people that Praveena `KNOWS` from 1 to 2 hops [source,cypher] ---- -CALL apoc.path.expand(startNode |Node, relationshipFilter, labelFilter, minLevel, maxLevel ) - -CALL apoc.path.expand(startNode |Node|list, 'TYPE|TYPE_OUT>|EndNodeLabel', minLevel, maxLevel ) yield path +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expand(p, "KNOWS", null, 1, 2) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; ---- -=== Relationship Filter - -Syntax: `[<]RELATIONSHIP_TYPE1[>]|[<]RELATIONSHIP_TYPE2[>]|...` - -[opts=header,cols="m,m,a"] +.Results +[opts="header"] |=== -| input | type | direction -| LIKES> | LIKES | OUTGOING -| | any type | OUTGOING -| < | any type | INCOMING +| path | hops +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 1 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"}) | 2 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 2 |=== -=== Label Filter - -Syntax: `[+-/>]LABEL1|LABEL2|*|...` - - 
-[opts=header,cols="m,a"] -|=== -| input | result -| -Foe | blacklist filter - No node in the path will have a label in the blacklist. -| +Friend | whitelist filter - All nodes in the path must have a label in the whitelist (exempting termination and end nodes, if using those filters). -If no whitelist operator is present, all labels are considered whitelisted. -| /Friend | termination filter - Only return paths up to a node of the given labels, and stop further expansion beyond it. -Termination nodes do not have to respect the whitelist. Termination filtering takes precedence over end node filtering. -| >Friend | end node filter - Only return paths up to a node of the given labels, but continue expansion to match on end nodes beyond it. -End nodes do not have to respect the whitelist to be returned, but expansion beyond them is only allowed if the node has a label in the whitelist. -|=== - -.Syntax Changes - -As of APOC 3.1.3.x multiple label filter operations are allowed. -In prior versions, only one type of operation is allowed in the label filter (`+` or `-` or `/` or `>`, never more than one). +Praveena only has a direct `KNOWS` relationship to Zhen, but Zhen has `KNOWS` relationships to 3 other people, which means they're 2 hops away from Praveena. -With APOC 3.2.x.x, label filters will no longer apply to starting nodes of the expansion by default, but this can be toggled with the `filterStartNode` config parameter. - -With the APOC releases in January 2018, some behavior has changed in the label filters: - -[opts=header,cols="m,a"] -|=== -| filter | changed behavior -| No filter | Now indicates the label is whitelisted, same as if it were prefixed with `+`. -Previously, a label without a filter symbol reused the previously used symbol. -| `>` (end node filter) | The label is additionally whitelisted, so expansion will always continue beyond an end node (unless prevented by the blacklist). 
-Previously, expansion would only continue if allowed by the whitelist and not disallowed by the blacklist. -This also applies at a depth below `minLevel`, allowing expansion to continue. -| `/` (termination filter) | When at depth below `minLevel`, expansion is allowed to continue and no pruning will take place (unless prevented by the blacklist). -Previously, expansion would only continue if allowed by the whitelist and not disallowed by the blacklist. -| All filters | `*` is allowed as a standin for all labels. -Additionally, compound labels are supported (like `Person:Manager`), and only apply to nodes with all of those labels present (order agnositic). -|=== - -.Examples +We can also provide a node label filter to restrict the nodes that are returned. +The following query only returns paths where every node has the `Engineering` label. +.The following returns paths containing only `Engineering` people that Praveena `KNOWS` from 1 to 2 hops [source,cypher] ---- -call apoc.path.expand(1,"ACTED_IN>|PRODUCED<|FOLLOWS<","+Movie|Person",0,3) -call apoc.path.expand(1,"ACTED_IN>|PRODUCED<|FOLLOWS<","-BigBrother",0,3) -call apoc.path.expand(1,"ACTED_IN>|PRODUCED<|FOLLOWS<","",0,3) - -// combined with cypher: - -match (tom:Person {name :"Tom Hanks"}) -call apoc.path.expand(tom,"ACTED_IN>|PRODUCED<|FOLLOWS<","+Movie|Person",0,3) yield path as pp -return pp; - -// or - -match (p:Person) with p limit 3 -call apoc.path.expand(p,"ACTED_IN>|PRODUCED<|FOLLOWS<","+Movie|Person",1,2) yield path as pp -return p, pp +MATCH (p:Person {name: "Praveena"}) +CALL apoc.path.expand(p, "KNOWS", "+Engineering", 1, 2) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; ---- -.Termination and end node label filter example - -We will first set a `:Western` label on some nodes. 
- -[source,cypher] ----- -match (p:Person) -where p.name in ['Clint Eastwood', 'Gene Hackman'] -set p:Western ----- - -Now expand from 'Keanu Reeves' to all `:Western` nodes with a termination filter: - -[source,cypher] ----- -match (k:Person {name:'Keanu Reeves'}) -call apoc.path.expandConfig(k, {relationshipFilter:'ACTED_IN|PRODUCED|DIRECTED', labelFilter:'/Western', uniqueness: 'NODE_GLOBAL'}) yield path -return path ----- - -The one returned path only matches up to 'Gene Hackman'. -While there is a path from 'Keanu Reeves' to 'Clint Eastwood' through 'Gene Hackman', no further expansion is permitted through a node in the termination filter. - -If you didn't want to stop expansion on reaching 'Gene Hackman', and wanted 'Clint Eastwood' returned as well, use the end node filter instead (`>`). - -.Label filter operator precedence and behavior - -As of APOC 3.1.3.x, multiple label filter operators are allowed at the same time. - -When processing the labelFilter string, once a filter operator is introduced, it remains the active filter until another filter supplants it. -(Not applicable after February 2018 release, as no filter will now mean the label is whitelisted). - -In the following example, `:Person` and `:Movie` labels are whitelisted, `:SciFi` is blacklisted, with `:Western` acting as an end node label, and `:Romance` acting as a termination label. - -`... labelFilter:'+Person|Movie|-SciFi|>Western|/Romance' ...` - -The precedence of operator evaluation isn't dependent upon their location in the labelFilter but is fixed: - -Blacklist filter `-`, termination filter `/`, end node filter `>`, whitelist filter `+`. - -The consequences are as follows: - -* No blacklisted label `-` will ever be present in the nodes of paths returned, no matter if the same label (or another label of a node with a blacklisted label) is included in another filter list. 
-* If the termination filter `/` or end node filter `>` is used, then only paths up to nodes with those labels will be returned as results. These end nodes are exempt from the whitelist filter. -* If a node is a termination node `/`, no further expansion beyond the node will occur. -* If a node is an end node `>`, expansion beyond that node will only occur if the end node has a label in the whitelist. This is to prevent returning paths to nodes where a node on that path violates the whitelist. -(this no longer applies in releases after February 2018) -* The whitelist only applies to nodes up to but not including end nodes from the termination or end node filters. If no end node or termination node operators are present, then the whitelist applies to all nodes of the path. -* If no whitelist operators are present in the labelFilter, this is treated as if all labels are whitelisted. -* If `filterStartNode` is false (which will be default in APOC 3.2.x.x), then the start node is exempt from the label filter. - - -=== Sequences - -Introduced in the February 2018 APOC releases, path expander procedures can expand on repeating sequences of labels, relationship types, or both. - -If only using label sequences, just use the `labelFilter`, but use commas to separate the filtering for each step in the repeating sequence. - -If only using relationship sequences, just use the `relationshipFilter`, but use commas to separate the filtering for each step of the repeating sequence. - -If using sequences of both relationships and labels, use the `sequence` parameter. - -[opts=header,cols="a, m,a,m,a"] -|=== -| Usage | config param | description | syntax | explanation -| label sequences only | labelFilter | Same syntax and filters, but uses commas (`,`) to separate the filters for each step in the sequence. 
| - labelFilter:'Post\|-Blocked,Reply,>Admin' | Start node must be a :Post node that isn't :Blocked, next node must be a :Reply, and the next must be an :Admin, then repeat if able. Only paths ending with the `:Admin` node in that position of the sequence will be returned. -| relationship sequences only | relationshipFilter | Same syntax, but uses commas (`,`) to separate the filters for each relationship traversal in the sequence. | -relationshipFilter:'NEXT>,\|REPLIED>' | Expansion will first expand `NEXT>` from the start node, then `` or `REPLIED>`, then repeat if able. -| sequences of both labels and relationships | sequence | A string of comma-separated alternating label and relationship filters, for each step in a repeating sequence. The sequence should begin with a label filter, and end with a relationship filter. If present, `labelFilter`, and `relationshipFilter` are ignored, as this takes priority. | -sequence:'Post\|-Blocked, NEXT>, Reply, Admin, POSTED>\|REPLIED>' | Combines the behaviors above. -|=== - - -==== Starting the sequence at one-off from the start node - -There are some uses cases where the sequence does not begin at the start node, but at one node distant. - -A new config parameter, `beginSequenceAtStart`, can toggle this behavior. - -Default value is `true`. - -If set to `false`, this changes the expected values for `labelFilter`, `relationshipFilter`, and `sequence` as noted below: - - -[opts=header,cols="m,a,m,a"] -|=== -| sequence | altered behavior | example | explanation -| labelFilter | The start node is not considered part of the sequence. The sequence begins one node off from the start node. | -beginSequenceAtStart:false, labelFilter:'Post\|-Blocked,Reply,>Admin' | The next node(s) out from the start node begins the sequence (and must be a :Post node that isn't :Blocked), and only paths ending with `Admin` nodes returned. 
-| relationshipFilter | The first relationship filter in the sequence string will not be considered part of the repeating sequence, and will only be used for the first relationship from the start node to the node that will be the actual start of the sequence. | -beginSequenceAtStart:false, relationshipFilter:'FIRST>,NEXT>,\|REPLIED>' | `FIRST>` will be traversed just from the start node to the node that will be the start of the repeating `NEXT>,\|REPLIED>` sequence. -| sequence | Combines the above two behaviors. | -beginSequenceAtStart:false, sequence:'FIRST>, Post\|-Blocked, NEXT>, Reply, Admin, POSTED>\|REPLIED>' | Combines the behaviors above. -|=== - -.Sequence tips - -Label filtering in sequences work together with the `endNodes`+`terminatorNodes`, though inclusion of a node must be unanimous. - -Remember that `filterStartNode` defaults to `false` for APOC 3.2.x.x and newer. If you want the start node filtered according to the first step in the sequence, you may need to set this explicitly to `true`. - -If you need to limit the number of times a sequence repeats, this can be done with the `maxLevel` config param (multiply the number of iterations with the size of the nodes in the sequence). - -As paths are important when expanding sequences, we recommend avoiding `apoc.path.subgraphNodes()`, `apoc.path.subgraphAll()`, and `apoc.path.spanningTree()` when using sequences, -as the configurations that make these efficient at matching to distinct nodes may interfere with sequence pathfinding. 
- - -=== Expand with Config - ----- -apoc.path.expandConfig(startNode Node/list, {config}) yield path expands from start nodes using the given configuration and yields the resulting paths ----- - -Takes an additional map parameter, `config`, to provide configuration options: - -.Config ----- -{minLevel: -1|number, - maxLevel: -1|number, - relationshipFilter: '[<]RELATIONSHIP_TYPE1[>]|[<]RELATIONSHIP_TYPE2[>], [<]RELATIONSHIP_TYPE3[>]|[<]RELATIONSHIP_TYPE4[>]', - labelFilter: '[+-/>]LABEL1|LABEL2|*,[+-/>]LABEL1|LABEL2|*,...', - uniqueness: RELATIONSHIP_PATH|NONE|NODE_GLOBAL|NODE_LEVEL|NODE_PATH|NODE_RECENT| - RELATIONSHIP_GLOBAL|RELATIONSHIP_LEVEL|RELATIONSHIP_RECENT, - bfs: true|false, - filterStartNode: true|false, - limit: -1|number, - optional: true|false, - endNodes: [nodes], - terminatorNodes: [nodes], - beginSequenceAtStart: true|false} ----- - -=== Start Node and label filters - -The config parameter `filterStartNode` defines whether or not the labelFilter (and `sequence`) applies to the start node of the expansion. - -Use `filterStartNode: false` when you want your label filter to only apply to all other nodes in the path, ignoring the start node. - -`filterStartNode` defaults for all path expander procedures: - -[opts=header,cols="a,a"] -|=== -| version | default -| >= APOC 3.2.x.x | filterStartNode = false -| < APOC 3.2.x.x | filterStartNode = true -|=== - -=== Limit - -You can use the `limit` config parameter to limit the number of paths returned. - -When using `bfs:true` (which is the default for all expand procedures), this has the effect of returning paths to the `n` nearest nodes with labels in the termination or end node filter, where `n` is the limit given. - -The default limit value, `-1`, means no limit. 
- -If you want to make sure multiple paths should never match to the same node, use `expandConfig()` with 'NODE_GLOBAL' uniqueness, or any expand procedure which already uses this uniqueness -(`subgraphNodes()`, `subgraphAll()`, and `spanningTree()`). - - -=== Optional - -When `optional` is set to true, the path expansion is optional, much like an OPTIONAL MATCH, so a `null` value is yielded whenever the expansion would normally eliminate rows due to no results. - -By default `optional` is false for all expansion procedures taking a config parameter. - - -.Uniqueness - -Uniqueness of nodes and relationships guides the expansion and the results returned. -Uniqueness is only configurable using `expandConfig()`. - -`subgraphNodes()`, `subgraphAll()`, and `spanningTree()` all use 'NODE_GLOBAL' uniqueness. - -[opts=header,cols="m,a"] +.Results +[opts="header"] |=== -| value | description -| RELATIONSHIP_PATH | For each returned node there's a (relationship wise) unique path from the start node to it. This is Cypher's default expansion mode. -| NODE_GLOBAL | A node cannot be traversed more than once. This is what the legacy traversal framework does. -| NODE_LEVEL | Entities on the same level are guaranteed to be unique. -| NODE_PATH | For each returned node there's a unique path from the start node to it. -| NODE_RECENT | This is like NODE_GLOBAL, but only guarantees uniqueness among the most recent visited nodes, with a configurable count. Traversing a huge graph is quite memory intensive in that it keeps track of all the nodes it has visited. -For huge graphs a traverser can hog all the memory in the JVM, causing OutOfMemoryError. Together with this Uniqueness you can supply a count, which is the number of most recent visited nodes. This can cause a node to be visited more than once, but scales infinitely. -| RELATIONSHIP_GLOBAL | A relationship cannot be traversed more than once, whereas nodes can. 
-| RELATIONSHIP_LEVEL | Entities on the same level are guaranteed to be unique. -| RELATIONSHIP_RECENT | Same as for NODE_RECENT, but for relationships. -| NONE | No restriction (the user will have to manage it) +| path | hops +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 1 +| (:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 2 |=== -=== Node filters +We lose the paths that ended with Lju and Stefan because neither of those nodes had the `Engineering` label. -While label filters use labels to allow whitelisting, blacklisting, and restrictions on which kind of nodes can end or terminate expansion, -you can also filter based upon actual nodes. - -Each of these config parameter accepts a list of nodes, or a list of node ids. - -[opts=header,cols="m,a,a"] -|=== -| config parameter | description | added in -| endNodes | Only these nodes can end returned paths, and expansion will continue past these nodes, if possible. | Winter 2018 APOC releases. -| terminatorNodes | Only these nodes can end returned paths, and expansion won't continue past these nodes. | Winter 2018 APOC releases. -| whitelistNodes | Only these nodes are allowed in the expansion (though endNodes and terminatorNodes will also be allowed, if present). | Spring 2018 APOC releases. -| blacklistNodes | None of the paths returned will include these nodes. | Spring 2018 APOC releases. -|=== - -=== General Examples - -You can turn this cypher query: +We can specify multiple relationship types. 
+The following query starts from the Alicia node, and then expands the `FOLLOWS` and `KNOWS` relationships: +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops [source,cypher] ---- -MATCH (user:User) WHERE user.id = 460 -MATCH (user)-[:RATED]->(movie)<-[:RATED]-(collab)-[:RATED]->(reco) -RETURN count(*); ----- - -into this procedure call, with changed semantics for uniqueness and bfs (which is Cypher's expand mode) - +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.expand(p, "FOLLOWS>|KNOWS", "", 1, 3) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; +---- + +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"}) | 1 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 1 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"}) | 2 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:FOLLOWS]->(:Person:Product {name: "John"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 3 +| (:Person:Product {name: 
"Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:DevRel {name: "Lju"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:DevRel {name: "Mark"})-[:KNOWS]->(:Person:Product {name: "Jake"}) | 3 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Sales {name: "Jonny"})-[:KNOWS]->(:Person:Sales {name: "Anthony"})-[:FOLLOWS]->(:Person:Field {name: "Joe"}) | 3 +| (:Person:Product {name: "Alicia"})-[:KNOWS]->(:Person:Product {name: "Jake"})<-[:KNOWS]-(:Person:DevRel {name: "Mark"})-[:FOLLOWS]->(:Person:Field {name: "Stefan"}) | 3 +|=== + +This query returns 19 paths, Alicia is very well connected! + +We can also specify traversal termination criteria using label filters. +If we wanted to terminate a traversal as soon as the traversal encounters a node containing the `Engineering` label, we can use the `/Engineering` node filter. 
+ +.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, terminating as soon as a node with the `Engineering` label is reached [source,cypher] ---- -MATCH (user:User) WHERE user.id = 460 -CALL apoc.path.expandConfig(user,{relationshipFilter:"RATED",minLevel:3,maxLevel:3,bfs:false,uniqueness:"NONE"}) YIELD path -RETURN count(*); ----- - -=== Expand to nodes in a subgraph - ----- -apoc.path.subgraphNodes(startNode Node/list, {maxLevel, relationshipFilter, labelFilter, bfs:true, filterStartNode:true, limit:-1, optional:false}) yield node +MATCH (p:Person {name: "Alicia"}) +CALL apoc.path.expand(p, "FOLLOWS>|KNOWS", "/Engineering", 1, 3) +YIELD path +RETURN path, length(path) AS hops +ORDER BY hops; ---- -Expand to subgraph nodes reachable from the start node following relationships to max-level adhering to the label filters. - -Accepts the same `config` values as in `expandConfig()`, though `uniqueness` is not configurable and `minLevel`, if present, must be 0 or 1 if present. - -.Examples - -Expand to all nodes of a connected subgraph: +.Results +[opts="header"] +|=== +| path | hops +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2 +| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2 +|=== -[source,cypher] ----- -MATCH (user:User) WHERE user.id = 460 -CALL apoc.path.subgraphNodes(user, {}) YIELD node -RETURN node; ----- +We're now down to only two paths. +But this query doesn't capture all of the paths from Alicia that end in a node with the `Engineering` label. 
+We can use the `>Engineering` node filter to define a traversal that:

-Expand to all nodes reachable by :FRIEND relationships:
+* only returns paths that terminate at nodes with the `Engineering` label
+* continues expansion to end nodes after that, looking for more paths that end with the `Engineering` label

+.The following returns paths containing people that Alicia `FOLLOWS` or `KNOWS` from 1 to 3 hops, where paths end with a node with the `Engineering` label
 [source,cypher]
 ----
-MATCH (user:User) WHERE user.id = 460
-CALL apoc.path.subgraphNodes(user, {relationshipFilter:'FRIEND'}) YIELD node
-RETURN node;
+MATCH (p:Person {name: "Alicia"})
+CALL apoc.path.expand(p, "FOLLOWS>|KNOWS", ">Engineering", 1, 3)
+YIELD path
+RETURN path, length(path) AS hops
+ORDER BY hops;
 ----

+.Results
+[opts="header"]
+|===
+| path | hops
+| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"}) | 2
+| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"}) | 2
+| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Martin"}) | 3
+| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Zhen"})-[:KNOWS]->(:Person:Engineering {name: "Praveena"}) | 3
+| (:Person:Product {name: "Alicia"})-[:FOLLOWS]->(:Person:Field {name: "Joe"})-[:FOLLOWS]->(:Person:Engineering {name: "Praveena"})<-[:KNOWS]-(:Person:Engineering {name: "Zhen"}) | 3
+|===

+Our query now also returns paths going through Praveena and Zhen, one going to Martin, and others going back to Zhen and Praveena!
\ No newline at end of file
diff --git a/docs/asciidoc/path-finding/label-filter-precedence.adoc b/docs/asciidoc/path-finding/label-filter-precedence.adoc
new file mode 100644
index 00000000..cf1f1f39
--- /dev/null
+++ b/docs/asciidoc/path-finding/label-filter-precedence.adoc
@@ -0,0 +1,24 @@
+Multiple label filter operators are allowed at the same time.
+Take the following example:
+
+`labelFilter:'+Person|Movie|-SciFi|>Western|/Romance'`
+
+If we work through this label filter, we can see that:
+
+* `:Person` and `:Movie` labels are whitelisted
+* `:SciFi` is blacklisted
+* `:Western` is an end node label
+* `:Romance` is a termination label.
+
+The precedence of operator evaluation isn't dependent upon their location in the labelFilter but is fixed:
+
+Blacklist filter `-`, termination filter `/`, end node filter `>`, whitelist filter `+`.
+
+This means:
+
+* No blacklisted label `-` will ever be present in the nodes of paths returned, even if the same label (or another label of a node with a blacklisted label) is included in another filter list.
+* If the termination filter `/` or end node filter `>` is used, then only paths up to nodes with those labels will be returned as results. These end nodes are exempt from the whitelist filter.
+* If a node is a termination node `/`, no further expansion beyond the node will occur.
+* The whitelist only applies to nodes up to but not including end nodes from the termination or end node filters. If no end node or termination node operators are present, then the whitelist applies to all nodes of the path.
+* If no whitelist operators are present in the labelFilter, this is treated as if all labels are whitelisted.
+ diff --git a/docs/asciidoc/path-finding/label-filter.adoc b/docs/asciidoc/path-finding/label-filter.adoc new file mode 100644 index 00000000..b639473d --- /dev/null +++ b/docs/asciidoc/path-finding/label-filter.adoc @@ -0,0 +1,14 @@ +Syntax: `[+-/>]LABEL1|LABEL2|*|...` + + +[opts=header,cols="m,a"] +|=== +| input | result +| -Foe | blacklist filter - No node in the path will have a label in the blacklist. +| +Friend | whitelist filter - All nodes in the path must have a label in the whitelist (exempting termination and end nodes, if using those filters). +If no whitelist operator is present, all labels are considered whitelisted. +| /Friend | termination filter - Only return paths up to a node of the given labels, and stop further expansion beyond it. +Termination nodes do not have to respect the whitelist. Termination filtering takes precedence over end node filtering. +| >Friend | end node filter - Only return paths up to a node of the given labels, but continue expansion to match on end nodes beyond it. +End nodes do not have to respect the whitelist to be returned, but expansion beyond them is only allowed if the node has a label in the whitelist. +|=== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/limit-other.adoc b/docs/asciidoc/path-finding/limit-other.adoc new file mode 100644 index 00000000..1b43bca7 --- /dev/null +++ b/docs/asciidoc/path-finding/limit-other.adoc @@ -0,0 +1,5 @@ +We can use the `limit` config parameter to limit the number of paths returned. + +When using `bfs:true` (which is the default for all expand procedures), this has the effect of returning paths to the `n` nearest nodes with labels in the termination or end node filter, where `n` is the limit given. + +The default limit value, `-1`, means no limit. 
\ No newline at end of file diff --git a/docs/asciidoc/path-finding/neighborhood-search.adoc b/docs/asciidoc/path-finding/neighborhood-search.adoc index c55236e1..2511a5a7 100644 --- a/docs/asciidoc/path-finding/neighborhood-search.adoc +++ b/docs/asciidoc/path-finding/neighborhood-search.adoc @@ -6,12 +6,309 @@ This section describes procedures that can be used to find the nodes in the neighborhood of a specified node. -- -[cols="1m,5"] -|=== -| apoc.neighbors.tohop(node, rel-direction-pattern, distance) | returns distinct nodes of the given relationships in the pattern up to a certain distance -| apoc.neighbors.tohop.count(node, rel-direction-pattern, distance) | returns the count of distinct nodes of the given relationships in the pattern up to a certain distance -| apoc.neighbors.byhop(node, rel-direction-pattern, distance) | returns distinct nodes of the given relationships in the pattern grouped by distance -| apoc.neighbors.byhop.count(node, rel-direction-pattern, distance) | returns the count distinct nodes of the given relationships in the pattern grouped by distance -| apoc.neighbors.athop(node, rel-direction-pattern, distance) | returns distinct nodes of the given relationships in the pattern at a certain distance -| apoc.neighbors.athop.count(node, rel-direction-pattern, distance) | returns the count of distinct nodes of the given relationships in the pattern at a certain distance +The Neighborhood search procedures enable quick discovery of surrounding nodes based on a specific relationship type and number of hops. 
+ +This section includes: + +* <> +* <> +* <> +** <> +** <> +** <> + +[[neighborhood-search-available-procedures]] +=== Available Procedures + +The table below describes the available procedures: + +[separator=¦,opts=header] +|=== +include::../../../build/generated-documentation/apoc.neighbors.csv[] +include::../../../build/generated-documentation/apoc.neighbors.athop.csv[lines=2] +include::../../../build/generated-documentation/apoc.neighbors.byhop.csv[lines=2] +include::../../../build/generated-documentation/apoc.neighbors.tohop.csv[lines=2] +|=== + +[[neighborhood-search-rel-filters]] +=== Relationship Filters + +The 2nd parameter in each of the neighborhood search procedures is a relationship filter. +A relationship filter is a `|` separated list of relationship types, using the following syntax: + +include::relationship-filter.adoc[] + +[NOTE] +==== +Relationship filters are white space sensitive, so check for trailing white spaces (and then remove them!) if you're not seeing expected results. +==== + +[[neighborhood-search-examples]] +=== Examples + +The examples in this section are based on the following sample graph: + +[source,cypher] +---- +MERGE (mark:Person {name: "Mark"}) +MERGE (praveena:Person {name: "Praveena"}) +MERGE (joe:Person {name: "Joe"}) +MERGE (lju:Person {name: "Lju"}) +MERGE (michael:Person {name: "Michael"}) +MERGE (emil:Person {name: "Emil"}) +MERGE (ryan:Person {name: "Ryan"}) + +MERGE (ryan)-[:FOLLOWS]->(joe) +MERGE (joe)-[:FOLLOWS]->(mark) +MERGE (mark)-[:FOLLOWS]->(emil) +MERGE (michael)-[:KNOWS]-(emil) +MERGE (michael)-[:KNOWS]-(lju) +MERGE (michael)-[:KNOWS]-(praveena) +MERGE (emil)-[:FOLLOWS]->(joe) +MERGE (praveena)-[:FOLLOWS]->(joe) +---- + +The `KNOWS` relationship type is considered to be bidirectional, where if Michael knows Emil, we can imply that Emil knows Michael. +When using the `KNOWS` relationship we will ignore the direction. 
+ +The `FOLLOWS` relationship has a direction, so we will specify a direction when we use it. + +[[neighborhood-search-specified-hop-count]] +==== Find neighbors at specified hop count + +The `apoc.neighbors.athop` procedures compute a node's neighborhood at a specific hop count. + +.The following returns the people that Emil `KNOWS` at 1 hop +[source,cypher] +---- +MATCH (p:Person {name: "Emil"}) +CALL apoc.neighbors.athop(p, "KNOWS", 1) +YIELD node +RETURN node +---- + +.Results +[opts="header"] +|=== +| node +| (:Person {name: "Michael"}) +|=== + +Emil only has a direct `KNOWS` relationship to Michael, so Michael is the only node returned by this query. + +.The following returns the people that Emil `KNOWS` at 2 hops +[source,cypher] +---- +MATCH (p:Person {name: "Emil"}) +CALL apoc.neighbors.athop(p, "KNOWS", 2) +YIELD node +RETURN node +---- + +.Results +[opts="header"] +|=== +| node +| (:Person {name: "Praveena"}) +| (:Person {name: "Lju"}) +|=== + +Michael also `KNOWS` Praveena and Lju, and since Emil doesn't `KNOW` either of those directly, he only `KNOWS` them at a hop distance of 2. +If we aren't interested in knowing which nodes are in our neighborhood, but just want a count of the number, we can do that as well. + +.The following returns the number of people that Emil `KNOWS` at 2 hops +[source,cypher] +---- +MATCH (p:Person {name: "Emil"}) +CALL apoc.neighbors.athop.count(p, "KNOWS", 2) +YIELD value +RETURN value +---- + +.Results +[opts="header"] +|=== +| value +| 2 +|=== + +As expected we get a count of 2, those people being Praveena and Lju! + +[[neighborhood-search-specified-hop-counts]] +==== Find neighbors at specified hop counts + +The `apoc.neighbors.byhop` procedures compute a node's neighborhood at multiple hop counts. 
+ +.The following returns the people that Emil `KNOWS` up to 2 hops +[source,cypher] +---- +MATCH (p:Person {name: "Emil"}) +CALL apoc.neighbors.byhop(p, "KNOWS", 2) +YIELD nodes +RETURN nodes +---- + +.Results +[opts="header"] +|=== +| nodes +| [(:Person {name: "Michael"})] +| [(:Person {name: "Praveena"}), (:Person {name: "Lju"})] +|=== + +From these results we can see that at level 1 Emil `KNOWS` Michael, and at level 2 Emil `KNOWS` Lju and Praveena. +The following graph patterns describe how Emil knows the different people: + +.Level 1 +* `(emil)-[:KNOWS]-(michael)` + +.Level 2 +* `(emil)-[:KNOWS]-(michael)-[:KNOWS]-(lju)` +* `(emil)-[:KNOWS]-(michael)-[:KNOWS]-(praveena)` + +We can also use multiple relationship types when searching the neighborhood. + +Let's say that as well as finding the people that Emil knows, we also want to find the people that follow him. +We can specify a direction to the relationship types, by using `<` to indicate an incoming relationship, or `>` to indicate an outgoing relationship. +So to find people that follow Emil, we'd use `(emil)` + +.Level 2 +* `(emil)-[:KNOWS]-(michael)-[:KNOWS]-(lju)` +* `(emil)-[:KNOWS]-(michael)-[:KNOWS]-(praveena)` +* `(joe)-[:FOLLOWS]->(mark)-[:FOLLOWS]->(emil)` + +.Level 3 +* `(ryan)-[:FOLLOWS]->(joe)-[:FOLLOWS]->(mark)-[:FOLLOWS]->(emil)` + +And, as with the `apoc.neighbors.athop` procedure, we can also return just the neighborhood size at each hop. + +.The following returns the number of people that Emil `KNOWS` and the number that have `FOLLOWS` relationships to him, at up to 3 hops +[source,cypher] +---- +MATCH (p:Person {name: "Emil"}) +CALL apoc.neighbors.byhop.count(p, "KNOWS|> with the key being the number of hops and the value the neighborhood size. 
+The following query shows how to do this using the `apoc.map.fromLists` function: + +[source,cypher] +---- +MATCH (p:Person {name: "Emil"}) +CALL apoc.neighbors.byhop.count(p, "KNOWS|", 1) +YIELD node +RETURN node +---- + +.Results +[opts="header"] +|=== +| nodes +| (:Person {name: "Joe"}) +|=== + +The only person that Praveena follows is Joe, so that's the only node returned. +What about if we include people at up to 2 hops? + +.The following returns the people that Praveena `FOLLOWS` up to 2 hops +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.neighbors.tohop(p, "FOLLOWS>", 2) +YIELD node +RETURN node +---- + + +.Results +[opts="header"] +|=== +| nodes +| (:Person {name: "Mark"}) +| (:Person {name: "Joe"}) +|=== + +Now Mark is returned as well. +The following graph patterns describe how Emil knows the different people: + +* `(praveena)-[:FOLLOWS]-(joe)` +* `(praveena)-[:FOLLOWS]-(joe)-[:FOLLOWS]->(mark)` + +And if we just want a count of the number of people, we can use the count variant. 
+ +.The following returns the number of people that Praveena `FOLLOWS` up to 2 hops +[source,cypher] +---- +MATCH (p:Person {name: "Praveena"}) +CALL apoc.neighbors.tohop.count(p, "FOLLOWS>", 2) +YIELD value +RETURN value +---- + +.Results +[opts="header"] +|=== +| value +| 2 |=== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/relationship-filter.adoc b/docs/asciidoc/path-finding/relationship-filter.adoc new file mode 100644 index 00000000..ffb23788 --- /dev/null +++ b/docs/asciidoc/path-finding/relationship-filter.adoc @@ -0,0 +1,11 @@ +Syntax: `[<]RELATIONSHIP_TYPE1[>]|[<]RELATIONSHIP_TYPE2[>]|...` + +[opts=header,cols="m,m,a"] +|=== +| input | type | direction +| LIKES> | LIKES | OUTGOING +| | any type | OUTGOING +| < | any type | INCOMING +|=== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/sequence-tips-other.adoc b/docs/asciidoc/path-finding/sequence-tips-other.adoc new file mode 100644 index 00000000..3daf3bb6 --- /dev/null +++ b/docs/asciidoc/path-finding/sequence-tips-other.adoc @@ -0,0 +1,11 @@ +[NOTE] +==== +.Sequence tips + +Label filtering in sequences works together with the `endNodes`+`terminatorNodes`, though inclusion of a node must be unanimous. + +If you need to limit the number of times a sequence repeats, this can be done with the `maxLevel` config param (multiply the number of iterations with the size of the nodes in the sequence). + +As paths are important when expanding sequences, we recommend avoiding `apoc.path.subgraphNodes()`, `apoc.path.subgraphAll()`, and `apoc.path.spanningTree()` when using sequences, +as the configurations that make these efficient at matching to distinct nodes may interfere with sequence pathfinding. 
+==== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/sequence-tips.adoc b/docs/asciidoc/path-finding/sequence-tips.adoc new file mode 100644 index 00000000..38d45ebd --- /dev/null +++ b/docs/asciidoc/path-finding/sequence-tips.adoc @@ -0,0 +1,8 @@ +[NOTE] +==== +.Sequence tips + +Label filtering in sequences works together with the `endNodes`+`terminatorNodes`, though inclusion of a node must be unanimous. + +If you need to limit the number of times a sequence repeats, this can be done with the `maxLevel` config param (multiply the number of iterations with the size of the nodes in the sequence). +==== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/sequences.adoc b/docs/asciidoc/path-finding/sequences.adoc new file mode 100644 index 00000000..31086527 --- /dev/null +++ b/docs/asciidoc/path-finding/sequences.adoc @@ -0,0 +1,34 @@ +Path expander procedures can expand on repeating sequences of labels, relationship types, or both. +Sequences can be defined as follows: + +* If only using label sequences, use the `labelFilter`, but use commas to separate the filtering for each step in the repeating sequence. +* If only using relationship sequences, use the `relationshipFilter`, but use commas to separate the filtering for each step of the repeating sequence. +* If using sequences of both relationships and labels, use the `sequence` parameter. + +[opts=header,cols="a, m,a,m,a"] +|=== +| Usage | config param | description | syntax | explanation +| label sequences only | labelFilter | Same syntax and filters, but uses commas (`,`) to separate the filters for each step in the sequence. | + labelFilter:'Post\|-Blocked,Reply,>Admin' | Start node must be a :Post node that isn't :Blocked, next node must be a :Reply, and the next must be an :Admin, then repeat if able. Only paths ending with the `:Admin` node in that position of the sequence will be returned. 
+| relationship sequences only | relationshipFilter | Same syntax, but uses commas (`,`) to separate the filters for each relationship traversal in the sequence. | +relationshipFilter:'NEXT>,\|REPLIED>' | Expansion will first expand `NEXT>` from the start node, then `` or `REPLIED>`, then repeat if able. +| sequences of both labels and relationships | sequence | A string of comma-separated alternating label and relationship filters, for each step in a repeating sequence. The sequence should begin with a label filter, and end with a relationship filter. If present, `labelFilter` and `relationshipFilter` are ignored, as this takes priority. | +sequence:'Post\|-Blocked, NEXT>, Reply, Admin, POSTED>\|REPLIED>' | Combines the behaviors above. +|=== + +There are some use cases where the sequence does not begin at the start node, but at one node distant. + +The config parameter `beginSequenceAtStart` toggles this behavior. +Its default value is `true`. +If set to `false`, this changes the expected values for `labelFilter`, `relationshipFilter`, and `sequence` as noted below: + +[opts=header,cols="m,a,m,a"] +|=== +| sequence | altered behavior | example | explanation +| labelFilter | The start node is not considered part of the sequence. The sequence begins one node off from the start node. | +beginSequenceAtStart:false, labelFilter:'Post\|-Blocked,Reply,>Admin' | The next node(s) out from the start node begins the sequence (and must be a :Post node that isn't :Blocked), and only paths ending with `Admin` nodes returned. +| relationshipFilter | The first relationship filter in the sequence string will not be considered part of the repeating sequence, and will only be used for the first relationship from the start node to the node that will be the actual start of the sequence. 
| +beginSequenceAtStart:false, relationshipFilter:'FIRST>,NEXT>,\|REPLIED>' | `FIRST>` will be traversed just from the start node to the node that will be the start of the repeating `NEXT>,\|REPLIED>` sequence. +| sequence | Combines the above two behaviors. | +beginSequenceAtStart:false, sequence:'FIRST>, Post\|-Blocked, NEXT>, Reply, Admin, POSTED>\|REPLIED>' | Combines the behaviors above. +|=== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/uniqueness-expand.adoc b/docs/asciidoc/path-finding/uniqueness-expand.adoc new file mode 100644 index 00000000..cbcf8da3 --- /dev/null +++ b/docs/asciidoc/path-finding/uniqueness-expand.adoc @@ -0,0 +1,17 @@ +Uniqueness of nodes and relationships guides the expansion and the returned results. +The table below describes the available values: + +[opts=header,cols="m,a"] +|=== +| value | description +| RELATIONSHIP_PATH | For each returned node there's a (relationship wise) unique path from the start node to it. This is Cypher's default expansion mode. +| NODE_GLOBAL | A node cannot be traversed more than once. This is what the legacy traversal framework does. +| NODE_LEVEL | Entities on the same level are guaranteed to be unique. +| NODE_PATH | For each returned node there's a unique path from the start node to it. +| NODE_RECENT | This is like NODE_GLOBAL, but only guarantees uniqueness among the most recent visited nodes, with a configurable count. Traversing a huge graph is quite memory intensive in that it keeps track of all the nodes it has visited. +For huge graphs a traverser can hog all the memory in the JVM, causing OutOfMemoryError. Together with this Uniqueness you can supply a count, which is the number of most recent visited nodes. This can cause a node to be visited more than once, but scales infinitely. +| RELATIONSHIP_GLOBAL | A relationship cannot be traversed more than once, whereas nodes can. +| RELATIONSHIP_LEVEL | Entities on the same level are guaranteed to be unique. 
+| RELATIONSHIP_RECENT | Same as for NODE_RECENT, but for relationships. +| NONE | No restriction (the user will have to manage it) +|=== \ No newline at end of file diff --git a/docs/asciidoc/path-finding/uniqueness-other.adoc b/docs/asciidoc/path-finding/uniqueness-other.adoc new file mode 100644 index 00000000..08d85fb6 --- /dev/null +++ b/docs/asciidoc/path-finding/uniqueness-other.adoc @@ -0,0 +1,5 @@ +Uniqueness of nodes and relationships guides the expansion and the returned results. +This procedure uses 'NODE_GLOBAL' uniqueness, which means that a node cannot be traversed more than once. +This is what the legacy traversal framework does. + +See <> for a path expander procedure that has fine grained control over its uniqueness strategy. \ No newline at end of file diff --git a/docs/asciidoc/temporal/datetime.adoc b/docs/asciidoc/temporal/datetime.adoc index 5c725ff2..cb8386b6 100644 --- a/docs/asciidoc/temporal/datetime.adoc +++ b/docs/asciidoc/temporal/datetime.adoc @@ -100,7 +100,6 @@ RETURN apoc.date.fields('2015/01/02_EET', 'yyyy/MM/dd_z') AS output; } |=== - == Notes on formats: * the default format is `yyyy-MM-dd HH:mm:ss` @@ -118,7 +117,6 @@ Extracts the value of one field from a datetime epoch. 
RETURN apoc.date.field(12345) ---- - Following fields are supported: [options="header"] @@ -158,4 +156,3 @@ RETURN apoc.date.field(12345, 'years') AS output; | Output | 1970 |=== - diff --git a/docs/build.gradle b/docs/build.gradle index ae845ef4..2b06314d 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -29,7 +29,7 @@ plugins { id 'org.neo4j.doc.build.docbook' version '1.0-alpha12' } -if (!project.hasProperty('apocVersion')) { ext.apocVersion = '4.0.0.0' } +if (!project.hasProperty('apocVersion')) { ext.apocVersion = '4.0.0.11' } ext { versionParts = apocVersion.split('-') diff --git a/docs/css/extra.css b/docs/css/extra.css index b90f65b1..6f973e60 100644 --- a/docs/css/extra.css +++ b/docs/css/extra.css @@ -41,7 +41,7 @@ pre code { line-height: 1; word-wrap: normal; overflow-wrap: normal; - white-space: pre; + white-space: pre-wrap; } .tabbed-example .content > div:last-child, diff --git a/docs/docbook/content-map.xml b/docs/docbook/content-map.xml index 80ca3406..bb02ef03 100644 --- a/docs/docbook/content-map.xml +++ b/docs/docbook/content-map.xml @@ -125,12 +125,23 @@ + + + + + + + + + + + @@ -183,6 +194,13 @@ + + + + + + + diff --git a/docs/images/apoc-load-json-so.svg b/docs/images/apoc-load-json-so.svg new file mode 100644 index 00000000..8c8e9f1e --- /dev/null +++ b/docs/images/apoc-load-json-so.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)ASKEDASKEDANSWERSASKEDASKEDANSWERSANSWERSASKEDASKEDANSWERSANSWERSASKEDASKEDANSWERSASKEDANSWERSASKEDANSWERSASKEDANSWERSASKEDANSWERSASKEDANSWERSASKEDPROVIDEDPROVIDEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGEDTAGGED Asif Ali Cypher patten for getting self related nodes user2853912 Problem connecting .NET Client to Neo4j Desktop version 4 61219356 What kind of graph algorithm does Neo4j use? 
Mariappan Jo&#227;o Costa Import json file to Neo4j 61217207 61213173 Difference between Neo4j Graph Algorithms and Graph Data Science rotten Castigliano Access Neo4J from another container 61207100 61200511 NEO4J - While reading CSV, i&#39;d like to set certain rows as different record Madhav Thaker KusGrus Try to connect neo4j-Python 61197887 Malin Cypher (Neo4j) create relationship to all other nodes (except itself) 61194029 How do I implement a Neo4j kind of graph data structure in Java? 61223808 NEO4J: What is a good practice to store a returned path with addtional inform… Trincerone Livrio 61180332 Create doesn&#39;t make all nodes and relationships appear Tameiki 61180532 Cypher (Neo4j) Match all paths with specific length and value 61180634 How do I run a cypher query on virtual nodes and relationships in Ne… nhanlam graph neo4j cypher neo4j-ap… \ No newline at end of file diff --git a/docs/images/apoc.import.graphml.simple-diagram.png b/docs/images/apoc.import.graphml.simple-diagram.png new file mode 100644 index 00000000..1f2d42a0 Binary files /dev/null and b/docs/images/apoc.import.graphml.simple-diagram.png differ diff --git a/docs/images/apoc.import.graphml.simple.png b/docs/images/apoc.import.graphml.simple.png new file mode 100644 index 00000000..d958f004 Binary files /dev/null and b/docs/images/apoc.import.graphml.simple.png differ diff --git a/docs/images/apoc.import.xml.svg b/docs/images/apoc.import.xml.svg new file mode 100644 index 00000000..c2b9f5f7 --- /dev/null +++ b/docs/images/apoc.import.xml.svg @@ -0,0 +1,5 @@ +Neo4j Graph VisualizationCreated using Neo4j 
(http://www.neo4j.com/)NEXTNEXT_SIBLINGNEXT_WORDNEXT_WORDNEXTNEXT_SIBLINGNEXT_WORDNEXTNEXT_SIBLINGNEXT_WORDNEXT_WORDNEXTNEXT_SIBLINGNEXTNEXT_…NEXT_WO…NEXT_WORDNEXTNEXT_SIBLINGNEXT_WORDNEXTNEXT_SIBLINGNEXT_SIBLINGNEXT_WORDNEXTNEXT_WORDNEXTNEXT_SIBLINGNEXT_WORDNEXT_WORDNEXTNEXT_SIBLINGNEXTNEXT_SIBLINGNEXT_WORDNEXTNEXT_WORDNEXTNEXT_WORDNEXT_SIBLINGNEXT_WORDNEXTNEXT_SIBLINGNEXT_SIBLINGNEXT_WORDNEXTNEXT_SIBLI…NEXT_W…NEXTNEXT_SIBLINGNEXTNEXT_WORDNEXTNEXT_SIBLINGNEXT_WORDNEXT_WORDNEXTNEXT_SIBLINGNEXT_WORDNEXTNEXT_…NEXT_SIB…NEXTNEXT_WORDIS_CHILD_OFNEXT_SIBLINGNEXTFIRST_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFIS_CHILD_OFNEXT_SIBLINGNEXTFIRST…IS_C…IS_CHILD_OFLAST_CHIL…IS_CHILD_OFNEXTNEXT_SIBLINGIS_CHILD_OFFIRST_CHILD_OFLAST_CHILD_OFIS_CHILD_OF + + Gambardella, Matthew + Arciniegas, Fabio + book author author \ No newline at end of file diff --git a/docs/images/apoc.load.json.local.file.svg b/docs/images/apoc.load.json.local.file.svg new file mode 100644 index 00000000..f9b11172 --- /dev/null +++ b/docs/images/apoc.load.json.local.file.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)CHILD_OFCHILD_OFCHILD_OF Rana Michael Selma Selina \ No newline at end of file diff --git a/docs/images/apoc.load.xml.all.books.svg b/docs/images/apoc.load.xml.all.books.svg new file mode 100644 index 00000000..51346c92 --- /dev/null +++ b/docs/images/apoc.load.xml.all.books.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)WROTEWROTEHAS_GENREHAS_…HAS_GENREHAS_GENREWROTEHAS_GENREHAS_GENREHAS_GENREHAS_GENREWROTEWROTEWROTEWROTEHAS_GENREHAS_GENREWROTEWROTEHAS_…WROTEHAS_GENREWROTEWROTEWROTE Arciniegas, Fabio XML Developer's Guide Gambardella, Matthew Microsoft .NET: The Programming Bible Computer MSXML3: A Comprehensive Guide Visual Studio 7: A Comprehensive Guide Ralls, Kim Midnight Rain Fantasy The 
Sundered Grail Maeve Ascendant Oberon's Legacy Corets, Eva Randall, Cynthia Lover Birds Splish Splash Romance Thurman, Paula Knorr, Stefan Creepy Crawlies Horror Kress, Peter Paradox Lost Science Fiction O'Brien, Tim Galos, Mike \ No newline at end of file diff --git a/docs/images/apoc.load.xml.local.books.svg b/docs/images/apoc.load.xml.local.books.svg new file mode 100644 index 00000000..1fbf6b70 --- /dev/null +++ b/docs/images/apoc.load.xml.local.books.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)WROTEWROTEHAS_GENREWROTEHAS_GENRE Gambardella, Matthew XML Developer's Guide Arciniegas, Fabio Computer Ralls, Kim Midnight Rain Fantasy \ No newline at end of file diff --git a/docs/images/apoc.nlp.aws.entities-multiple.graph.svg b/docs/images/apoc.nlp.aws.entities-multiple.graph.svg new file mode 100644 index 00000000..97b314e3 --- /dev/null +++ b/docs/images/apoc.nlp.aws.entities-multiple.graph.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)ENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITY Nintendo Switch Nintendo March 3, 2017 October 20, 2016 Mario Kart 8 https://en.wikip… at least once twice a week Neo4j European https://neo4j.co… \ No newline at end of file diff --git a/docs/images/apoc.nlp.aws.entities.graph.svg b/docs/images/apoc.nlp.aws.entities.graph.svg new file mode 100644 index 00000000..0392f409 --- /dev/null +++ b/docs/images/apoc.nlp.aws.entities.graph.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)ENTITYENTITYENTITYENTITYENTITYENTITYENTITY Nintendo Switch at least once twice a week Mario Kart 8 Neo4j European https://neo4j.co… \ No newline at end of file diff --git a/docs/images/apoc.nlp.aws.keyPhrases.graph.svg b/docs/images/apoc.nlp.aws.keyPhrases.graph.svg new file mode 100644 index 00000000..14773cf5 --- /dev/null +++ b/docs/images/apoc.nlp.aws.keyPhrases.graph.svg @@ -0,0 +1 @@ +Neo4j Graph 
VisualizationCreated using Neo4j (http://www.neo4j.com/)KEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASEKEY_PHRASE https://neo4j.co… These days more than a few feet my Nintendo Switch board games card games role playing games friends a week lunch-time Mario Kart 8 tournaments the Neo4j European offices \ No newline at end of file diff --git a/docs/images/apoc.nlp.gcp.classify.graph.svg b/docs/images/apoc.nlp.gcp.classify.graph.svg new file mode 100644 index 00000000..6160be69 --- /dev/null +++ b/docs/images/apoc.nlp.gcp.classify.graph.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)CATEGORY /Games https://neo4j.co… \ No newline at end of file diff --git a/docs/images/apoc.nlp.gcp.entities.graph.svg b/docs/images/apoc.nlp.gcp.entities.graph.svg new file mode 100644 index 00000000..c6857b0f --- /dev/null +++ b/docs/images/apoc.nlp.gcp.entities.graph.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)ENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITYENTITY card games board games role playing games Switch friends Mario Kart 8 Nintendo Neo4j tournaments offices European 8 https://neo4j.co… \ No newline at end of file diff --git a/docs/images/apoc.path.expand.svg b/docs/images/apoc.path.expand.svg new file mode 100644 index 00000000..a65d9bd8 --- /dev/null +++ b/docs/images/apoc.path.expand.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSKNOWSFOLLOWSFOLLOWSFOLLOWSKNOWSFOLLOWSKNOWSFOLLOWSKNOWSKNOWSKNOWS Joe Mark Praveena Zhen Stefan Alicia Lju Martin Jake \ No newline at end of file diff --git a/docs/images/apoc.path.expandConfig.alicia.bfs.svg b/docs/images/apoc.path.expandConfig.alicia.bfs.svg new file mode 100644 index 00000000..d5b79568 --- /dev/null +++ b/docs/images/apoc.path.expandConfig.alicia.bfs.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j 
(http://www.neo4j.com/)FOLLOWSFOLLOWSKNOWSFOLLOWSFOLLOWSFOLLOWSKNOWSKNOWSFOLLOWSKNOWS Alicia Joe Jonny Jake Zhen Praveena Mark Anthony John Martin \ No newline at end of file diff --git a/docs/images/apoc.path.expandConfig.alicia.dfs.svg b/docs/images/apoc.path.expandConfig.alicia.dfs.svg new file mode 100644 index 00000000..1a8e158f --- /dev/null +++ b/docs/images/apoc.path.expandConfig.alicia.dfs.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSFOLLOWSFOLLOWSKNOWSKNOWSKNOWSKNOWSFOLLOWS Alicia Joe Zhen Praveena John Martin Lju Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.expandConfig.alicia.svg b/docs/images/apoc.path.expandConfig.alicia.svg new file mode 100644 index 00000000..8395deba --- /dev/null +++ b/docs/images/apoc.path.expandConfig.alicia.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSKNOWSFOLLOWSFOLLOWSFOLLOWSKNOWSKNOWSFOLLOWSKNOWSKNOWSKNOWSKNOWSFOLLOWSFOLLOWSFOLLOWS Alicia Joe Jonny Jake Zhen Praveena Mark Anthony John Martin Lju Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.expandConfig.blacklist.svg b/docs/images/apoc.path.expandConfig.blacklist.svg new file mode 100644 index 00000000..50f448e3 --- /dev/null +++ b/docs/images/apoc.path.expandConfig.blacklist.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSKNOWSKNOWSKNOWSFOLLOWS Alicia Jonny Jake Anthony Mark Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.expandConfig.svg b/docs/images/apoc.path.expandConfig.svg new file mode 100644 index 00000000..44237bdd --- /dev/null +++ b/docs/images/apoc.path.expandConfig.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSKNOWSFOLLOWSFOLLOWSFOLLOWSFOLLOWSKNOWSFOLLOWSFOLLOWSKNOWSKNOWSFOLLOWSKNOWSKNOWSFOLLOWSKNOWSFOLLOWSKNOWS Joe Mark Praveena Zhen Anthony Alicia Stefan Lju Martin Jake Jonny John 
Rik \ No newline at end of file diff --git a/docs/images/apoc.path.expandConfig.whitelist.svg b/docs/images/apoc.path.expandConfig.whitelist.svg new file mode 100644 index 00000000..19b6aca4 --- /dev/null +++ b/docs/images/apoc.path.expandConfig.whitelist.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSFOLLOWSFOLLOWSKNOWSFOLLOWS Alicia Joe Zhen Praveena Mark \ No newline at end of file diff --git a/docs/images/apoc.path.spanningTree.alicia-blacklist-joe.svg b/docs/images/apoc.path.spanningTree.alicia-blacklist-joe.svg new file mode 100644 index 00000000..2f93aaea --- /dev/null +++ b/docs/images/apoc.path.spanningTree.alicia-blacklist-joe.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSKNOWSKNOWSKNOWSFOLLOWS Alicia Jonny Jake Anthony Mark Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.spanningTree.alicia-whitelist.svg b/docs/images/apoc.path.spanningTree.alicia-whitelist.svg new file mode 100644 index 00000000..15a29868 --- /dev/null +++ b/docs/images/apoc.path.spanningTree.alicia-whitelist.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWS Alicia Jonny \ No newline at end of file diff --git a/docs/images/apoc.path.spanningTree.joe-sequence.svg b/docs/images/apoc.path.spanningTree.joe-sequence.svg new file mode 100644 index 00000000..3f09967a --- /dev/null +++ b/docs/images/apoc.path.spanningTree.joe-sequence.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSFOLLOWSKNOWSKNOWSKNOWSKNOWS Joe Praveena Mark Zhen Jake Martin Lju Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.alicia-blacklist-joe.svg b/docs/images/apoc.path.subgraphAll.alicia-blacklist-joe.svg new file mode 100644 index 00000000..1d370d34 --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.alicia-blacklist-joe.svg @@ -0,0 +1 @@ +Neo4j Graph 
VisualizationCreated using Neo4j (http://www.neo4j.com/)KNOWSFOLLOWSKNOWS Jonny Jake Anthony Mark Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.alicia-end.svg b/docs/images/apoc.path.subgraphAll.alicia-end.svg new file mode 100644 index 00000000..0a8bc84a --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.alicia-end.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWS Joe Mark Zhen \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.alicia-engineering-end.svg b/docs/images/apoc.path.subgraphAll.alicia-engineering-end.svg new file mode 100644 index 00000000..77fa7fa3 --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.alicia-engineering-end.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)KNOWSKNOWS Praveena Zhen Martin \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.alicia-engineering.svg b/docs/images/apoc.path.subgraphAll.alicia-engineering.svg new file mode 100644 index 00000000..0d826423 --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.alicia-engineering.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)KNOWS Praveena Zhen \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.alicia-terminator.svg b/docs/images/apoc.path.subgraphAll.alicia-terminator.svg new file mode 100644 index 00000000..0ba0b9a1 --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.alicia-terminator.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWS Joe Mark \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.alicia-whitelist.svg b/docs/images/apoc.path.subgraphAll.alicia-whitelist.svg new file mode 100644 index 00000000..3380f1e6 --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.alicia-whitelist.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/) 
Jonny \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.alicia.svg b/docs/images/apoc.path.subgraphAll.alicia.svg new file mode 100644 index 00000000..921d754e --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.alicia.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)KNOWSFOLLOWSFOLLOWSFOLLOWSFOLLOWSFOLLOWSFOLLOWSKNOWSFOLLOWSKNOWSKNOWSKNOWSKNOWSFOLLOWSFOLLOWS Jonny Joe Jake Anthony Praveena Mark Zhen Stefan John Martin Lju \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.joe-sequence.svg b/docs/images/apoc.path.subgraphAll.joe-sequence.svg new file mode 100644 index 00000000..5c6e08c9 --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.joe-sequence.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSKNOWSKNOWSKNOWSKNOWSKNOWSFOLLOWS Praveena Mark Zhen Jake Martin Lju Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.praveena-engineering.svg b/docs/images/apoc.path.subgraphAll.praveena-engineering.svg new file mode 100644 index 00000000..7c2290fa --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.praveena-engineering.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)KNOWS Zhen Martin \ No newline at end of file diff --git a/docs/images/apoc.path.subgraphAll.praveena.svg b/docs/images/apoc.path.subgraphAll.praveena.svg new file mode 100644 index 00000000..49379f3e --- /dev/null +++ b/docs/images/apoc.path.subgraphAll.praveena.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)KNOWSKNOWSKNOWS Zhen Martin Lju Stefan \ No newline at end of file diff --git a/docs/images/apoc.path.subtree.alicia-end.svg b/docs/images/apoc.path.subtree.alicia-end.svg new file mode 100644 index 00000000..038264b1 --- /dev/null +++ b/docs/images/apoc.path.subtree.alicia-end.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j 
(http://www.neo4j.com/)FOLLOWSFOLLOWSFOLLOWS Alicia Joe Mark Zhen \ No newline at end of file diff --git a/docs/images/apoc.path.subtree.alicia-eng-end.svg b/docs/images/apoc.path.subtree.alicia-eng-end.svg new file mode 100644 index 00000000..becad2c9 --- /dev/null +++ b/docs/images/apoc.path.subtree.alicia-eng-end.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSFOLLOWSKNOWS Alicia Joe Praveena Zhen Martin \ No newline at end of file diff --git a/docs/images/apoc.path.subtree.alicia-eng.svg b/docs/images/apoc.path.subtree.alicia-eng.svg new file mode 100644 index 00000000..8c265eb5 --- /dev/null +++ b/docs/images/apoc.path.subtree.alicia-eng.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSFOLLOWS Alicia Joe Praveena Zhen \ No newline at end of file diff --git a/docs/images/apoc.path.subtree.alicia-terminator.svg b/docs/images/apoc.path.subtree.alicia-terminator.svg new file mode 100644 index 00000000..4e8d9512 --- /dev/null +++ b/docs/images/apoc.path.subtree.alicia-terminator.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSKNOWSKNOWS Alicia Joe Jake Mark \ No newline at end of file diff --git a/docs/images/apoc.path.subtree.alicia.svg b/docs/images/apoc.path.subtree.alicia.svg new file mode 100644 index 00000000..c87f69d7 --- /dev/null +++ b/docs/images/apoc.path.subtree.alicia.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)FOLLOWSFOLLOWSKNOWSKNOWSFOLLOWSFOLLOWSFOLLOWSFOLLOWSFOLLOWSKNOWSKNOWS Alicia Jonny Joe Jake Anthony Praveena Mark Zhen Stefan John Martin Lju \ No newline at end of file diff --git a/docs/images/apoc.path.subtree.praveena-eng.svg b/docs/images/apoc.path.subtree.praveena-eng.svg new file mode 100644 index 00000000..d947850f --- /dev/null +++ b/docs/images/apoc.path.subtree.praveena-eng.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j 
(http://www.neo4j.com/)KNOWSKNOWS Praveena Zhen Martin \ No newline at end of file diff --git a/docs/images/apoc.path.subtree.praveena.svg b/docs/images/apoc.path.subtree.praveena.svg new file mode 100644 index 00000000..c81fd31b --- /dev/null +++ b/docs/images/apoc.path.subtree.praveena.svg @@ -0,0 +1 @@ +Neo4j Graph VisualizationCreated using Neo4j (http://www.neo4j.com/)KNOWSKNOWSKNOWSKNOWS Praveena Zhen Martin Lju Stefan \ No newline at end of file diff --git a/docs/javascript/feedback.js b/docs/javascript/feedback.js index 2e52fcce..f119a1e8 100644 --- a/docs/javascript/feedback.js +++ b/docs/javascript/feedback.js @@ -16,7 +16,7 @@ $(document).ready(function() { const documentHelpful = event.target.attributes["data-helpful"].value if ("yes" === documentHelpful) { - $.post("https://uglfznxroe.execute-api.us-east-1.amazonaws.com/dev/Feedback", { helpful: true }); + $.post("https://uglfznxroe.execute-api.us-east-1.amazonaws.com/dev/Feedback", { helpful: true, url: window.location.href }); $("div#feedback-form").html(`

Thanks for your feedback. We're happy to hear that the information on this page was helpful

`) } else { const specificFeedback = ` @@ -64,14 +64,14 @@ $(document).ready(function() { const submitType = event.target.attributes["data-submit"].value if("skip" === submitType) { - $.post("https://uglfznxroe.execute-api.us-east-1.amazonaws.com/dev/Feedback", { helpful: false }); + $.post("https://uglfznxroe.execute-api.us-east-1.amazonaws.com/dev/Feedback", { helpful: false, url: window.location.href }); $("div#feedback-form").html("

Thanks for your feedback. We'll take it account when we're updating our documentation

") } else { const reason = $("input[name='specific']:checked")[0].attributes["data-reason"].value const moreInformation = $("textarea[name='more-information']")[0].value - $.post("https://uglfznxroe.execute-api.us-east-1.amazonaws.com/dev/Feedback", { helpful: false, reason: reason, moreInformation: moreInformation }); + $.post("https://uglfznxroe.execute-api.us-east-1.amazonaws.com/dev/Feedback", { helpful: false, url: window.location.href, reason: reason, moreInformation: moreInformation }); $("div#feedback-form").html("

Thanks for your feedback. We'll take it account when we're updating our documentation

") } }); diff --git a/docs/javascript/version.js b/docs/javascript/version.js index 63290335..5af7c64a 100644 --- a/docs/javascript/version.js +++ b/docs/javascript/version.js @@ -1,21 +1,20 @@ window.docMeta = (function () { - var version = '3.4'; - var name = 'APOC'; + var version = '4.0'; + var name = 'apoc'; var href = window.location.href; - var len = href.indexOf('/' + version) != -1 ? href.indexOf('/' + version) : href.length -1; return { name: name, version: version, - availableDocVersions: ["3.3", "3.4", "3.5"], - thisPubBaseUri: href.substring(0,len) + '/' + version, - unversionedDocBaseUri: href.substring(0, len) + '/', + availableDocVersions: ["3.5", "4.0"], + thisPubBaseUri: href.substring(0, href.indexOf(name) + name.length) + '/' + version, + unversionedDocBaseUri: href.substring(0, href.indexOf(name) + name.length) + '/', commonDocsBaseUri: href.substring(0, href.indexOf(name) - 1) } })(); (function () { - var baseUri = window.docMeta.unversionedDocBaseUri; // + window.location.pathname.split(window.docMeta.name + '/')[1].split('/')[0] + '/'; + var baseUri = window.docMeta.unversionedDocBaseUri + window.location.pathname.split(window.docMeta.name + '/')[1].split('/')[0] + '/'; var docPath = window.location.href.replace(baseUri, ''); window.neo4jPageId = docPath; })(); -// vim: set sw=2 ts=2: +// vim: set sw=2 ts=2: \ No newline at end of file diff --git a/docs/javascript/versionswitcher.js b/docs/javascript/versionswitcher.js index ea26e993..aeaa6eeb 100644 --- a/docs/javascript/versionswitcher.js +++ b/docs/javascript/versionswitcher.js @@ -17,7 +17,7 @@ function versionSwitcher( $ ) var currentVersion = window.docMeta.version; var currentPage = window.neo4jPageId; - // TODO re-enable loadVersions(); + loadVersions() /** * Load an array of version into a div element and check if the current page actually exists in these versions. 
diff --git a/extra-dependencies/gradle/wrapper/gradle-wrapper.jar b/extra-dependencies/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000..e289a283 Binary files /dev/null and b/extra-dependencies/gradle/wrapper/gradle-wrapper.jar differ diff --git a/extra-dependencies/gradle/wrapper/gradle-wrapper.properties b/extra-dependencies/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..e54747b5 --- /dev/null +++ b/extra-dependencies/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Tue Feb 06 14:27:44 CET 2018 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-bin.zip \ No newline at end of file diff --git a/extra-dependencies/nlp/build.gradle b/extra-dependencies/nlp/build.gradle new file mode 100644 index 00000000..d2766a6e --- /dev/null +++ b/extra-dependencies/nlp/build.gradle @@ -0,0 +1,38 @@ +plugins { + id 'java' + id 'com.github.johnrengelman.shadow' version '4.0.3' +} + +java { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 +} + +group = 'org.neo4j.contrib' +version = '4.0.0.11' +archivesBaseName = 'apoc-nlp-dependencies' +description = """APOC NLP Dependencies""" + +jar { + manifest { + attributes 'Implementation-Version': version + } +} + +repositories { + jcenter() + maven { + url "https://repo.gradle.org/gradle/libs-releases" + } +} + + +dependencies { + compile group: 'com.amazonaws', name: 'aws-java-sdk-comprehend', version: '1.11.683' + compile group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: '2.10.3' + compile 'org.jetbrains.kotlin:kotlin-stdlib-jdk8:1.3.71' +} + + + + diff --git a/extra-dependencies/nlp/gradle/wrapper/gradle-wrapper.jar b/extra-dependencies/nlp/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000..e289a283 Binary files /dev/null and 
b/extra-dependencies/nlp/gradle/wrapper/gradle-wrapper.jar differ diff --git a/extra-dependencies/nlp/gradle/wrapper/gradle-wrapper.properties b/extra-dependencies/nlp/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..e54747b5 --- /dev/null +++ b/extra-dependencies/nlp/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Tue Feb 06 14:27:44 CET 2018 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-bin.zip \ No newline at end of file diff --git a/extra-dependencies/nlp/gradlew b/extra-dependencies/nlp/gradlew new file mode 100755 index 00000000..2fe81a7d --- /dev/null +++ b/extra-dependencies/nlp/gradlew @@ -0,0 +1,183 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. 
+while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? 
-eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set 
-- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/extra-dependencies/nlp/gradlew.bat b/extra-dependencies/nlp/gradlew.bat new file mode 100644 index 00000000..9618d8d9 --- /dev/null +++ b/extra-dependencies/nlp/gradlew.bat @@ -0,0 +1,100 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. 
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/extra-dependencies/nlp/settings.gradle b/extra-dependencies/nlp/settings.gradle new file mode 100644 index 00000000..988ada53 --- /dev/null +++ b/extra-dependencies/nlp/settings.gradle @@ -0,0 +1,17 @@ + +pluginManagement { + repositories { +// mavenLocal() + maven { + url "https://neo.jfrog.io/neo/docs-maven" // System.env.ARTIFACTORY_URI +/* + credentials { + username System.env.ARTIFACTORY_USERNAME + password System.env.ARTIFACTORY_PASSWORD + } +*/ + } + gradlePluginPortal() + + } +} \ No newline at end of file diff --git a/readme.adoc b/readme.adoc index f0870371..9a337614 100644 --- a/readme.adoc +++ b/readme.adoc @@ -1,7 +1,7 @@ :readme: :branch: 4.0 :docs: https://neo4j.com/docs/labs/apoc/current -:apoc-release: 4.0.0.2 +:apoc-release: 4.0.0.7 :neo4j-version: 4.0.0 :img: https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/docs/images @@ -65,7 +65,7 @@ User defined *Functions* can be used in *any* expression or predicate, just like But you can also integrate them into your Cypher statements which makes them so much more powerful. 
.Load JSON example -[source,cypher] +[source,cypher,subs=attributes] ---- WITH 'https://raw.githubusercontent.com/neo4j-contrib/neo4j-apoc-procedures/{branch}/src/test/resources/person.json' AS url @@ -182,7 +182,8 @@ The trailing `` part of the version number will be incremented with every [opts=header] |=== |apoc version | neo4j version -| http://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/3.5.0.6[3.5.0.6^] | 3.5.12 (3.5.x) +| http://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/4.0.0.7[4.0.0.7#^] | 4.0.0 (4.0.x) +| http://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/3.5.0.11[3.5.0.11^] | 3.5.16 (3.5.x) | http://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/3.4.0.4[3.4.0.6^] | 3.4.12 (3.4.x) | http://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/3.3.0.4[3.3.0.4^] | 3.3.6 (3.3.x) | http://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/3.2.3.6[3.2.3.6^] | 3.2.9 (3.2.x) diff --git a/src/main/java/apoc/ApocConfig.java b/src/main/java/apoc/ApocConfig.java index 6e541cc6..c7ac742b 100644 --- a/src/main/java/apoc/ApocConfig.java +++ b/src/main/java/apoc/ApocConfig.java @@ -29,8 +29,7 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.stream.Stream; import static apoc.util.FileUtils.isFile; import static org.neo4j.configuration.GraphDatabaseSettings.SYSTEM_DATABASE_NAME; @@ -45,7 +44,6 @@ public class ApocConfig extends LifecycleAdapter { public static final String SUN_JAVA_COMMAND = "sun.java.command"; - public static final Pattern CONF_DIR_PATTERN = Pattern.compile("--config-dir=(\\S+)"); public static final String APOC_IMPORT_FILE_ENABLED = "apoc.import.file.enabled"; public static final String APOC_EXPORT_FILE_ENABLED = "apoc.export.file.enabled"; public static final String APOC_IMPORT_FILE_USE_NEO4J_CONFIG = "apoc.import.file.use_neo4j_config"; @@ -62,7 +60,7 @@ public class ApocConfig extends 
LifecycleAdapter { public static final String APOC_CONFIG_INITIALIZER_CYPHER = "apoc.initializer.cypher"; public static final String APOC_CONFIG_BROKERS_NUM_THREADS = "apoc.brokers.num_threads"; - public static final List NEO4J_DIRECTORY_CONFIGURATION_SETTING_NAMES = new ArrayList<>(Arrays.asList( + private static final List NEO4J_DIRECTORY_CONFIGURATION_SETTING_NAMES = new ArrayList<>(Arrays.asList( data_directory, load_csv_file_url_root, logs_directory, @@ -71,6 +69,8 @@ public class ApocConfig extends LifecycleAdapter { transaction_logs_root_path, neo4j_home )); + private static final String DEFAULT_PATH = "."; + private static final String CONFIG_DIR = "config-dir="; private final Config neo4jConfig; private final Log log; @@ -122,19 +122,22 @@ public void init() throws Exception { protected String determineNeo4jConfFolder() { String command = System.getProperty(SUN_JAVA_COMMAND); - if (command==null) { + if (command == null) { log.warn("system property %s is not set, assuming '.' as conf dir. This might cause `apoc.conf` not getting loaded.", SUN_JAVA_COMMAND); - return "."; + return DEFAULT_PATH; } else { - Matcher matcher = CONF_DIR_PATTERN.matcher(command); - if (matcher.find()) { - String neo4jConfFolder = matcher.group(1); - log.info("from system properties: NEO4J_CONF=%s", neo4jConfFolder); - return neo4jConfFolder; - } else { + final String neo4jConfFolder = Stream.of(command.split("--")) + .map(String::trim) + .filter(s -> s.startsWith(CONFIG_DIR)) + .map(s -> s.substring(CONFIG_DIR.length())) + .findFirst() + .orElse(DEFAULT_PATH); + if (DEFAULT_PATH.equals(neo4jConfFolder)) { log.info("cannot determine conf folder from sys property %s, assuming '.' 
", command); - return "."; + } else { + log.info("from system properties: NEO4J_CONF=%s", neo4jConfFolder); } + return neo4jConfFolder; } } diff --git a/src/main/java/apoc/bolt/Bolt.java b/src/main/java/apoc/bolt/Bolt.java index 38317a7c..849eedb3 100644 --- a/src/main/java/apoc/bolt/Bolt.java +++ b/src/main/java/apoc/bolt/Bolt.java @@ -54,7 +54,7 @@ public Stream load(@Name("url") String url, @Name("kernelTransaction" return withDriver(uri.getConfiguredUri(), uri.getToken(), driverConfig, driver -> withSession(driver, sessionConfig, session -> { if (addStatistics) { - StatementResult statementResult = session.run(statement, params); + Result statementResult = session.run(statement, params); SummaryCounters counters = statementResult.consume().counters(); return Stream.of(new RowResult(toMap(counters))); } else diff --git a/src/main/java/apoc/cluster/Cluster.java b/src/main/java/apoc/cluster/Cluster.java index 00e5e9f3..a10ad917 100644 --- a/src/main/java/apoc/cluster/Cluster.java +++ b/src/main/java/apoc/cluster/Cluster.java @@ -24,8 +24,8 @@ public class Cluster @Context public GraphDatabaseAPI api; - public static final String boltAddressKey = "bolt_address"; - public static final Map shortName = new HashMap() + private static final String boltAddressKey = "bolt_address"; + private static final Map shortName = new HashMap() {{ put( "LEADER", "L" ); put( "FOLLOWER", "F" ); diff --git a/src/main/java/apoc/coll/Coll.java b/src/main/java/apoc/coll/Coll.java index a14920fe..4a5c0b4d 100644 --- a/src/main/java/apoc/coll/Coll.java +++ b/src/main/java/apoc/coll/Coll.java @@ -3,10 +3,7 @@ import apoc.result.ListResult; import org.apache.commons.lang3.mutable.MutableInt; import org.apache.commons.math3.util.Combinations; -import org.neo4j.graphdb.GraphDatabaseService; -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Path; -import org.neo4j.graphdb.Relationship; +import org.neo4j.graphdb.*; import org.neo4j.internal.helpers.collection.Pair; import 
org.neo4j.procedure.*; @@ -27,6 +24,8 @@ public class Coll { @Context public GraphDatabaseService db; + @Context public Transaction tx; + @Procedure @Description("apoc.coll.zipToRows(list1,list2) - creates pairs like zip but emits one row per pair") public Stream zipToRows(@Name("list1") List list1, @Name("list2") List list2) { @@ -88,21 +87,22 @@ public Double avg(@Name("numbers") List list) { @Description("apoc.coll.min([0.5,1,2.3])") public Object min(@Name("values") List list) { if (list == null || list.isEmpty()) return null; - return Collections.min((List)list, Coll::compareAsDoubles); + return Collections.min(list, Coll::compareAsDoubles); } @UserFunction @Description("apoc.coll.max([0.5,1,2.3])") public Object max(@Name("values") List list) { - if (list == null || list.isEmpty()) return null; - return Collections.max((List)list, Coll::compareAsDoubles); + if (list == null || list.isEmpty()) return null; + return Collections.max(list, Coll::compareAsDoubles); } private static int compareAsDoubles(Object a, Object b) { - return Double.compare(((Number)a).doubleValue(), ((Number)b).doubleValue()); + return Double.compare(((Number) a).doubleValue(), ((Number) b).doubleValue()); } - @Procedure + + @Procedure @Description("apoc.coll.elements(list,limit,offset) yield _1,_2,..,_10,_1s,_2i,_3f,_4m,_5l,_6n,_7r,_8p - deconstruct subset of mixed list into identifiers of the correct type") public Stream elements(@Name("values") List list, @Name(value = "limit",defaultValue = "-1") long limit,@Name(value = "offset",defaultValue = "0") long offset) { int elements = (limit < 0 ? 
list.size() : Math.min((int)(offset+limit),list.size())) - (int)offset; @@ -396,6 +396,7 @@ public List remove(@Name("coll") List coll, @Name("index") long @UserFunction @Description("apoc.coll.indexOf(coll, value) | position of value in the list") public long indexOf(@Name("coll") List coll, @Name("value") Object value) { + // return reduce(res=[0,-1], x in $list | CASE WHEN x=$value AND res[1]=-1 THEN [res[0], res[0]+1] ELSE [res[0]+1, res[1]] END)[1] as value if (coll == null || coll.isEmpty()) return -1; return new ArrayList<>(coll).indexOf(value); } diff --git a/src/main/java/apoc/custom/CypherProceduresHandler.java b/src/main/java/apoc/custom/CypherProceduresHandler.java index 1267fc1c..4bb68f44 100644 --- a/src/main/java/apoc/custom/CypherProceduresHandler.java +++ b/src/main/java/apoc/custom/CypherProceduresHandler.java @@ -97,7 +97,7 @@ public class CypherProceduresHandler implements AvailabilityListener { private final ThrowingFunction transactionComponentFunction; private Set registeredProcedureSignatures = emptySet(); private Set registeredUserFunctionSignatures = emptySet(); - public static Group REFRESH_GROUP = Group.STORAGE_MAINTENANCE; + private static Group REFRESH_GROUP = Group.STORAGE_MAINTENANCE; private JobHandle restoreProceduresHandle; diff --git a/src/main/java/apoc/cypher/Cypher.java b/src/main/java/apoc/cypher/Cypher.java index cb4393ad..b44e8be1 100644 --- a/src/main/java/apoc/cypher/Cypher.java +++ b/src/main/java/apoc/cypher/Cypher.java @@ -11,13 +11,29 @@ import org.neo4j.graphdb.Transaction; import org.neo4j.internal.helpers.collection.Iterators; import org.neo4j.logging.Log; -import org.neo4j.procedure.*; +import org.neo4j.procedure.Context; +import org.neo4j.procedure.Description; +import org.neo4j.procedure.Mode; +import org.neo4j.procedure.Name; +import org.neo4j.procedure.Procedure; +import org.neo4j.procedure.TerminationGuard; import java.io.IOException; import java.io.Reader; import java.io.StringReader; -import java.util.*; 
-import java.util.concurrent.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.function.Consumer; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -76,11 +92,12 @@ public Stream runFile(@Name("file") String fileName, @Name(value = "c public Stream runFiles(@Name("file") List fileNames, @Name(value = "config",defaultValue = "{}") Map config) { boolean addStatistics = Util.toBoolean(config.getOrDefault("statistics",true)); int timeout = Util.toInteger(config.getOrDefault("timeout",10)); + int queueCapacity = Util.toInteger(config.getOrDefault("queueCapacity",100)); List result = new ArrayList<>(); @SuppressWarnings( "unchecked" ) Map parameters = (Map)config.getOrDefault("parameters",Collections.emptyMap()); for (String f : fileNames) { - List rowResults = runManyStatements(readerForFile(f), parameters, false, addStatistics, timeout).collect(Collectors.toList()); + List rowResults = runManyStatements(readerForFile(f), parameters, false, addStatistics, timeout, queueCapacity).collect(Collectors.toList()); result.addAll(rowResults); } return result.stream(); @@ -97,26 +114,48 @@ public Stream runSchemaFile(@Name("file") String fileName, @Name(valu public Stream runSchemaFiles(@Name("file") List fileNames, @Name(value = "config",defaultValue = "{}") Map config) { boolean addStatistics = Util.toBoolean(config.getOrDefault("statistics",true)); int timeout = Util.toInteger(config.getOrDefault("timeout",10)); + int queueCapacity = Util.toInteger(config.getOrDefault("queueCapacity",100)); List result = new ArrayList<>(); for (String f : fileNames) { - List 
rowResults = runManyStatements(readerForFile(f), Collections.emptyMap(), true, addStatistics, timeout).collect(Collectors.toList()); + List rowResults = runManyStatements(readerForFile(f), Collections.emptyMap(), true, addStatistics, timeout, queueCapacity).collect(Collectors.toList()); result.addAll(rowResults); } return result.stream(); } - private Stream runManyStatements(Reader reader, Map params, boolean schemaOperation, boolean addStatistics, int timeout) { - BlockingQueue queue = new ArrayBlockingQueue<>(100); - Util.inThread(pools, () -> { + private Stream runManyStatements(Reader reader, Map params, boolean schemaOperation, boolean addStatistics, int timeout, int queueCapacity) { + BlockingQueue queue = runInSeparateThreadAndSendTombstone(queueCapacity, internalQueue -> { if (schemaOperation) { - runSchemaStatementsInTx(reader, queue, params, addStatistics,timeout); + runSchemaStatementsInTx(reader, internalQueue, params, addStatistics, timeout); } else { - runDataStatementsInTx(reader, queue, params, addStatistics,timeout); + runDataStatementsInTx(reader, internalQueue, params, addStatistics, timeout); } - queue.put(RowResult.TOMBSTONE); - return null; - }); - return StreamSupport.stream(new QueueBasedSpliterator<>(queue, RowResult.TOMBSTONE, terminationGuard, timeout), false); + }, RowResult.TOMBSTONE); + return StreamSupport.stream(new QueueBasedSpliterator<>(queue, RowResult.TOMBSTONE, terminationGuard, Integer.MAX_VALUE), false); + } + + + private BlockingQueue runInSeparateThreadAndSendTombstone(int queueCapacity, Consumer> action, T tombstone) { + /* NB: this must not be called via an existing thread pool - otherwise we could run into a deadlock + other jobs using the same pool might completely exhaust at and the thread sending TOMBSTONE will + wait in the pool's job queue. 
+ */ + BlockingQueue queue = new ArrayBlockingQueue<>(queueCapacity); + new Thread(() -> { + try { + action.accept(queue); + } finally { + while (true) { // ensure we send TOMBSTONE even if there's an InterruptedException + try { + queue.put(tombstone); + return; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + }).start(); + return queue; } private void runDataStatementsInTx(Reader reader, BlockingQueue queue, Map params, boolean addStatistics, long timeout) { @@ -130,8 +169,8 @@ private void runDataStatementsInTx(Reader reader, BlockingQueue queue Util.inThread(pools , () -> db.executeTransactionally(stmt, params, result -> consumeResult(result, queue, addStatistics, timeout))); } else { - Util.inThread(pools, () -> { - try (Result result = tx.execute(stmt, params)) { + Util.inTx(db, pools, threadTx -> { + try (Result result = threadTx.execute(stmt, params)) { return consumeResult(result, queue, addStatistics, timeout); } }); @@ -161,8 +200,10 @@ private void runSchemaStatementsInTx(Reader reader, BlockingQueue que public Stream runMany(@Name("cypher") String cypher, @Name("params") Map params, @Name(value = "config",defaultValue = "{}") Map config) { boolean addStatistics = Util.toBoolean(config.getOrDefault("statistics",true)); int timeout = Util.toInteger(config.getOrDefault("timeout",1)); + int queueCapacity = Util.toInteger(config.getOrDefault("queueCapacity",100)); + StringReader stringReader = new StringReader(cypher); - return runManyStatements(stringReader ,params, false, addStatistics, timeout); + return runManyStatements(stringReader ,params, false, addStatistics, timeout, queueCapacity); } private final static Pattern shellControl = Pattern.compile("^:?\\b(begin|commit|rollback)\\b", Pattern.CASE_INSENSITIVE); @@ -176,7 +217,7 @@ private Object consumeResult(Result result, BlockingQueue queue, bool queue.put(new RowResult(row++, result.next())); } if (addStatistics) { - queue.offer(new RowResult(-1, 
toMap(result.getQueryStatistics(), System.currentTimeMillis() - time, row)), timeout, TimeUnit.SECONDS); + queue.put(new RowResult(-1, toMap(result.getQueryStatistics(), System.currentTimeMillis() - time, row))); } return row; } catch (InterruptedException e) { @@ -307,7 +348,7 @@ public Stream mapParallel2(@Name("fragment") String fragment, @Name(" queue.put(RowResult.TOMBSTONE); return total; }); - return StreamSupport.stream(new QueueBasedSpliterator<>(queue, RowResult.TOMBSTONE, terminationGuard, timeout),true).map((rowResult) -> new MapResult(rowResult.result)); + return StreamSupport.stream(new QueueBasedSpliterator<>(queue, RowResult.TOMBSTONE, terminationGuard, (int)timeout),true).map((rowResult) -> new MapResult(rowResult.result)); } public Map parallelParams(@Name("params") Map params, String key, List partition) { diff --git a/src/main/java/apoc/date/Date.java b/src/main/java/apoc/date/Date.java index ac2fa6d8..7814439c 100644 --- a/src/main/java/apoc/date/Date.java +++ b/src/main/java/apoc/date/Date.java @@ -63,22 +63,25 @@ public double toYears(@Name("value") Object value, @Name(value = "format", defau } } - @Procedure(mode = Mode.WRITE) - @Description("CALL apoc.date.expire(node,time,'time-unit') - expire node in given time by setting :TTL label and `ttl` property") + @Procedure(mode = Mode.WRITE, deprecatedBy = "apoc.ttl.expireAtInstant") + @Description("CALL apoc.date.expire(node,time,'time-unit') - expire node at specified time by setting :TTL label and `ttl` property") + @Deprecated public void expire(@Name("node") Node node, @Name("time") long time, @Name("timeUnit") String timeUnit) { node.addLabel(Label.label("TTL")); node.setProperty("ttl",unit(timeUnit).toMillis(time)); } - @Procedure(mode = Mode.WRITE) - @Description("CALL apoc.date.expire.in(node,time,'time-unit') - expire node in given time-delta by setting :TTL label and `ttl` property") + @Procedure(mode = Mode.WRITE, deprecatedBy = "apoc.ttl.expireAfterTimeLength") + @Description("CALL 
apoc.date.expire.in(node,time,'time-unit') - expire node after specified length of time time by setting :TTL label and `ttl` property") + @Deprecated public void expireIn(@Name("node") Node node, @Name("timeDelta") long time, @Name("timeUnit") String timeUnit) { node.addLabel(Label.label("TTL")); node.setProperty("ttl",System.currentTimeMillis() + unit(timeUnit).toMillis(time)); } - @UserFunction + @UserFunction(deprecatedBy = "Neo4j native datetime using instant.field") @Description("apoc.date.fields('2012-12-23',('yyyy-MM-dd')) - return columns and a map representation of date parsed with the given format with entries for years,months,weekdays,days,hours,minutes,seconds,zoneid") + @Deprecated public Map fields(final @Name("date") String date, final @Name(value = "pattern", defaultValue = DEFAULT_FORMAT) String pattern) { if (date == null) { return Util.map(); @@ -94,8 +97,9 @@ public Map fields(final @Name("date") String date, final @Name(va return result.asMap(); } - @UserFunction + @UserFunction(deprecatedBy = "Neo4j native datetime using instant.field - e.g. datetime({epochMillis: dateInteger}).year") @Description("apoc.date.field(12345,('ms|s|m|h|d|month|year'),('TZ')") + @Deprecated public Long field(final @Name("time") Long time, @Name(value = "unit", defaultValue = "d") String unit, @Name(value = "timezone",defaultValue = "UTC") String timezone) { return (time == null) ? 
null @@ -121,7 +125,7 @@ public Map asMap() { } } - private TimeUnit unit(String unit) { + public static TimeUnit unit(String unit) { if (unit == null) return TimeUnit.MILLISECONDS; switch (unit.toLowerCase()) { diff --git a/src/main/java/apoc/export/csv/CsvLoaderConfig.java b/src/main/java/apoc/export/csv/CsvLoaderConfig.java index 66ea01de..b55a7e1c 100644 --- a/src/main/java/apoc/export/csv/CsvLoaderConfig.java +++ b/src/main/java/apoc/export/csv/CsvLoaderConfig.java @@ -9,20 +9,20 @@ public class CsvLoaderConfig { public static final String DELIMITER = "delimiter"; - public static final String ARRAY_DELIMITER = "arrayDelimiter"; - public static final String QUOTATION_CHARACTER = "quotationCharacter"; - public static final String STRING_IDS = "stringIds"; - public static final String SKIP_LINES = "skipLines"; - public static final String BATCH_SIZE = "batchSize"; - public static final String IGNORE_DUPLICATE_NODES = "ignoreDuplicateNodes"; - - public static char DELIMITER_DEFAULT = ','; - public static char ARRAY_DELIMITER_DEFAULT = ';'; - public static char QUOTATION_CHARACTER_DEFAULT = '"'; - public static boolean STRING_IDS_DEFAULT = true; - public static int SKIP_LINES_DEFAULT = 1; - public static int BATCH_SIZE_DEFAULT = 2000; - public static boolean IGNORE_DUPLICATE_NODES_DEFAULT = false; + private static final String ARRAY_DELIMITER = "arrayDelimiter"; + private static final String QUOTATION_CHARACTER = "quotationCharacter"; + private static final String STRING_IDS = "stringIds"; + private static final String SKIP_LINES = "skipLines"; + private static final String BATCH_SIZE = "batchSize"; + private static final String IGNORE_DUPLICATE_NODES = "ignoreDuplicateNodes"; + + private static char DELIMITER_DEFAULT = ','; + private static char ARRAY_DELIMITER_DEFAULT = ';'; + private static char QUOTATION_CHARACTER_DEFAULT = '"'; + private static boolean STRING_IDS_DEFAULT = true; + private static int SKIP_LINES_DEFAULT = 1; + private static int 
BATCH_SIZE_DEFAULT = 2000; + private static boolean IGNORE_DUPLICATE_NODES_DEFAULT = false; private final char delimiter; private final char arrayDelimiter; diff --git a/src/main/java/apoc/export/csv/CsvLoaderConstants.java b/src/main/java/apoc/export/csv/CsvLoaderConstants.java index 1d7f864d..158d81ea 100644 --- a/src/main/java/apoc/export/csv/CsvLoaderConstants.java +++ b/src/main/java/apoc/export/csv/CsvLoaderConstants.java @@ -4,7 +4,7 @@ public class CsvLoaderConstants { - public static final Pattern FIELD_PATTERN = Pattern.compile("^(?[^:]*)(:(?\\w+))?(\\((?\\w+)\\))?(?\\[\\])?$"); + public static final Pattern FIELD_PATTERN = Pattern.compile("^(?[^:]*)(:(?\\w+))?(\\((?[-a-zA-Z_0-9]+)\\))?(?\\[\\])?$"); public static final String ARRAY_PATTERN = "[]"; public static final String IGNORE_FIELD = "IGNORE"; diff --git a/src/main/java/apoc/export/cypher/ExportCypher.java b/src/main/java/apoc/export/cypher/ExportCypher.java index 3a518600..ef40609b 100644 --- a/src/main/java/apoc/export/cypher/ExportCypher.java +++ b/src/main/java/apoc/export/cypher/ExportCypher.java @@ -7,14 +7,23 @@ import apoc.export.util.ProgressReporter; import apoc.result.ProgressInfo; import apoc.util.QueueBasedSpliterator; +import apoc.util.QueueUtil; import apoc.util.Util; import org.apache.commons.lang3.StringUtils; import org.neo4j.cypher.export.CypherResultSubGraph; import org.neo4j.cypher.export.DatabaseSubGraph; import org.neo4j.cypher.export.SubGraph; -import org.neo4j.graphdb.*; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.Relationship; +import org.neo4j.graphdb.Result; +import org.neo4j.graphdb.Transaction; import org.neo4j.internal.helpers.collection.Iterables; -import org.neo4j.procedure.*; +import org.neo4j.procedure.Context; +import org.neo4j.procedure.Description; +import org.neo4j.procedure.Name; +import org.neo4j.procedure.Procedure; +import org.neo4j.procedure.TerminationGuard; import java.io.IOException; import 
java.util.Collection; @@ -118,9 +127,9 @@ private Stream exportCypher(@Name("file") String fileName, Str long timeout = c.getTimeoutSeconds(); final BlockingQueue queue = new ArrayBlockingQueue<>(1000); ProgressReporter reporterWithConsumer = reporter.withConsumer( - (pi) -> Util.put(queue,pi == ProgressInfo.EMPTY ? DataProgressInfo.EMPTY : new DataProgressInfo(pi).enrich(cypherFileManager),timeout)); + (pi) -> QueueUtil.put(queue,pi == ProgressInfo.EMPTY ? DataProgressInfo.EMPTY : new DataProgressInfo(pi).enrich(cypherFileManager),timeout)); Util.inTxFuture(pools.getDefaultExecutorService(), db, txInThread -> { doExport(graph, c, onlySchema, reporterWithConsumer, cypherFileManager); return true; }); - QueueBasedSpliterator spliterator = new QueueBasedSpliterator<>(queue, DataProgressInfo.EMPTY, terminationGuard, timeout); + QueueBasedSpliterator spliterator = new QueueBasedSpliterator<>(queue, DataProgressInfo.EMPTY, terminationGuard, Integer.MAX_VALUE); return StreamSupport.stream(spliterator, false); } else { doExport(graph, c, onlySchema, reporter, cypherFileManager); diff --git a/src/main/java/apoc/export/json/ImportJson.java b/src/main/java/apoc/export/json/ImportJson.java new file mode 100644 index 00000000..ee07c68c --- /dev/null +++ b/src/main/java/apoc/export/json/ImportJson.java @@ -0,0 +1,49 @@ +package apoc.export.json; + +import apoc.Pools; +import apoc.export.util.CountingReader; +import apoc.export.util.ProgressReporter; +import apoc.result.ProgressInfo; +import apoc.util.FileUtils; +import apoc.util.JsonUtil; +import apoc.util.Util; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.procedure.Context; +import org.neo4j.procedure.Description; +import org.neo4j.procedure.Mode; +import org.neo4j.procedure.Name; +import org.neo4j.procedure.Procedure; + +import java.util.Map; +import java.util.Scanner; +import java.util.stream.Stream; + +public class ImportJson { + @Context + public GraphDatabaseService db; + + @Context + public Pools 
pools; + + @Procedure(value = "apoc.import.json", mode = Mode.WRITE) + @Description("apoc.import.json(file,config) - imports the json list to the provided file") + public Stream all(@Name("file") String fileName, @Name(value = "config", defaultValue = "{}") Map config) { + ProgressInfo result = + Util.inThread(pools, () -> { + ImportJsonConfig importJsonConfig = new ImportJsonConfig(config); + ProgressReporter reporter = new ProgressReporter(null, null, new ProgressInfo(fileName, "file", "json")); + + try (final CountingReader reader = FileUtils.readerFor(fileName); + final Scanner scanner = new Scanner(reader).useDelimiter("\n|\r"); + JsonImporter jsonImporter = new JsonImporter(importJsonConfig, db, reporter)) { + while (scanner.hasNext()) { + Map row = JsonUtil.OBJECT_MAPPER.readValue(scanner.nextLine(), Map.class); + jsonImporter.importRow(row); + } + } + + return reporter.getTotal(); + }); + return Stream.of(result); + } +} diff --git a/src/main/java/apoc/export/json/ImportJsonConfig.java b/src/main/java/apoc/export/json/ImportJsonConfig.java new file mode 100644 index 00000000..386c3a9b --- /dev/null +++ b/src/main/java/apoc/export/json/ImportJsonConfig.java @@ -0,0 +1,52 @@ +package apoc.export.json; + +import apoc.util.Util; +import org.apache.commons.lang3.StringUtils; + +import java.util.Collection; +import java.util.Collections; +import java.util.Map; + +public class ImportJsonConfig { + + private final Map> nodePropertyMappings; + private final Map> relPropertyMappings; + + private final int unwindBatchSize; + private final int txBatchSize; + + private final String importIdName; + + public ImportJsonConfig(Map config) { + config = config == null ? 
Collections.emptyMap() : config; + this.nodePropertyMappings = (Map>) config.getOrDefault("nodePropertyMappings", Collections.emptyMap()); + this.relPropertyMappings = (Map>) config.getOrDefault("relPropertyMappings", Collections.emptyMap()); + this.unwindBatchSize = Util.toInteger(config.getOrDefault("unwindBatchSize", 5000)); + this.txBatchSize = Util.toInteger(config.getOrDefault("txBatchSize", 5000)); + this.importIdName = (String) config.getOrDefault("importIdName", "neo4jImportId"); + } + + public String typeForNode(Collection labels, String property) { + return labels.stream() + .map(label -> nodePropertyMappings.getOrDefault(label, Collections.emptyMap()).get(property)) + .filter(StringUtils::isNotBlank) + .findFirst() + .orElse(null); + } + + public String typeForRel(String type, String property) { + return relPropertyMappings.getOrDefault(type, Collections.emptyMap()).get(property); + } + + public int getUnwindBatchSize() { + return unwindBatchSize; + } + + public int getTxBatchSize() { + return txBatchSize; + } + + public String getImportIdName() { + return importIdName; + } +} diff --git a/src/main/java/apoc/export/json/JsonImporter.java b/src/main/java/apoc/export/json/JsonImporter.java new file mode 100644 index 00000000..8616a662 --- /dev/null +++ b/src/main/java/apoc/export/json/JsonImporter.java @@ -0,0 +1,331 @@ +package apoc.export.json; + +import apoc.export.util.Reporter; +import apoc.util.Util; +import org.apache.commons.lang3.StringUtils; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.Transaction; +import org.neo4j.values.storable.CoordinateReferenceSystem; +import org.neo4j.values.storable.DurationValue; +import org.neo4j.values.storable.PointValue; +import org.neo4j.values.storable.Values; + +import java.io.Closeable; +import java.io.IOException; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetTime; +import java.time.ZonedDateTime; +import 
java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class JsonImporter implements Closeable { + private static final String CREATE_NODE = "UNWIND $rows AS row " + + "CREATE (n:%s {%s: row.id}) SET n += row.properties"; + private static final String CREATE_RELS = "UNWIND $rows AS row " + + "MATCH (s:%s {%s: row.start.id}) " + + "MATCH (e:%s {%2$s: row.end.id}) " + + "CREATE (s)-[r:%s]->(e) SET r += row.properties"; + + private final List> paramList; + private final int unwindBatchSize; + private final int txBatchSize; + private final GraphDatabaseService db; + private final Reporter reporter; + + private String lastType; + private List lastLabels; + private Map lastRelTypes; + + private final ImportJsonConfig importJsonConfig; + + public JsonImporter(ImportJsonConfig importJsonConfig, + GraphDatabaseService db, + Reporter reporter) { + this.paramList = new ArrayList<>(importJsonConfig.getUnwindBatchSize()); + this.db = db; + this.txBatchSize = importJsonConfig.getTxBatchSize(); + this.unwindBatchSize = Math.min(importJsonConfig.getUnwindBatchSize(), txBatchSize); + this.reporter = reporter; + this.importJsonConfig = importJsonConfig; + } + + public void importRow(Map param) { + final String type = (String) param.get("type"); + + manageEntityType(type); + + switch (type) { + case "node": + manageNode(param); + break; + case "relationship": + manageRelationship(param); + break; + default: + throw new IllegalArgumentException("Current type not supported: " + type); + } + + final Map properties = (Map) param.getOrDefault("properties", Collections.emptyMap()); + updateReporter(type, properties); + param.put("properties", convertProperties(type, properties, null)); + + paramList.add(param); + if (paramList.size() % txBatchSize == 0) { + 
final Collection>> results = chunkData(); + paramList.clear(); + // write + writeUnwindBatch(results); + } + } + + private void writeUnwindBatch(Collection>> results) { + try (final Transaction tx = db.beginTx()) { + results.forEach(resultList -> { + if (resultList.size() == unwindBatchSize) { + write(tx, resultList); + } else { + paramList.addAll(resultList); + } + }); + tx.close(); + } + } + + private void manageEntityType(String type) { + if (lastType == null) { + lastType = type; + } + if (!type.equals(lastType)) { + flush(); + lastType = type; + } + } + + private void manageRelationship(Map param) { + Map relType = Util.map( + "start", getLabels((Map) param.get("start")), + "end", getLabels((Map) param.get("end")), + "label", getType(param)); + if (lastRelTypes == null) { + lastRelTypes = relType; + } + if (!relType.equals(lastRelTypes)) { + flush(); + lastRelTypes = relType; + } + } + + private void manageNode(Map param) { + List labels = getLabels(param); + if (lastLabels == null) { + lastLabels = labels; + } + if (!labels.equals(lastLabels)) { + flush(); + lastLabels = labels; + } + } + + private void updateReporter(String type, Map properties) { + final int size = properties.size() + 1; // +1 is for the "neo4jImportId" + switch (type) { + case "node": + reporter.update(1, 0, size); + break; + case "relationship": + reporter.update(0, 1, size); + break; + default: + throw new IllegalArgumentException("Current type not supported: " + type); + } + } + + private Stream> flatMap(Map map, String key) { + final String prefix = key != null ? key : ""; + return map.entrySet().stream() + .flatMap(e -> { + if (e.getValue() instanceof Map) { + return flatMap((Map) e.getValue(), prefix + "." + e.getKey()); + } else { + return Stream.of(new AbstractMap.SimpleEntry<>(prefix + "." 
+ e.getKey(), e.getValue())); + } + }); + } + + private List convertList(Collection coll, String classType) { + return coll.stream() + .map(c -> { + if (c instanceof Collection) { + return convertList((Collection) c, classType); + } + return convertMappedValue(c, classType); + }) + .collect(Collectors.toList()); + } + + private Map convertProperties(String type, Map properties, String keyPrefix) { + return properties.entrySet().stream() + .flatMap(e -> { + if (e.getValue() instanceof Map) { + Map map = (Map) e.getValue(); + String classType = getClassType(type, e.getKey()); + if (classType != null && "POINT".equals(classType.toUpperCase())) { + return Stream.of(e); + } + return flatMap(map, e.getKey()); + } else { + return Stream.of(e); + } + }) + .map(e -> { + String key = e.getKey(); + final String classType = getClassType(type, key); + if (e.getValue() instanceof Collection) { + final List coll = convertList((Collection) e.getValue(), classType); + return new AbstractMap.SimpleEntry<>(e.getKey(), coll); + } else { + return new AbstractMap.SimpleEntry<>(e.getKey(), + convertMappedValue(e.getValue(), classType)); + } + }) + .filter(e -> e.getValue() != null) + .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue())); + } + + private String getClassType(String type, String key) { + final String classType; + switch (type) { + case "node": + classType = importJsonConfig.typeForNode(lastLabels, key); + break; + case "relationship": + classType = importJsonConfig.typeForRel((String) lastRelTypes.get("label"), key); + break; + default: + classType = null; + break; + } + return classType; + } + + private Object convertMappedValue(Object value, String classType) { + if (classType == null) { + return value; + } + switch (classType.toUpperCase()) { + case "POINT": + value = toPoint((Map) value); + break; + case "LOCALDATE": + value = LocalDate.parse((String) value); + break; + case "LOCALTIME": + value = LocalTime.parse((String) value); + break; + case 
"LOCALDATETIME": + value = LocalDateTime.parse((String) value); + break; + case "DURATION": + value = DurationValue.parse((String) value); + break; + case "OFFSETTIME": + value = OffsetTime.parse((String) value); + break; + case "ZONEDDATETIME": + value = ZonedDateTime.parse((String) value); + break; + default: + break; + } + return value; + } + + private PointValue toPoint(Map pointMap) { + double x; + double y; + Double z = null; + + final CoordinateReferenceSystem crs = CoordinateReferenceSystem.byName((String) pointMap.get("crs")); + if (crs.getName().startsWith("wgs-84")) { + x = (double) pointMap.get("latitude"); + y = (double) pointMap.get("longitude"); + if (crs.getName().endsWith("-3d")) { + z = (double) pointMap.get("height"); + } + } else { + x = (double) pointMap.get("x"); + y = (double) pointMap.get("y"); + if (crs.getName().endsWith("-3d")) { + z = (double) pointMap.get("z"); + } + } + + return z != null ? Values.pointValue(crs, x, y, z) : Values.pointValue(crs, x, y); + } + + private String getType(Map param) { + return Util.quote((String) param.get("label")); + } + + private List getLabels(Map param) { + return ((List) param.getOrDefault("labels", Collections.emptyList())).stream() + .map(Util::quote) + .collect(Collectors.toList()); + } + + + private void write(Transaction tx, List> resultList) { + if (resultList.isEmpty()) return; + final String type = (String) resultList.get(0).get("type"); + String query = null; + switch (type) { + case "node": + query = String.format(CREATE_NODE, StringUtils.join(lastLabels, ":"), importJsonConfig.getImportIdName()); + break; + case "relationship": + String startLabels = StringUtils.join((List) lastRelTypes.get("start"), ":"); + String endLabels = StringUtils.join((List) lastRelTypes.get("end"), ":"); + String rel = (String) lastRelTypes.get("label"); + query = String.format(CREATE_RELS, startLabels, importJsonConfig.getImportIdName(), endLabels, rel); + break; + default: + throw new 
IllegalArgumentException("Current type not supported: " + type); + } + if (StringUtils.isNotBlank(query)) { + db.executeTransactionally(query, Collections.singletonMap("rows", resultList)); + } + } + + private Collection>> chunkData() { + AtomicInteger chunkCounter = new AtomicInteger(0); + return paramList.stream() + .collect(Collectors.groupingBy(it -> chunkCounter.getAndIncrement() / unwindBatchSize)) + .values(); + } + + @Override + public void close() throws IOException { + flush(); + reporter.done(); + } + + private void flush() { + if (!paramList.isEmpty()) { + final Collection>> results = chunkData(); + try (final Transaction tx = db.beginTx()) { + results.forEach(resultList -> write(tx, resultList)); + tx.close(); + } + paramList.clear(); + } + } +} diff --git a/src/main/java/apoc/export/util/DurationValueSerializer.java b/src/main/java/apoc/export/util/DurationValueSerializer.java new file mode 100644 index 00000000..88e822a1 --- /dev/null +++ b/src/main/java/apoc/export/util/DurationValueSerializer.java @@ -0,0 +1,20 @@ +package apoc.export.util; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import org.neo4j.values.storable.DurationValue; + +import java.io.IOException; + +public class DurationValueSerializer extends JsonSerializer { + + @Override + public void serialize(DurationValue value, JsonGenerator jsonGenerator, SerializerProvider serializers) throws IOException { + if (value == null) { + jsonGenerator.writeNull(); + } + jsonGenerator.writeString(value.toString()); + } +} + diff --git a/src/main/java/apoc/export/util/ExportUtils.java b/src/main/java/apoc/export/util/ExportUtils.java index 10c3edc6..38851c4f 100644 --- a/src/main/java/apoc/export/util/ExportUtils.java +++ b/src/main/java/apoc/export/util/ExportUtils.java @@ -3,6 +3,7 @@ import apoc.export.cypher.ExportFileManager; import apoc.result.ProgressInfo; import 
apoc.util.QueueBasedSpliterator; +import apoc.util.QueueUtil; import apoc.util.Util; import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.procedure.TerminationGuard; @@ -27,13 +28,13 @@ public static Stream getProgressInfoStream(GraphDatabaseService db long timeout = exportConfig.getTimeoutSeconds(); final ArrayBlockingQueue queue = new ArrayBlockingQueue<>(1000); ProgressReporter reporterWithConsumer = reporter.withConsumer( - (pi) -> Util.put(queue, pi == ProgressInfo.EMPTY ? ProgressInfo.EMPTY : new ProgressInfo(pi).drain(cypherFileManager.getStringWriter(format)), timeout) + (pi) -> QueueUtil.put(queue, pi == ProgressInfo.EMPTY ? ProgressInfo.EMPTY : new ProgressInfo(pi).drain(cypherFileManager.getStringWriter(format)), timeout) ); Util.inTxFuture(executorService, db, tx -> { dump.accept(reporterWithConsumer); return true; }); - QueueBasedSpliterator spliterator = new QueueBasedSpliterator<>(queue, ProgressInfo.EMPTY, terminationGuard, timeout); + QueueBasedSpliterator spliterator = new QueueBasedSpliterator<>(queue, ProgressInfo.EMPTY, terminationGuard, (int) timeout); return StreamSupport.stream(spliterator, false); } } diff --git a/src/main/java/apoc/export/util/FormatUtils.java b/src/main/java/apoc/export/util/FormatUtils.java index 71125b8b..54af2602 100644 --- a/src/main/java/apoc/export/util/FormatUtils.java +++ b/src/main/java/apoc/export/util/FormatUtils.java @@ -23,7 +23,7 @@ */ public class FormatUtils { - public static DecimalFormat decimalFormat = new DecimalFormat() { + private static DecimalFormat decimalFormat = new DecimalFormat() { { setMaximumFractionDigits(340); setGroupingUsed(false); diff --git a/src/main/java/apoc/gephi/Gephi.java b/src/main/java/apoc/gephi/Gephi.java index 51c54087..5494cccd 100644 --- a/src/main/java/apoc/gephi/Gephi.java +++ b/src/main/java/apoc/gephi/Gephi.java @@ -38,8 +38,8 @@ public static double doubleValue(Entity pc, String prop, Number defaultValue) { } - public static final String[] CAPTIONS = new 
String[]{"name", "title", "label"}; - public static final List RESERVED = Arrays.asList("label", "TYPE", "id", "source", "target", "weight", "directed"); + private static final String[] CAPTIONS = new String[]{"name", "title", "label"}; + private static final List RESERVED = Arrays.asList("label", "TYPE", "id", "source", "target", "weight", "directed"); // http://127.0.0.1:8080/workspace0?operation=updateGraph // TODO configure property-filters or transfer all properties @Procedure diff --git a/src/main/java/apoc/graph/document/builder/DocumentToGraph.java b/src/main/java/apoc/graph/document/builder/DocumentToGraph.java index 8cf360b9..175b81ce 100644 --- a/src/main/java/apoc/graph/document/builder/DocumentToGraph.java +++ b/src/main/java/apoc/graph/document/builder/DocumentToGraph.java @@ -23,6 +23,7 @@ public class DocumentToGraph { private static final String JSON_ROOT = "$"; + private final Map, Set> initialNodes; private Transaction tx; private RelationshipBuilder documentRelationBuilder; @@ -30,10 +31,31 @@ public class DocumentToGraph { private GraphsConfig config; public DocumentToGraph(Transaction tx, GraphsConfig config) { + this(tx, config, new HashSet<>()); + } + + public DocumentToGraph(Transaction tx, GraphsConfig config, Set initialNodes) { this.tx = tx; this.documentRelationBuilder = new RelationshipBuilder(config); this.documentLabelBuilder = new LabelBuilder(config); this.config = config; + + this.initialNodes = new HashMap<>(); + for (Node initialNode : initialNodes) { + Set labels = StreamSupport.stream(initialNode.getLabels().spliterator(), false).map(Label::name).collect(Collectors.toSet()); + if(this.initialNodes.containsKey(labels)) { + this.initialNodes.get(labels).add(initialNode); + } else { + this.initialNodes.put(labels, new HashSet<>(Arrays.asList(initialNode))); + } + } + } + + public Set toSet(Iterable collection) { + HashSet set = new HashSet(); + for (T item: collection) + set.add(item); + return set; } private boolean hasId(Map 
map, String path) { @@ -207,7 +229,12 @@ private Set getNodesWithSameLabels(Map, Set> nodes, Labe private Node getOrCreateVirtualNode(Map, Set> nodes, Label[] labels, Map idValues) { Set nodesWithSameIds = getNodesWithSameLabels(nodes, labels); - return nodesWithSameIds + Set initialNodesWithSameIds = getNodesWithSameLabels(this.initialNodes, labels); + + HashSet searchableNodes = new HashSet<>(nodesWithSameIds); + searchableNodes.addAll(initialNodesWithSameIds); + + return searchableNodes .stream() .filter(n -> { if (Stream.of(labels).anyMatch(label -> n.hasLabel(label))) { diff --git a/src/main/java/apoc/graph/document/builder/LabelBuilder.java b/src/main/java/apoc/graph/document/builder/LabelBuilder.java index 1848ea98..e4196b79 100644 --- a/src/main/java/apoc/graph/document/builder/LabelBuilder.java +++ b/src/main/java/apoc/graph/document/builder/LabelBuilder.java @@ -1,12 +1,14 @@ package apoc.graph.document.builder; import apoc.graph.util.GraphsConfig; +import apoc.text.Strings; import org.neo4j.graphdb.Label; import java.util.ArrayList; import java.util.List; import java.util.Map; +import static org.apache.commons.text.WordUtils.capitalize; import static org.apache.commons.text.WordUtils.capitalizeFully; public class LabelBuilder { @@ -18,6 +20,8 @@ public LabelBuilder(GraphsConfig config) { } public Label[] buildLabel(Map obj, String path) { + Strings strings = new Strings(); + List rawLabels = new ArrayList<>(); if (obj.containsKey(config.getLabelField())) { diff --git a/src/main/java/apoc/index/SchemaIndex.java b/src/main/java/apoc/index/SchemaIndex.java index 23f3a0c4..f9cd4056 100644 --- a/src/main/java/apoc/index/SchemaIndex.java +++ b/src/main/java/apoc/index/SchemaIndex.java @@ -8,7 +8,12 @@ import org.neo4j.graphdb.schema.IndexDefinition; import org.neo4j.internal.helpers.collection.Iterables; import org.neo4j.internal.helpers.collection.Iterators; -import org.neo4j.internal.kernel.api.*; +import org.neo4j.internal.kernel.api.CursorFactory; +import 
org.neo4j.internal.kernel.api.IndexReadSession; +import org.neo4j.internal.kernel.api.NodeValueIndexCursor; +import org.neo4j.internal.kernel.api.Read; +import org.neo4j.internal.kernel.api.SchemaRead; +import org.neo4j.internal.kernel.api.TokenRead; import org.neo4j.internal.schema.IndexDescriptor; import org.neo4j.internal.schema.IndexOrder; import org.neo4j.internal.schema.LabelSchemaDescriptor; @@ -17,7 +22,11 @@ import org.neo4j.kernel.impl.api.KernelStatement; import org.neo4j.kernel.impl.coreapi.InternalTransaction; import org.neo4j.kernel.internal.GraphDatabaseAPI; -import org.neo4j.procedure.*; +import org.neo4j.procedure.Context; +import org.neo4j.procedure.Description; +import org.neo4j.procedure.Name; +import org.neo4j.procedure.Procedure; +import org.neo4j.procedure.TerminationGuard; import org.neo4j.values.storable.Value; import java.util.Collections; @@ -68,7 +77,7 @@ public Stream distinctCount(@Name(value = "label", defaultVa .collect(new QueuePoisoningCollector(queue, POISON)) ).start(); - return StreamSupport.stream(new QueueBasedSpliterator<>(queue, POISON, terminationGuard, Long.MAX_VALUE),false); + return StreamSupport.stream(new QueueBasedSpliterator<>(queue, POISON, terminationGuard, Integer.MAX_VALUE),false); } private Object scanIndexDefinitionForKeys(IndexDefinition indexDefinition, @Name(value = "key", defaultValue = "") String keyName, BlockingQueue queue) { diff --git a/src/main/java/apoc/load/Jdbc.java b/src/main/java/apoc/load/Jdbc.java index a907f029..390c468d 100644 --- a/src/main/java/apoc/load/Jdbc.java +++ b/src/main/java/apoc/load/Jdbc.java @@ -7,6 +7,7 @@ import org.neo4j.logging.Log; import org.neo4j.procedure.Context; import org.neo4j.procedure.Description; +import org.neo4j.procedure.Mode; import org.neo4j.procedure.Name; import org.neo4j.procedure.Procedure; @@ -55,7 +56,7 @@ public static void loadDriver(@Name("driverClass") String driverClass) { } } - @Procedure + @Procedure(mode = Mode.WRITE) 
@Description("apoc.load.jdbc('key or url','table or statement', params, config) YIELD row - load from relational database, from a full table or a sql statement") public Stream jdbc(@Name("jdbc") String urlOrKey, @Name("tableOrSql") String tableOrSelect, @Name (value = "params", defaultValue = "[]") List params, @Name(value = "config",defaultValue = "{}") Map config) { @@ -104,7 +105,7 @@ private Stream executeQuery(String urlOrKey, String tableOrSelect, Ma } } - @Procedure + @Procedure(mode = Mode.DBMS) @Description("apoc.load.jdbcUpdate('key or url','statement',[params],config) YIELD row - update relational database, from a SQL statement with optional parameters") public Stream jdbcUpdate(@Name("jdbc") String urlOrKey, @Name("query") String query, @Name(value = "params", defaultValue = "[]") List params, @Name(value = "config",defaultValue = "{}") Map config) { log.info( String.format( "Executing SQL update: %s", query ) ); diff --git a/src/main/java/apoc/load/Xml.java b/src/main/java/apoc/load/Xml.java index e757ffcd..f99331fe 100644 --- a/src/main/java/apoc/load/Xml.java +++ b/src/main/java/apoc/load/Xml.java @@ -7,6 +7,7 @@ import apoc.result.NodeResult; import apoc.util.FileUtils; import apoc.util.Util; +import jdk.jfr.MemoryAddress; import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.StringUtils; import org.neo4j.graphdb.Label; @@ -538,6 +539,14 @@ public void addCurrentCharacterIndex(int length) { } @Procedure(mode = Mode.WRITE, value = "apoc.xml.import") + @Deprecated + @Description("Deprecated by apoc.import.xml") + public Stream importToGraphDeprecated(@Name("url") String url, @Name(value = "config", defaultValue = "{}") Map config) throws IOException, XMLStreamException { + return importToGraph(url, config); + } + + @Procedure(mode = Mode.WRITE, value = "apoc.import.xml") + @Description("apoc.import.xml(file,config) - imports graph from provided file") public Stream importToGraph(@Name("url") String url, @Name(value = "config", 
defaultValue = "{}") Map config) throws IOException, XMLStreamException { XmlImportConfig importConfig = new XmlImportConfig(config); //TODO: make labels, reltypes and magic properties configurable diff --git a/src/main/java/apoc/meta/Meta.java b/src/main/java/apoc/meta/Meta.java index b9e436fb..f4875076 100644 --- a/src/main/java/apoc/meta/Meta.java +++ b/src/main/java/apoc/meta/Meta.java @@ -1,11 +1,13 @@ package apoc.meta; +import org.neo4j.logging.Log; import apoc.result.GraphResult; import apoc.result.MapResult; import apoc.result.VirtualNode; import apoc.result.VirtualRelationship; import apoc.util.MapUtil; import org.neo4j.graphdb.*; +import org.neo4j.graphdb.schema.ConstraintType; import org.neo4j.graphdb.schema.ConstraintDefinition; import org.neo4j.graphdb.schema.IndexDefinition; import org.neo4j.graphdb.schema.Schema; @@ -15,6 +17,7 @@ import org.neo4j.internal.kernel.api.Read; import org.neo4j.internal.kernel.api.TokenRead; import org.neo4j.kernel.api.KernelTransaction; +import org.neo4j.kernel.internal.GraphDatabaseAPI; import org.neo4j.procedure.*; import org.neo4j.values.storable.DurationValue; @@ -31,7 +34,7 @@ import static org.neo4j.internal.kernel.api.TokenRead.ANY_LABEL; import static org.neo4j.internal.kernel.api.TokenRead.ANY_RELATIONSHIP_TYPE; -public class Meta { +public class Meta { @Context public Transaction tx; @@ -45,6 +48,15 @@ public class Meta { @Context public Transaction transaction; + @Context public Log log; + + public static class ConstraintTracker { + // The following maps are (label|rel-type)/constraintdefinition entries + + public static final Map> relConstraints = new HashMap<>(20);; + public static final Map> nodeConstraints = new HashMap<>(20);; + } + public enum Types { INTEGER,FLOAT,STRING,BOOLEAN,RELATIONSHIP,NODE,PATH,NULL,ANY,MAP,LIST,POINT,DATE,DATE_TIME,LOCAL_TIME,LOCAL_DATE_TIME,TIME,DURATION; @@ -430,6 +442,120 @@ public Stream schema(@Name(value = "config",defaultValue = "{}") Map< return Stream.of(new 
MapResult(nodes)); } + + // Start new code + + /** + * This procedure is intended to replicate what's in the core Neo4j product, but with the crucial difference that it + * supports flexible sampling options, and does not scan the entire database. The result is producing a table of + * metadata that is useful for generating "Tables 4 Labels" schema designs for RDBMSs, but in a more performant way. + */ + @Procedure + @Description("apoc.meta.nodeTypeProperties()") + public Stream nodeTypeProperties(@Name(value = "config",defaultValue = "{}") Map config) { + MetaConfig metaConfig = new MetaConfig(config); + try { + return collectTables4LabelsProfile(metaConfig).asNodeStream(); + } catch (Exception e) { + log.debug("meta.nodeTypeProperties(): Failed to return stream", e); + throw new RuntimeException(e); + } + } + + /** + * This procedure is intended to replicate what's in the core Neo4j product, but with the crucial difference that it + * supports flexible sampling options, and does not scan the entire database. The result is producing a table of + * metadata that is useful for generating "Tables 4 Labels" schema designs for RDBMSs, but in a more performant way. 
+ */ + @Procedure + @Description("apoc.meta.relTypeProperties()") + public Stream relTypeProperties(@Name(value = "config",defaultValue = "{}") Map config) { + MetaConfig metaConfig = new MetaConfig(config); + try { + return collectTables4LabelsProfile(metaConfig).asRelStream(); + } catch (Exception e) { + log.debug("meta.relTypeProperties(): Failed to return stream", e); + throw new RuntimeException(e); + } + } + + private Tables4LabelsProfile collectTables4LabelsProfile (MetaConfig config) { + Tables4LabelsProfile profile = new Tables4LabelsProfile(); + + Schema schema = tx.schema(); + + for (ConstraintDefinition cd : schema.getConstraints()) { + if (cd.isConstraintType(ConstraintType.NODE_PROPERTY_EXISTENCE)) { + List props = new ArrayList(10); + if (ConstraintTracker.nodeConstraints.containsKey(cd.getLabel().name())) { + props = ConstraintTracker.nodeConstraints.get(cd.getLabel().name()); + } + cd.getPropertyKeys().forEach(props::add); + ConstraintTracker.nodeConstraints.put(cd.getLabel().name(),props); + + } else if (cd.isConstraintType(ConstraintType.RELATIONSHIP_PROPERTY_EXISTENCE)) { + List tcd = new ArrayList(10); + List props = new ArrayList(10); + if (ConstraintTracker.relConstraints.containsKey(cd.getRelationshipType().name())) { + props = ConstraintTracker.relConstraints.get(cd.getRelationshipType().name()); + } + cd.getPropertyKeys().forEach(props::add); + ConstraintTracker.relConstraints.put(cd.getRelationshipType().name(), props); + } + } + + Map countStore = getLabelCountStore(); + + Set includeLabels = config.getIncludesLabels(); + Set excludes = config.getExcludes(); + + Set includeRels = config.getIncludesRels(); + Set excludeRels = config.getExcludeRels(); + + for (Label label : tx.getAllLabelsInUse()) { + String labelName = label.name(); + + if (!excludes.contains(labelName) && (includeLabels.isEmpty() || includeLabels.contains(labelName))) { + // Skip if explicitly excluded or at least 1 include specified and not included + + for 
(ConstraintDefinition cd : schema.getConstraints(label)) { profile.noteConstraint(label, cd); } + for (IndexDefinition index : schema.getIndexes(label)) { profile.noteIndex(label, index); } + + long labelCount = countStore.get(labelName); + long sample = getSampleForLabelCount(labelCount, config.getSample()); + + //System.out.println("Sampling " + sample + " for " + labelName); + + try (ResourceIterator nodes = tx.findNodes(label)) { + int count = 1; + while (nodes.hasNext()) { + Node node = nodes.next(); + if(count++ % sample == 0) { + boolean skipNode = false; + for (RelationshipType rel : node.getRelationshipTypes()) { + String relName = rel.name(); + if (excludeRels.contains(relName)) { + // Skip if explicitly excluded + skipNode = true; + } else if (!includeRels.isEmpty() && !includeRels.contains(relName)) { + // Skip if included set is specified and this is not in it. + skipNode = true; + } + } + if (skipNode != true) { + profile.observe(node, config); + } + } + } + } + } + } + + return profile.finished(); + } + + // End new code + private Map> collectMetaData (MetaConfig config) { Map> metaData = new LinkedHashMap<>(100); Schema schema = transaction.schema(); diff --git a/src/main/java/apoc/meta/MetaConfig.java b/src/main/java/apoc/meta/MetaConfig.java index 699b2947..7ba3a16b 100644 --- a/src/main/java/apoc/meta/MetaConfig.java +++ b/src/main/java/apoc/meta/MetaConfig.java @@ -1,5 +1,9 @@ package apoc.meta; +import org.neo4j.graphdb.Label; +import org.neo4j.graphdb.Relationship; +import org.neo4j.graphdb.RelationshipType; + import java.util.*; public class MetaConfig { @@ -7,18 +11,55 @@ public class MetaConfig { private Set includesLabels; private Set includesRels; private Set excludes; + private Set excludeRels; private long maxRels; private long sample; + /** + * A map of values, with the following keys and meanings. + * - labels: a list of strings, which are whitelisted node labels. If this list + * is specified **only these labels** will be examined. 
+ * - rels: a list of strings, which are whitelisted rel types. If this list is + * specified, **only these reltypes** will be examined. + * - excludes: a list of strings, which are node labels. This + * works like a blacklist: if listed here, the thing won't be considered. Everything + * else (subject to the whitelist) will be. + * - sample: a long number, i.e. "1 in (SAMPLE)". If set to 1000 this means that + * every 1000th node will be examined. It does **not** mean that a total of 1000 nodes + * will be sampled. + * - maxRels: the maximum number of relationships of a given type to look at. + * @param config + */ + public MetaConfig(Map config) { config = config != null ? config : Collections.emptyMap(); - this.includesLabels = new HashSet<>((Collection)config.getOrDefault("labels",Collections.EMPTY_SET)); - this.includesRels = new HashSet<>((Collection)config.getOrDefault("rels",Collections.EMPTY_SET)); - this.excludes = new HashSet<>((Collection)config.getOrDefault("excludes",Collections.EMPTY_SET)); + + // To maintain backwards compatibility, need to still support "labels", "rels" and "excludes" for "includeLabels", "includeRels" and "excludeLabels" respectively. 
+ + Set includesLabelsLocal = new HashSet<>((Collection)config.getOrDefault("labels",Collections.EMPTY_SET)); + Set includesRelsLocal = new HashSet<>((Collection)config.getOrDefault("rels",Collections.EMPTY_SET)); + Set excludesLocal = new HashSet<>((Collection)config.getOrDefault("excludes",Collections.EMPTY_SET)); + + + if (includesLabelsLocal.isEmpty()) { + includesLabelsLocal = new HashSet<>((Collection)config.getOrDefault("includeLabels",Collections.EMPTY_SET)); + } + if (includesRelsLocal.isEmpty()) { + includesRelsLocal = new HashSet<>((Collection)config.getOrDefault("includeRels",Collections.EMPTY_SET)); + } + if (excludesLocal.isEmpty()) { + excludesLocal = new HashSet<>((Collection)config.getOrDefault("excludeLabels",Collections.EMPTY_SET)); + } + + this.includesLabels = includesLabelsLocal; + this.includesRels = includesRelsLocal; + this.excludes = excludesLocal; + this.excludeRels = new HashSet<>((Collection)config.getOrDefault("excludeRels",Collections.EMPTY_SET)); this.sample = (long) config.getOrDefault("sample", 1000L); this.maxRels = (long) config.getOrDefault("maxRels", 100L); } + public Set getIncludesLabels() { return includesLabels; } @@ -35,12 +76,60 @@ public void setExcludes(Set excludes) { this.excludes = excludes; } + public Set getExcludeRels() { + return excludeRels; + } + public long getSample() { return sample; } - public long getMaxRels() { return maxRels; } + + /** + * @param l + * @return true if the label matches the mask expressed by this object, false otherwise. + */ + public boolean matches(Label l) { + if (getExcludes().contains(l.name())) { return false; } + if (getIncludesLabels().isEmpty()) { return true; } + return getIncludesLabels().contains(l.name()); + } + + /** + * @param labels + * @return true if any of the labels matches the mask expressed by this object, false otherwise. + */ + public boolean matches(Iterable