From 123d483a824448660aeaa4b16cdd38565319ab02 Mon Sep 17 00:00:00 2001 From: neoarchitect <57357079+neoarchitect@users.noreply.github.com> Date: Sun, 16 Feb 2020 23:10:36 -0800 Subject: [PATCH 001/125] Tables4Labels - 4.x (#1401) * Adding T4L feature from APOC 3.5 * Fixed double underscore issue on node labels for relTypeProperties * Adding T4L feature from APOC 3.5 * Fixed double underscore issue on node labels for relTypeProperties * Changes to error handling, etc, based on code review comments by sarmbruster * Added tests for T4L * Added asciidoc documentation for new procedures * Rewrite of constraint checking mechanism for performance in T4L --- docs/asciidoc/introspection/meta.adoc | 4 +- src/main/java/apoc/meta/Meta.java | 125 +++++++ src/main/java/apoc/meta/MetaConfig.java | 97 +++++- .../java/apoc/meta/Tables4LabelsProfile.java | 313 ++++++++++++++++++ .../meta/tablesforlabels/OrderedLabels.java | 43 +++ .../PropertyContainerProfile.java | 88 +++++ .../meta/tablesforlabels/PropertyTracker.java | 74 +++++ .../apoc/meta/MetaEnterpriseFeaturesTest.java | 111 +++++++ src/test/java/apoc/meta/MetaTest.java | 279 ++++++++++++++++ 9 files changed, 1129 insertions(+), 5 deletions(-) create mode 100644 src/main/java/apoc/meta/Tables4LabelsProfile.java create mode 100644 src/main/java/apoc/meta/tablesforlabels/OrderedLabels.java create mode 100644 src/main/java/apoc/meta/tablesforlabels/PropertyContainerProfile.java create mode 100644 src/main/java/apoc/meta/tablesforlabels/PropertyTracker.java create mode 100644 src/test/java/apoc/meta/MetaEnterpriseFeaturesTest.java diff --git a/docs/asciidoc/introspection/meta.adoc b/docs/asciidoc/introspection/meta.adoc index 05acc050..98c2c3bb 100644 --- a/docs/asciidoc/introspection/meta.adoc +++ b/docs/asciidoc/introspection/meta.adoc @@ -14,6 +14,8 @@ image::apoc.meta.graph.jpg[scaledwidth="100%"] | CALL apoc.meta.data | examines a subset of the graph to provide a tabular meta information | CALL apoc.meta.schema | examines a 
subset of the graph to provide a map-like meta information | CALL apoc.meta.stats yield labelCount, relTypeCount, propertyKeyCount, nodeCount, relCount, labels, relTypes, stats | returns the information stored in the transactional database statistics +| CALL apoc.meta.nodeTypeProperties({includeLabels:[label,...],includeRels:[rel-type,...],excludeLabels:[label,...],excludeRels:[rel-type,...]}) | replaces built-in function for node property schema to provide a sample-based result for high performance - used by the Neo4j BI Connector +| CALL apoc.meta.relTypeProperties({includeLabels:[label,...],includeRels:[rel-type,...],excludeLabels:[label,...],excludeRels:[rel-type,...]}) | replaces built-in function for relationship property schema to provide a sample-based result for high performance - used by the Neo4j BI Connector |=== .Functions @@ -49,4 +51,4 @@ endif::[] ---- MATCH (n:Person) RETURN apoc.meta.isType(n.age,"INTEGER") as ageType ----- \ No newline at end of file +---- diff --git a/src/main/java/apoc/meta/Meta.java b/src/main/java/apoc/meta/Meta.java index b9e436fb..2fd197df 100644 --- a/src/main/java/apoc/meta/Meta.java +++ b/src/main/java/apoc/meta/Meta.java @@ -1,11 +1,13 @@ package apoc.meta; +import org.neo4j.logging.Log; import apoc.result.GraphResult; import apoc.result.MapResult; import apoc.result.VirtualNode; import apoc.result.VirtualRelationship; import apoc.util.MapUtil; import org.neo4j.graphdb.*; +import org.neo4j.graphdb.schema.ConstraintType; import org.neo4j.graphdb.schema.ConstraintDefinition; import org.neo4j.graphdb.schema.IndexDefinition; import org.neo4j.graphdb.schema.Schema; @@ -15,6 +17,7 @@ import org.neo4j.internal.kernel.api.Read; import org.neo4j.internal.kernel.api.TokenRead; import org.neo4j.kernel.api.KernelTransaction; +import org.neo4j.kernel.internal.GraphDatabaseAPI; import org.neo4j.procedure.*; import org.neo4j.values.storable.DurationValue; @@ -45,6 +48,15 @@ public class Meta { @Context public Transaction transaction;
+ @Context public Log log; + + public static class ConstraintTracker { + // The following maps are (label|rel-type)/constraintdefinition entries + + public static Map> relConstraints = new HashMap<>(20);; + public static Map> nodeConstraints = new HashMap<>(20);; + } + public enum Types { INTEGER,FLOAT,STRING,BOOLEAN,RELATIONSHIP,NODE,PATH,NULL,ANY,MAP,LIST,POINT,DATE,DATE_TIME,LOCAL_TIME,LOCAL_DATE_TIME,TIME,DURATION; @@ -430,6 +442,119 @@ public Stream schema(@Name(value = "config",defaultValue = "{}") Map< return Stream.of(new MapResult(nodes)); } + + // Start new code + + /** + * This procedure is intended to replicate what's in the core Neo4j product, but with the crucial difference that it + * supports flexible sampling options, and does not scan the entire database. The result is producing a table of + * metadata that is useful for generating "Tables 4 Labels" schema designs for RDBMSs, but in a more performant way. + */ + @Procedure + @Description("apoc.meta.nodeTypeProperties()") + public Stream nodeTypeProperties(@Name(value = "config",defaultValue = "{}") Map config) { + MetaConfig metaConfig = new MetaConfig(config); + try { + return collectTables4LabelsProfile(metaConfig).asNodeStream(); + } catch (Exception e) { + log.debug("meta.nodeTypeProperties(): Failed to return stream", e); + throw new RuntimeException(e); + } + } + + /** + * This procedure is intended to replicate what's in the core Neo4j product, but with the crucial difference that it + * supports flexible sampling options, and does not scan the entire database. The result is producing a table of + * metadata that is useful for generating "Tables 4 Labels" schema designs for RDBMSs, but in a more performant way. 
+ */ + @Procedure + @Description("apoc.meta.relTypeProperties()") + public Stream relTypeProperties(@Name(value = "config",defaultValue = "{}") Map config) { + MetaConfig metaConfig = new MetaConfig(config); + try { + return collectTables4LabelsProfile(metaConfig).asRelStream(); + } catch (Exception e) { + log.debug("meta.relTypeProperties(): Failed to return stream", e); + throw new RuntimeException(e); + } + } + + private Tables4LabelsProfile collectTables4LabelsProfile (MetaConfig config) { + Tables4LabelsProfile profile = new Tables4LabelsProfile(); + + Schema schema = tx.schema(); + + for (ConstraintDefinition cd : schema.getConstraints()) { + if (cd.isConstraintType(ConstraintType.NODE_PROPERTY_EXISTENCE)) { + List tcd = new ArrayList(10); + if (ConstraintTracker.nodeConstraints.containsKey(cd.getLabel().name())) { + tcd = ConstraintTracker.nodeConstraints.get(cd.getLabel().name()); + } + tcd.add(cd); + ConstraintTracker.nodeConstraints.put(cd.getLabel().name(), tcd); + + } else if (cd.isConstraintType(ConstraintType.RELATIONSHIP_PROPERTY_EXISTENCE)) { + List tcd = new ArrayList(10); + if (ConstraintTracker.relConstraints.containsKey(cd.getRelationshipType().name())) { + tcd = ConstraintTracker.relConstraints.get(cd.getRelationshipType().name()); + } + tcd.add(cd); + ConstraintTracker.relConstraints.put(cd.getRelationshipType().name(), tcd); + } + } + + Map countStore = getLabelCountStore(); + + Set includeLabels = config.getIncludesLabels(); + Set excludes = config.getExcludes(); + + Set includeRels = config.getIncludesRels(); + Set excludeRels = config.getExcludeRels(); + + for (Label label : tx.getAllLabelsInUse()) { + String labelName = label.name(); + + if (!excludes.contains(labelName) && (includeLabels.isEmpty() || includeLabels.contains(labelName))) { + // Skip if explicitly excluded or at least 1 include specified and not included + + for (ConstraintDefinition cd : schema.getConstraints(label)) { profile.noteConstraint(label, cd); } + for 
(IndexDefinition index : schema.getIndexes(label)) { profile.noteIndex(label, index); } + + long labelCount = countStore.get(labelName); + long sample = getSampleForLabelCount(labelCount, config.getSample()); + + //System.out.println("Sampling " + sample + " for " + labelName); + + try (ResourceIterator nodes = tx.findNodes(label)) { + int count = 1; + while (nodes.hasNext()) { + Node node = nodes.next(); + if(count++ % sample == 0) { + boolean skipNode = false; + for (RelationshipType rel : node.getRelationshipTypes()) { + String relName = rel.name(); + if (excludeRels.contains(relName)) { + // Skip if explicitly excluded + skipNode = true; + } else if (!includeRels.isEmpty() && !includeRels.contains(relName)) { + // Skip if included set is specified and this is not in it. + skipNode = true; + } + } + if (skipNode != true) { + profile.observe(node, config); + } + } + } + } + } + } + + return profile.finished(); + } + + // End new code + private Map> collectMetaData (MetaConfig config) { Map> metaData = new LinkedHashMap<>(100); Schema schema = transaction.schema(); diff --git a/src/main/java/apoc/meta/MetaConfig.java b/src/main/java/apoc/meta/MetaConfig.java index 699b2947..7ba3a16b 100644 --- a/src/main/java/apoc/meta/MetaConfig.java +++ b/src/main/java/apoc/meta/MetaConfig.java @@ -1,5 +1,9 @@ package apoc.meta; +import org.neo4j.graphdb.Label; +import org.neo4j.graphdb.Relationship; +import org.neo4j.graphdb.RelationshipType; + import java.util.*; public class MetaConfig { @@ -7,18 +11,55 @@ public class MetaConfig { private Set includesLabels; private Set includesRels; private Set excludes; + private Set excludeRels; private long maxRels; private long sample; + /** + * A map of values, with the following keys and meanings. + * - labels: a list of strings, which are whitelisted node labels. If this list + * is specified **only these labels** will be examined. + * - rels: a list of strings, which are whitelisted rel types. 
If this list is + * specified, **only these reltypes** will be examined. + * - excludes: a list of strings, which are node labels. This + * works like a blacklist: if listed here, the thing won't be considered. Everything + * else (subject to the whitelist) will be. + * - sample: a long number, i.e. "1 in (SAMPLE)". If set to 1000 this means that + * every 1000th node will be examined. It does **not** mean that a total of 1000 nodes + * will be sampled. + * - maxRels: the maximum number of relationships of a given type to look at. + * @param config + */ + public MetaConfig(Map config) { config = config != null ? config : Collections.emptyMap(); - this.includesLabels = new HashSet<>((Collection)config.getOrDefault("labels",Collections.EMPTY_SET)); - this.includesRels = new HashSet<>((Collection)config.getOrDefault("rels",Collections.EMPTY_SET)); - this.excludes = new HashSet<>((Collection)config.getOrDefault("excludes",Collections.EMPTY_SET)); + + // To maintain backwards compatibility, need to still support "labels", "rels" and "excludes" for "includeLabels", "includeRels" and "excludeLabels" respectively. 
+ + Set includesLabelsLocal = new HashSet<>((Collection)config.getOrDefault("labels",Collections.EMPTY_SET)); + Set includesRelsLocal = new HashSet<>((Collection)config.getOrDefault("rels",Collections.EMPTY_SET)); + Set excludesLocal = new HashSet<>((Collection)config.getOrDefault("excludes",Collections.EMPTY_SET)); + + + if (includesLabelsLocal.isEmpty()) { + includesLabelsLocal = new HashSet<>((Collection)config.getOrDefault("includeLabels",Collections.EMPTY_SET)); + } + if (includesRelsLocal.isEmpty()) { + includesRelsLocal = new HashSet<>((Collection)config.getOrDefault("includeRels",Collections.EMPTY_SET)); + } + if (excludesLocal.isEmpty()) { + excludesLocal = new HashSet<>((Collection)config.getOrDefault("excludeLabels",Collections.EMPTY_SET)); + } + + this.includesLabels = includesLabelsLocal; + this.includesRels = includesRelsLocal; + this.excludes = excludesLocal; + this.excludeRels = new HashSet<>((Collection)config.getOrDefault("excludeRels",Collections.EMPTY_SET)); this.sample = (long) config.getOrDefault("sample", 1000L); this.maxRels = (long) config.getOrDefault("maxRels", 100L); } + public Set getIncludesLabels() { return includesLabels; } @@ -35,12 +76,60 @@ public void setExcludes(Set excludes) { this.excludes = excludes; } + public Set getExcludeRels() { + return excludeRels; + } + public long getSample() { return sample; } - public long getMaxRels() { return maxRels; } + + /** + * @param l + * @return true if the label matches the mask expressed by this object, false otherwise. + */ + public boolean matches(Label l) { + if (getExcludes().contains(l.name())) { return false; } + if (getIncludesLabels().isEmpty()) { return true; } + return getIncludesLabels().contains(l.name()); + } + + /** + * @param labels + * @return true if any of the labels matches the mask expressed by this object, false otherwise. + */ + public boolean matches(Iterable