@@ -242,6 +242,73 @@ detectCommunitiesWithKCoreDecomposition() {
242
242
calculateCommunityMetrics " ${@ } " " ${writePropertyName} "
243
243
}
244
244
245
# Node Embeddings using Fast Random Projection (FastRP) as input for HDBSCAN
#
# Runs the estimate, statistics and mutate queries (in that order) and passes all
# given parameters through to each of them.
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - dependencies_projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
#
# Fixed Parameters (appended by this function after the given ones):
# - dependencies_projection_write_property=embeddingsFastRandomProjection
#   Name of the property the embeddings are written to (not passed to the statistics query).
# - dependencies_projection_embedding_dimension=2
#   Number of the dimensions and therefore size of the resulting array of floating point numbers
#
# NOTE(review): if a caller also passes one of the fixed parameters, which value
# takes effect depends on how execute_cypher treats duplicate keys - confirm there.
nodeEmbeddingsWithFastRandomProjectionForHDBSCAN() {
    local NODE_EMBEDDINGS_CYPHER_DIR="${CYPHER_DIR}/Node_Embeddings"
    local mutatePropertyName="dependencies_projection_write_property=embeddingsFastRandomProjection"
    local embeddingsDimension="dependencies_projection_embedding_dimension=2"

    # Statistics
    # The dimension parameter is quoted like every other argument so that it always
    # reaches execute_cypher as exactly one word, independent of the current IFS.
    execute_cypher "${NODE_EMBEDDINGS_CYPHER_DIR}/Node_Embeddings_1a_Fast_Random_Projection_Estimate.cypher" "${@}" "${mutatePropertyName}" "${embeddingsDimension}"
    execute_cypher "${NODE_EMBEDDINGS_CYPHER_DIR}/Node_Embeddings_1b_Fast_Random_Projection_Statistics.cypher" "${@}" "${embeddingsDimension}"

    # Run the algorithm and write the result into the in-memory projection ("mutate")
    execute_cypher "${NODE_EMBEDDINGS_CYPHER_DIR}/Node_Embeddings_1c_Fast_Random_Projection_Mutate.cypher" "${@}" "${mutatePropertyName}" "${embeddingsDimension}"
}
269
+
270
# Community Detection using Hierarchical Density-Based Spatial Clustering (HDBSCAN)
#
# Steps, in order: estimate, statistics, mutate the in-memory projection, stream the
# grouped result into a CSV report, write the mutated property back to the graph,
# replace the community label and finally calculate the community metrics.
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
#   Label of the nodes that will be used for the projection. Example: "Package"
#   It is also used as the file name prefix of the CSV report.
# - dependencies_projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
# - dependencies_projection_node_embeddings_property=...
#   Name of the node property that contains node embeddings. Example: "embeddingsFastRandomProjection"
#
# Special Requirements:
# - This algorithm needs a node property with an array of floats to compute clusters.
#   One possible way is to use node embeddings for that (like FastRP).
detectCommunitiesWithHDBSCAN() {
    local communityQueries="${CYPHER_DIR}/Community_Detection"
    local projectionQueries="${CYPHER_DIR}/Dependencies_Projection"
    local propertyParameter="dependencies_projection_write_property=communityHdbscanLabel"
    local labelParameter="dependencies_projection_write_label=HDBSCAN"
    local nodeLabel
    local labelQuery

    # Statistics (memory estimation first, then the statistics run)
    execute_cypher "${communityQueries}/Community_Detection_11a_HDBSCAN_Estimate.cypher" "$@" "${propertyParameter}"
    execute_cypher "${communityQueries}/Community_Detection_11b_HDBSCAN_Statistics.cypher" "$@"

    # "Mutate": run the algorithm and keep the result in the in-memory projection
    execute_cypher "${communityQueries}/Community_Detection_11c_HDBSCAN_Mutate.cypher" "$@" "${propertyParameter}"

    # Stream the grouped communities into a CSV report named after the node label
    nodeLabel=$(extractQueryParameter "dependencies_projection_node" "$@")
    execute_cypher "${projectionQueries}/Dependencies_8_Stream_Mutated_Grouped.cypher" "$@" "${propertyParameter}" \
        > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_HDBSCAN.csv"

    # Update the graph using the already mutated property: node property first ...
    execute_cypher "${projectionQueries}/Dependencies_9_Write_Mutated.cypher" "$@" "${propertyParameter}"
    # ... then the label, which is deleted before it is added again
    for labelQuery in "Dependencies_10_Delete_Label.cypher" "Dependencies_11_Add_Label.cypher"; do
        execute_cypher "${projectionQueries}/${labelQuery}" "$@" "${propertyParameter}" "${labelParameter}"
    done

    calculateCommunityMetrics "$@" "${propertyParameter}"
}
311
+
245
312
# Community Detection using the Approximate Maximum k-cut Algorithm
246
313
#
247
314
# Required Parameters:
@@ -402,6 +469,13 @@ detectCommunities() {
402
469
time detectCommunitiesWithKCoreDecomposition " ${@ } "
403
470
time detectCommunitiesWithApproximateMaximumKCut " ${@ } "
404
471
time calculateLocalClusteringCoefficient " ${@ } "
472
+
473
+ # TODO Hard-wire built-in dependencies_projection_node_embeddings_property
474
+ nodeEmbeddingsProperty=$( extractQueryParameter " dependencies_projection_node_embeddings_property" " ${@ } " )
475
+ if [ -n " ${nodeEmbeddingsProperty} " ]; then
476
+ time nodeEmbeddingsWithFastRandomProjectionForHDBSCAN " ${@ } "
477
+ time detectCommunitiesWithHDBSCAN " ${@ } "
478
+ fi
405
479
compareCommunityDetectionResults " ${@ } "
406
480
listAllResults " ${@ } "
407
481
}
@@ -415,7 +489,7 @@ ARTIFACT_GAMMA="dependencies_leiden_gamma=1.11" # default = 1.00
415
489
ARTIFACT_KCUT=" dependencies_maxkcut=5" # default = 2
416
490
417
491
if createUndirectedDependencyProjection " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " ; then
418
- detectCommunities " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " " ${ARTIFACT_GAMMA} " " ${ARTIFACT_KCUT} "
492
+ detectCommunities " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " " ${ARTIFACT_GAMMA} " " ${ARTIFACT_KCUT} " # "${ARTIFACT_NODE_EMBEDDINGS}"
419
493
writeLeidenModularity " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} "
420
494
fi
421
495
@@ -426,9 +500,10 @@ PACKAGE_NODE="dependencies_projection_node=Package"
426
500
PACKAGE_WEIGHT=" dependencies_projection_weight_property=weight25PercentInterfaces"
427
501
PACKAGE_GAMMA=" dependencies_leiden_gamma=1.14" # default = 1.00
428
502
PACKAGE_KCUT=" dependencies_maxkcut=20" # default = 2
503
+ PACKAGE_NODE_EMBEDDINGS=" dependencies_projection_node_embeddings_property=embeddingsFastRandomProjection" # default = none
429
504
430
505
if createUndirectedDependencyProjection " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " ; then
431
- detectCommunities " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " " ${PACKAGE_GAMMA} " " ${PACKAGE_KCUT} "
506
+ detectCommunities " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " " ${PACKAGE_GAMMA} " " ${PACKAGE_KCUT} " " ${PACKAGE_NODE_EMBEDDINGS} "
432
507
writeLeidenModularity " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} "
433
508
434
509
# Package Community Detection - Special CSV Queries after update
@@ -444,8 +519,7 @@ TYPE_GAMMA="dependencies_leiden_gamma=5.00" # default = 1.00
444
519
TYPE_KCUT=" dependencies_maxkcut=100" # default = 2
445
520
446
521
if createUndirectedJavaTypeDependencyProjection " ${TYPE_PROJECTION} " ; then
447
- detectCommunities " ${TYPE_PROJECTION} " " ${TYPE_NODE} " " ${TYPE_WEIGHT} " " ${TYPE_GAMMA} " " ${TYPE_KCUT} "
448
-
522
+ detectCommunities " ${TYPE_PROJECTION} " " ${TYPE_NODE} " " ${TYPE_WEIGHT} " " ${TYPE_GAMMA} " " ${TYPE_KCUT} " " ${TYPE_NODE_EMBEDDINGS} "
449
523
# Type Community Detection - Special CSV Queries after update
450
524
execute_cypher " ${CYPHER_DIR} /Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > " ${FULL_REPORT_DIRECTORY} /Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
451
525
execute_cypher " ${CYPHER_DIR} /Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > " ${FULL_REPORT_DIRECTORY} /Type_communities_with_few_members_in_foreign_packages.csv"
0 commit comments