# --- modules/data/load/loaders.py ---
class PointCloudLoader(AbstractLoader):
    r"""Loader for the toy point-cloud dataset.

    Parameters
    ----------
    parameters : DictConfig
        Configuration parameters. ``data_dir`` and ``num_points`` are read
        from it; ``num_classes`` is set to 2 when it is missing.
    """

    def __init__(self, parameters: DictConfig):
        super().__init__(parameters)
        self.parameters = parameters
        self.data_dir = self.parameters["data_dir"]
        # Use ``self.parameters`` (set just above) instead of an attribute this
        # class never assigns. The default is written back into the config
        # itself, presumably so other consumers of the same config object see
        # ``num_classes`` as well -- verify against callers.
        if "num_classes" not in self.parameters:
            self.parameters["num_classes"] = 2

    def load(self) -> torch_geometric.data.Dataset:
        r"""Load the point-cloud dataset.

        Returns
        -------
        torch_geometric.data.Dataset
            A ``CustomDataset`` wrapping the single generated point cloud,
            rooted at the configured ``data_dir``.

        Raises
        ------
        KeyError
            If ``num_points`` is missing from the configuration.
        """
        data = load_point_cloud(
            num_classes=self.parameters["num_classes"],
            num_points=self.parameters["num_points"],
        )
        # Reuse the directory cached in ``__init__`` rather than re-reading it.
        return CustomDataset([data], self.data_dir)


# --- modules/data/utils/utils.py ---
def load_point_cloud(num_classes: int = 2, num_points: int = 18, seed: int = 42):
    """Create a toy point-cloud dataset.

    Parameters
    ----------
    num_classes : int, optional
        Number of distinct labels; every point gets a label drawn from
        ``[0, num_classes)``.
    num_points : int, optional
        Number of points to sample.
    seed : int, optional
        Seed of the NumPy random generator, which makes the data reproducible.

    Returns
    -------
    torch_geometric.data.Data
        Object with ``pos`` (float, shape ``(num_points, 2)``), ``x`` (float,
        shape ``(num_points, 1)``, integer values below 3) and ``y`` (long,
        shape ``(num_points,)``).
    """
    rng = np.random.default_rng(seed)

    # The draw order (positions, labels, features) determines the values
    # produced for a given seed, so it must not be reordered.
    points = torch.tensor(rng.random((num_points, 2)), dtype=torch.float)
    classes = torch.tensor(rng.integers(num_classes, size=num_points), dtype=torch.long)
    features = torch.tensor(rng.integers(3, size=(num_points, 1)), dtype=torch.float)

    return torch_geometric.data.Data(x=features, y=classes, pos=points)
# --- modules/transforms/liftings/pointcloud2simplicial/tangential_lifting.py ---
class TangentialLifting(PointCloud2SimplicialLifting):
    r"""Lift a point cloud to a simplicial complex using gudhi's tangential complex.

    Parameters
    ----------
    intrisic_dim : int, optional
        Intrinsic dimension of the manifold, passed as the first argument of
        ``gudhi.TangentialComplex``. Default is 2. The parameter name is kept
        as-is because callers may pass it by keyword.
    **kwargs : optional
        Additional arguments forwarded to the parent class.
    """

    def __init__(self, intrisic_dim=2, **kwargs):
        super().__init__(**kwargs)
        self.intrisic_dim = intrisic_dim

    def _get_lifted_topology(self, simplicial_complex: SimplicialComplex) -> dict:
        r"""Build the lifted-topology dictionary of a simplicial complex.

        Parameters
        ----------
        simplicial_complex : SimplicialComplex
            Complex whose 0-simplices carry a ``"features"`` attribute.

        Returns
        -------
        dict
            The output of ``get_complex_connectivity`` up to rank
            ``self.complex_dim``, plus ``"x_0"`` holding the stacked node
            features.
        """
        # NOTE(review): ``signed`` is not forwarded, so the connectivity is
        # always computed with get_complex_connectivity's default
        # (signed=False) -- confirm this is intended.
        lifted_topology = get_complex_connectivity(simplicial_complex, self.complex_dim)

        lifted_topology["x_0"] = torch.stack(
            list(simplicial_complex.get_simplex_attributes("features", 0).values())
        )

        return lifted_topology

    def lift_topology(self, data: torch_geometric.data.Data, **kwargs) -> dict:
        r"""Lift the point cloud in ``data`` to a simplicial complex.

        Parameters
        ----------
        data : torch_geometric.data.Data
            Input data; ``data.pos`` provides the point coordinates and
            ``data.x`` the per-point features.
        **kwargs : optional
            Unused.

        Returns
        -------
        dict
            The lifted topology, see ``_get_lifted_topology``.
        """
        # Initialize and build the tangential complex from the point positions.
        tangential_complex = gd.TangentialComplex(self.intrisic_dim, data.pos)
        tangential_complex.compute_tangential_complex()

        # ``from_gudhi`` is an alternate constructor: call it on the class
        # instead of on a throwaway empty instance.
        simplicial_complex = SimplicialComplex.from_gudhi(
            tangential_complex.create_simplex_tree()
        )

        # The connectivity is computed up to the dimension actually reached by
        # the complex, overriding any previously stored ``complex_dim``.
        self.complex_dim = simplicial_complex.dim

        # Attach each point's feature row to the node with the same index.
        node_features = {i: data.x[i, :] for i in range(data.x.shape[0])}
        simplicial_complex.set_simplex_attributes(node_features, name="features")

        return self._get_lifted_topology(simplicial_complex)


# --- test/transforms/liftings/pointcloud2simplicial/test_tangential_lifting.py ---
class TestTangentialLifting:
    """Test the TangentialLifting class."""

    def setup_method(self):
        """Create a 5-point toy cloud and a signed and an unsigned lifting."""
        # Load the point cloud
        SEED = 42
        self.data = load_point_cloud(num_points=5, seed=SEED)

        # Initialise the TangentialLifting class
        self.lifting_signed = TangentialLifting(signed=True)
        self.lifting_unsigned = TangentialLifting(signed=False)

    def test_lift_topology(self):
        """Test the lift_topology method."""
        lifted_data_signed = self.lifting_signed.forward(self.data.clone())
        lifted_data_unsigned = self.lifting_unsigned.forward(self.data.clone())

        # NOTE(review): the expected matrices below are non-negative, so the
        # signed and unsigned assertions compare against the same values.
        expected_incidence_1 = torch.tensor(
            [
                [1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                [1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0],
                [0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0],
                [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0],
            ]
        )

        assert (
            abs(expected_incidence_1) == lifted_data_unsigned.incidence_1.to_dense()
        ).all(), "Something is wrong with unsigned incidence_1 (nodes to edges)."
        assert (
            expected_incidence_1 == lifted_data_signed.incidence_1.to_dense()
        ).all(), "Something is wrong with signed incidence_1 (nodes to edges)."

        expected_incidence_2 = torch.tensor(
            [
                [1.0, 0.0, 0.0],
                [1.0, 1.0, 0.0],
                [0.0, 1.0, 0.0],
                [1.0, 0.0, 0.0],
                [0.0, 0.0, 1.0],
                [0.0, 0.0, 1.0],
                [0.0, 1.0, 1.0],
            ]
        )

        assert (
            abs(expected_incidence_2) == lifted_data_unsigned.incidence_2.to_dense()
        ).all(), "Something is wrong with unsigned incidence_2 (edges to triangles)."
        assert (
            expected_incidence_2 == lifted_data_signed.incidence_2.to_dense()
        ).all(), "Something is wrong with signed incidence_2 (edges to triangles)."
toy_point_cloud:\n", + "\n", + "{'data_domain': 'point_cloud',\n", + " 'data_type': 'toy_dataset',\n", + " 'data_name': 'toy_point_cloud',\n", + " 'data_dir': 'datasets/point_cloud/toy_dataset',\n", + " 'num_points': 8,\n", + " 'num_classes': 2,\n", + " 'num_features': 1,\n", + " 'task': 'classification',\n", + " 'loss_type': 'cross_entropy'}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + } + ], + "source": [ + "dataset_name = \"toy_point_cloud\"\n", + "dataset_config = load_dataset_config(dataset_name)\n", + "loader = PointCloudLoader(dataset_config)\n", + "\n", + "dataset = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "eb249eaa-77db-433b-a934-63d3e75f0e15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Transform configuration for pointcloud2simplicial/tangential_lifting:\n", + "\n", + "{'transform_type': 'lifting',\n", + " 'transform_name': 'TangentialLifting',\n", + " 'feature_lifting': 'ProjectionSum'}\n" + ] + } + ], + "source": [ + "transform_type = \"liftings\"\n", + "# If the transform is a topological lifting, it should include both the type of the lifting and the identifier\n", + "transform_id = \"pointcloud2simplicial/tangential_lifting\"\n", + "\n", + "# Read yaml file\n", + "transform_config = {\n", + " \"lifting\": load_transform_config(transform_type, transform_id)\n", + " # other transforms (e.g. data manipulations, feature liftings) can be added here\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "75e6c321-850c-4852-8c29-fa9fc7cb524b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Processing...\n", + "Done!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Dataset only contains 1 sample:\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - The complex has 8 0-cells.\n", + " - The 0-cells have features dimension 1\n", + " - The complex has 14 1-cells.\n", + " - The 1-cells have features dimension 1\n", + " - The complex has 7 2-cells.\n", + " - The 2-cells have features dimension 1\n", + "\n" + ] + } + ], + "source": [ + "lifted_dataset = PreProcessor(dataset, transform_config, loader.data_dir)\n", + "describe_data(lifted_dataset)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "78a43208-9028-4122-b10f-931bb47fade7", + "metadata": {}, + "source": [ + "# Tangential Complex: theory\n", + "\n", + "(Taken from respective [Gudhi tutorial](https://gudhi.inria.fr/python/latest/tangential_complex_user.html) by Clément Jamin). Tangential complexes were introduced in [1]. A Tangential Delaunay complex is a simplicial complex designed to reconstruct a $k$-dimensional smooth manifold embedded in \n", + "$d$-dimensional Euclidean space.\n", + "\n", + "[1] Jean-Daniel Boissonnat and Arijit Ghosh. Manifold reconstruction using tangential delaunay complexes. _Discrete & Computational Geometry_, 51(1):221–267, 2014. URL: http://dx.doi.org/10.1007/s00454-013-9557-2, doi:10.1007/s00454-013-9557-2.\n", + "\n", + "Consider the following point cloud, being a sample from a 1D curve. \n", + "\n", + "![](https://gudhi.inria.fr/python/latest/_images/tc_example_01.png)\n", + "\n", + "Tangent subspaces of points are estimated with PCA (more points are needed for that, 4 points are shown here for simplicity):\n", + "\n", + "![](https://gudhi.inria.fr/python/latest/_images/tc_example_02.png)\n", + "\n", + "Now consider a Voronoi diagram of these points shown in orange:\n", + "\n", + "![](https://gudhi.inria.fr/python/latest/_images/tc_example_03.png)\n", + "\n", + "For each point, construct its star in the Delaunay triangulation, restricted to its tangent subspace. 
The Tangential Delaunay complex is the union of those stars.\n", + "\n", + "In practice, neither the whole ambient Voronoi diagram nor the ambient Delaunay triangulation is computed – instead, local \n", + "$k$-dimensional regular triangulations are computed with a limited number of points as we only need the star of each point. More details can be found in [1].\n", + "\n", + "It is worth noting that inconsistencies between the stars may occur (a simplex is not in the star of all its vertices). One way to fix these inconsistencies is to perturb the points – `gudhi.TangentialComplex` [has methods for that](https://gudhi.inria.fr/python/latest/tangential_complex_ref.html).\n", + "\n", + "In the toy dataset lifted at the top of this notebook, $k$, the intrinsic dimension of the manifold, was set equal to $d$, the ambient dimension (both are 2), so the resulting TC reduces to the ordinary Delaunay triangulation, hence the presence of 2-cells. In practice, TCs are most useful when the data is concentrated near some lower-dimensional embedded manifold.\n", + "\n", + "The **lifting** from the original point cloud to the TC is quite straightforward: the points' features are simply transferred to the nodes (0-simplices) of the TC; for higher-order simplices, various feature-lifting methods are possible." + ] + }, + { + "cell_type": "markdown", + "id": "d5d6383f-78e9-4a1a-8cd2-44262ca17e0b", + "metadata": {}, + "source": [ + "# Create and Run a Simplicial NN Model\n", + "\n", + "In this section a simple model is created to test that the lifting used above works as intended. In this case the model uses the `up_laplacian_1` and the `down_laplacian_1`, so the lifting should make sure to add them to the data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f5776a9c-b062-4c4a-b479-0bf7a38a037a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Model configuration for simplicial SAN:\n", + "\n", + "{'in_channels': None,\n", + " 'hidden_channels': 32,\n", + " 'out_channels': None,\n", + " 'n_layers': 2,\n", + " 'n_filters': 2,\n", + " 'order_harmonic': 5,\n", + " 'epsilon_harmonic': 0.1}\n" + ] + } + ], + "source": [ + "from modules.models.simplicial.san import SANModel\n", + "\n", + "model_type = \"simplicial\"\n", + "model_id = \"san\"\n", + "model_config = load_model_config(model_type, model_id)\n", + "\n", + "model = SANModel(model_config, dataset_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "93f4b784-ce5e-47f5-aa24-833753b4e508", + "metadata": {}, + "outputs": [], + "source": [ + "y_hat = model(lifted_dataset.get(0))" + ] + }, + { + "cell_type": "markdown", + "id": "b0c5b2d1-9557-4c15-b6d7-f336d0db9af0", + "metadata": {}, + "source": [ + "If everything is correct the cell above should execute without errors." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}