diff --git a/.gitignore b/.gitignore index fa757a6..3e65b03 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ __pycache__/ /data/ /scripts/data/ /node_modules/ +.coverage # docs /docs/generated/ diff --git a/docs/notebooks/download_dataset_from_lamin.ipynb b/docs/notebooks/download_dataset_from_lamin.ipynb new file mode 100644 index 0000000..b4a6561 --- /dev/null +++ b/docs/notebooks/download_dataset_from_lamin.ipynb @@ -0,0 +1,1201 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[92m→\u001b[0m connected lamindb: scverse/spatialdata-db\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/groups/ml01/workspace/lea.zimmermann/software/miniconda3/envs/sddb_test/lib/python3.11/site-packages/dask/dataframe/__init__.py:31: FutureWarning: The legacy Dask DataFrame implementation is deprecated and will be removed in a future version. Set the configuration option `dataframe.query-planning` to `True` or None to enable the new Dask Dataframe implementation and silence this warning.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "import lamindb as ln\n", + "import shutil\n", + "import spatialdata as sd\n", + "from spatialdata_db.integrations import store_dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a list of available datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidkeydescriptionsuffixkindotypesizehashn_filesn_observations_hash_type_key_is_virtual_overwrite_versionsspace_idstorage_idschema_idversionis_latestrun_idcreated_atcreated_by_id_aux_branch_code
id
301J3fKc0m6IfFKqGT10000NoneView Visium SpatialData Example in Vitessce.vitessce.jsonNoneNone1842_ogAN9X4lJ-dYDkdL3SEYwNaNNonemd5TrueFalse12NoneNoneTrue257.02025-02-03 13:28:06.909252+00:008None1
300dBWsSTHSEVoYm7t70000vitessce_examples/visium.sdata.zarrVisium SpatialData Example.zarrdatasetNone14731802454r3hK7svWsNx1TIWdRyRsw9167.0Nonemd5-dTrueTrue12NoneNoneTrueNaN2025-02-03 13:11:16.519472+00:008None1
298wlgPi3jjfVoYUWjA0000None10X, Visium, Mouse, healthy, brain, 2023.zarrNonespatialdata116459181Ss6q2vgfyG3ARYWp0SUmsg443.0Nonemd5-dTrueTrue12NoneNoneTrue252.02025-01-21 14:17:50.268625+00:008None1
292bENTLCi4bBxkFdk30000None10X, anndata, Human, healthy, lymph node.anndata.zarrdatasetAnnData1990027410f1ev_U6XEzhfdyka2aSVA2038.0Nonemd5-dTrueTrue12NoneNoneTrue230.02025-01-20 16:49:45.453742+00:008None1
257Gdt4nEPT3QTzdmXc0000None10X, Visium, Human, healthy, lymph node.ome.zarrdatasetNone13404384mTHeMyuDsPVFBsfON2SZMQ265.0Nonemd5-dTrueTrue12NoneNoneTrue230.02024-12-19 09:56:39.264348+00:008None1
293jsq1NJbwJfXL1EtX0000NoneView Human Lymph Node Example in Vitessce.vitessce.jsonNoneNone3005qHci84d3Z-fLw8ML_3S0lgNaNNonemd5TrueFalse12NoneNoneTrue250.02025-01-20 16:51:28.656230+00:008None1
290u7ShvF2TOpFCKKOT0000None10X, Visium, Human, unknown, spinal_cord, 2020.zarrNonespatialdata27522152jW3Cq3su6K5FgMgaaz9DvA263.0Nonemd5-dTrueTrue12NoneNoneTrueNaN2025-01-19 21:54:04.931463+00:008None1
2884gLfyEWea7fJpXyQ0000None10X, Visium, Mouse, unknown, brain, 2020.zarrNonespatialdata54817412yocuS6gw1aoX2zwDNZsebg327.0Nonemd5-dTrueTrue12NoneNoneTrue243.02025-01-19 21:51:51.708790+00:008None1
284AqRfNjb1YHyMDZt00000None10X, Visium, Mouse, healthy, brain, 2022.zarrNonespatialdata54817400NTOnZzCUoYiXjypzHEud7g327.0Nonemd5-dTrueTrue12NoneNoneTrue239.02025-01-19 21:45:48.348520+00:008None1
278h298F4wLSpYnMo2C0000None10X, Visium, Human, unknown, brain_cerebellum,....zarrNonespatialdata28680862I3p1SB4RL5YR8EcLcsZsNQ227.0Nonemd5-dTrueTrue12NoneNoneTrue248.02025-01-19 21:34:25.397573+00:008None1
276o4CvH5ipm5g1Psyy0000None10X, Visium, Human, healthy, prostate, 2021.zarrNonespatialdata34069617Mjt1UQFsL-sd_eEXGnJWjQ259.0Nonemd5-dTrueTrue12NoneNoneTrue247.02025-01-19 21:32:33.064038+00:008None1
274PmbrQBYxBxayyhls0000None10X, Visium, Human, unknown, heart, 2020.zarrNonespatialdata41793228tNEMxpH6G5uoTY3CxRRMGw263.0Nonemd5-dTrueTrue12NoneNoneTrue245.02025-01-19 21:30:51.916798+00:008None1
272wg5zQhHX5cQ9UG4N0000None10X, Visium, Mouse, unknown, brain, 2020.zarrNonespatialdata57953090raOCaxJLh_weCxY7z4mu3w327.0Nonemd5-dTrueTrue12NoneNoneTrue244.02025-01-19 21:29:00.556320+00:008None1
271KgUohDusDLA4S6WW0000None10X, Visium, Mouse, unknown, brain, 2020.zarrNonespatialdata38143587Xdl3V5khe8508IjaJQhm8g327.0Nonemd5-dTrueTrue12NoneNoneTrue241.02025-01-19 21:26:41.761595+00:008None1
2708Xdm6p3J8FPOxok00000None10X, Visium, Mouse, unknown, brain, 2020.zarrNonespatialdata381435871SFkEeE1UvvswCpuCstDPw327.0Nonemd5-dTrueTrue12NoneNoneTrue241.02025-01-19 21:25:28.550886+00:008None1
269McoRBaiIuHp0Q8RQ0000None10X, Visium, Mouse, healthy, brain, 2022.zarrNonespatialdata54841203PlYRrY4VI-frNKHES5gvqw327.0Nonemd5-dTrueTrue12NoneNoneTrue240.02025-01-19 21:23:37.660374+00:008None1
268SYTEY2CR0OfTdq4N0000None10X, Visium, Mouse, unknown, brain, 2019.zarrNonespatialdata56180342pgkbcq_Rhf24MfhY-1zc1Q327.0Nonemd5-dTrueTrue12NoneNoneTrueNaN2025-01-19 21:19:25.551402+00:008None1
2668XBBsxnNHuWCOeOb0000None10X, Visium, Human, large intestine colorectal....zarrNonespatialdata52401277TUrS6Myy91ExOsSws_zQ2A323.0Nonemd5-dTrueTrue12NoneNoneTrue237.02025-01-19 16:42:31.115433+00:008None1
265LumVWsITkcZQteL70000NoneVisium vitessce demo.vitessce.jsonNoneNone2047SdxuKSaTOOT23HkA2jlLdgNaNNonemd5TrueFalse12NoneNoneTrue235.02025-01-16 19:17:02.333550+00:008None1
2630BDn4bWTAMtQmOXS0000NoneView Human Lymph Node Example in Vitessce.vitessce.jsonNoneNone3005-PchR8TOA4y0lj6IjmayiANaNNonemd5TrueFalse12NoneNoneTrue231.02025-01-08 13:31:22.631439+00:008None1
2561X1HPFLZIq5Qlwra0000NoneAnnData object for Human lymph node, 10X Genomics.anndata.zarrdatasetAnnData199002959YolRLEIAX6AE9IGLRSOjGA2038.0Nonemd5-dTrueTrue12NoneNoneTrue230.02024-12-19 09:55:30.210836+00:008None1
262I38PyB0QZJLwi3Io0000NoneView Human Lymph Node Example in Vitessce.vitessce.jsonNoneNone29876QGkKjYdn3QMkGBLOAip0wNaNNonemd5TrueFalse12NoneNoneTrue229.02024-12-19 10:12:16.511765+00:008None1
239Co2Bwl9TymNlnv9c0000None10X, Visium, Human, breast cancer, breast, 2022.zarrNonespatialdata53442529Z05oc7m9alFGiMo1l33DOQ311.0Nonemd5-dTrueTrue12NoneNoneTrue197.02024-12-16 15:04:08.589580+00:008None1
2308mjX2vs8KhTS3Y4c0000None10X, Visium, Human, prostate cancer, prostate,....zarrNonespatialdata60838635TNg5OjUQowqBR_B0komdBQ326.0Nonemd5-dTrueTrue12NoneNoneTrue197.02024-12-16 11:05:27.316266+00:008None1
228J6HsBBMXCJ8amco80000None10X, Xenium, Human, cancer, intestine_colon, 2023.zarrNonespatialdata5129800776AyFqQ-0ipr-sgDzUuEErmg786.0Nonemd5-dTrueTrue12NoneNoneTrue197.02024-12-16 10:59:12.338173+00:008None1
227AcJ31iZl6KgZR7BO0000None10X, Xenium, Human, healthy, heart, 2024.zarrNonespatialdata1483994224f7BgRVX9kNxvnSpm4BrOMA368.0Nonemd5-dTrueTrue12NoneNoneTrue197.02024-12-16 10:56:05.258469+00:008None1
226fVvF7TCAlJfKsWgo0000None10X, Xenium, Human, glioblastoma, brain, 2024.zarrNonespatialdata25439820101Y7XqV167fo0hN6xNsRQSWw2426.0Nonemd5-dTrueTrue12NoneNoneTrue197.02024-12-16 10:48:23.337843+00:008None1
2245UbbJ6cR0WzaBDHB0000None10X, Xenium, Human, acute lymphoid leukemia, b....zarrNonespatialdata2490544230JBdHTZJUvNltQNjFB8UdcQ598.0Nonemd5-dTrueTrue12NoneNoneTrue197.02024-12-16 10:27:24.088735+00:008None1
222ubllg1iYQBaiDJfW0000None10X, VisiumHD, Mouse, Healthy, Brain, 2024.zarrNonespatialdata1099865189r3sSzfkOliLFFfhDzmP9hw2215.0Nonemd5-dTrueTrue12NoneNoneTrue197.02024-12-16 10:07:09.710482+00:008None1
\n", + "
" + ], + "text/plain": [ + " uid key \\\n", + "id \n", + "301 J3fKc0m6IfFKqGT10000 None \n", + "300 dBWsSTHSEVoYm7t70000 vitessce_examples/visium.sdata.zarr \n", + "298 wlgPi3jjfVoYUWjA0000 None \n", + "292 bENTLCi4bBxkFdk30000 None \n", + "257 Gdt4nEPT3QTzdmXc0000 None \n", + "293 jsq1NJbwJfXL1EtX0000 None \n", + "290 u7ShvF2TOpFCKKOT0000 None \n", + "288 4gLfyEWea7fJpXyQ0000 None \n", + "284 AqRfNjb1YHyMDZt00000 None \n", + "278 h298F4wLSpYnMo2C0000 None \n", + "276 o4CvH5ipm5g1Psyy0000 None \n", + "274 PmbrQBYxBxayyhls0000 None \n", + "272 wg5zQhHX5cQ9UG4N0000 None \n", + "271 KgUohDusDLA4S6WW0000 None \n", + "270 8Xdm6p3J8FPOxok00000 None \n", + "269 McoRBaiIuHp0Q8RQ0000 None \n", + "268 SYTEY2CR0OfTdq4N0000 None \n", + "266 8XBBsxnNHuWCOeOb0000 None \n", + "265 LumVWsITkcZQteL70000 None \n", + "263 0BDn4bWTAMtQmOXS0000 None \n", + "256 1X1HPFLZIq5Qlwra0000 None \n", + "262 I38PyB0QZJLwi3Io0000 None \n", + "239 Co2Bwl9TymNlnv9c0000 None \n", + "230 8mjX2vs8KhTS3Y4c0000 None \n", + "228 J6HsBBMXCJ8amco80000 None \n", + "227 AcJ31iZl6KgZR7BO0000 None \n", + "226 fVvF7TCAlJfKsWgo0000 None \n", + "224 5UbbJ6cR0WzaBDHB0000 None \n", + "222 ubllg1iYQBaiDJfW0000 None \n", + "\n", + " description suffix \\\n", + "id \n", + "301 View Visium SpatialData Example in Vitessce .vitessce.json \n", + "300 Visium SpatialData Example .zarr \n", + "298 10X, Visium, Mouse, healthy, brain, 2023 .zarr \n", + "292 10X, anndata, Human, healthy, lymph node .anndata.zarr \n", + "257 10X, Visium, Human, healthy, lymph node .ome.zarr \n", + "293 View Human Lymph Node Example in Vitessce .vitessce.json \n", + "290 10X, Visium, Human, unknown, spinal_cord, 2020 .zarr \n", + "288 10X, Visium, Mouse, unknown, brain, 2020 .zarr \n", + "284 10X, Visium, Mouse, healthy, brain, 2022 .zarr \n", + "278 10X, Visium, Human, unknown, brain_cerebellum,... 
.zarr \n", + "276 10X, Visium, Human, healthy, prostate, 2021 .zarr \n", + "274 10X, Visium, Human, unknown, heart, 2020 .zarr \n", + "272 10X, Visium, Mouse, unknown, brain, 2020 .zarr \n", + "271 10X, Visium, Mouse, unknown, brain, 2020 .zarr \n", + "270 10X, Visium, Mouse, unknown, brain, 2020 .zarr \n", + "269 10X, Visium, Mouse, healthy, brain, 2022 .zarr \n", + "268 10X, Visium, Mouse, unknown, brain, 2019 .zarr \n", + "266 10X, Visium, Human, large intestine colorectal... .zarr \n", + "265 Visium vitessce demo .vitessce.json \n", + "263 View Human Lymph Node Example in Vitessce .vitessce.json \n", + "256 AnnData object for Human lymph node, 10X Genomics .anndata.zarr \n", + "262 View Human Lymph Node Example in Vitessce .vitessce.json \n", + "239 10X, Visium, Human, breast cancer, breast, 2022 .zarr \n", + "230 10X, Visium, Human, prostate cancer, prostate,... .zarr \n", + "228 10X, Xenium, Human, cancer, intestine_colon, 2023 .zarr \n", + "227 10X, Xenium, Human, healthy, heart, 2024 .zarr \n", + "226 10X, Xenium, Human, glioblastoma, brain, 2024 .zarr \n", + "224 10X, Xenium, Human, acute lymphoid leukemia, b... 
.zarr \n", + "222 10X, VisiumHD, Mouse, Healthy, Brain, 2024 .zarr \n", + "\n", + " kind otype size hash n_files \\\n", + "id \n", + "301 None None 1842 _ogAN9X4lJ-dYDkdL3SEYw NaN \n", + "300 dataset None 1473180245 4r3hK7svWsNx1TIWdRyRsw 9167.0 \n", + "298 None spatialdata 116459181 Ss6q2vgfyG3ARYWp0SUmsg 443.0 \n", + "292 dataset AnnData 199002741 0f1ev_U6XEzhfdyka2aSVA 2038.0 \n", + "257 dataset None 13404384 mTHeMyuDsPVFBsfON2SZMQ 265.0 \n", + "293 None None 3005 qHci84d3Z-fLw8ML_3S0lg NaN \n", + "290 None spatialdata 27522152 jW3Cq3su6K5FgMgaaz9DvA 263.0 \n", + "288 None spatialdata 54817412 yocuS6gw1aoX2zwDNZsebg 327.0 \n", + "284 None spatialdata 54817400 NTOnZzCUoYiXjypzHEud7g 327.0 \n", + "278 None spatialdata 28680862 I3p1SB4RL5YR8EcLcsZsNQ 227.0 \n", + "276 None spatialdata 34069617 Mjt1UQFsL-sd_eEXGnJWjQ 259.0 \n", + "274 None spatialdata 41793228 tNEMxpH6G5uoTY3CxRRMGw 263.0 \n", + "272 None spatialdata 57953090 raOCaxJLh_weCxY7z4mu3w 327.0 \n", + "271 None spatialdata 38143587 Xdl3V5khe8508IjaJQhm8g 327.0 \n", + "270 None spatialdata 38143587 1SFkEeE1UvvswCpuCstDPw 327.0 \n", + "269 None spatialdata 54841203 PlYRrY4VI-frNKHES5gvqw 327.0 \n", + "268 None spatialdata 56180342 pgkbcq_Rhf24MfhY-1zc1Q 327.0 \n", + "266 None spatialdata 52401277 TUrS6Myy91ExOsSws_zQ2A 323.0 \n", + "265 None None 2047 SdxuKSaTOOT23HkA2jlLdg NaN \n", + "263 None None 3005 -PchR8TOA4y0lj6IjmayiA NaN \n", + "256 dataset AnnData 199002959 YolRLEIAX6AE9IGLRSOjGA 2038.0 \n", + "262 None None 2987 6QGkKjYdn3QMkGBLOAip0w NaN \n", + "239 None spatialdata 53442529 Z05oc7m9alFGiMo1l33DOQ 311.0 \n", + "230 None spatialdata 60838635 TNg5OjUQowqBR_B0komdBQ 326.0 \n", + "228 None spatialdata 5129800776 AyFqQ-0ipr-sgDzUuEErmg 786.0 \n", + "227 None spatialdata 1483994224 f7BgRVX9kNxvnSpm4BrOMA 368.0 \n", + "226 None spatialdata 25439820101 Y7XqV167fo0hN6xNsRQSWw 2426.0 \n", + "224 None spatialdata 2490544230 JBdHTZJUvNltQNjFB8UdcQ 598.0 \n", + "222 None spatialdata 1099865189 
r3sSzfkOliLFFfhDzmP9hw 2215.0 \n", + "\n", + " n_observations _hash_type _key_is_virtual _overwrite_versions space_id \\\n", + "id \n", + "301 None md5 True False 1 \n", + "300 None md5-d True True 1 \n", + "298 None md5-d True True 1 \n", + "292 None md5-d True True 1 \n", + "257 None md5-d True True 1 \n", + "293 None md5 True False 1 \n", + "290 None md5-d True True 1 \n", + "288 None md5-d True True 1 \n", + "284 None md5-d True True 1 \n", + "278 None md5-d True True 1 \n", + "276 None md5-d True True 1 \n", + "274 None md5-d True True 1 \n", + "272 None md5-d True True 1 \n", + "271 None md5-d True True 1 \n", + "270 None md5-d True True 1 \n", + "269 None md5-d True True 1 \n", + "268 None md5-d True True 1 \n", + "266 None md5-d True True 1 \n", + "265 None md5 True False 1 \n", + "263 None md5 True False 1 \n", + "256 None md5-d True True 1 \n", + "262 None md5 True False 1 \n", + "239 None md5-d True True 1 \n", + "230 None md5-d True True 1 \n", + "228 None md5-d True True 1 \n", + "227 None md5-d True True 1 \n", + "226 None md5-d True True 1 \n", + "224 None md5-d True True 1 \n", + "222 None md5-d True True 1 \n", + "\n", + " storage_id schema_id version is_latest run_id \\\n", + "id \n", + "301 2 None None True 257.0 \n", + "300 2 None None True NaN \n", + "298 2 None None True 252.0 \n", + "292 2 None None True 230.0 \n", + "257 2 None None True 230.0 \n", + "293 2 None None True 250.0 \n", + "290 2 None None True NaN \n", + "288 2 None None True 243.0 \n", + "284 2 None None True 239.0 \n", + "278 2 None None True 248.0 \n", + "276 2 None None True 247.0 \n", + "274 2 None None True 245.0 \n", + "272 2 None None True 244.0 \n", + "271 2 None None True 241.0 \n", + "270 2 None None True 241.0 \n", + "269 2 None None True 240.0 \n", + "268 2 None None True NaN \n", + "266 2 None None True 237.0 \n", + "265 2 None None True 235.0 \n", + "263 2 None None True 231.0 \n", + "256 2 None None True 230.0 \n", + "262 2 None None True 229.0 \n", + "239 2 None 
None True 197.0 \n", + "230 2 None None True 197.0 \n", + "228 2 None None True 197.0 \n", + "227 2 None None True 197.0 \n", + "226 2 None None True 197.0 \n", + "224 2 None None True 197.0 \n", + "222 2 None None True 197.0 \n", + "\n", + " created_at created_by_id _aux _branch_code \n", + "id \n", + "301 2025-02-03 13:28:06.909252+00:00 8 None 1 \n", + "300 2025-02-03 13:11:16.519472+00:00 8 None 1 \n", + "298 2025-01-21 14:17:50.268625+00:00 8 None 1 \n", + "292 2025-01-20 16:49:45.453742+00:00 8 None 1 \n", + "257 2024-12-19 09:56:39.264348+00:00 8 None 1 \n", + "293 2025-01-20 16:51:28.656230+00:00 8 None 1 \n", + "290 2025-01-19 21:54:04.931463+00:00 8 None 1 \n", + "288 2025-01-19 21:51:51.708790+00:00 8 None 1 \n", + "284 2025-01-19 21:45:48.348520+00:00 8 None 1 \n", + "278 2025-01-19 21:34:25.397573+00:00 8 None 1 \n", + "276 2025-01-19 21:32:33.064038+00:00 8 None 1 \n", + "274 2025-01-19 21:30:51.916798+00:00 8 None 1 \n", + "272 2025-01-19 21:29:00.556320+00:00 8 None 1 \n", + "271 2025-01-19 21:26:41.761595+00:00 8 None 1 \n", + "270 2025-01-19 21:25:28.550886+00:00 8 None 1 \n", + "269 2025-01-19 21:23:37.660374+00:00 8 None 1 \n", + "268 2025-01-19 21:19:25.551402+00:00 8 None 1 \n", + "266 2025-01-19 16:42:31.115433+00:00 8 None 1 \n", + "265 2025-01-16 19:17:02.333550+00:00 8 None 1 \n", + "263 2025-01-08 13:31:22.631439+00:00 8 None 1 \n", + "256 2024-12-19 09:55:30.210836+00:00 8 None 1 \n", + "262 2024-12-19 10:12:16.511765+00:00 8 None 1 \n", + "239 2024-12-16 15:04:08.589580+00:00 8 None 1 \n", + "230 2024-12-16 11:05:27.316266+00:00 8 None 1 \n", + "228 2024-12-16 10:59:12.338173+00:00 8 None 1 \n", + "227 2024-12-16 10:56:05.258469+00:00 8 None 1 \n", + "226 2024-12-16 10:48:23.337843+00:00 8 None 1 \n", + "224 2024-12-16 10:27:24.088735+00:00 8 None 1 \n", + "222 2024-12-16 10:07:09.710482+00:00 8 None 1 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ln.Artifact.df()" + ] 
+ }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select an example uid - e.g. `wlgPi3jjfVoYUWjA0000` and retrieve the artifact:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "artifact = ln.Artifact.get(\"wlgPi3jjfVoYUWjA0000\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`store_dataset` will download the data associated with the artifact and move it into the target directory. The default path is `.` and the default name is the name of the cached file, which is derived from the artifact uid (here `wlgPi3jjfVoYUWjA.zarr`). A different path and name can be used via the parameters `path` and `name`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93m!\u001b[0m run input wasn't tracked, call `ln.track()` and re-run\n" + ] + } + ], + "source": [ + "path = store_dataset(artifact=artifact)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/lustre/groups/ml01/workspace/lea.zimmermann/software/miniconda3/envs/sddb_test/lib/python3.11/site-packages/zarr/creation.py:614: UserWarning: ignoring keyword argument 'read_only'\n", + " compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)\n" + ] + } + ], + "source": [ + "sdata = sd.read_zarr(path)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SpatialData object, with associated Zarr store: /ictstr01/home/icb/lea.zimmermann/projects/spatialdata-db/spatialdata-db/docs/notebooks/wlgPi3jjfVoYUWjA.zarr\n", + "├── Images\n", + "│ ├── 'CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain_full_image': DataTree[cyx] (3, 1578, 2000), (3, 789, 1000), (3, 394, 500), (3, 197, 250), (3, 98, 125)\n", + "│ ├── 'CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain_hires_image': DataArray[cyx] (3, 1578, 2000)\n", + "│ └── 
'CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain_lowres_image': DataArray[cyx] (3, 474, 600)\n", + "├── Shapes\n", + "│ └── 'CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain': GeoDataFrame shape: (6965, 2) (2D shapes)\n", + "└── Tables\n", + " └── 'table': AnnData (6965, 19465)\n", + "with coordinate systems:\n", + " ▸ 'downscaled_hires', with elements:\n", + " CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain_hires_image (Images), CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain (Shapes)\n", + " ▸ 'downscaled_lowres', with elements:\n", + " CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain_lowres_image (Images), CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain (Shapes)\n", + " ▸ 'global', with elements:\n", + " CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain_full_image (Images), CytAssist_Fresh_Frozen_Sagittal_Mouse_Brain (Shapes)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shutil.rmtree(path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sddb_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/spatialdata_db/integrations/__init__.py b/src/spatialdata_db/integrations/__init__.py new file mode 100644 index 0000000..8833c05 --- /dev/null +++ b/src/spatialdata_db/integrations/__init__.py @@ -0,0 +1,3 @@ +from .lamindb_wrappers import store_dataset + +__all__ = ["store_dataset"] diff --git a/src/spatialdata_db/integrations/lamindb_wrappers.py b/src/spatialdata_db/integrations/lamindb_wrappers.py new file mode 100644 index 0000000..d40a0bc --- /dev/null +++ 
b/src/spatialdata_db/integrations/lamindb_wrappers.py
@@ -0,0 +1,81 @@
+import shutil
+from pathlib import Path
+
+import lamindb as ln
+
+
+def store_dataset(
+    artifact: ln.Artifact, path: str | Path = Path("."), name: str | None = None, overwrite: bool = False
+) -> str:
+    """
+    Move a cached artifact into a target directory, optionally renaming it.
+
+    The artifact is fetched into the local cache via ``artifact.cache()`` and then moved (not copied)
+    to the target directory, so it no longer resides in the cache afterwards.
+
+    Parameters
+    ----------
+    - artifact (lamindb.Artifact): The artifact object from lamindb.
+    - path (Union[str, Path]): Directory where the artifact should be stored; created if missing (default: ".").
+    - name (Optional[str]): Name to store the artifact under (default: None, keeps the cached name).
+    - overwrite (bool): If True, deletes and replaces an existing file or directory. Defaults to False.
+
+    Returns
+    -------
+    - str: The absolute path to the stored artifact.
+
+    Raises
+    ------
+    - FileExistsError: If the target already exists and overwrite=False.
+    - FileNotFoundError: If the cached artifact cannot be found.
+    - PermissionError: If creating the directory, replacing the target or moving the artifact is not permitted.
+    - OSError: If replacing the target or moving the artifact fails for another OS-level reason.
+    """
+    target_dir = Path(path).resolve()
+
+    try:
+        target_dir.mkdir(parents=True, exist_ok=True)
+    except PermissionError as e:
+        raise PermissionError(f"Failed to create the target directory due to insufficient permissions: {e}") from e
+
+    cached_path = Path(artifact.cache()).resolve()
+    final_path = target_dir / (name if name else cached_path.name)
+
+    try:
+        target_exists = final_path.exists()
+    except PermissionError as e:
+        raise PermissionError(f"Insufficient permissions: {e}") from e
+
+    if target_exists and not overwrite:
+        raise FileExistsError(f"Target file already exists: {final_path}. Use overwrite=True to replace it.")
+
+    if cached_path == final_path:
+        # The artifact already sits at the requested location; there is nothing to replace or move.
+        return str(final_path)
+
+    if target_exists:
+        # Check the source first so a missing cache entry cannot cost us the existing target.
+        if not cached_path.exists():
+            raise FileNotFoundError(f"Failed to find cached artifact: {cached_path}")
+        # shutil.move() nests the source *inside* an existing directory instead of replacing it,
+        # so the old target has to be removed before moving.
+        try:
+            if final_path.is_dir() and not final_path.is_symlink():
+                shutil.rmtree(final_path)
+            else:
+                final_path.unlink()
+        except PermissionError as e:
+            raise PermissionError(f"Failed to replace existing target due to insufficient permissions: {e}") from e
+        except OSError as e:
+            raise OSError(f"Failed to replace existing target due to an OS error: {e}") from e
+
+    try:
+        shutil.move(str(cached_path), str(final_path))
+    except PermissionError as e:
+        raise PermissionError(f"Failed to move artifact due to insufficient permissions: {e}") from e
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"Failed to find cached artifact: {e}") from e
+    except OSError as e:
+        raise OSError(f"Failed to move artifact due to an OS error: {e}") from e
+
+    return str(final_path)
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2556551
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,13 @@
+import lamindb.setup as ln_setup
+
+
+def pytest_sessionstart(session):
+    """Initialize LamindB before any test runs."""
+    print("\n🔹 Setting up LamindB test instance...")
+    ln_setup.init(storage="lamin_test_instance", name="lamin_test_instance", schema="bionty")
+
+
+def pytest_sessionfinish(session):
+    """Clean up LamindB after all tests are completed."""
+    print("\n🔹 Cleaning up LamindB test instance after all tests are done...")
+    ln_setup.delete("lamin_test_instance", force=True, require_empty=False)
diff --git a/tests/integrations/test_lamin_wrappers.py b/tests/integrations/test_lamin_wrappers.py
new file mode 100644
index 0000000..f9438d2
--- /dev/null
+++ b/tests/integrations/test_lamin_wrappers.py
@@ -0,0 +1,147 @@
+import os
+import shutil
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import lamindb as ln
+import pytest
+
+from spatialdata_db.integrations import store_dataset
+
+DEFAULT_FILE_NAME = "test_data.zarr"
+
+
+@pytest.fixture(scope="function")
+def mock_artifact(tmp_path):
+    artifact = MagicMock(spec=ln.Artifact)  # Mock with ln.Artifact attributes
+    zarr_path = tmp_path / DEFAULT_FILE_NAME
+    zarr_path.mkdir()
+
+    artifact.cache = MagicMock(return_value=str(zarr_path))  # Explicitly add the `cache` method
+    return artifact
+
+
+def test_store_artifact_custom_path(mock_artifact, tmp_path):
+    """Test storing artifact in a provided directory."""
+    artifact = mock_artifact
+    target_path = tmp_path / "target_dir"
+    target_path.mkdir()
+
+    result_path = store_dataset(artifact, path=target_path)
+
+    assert Path(result_path).exists()
+    assert Path(result_path).parent == target_path
+
+
+def test_store_artifact_rename(mock_artifact, tmp_path):
+    """Test storing artifact with a custom name."""
+    artifact = mock_artifact
+    target_path = tmp_path / "target_dir"
+    target_path.mkdir()
+
+    new_name = "renamed_data.zarr"
+    result_path = store_dataset(artifact, path=target_path, name=new_name)
+
+    assert Path(result_path).exists()
+    assert Path(result_path).name == new_name
+    assert Path(result_path).parent == target_path
+
+
+def test_store_artifact_file_not_found(tmp_path):
+    """Test that FileNotFoundError is raised if artifact.cache() points to a non-existent file."""
+    artifact = MagicMock(spec=ln.Artifact)
+    zarr_path = tmp_path / "missing.zarr"
+
+    # Explicitly add the `cache` method
+    artifact.cache = MagicMock(return_value=str(zarr_path))
+
+    target_path = tmp_path / "target_dir"
+    with pytest.raises(FileNotFoundError):
+        store_dataset(artifact, path=target_path)
+
+
+def test_store_artifact_reading_permission_error(mock_artifact, tmp_path):
+    """Test that PermissionError is raised if the user has insufficient permissions for the target directory."""
+    artifact = mock_artifact
+    locked_dir = tmp_path / "locked"
+    locked_dir.mkdir()
+    os.chmod(locked_dir, 0o400)  # Read-only directory
+
+    try:
+        with pytest.raises(PermissionError, match="Insufficient permissions"):
+            store_dataset(artifact, path=locked_dir)
+    finally:
+        os.chmod(locked_dir, 0o700)  # Restore permissions even if the assertion fails
+
+
+def test_store_artifact_directory_creation_permission_error(mock_artifact, tmp_path):
+    """Test that PermissionError is raised if creating the target directory fails due to insufficient permissions."""
+    locked_parent = tmp_path / "locked_parent"
+    locked_parent.mkdir()
+    os.chmod(locked_parent, 0o400)  # Read-only directory
+
+    locked_dir = locked_parent / "subdir"
+
+    try:
+        with pytest.raises(PermissionError, match="Failed to create the target directory"):
+            store_dataset(mock_artifact, path=locked_dir)
+    finally:
+        os.chmod(locked_parent, 0o700)  # Restore permissions even if the assertion fails
+
+
+def test_store_artifact_move_permission_error(mock_artifact, monkeypatch, tmp_path):
+    """Test that PermissionError is raised if moving the artifact fails due to insufficient permissions."""
+    artifact = mock_artifact
+    target_path = tmp_path / "target_dir"
+    target_path.mkdir()
+
+    def mock_move(*args, **kwargs):
+        raise PermissionError("Mocked permission error")
+
+    monkeypatch.setattr(shutil, "move", mock_move)  # Mock shutil.move to always raise PermissionError
+
+    with pytest.raises(PermissionError, match="Failed to move artifact due to insufficient permissions"):
+        store_dataset(artifact, path=target_path)
+
+
+def test_store_artifact_os_error(mock_artifact, monkeypatch, tmp_path):
+    """Test that OSError is raised if a simulated system error occurs."""
+    artifact = mock_artifact
+
+    def mock_move(*args, **kwargs):
+        raise OSError("Mocked OS error")
+
+    monkeypatch.setattr(shutil, "move", mock_move)
+
+    target_path = tmp_path / "target_dir"
+    with pytest.raises(OSError, match="Mocked OS error"):
+        store_dataset(artifact, path=target_path)
+
+
+def test_store_artifact_overwrite_false(mock_artifact, tmp_path):
+    """Test that FileExistsError is raised if overwrite=False and file exists."""
+    artifact = mock_artifact
+    target_path = tmp_path / "existing_dir"
+    target_path.mkdir()
+    existing_file = target_path / DEFAULT_FILE_NAME
+    existing_file.mkdir()
+
+    with pytest.raises(FileExistsError):
+        store_dataset(artifact, path=target_path, overwrite=False)
+
+
+def test_store_artifact_overwrite_true(mock_artifact, tmp_path):
+    """Test that an existing target is replaced (not nested into) when overwrite=True."""
+    artifact = mock_artifact
+    target_path = tmp_path / "existing_dir"
+    target_path.mkdir()
+    existing_file = target_path / DEFAULT_FILE_NAME
+    existing_file.mkdir()
+    (existing_file / "stale.txt").touch()
+
+    result_path = store_dataset(artifact, path=target_path, overwrite=True)
+
+    assert Path(result_path).exists()
+    assert Path(result_path).name == DEFAULT_FILE_NAME
+    assert not (existing_file / "stale.txt").exists()
+    assert not (existing_file / DEFAULT_FILE_NAME).exists()