Skip to content

2025.3 #95

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions images/datascience-notebook/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -64,29 +64,32 @@ USER jovyan

# Python/Mamba Deps
## Package versions
ARG JUPYTERSERVER_VERSION=2.14.2 NBGRADER_VERSION=0.9.3 JUPYTERLAB_VERSION=4.2.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.2.1 NBCLASSIC_VERSION=1.1.0
ARG JUPYTERHUB_VERSION=4.1.5 JUPYTERSERVER_VERSION=2.14.2 NBGRADER_VERSION=0.9.3 JUPYTERLAB_VERSION=4.2.4 NBCONVERT_VERSION=7.16.4 NOTEBOOK_VERSION=7.2.1 NBCLASSIC_VERSION=1.1.0
ARG PANDAS_VERSION=2.2.3 STATSMODELS_VERSION=0.14.4 BOTTLENECK_VERSION=1.4.2 NUMEXPR_VERSION=2.10.2

# Install uv for faster package installations
RUN pip3 install --no-cache-dir --upgrade uv

# Install essential+datascience pip packages
## mistune added for nbgrader issues
RUN mamba install -c conda-forge pillow typing-extensions tzlocal appdirs gputil mock pytest umap-learn && \
mamba install -c conda-forge nltk statsmodels=$STATSMODELS_VERSION pandas=$PANDAS_VERSION mistune && \
mamba install -c conda-forge dpkt nose datascience pyarrow bottleneck=$BOTTLENECK_VERSION umap-learn numexpr=$NUMEXPR_VESION && \
RUN uv pip install pillow typing-extensions tzlocal appdirs gputil mock pytest umap-learn --system && \
uv pip install nltk statsmodels==$STATSMODELS_VERSION pandas==$PANDAS_VERSION mistune --system && \
uv pip install dpkt nose datascience pyarrow bottleneck==$BOTTLENECK_VERSION umap-learn numexpr==$NUMEXPR_VERSION --system && \
python -c 'import matplotlib.pyplot' && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean --all
uv cache clean

# Install jupyterlab+extensions
RUN mamba install -c conda-forge jupyterhub=$JUPYTERHUB_VERSION jupyter_server=$JUPYTERSERVER_VERSION && \
mamba install -c conda-forge jupyterlab=$JUPYTERLAB_VERSION notebook=$NOTEBOOK_VERSION nbclassic=$NBCLASSIC_VERSION && \
# (TODO: Re-enable collab once RTC is fixed) mamba install -c conda-forge jupyterlab_rise jupyter_server_terminals jupyter-collaboration && \
mamba install -c conda-forge jupyterlab_rise jupyter_server_terminals && \
mamba install -c conda-forge jupyterlab-latex jupyterlab-git jupyterlab-fasta jupyterlab-geojson && \
mamba install -c conda-forge nbconvert=$NBCONVERT_VERSION nbgrader=$NBGRADER_VERSION && \
RUN uv pip install jupyterhub==$JUPYTERHUB_VERSION jupyter_server==$JUPYTERSERVER_VERSION --system && \
uv pip install jupyterlab==$JUPYTERLAB_VERSION notebook==$NOTEBOOK_VERSION nbclassic==$NBCLASSIC_VERSION --system && \
# (TODO: Re-enable collab once RTC is fixed) uv pip install jupyterlab_rise jupyter_server_terminals jupyter-collaboration --system && \
uv pip install jupyterlab_rise jupyter_server_terminals --system && \
uv pip install jupyterlab-latex jupyterlab-git jupyterlab-fasta jupyterlab-geojson --system && \
uv pip install nbconvert==$NBCONVERT_VERSION nbgrader==$NBGRADER_VERSION --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean --all
uv cache clean

# Install R packages
RUN mamba install -c conda-forge r-markdown r-covr r-git2r r-crosstalk r-dt -y && \
Expand Down
49 changes: 16 additions & 33 deletions images/scipy-ml-notebook/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -53,68 +53,51 @@ USER jovyan
# CUDA setup w/mamba
## TODO: Investigate this command, seems to duplicate cuda packages for nvidia (pypi + conda-forge).
# cuda-toolkit is a skeleton package on CUDA 12, unlike CUDA <= 11
RUN mamba install -c "nvidia/label/cuda-12.6.0" cuda-nvcc \
cuda-toolkit=$CUDA_VERSION \
# For CUDA 11: cudatoolkit=$CUDA_VERSION \
cuda-version=$CUDA_VERSION \
nccl \
-y && \
RUN uv pip install --extra-index-url https://pypi.nvidia.com nvidia-cuda-nvcc-cu12 nvidia-nccl-cu12 cuda-python --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y
uv cache clean

# Install scipy pip packages
## install protobuf to avoid weird base type error. seems like if we don't then it'll be installed twice.
## https://github.com/spesmilo/electrum/issues/7825
## pip cache purge didnt work here for some reason.
RUN pip install --no-cache-dir protobuf==$PROTOBUF_VERSION
RUN uv pip install --no-cache-dir protobuf==$PROTOBUF_VERSION --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
uv cache clean

## cuda-python installed to have parity with tensorflow and cudnn
## Install pillow<7 due to dependency issue https://github.com/pytorch/vision/issues/1712
## tensorrt installed to fix not having libnvinfer that has caused tensorflow issues.
RUN pip install opencv-contrib-python-headless \
opencv-python && \
RUN uv pip install opencv-contrib-python-headless opencv-python --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
pip cache purge
uv cache clean

RUN mamba install -c conda-forge pyqt pycocotools pillow scapy && \
RUN uv pip install PyQt5 pycocotools pillow scapy --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean --all
uv cache clean

# Install CUDA/Torch/Tensorflow/Keras w/pip
## no purge required but no-cache-dir is used. pip purge will actually break the build here!
## Beware of potentially needing to update these if we update the drivers.
## Check tensorrt_env_vars.sh if you have to bump tensorrt!

## tf-keras is keras 2.x for higher versions of tensorflow that would normally require keras 3
RUN pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 && \
pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TF_KERAS_VERSION && \
RUN uv pip install nvidia-cudnn-cu12==$CUDNN_VERSION torch==$TORCH_VERSION torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 --system && \
uv pip install tensorflow==$TENSORFLOW_VERSION tensorflow-datasets tensorrt==$TENSORRT_VERSION keras==$KERAS_VERSION tf-keras==$TF_KERAS_VERSION --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge
uv cache clean

RUN pip install transformers datasets accelerate huggingface-hub timm && \
RUN uv pip install transformers datasets accelerate huggingface-hub timm --system && \
fix-permissions $CONDA_DIR && \
fix-permissions /home/$NB_USER && \
mamba clean -a -y && \
pip cache purge
uv cache clean

USER $NB_UID:$NB_GID
ENV PATH=${PATH}:/usr/local/nvidia/bin:/opt/conda/bin

# CUDA fixes for CONDA
## Copy libdevice file to the required path
RUN mkdir -p $CONDA_DIR/lib/nvvm/libdevice && \
cp $CONDA_DIR/nvvm/libdevice/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/
#CUDA 11: cp $CONDA_DIR/lib/libdevice.10.bc $CONDA_DIR/lib/nvvm/libdevice/

# TensorRT fix for tensorflow
## https://github.com/tensorflow/tensorflow/issues/61468 (could not find TensorRT)
## This will most definitely have to be changed after 8.6.1...
RUN ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer_plugin.so.$TENSORRT_VERSION && \
ln -s /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.8 /opt/conda/lib/python3.11/site-packages/tensorrt_libs/libnvinfer.so.$TENSORRT_VERSION

# Run datahub scripts
RUN . /tmp/activate.sh
4 changes: 2 additions & 2 deletions images/spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ images:
integration_tests: false
info_cmds: [PY_VER, PIP_LIST, CUDA_VERSION, CONDA_INFO, CONDA_LIST, APT_PKG_LIST]
prune: false # comment if scipy-ml stops being the last container
#prepull: false #-- uncomment to disable prepulling behavior for scipy-ml. gives you space on machine in exchange for build time.
# prepull: false #-- uncomment to disable prepulling behavior for scipy-ml. gives you space on machine in exchange for build time.

tag:
prefix: "2025.2"
prefix: "2025.3"

all_info_cmds:
PY_VER:
Expand Down
Loading