diff --git a/.github/unittest/linux_libs/scripts_gym/10_nvidia.json b/.github/unittest/linux_libs/scripts_gym/10_nvidia.json new file mode 100644 index 00000000000..576b540c387 --- /dev/null +++ b/.github/unittest/linux_libs/scripts_gym/10_nvidia.json @@ -0,0 +1,6 @@ +{ + "file_format_version" : "1.0.0", + "ICD" : { + "library_path" : "libEGL_nvidia.so.0" + } +} diff --git a/.github/unittest/linux_libs/scripts_gym/run_test.sh b/.github/unittest/linux_libs/scripts_gym/run_test.sh index ff2f298c9d2..d1e2967325f 100755 --- a/.github/unittest/linux_libs/scripts_gym/run_test.sh +++ b/.github/unittest/linux_libs/scripts_gym/run_test.sh @@ -18,12 +18,22 @@ lib_dir="${env_dir}/lib" export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$lib_dir export MKL_THREADING_LAYER=GNU +# Start Xvfb with specific OpenGL configuration +export DISPLAY=:99 +Xvfb :99 -screen 0 1400x900x24 -ac +extension GLX +render -noreset > /dev/null 2>&1 & +sleep 3 # Give Xvfb time to start + +# Verify OpenGL/EGL setup +glxinfo -B || true +echo "EGL_PLATFORM=$EGL_PLATFORM" +echo "MUJOCO_GL=$MUJOCO_GL" +echo "PYOPENGL_PLATFORM=$PYOPENGL_PLATFORM" + +# Run the tests python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200 python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym' unset LD_PRELOAD -export DISPLAY=:99 -Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 & python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py --instafail -v --durations 200 -k "gym and not isaac" --error-for-skips --mp_fork coverage combine coverage xml -i diff --git a/.github/unittest/linux_libs/scripts_gym/setup_env.sh b/.github/unittest/linux_libs/scripts_gym/setup_env.sh index aade606ba16..417ee8fef69 100755 --- a/.github/unittest/linux_libs/scripts_gym/setup_env.sh +++ b/.github/unittest/linux_libs/scripts_gym/setup_env.sh @@ -5,14 +5,28 @@ # # Do not install PyTorch and torchvision here, otherwise they also get cached. +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. +apt-get update && apt-get install -y \ + git wget gcc g++ \ + libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev \ + libglvnd0 libgl1 libglx0 libegl1 libgles2 \ + xvfb libegl-dev libx11-dev freeglut3-dev \ + mesa-utils mesa-common-dev \ + libsdl2-dev libsdl2-2.0-0 + set -e +set -v this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -# Avoid error: "fatal: unsafe repository" -apt-get update && apt-get install -y git wget gcc g++ -apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 -apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libegl-dev libx11-dev freeglut3-dev +# Setup EGL +mkdir -p /usr/share/glvnd/egl_vendor.d +cp $this_dir/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json + +# Avoid error: "fatal: unsafe repository" git config --global --add safe.directory '*' root_dir="$(git rev-parse --show-toplevel)" conda_dir="${root_dir}/conda" @@ -79,13 +93,15 @@ conda env config vars set \ SDL_VIDEODRIVER=dummy \ DISPLAY=:99 \ PYOPENGL_PLATFORM=egl \ - LD_PRELOAD=$glew_path \ + __GLX_VENDOR_LIBRARY_NAME=nvidia \ + MESA_GL_VERSION_OVERRIDE=3.3 \ + EGL_PLATFORM=x11 \ + LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so:/usr/lib/x86_64-linux-gnu/libGL.so \ NVIDIA_PATH=/usr/src/nvidia-470.63.01 \ MUJOCO_PY_MJKEY_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/mjkey.txt \ MUJOCO_PY_MUJOCO_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/linux/mujoco210 \ - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/pytorch/rl/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/pytorch/rl/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin \ TOKENIZERS_PARALLELISM=true -# LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin # make env variables apparent conda deactivate && conda activate "${env_dir}" diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml b/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml index 61726cae9b7..b722944bc1e 100644 --- a/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml +++ b/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml @@ -24,6 +24,7 @@ dependencies: - mujoco - patchelf - pyopengl==3.1.4 + - pyglet<1.5.0 - ray - av - h5py diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh b/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh index 7622c769aae..ebca125d32f 100755 --- a/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh +++ b/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh @@ -22,13 +22,20 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/pytorch/rl/mujoco-py/mujoco_py/binaries export MKL_THREADING_LAYER=GNU export BATCHED_PIPE_TIMEOUT=60 -python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200 -python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym' - +# Start Xvfb with specific OpenGL configuration export DISPLAY=:99 Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 & +sleep 3 # Give Xvfb time to start + +# Verify OpenGL setup +glxinfo -B || true +echo "MUJOCO_GL=$MUJOCO_GL" +echo "PYOPENGL_PLATFORM=$PYOPENGL_PLATFORM" + +python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test.py -v --durations 200 +python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym' -CKPT_BACKEND=torch MUJOCO_GL=egl python .github/unittest/helpers/coverage_run_parallel.py -m pytest \ +MUJOCO_GL=osmesa python .github/unittest/helpers/coverage_run_parallel.py -m pytest \ --instafail -v \ --durations 200 \ --ignore test/test_distributed.py \ diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh b/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh index 0ab83d4b042..0a1323bcdd2 100755 --- a/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh +++ b/.github/unittest/linux_olddeps/scripts_gym_0_13/setup_env.sh @@ -8,27 +8,63 @@ set -e set -v -this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -apt-get update && apt-get upgrade -y -printf "* Installing vim - git - wget\n" -apt-get install -y vim git wget +# Make apt-get non-interactive +export DEBIAN_FRONTEND=noninteractive +# Pre-configure timezone data +ln -fs /usr/share/zoneinfo/UTC /etc/localtime +echo "UTC" > /etc/timezone -printf "* Installing glfw - glew - osmesa part 1\n" -apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb libx11-dev libegl-dev +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -#printf "* Installing glfw - glew - osmesa part 2\n" -#apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libsdl2-dev libsdl2-2.0-0 +# Add NVIDIA repository for drivers +apt-get update && apt-get install -y --no-install-recommends \ + software-properties-common \ + wget \ + ca-certificates + +# Install basic build tools first +apt-get install -y vim git wget build-essential + +# Install system libraries to fix version conflicts +apt-get install -y --no-install-recommends \ + libffi7 \ + libffi-dev \ + libtinfo6 \ + libtinfo-dev \ + libncurses5-dev \ + libncursesw5-dev + +# Install OpenGL packages with focus on OSMesa +apt-get install -y --no-install-recommends \ + libosmesa6-dev \ + libgl1-mesa-dev \ + libgl1-mesa-glx \ + libglew-dev \ + libglfw3-dev \ + libglvnd0 \ + libgl1 \ + libglx0 \ + libegl1 \ + libgles2 \ + xvfb \ + mesa-utils \ + mesa-common-dev \ + libglu1-mesa-dev \ + libsdl2-dev \ + libsdl2-2.0-0 \ + pkg-config if [ "${CU_VERSION:-}" == cpu ] ; then - # solves version `GLIBCXX_3.4.29' not found for tensorboard -# apt-get install -y gcc-4.9 apt-get upgrade -y libstdc++6 apt-get dist-upgrade -y else apt-get install -y g++ gcc fi +# Remove conflicting libraries from conda environment if they exist +rm -f "${env_dir}/lib/libtinfo.so"* || true +rm -f "${env_dir}/lib/libffi.so"* || true + git config --global --add safe.directory '*' root_dir="$(git rev-parse --show-toplevel)" conda_dir="${root_dir}/conda" @@ -93,19 +129,20 @@ printf "* Installing dependencies (except PyTorch)\n" echo " - python=${PYTHON_VERSION}" >> "${this_dir}/environment.yml" cat "${this_dir}/environment.yml" -export MUJOCO_GL=egl +# Use OSMesa for rendering +export MUJOCO_GL=osmesa conda env config vars set \ MAX_IDLE_COUNT=1000 \ - MUJOCO_GL=egl \ + MUJOCO_GL=osmesa \ SDL_VIDEODRIVER=dummy \ - DISPLAY=unix:0.0 \ - PYOPENGL_PLATFORM=egl \ - LD_PRELOAD=$glew_path \ - NVIDIA_PATH=/usr/src/nvidia-470.63.01 \ + DISPLAY=:99 \ + PYOPENGL_PLATFORM=osmesa \ + LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libOSMesa.so.6:/usr/lib/x86_64-linux-gnu/libGL.so \ MUJOCO_PY_MJKEY_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/mjkey.txt \ MUJOCO_PY_MUJOCO_PATH=${root_dir}/mujoco-py/mujoco_py/binaries/linux/mujoco210 \ - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin \ - TOKENIZERS_PARALLELISM=true + LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH:/home/circleci/project/mujoco-py/mujoco_py/binaries/linux/mujoco210/bin \ + TOKENIZERS_PARALLELISM=true \ + PYGLET_GRAPHICS=opengl3 # make env variables apparent conda deactivate && conda activate "${env_dir}" diff --git a/test/_utils_internal.py b/test/_utils_internal.py index 6ae85cfaf20..443472a9286 100644 --- a/test/_utils_internal.py +++ b/test/_utils_internal.py @@ -182,7 +182,7 @@ def get_available_devices(): def get_default_devices(): num_cuda = torch.cuda.device_count() if num_cuda == 0: - # if torch.mps.is_available(): + # if getattr(torch.mps, "is_available", lambda: False)(): # return [torch.device("mps:0")] return [torch.device("cpu")] elif num_cuda == 1: diff --git a/test/test_collector.py b/test/test_collector.py index fab2e04bb1d..0c9a53bb9de 100644 --- a/test/test_collector.py +++ b/test/test_collector.py @@ -1070,7 +1070,7 @@ def test_no_deepcopy_policy(self, collector_type): shared_device = torch.device("cpu") if torch.cuda.is_available(): original_device = torch.device("cuda:0") - elif torch.mps.is_available(): + elif getattr(torch.mps, "is_available", lambda: False)(): original_device = torch.device("mps") else: pytest.skip("No GPU or MPS device") @@ -2614,7 +2614,8 @@ def test_multi_collector_consistency( @pytest.mark.skipif( - not torch.cuda.is_available() and not torch.mps.is_available(), + not torch.cuda.is_available() + and not getattr(torch.mps, "is_available", lambda: False)(), reason="No casting if no cuda", ) class TestUpdateParams: diff --git a/torchrl/data/replay_buffers/utils.py b/torchrl/data/replay_buffers/utils.py index 97c62bf9707..ee5fa5736dc 100644 --- a/torchrl/data/replay_buffers/utils.py +++ b/torchrl/data/replay_buffers/utils.py @@ -1036,6 +1036,6 @@ def tree_iter(pytree): # noqa: F811 def _auto_device() -> torch.device: if torch.cuda.is_available(): return torch.device("cuda:0") - elif torch.mps.is_available(): + elif getattr(torch.mps, "is_available", lambda: False)(): return torch.device("mps:0") return torch.device("cpu")