Skip to content

Commit 8946b35

Browse files
authored
Fix up building images and publishing the charts (#18)
Refresh the images so that we can build and publish the Helm charts again. Note that the PyTorch tests are not yet fixed.
1 parent 7e70cf6 commit 8946b35

File tree

9 files changed

+113
-47
lines changed

9 files changed

+113
-47
lines changed

.github/dependabot.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
---
2+
3+
version: 2
4+
5+
updates:
6+
# Automatically propose PRs for out-of-date GitHub actions
7+
- package-ecosystem: github-actions
8+
directory: "/"
9+
schedule:
10+
# Check for new versions weekly
11+
interval: weekly
12+
# Update all actions in a single PR
13+
groups:
14+
github-actions:
15+
patterns: ["*"]
16+
labels:
17+
- automation
18+
- gha-update
19+
20+
# Automatically propose PRs for Python dependencies
21+
- package-ecosystem: pip
22+
directory: "/python"
23+
schedule:
24+
# Check for new versions daily
25+
interval: daily
26+
labels:
27+
- automation
28+
- pip-update

.github/workflows/publish-benchmark-images.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@ jobs:
1212
build_push_images:
1313
name: Build and push benchmark images
1414
runs-on: ubuntu-latest
15+
permissions:
16+
contents: read
17+
id-token: write # needed for signing the images with GitHub OIDC Token
18+
packages: write # required for pushing container images
19+
security-events: write # required for pushing SARIF files
1520
strategy:
21+
fail-fast: false
1622
matrix:
1723
include:
1824
- component: discovery
@@ -21,7 +27,8 @@ jobs:
2127
- component: mpi-benchmarks
2228
- component: openfoam
2329
- component: perftest
24-
- component: pytorch-benchmarks
30+
# TODO - need to fix this build
31+
# - component: pytorch-benchmarks
2532
steps:
2633
- name: Check out the repository
2734
uses: actions/checkout@v2

.github/workflows/publish-operator.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ jobs:
1212
build_push_operator_image:
1313
name: Build and push operator image
1414
runs-on: ubuntu-latest
15+
permissions:
16+
contents: read
17+
id-token: write # needed for signing the images with GitHub OIDC Token
18+
packages: write # required for pushing container images
19+
security-events: write # required for pushing SARIF files
1520
steps:
1621
- name: Check out the repository
1722
uses: actions/checkout@v2

images/iperf/Dockerfile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
# Dockerfile for the iperf benchmarks
33
#####
44

5-
65
FROM debian:bookworm-slim
76

8-
ARG IPERF_VERSION=2.1.8+dfsg-1
97
RUN apt-get update && \
10-
apt-get install -y "iperf=$IPERF_VERSION" && \
8+
apt-get upgrade && \
9+
apt-get install -y "iperf" && \
1110
rm -rf /var/lib/apt/lists/*
1211

1312
EXPOSE 5001

images/mpi-benchmarks/Dockerfile

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,19 @@
33
# https://www.intel.com/content/www/us/en/develop/documentation/imb-user-guide/top.html
44
#####
55

6-
FROM rockylinux:9.2
6+
FROM quay.io/rockylinux/rockylinux:9.5
77

8-
ARG MPITESTS_VERSION=5.8
98
RUN yum install -y \
109
openssh-clients openssh-server \
1110
rdma-core ucx-ib ucx-rdmacm \
12-
"mpitests-openmpi-${MPITESTS_VERSION}" && \
11+
mpitests-openmpi && \
1312
yum clean all -y && \
1413
rm -rf /var/cache
1514

1615
# Make sure the MPI binaries are on the PATH
1716
ENV OPENMPI_ROOT=/usr/lib64/openmpi
1817
ENV PATH=$OPENMPI_ROOT/bin:$PATH
19-
ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib:$LD_LIBRARY_PATH
18+
ENV LD_LIBRARY_PATH=$OPENMPI_ROOT/lib
2019

2120
# Install helper scripts
2221
COPY ./scripts/* /usr/local/bin

images/perftest/Dockerfile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
# Dockerfile for the RDMA bandwidth and latency benchmarks
33
#####
44

5-
FROM rockylinux:9.2
5+
FROM quay.io/rockylinux/rockylinux:9.5
66

7-
ARG PERFTEST_VERSION=4.5.0.20
8-
RUN yum install -y "perftest-${PERFTEST_VERSION}" && \
7+
RUN yum install -y perftest && \
98
yum clean all -y && \
109
rm -rf /var/cache

images/pytorch-benchmarks/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
1+
FROM pytorch/pytorch:2.6.0-cuda11.8-cudnn9-runtime
22

33
RUN apt update && apt install -y git time
44
RUN git clone https://github.com/pytorch/benchmark
55
WORKDIR /workspace/benchmark
66
# Pin pytorch-benchmark repo version
7-
RUN git reset --hard 6fef32ddaf93a63088b97eb27620fb57ef247521
7+
RUN git reset --hard a22a2a8309d513c66df995ae27ee48c954b49f66
88
# List of models here should match PytorchModel enum
99
# in python/perftest/pytorch.py
1010
RUN python install.py alexnet resnet50 llama
1111

1212
# PyTorch install.py pins numpy=1.21.2 but
1313
# this breaks numba so update both here
14-
RUN pip install -U numpy numba
14+
RUN pip install -U numpy numba

python/Dockerfile

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,34 @@
1-
FROM python:3.9
1+
FROM ubuntu:jammy as build-image
2+
3+
RUN apt-get update && \
4+
apt-get upgrade -y && \
5+
apt-get install --no-install-recommends python3.10-venv git -y && \
6+
rm -rf /var/lib/apt/lists/*
7+
8+
# build into a venv we can copy across
9+
RUN python3 -m venv /opt/venv
10+
ENV PATH="/opt/venv/bin:$PATH"
11+
12+
COPY . /perftest
13+
RUN pip install -U pip setuptools
14+
RUN pip install --no-deps --requirement /perftest/requirements.txt
15+
RUN pip install -e /perftest
16+
17+
#
18+
# Now the image we run with
19+
#
20+
FROM ubuntu:jammy as run-image
21+
22+
RUN apt-get update && \
23+
apt-get upgrade -y && \
24+
apt-get install --no-install-recommends python3 tini ca-certificates -y && \
25+
rm -rf /var/lib/apt/lists/*
26+
27+
# Copy across the venv
28+
COPY --from=build-image /opt/venv /opt/venv
29+
# Copy code to keep editable install working
30+
COPY . /perftest
31+
ENV PATH="/opt/venv/bin:$PATH"
232

333
# Create the user that will be used to run the app
434
ENV APP_UID 1001
@@ -22,14 +52,9 @@ RUN apt-get update && \
2252
# Don't buffer stdout and stderr as it breaks realtime logging
2353
ENV PYTHONUNBUFFERED 1
2454

25-
# Install dependencies
26-
# Doing this separately by copying only the requirements file enables better use of the build cache
27-
COPY ./requirements.txt /perftest/
28-
RUN pip install --no-deps --requirement /perftest/requirements.txt
29-
30-
# Install the perftest package
31-
COPY . /perftest
32-
RUN pip install --no-deps -e /perftest
55+
# Make httpx use the system trust roots
56+
# By default, this means we use the CAs from the ca-certificates package
57+
ENV SSL_CERT_FILE /etc/ssl/certs/ca-certificates.crt
3358

3459
# By default, run the operator using kopf
3560
USER $APP_UID

python/requirements.txt

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,32 @@
1-
aiohttp==3.10.11
2-
aiosignal==1.2.0
3-
anyio==3.6.1
4-
async-timeout==4.0.2
5-
attrs==22.1.0
6-
certifi==2024.7.4
7-
charset-normalizer==2.1.1
8-
click==8.1.3
1+
aiohappyeyeballs==2.4.4
2+
aiohttp==3.11.12
3+
aiosignal==1.3.2
4+
annotated-types==0.7.0
5+
anyio==4.8.0
6+
async-timeout==5.0.1
7+
attrs==25.1.0
8+
certifi==2025.1.31
9+
charset-normalizer==3.4.1
10+
click==8.1.8
911
configomatic @ git+https://github.com/stackhpc/configomatic.git@3a7e88693e8f44530ac4f1f5ee3d64977cf3784d
10-
easykube @ git+https://github.com/stackhpc/easykube.git@f8212a0b412b1eb2d7d015508b0ee49b6c2a5eb2
11-
frozenlist==1.3.1
12-
h11==0.12.0
13-
httpcore==0.15.0
14-
httpx==0.23.0
12+
easykube==0.5.0
13+
frozenlist==1.5.0
14+
h11==0.14.0
15+
httpcore==1.0.7
16+
httpx==0.28.1
1517
idna==3.10
16-
iso8601==1.0.2
18+
iso8601==2.1.0
1719
Jinja2==3.1.5
18-
kopf==1.35.6
20+
kopf==1.37.4
1921
kube-custom-resource @ git+https://github.com/stackhpc/kube-custom-resource.git@851b1bf25fecdbc180e73494eb77c7899274ee15
20-
MarkupSafe==2.1.1
21-
multidict==6.0.2
22-
pydantic==1.10.13
23-
python-json-logger==2.0.4
24-
PyYAML==6.0
25-
rfc3986==1.5.0
26-
sniffio==1.3.0
27-
typing-extensions==4.3.0
28-
yarl==1.8.1
22+
MarkupSafe==3.0.2
23+
multidict==6.1.0
24+
propcache==0.2.1
25+
pydantic==1.10.21
26+
pydantic_core==2.27.2
27+
python-json-logger==3.2.1
28+
PyYAML==6.0.2
29+
rfc3986==2.0.0
30+
sniffio==1.3.1
31+
typing_extensions==4.12.2
32+
yarl==1.18.3

0 commit comments

Comments
 (0)