diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c636ee8b0b..05d09bfd31d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -90,8 +90,8 @@ jobs:
             arch: x64
             trixi_test: threaded_legacy
           - version: '1.10'
-            os: macos-13
-            arch: x64
+            os: macos-latest
+            arch: aarch64
             trixi_test: mpi
           - version: '1.10'
             os: macos-latest
diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl
index fa816da9a1e..6ae09588861 100644
--- a/src/auxiliary/math.jl
+++ b/src/auxiliary/math.jl
@@ -284,6 +284,11 @@ end
 # when using `@fastmath`, which we also get from
 # [Fortran](https://godbolt.org/z/Yrsa1js7P)
 # or [C++](https://godbolt.org/z/674G7Pccv).
+#
+# Note, however, that such a custom reimplementation can cause incompatibilities with
+# other packages. Currently, we are affected by an issue with MPI.jl on ARM, see
+# https://github.com/trixi-framework/Trixi.jl/issues/1922.
+# The workaround is to use `Base.min` / `Base.max` instead for MPI reductions.
 """
     Trixi.max(x, y, ...)
 
diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index 860e3fa21d3..06110d08d28 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -434,7 +434,8 @@ function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi)
             res = maximum(abs, view(du, v, ..))
             if mpi_isparallel()
                 # TODO: Debugging, here is a type instability
-                global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm())
+                # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+                global_res = MPI.Reduce!(Ref(res), Base.max, mpi_root(), mpi_comm())
                 if mpi_isroot()
                     res::eltype(du) = global_res[]
                 end
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index 000daa015dc..707f6e6d94c 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -131,7 +131,8 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
@@ -161,9 +162,21 @@ function integrate_via_indices(func::Func, u,
                             normalize = normalize)
 
     # OBS! Global results are only calculated on MPI root, all other domains receive `nothing`
-    global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
+    if local_integral isa Real
+        global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm())
+    else
+        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64},
+                                                          Ref(local_integral)), +,
+                                      mpi_root(), mpi_comm())
+    end
+
     if mpi_isroot()
-        integral = convert(typeof(local_integral), global_integral[])
+        if local_integral isa Real
+            integral = global_integral[]
+        else
+            global_wrapped = unsafe_wrap(Array, global_integral, length(local_integral))
+            integral = convert(typeof(local_integral), global_wrapped)
+        end
     else
         integral = convert(typeof(local_integral), NaN * local_integral)
     end
@@ -194,10 +207,20 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    if integral isa Real
+        global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    else
+        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64}, Ref(integral)),
+                                      +, mpi_root(), mpi_comm())
+    end
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
-        integral = convert(typeof(integral), global_integral[])
+        if integral isa Real
+            integral = global_integral[]
+        else
+            global_wrapped = unsafe_wrap(Array, global_integral, length(integral))
+            integral = convert(typeof(integral), global_wrapped)
+        end
     else
         integral = convert(typeof(integral), NaN * integral)
         total_volume = volume # non-root processes receive nothing from reduce -> overwrite
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index de777be406d..1d167652c63 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -49,7 +49,8 @@ function calc_error_norms(func, u, t, analyzer,
     global_l2_error = Vector(l2_error)
     global_linf_error = Vector(linf_error)
     MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm())
-    MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm())
+    # Base.max instead of max needed, see comment in src/auxiliary/math.jl
+    MPI.Reduce!(global_linf_error, Base.max, mpi_root(), mpi_comm())
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
         l2_error = convert(typeof(l2_error), global_l2_error)
@@ -87,10 +88,20 @@ function integrate_via_indices(func::Func, u,
         end
     end
 
-    global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    if integral isa Real
+        global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm())
+    else
+        global_integral = MPI.Reduce!(Base.unsafe_convert(Ptr{Float64}, Ref(integral)),
+                                      +, mpi_root(), mpi_comm())
+    end
     total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm())
     if mpi_isroot()
-        integral = convert(typeof(integral), global_integral[])
+        if integral isa Real
+            integral = global_integral[]
+        else
+            global_wrapped = unsafe_wrap(Array, global_integral, length(integral))
+            integral = convert(typeof(integral), global_wrapped)
+        end
     else
         integral = convert(typeof(integral), NaN * integral)
         total_volume = volume # non-root processes receive nothing from reduce -> overwrite
diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl
index 41251506a0d..c7922cecc66 100644
--- a/src/callbacks_step/stepsize_dg2d.jl
+++ b/src/callbacks_step/stepsize_dg2d.jl
@@ -54,7 +54,8 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -70,7 +71,8 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -154,7 +156,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -170,7 +173,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -186,7 +190,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -202,7 +207,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{2},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl
index 664596f989e..49976de6505 100644
--- a/src/callbacks_step/stepsize_dg3d.jl
+++ b/src/callbacks_step/stepsize_dg3d.jl
@@ -130,7 +130,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -146,7 +147,8 @@ function max_dt(u, t, mesh::ParallelP4estMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -162,7 +164,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end
@@ -178,7 +181,8 @@ function max_dt(u, t, mesh::ParallelT8codeMesh{3},
                       typeof(constant_speed), typeof(equations), typeof(dg),
                       typeof(cache)},
                 u, t, mesh, constant_speed, equations, dg, cache)
-    dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[]
+    # Base.min instead of min needed, see comment in src/auxiliary/math.jl
+    dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[]
 
     return dt
 end