From 05096e7b56a120fbb83c54a9add4fe64be17c322 Mon Sep 17 00:00:00 2001 From: Pawel Swider Date: Fri, 8 Aug 2025 12:25:08 +0000 Subject: [PATCH 1/5] Matmul complex POC --- src/ATen/native/xpu/Blas.cpp | 44 ++++++++++++++++++ src/ATen/native/xpu/mkl/SpectralOps.cpp | 62 +++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 src/ATen/native/xpu/Blas.cpp diff --git a/src/ATen/native/xpu/Blas.cpp b/src/ATen/native/xpu/Blas.cpp new file mode 100644 index 0000000000..2c5af7abd2 --- /dev/null +++ b/src/ATen/native/xpu/Blas.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace at::native { + + at::Tensor& mm_complex_out_xpu(const at::Tensor &self, const at::Tensor &mat2, at::Tensor &out) { + at::Tensor self_cont = self.contiguous(); + at::Tensor mat2_cont = mat2.contiguous(); + at::Tensor out_cont = out.contiguous(); + + const int64_t m = self_cont.sizes().at(0); + const int64_t n = mat2_cont.sizes().at(1); + const int64_t k = self_cont.sizes().at(1); + + constexpr std::complex<float> alpha = {1.0f, 0.0f}; + constexpr std::complex<float> beta = {0.0f, 0.0f}; + + oneapi::mkl::blas::row_major::gemm( + at::xpu::getCurrentSYCLQueue(), + oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + m, + n, + k, + alpha, + reinterpret_cast<const std::complex<float>*>(self_cont.const_data_ptr()), + k, + reinterpret_cast<const std::complex<float>*>(mat2_cont.const_data_ptr()), + n, + beta, + reinterpret_cast<std::complex<float>*>(out_cont.data_ptr()), + n); + + return out; +} + +REGISTER_XPU_DISPATCH(mm_complex_stub, &mm_complex_out_xpu) + +} // namespace at::native \ No newline at end of file
diff --git a/src/ATen/native/xpu/mkl/SpectralOps.cpp b/src/ATen/native/xpu/mkl/SpectralOps.cpp index 96d4118f5b..492c4da6cd 100644 --- a/src/ATen/native/xpu/mkl/SpectralOps.cpp +++ b/src/ATen/native/xpu/mkl/SpectralOps.cpp @@ -10,6 +10,10 @@ #include #include #include +#include +#include +#include +#include using namespace oneapi::mkl::dft; @@ -578,3 +582,61 @@ Tensor& _fft_r2c_mkl_out( } } // namespace at::native::xpu + + +namespace at::native::xpu { + +at::Tensor& mm_out_xpu(at::Tensor &out, const at::Tensor &self, const at::Tensor &mat2) { + at::Tensor self_cont = self.contiguous(); + at::Tensor mat2_cont = mat2.contiguous(); + at::Tensor out_cont = out.contiguous(); + + const int64_t m = self_cont.sizes().at(0); + const int64_t n = mat2_cont.sizes().at(1); + const int64_t k = self_cont.sizes().at(1); + + constexpr std::complex<float> alpha = {1.0f, 0.0f}; + constexpr std::complex<float> beta = {0.0f, 0.0f}; + + oneapi::mkl::blas::row_major::gemm( + at::xpu::getCurrentSYCLQueue(), + oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + m, + n, + k, + alpha, + reinterpret_cast<const std::complex<float>*>(self_cont.const_data_ptr()), + k, + reinterpret_cast<const std::complex<float>*>(mat2_cont.const_data_ptr()), + n, + beta, + reinterpret_cast<std::complex<float>*>(out_cont.data_ptr()), + n); + + return out; +} + +Tensor mm_xpu(const Tensor& self, const Tensor& other) { + TORCH_CHECK(self.is_xpu() && other.is_xpu(), + "mm_xpu only supports XPU tensors"); + + // Your SYCL implementation here + auto result = at::empty({self.size(0), other.size(1)}, self.options()); + + std::cout << "Example change" << std::endl; + mm_out_xpu(result, self, other); + + return result; +} + +} // namespace at::native::xpu + +// Register ONLY for XPU +TORCH_LIBRARY(xpu_ops, m) { + m.def("mm_xpu(Tensor self, Tensor other) -> Tensor"); +} + +TORCH_LIBRARY_IMPL(xpu_ops, XPU, m) { + m.impl("mm_xpu", TORCH_FN(at::native::xpu::mm_xpu)); +} \ No newline at end of file
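A minimal smoke test for the POC op above, as a sketch only: it assumes the shared library carrying the TORCH_LIBRARY registration has been loaded into Python (the library name below is hypothetical) and that an XPU device with oneMKL support is available.

    import torch

    # Hypothetical library name; use the actual torch-xpu-ops build artifact.
    torch.ops.load_library("libtorch_xpu_ops.so")

    a = torch.randn(4, 3, dtype=torch.complex64, device="xpu")
    b = torch.randn(3, 5, dtype=torch.complex64, device="xpu")
    # Calls the schema registered above: mm_xpu(Tensor self, Tensor other) -> Tensor
    res = torch.ops.xpu_ops.mm_xpu(a, b)
    # Validate against the CPU reference implementation.
    torch.testing.assert_close(res.cpu(), torch.mm(a.cpu(), b.cpu()))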
From e865b3f1311890ad3f6c54fa4e313b50c84ac40e Mon Sep 17 00:00:00 2001 From: Pawel Swider Date: Tue, 26 Aug 2025 13:45:15 +0000 Subject: [PATCH 2/5] MM kernels improvements --- src/ATen/native/xpu/Blas.cpp | 320 +++++++++++++++++++++++++++++++---- 1 file changed, 287 insertions(+), 33 deletions(-) diff --git a/src/ATen/native/xpu/Blas.cpp b/src/ATen/native/xpu/Blas.cpp index 2c5af7abd2..a2bb2f9e0e 100644 --- a/src/ATen/native/xpu/Blas.cpp +++ b/src/ATen/native/xpu/Blas.cpp @@ -1,44 +1,298 @@ -#include #include -#include -#include #include -#include +#include +#include #include +#include +#include namespace at::native { - at::Tensor& mm_complex_out_xpu(const at::Tensor &self, const at::Tensor &mat2, at::Tensor &out) { - at::Tensor self_cont = self.contiguous(); - at::Tensor mat2_cont = mat2.contiguous(); - at::Tensor out_cont = out.contiguous(); - - const int64_t m = self_cont.sizes().at(0); - const int64_t n = mat2_cont.sizes().at(1); - const int64_t k = self_cont.sizes().at(1); - - constexpr std::complex<float> alpha = {1.0f, 0.0f}; - constexpr std::complex<float> beta = {0.0f, 0.0f}; - - oneapi::mkl::blas::row_major::gemm( - at::xpu::getCurrentSYCLQueue(), - oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::nontrans, - m, - n, - k, - alpha, - reinterpret_cast<const std::complex<float>*>(self_cont.const_data_ptr()), - k, - reinterpret_cast<const std::complex<float>*>(mat2_cont.const_data_ptr()), - n, - beta, - reinterpret_cast<std::complex<float>*>(out_cont.data_ptr()), - n); - - return out; +inline at::Tensor resolveViewsAndConjugation(const at::Tensor& input) { + at::Tensor input_resolved = input.is_conj() ? input.resolve_conj() : input; + at::Tensor input_contiguous = input_resolved.is_contiguous() + ? input_resolved + : input_resolved.contiguous(); + + return input_contiguous; +} + +template <typename T> +at::Tensor& mm_complex_out_xpu_impl( + const at::Tensor& self, + const at::Tensor& mat2, + at::Tensor& out) { + at::Tensor self_cont = resolveViewsAndConjugation(self); + at::Tensor mat2_cont = resolveViewsAndConjugation(mat2); + at::Tensor out_cont = resolveViewsAndConjugation(out); + + const int64_t m = self_cont.sizes().at(0); + const int64_t n = mat2_cont.sizes().at(1); + const int64_t k = self_cont.sizes().at(1); + + constexpr std::complex<T> alpha = {T(1.0), T(0.0)}; + constexpr std::complex<T> beta = {T(0.0), T(0.0)}; + + oneapi::mkl::blas::row_major::gemm( + c10::xpu::getCurrentXPUStream().queue(), + oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + m, + n, + k, + alpha, + reinterpret_cast<const std::complex<T>*>(self_cont.const_data_ptr()), + k, + reinterpret_cast<const std::complex<T>*>(mat2_cont.const_data_ptr()), + n, + beta, + reinterpret_cast<std::complex<T>*>(out_cont.data_ptr()), + n); + + if (!out.is_same(out_cont)) { + out.copy_(out_cont); + } + + return out; +} + +at::Tensor& mm_complex_out_xpu( + const at::Tensor& self, + const at::Tensor& mat2, + at::Tensor& out) { + at::Tensor out_ref = at::mm(self.cpu(), mat2.cpu()); + + AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "mm_complex_out_xpu", [&] { + using underlying_t = typename c10::scalar_value_type<scalar_t>::type; + mm_complex_out_xpu_impl<underlying_t>(self, mat2, out); + }); + + return out; +} + +template <typename T> +at::Tensor& bmm_complex_out_xpu_impl( + const at::Tensor& self, + const at::Tensor& batch2, + at::Tensor& out) { + at::Tensor self_cont = resolveViewsAndConjugation(self); + at::Tensor batch2_cont = resolveViewsAndConjugation(batch2); + at::Tensor out_cont = resolveViewsAndConjugation(out); + + const int64_t batch_size = self_cont.sizes().at(0); + const int64_t m = self_cont.sizes().at(1); + const int64_t n = batch2_cont.sizes().at(2); + const int64_t k = self_cont.sizes().at(2); + + constexpr std::complex<T> alpha = {T(1.0f), T(0.0f)}; + constexpr std::complex<T> beta = {T(0.0f), T(0.0f)}; + + oneapi::mkl::blas::row_major::gemm_batch( + c10::xpu::getCurrentXPUStream().queue(), + oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + m, + n, + k, + alpha, + reinterpret_cast<const std::complex<T>*>(self_cont.const_data_ptr()), + k, + m * k, + reinterpret_cast<const std::complex<T>*>(batch2_cont.const_data_ptr()), + n, + k * n, + beta, + reinterpret_cast<std::complex<T>*>(out_cont.data_ptr()), + n, + m * n, + batch_size); + + if (!out.is_same(out_cont)) { + out.copy_(out_cont); + } + + return out; +} + +at::Tensor& bmm_complex_out_xpu( + const at::Tensor& self, + const at::Tensor& mat2, + at::Tensor& out) { + + AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "bmm_complex_out_xpu", [&] { + using underlying_t = typename c10::scalar_value_type<scalar_t>::type; + bmm_complex_out_xpu_impl<underlying_t>(self, mat2, out); + }); + + return out; +} + +template <typename T> +at::Tensor& addmm_complex_out_xpu_impl( + const Tensor& input, + const Tensor& mat1, + const Tensor& mat2, + const Scalar& beta, + const Scalar& alpha, + Tensor& result) { + at::Tensor mat1_cont = resolveViewsAndConjugation(mat1); + at::Tensor mat2_cont = resolveViewsAndConjugation(mat2); + at::Tensor input_cont = resolveViewsAndConjugation(input).clone().detach(); + + const int64_t m = mat1_cont.sizes().at(0); + const int64_t n = mat2_cont.sizes().at(1); + const int64_t k = mat1_cont.sizes().at(1); + + // Some paths in the code below do not handle multiplications of the form [n, 0] x [0, m] + if (k == 0) { + if (result.numel() == 0) { + return result; + } + if (beta.toComplexDouble() == 0.0) { + result.zero_(); + } else { + if (!input.is_same(result)) { + result.copy_(input); + } + result.mul_(beta); + } + return result; + } + + if (m == 0 || n == 0) { + return result; + } + + const std::vector<int64_t> mm_output_size = {m, n}; + if (input_cont.sizes() != mm_output_size) { + input_cont = at::broadcast_to(input_cont, mm_output_size).contiguous(); + } + + + std::complex<T> complex_alpha = + static_cast<std::complex<T>>(alpha.toComplexDouble()); + std::complex<T> complex_beta = + static_cast<std::complex<T>>(beta.toComplexDouble()); + + oneapi::mkl::blas::row_major::gemm( + c10::xpu::getCurrentXPUStream().queue(), + oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + m, + n, + k, + complex_alpha, + reinterpret_cast<const std::complex<T>*>(mat1_cont.const_data_ptr()), + k, + reinterpret_cast<const std::complex<T>*>(mat2_cont.const_data_ptr()), + n, + complex_beta, + reinterpret_cast<std::complex<T>*>(input_cont.data_ptr()), + n); + + if (result.sizes() == input_cont.sizes()) { + result.copy_(input_cont); + } else { + result.copy_(input_cont.view(result.sizes())); + } + + return result; +} + +at::Tensor& addmm_complex_out_xpu( + const Tensor& input, + const Tensor& mat1, + const Tensor& mat2, + const Scalar& beta, + const Scalar& alpha, + Tensor& result) { + + AT_DISPATCH_COMPLEX_TYPES(input.scalar_type(), "addmm_complex_out_xpu", [&] { + using underlying_t = typename c10::scalar_value_type<scalar_t>::type; + addmm_complex_out_xpu_impl<underlying_t>( + input, mat1, mat2, beta, alpha, result); + }); + + return result; +} + +template <typename T> +at::Tensor& baddbmm_complex_out_xpu_impl( + const Tensor& input, + const Tensor& batch1, + const Tensor& batch2, + const Scalar& beta, + const Scalar& alpha, + Tensor& result) { + at::Tensor batch1_cont = resolveViewsAndConjugation(batch1); + at::Tensor batch2_cont = resolveViewsAndConjugation(batch2); + at::Tensor input_cont = resolveViewsAndConjugation(input).clone().detach(); + + const int64_t batch_size = batch1_cont.sizes().at(0); + const int64_t m = batch1_cont.sizes().at(1); + const int64_t n = batch2_cont.sizes().at(2); + const int64_t k = batch1_cont.sizes().at(2); + + const std::vector<int64_t> mm_output_size = {batch_size, m, n}; + if (input_cont.sizes() != mm_output_size) { + input_cont = at::broadcast_to(input_cont, mm_output_size).contiguous();; + } + + std::complex<T> complex_alpha = + static_cast<std::complex<T>>(alpha.toComplexDouble()); + std::complex<T> complex_beta = + static_cast<std::complex<T>>(beta.toComplexDouble()); + + oneapi::mkl::blas::row_major::gemm_batch( + c10::xpu::getCurrentXPUStream().queue(), + oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::nontrans, + m, + n, + k, + complex_alpha, + reinterpret_cast<const std::complex<T>*>(batch1_cont.const_data_ptr()), + k, + m * k, + reinterpret_cast<const std::complex<T>*>(batch2_cont.const_data_ptr()), + n, + k * n, + complex_beta, + reinterpret_cast<std::complex<T>*>(input_cont.data_ptr()), + n, + m * n, + batch_size); + + if (result.sizes() == input_cont.sizes()) { + result.copy_(input_cont); + } else { + result.copy_(input_cont.view(result.sizes())); + } + + return result; +} + +at::Tensor& baddbmm_complex_out_xpu( + const Tensor& input, + const Tensor& batch1, + const Tensor& batch2, + const Scalar& beta, + const Scalar& alpha, + Tensor& result) { + + AT_DISPATCH_COMPLEX_TYPES( + input.scalar_type(), "baddbmm_complex_out_xpu", [&] { + using underlying_t = typename c10::scalar_value_type<scalar_t>::type; + baddbmm_complex_out_xpu_impl<underlying_t>( + input, batch1, batch2, beta, alpha, result); + }); + + return result; } REGISTER_XPU_DISPATCH(mm_complex_stub, &mm_complex_out_xpu) +REGISTER_XPU_DISPATCH(bmm_complex_stub, &bmm_complex_out_xpu) +REGISTER_XPU_DISPATCH(addmm_complex_stub, &addmm_complex_out_xpu) +REGISTER_XPU_DISPATCH(baddbmm_complex_stub, &baddbmm_complex_out_xpu) } // namespace at::native \ No newline at end of file
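With the four dispatch stubs above in place, complex mm/bmm/addmm/baddbmm on XPU are intended to route through oneapi::mkl::blas::row_major::gemm and gemm_batch. A hedged parity sketch against the CPU reference, assuming a build where these stubs back the regular ATen entry points:

    import torch

    a = torch.randn(2, 4, 3, dtype=torch.complex64, device="xpu")
    b = torch.randn(2, 3, 5, dtype=torch.complex64, device="xpu")
    # bmm goes through the gemm_batch path registered above.
    torch.testing.assert_close(torch.bmm(a, b).cpu(), torch.bmm(a.cpu(), b.cpu()))

    # addmm exercises the alpha/beta scaling converted via toComplexDouble().
    c = torch.randn(4, 5, dtype=torch.complex64, device="xpu")
    d = torch.addmm(c, a[0], b[0], beta=0.5, alpha=2.0)
    torch.testing.assert_close(
        d.cpu(), torch.addmm(c.cpu(), a[0].cpu(), b[0].cpu(), beta=0.5, alpha=2.0))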
From 55dc07e217aaf47ac60d5c5657f43dbd9de2d812 Mon Sep 17 00:00:00 2001 From: Pawel Swider Date: Thu, 28 Aug 2025 12:44:05 +0000 Subject: [PATCH 3/5] Switch to TORCH_LIBRARY macro --- src/ATen/native/xpu/Blas.cpp | 97 +++++++++++++------------ src/ATen/native/xpu/mkl/SpectralOps.cpp | 62 ---------------- 2 files changed, 52 insertions(+), 107 deletions(-) diff --git a/src/ATen/native/xpu/Blas.cpp b/src/ATen/native/xpu/Blas.cpp index a2bb2f9e0e..6738da10df 100644 --- a/src/ATen/native/xpu/Blas.cpp +++ b/src/ATen/native/xpu/Blas.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include #include #include @@ -129,15 +127,15 @@ template <typename T> at::Tensor& addmm_complex_out_xpu_impl( - const Tensor& input, + const Tensor& self, const Tensor& mat1, const Tensor& mat2, const Scalar& beta, const Scalar& alpha, - Tensor& result) { + Tensor& out) { at::Tensor mat1_cont = resolveViewsAndConjugation(mat1); at::Tensor mat2_cont = resolveViewsAndConjugation(mat2); - at::Tensor input_cont = resolveViewsAndConjugation(input).clone().detach(); + at::Tensor self_cont = resolveViewsAndConjugation(self).clone().detach(); const int64_t m = mat1_cont.sizes().at(0); const int64_t n = mat2_cont.sizes().at(1); @@ -145,27 +143,27 @@ at::Tensor& addmm_complex_out_xpu_impl( // Some paths in the code below do not handle multiplications of the form [n, 0] x [0, m] if (k == 0) { - if (result.numel() == 0) { - return result; + if (out.numel() == 0) { + return out; } if (beta.toComplexDouble() == 0.0) { - result.zero_(); + out.zero_(); } else { - if (!input.is_same(result)) { - result.copy_(input); + if (!self.is_same(out)) { + out.copy_(self); } - result.mul_(beta); + out.mul_(beta); } - return result; + return out; } if (m == 0 || n == 0) { - return result; + return out; } const std::vector<int64_t> mm_output_size = {m, n}; - if (input_cont.sizes() != mm_output_size) { - input_cont = at::broadcast_to(input_cont, mm_output_size).contiguous(); + if (self_cont.sizes() != mm_output_size) { + self_cont = at::broadcast_to(self_cont, mm_output_size).contiguous(); } @@ -187,46 +185,46 @@ at::Tensor& addmm_complex_out_xpu_impl( reinterpret_cast<const std::complex<T>*>(mat2_cont.const_data_ptr()), n, complex_beta, - reinterpret_cast<std::complex<T>*>(input_cont.data_ptr()), + reinterpret_cast<std::complex<T>*>(self_cont.data_ptr()), n); - if (result.sizes() == input_cont.sizes()) { - result.copy_(input_cont); + if (out.sizes() == self_cont.sizes()) { + out.copy_(self_cont); } else { - result.copy_(input_cont.view(result.sizes())); + out.copy_(self_cont.view(out.sizes())); } - return result; + return out; } at::Tensor& addmm_complex_out_xpu( - const Tensor& input, + const Tensor& self, const Tensor& mat1, const Tensor& mat2, const Scalar& beta, const Scalar& alpha, - Tensor& result) { + Tensor& out) { - AT_DISPATCH_COMPLEX_TYPES(input.scalar_type(), "addmm_complex_out_xpu", [&] { + AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "addmm_complex_out_xpu", [&] { using underlying_t = typename c10::scalar_value_type<scalar_t>::type; addmm_complex_out_xpu_impl<underlying_t>( - input, mat1, mat2, beta, alpha, result); + self, mat1, mat2, beta, alpha, out); }); - return result; + return out; } template <typename T> at::Tensor& baddbmm_complex_out_xpu_impl( - const Tensor& input, + const Tensor& self, const Tensor& batch1, const Tensor& batch2, const Scalar& beta, const Scalar& alpha, - Tensor& result) { + Tensor& out) { at::Tensor batch1_cont = resolveViewsAndConjugation(batch1); at::Tensor batch2_cont = resolveViewsAndConjugation(batch2); - at::Tensor input_cont = resolveViewsAndConjugation(input).clone().detach(); + at::Tensor self_cont = resolveViewsAndConjugation(self).clone().detach(); const int64_t batch_size = batch1_cont.sizes().at(0); const int64_t m = batch1_cont.sizes().at(1); @@ -234,8 +232,8 @@ at::Tensor& baddbmm_complex_out_xpu_impl( const int64_t k = batch1_cont.sizes().at(2); const std::vector<int64_t> mm_output_size = {batch_size, m, n}; - if (input_cont.sizes() != mm_output_size) { - input_cont = at::broadcast_to(input_cont, mm_output_size).contiguous();; + if (self_cont.sizes() != mm_output_size) { + self_cont = at::broadcast_to(self_cont, mm_output_size).contiguous();; } std::complex<T> complex_alpha = @@ -258,41 +256,50 @@ at::Tensor& baddbmm_complex_out_xpu_impl( n, k * n, complex_beta, - reinterpret_cast<std::complex<T>*>(input_cont.data_ptr()), + reinterpret_cast<std::complex<T>*>(self_cont.data_ptr()), n, m * n, batch_size); - if (result.sizes() == input_cont.sizes()) { - result.copy_(input_cont); + if (out.sizes() == self_cont.sizes()) { + out.copy_(self_cont); } else { - result.copy_(input_cont.view(result.sizes())); + out.copy_(self_cont.view(out.sizes())); } - return result; + return out; } at::Tensor& baddbmm_complex_out_xpu( - const Tensor& input, + const Tensor& self, const Tensor& batch1, const Tensor& batch2, const Scalar& beta, const Scalar& alpha, - Tensor& result) { + Tensor& out) { AT_DISPATCH_COMPLEX_TYPES( - input.scalar_type(), "baddbmm_complex_out_xpu", [&] { + self.scalar_type(), "baddbmm_complex_out_xpu", [&] { using underlying_t = typename c10::scalar_value_type<scalar_t>::type; baddbmm_complex_out_xpu_impl<underlying_t>( - input, batch1, batch2, beta, alpha, result); + self, batch1, batch2, beta, alpha, out); }); - return result; + return out; } -REGISTER_XPU_DISPATCH(mm_complex_stub, &mm_complex_out_xpu) -REGISTER_XPU_DISPATCH(bmm_complex_stub, &bmm_complex_out_xpu) -REGISTER_XPU_DISPATCH(addmm_complex_stub, &addmm_complex_out_xpu) -REGISTER_XPU_DISPATCH(baddbmm_complex_stub, &baddbmm_complex_out_xpu) +TORCH_LIBRARY(xpu_mkl, m) { + m.def("xpu_mkl::mm(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)"); + m.def("xpu_mkl::bmm(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)"); + m.def("xpu_mkl::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"); + m.def("xpu_mkl::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"); +} + +TORCH_LIBRARY_IMPL(xpu_mkl, XPU, m) { + m.impl("xpu_mkl::mm", mm_complex_out_xpu); + m.impl("xpu_mkl::bmm", bmm_complex_out_xpu); + m.impl("xpu_mkl::addmm", addmm_complex_out_xpu); + m.impl("xpu_mkl::baddbmm", baddbmm_complex_out_xpu); +} -} // namespace at::native \ No newline at end of file +} // namespace at::native
diff --git a/src/ATen/native/xpu/mkl/SpectralOps.cpp b/src/ATen/native/xpu/mkl/SpectralOps.cpp index 492c4da6cd..96d4118f5b 100644 --- a/src/ATen/native/xpu/mkl/SpectralOps.cpp +++ b/src/ATen/native/xpu/mkl/SpectralOps.cpp @@ -10,10 +10,6 @@ #include #include #include -#include -#include -#include -#include using namespace oneapi::mkl::dft; @@ -582,61 +578,3 @@ Tensor& _fft_r2c_mkl_out( } } // namespace at::native::xpu - - -namespace at::native::xpu { - -at::Tensor& mm_out_xpu(at::Tensor &out, const at::Tensor &self, const at::Tensor &mat2) { - at::Tensor self_cont = self.contiguous(); - at::Tensor mat2_cont = mat2.contiguous(); - at::Tensor out_cont = out.contiguous(); - - const int64_t m = self_cont.sizes().at(0); - const int64_t n = mat2_cont.sizes().at(1); - const int64_t k = self_cont.sizes().at(1); - - constexpr std::complex<float> alpha = {1.0f, 0.0f}; - constexpr std::complex<float> beta = {0.0f, 0.0f}; - - oneapi::mkl::blas::row_major::gemm( - at::xpu::getCurrentSYCLQueue(), - oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::nontrans, - m, - n, - k, - alpha, - reinterpret_cast<const std::complex<float>*>(self_cont.const_data_ptr()), - k, - reinterpret_cast<const std::complex<float>*>(mat2_cont.const_data_ptr()), - n, - beta, - reinterpret_cast<std::complex<float>*>(out_cont.data_ptr()), - n); - - return out; -} - -Tensor mm_xpu(const Tensor& self, const Tensor& other) { - TORCH_CHECK(self.is_xpu() && other.is_xpu(), - "mm_xpu only supports XPU tensors"); - - // Your SYCL implementation here - auto result = at::empty({self.size(0), other.size(1)}, self.options()); - - std::cout << "Example change" << std::endl; - mm_out_xpu(result, self, other); - - return result; -} - -} // namespace at::native::xpu - -// Register ONLY for XPU -TORCH_LIBRARY(xpu_ops, m) { - m.def("mm_xpu(Tensor self, Tensor other) -> Tensor"); -} - -TORCH_LIBRARY_IMPL(xpu_ops, XPU, m) { - m.impl("mm_xpu", TORCH_FN(at::native::xpu::mm_xpu)); -} \ No newline at end of file
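After this patch the kernels are exposed as explicit out-variant custom ops in the xpu_mkl namespace rather than dispatch-stub overrides. A usage sketch under the same loading assumptions as before; note the caller allocates the output, matching the Tensor(a!) out schema:

    import torch

    a = torch.randn(4, 3, dtype=torch.complex64, device="xpu")
    b = torch.randn(3, 5, dtype=torch.complex64, device="xpu")
    out = torch.empty(4, 5, dtype=torch.complex64, device="xpu")
    # Out-variant: the result is written into `out` and also returned.
    torch.ops.xpu_mkl.mm(a, b, out=out)
    torch.testing.assert_close(out.cpu(), torch.mm(a.cpu(), b.cpu()))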
From 963531cfcb35397e46e4807937052b59c83446a4 Mon Sep 17 00:00:00 2001 From: Pawel Swider Date: Fri, 29 Aug 2025 10:52:07 +0000 Subject: [PATCH 4/5] Refactor --- src/ATen/native/xpu/Blas.cpp | 139 ++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 69 deletions(-) diff --git a/src/ATen/native/xpu/Blas.cpp b/src/ATen/native/xpu/Blas.cpp index 6738da10df..31527722d1 100644 --- a/src/ATen/native/xpu/Blas.cpp +++ b/src/ATen/native/xpu/Blas.cpp @@ -6,13 +6,18 @@ namespace at::native { -inline at::Tensor resolveViewsAndConjugation(const at::Tensor& input) { - at::Tensor input_resolved = input.is_conj() ? input.resolve_conj() : input; - at::Tensor input_contiguous = input_resolved.is_contiguous() - ? input_resolved - : input_resolved.contiguous(); +#if defined(USE_ONEMKL_XPU) - return input_contiguous; +at::Tensor& handle_output_copy(at::Tensor& out, const at::Tensor& result) { + if (!out.is_same(result)) { + if (out.sizes() == result.sizes()) { + out.copy_(result); + } else { + out.copy_(result.view(out.sizes())); + } + } + + return out; } template <typename T> @@ -20,16 +25,16 @@ at::Tensor& mm_complex_out_xpu_impl( const at::Tensor& self, const at::Tensor& mat2, at::Tensor& out) { - at::Tensor self_cont = resolveViewsAndConjugation(self); - at::Tensor mat2_cont = resolveViewsAndConjugation(mat2); - at::Tensor out_cont = resolveViewsAndConjugation(out); + at::Tensor self_cont = self.contiguous().resolve_conj(); + at::Tensor mat2_cont = mat2.contiguous().resolve_conj(); + at::Tensor out_cont = out.contiguous().resolve_conj(); const int64_t m = self_cont.sizes().at(0); const int64_t n = mat2_cont.sizes().at(1); const int64_t k = self_cont.sizes().at(1); - constexpr std::complex<T> alpha = {T(1.0), T(0.0)}; - constexpr std::complex<T> beta = {T(0.0), T(0.0)}; + constexpr std::complex<T> alpha = {T(1), T(0)}; + constexpr std::complex<T> beta = {T(0), T(0)}; oneapi::mkl::blas::row_major::gemm( c10::xpu::getCurrentXPUStream().queue(), @@ -47,18 +52,15 @@ at::Tensor& mm_complex_out_xpu_impl( reinterpret_cast<std::complex<T>*>(out_cont.data_ptr()), n); - if (!out.is_same(out_cont)) { - out.copy_(out_cont); - } - - return out; + return handle_output_copy(out, out_cont); } at::Tensor& mm_complex_out_xpu( const at::Tensor& self, const at::Tensor& mat2, at::Tensor& out) { - at::Tensor out_ref = at::mm(self.cpu(), mat2.cpu()); + TORCH_CHECK( + self.is_complex(), "_mm_mkl.out expects self to be a complex datatype."); AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "mm_complex_out_xpu", [&] { using underlying_t = typename c10::scalar_value_type<scalar_t>::type; @@ -71,19 +73,19 @@ at::Tensor& mm_complex_out_xpu( template <typename T> at::Tensor& bmm_complex_out_xpu_impl( const at::Tensor& self, - const at::Tensor& batch2, + const at::Tensor& mat2, at::Tensor& out) { - at::Tensor self_cont = resolveViewsAndConjugation(self); - at::Tensor batch2_cont = resolveViewsAndConjugation(batch2); - at::Tensor out_cont = resolveViewsAndConjugation(out); + at::Tensor self_cont = self.contiguous().resolve_conj(); + at::Tensor mat2_cont = mat2.contiguous().resolve_conj(); + at::Tensor out_cont = out.contiguous().resolve_conj(); const int64_t batch_size = self_cont.sizes().at(0); const int64_t m = self_cont.sizes().at(1); - const int64_t n = batch2_cont.sizes().at(2); + const int64_t n = mat2_cont.sizes().at(2); const int64_t k = self_cont.sizes().at(2); - constexpr std::complex<T> alpha = {T(1.0f), T(0.0f)}; - constexpr std::complex<T> beta = {T(0.0f), T(0.0f)}; + constexpr std::complex<T> alpha = {T(1), T(0)}; + constexpr std::complex<T> beta = {T(0), T(0)}; oneapi::mkl::blas::row_major::gemm_batch( c10::xpu::getCurrentXPUStream().queue(), @@ -96,7 +98,7 @@ at::Tensor& bmm_complex_out_xpu_impl( reinterpret_cast<const std::complex<T>*>(self_cont.const_data_ptr()), k, m * k, - reinterpret_cast<const std::complex<T>*>(batch2_cont.const_data_ptr()), + reinterpret_cast<const std::complex<T>*>(mat2_cont.const_data_ptr()), n, k * n, beta, @@ -105,17 +107,15 @@ at::Tensor& bmm_complex_out_xpu_impl( m * n, batch_size); - if (!out.is_same(out_cont)) { - out.copy_(out_cont); - } - - return out; + return handle_output_copy(out, out_cont); } at::Tensor& bmm_complex_out_xpu( const at::Tensor& self, const at::Tensor& mat2, at::Tensor& out) { + TORCH_CHECK( + self.is_complex(), "_bmm_mkl.out expects self to be a complex datatype."); AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "bmm_complex_out_xpu", [&] { using underlying_t = typename c10::scalar_value_type<scalar_t>::type; @@ -133,15 +133,14 @@ at::Tensor& addmm_complex_out_xpu_impl( const Scalar& beta, const Scalar& alpha, Tensor& out) { - at::Tensor mat1_cont = resolveViewsAndConjugation(mat1); - at::Tensor mat2_cont = resolveViewsAndConjugation(mat2); - at::Tensor self_cont = resolveViewsAndConjugation(self).clone().detach(); + at::Tensor mat1_cont = mat1.contiguous().resolve_conj(); + at::Tensor mat2_cont = mat2.contiguous().resolve_conj(); + at::Tensor self_cont = self.contiguous().resolve_conj().clone().detach(); const int64_t m = mat1_cont.sizes().at(0); const int64_t n = mat2_cont.sizes().at(1); const int64_t k = mat1_cont.sizes().at(1); - // Some paths in the code below do not handle multiplications of the form [n, 0] x [0, m] if (k == 0) { if (out.numel() == 0) { return out; @@ -166,7 +165,6 @@ at::Tensor& addmm_complex_out_xpu_impl( self_cont = at::broadcast_to(self_cont, mm_output_size).contiguous(); } - std::complex<T> complex_alpha = static_cast<std::complex<T>>(alpha.toComplexDouble()); std::complex<T> complex_beta = @@ -188,13 +186,7 @@ at::Tensor& addmm_complex_out_xpu_impl( reinterpret_cast<std::complex<T>*>(self_cont.data_ptr()), n); - if (out.sizes() == self_cont.sizes()) { - out.copy_(self_cont); - } else { - out.copy_(self_cont.view(out.sizes())); - } - - return out; + return handle_output_copy(out, self_cont); } at::Tensor& addmm_complex_out_xpu( @@ -204,6 +196,9 @@ at::Tensor& addmm_complex_out_xpu( const Scalar& beta, const Scalar& alpha, Tensor& out) { + TORCH_CHECK( + self.is_complex(), + "_addmm_mkl.out expects self to be a complex datatype."); AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "addmm_complex_out_xpu", [&] { using underlying_t = typename c10::scalar_value_type<scalar_t>::type; @@ -222,9 +217,9 @@ at::Tensor& baddbmm_complex_out_xpu_impl( const Scalar& beta, const Scalar& alpha, Tensor& out) { - at::Tensor batch1_cont = resolveViewsAndConjugation(batch1); - at::Tensor batch2_cont = resolveViewsAndConjugation(batch2); - at::Tensor self_cont = resolveViewsAndConjugation(self).clone().detach(); + at::Tensor batch1_cont = batch1.contiguous().resolve_conj(); + at::Tensor batch2_cont = batch2.contiguous().resolve_conj(); + at::Tensor self_cont = self.contiguous().resolve_conj().clone().detach(); const int64_t batch_size = batch1_cont.sizes().at(0); const int64_t m = batch1_cont.sizes().at(1); @@ -234,7 +228,7 @@ at::Tensor& baddbmm_complex_out_xpu_impl( const std::vector<int64_t> mm_output_size = {batch_size, m, n}; if (self_cont.sizes() != mm_output_size) { - self_cont = at::broadcast_to(self_cont, mm_output_size).contiguous();; + self_cont = at::broadcast_to(self_cont, mm_output_size).contiguous(); } std::complex<T> complex_alpha = @@ -261,13 +256,7 @@ at::Tensor& baddbmm_complex_out_xpu_impl( m * n, batch_size); - if (out.sizes() == self_cont.sizes()) { - out.copy_(self_cont); - } else { - out.copy_(self_cont.view(out.sizes())); - } - - return out; + return handle_output_copy(out, self_cont); } at::Tensor& baddbmm_complex_out_xpu( @@ -277,29 +266,41 @@ at::Tensor& baddbmm_complex_out_xpu( const Scalar& beta, const Scalar& alpha, Tensor& out) { + TORCH_CHECK( + self.is_complex(), + "_baddbmm_mkl.out expects self to be a complex datatype."); - AT_DISPATCH_COMPLEX_TYPES( - self.scalar_type(), "baddbmm_complex_out_xpu", [&] { - using underlying_t = typename c10::scalar_value_type<scalar_t>::type; - baddbmm_complex_out_xpu_impl<underlying_t>( - self, batch1, batch2, beta, alpha, out); - }); + AT_DISPATCH_COMPLEX_TYPES(self.scalar_type(), "baddbmm_complex_out_xpu", [&] { + using underlying_t = typename c10::scalar_value_type<scalar_t>::type; + baddbmm_complex_out_xpu_impl<underlying_t>( + self, batch1, batch2, beta, alpha, out); + }); return out; } -TORCH_LIBRARY(xpu_mkl, m) { - m.def("xpu_mkl::mm(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)"); - m.def("xpu_mkl::bmm(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)"); - m.def("xpu_mkl::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"); - m.def("xpu_mkl::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"); +#endif // USE_ONEMKL_XPU + +TORCH_LIBRARY_FRAGMENT(aten, m) { + m.def( + "aten::_mm_mkl.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)"); + m.def( + "aten::_bmm_mkl.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)"); + m.def( + "aten::_addmm_mkl.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"); + m.def( + "aten::_baddbmm_mkl.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"); } -TORCH_LIBRARY_IMPL(xpu_mkl, XPU, m) { - m.impl("xpu_mkl::mm", mm_complex_out_xpu); - m.impl("xpu_mkl::bmm", bmm_complex_out_xpu); - m.impl("xpu_mkl::addmm", addmm_complex_out_xpu); - m.impl("xpu_mkl::baddbmm", baddbmm_complex_out_xpu); +#if defined(USE_ONEMKL_XPU) + +TORCH_LIBRARY_IMPL(aten, XPU, m) { + m.impl("aten::_mm_mkl.out", mm_complex_out_xpu); + m.impl("aten::_bmm_mkl.out", bmm_complex_out_xpu); + m.impl("aten::_addmm_mkl.out", addmm_complex_out_xpu); + m.impl("aten::_baddbmm_mkl.out", baddbmm_complex_out_xpu); } +#endif // USE_ONEMKL_XPU + } // namespace at::native
From 35930d26a31b6cfab58ad87709bf6d9cf1010601 Mon Sep 17 00:00:00 2001 From: Pawel Swider Date: Fri, 29 Aug 2025 11:11:20 +0000 Subject: [PATCH 5/5] Unskip passing complex matmul tests --- test/xpu/skip_list_common.py | 973 ++--------------------------------- 1 file changed, 31 insertions(+), 942 deletions(-) diff --git a/test/xpu/skip_list_common.py b/test/xpu/skip_list_common.py index cb9db5bf8c..79b78d847d 100644 --- a/test/xpu/skip_list_common.py +++ b/test/xpu/skip_list_common.py @@ -49,7 +49,7 @@ # OneDNN issues, https://github.com/intel/torch-xpu-ops/issues/253 # RuntimeError: Long is not supported in oneDNN! # RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive
# RuntimeError: could not create a primitive descriptor for a deconvolution forward propagation primitive - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_noncontiguous_samples_nn_functional_conv3d_xpu_int64", "test_noncontiguous_samples_nn_functional_conv_transpose1d_xpu_int64", "test_noncontiguous_samples_nn_functional_conv_transpose2d_xpu_complex64", @@ -124,196 +124,35 @@ "test_dtypes_addmm_decomposed_xpu", "test_dtypes_addmm_xpu", "test_dtypes_addmv_xpu", - "test_dtypes_addr_xpu", "test_dtypes_baddbmm_xpu", - "test_dtypes_cholesky_inverse_xpu", - "test_dtypes_cholesky_solve_xpu", - "test_dtypes_cholesky_xpu", - "test_dtypes_corrcoef_xpu", - "test_dtypes_cov_xpu", - "test_dtypes_linalg_cholesky_ex_xpu", - "test_dtypes_linalg_cholesky_xpu", - "test_dtypes_linalg_cond_xpu", - "test_dtypes_linalg_det_xpu", - "test_dtypes_linalg_eig_xpu", - "test_dtypes_linalg_eigh_xpu", - "test_dtypes_linalg_eigvals_xpu", - "test_dtypes_linalg_eigvalsh_xpu", - "test_dtypes_linalg_inv_ex_xpu", - "test_dtypes_linalg_inv_xpu", - "test_dtypes_linalg_ldl_factor_ex_xpu", - "test_dtypes_linalg_ldl_factor_xpu", - "test_dtypes_linalg_ldl_solve_xpu", - "test_dtypes_linalg_lstsq_grad_oriented_xpu", - "test_dtypes_linalg_lstsq_xpu", - "test_dtypes_linalg_lu_factor_ex_xpu", - "test_dtypes_linalg_lu_factor_xpu", - "test_dtypes_linalg_lu_solve_xpu", - "test_dtypes_linalg_lu_xpu", - "test_dtypes_linalg_matrix_power_xpu", - "test_dtypes_linalg_matrix_rank_hermitian_xpu", - "test_dtypes_linalg_matrix_rank_xpu", - "test_dtypes_linalg_pinv_hermitian_xpu", - "test_dtypes_linalg_pinv_xpu", - "test_dtypes_linalg_qr_xpu", - "test_dtypes_linalg_slogdet_xpu", - "test_dtypes_linalg_solve_ex_xpu", - "test_dtypes_linalg_solve_xpu", - "test_dtypes_linalg_svd_xpu", - "test_dtypes_linalg_tensorinv_xpu", - "test_dtypes_linalg_tensorsolve_xpu", - "test_dtypes_logdet_xpu", - "test_dtypes_lu_solve_xpu", - "test_dtypes_lu_xpu", + "test_dtypes_mv_xpu", - "test_dtypes_nn_functional_scaled_dot_product_attention_xpu", - "test_dtypes_norm_nuc_xpu", - "test_dtypes_pinverse_xpu", - "test_dtypes_qr_xpu", - "test_dtypes_svd_xpu", - "test_dtypes_tensordot_xpu", - "test_dtypes_triangular_solve_xpu", - "test_noncontiguous_samples___rmatmul___xpu_complex64", + "test_noncontiguous_samples___rmatmul___xpu_int64", - "test_noncontiguous_samples_addbmm_xpu_complex64", "test_noncontiguous_samples_addbmm_xpu_float32", "test_noncontiguous_samples_addbmm_xpu_int64", - "test_noncontiguous_samples_addmm_decomposed_xpu_complex64", "test_noncontiguous_samples_addmm_decomposed_xpu_int64", - "test_noncontiguous_samples_addmm_xpu_complex64", "test_noncontiguous_samples_addmm_xpu_float32", "test_noncontiguous_samples_addmm_xpu_int64", - "test_noncontiguous_samples_addmv_xpu_complex64", "test_noncontiguous_samples_addmv_xpu_float32", "test_noncontiguous_samples_addmv_xpu_int64", - "test_noncontiguous_samples_addr_xpu_complex64", - "test_noncontiguous_samples_baddbmm_xpu_complex64", - "test_noncontiguous_samples_baddbmm_xpu_int64", - "test_noncontiguous_samples_bmm_xpu_complex64", "test_noncontiguous_samples_bmm_xpu_int64", - "test_noncontiguous_samples_cholesky_inverse_xpu_complex64", - "test_noncontiguous_samples_cholesky_solve_xpu_complex64", - "test_noncontiguous_samples_cholesky_xpu_complex64", - "test_noncontiguous_samples_corrcoef_xpu_complex64", - "test_noncontiguous_samples_cov_xpu_complex64", - "test_noncontiguous_samples_einsum_xpu_complex64", 
"test_noncontiguous_samples_einsum_xpu_int64", - "test_noncontiguous_samples_geqrf_xpu_complex64", - "test_noncontiguous_samples_inner_xpu_complex64", "test_noncontiguous_samples_inner_xpu_int64", - "test_noncontiguous_samples_linalg_cholesky_ex_xpu_complex64", - "test_noncontiguous_samples_linalg_cholesky_xpu_complex64", - "test_noncontiguous_samples_linalg_cond_xpu_complex64", - "test_noncontiguous_samples_linalg_det_xpu_complex64", - "test_noncontiguous_samples_linalg_eig_xpu_complex64", "test_noncontiguous_samples_linalg_eig_xpu_float32", - "test_noncontiguous_samples_linalg_eigh_xpu_complex64", - "test_noncontiguous_samples_linalg_eigvals_xpu_complex64", - "test_noncontiguous_samples_linalg_eigvalsh_xpu_complex64", - "test_noncontiguous_samples_linalg_householder_product_xpu_complex64", - "test_noncontiguous_samples_linalg_inv_ex_xpu_complex64", - "test_noncontiguous_samples_linalg_inv_xpu_complex64", - "test_noncontiguous_samples_linalg_ldl_factor_ex_xpu_complex64", - "test_noncontiguous_samples_linalg_ldl_factor_xpu_complex64", - "test_noncontiguous_samples_linalg_ldl_solve_xpu_complex64", - "test_noncontiguous_samples_linalg_lstsq_grad_oriented_xpu_complex64", - "test_noncontiguous_samples_linalg_lstsq_xpu_complex64", - "test_noncontiguous_samples_linalg_lu_factor_ex_xpu_complex64", - "test_noncontiguous_samples_linalg_lu_factor_xpu_complex64", - "test_noncontiguous_samples_linalg_lu_solve_xpu_complex64", - "test_noncontiguous_samples_linalg_lu_xpu_complex64", - "test_noncontiguous_samples_linalg_matrix_norm_xpu_complex64", - "test_noncontiguous_samples_linalg_matrix_power_xpu_complex64", - "test_noncontiguous_samples_linalg_matrix_rank_hermitian_xpu_complex64", - "test_noncontiguous_samples_linalg_matrix_rank_xpu_complex64", - "test_noncontiguous_samples_linalg_norm_subgradients_at_zero_xpu_complex64", - "test_noncontiguous_samples_linalg_norm_xpu_complex64", - "test_noncontiguous_samples_linalg_pinv_hermitian_xpu_complex64", - "test_noncontiguous_samples_linalg_pinv_singular_xpu_complex64", - "test_noncontiguous_samples_linalg_pinv_xpu_complex64", - "test_noncontiguous_samples_linalg_qr_xpu_complex64", - "test_noncontiguous_samples_linalg_slogdet_xpu_complex64", - "test_noncontiguous_samples_linalg_solve_ex_xpu_complex64", - "test_noncontiguous_samples_linalg_solve_triangular_xpu_complex64", - "test_noncontiguous_samples_linalg_solve_xpu_complex64", - "test_noncontiguous_samples_linalg_svd_xpu_complex64", - "test_noncontiguous_samples_linalg_svdvals_xpu_complex64", - "test_noncontiguous_samples_linalg_tensorinv_xpu_complex64", - "test_noncontiguous_samples_linalg_tensorsolve_xpu_complex64", - "test_noncontiguous_samples_logdet_xpu_complex64", - "test_noncontiguous_samples_lu_solve_xpu_complex64", - "test_noncontiguous_samples_lu_xpu_complex64", - "test_noncontiguous_samples_matmul_xpu_complex64", "test_noncontiguous_samples_matmul_xpu_int64", - "test_noncontiguous_samples_mm_xpu_complex64", "test_noncontiguous_samples_mm_xpu_int64", - "test_noncontiguous_samples_mv_xpu_complex64", "test_noncontiguous_samples_mv_xpu_int64", "test_noncontiguous_samples_nn_functional_bilinear_xpu_int64", - "test_noncontiguous_samples_nn_functional_linear_xpu_complex64", - "test_noncontiguous_samples_norm_nuc_xpu_complex64", - "test_noncontiguous_samples_ormqr_xpu_complex64", - "test_noncontiguous_samples_pinverse_xpu_complex64", - "test_noncontiguous_samples_qr_xpu_complex64", - "test_noncontiguous_samples_svd_xpu_complex64", - "test_noncontiguous_samples_tensordot_xpu_complex64", 
"test_noncontiguous_samples_tensordot_xpu_int64", - "test_noncontiguous_samples_triangular_solve_xpu_complex64", - "test_numpy_ref_addbmm_xpu_complex128", "test_numpy_ref_addbmm_xpu_float64", "test_numpy_ref_addbmm_xpu_int64", - "test_numpy_ref_linalg_tensorinv_xpu_complex128", "test_out_addbmm_xpu_float32", "test_out_addmm_xpu_float32", "test_out_addmv_xpu_float32", "test_out_baddbmm_xpu_float32", "test_out_mm_xpu_float32", "test_out_mv_xpu_float32", - "test_out_requires_grad_error_addbmm_xpu_complex64", - "test_out_requires_grad_error_addmm_decomposed_xpu_complex64", - "test_out_requires_grad_error_addmm_xpu_complex64", - "test_out_requires_grad_error_addmv_xpu_complex64", - "test_out_requires_grad_error_baddbmm_xpu_complex64", - "test_out_requires_grad_error_bmm_xpu_complex64", - "test_out_requires_grad_error_cholesky_inverse_xpu_complex64", - "test_out_requires_grad_error_cholesky_solve_xpu_complex64", - "test_out_requires_grad_error_cholesky_xpu_complex64", - "test_out_requires_grad_error_inner_xpu_complex64", - "test_out_requires_grad_error_linalg_cholesky_ex_xpu_complex64", - "test_out_requires_grad_error_linalg_cholesky_xpu_complex64", - "test_out_requires_grad_error_linalg_eig_xpu_complex64", - "test_out_requires_grad_error_linalg_eigh_xpu_complex64", - "test_out_requires_grad_error_linalg_eigvals_xpu_complex64", - "test_out_requires_grad_error_linalg_eigvalsh_xpu_complex64", - "test_out_requires_grad_error_linalg_inv_ex_xpu_complex64", - "test_out_requires_grad_error_linalg_inv_xpu_complex64", - "test_out_requires_grad_error_linalg_lstsq_xpu_complex64", - "test_out_requires_grad_error_linalg_lu_factor_xpu_complex64", - "test_out_requires_grad_error_linalg_lu_solve_xpu_complex64", - "test_out_requires_grad_error_linalg_multi_dot_xpu_complex64", - "test_out_requires_grad_error_linalg_pinv_hermitian_xpu_complex64", - "test_out_requires_grad_error_linalg_pinv_xpu_complex64", - "test_out_requires_grad_error_linalg_qr_xpu_complex64", - "test_out_requires_grad_error_linalg_solve_ex_xpu_complex64", - "test_out_requires_grad_error_linalg_solve_xpu_complex64", - "test_out_requires_grad_error_linalg_tensorinv_xpu_complex64", - "test_out_requires_grad_error_lu_solve_xpu_complex64", - "test_out_requires_grad_error_lu_xpu_complex64", - "test_out_requires_grad_error_mm_xpu_complex64", - "test_out_requires_grad_error_mv_xpu_complex64", - "test_out_requires_grad_error_nn_functional_linear_xpu_complex64", - "test_out_requires_grad_error_qr_xpu_complex64", - "test_out_requires_grad_error_tensordot_xpu_complex64", - "test_out_requires_grad_error_triangular_solve_xpu_complex64", - "test_out_warning_addmm_decomposed_xpu", - "test_out_warning_addmm_xpu", - "test_out_warning_addmv_xpu", - "test_out_warning_baddbmm_xpu", - "test_out_warning_bmm_xpu", - "test_out_warning_matmul_xpu", - "test_out_warning_mm_xpu", - "test_out_warning_mv_xpu", - "test_out_warning_nn_functional_linear_xpu", "test_python_ref__refs_linalg_svd_xpu_complex128", "test_python_ref__refs_linalg_svd_xpu_complex64", "test_python_ref__refs_linalg_svd_xpu_float64", @@ -329,202 +168,11 @@ "test_python_ref_torch_fallback__refs_linalg_svd_xpu_complex64", "test_python_ref_torch_fallback__refs_linalg_svd_xpu_float64", "test_python_ref_torch_fallback__refs_nn_functional_pdist_xpu_float64", - "test_variant_consistency_eager___rmatmul___xpu_complex64", - "test_variant_consistency_eager_addmm_decomposed_xpu_complex64", - "test_variant_consistency_eager_addmm_xpu_complex64", "test_variant_consistency_eager_addmm_xpu_float32", - 
"test_variant_consistency_eager_addmv_xpu_complex64", "test_variant_consistency_eager_addmv_xpu_float32", - "test_variant_consistency_eager_baddbmm_xpu_complex64", "test_variant_consistency_eager_baddbmm_xpu_float32", - "test_variant_consistency_eager_bmm_xpu_complex64", - "test_variant_consistency_eager_cholesky_inverse_xpu_complex64", - "test_variant_consistency_eager_cholesky_solve_xpu_complex64", - "test_variant_consistency_eager_cholesky_xpu_complex64", - "test_variant_consistency_eager_corrcoef_xpu_complex64", - "test_variant_consistency_eager_cov_xpu_complex64", - "test_variant_consistency_eager_einsum_xpu_complex64", - "test_variant_consistency_eager_geqrf_xpu_complex64", - "test_variant_consistency_eager_inner_xpu_complex64", - "test_variant_consistency_eager_linalg_cholesky_ex_xpu_complex64", - "test_variant_consistency_eager_linalg_cholesky_xpu_complex64", - "test_variant_consistency_eager_linalg_cond_xpu_complex64", - "test_variant_consistency_eager_linalg_det_xpu_complex64", - "test_variant_consistency_eager_linalg_eig_xpu_complex64", - "test_variant_consistency_eager_linalg_eigh_xpu_complex64", - "test_variant_consistency_eager_linalg_eigvals_xpu_complex64", - "test_variant_consistency_eager_linalg_eigvalsh_xpu_complex64", - "test_variant_consistency_eager_linalg_householder_product_xpu_complex64", - "test_variant_consistency_eager_linalg_inv_ex_xpu_complex64", - "test_variant_consistency_eager_linalg_inv_xpu_complex64", - "test_variant_consistency_eager_linalg_ldl_factor_ex_xpu_complex64", - "test_variant_consistency_eager_linalg_ldl_factor_xpu_complex64", - "test_variant_consistency_eager_linalg_ldl_solve_xpu_complex64", - "test_variant_consistency_eager_linalg_lstsq_grad_oriented_xpu_complex64", - "test_variant_consistency_eager_linalg_lstsq_xpu_complex64", - "test_variant_consistency_eager_linalg_lu_factor_xpu_complex64", - "test_variant_consistency_eager_linalg_lu_solve_xpu_complex64", - "test_variant_consistency_eager_linalg_matrix_norm_xpu_complex64", - "test_variant_consistency_eager_linalg_matrix_power_xpu_complex64", - "test_variant_consistency_eager_linalg_matrix_rank_hermitian_xpu_complex64", - "test_variant_consistency_eager_linalg_matrix_rank_xpu_complex64", - "test_variant_consistency_eager_linalg_multi_dot_xpu_complex64", - "test_variant_consistency_eager_linalg_norm_subgradients_at_zero_xpu_complex64", - "test_variant_consistency_eager_linalg_norm_xpu_complex64", - "test_variant_consistency_eager_linalg_pinv_hermitian_xpu_complex64", - "test_variant_consistency_eager_linalg_pinv_singular_xpu_complex64", - "test_variant_consistency_eager_linalg_pinv_xpu_complex64", - "test_variant_consistency_eager_linalg_qr_xpu_complex64", - "test_variant_consistency_eager_linalg_slogdet_xpu_complex64", - "test_variant_consistency_eager_linalg_solve_ex_xpu_complex64", - "test_variant_consistency_eager_linalg_solve_triangular_xpu_complex64", - "test_variant_consistency_eager_linalg_solve_xpu_complex64", - "test_variant_consistency_eager_linalg_svd_xpu_complex64", - "test_variant_consistency_eager_linalg_svdvals_xpu_complex64", - "test_variant_consistency_eager_linalg_tensorinv_xpu_complex64", - "test_variant_consistency_eager_linalg_tensorsolve_xpu_complex64", - "test_variant_consistency_eager_logdet_xpu_complex64", - "test_variant_consistency_eager_lu_solve_xpu_complex64", - "test_variant_consistency_eager_lu_xpu_complex64", - "test_variant_consistency_eager_matmul_xpu_complex64", - "test_variant_consistency_eager_mm_xpu_complex64", - 
"test_variant_consistency_eager_mv_xpu_complex64", - "test_variant_consistency_eager_nn_functional_linear_xpu_complex64", - "test_variant_consistency_eager_norm_nuc_xpu_complex64", - "test_variant_consistency_eager_ormqr_xpu_complex64", - "test_variant_consistency_eager_pinverse_xpu_complex64", - "test_variant_consistency_eager_qr_xpu_complex64", - "test_variant_consistency_eager_svd_xpu_complex64", - "test_variant_consistency_eager_tensordot_xpu_complex64", - "test_variant_consistency_eager_triangular_solve_xpu_complex64", - # oneDNN issues - # RuntimeError: value cannot be converted to type float without overflow - # https://github.com/intel/torch-xpu-ops/issues/683 - "test_conj_view_addbmm_xpu_complex64", - "test_neg_conj_view_addbmm_xpu_complex128", - ### Error #0 in TestMathBitsXPU , RuntimeError: Double and complex datatype matmul is not supported in oneDNN + ### Error #0 in TestMathBitsXPU , RuntimeError: Double datatype matmul is not supported in oneDNN # https://github.com/intel/torch-xpu-ops/issues/254 - "test_conj_view___rmatmul___xpu_complex64", - "test_conj_view__refs_linalg_svd_xpu_complex64", - "test_conj_view_addmm_decomposed_xpu_complex64", - "test_conj_view_addmm_xpu_complex64", - "test_conj_view_addmv_xpu_complex64", - "test_conj_view_addr_xpu_complex64", - "test_conj_view_baddbmm_xpu_complex64", - "test_conj_view_bmm_xpu_complex64", - "test_conj_view_cholesky_inverse_xpu_complex64", - "test_conj_view_cholesky_solve_xpu_complex64", - "test_conj_view_cholesky_xpu_complex64", - "test_conj_view_corrcoef_xpu_complex64", - "test_conj_view_cov_xpu_complex64", - "test_conj_view_einsum_xpu_complex64", - "test_conj_view_geqrf_xpu_complex64", - "test_conj_view_inner_xpu_complex64", - "test_conj_view_linalg_cholesky_ex_xpu_complex64", - "test_conj_view_linalg_cholesky_xpu_complex64", - "test_conj_view_linalg_cond_xpu_complex64", - "test_conj_view_linalg_det_xpu_complex64", - "test_conj_view_linalg_eig_xpu_complex64", - "test_conj_view_linalg_eigh_xpu_complex64", - "test_conj_view_linalg_eigvals_xpu_complex64", - "test_conj_view_linalg_eigvalsh_xpu_complex64", - "test_conj_view_linalg_householder_product_xpu_complex64", - "test_conj_view_linalg_inv_ex_xpu_complex64", - "test_conj_view_linalg_inv_xpu_complex64", - "test_conj_view_linalg_ldl_factor_ex_xpu_complex64", - "test_conj_view_linalg_ldl_factor_xpu_complex64", - "test_conj_view_linalg_ldl_solve_xpu_complex64", - "test_conj_view_linalg_lstsq_grad_oriented_xpu_complex64", - "test_conj_view_linalg_lstsq_xpu_complex64", - "test_conj_view_linalg_lu_factor_xpu_complex64", - "test_conj_view_linalg_lu_solve_xpu_complex64", - "test_conj_view_linalg_matrix_norm_xpu_complex64", - "test_conj_view_linalg_matrix_power_xpu_complex64", - "test_conj_view_linalg_matrix_rank_hermitian_xpu_complex64", - "test_conj_view_linalg_matrix_rank_xpu_complex64", - "test_conj_view_linalg_multi_dot_xpu_complex64", - "test_conj_view_linalg_norm_subgradients_at_zero_xpu_complex64", - "test_conj_view_linalg_norm_xpu_complex64", - "test_conj_view_linalg_pinv_hermitian_xpu_complex64", - "test_conj_view_linalg_pinv_singular_xpu_complex64", - "test_conj_view_linalg_pinv_xpu_complex64", - "test_conj_view_linalg_qr_xpu_complex64", - "test_conj_view_linalg_slogdet_xpu_complex64", - "test_conj_view_linalg_solve_ex_xpu_complex64", - "test_conj_view_linalg_solve_triangular_xpu_complex64", - "test_conj_view_linalg_solve_xpu_complex64", - "test_conj_view_linalg_svd_xpu_complex64", - "test_conj_view_linalg_svdvals_xpu_complex64", - 
"test_conj_view_linalg_tensorinv_xpu_complex64", - "test_conj_view_linalg_tensorsolve_xpu_complex64", - "test_conj_view_logdet_xpu_complex64", - "test_conj_view_lu_solve_xpu_complex64", - "test_conj_view_lu_xpu_complex64", - "test_conj_view_matmul_xpu_complex64", - "test_conj_view_mm_xpu_complex64", - "test_conj_view_mv_xpu_complex64", - "test_conj_view_nn_functional_linear_xpu_complex64", - "test_conj_view_norm_nuc_xpu_complex64", - "test_conj_view_ormqr_xpu_complex64", - "test_conj_view_pinverse_xpu_complex64", - "test_conj_view_qr_xpu_complex64", - "test_conj_view_svd_xpu_complex64", - "test_conj_view_tensordot_xpu_complex64", - "test_conj_view_triangular_solve_xpu_complex64", - "test_neg_conj_view_addmm_decomposed_xpu_complex128", - "test_neg_conj_view_addmm_xpu_complex128", - "test_neg_conj_view_addmv_xpu_complex128", - "test_neg_conj_view_addr_xpu_complex128", - "test_neg_conj_view_baddbmm_xpu_complex128", - "test_neg_conj_view_bmm_xpu_complex128", - "test_neg_conj_view_cholesky_inverse_xpu_complex128", - "test_neg_conj_view_cholesky_solve_xpu_complex128", - "test_neg_conj_view_cholesky_xpu_complex128", - "test_neg_conj_view_corrcoef_xpu_complex128", - "test_neg_conj_view_cov_xpu_complex128", - "test_neg_conj_view_geqrf_xpu_complex128", - "test_neg_conj_view_inner_xpu_complex128", - "test_neg_conj_view_linalg_cholesky_ex_xpu_complex128", - "test_neg_conj_view_linalg_cholesky_xpu_complex128", - "test_neg_conj_view_linalg_cond_xpu_complex128", - "test_neg_conj_view_linalg_eig_xpu_complex128", - "test_neg_conj_view_linalg_eigh_xpu_complex128", - "test_neg_conj_view_linalg_eigvals_xpu_complex128", - "test_neg_conj_view_linalg_eigvalsh_xpu_complex128", - "test_neg_conj_view_linalg_householder_product_xpu_complex128", - "test_neg_conj_view_linalg_inv_ex_xpu_complex128", - "test_neg_conj_view_linalg_inv_xpu_complex128", - "test_neg_conj_view_linalg_ldl_factor_ex_xpu_complex128", - "test_neg_conj_view_linalg_ldl_factor_xpu_complex128", - "test_neg_conj_view_linalg_ldl_solve_xpu_complex128", - "test_neg_conj_view_linalg_lstsq_grad_oriented_xpu_complex128", - "test_neg_conj_view_linalg_lstsq_xpu_complex128", - "test_neg_conj_view_linalg_lu_factor_xpu_complex128", - "test_neg_conj_view_linalg_lu_solve_xpu_complex128", - "test_neg_conj_view_linalg_matrix_rank_hermitian_xpu_complex128", - "test_neg_conj_view_linalg_matrix_rank_xpu_complex128", - "test_neg_conj_view_linalg_multi_dot_xpu_complex128", - "test_neg_conj_view_linalg_pinv_hermitian_xpu_complex128", - "test_neg_conj_view_linalg_pinv_singular_xpu_complex128", - "test_neg_conj_view_linalg_pinv_xpu_complex128", - "test_neg_conj_view_linalg_qr_xpu_complex128", - "test_neg_conj_view_linalg_solve_ex_xpu_complex128", - "test_neg_conj_view_linalg_solve_triangular_xpu_complex128", - "test_neg_conj_view_linalg_solve_xpu_complex128", - "test_neg_conj_view_linalg_svdvals_xpu_complex128", - "test_neg_conj_view_linalg_tensorinv_xpu_complex128", - "test_neg_conj_view_linalg_tensorsolve_xpu_complex128", - "test_neg_conj_view_lu_solve_xpu_complex128", - "test_neg_conj_view_lu_xpu_complex128", - "test_neg_conj_view_mm_xpu_complex128", - "test_neg_conj_view_mv_xpu_complex128", - "test_neg_conj_view_nn_functional_linear_xpu_complex128", - "test_neg_conj_view_norm_nuc_xpu_complex128", - "test_neg_conj_view_ormqr_xpu_complex128", - "test_neg_conj_view_pinverse_xpu_complex128", - "test_neg_conj_view_qr_xpu_complex128", - "test_neg_conj_view_tensordot_xpu_complex128", - "test_neg_conj_view_triangular_solve_xpu_complex128", 
"test_neg_view___rmatmul___xpu_float64", "test_neg_view__refs_linalg_svd_xpu_float64", "test_neg_view__refs_nn_functional_pdist_xpu_float64", @@ -721,48 +369,24 @@ # Could not run 'aten::_to_copy' with arguments from the 'NestedTensorXPU' backend "test_with_nested_tensor_input_xpu", # oneDNN issues - # Double and complex datatype matmul is not supported in oneDNN + # Double datatype matmul is not supported in oneDNN # https://github.com/intel/torch-xpu-ops/issues/253 - "test_sdp_math_gradcheck_contiguous_inputs_False_xpu", - "test_sdp_math_gradcheck_contiguous_inputs_True_xpu", - "test_transformerencoder_batch_first_True_training_True_enable_nested_tensor_True_xpu", - "test_transformerencoder_batch_first_True_training_True_enable_nested_tensor_False_xpu", - "test_transformerencoder_batch_first_True_training_False_enable_nested_tensor_True_xpu", - "test_transformerencoder_batch_first_True_training_False_enable_nested_tensor_False_xpu", - "test_transformerencoder_batch_first_False_training_True_enable_nested_tensor_True_xpu", - "test_transformerencoder_batch_first_False_training_True_enable_nested_tensor_False_xpu", - "test_transformerencoder_batch_first_False_training_False_enable_nested_tensor_True_xpu", - "test_transformerencoder_batch_first_False_training_False_enable_nested_tensor_False_xpu", "test_scaled_dot_product_attention_4D_input_dim_no_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_4D_input_dim_no_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_4D_input_dim_no_attn_mask_dropout_p_0_0_xpu", "test_scaled_dot_product_attention_4D_input_dim_4D_causal_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_4D_input_dim_4D_causal_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_4D_input_dim_4D_causal_attn_mask_dropout_p_0_0_xpu", - "test_scaled_dot_product_attention_4D_input_dim_4D_attn_mask_dropout_p_0_5_xpu", - "test_scaled_dot_product_attention_4D_input_dim_4D_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_4D_input_dim_4D_attn_mask_dropout_p_0_0_xpu", "test_scaled_dot_product_attention_4D_input_dim_2D_causal_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_4D_input_dim_2D_causal_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_4D_input_dim_2D_causal_attn_mask_dropout_p_0_0_xpu", "test_scaled_dot_product_attention_4D_input_dim_2D_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_4D_input_dim_2D_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_4D_input_dim_2D_attn_mask_dropout_p_0_0_xpu", "test_scaled_dot_product_attention_3D_input_dim_no_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_3D_input_dim_no_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_3D_input_dim_no_attn_mask_dropout_p_0_0_xpu", "test_scaled_dot_product_attention_3D_input_dim_3D_causal_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_3D_input_dim_3D_causal_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_3D_input_dim_3D_causal_attn_mask_dropout_p_0_0_xpu", - "test_scaled_dot_product_attention_3D_input_dim_3D_attn_mask_dropout_p_0_5_xpu", - "test_scaled_dot_product_attention_3D_input_dim_3D_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_3D_input_dim_3D_attn_mask_dropout_p_0_0_xpu", "test_scaled_dot_product_attention_3D_input_dim_2D_causal_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_3D_input_dim_2D_causal_attn_mask_dropout_p_0_2_xpu", - 
"test_scaled_dot_product_attention_3D_input_dim_2D_causal_attn_mask_dropout_p_0_0_xpu", "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_5_xpu", "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_2_xpu", - "test_scaled_dot_product_attention_3D_input_dim_2D_attn_mask_dropout_p_0_0_xpu", # https://github.com/intel/torch-xpu-ops/issues/1432 "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_2_key_padding_mask_dim_2_bool_xpu", "test_multiheadattention_fastpath_attn_mask_attn_mask_dim_3_key_padding_mask_dim_2_bool_xpu", @@ -774,7 +398,7 @@ "test_complex_xpu.py": None, "test_modules_xpu.py": ( # oneDNN issues - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_cpu_gpu_parity_nn_Bilinear_xpu_float64", "test_cpu_gpu_parity_nn_GRUCell_xpu_float64", "test_cpu_gpu_parity_nn_GRU_eval_mode_xpu_float64", @@ -985,21 +609,11 @@ "test_transformerencoderlayer_xpu_float16", "test_transformerencoderlayer_xpu_float32", # oneDNN issues - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN - "test_TransformerDecoderLayer_empty_xpu", - "test_TransformerDecoder_empty_xpu", - "test_TransformerEncoder_empty_xpu", - "test_Transformer_empty_xpu", - "test_affine_grid", - "test_affine_grid_3d", - "test_RNN_cpu_vs_cudnn_no_dropout", - "test_RNN_cpu_vs_cudnn_with_dropout", + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_GRU_grad_and_gradgrad_xpu_float64", "test_LSTM_grad_and_gradgrad_xpu_float64", "test_lstmcell_backward_only_one_output_grad_xpu_float64", "test_module_to_empty_xpu_float64", - "test_RNN_change_dropout", - "test_RNN_dropout", "test_rnn_fused_xpu_float64", "test_rnn_retain_variables_xpu_float64", "test_transformerencoderlayer_xpu_float64", @@ -1070,7 +684,7 @@ "test_autograd_multiple_dispatch_registrations_xpu", # AttributeError: module 'torch.xpu' has no attribute "test_profiler_emit_nvtx_xpu", - # Double and complex datatype matmul is not supported in oneDNN + # Jacobian mismatch for output 0 with respect to input 0 "test_mv_grad_stride_0_xpu", # module 'torch._C' has no attribute '_scatter' "test_checkpointing_without_reentrant_dataparallel", @@ -1219,119 +833,59 @@ # All linear algebra related ops are not supported for XPU. # _convert_weight_to_int4pack not support "_int4_mm_m_", - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN + # AssertionError: "the 'out' tensor was specified and requires gradients" "test_tensordot_out_kernel_errors_with_autograd_xpu_complex64", + # NotImplementedError: The operator 'aten::cholesky_inverse.out' is not currently implemented for the XPU device. + "test_cholesky_inverse_xpu_complex128", + "test_cholesky_inverse_xpu_complex64", + # NotImplementedError: The operator 'aten::linalg_solve_triangular.out' is not currently implemented for the XPU device. + "test_linalg_solve_triangular_large_xpu_complex128", + "test_linalg_solve_triangular_large_xpu_complex64", + "test_linalg_solve_triangular_xpu_complex128", + "test_linalg_solve_triangular_xpu_complex64", + # NotImplementedError: Could not run 'aten::addmm' with arguments from the 'SparseXPU' backend. 
+ "test_svd_lowrank_xpu_complex128", + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_tensordot_out_kernel_errors_with_autograd_xpu_float32", "test_1_sized_with_0_strided_xpu_float64", - "test_addbmm_xpu_complex128", - "test_addbmm_xpu_complex64", "test_addbmm_xpu_float64", "test_addmm_gelu_xpu_float64", "test_addmm_relu_xpu_float64", "test_addmm_sizes_xpu_float64", - "test_addmm_xpu_complex128", - "test_addmm_xpu_complex64", "test_addmm_xpu_float64", "test_addmv_rowmajor_colmajor_incx_incy_lda_xpu_float64", - "test_addmv_xpu_complex128", - "test_addmv_xpu_complex64", "test_addmv_xpu_float64", - "test_baddbmm_xpu_complex128", - "test_baddbmm_xpu_complex64", "test_baddbmm_xpu_float64", - "test_bmm_xpu_complex128", - "test_bmm_xpu_complex64", "test_bmm_xpu_float64", "test_blas_alpha_beta_empty_xpu_float64", - "test_cholesky_errors_and_warnings_xpu_complex128", - "test_cholesky_errors_and_warnings_xpu_complex64", "test_cholesky_errors_and_warnings_xpu_float64", - "test_cholesky_ex_xpu_complex128", - "test_cholesky_ex_xpu_complex64", "test_cholesky_ex_xpu_float64", - "test_cholesky_inverse_xpu_complex128", - "test_cholesky_inverse_xpu_complex64", "test_cholesky_inverse_xpu_float64", "test_cholesky_solve_backward_xpu_float64", - "test_cholesky_solve_batched_many_batches_xpu_complex128", - "test_cholesky_solve_batched_many_batches_xpu_complex64", "test_cholesky_solve_batched_many_batches_xpu_float64", - "test_cholesky_solve_batched_xpu_complex128", - "test_cholesky_solve_batched_xpu_complex64", "test_cholesky_solve_batched_xpu_float64", - "test_cholesky_solve_xpu_complex128", - "test_cholesky_solve_xpu_complex64", "test_cholesky_solve_xpu_float64", - "test_cholesky_xpu_complex128", - "test_cholesky_xpu_complex64", "test_cholesky_xpu_float64", - "test_corner_cases_of_cublasltmatmul_xpu_complex128", - "test_corner_cases_of_cublasltmatmul_xpu_complex64", "test_corner_cases_of_cublasltmatmul_xpu_float64", "test_eig_check_magma_xpu_float32", - "test_einsum_random_xpu_complex128", "test_einsum_random_xpu_float64", - "test_einsum_sublist_format_xpu_complex128", "test_einsum_sublist_format_xpu_float64", - "test_einsum_xpu_complex128", "test_einsum_xpu_float64", - "test_inner_xpu_complex64", - "test_invariance_error_spectral_decompositions_xpu_complex128", - "test_inverse_many_batches_xpu_complex128", - "test_inverse_many_batches_xpu_complex64", "test_inverse_many_batches_xpu_float64", - "test_inverse_xpu_complex128", - "test_inverse_xpu_complex64", "test_inverse_xpu_float64", - "test_ldl_factor_xpu_complex128", - "test_ldl_factor_xpu_complex64", "test_ldl_factor_xpu_float64", - "test_ldl_solve_xpu_complex128", - "test_ldl_solve_xpu_complex64", "test_ldl_solve_xpu_float64", - "test_linalg_lstsq_batch_broadcasting_xpu_complex128", - "test_linalg_lstsq_batch_broadcasting_xpu_complex64", "test_linalg_lstsq_batch_broadcasting_xpu_float64", - "test_linalg_lstsq_xpu_complex128", - "test_linalg_lstsq_xpu_complex64", "test_linalg_lstsq_xpu_float64", - "test_linalg_lu_family_xpu_complex128", - "test_linalg_lu_family_xpu_complex64", "test_linalg_lu_family_xpu_float64", - "test_linalg_lu_solve_xpu_complex128", - "test_linalg_lu_solve_xpu_complex64", - "test_linalg_solve_triangular_broadcasting_xpu_complex128", - "test_linalg_solve_triangular_broadcasting_xpu_complex64", "test_linalg_solve_triangular_broadcasting_xpu_float64", - "test_linalg_solve_triangular_large_xpu_complex128", - "test_linalg_solve_triangular_large_xpu_complex64", "test_linalg_solve_triangular_large_xpu_float64", - 
"test_linalg_solve_triangular_xpu_complex128", - "test_linalg_solve_triangular_xpu_complex64", "test_linalg_solve_triangular_xpu_float64", "test_lobpcg_basic_xpu_float64", "test_lobpcg_ortho_xpu_float64", - "test_lu_solve_batched_broadcasting_xpu_complex128", - "test_lu_solve_batched_broadcasting_xpu_complex64", - "test_lu_solve_batched_many_batches_xpu_complex128", - "test_lu_solve_batched_many_batches_xpu_complex64", - "test_lu_solve_batched_xpu_complex128", - "test_lu_solve_batched_xpu_complex64", - "test_lu_solve_large_matrices_xpu_complex128", - "test_lu_solve_large_matrices_xpu_complex64", - "test_lu_solve_xpu_complex128", - "test_lu_solve_xpu_complex64", - "test_matmul_out_kernel_errors_with_autograd_xpu_complex64", - "test_matmul_small_brute_force_1d_Nd_xpu_complex64", - "test_matmul_small_brute_force_2d_Nd_xpu_complex64", - "test_matmul_small_brute_force_3d_Nd_xpu_complex64", - "test_matrix_power_negative_xpu_complex128", "test_matrix_power_negative_xpu_float64", - "test_matrix_power_non_negative_xpu_complex128", "test_matrix_power_non_negative_xpu_float64", "test_matrix_rank_atol_rtol_xpu_float64", - "test_matrix_rank_xpu_complex128", - "test_matrix_rank_xpu_complex64", "test_matrix_rank_xpu_float64", "test_mm_bmm_non_memory_dense_xpu", "test_mm_conjtranspose_xpu", @@ -1341,53 +895,22 @@ "test_multi_dot_xpu_complex128", "test_multi_dot_xpu_float64", "test_old_cholesky_batched_many_batches_xpu_float64", - "test_old_cholesky_batched_upper_xpu_complex128", - "test_old_cholesky_batched_upper_xpu_complex64", "test_old_cholesky_batched_upper_xpu_float64", - "test_old_cholesky_batched_xpu_complex128", - "test_old_cholesky_batched_xpu_complex64", "test_old_cholesky_batched_xpu_float64", - "test_old_cholesky_xpu_complex128", - "test_old_cholesky_xpu_complex64", "test_old_cholesky_xpu_float64", - "test_ormqr_xpu_complex128", - "test_ormqr_xpu_complex64", "test_ormqr_xpu_float64", "test_pca_lowrank_xpu", - "test_pinv_errors_and_warnings_xpu_complex128", - "test_pinv_errors_and_warnings_xpu_complex64", "test_pinv_errors_and_warnings_xpu_float64", - "test_pinv_xpu_complex128", - "test_pinv_xpu_complex64", "test_pinv_xpu_float64", - "test_pinverse_xpu_complex128", - "test_pinverse_xpu_complex64", "test_pinverse_xpu_float64", - "test_slogdet_xpu_complex128", - "test_slogdet_xpu_complex64", - "test_solve_batched_broadcasting_xpu_complex128", - "test_solve_batched_broadcasting_xpu_complex64", "test_solve_batched_broadcasting_xpu_float64", - "test_solve_xpu_complex128", - "test_solve_xpu_complex64", "test_solve_xpu_float64", "test_strided_mm_bmm_xpu_float64", - "test_svd_lowrank_xpu_complex128", "test_svd_lowrank_xpu_float64", - "test_svd_xpu_complex128", - "test_svd_xpu_complex64", "test_svd_xpu_float64", - "test_triangular_solve_batched_broadcasting_xpu_complex128", - "test_triangular_solve_batched_broadcasting_xpu_complex64", "test_triangular_solve_batched_broadcasting_xpu_float64", - "test_triangular_solve_batched_many_batches_xpu_complex128", - "test_triangular_solve_batched_many_batches_xpu_complex64", "test_triangular_solve_batched_many_batches_xpu_float64", - "test_triangular_solve_batched_xpu_complex128", - "test_triangular_solve_batched_xpu_complex64", "test_triangular_solve_batched_xpu_float64", - "test_triangular_solve_xpu_complex128", - "test_triangular_solve_xpu_complex64", "test_triangular_solve_xpu_float64", # https://github.com/intel/torch-xpu-ops/issues/821 # addmm.out, addmv.out, linalg_lstsq, vdot&dot, _int_mm lack XPU support and fallback to CPU @@ -1474,241 +997,122 @@ ), 
"test_ops_fwd_gradients_xpu.py": ( # All of the followings are oneDNN issues - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN - "test_fn_fwgrad_bwgrad___rmatmul___xpu_complex128", + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_fn_fwgrad_bwgrad___rmatmul___xpu_float64", "test_fn_fwgrad_bwgrad_addbmm_xpu_float64", - "test_fn_fwgrad_bwgrad_addmm_decomposed_xpu_complex128", "test_fn_fwgrad_bwgrad_addmm_decomposed_xpu_float64", - "test_fn_fwgrad_bwgrad_addmm_xpu_complex128", "test_fn_fwgrad_bwgrad_addmm_xpu_float64", - "test_fn_fwgrad_bwgrad_addmv_xpu_complex128", "test_fn_fwgrad_bwgrad_addmv_xpu_float64", - "test_fn_fwgrad_bwgrad_addr_xpu_complex128", "test_fn_fwgrad_bwgrad_addr_xpu_float64", - "test_fn_fwgrad_bwgrad_baddbmm_xpu_complex128", "test_fn_fwgrad_bwgrad_baddbmm_xpu_float64", - "test_fn_fwgrad_bwgrad_bmm_xpu_complex128", "test_fn_fwgrad_bwgrad_bmm_xpu_float64", - "test_fn_fwgrad_bwgrad_cholesky_inverse_xpu_complex128", "test_fn_fwgrad_bwgrad_cholesky_inverse_xpu_float64", - "test_fn_fwgrad_bwgrad_cholesky_solve_xpu_complex128", "test_fn_fwgrad_bwgrad_cholesky_solve_xpu_float64", - "test_fn_fwgrad_bwgrad_cholesky_xpu_complex128", "test_fn_fwgrad_bwgrad_cholesky_xpu_float64", - "test_fn_fwgrad_bwgrad_corrcoef_xpu_complex128", "test_fn_fwgrad_bwgrad_corrcoef_xpu_float64", - "test_fn_fwgrad_bwgrad_einsum_xpu_complex128", "test_fn_fwgrad_bwgrad_einsum_xpu_float64", - "test_fn_fwgrad_bwgrad_inner_xpu_complex128", "test_fn_fwgrad_bwgrad_inner_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_cholesky_ex_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_cholesky_ex_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_cholesky_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_cholesky_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_cond_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_cond_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_det_xpu_complex128", - "test_fn_fwgrad_bwgrad_linalg_eig_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_eig_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_eigh_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_eigh_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_eigvals_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_eigvals_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_eigvalsh_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_eigvalsh_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_householder_product_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_householder_product_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_inv_ex_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_inv_ex_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_inv_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_inv_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_lstsq_grad_oriented_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_lstsq_grad_oriented_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_lu_factor_ex_xpu_complex128", - "test_fn_fwgrad_bwgrad_linalg_lu_factor_xpu_complex128", - "test_fn_fwgrad_bwgrad_linalg_lu_solve_xpu_complex128", - "test_fn_fwgrad_bwgrad_linalg_lu_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_lu_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_matrix_norm_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_matrix_norm_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_matrix_power_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_matrix_power_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_multi_dot_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_multi_dot_xpu_float64", "test_fn_fwgrad_bwgrad_linalg_norm_xpu_float64", - 
"test_fn_fwgrad_bwgrad_linalg_pinv_hermitian_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_pinv_hermitian_xpu_float64", "test_fn_fwgrad_bwgrad_linalg_pinv_singular_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_pinv_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_pinv_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_qr_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_qr_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_slogdet_xpu_complex128", - "test_fn_fwgrad_bwgrad_linalg_solve_ex_xpu_complex128", - "test_fn_fwgrad_bwgrad_linalg_solve_triangular_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_solve_triangular_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_solve_xpu_complex128", - "test_fn_fwgrad_bwgrad_linalg_svd_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_svd_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_svdvals_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_svdvals_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_tensorinv_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_tensorinv_xpu_float64", - "test_fn_fwgrad_bwgrad_linalg_tensorsolve_xpu_complex128", "test_fn_fwgrad_bwgrad_linalg_tensorsolve_xpu_float64", - "test_fn_fwgrad_bwgrad_logdet_xpu_complex128", "test_fn_fwgrad_bwgrad_logdet_xpu_float64", - "test_fn_fwgrad_bwgrad_lu_solve_xpu_complex128", - "test_fn_fwgrad_bwgrad_lu_xpu_complex128", "test_fn_fwgrad_bwgrad_lu_xpu_float64", - "test_fn_fwgrad_bwgrad_matmul_xpu_complex128", "test_fn_fwgrad_bwgrad_matmul_xpu_float64", - "test_fn_fwgrad_bwgrad_mm_xpu_complex128", "test_fn_fwgrad_bwgrad_mm_xpu_float64", - "test_fn_fwgrad_bwgrad_mv_xpu_complex128", "test_fn_fwgrad_bwgrad_mv_xpu_float64", "test_fn_fwgrad_bwgrad_nn_functional_bilinear_xpu_float64", - "test_fn_fwgrad_bwgrad_nn_functional_linear_xpu_complex128", "test_fn_fwgrad_bwgrad_nn_functional_linear_xpu_float64", "test_fn_fwgrad_bwgrad_nn_functional_multi_head_attention_forward_xpu_float64", "test_fn_fwgrad_bwgrad_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_fn_fwgrad_bwgrad_norm_nuc_xpu_complex128", "test_fn_fwgrad_bwgrad_norm_nuc_xpu_float64", - "test_fn_fwgrad_bwgrad_ormqr_xpu_complex128", "test_fn_fwgrad_bwgrad_ormqr_xpu_float64", "test_fn_fwgrad_bwgrad_pca_lowrank_xpu_float64", - "test_fn_fwgrad_bwgrad_pinverse_xpu_complex128", "test_fn_fwgrad_bwgrad_pinverse_xpu_float64", - "test_fn_fwgrad_bwgrad_qr_xpu_complex128", "test_fn_fwgrad_bwgrad_qr_xpu_float64", "test_fn_fwgrad_bwgrad_svd_lowrank_xpu_float64", - "test_fn_fwgrad_bwgrad_svd_xpu_complex128", "test_fn_fwgrad_bwgrad_svd_xpu_float64", - "test_fn_fwgrad_bwgrad_tensordot_xpu_complex128", "test_fn_fwgrad_bwgrad_tensordot_xpu_float64", - "test_forward_mode_AD___rmatmul___xpu_complex128", "test_forward_mode_AD___rmatmul___xpu_float64", "test_forward_mode_AD_addbmm_xpu_float64", - "test_forward_mode_AD_addmm_decomposed_xpu_complex128", "test_forward_mode_AD_addmm_decomposed_xpu_float64", - "test_forward_mode_AD_addmm_xpu_complex128", "test_forward_mode_AD_addmm_xpu_float64", - "test_forward_mode_AD_addmv_xpu_complex128", "test_forward_mode_AD_addmv_xpu_float64", - "test_forward_mode_AD_baddbmm_xpu_complex128", "test_forward_mode_AD_baddbmm_xpu_float64", - "test_forward_mode_AD_bmm_xpu_complex128", "test_forward_mode_AD_bmm_xpu_float64", - "test_forward_mode_AD_cholesky_inverse_xpu_complex128", "test_forward_mode_AD_cholesky_inverse_xpu_float64", - "test_forward_mode_AD_cholesky_solve_xpu_complex128", "test_forward_mode_AD_cholesky_solve_xpu_float64", - "test_forward_mode_AD_cholesky_xpu_complex128", "test_forward_mode_AD_cholesky_xpu_float64", - 
"test_forward_mode_AD_corrcoef_xpu_complex128", "test_forward_mode_AD_corrcoef_xpu_float64", - "test_forward_mode_AD_dot_xpu_complex128", "test_forward_mode_AD_dot_xpu_float64", - "test_forward_mode_AD_einsum_xpu_complex128", "test_forward_mode_AD_einsum_xpu_float64", - "test_forward_mode_AD_inner_xpu_complex128", "test_forward_mode_AD_inner_xpu_float64", - "test_forward_mode_AD_linalg_cholesky_ex_xpu_complex128", "test_forward_mode_AD_linalg_cholesky_ex_xpu_float64", - "test_forward_mode_AD_linalg_cholesky_xpu_complex128", "test_forward_mode_AD_linalg_cholesky_xpu_float64", - "test_forward_mode_AD_linalg_cond_xpu_complex128", "test_forward_mode_AD_linalg_cond_xpu_float64", - "test_forward_mode_AD_linalg_det_xpu_complex128", - "test_forward_mode_AD_linalg_eig_xpu_complex128", "test_forward_mode_AD_linalg_eig_xpu_float64", - "test_forward_mode_AD_linalg_eigh_xpu_complex128", "test_forward_mode_AD_linalg_eigh_xpu_float64", - "test_forward_mode_AD_linalg_eigvals_xpu_complex128", "test_forward_mode_AD_linalg_eigvals_xpu_float64", - "test_forward_mode_AD_linalg_eigvalsh_xpu_complex128", "test_forward_mode_AD_linalg_eigvalsh_xpu_float64", - "test_forward_mode_AD_linalg_householder_product_xpu_complex128", "test_forward_mode_AD_linalg_householder_product_xpu_float64", - "test_forward_mode_AD_linalg_inv_ex_xpu_complex128", "test_forward_mode_AD_linalg_inv_ex_xpu_float64", - "test_forward_mode_AD_linalg_inv_xpu_complex128", "test_forward_mode_AD_linalg_inv_xpu_float64", - "test_forward_mode_AD_linalg_lstsq_grad_oriented_xpu_complex128", "test_forward_mode_AD_linalg_lstsq_grad_oriented_xpu_float64", - "test_forward_mode_AD_linalg_lu_factor_ex_xpu_complex128", - "test_forward_mode_AD_linalg_lu_factor_xpu_complex128", - "test_forward_mode_AD_linalg_lu_solve_xpu_complex128", - "test_forward_mode_AD_linalg_lu_xpu_complex128", "test_forward_mode_AD_linalg_lu_xpu_float64", - "test_forward_mode_AD_linalg_matrix_norm_xpu_complex128", "test_forward_mode_AD_linalg_matrix_norm_xpu_float64", - "test_forward_mode_AD_linalg_matrix_power_xpu_complex128", "test_forward_mode_AD_linalg_matrix_power_xpu_float64", - "test_forward_mode_AD_linalg_multi_dot_xpu_complex128", "test_forward_mode_AD_linalg_multi_dot_xpu_float64", "test_forward_mode_AD_linalg_norm_xpu_float64", - "test_forward_mode_AD_linalg_pinv_hermitian_xpu_complex128", "test_forward_mode_AD_linalg_pinv_hermitian_xpu_float64", - "test_forward_mode_AD_linalg_pinv_singular_xpu_complex128", "test_forward_mode_AD_linalg_pinv_singular_xpu_float64", - "test_forward_mode_AD_linalg_pinv_xpu_complex128", "test_forward_mode_AD_linalg_pinv_xpu_float64", - "test_forward_mode_AD_linalg_qr_xpu_complex128", "test_forward_mode_AD_linalg_qr_xpu_float64", - "test_forward_mode_AD_linalg_slogdet_xpu_complex128", - "test_forward_mode_AD_linalg_solve_ex_xpu_complex128", - "test_forward_mode_AD_linalg_solve_triangular_xpu_complex128", "test_forward_mode_AD_linalg_solve_triangular_xpu_float64", - "test_forward_mode_AD_linalg_solve_xpu_complex128", - "test_forward_mode_AD_linalg_svd_xpu_complex128", "test_forward_mode_AD_linalg_svd_xpu_float64", - "test_forward_mode_AD_linalg_svdvals_xpu_complex128", "test_forward_mode_AD_linalg_svdvals_xpu_float64", - "test_forward_mode_AD_linalg_tensorinv_xpu_complex128", "test_forward_mode_AD_linalg_tensorinv_xpu_float64", - "test_forward_mode_AD_linalg_tensorsolve_xpu_complex128", "test_forward_mode_AD_linalg_tensorsolve_xpu_float64", - "test_forward_mode_AD_logdet_xpu_complex128", "test_forward_mode_AD_logdet_xpu_float64", - 
"test_forward_mode_AD_lu_solve_xpu_complex128", - "test_forward_mode_AD_lu_xpu_complex128", "test_forward_mode_AD_lu_xpu_float64", - "test_forward_mode_AD_matmul_xpu_complex128", "test_forward_mode_AD_matmul_xpu_float64", - "test_forward_mode_AD_mm_xpu_complex128", "test_forward_mode_AD_mm_xpu_float64", - "test_forward_mode_AD_mv_xpu_complex128", "test_forward_mode_AD_mv_xpu_float64", "test_forward_mode_AD_nn_functional_bilinear_xpu_float64", - "test_forward_mode_AD_nn_functional_linear_xpu_complex128", "test_forward_mode_AD_nn_functional_linear_xpu_float64", - "test_forward_mode_AD_norm_nuc_xpu_complex128", "test_forward_mode_AD_norm_nuc_xpu_float64", "test_forward_mode_AD_pca_lowrank_xpu_float64", - "test_forward_mode_AD_pinverse_xpu_complex128", "test_forward_mode_AD_pinverse_xpu_float64", - "test_forward_mode_AD_qr_xpu_complex128", "test_forward_mode_AD_qr_xpu_float64", "test_forward_mode_AD_svd_lowrank_xpu_float64", - "test_forward_mode_AD_svd_xpu_complex128", "test_forward_mode_AD_svd_xpu_float64", - "test_forward_mode_AD_tensordot_xpu_complex128", "test_forward_mode_AD_tensordot_xpu_float64", - "test_forward_mode_AD_triangular_solve_xpu_complex128", "test_forward_mode_AD_triangular_solve_xpu_float64", "test_inplace_forward_mode_AD_addbmm_xpu_float64", - "test_inplace_forward_mode_AD_addmm_decomposed_xpu_complex128", "test_inplace_forward_mode_AD_addmm_decomposed_xpu_float64", - "test_inplace_forward_mode_AD_addmm_xpu_complex128", "test_inplace_forward_mode_AD_addmm_xpu_float64", - "test_inplace_forward_mode_AD_addmv_xpu_complex128", "test_inplace_forward_mode_AD_addmv_xpu_float64", - "test_inplace_forward_mode_AD_baddbmm_xpu_complex128", "test_inplace_forward_mode_AD_baddbmm_xpu_float64", - "test_forward_mode_AD_pca_lowrank_xpu_complex128", - "test_forward_mode_AD_svd_lowrank_xpu_complex128", # RuntimeError: value cannot be converted to type float without overflow "test_fn_fwgrad_bwgrad_addbmm_xpu_complex128", "test_forward_mode_AD_addbmm_xpu_complex128", @@ -1810,264 +1214,134 @@ ), "test_ops_gradients_xpu.py": ( # All are oneDNN issues - ### Error #0 in TestBwdGradientsXPU , totally 271 , RuntimeError: Double and complex datatype matmul is not supported in oneDNN + ### Error #0 in TestBwdGradientsXPU, RuntimeError: Double datatype matmul is not supported in oneDNN "test_fn_grad_index_reduce_prod_xpu_float64", "test_inplace_grad_index_reduce_prod_xpu_float64", - "test_fn_grad___rmatmul___xpu_complex128", "test_fn_grad___rmatmul___xpu_float64", "test_fn_grad_addbmm_xpu_float64", - "test_fn_grad_addmm_decomposed_xpu_complex128", "test_fn_grad_addmm_decomposed_xpu_float64", - "test_fn_grad_addmm_xpu_complex128", "test_fn_grad_addmm_xpu_float64", - "test_fn_grad_addmv_xpu_complex128", "test_fn_grad_addmv_xpu_float64", - "test_fn_grad_addr_xpu_complex128", "test_fn_grad_addr_xpu_float64", - "test_fn_grad_baddbmm_xpu_complex128", "test_fn_grad_baddbmm_xpu_float64", - "test_fn_grad_bmm_xpu_complex128", "test_fn_grad_bmm_xpu_float64", "test_fn_grad_cdist_xpu_float64", - "test_fn_grad_cholesky_inverse_xpu_complex128", "test_fn_grad_cholesky_inverse_xpu_float64", - "test_fn_grad_cholesky_solve_xpu_complex128", "test_fn_grad_cholesky_solve_xpu_float64", - "test_fn_grad_cholesky_xpu_complex128", "test_fn_grad_cholesky_xpu_float64", - "test_fn_grad_corrcoef_xpu_complex128", "test_fn_grad_corrcoef_xpu_float64", - "test_fn_grad_einsum_xpu_complex128", "test_fn_grad_einsum_xpu_float64", - "test_fn_grad_inner_xpu_complex128", "test_fn_grad_inner_xpu_float64", - 
"test_fn_grad_linalg_cholesky_ex_xpu_complex128", "test_fn_grad_linalg_cholesky_ex_xpu_float64", - "test_fn_grad_linalg_cholesky_xpu_complex128", "test_fn_grad_linalg_cholesky_xpu_float64", - "test_fn_grad_linalg_cond_xpu_complex128", "test_fn_grad_linalg_cond_xpu_float64", - "test_fn_grad_linalg_det_xpu_complex128", - "test_fn_grad_linalg_eig_xpu_complex128", "test_fn_grad_linalg_eig_xpu_float64", - "test_fn_grad_linalg_eigh_xpu_complex128", "test_fn_grad_linalg_eigh_xpu_float64", - "test_fn_grad_linalg_eigvals_xpu_complex128", - "test_fn_grad_linalg_eigvalsh_xpu_complex128", "test_fn_grad_linalg_eigvalsh_xpu_float64", - "test_fn_grad_linalg_householder_product_xpu_complex128", "test_fn_grad_linalg_householder_product_xpu_float64", - "test_fn_grad_linalg_inv_ex_xpu_complex128", "test_fn_grad_linalg_inv_ex_xpu_float64", - "test_fn_grad_linalg_inv_xpu_complex128", "test_fn_grad_linalg_inv_xpu_float64", - "test_fn_grad_linalg_lstsq_grad_oriented_xpu_complex128", "test_fn_grad_linalg_lstsq_grad_oriented_xpu_float64", - "test_fn_grad_linalg_lu_factor_ex_xpu_complex128", - "test_fn_grad_linalg_lu_factor_xpu_complex128", - "test_fn_grad_linalg_lu_solve_xpu_complex128", - "test_fn_grad_linalg_lu_xpu_complex128", "test_fn_grad_linalg_lu_xpu_float64", - "test_fn_grad_linalg_matrix_norm_xpu_complex128", "test_fn_grad_linalg_matrix_norm_xpu_float64", - "test_fn_grad_linalg_matrix_power_xpu_complex128", "test_fn_grad_linalg_matrix_power_xpu_float64", - "test_fn_grad_linalg_multi_dot_xpu_complex128", "test_fn_grad_linalg_multi_dot_xpu_float64", "test_fn_grad_linalg_norm_xpu_float64", - "test_fn_grad_linalg_pinv_hermitian_xpu_complex128", "test_fn_grad_linalg_pinv_hermitian_xpu_float64", - "test_fn_grad_linalg_pinv_singular_xpu_complex128", "test_fn_grad_linalg_pinv_singular_xpu_float64", - "test_fn_grad_linalg_pinv_xpu_complex128", "test_fn_grad_linalg_pinv_xpu_float64", - "test_fn_grad_linalg_qr_xpu_complex128", "test_fn_grad_linalg_qr_xpu_float64", - "test_fn_grad_linalg_slogdet_xpu_complex128", - "test_fn_grad_linalg_solve_ex_xpu_complex128", - "test_fn_grad_linalg_solve_triangular_xpu_complex128", "test_fn_grad_linalg_solve_triangular_xpu_float64", - "test_fn_grad_linalg_solve_xpu_complex128", - "test_fn_grad_linalg_svd_xpu_complex128", "test_fn_grad_linalg_svd_xpu_float64", - "test_fn_grad_linalg_svdvals_xpu_complex128", "test_fn_grad_linalg_svdvals_xpu_float64", - "test_fn_grad_linalg_tensorinv_xpu_complex128", "test_fn_grad_linalg_tensorinv_xpu_float64", - "test_fn_grad_linalg_tensorsolve_xpu_complex128", "test_fn_grad_linalg_tensorsolve_xpu_float64", - "test_fn_grad_logdet_xpu_complex128", "test_fn_grad_logdet_xpu_float64", - "test_fn_grad_lu_solve_xpu_complex128", - "test_fn_grad_lu_xpu_complex128", "test_fn_grad_lu_xpu_float64", - "test_fn_grad_matmul_xpu_complex128", "test_fn_grad_matmul_xpu_float64", - "test_fn_grad_mm_xpu_complex128", "test_fn_grad_mm_xpu_float64", - "test_fn_grad_mv_xpu_complex128", "test_fn_grad_mv_xpu_float64", "test_fn_grad_nn_functional_bilinear_xpu_float64", - "test_fn_grad_nn_functional_linear_xpu_complex128", "test_fn_grad_nn_functional_linear_xpu_float64", "test_fn_grad_nn_functional_multi_head_attention_forward_xpu_float64", "test_fn_grad_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_fn_grad_norm_nuc_xpu_complex128", "test_fn_grad_norm_nuc_xpu_float64", - "test_fn_grad_ormqr_xpu_complex128", "test_fn_grad_ormqr_xpu_float64", "test_fn_grad_pca_lowrank_xpu_float64", - "test_fn_grad_pinverse_xpu_complex128", "test_fn_grad_pinverse_xpu_float64", - 
"test_fn_grad_qr_xpu_complex128", "test_fn_grad_qr_xpu_float64", "test_fn_grad_svd_lowrank_xpu_float64", - "test_fn_grad_svd_xpu_complex128", "test_fn_grad_svd_xpu_float64", - "test_fn_grad_tensordot_xpu_complex128", "test_fn_grad_tensordot_xpu_float64", - "test_fn_grad_triangular_solve_xpu_complex128", "test_fn_grad_triangular_solve_xpu_float64", - "test_fn_gradgrad___rmatmul___xpu_complex128", "test_fn_gradgrad___rmatmul___xpu_float64", "test_fn_gradgrad_addbmm_xpu_float64", - "test_fn_gradgrad_addmm_decomposed_xpu_complex128", "test_fn_gradgrad_addmm_decomposed_xpu_float64", - "test_fn_gradgrad_addmm_xpu_complex128", "test_fn_gradgrad_addmm_xpu_float64", - "test_fn_gradgrad_addmv_xpu_complex128", "test_fn_gradgrad_addmv_xpu_float64", - "test_fn_gradgrad_addr_xpu_complex128", "test_fn_gradgrad_addr_xpu_float64", - "test_fn_gradgrad_baddbmm_xpu_complex128", "test_fn_gradgrad_baddbmm_xpu_float64", - "test_fn_gradgrad_bmm_xpu_complex128", "test_fn_gradgrad_bmm_xpu_float64", - "test_fn_gradgrad_cholesky_inverse_xpu_complex128", "test_fn_gradgrad_cholesky_inverse_xpu_float64", - "test_fn_gradgrad_cholesky_solve_xpu_complex128", "test_fn_gradgrad_cholesky_solve_xpu_float64", - "test_fn_gradgrad_cholesky_xpu_complex128", "test_fn_gradgrad_cholesky_xpu_float64", - "test_fn_gradgrad_corrcoef_xpu_complex128", "test_fn_gradgrad_corrcoef_xpu_float64", - "test_fn_gradgrad_einsum_xpu_complex128", "test_fn_gradgrad_einsum_xpu_float64", - "test_fn_gradgrad_inner_xpu_complex128", "test_fn_gradgrad_inner_xpu_float64", - "test_fn_gradgrad_linalg_cholesky_ex_xpu_complex128", "test_fn_gradgrad_linalg_cholesky_ex_xpu_float64", - "test_fn_gradgrad_linalg_cholesky_xpu_complex128", "test_fn_gradgrad_linalg_cholesky_xpu_float64", - "test_fn_gradgrad_linalg_cond_xpu_complex128", "test_fn_gradgrad_linalg_cond_xpu_float64", - "test_fn_gradgrad_linalg_det_xpu_complex128", - "test_fn_gradgrad_linalg_eig_xpu_complex128", "test_fn_gradgrad_linalg_eig_xpu_float64", - "test_fn_gradgrad_linalg_eigh_xpu_complex128", "test_fn_gradgrad_linalg_eigh_xpu_float64", - "test_fn_gradgrad_linalg_eigvals_xpu_complex128", "test_fn_gradgrad_linalg_eigvals_xpu_float64", - "test_fn_gradgrad_linalg_eigvalsh_xpu_complex128", "test_fn_gradgrad_linalg_eigvalsh_xpu_float64", - "test_fn_gradgrad_linalg_householder_product_xpu_complex128", "test_fn_gradgrad_linalg_householder_product_xpu_float64", - "test_fn_gradgrad_linalg_inv_ex_xpu_complex128", "test_fn_gradgrad_linalg_inv_ex_xpu_float64", - "test_fn_gradgrad_linalg_inv_xpu_complex128", "test_fn_gradgrad_linalg_inv_xpu_float64", - "test_fn_gradgrad_linalg_lstsq_grad_oriented_xpu_complex128", "test_fn_gradgrad_linalg_lstsq_grad_oriented_xpu_float64", - "test_fn_gradgrad_linalg_lu_factor_ex_xpu_complex128", - "test_fn_gradgrad_linalg_lu_factor_xpu_complex128", - "test_fn_gradgrad_linalg_lu_solve_xpu_complex128", - "test_fn_gradgrad_linalg_lu_xpu_complex128", "test_fn_gradgrad_linalg_lu_xpu_float64", - "test_fn_gradgrad_linalg_matrix_norm_xpu_complex128", "test_fn_gradgrad_linalg_matrix_norm_xpu_float64", - "test_fn_gradgrad_linalg_matrix_power_xpu_complex128", "test_fn_gradgrad_linalg_matrix_power_xpu_float64", - "test_fn_gradgrad_linalg_multi_dot_xpu_complex128", "test_fn_gradgrad_linalg_multi_dot_xpu_float64", - "test_fn_gradgrad_linalg_pinv_hermitian_xpu_complex128", "test_fn_gradgrad_linalg_pinv_hermitian_xpu_float64", "test_fn_gradgrad_linalg_pinv_singular_xpu_float64", - "test_fn_gradgrad_linalg_pinv_xpu_complex128", "test_fn_gradgrad_linalg_pinv_xpu_float64", - 
"test_fn_gradgrad_linalg_qr_xpu_complex128", "test_fn_gradgrad_linalg_qr_xpu_float64", - "test_fn_gradgrad_linalg_slogdet_xpu_complex128", - "test_fn_gradgrad_linalg_solve_ex_xpu_complex128", - "test_fn_gradgrad_linalg_solve_triangular_xpu_complex128", "test_fn_gradgrad_linalg_solve_triangular_xpu_float64", - "test_fn_gradgrad_linalg_solve_xpu_complex128", - "test_fn_gradgrad_linalg_svd_xpu_complex128", "test_fn_gradgrad_linalg_svd_xpu_float64", - "test_fn_gradgrad_linalg_svdvals_xpu_complex128", "test_fn_gradgrad_linalg_svdvals_xpu_float64", - "test_fn_gradgrad_linalg_tensorinv_xpu_complex128", "test_fn_gradgrad_linalg_tensorinv_xpu_float64", - "test_fn_gradgrad_linalg_tensorsolve_xpu_complex128", "test_fn_gradgrad_linalg_tensorsolve_xpu_float64", - "test_fn_gradgrad_logdet_xpu_complex128", "test_fn_gradgrad_logdet_xpu_float64", - "test_fn_gradgrad_lu_solve_xpu_complex128", - "test_fn_gradgrad_lu_xpu_complex128", "test_fn_gradgrad_lu_xpu_float64", - "test_fn_gradgrad_matmul_xpu_complex128", "test_fn_gradgrad_matmul_xpu_float64", - "test_fn_gradgrad_mm_xpu_complex128", "test_fn_gradgrad_mm_xpu_float64", - "test_fn_gradgrad_mv_xpu_complex128", "test_fn_gradgrad_mv_xpu_float64", "test_fn_gradgrad_nn_functional_bilinear_xpu_float64", - "test_fn_gradgrad_nn_functional_linear_xpu_complex128", "test_fn_gradgrad_nn_functional_linear_xpu_float64", "test_fn_gradgrad_nn_functional_multi_head_attention_forward_xpu_float64", "test_fn_gradgrad_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_fn_gradgrad_norm_nuc_xpu_complex128", "test_fn_gradgrad_norm_nuc_xpu_float64", - "test_fn_gradgrad_ormqr_xpu_complex128", "test_fn_gradgrad_ormqr_xpu_float64", "test_fn_gradgrad_pca_lowrank_xpu_float64", - "test_fn_gradgrad_pinverse_xpu_complex128", "test_fn_gradgrad_pinverse_xpu_float64", - "test_fn_gradgrad_qr_xpu_complex128", "test_fn_gradgrad_qr_xpu_float64", "test_fn_gradgrad_svd_lowrank_xpu_float64", - "test_fn_gradgrad_svd_xpu_complex128", "test_fn_gradgrad_svd_xpu_float64", - "test_fn_gradgrad_tensordot_xpu_complex128", "test_fn_gradgrad_tensordot_xpu_float64", - "test_fn_gradgrad_triangular_solve_xpu_complex128", "test_fn_gradgrad_triangular_solve_xpu_float64", "test_inplace_grad_addbmm_xpu_float64", - "test_inplace_grad_addmm_decomposed_xpu_complex128", "test_inplace_grad_addmm_decomposed_xpu_float64", - "test_inplace_grad_addmm_xpu_complex128", "test_inplace_grad_addmm_xpu_float64", - "test_inplace_grad_addmv_xpu_complex128", "test_inplace_grad_addmv_xpu_float64", - "test_inplace_grad_addr_xpu_complex128", "test_inplace_grad_addr_xpu_float64", - "test_inplace_grad_baddbmm_xpu_complex128", "test_inplace_grad_baddbmm_xpu_float64", "test_inplace_gradgrad_addbmm_xpu_float64", - "test_inplace_gradgrad_addmm_decomposed_xpu_complex128", "test_inplace_gradgrad_addmm_decomposed_xpu_float64", - "test_inplace_gradgrad_addmm_xpu_complex128", "test_inplace_gradgrad_addmm_xpu_float64", - "test_inplace_gradgrad_addmv_xpu_complex128", "test_inplace_gradgrad_addmv_xpu_float64", - "test_inplace_gradgrad_addr_xpu_complex128", "test_inplace_gradgrad_addr_xpu_float64", - "test_inplace_gradgrad_baddbmm_xpu_complex128", "test_inplace_gradgrad_baddbmm_xpu_float64", - "test_fn_grad_pca_lowrank_xpu_complex128", - "test_fn_grad_svd_lowrank_xpu_complex128", - "test_fn_gradgrad_pca_lowrank_xpu_complex128", - "test_fn_gradgrad_svd_lowrank_xpu_complex128", - "test_fn_grad_linalg_norm_xpu_complex128", ### Error #1 in TestBwdGradientsXPU , totally 4 , RuntimeError: value cannot be converted to type float without overflow 
"test_fn_grad_addbmm_xpu_complex128", "test_fn_gradgrad_addbmm_xpu_complex128", @@ -2105,8 +1379,6 @@ "test_broadcast_fn_map2_xpu", ### Error #8 in TestTorchDeviceTypeXPU , totally 1 , TypeError: map_ is only implemented on CPU tensors "test_broadcast_fn_map_xpu", - ### Error #9 in TestTorchDeviceTypeXPU , totally 1 , RuntimeError: Double and complex datatype matmul is not supported in oneDNN - "test_corrcoef_xpu_complex64", ### Error #12 in TestTorchDeviceTypeXPU , totally 2 , AttributeError: module 'torch.xpu' has no attribute 'amp' "test_grad_scaler_pass_itself_xpu", "test_pickle_gradscaler_xpu", @@ -2177,7 +1449,7 @@ ), "nn/test_multihead_attention_xpu.py": ( # known oneDNN issue - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_multihead_attention_dtype_batch_first_xpu_float64", "test_multihead_attention_dtype_xpu_float64", "test_multihead_attn_fast_path_query_and_bias_have_different_dtypes_xpu_float64", @@ -2275,372 +1547,189 @@ "test_dispatch_meta_outplace_nn_functional_linear_xpu_int64", "test_dispatch_symbolic_meta_outplace_nn_functional_linear_xpu_int64", "test_meta_outplace_nn_functional_linear_xpu_int64", - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN - "test_dispatch_meta_inplace_addbmm_xpu_complex", - "test_dispatch_meta_outplace_addbmm_xpu_complex", - "test_dispatch_symbolic_meta_inplace_addbmm_xpu_complex", - "test_dispatch_symbolic_meta_outplace_addbmm_xpu_complex", - "test_meta_inplace_addbmm_xpu_complex", - "test_meta_outplace_addbmm_xpu_complex", + # NotImplementedError: The operator 'aten::cholesky_inverse.out' is not currently implemented for the XPU device. + "test_dispatch_meta_outplace_cholesky_inverse_xpu_complex", + "test_dispatch_symbolic_meta_outplace_cholesky_inverse_xpu_complex", + # NotImplementedError: The operator 'aten::cholesky.out' is not currently implemented for the XPU device. 
+ "test_dispatch_meta_outplace_cholesky_xpu_complex", + "test_dispatch_symbolic_meta_outplace_cholesky_xpu_complex", + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_dispatch_meta_inplace_addbmm_xpu_float64", - "test_dispatch_meta_inplace_addmm_decomposed_xpu_complex", "test_dispatch_meta_inplace_addmm_decomposed_xpu_float64", - "test_dispatch_meta_inplace_addmm_xpu_complex", "test_dispatch_meta_inplace_addmm_xpu_float64", - "test_dispatch_meta_inplace_addmv_xpu_complex", "test_dispatch_meta_inplace_addmv_xpu_float64", - "test_dispatch_meta_inplace_baddbmm_xpu_complex", "test_dispatch_meta_inplace_baddbmm_xpu_float64", - "test_dispatch_meta_outplace___rmatmul___xpu_complex", "test_dispatch_meta_outplace___rmatmul___xpu_float64", "test_dispatch_meta_outplace_addbmm_xpu_float64", - "test_dispatch_meta_outplace_addmm_decomposed_xpu_complex", "test_dispatch_meta_outplace_addmm_decomposed_xpu_float64", - "test_dispatch_meta_outplace_addmm_xpu_complex", "test_dispatch_meta_outplace_addmm_xpu_float64", - "test_dispatch_meta_outplace_addmv_xpu_complex", "test_dispatch_meta_outplace_addmv_xpu_float64", - "test_dispatch_meta_outplace_baddbmm_xpu_complex", "test_dispatch_meta_outplace_baddbmm_xpu_float64", - "test_dispatch_meta_outplace_bmm_xpu_complex", "test_dispatch_meta_outplace_bmm_xpu_float64", "test_dispatch_meta_outplace_cdist_xpu_float64", - "test_dispatch_meta_outplace_cholesky_inverse_xpu_complex", "test_dispatch_meta_outplace_cholesky_inverse_xpu_float64", - "test_dispatch_meta_outplace_cholesky_solve_xpu_complex", "test_dispatch_meta_outplace_cholesky_solve_xpu_float64", - "test_dispatch_meta_outplace_cholesky_xpu_complex", "test_dispatch_meta_outplace_cholesky_xpu_float64", - "test_dispatch_meta_outplace_corrcoef_xpu_complex", "test_dispatch_meta_outplace_corrcoef_xpu_float64", - "test_dispatch_meta_outplace_cov_xpu_complex", "test_dispatch_meta_outplace_cov_xpu_float64", - "test_dispatch_meta_outplace_einsum_xpu_complex", "test_dispatch_meta_outplace_einsum_xpu_float64", - "test_dispatch_meta_outplace_geqrf_xpu_complex", "test_dispatch_meta_outplace_geqrf_xpu_float64", - "test_dispatch_meta_outplace_inner_xpu_complex", "test_dispatch_meta_outplace_inner_xpu_float64", - "test_dispatch_meta_outplace_linalg_cholesky_ex_xpu_complex", "test_dispatch_meta_outplace_linalg_cholesky_ex_xpu_float64", - "test_dispatch_meta_outplace_linalg_cholesky_xpu_complex", "test_dispatch_meta_outplace_linalg_cholesky_xpu_float64", - "test_dispatch_meta_outplace_linalg_det_xpu_complex", - "test_dispatch_meta_outplace_linalg_eig_xpu_complex", "test_dispatch_meta_outplace_linalg_eig_xpu_float64", - "test_dispatch_meta_outplace_linalg_eigh_xpu_complex", "test_dispatch_meta_outplace_linalg_eigh_xpu_float64", - "test_dispatch_meta_outplace_linalg_eigvals_xpu_complex", - "test_dispatch_meta_outplace_linalg_eigvalsh_xpu_complex", "test_dispatch_meta_outplace_linalg_eigvalsh_xpu_float64", - "test_dispatch_meta_outplace_linalg_inv_ex_xpu_complex", "test_dispatch_meta_outplace_linalg_inv_ex_xpu_float64", - "test_dispatch_meta_outplace_linalg_inv_xpu_complex", "test_dispatch_meta_outplace_linalg_inv_xpu_float64", - "test_dispatch_meta_outplace_linalg_ldl_factor_ex_xpu_complex", "test_dispatch_meta_outplace_linalg_ldl_factor_ex_xpu_float64", - "test_dispatch_meta_outplace_linalg_ldl_factor_xpu_complex", "test_dispatch_meta_outplace_linalg_ldl_factor_xpu_float64", - "test_dispatch_meta_outplace_linalg_ldl_solve_xpu_complex", "test_dispatch_meta_outplace_linalg_ldl_solve_xpu_float64", - 
"test_dispatch_meta_outplace_linalg_lstsq_grad_oriented_xpu_complex", "test_dispatch_meta_outplace_linalg_lstsq_grad_oriented_xpu_float64", - "test_dispatch_meta_outplace_linalg_lstsq_xpu_complex", "test_dispatch_meta_outplace_linalg_lstsq_xpu_float64", - "test_dispatch_meta_outplace_linalg_lu_factor_xpu_complex", - "test_dispatch_meta_outplace_linalg_lu_solve_xpu_complex", - "test_dispatch_meta_outplace_linalg_matrix_power_xpu_complex", "test_dispatch_meta_outplace_linalg_matrix_power_xpu_float64", - "test_dispatch_meta_outplace_linalg_matrix_rank_hermitian_xpu_complex", "test_dispatch_meta_outplace_linalg_matrix_rank_hermitian_xpu_float64", - "test_dispatch_meta_outplace_linalg_matrix_rank_xpu_complex", "test_dispatch_meta_outplace_linalg_matrix_rank_xpu_float64", - "test_dispatch_meta_outplace_linalg_multi_dot_xpu_complex", "test_dispatch_meta_outplace_linalg_multi_dot_xpu_float64", - "test_dispatch_meta_outplace_linalg_pinv_hermitian_xpu_complex", "test_dispatch_meta_outplace_linalg_pinv_hermitian_xpu_float64", - "test_dispatch_meta_outplace_linalg_pinv_singular_xpu_complex", "test_dispatch_meta_outplace_linalg_pinv_singular_xpu_float64", - "test_dispatch_meta_outplace_linalg_pinv_xpu_complex", "test_dispatch_meta_outplace_linalg_pinv_xpu_float64", - "test_dispatch_meta_outplace_linalg_qr_xpu_complex", "test_dispatch_meta_outplace_linalg_qr_xpu_float64", - "test_dispatch_meta_outplace_linalg_slogdet_xpu_complex", - "test_dispatch_meta_outplace_linalg_solve_ex_xpu_complex", - "test_dispatch_meta_outplace_linalg_solve_xpu_complex", - "test_dispatch_meta_outplace_linalg_svd_xpu_complex", "test_dispatch_meta_outplace_linalg_svd_xpu_float64", - "test_dispatch_meta_outplace_linalg_tensorinv_xpu_complex", "test_dispatch_meta_outplace_linalg_tensorinv_xpu_float64", - "test_dispatch_meta_outplace_logdet_xpu_complex", "test_dispatch_meta_outplace_logdet_xpu_float64", - "test_dispatch_meta_outplace_lu_solve_xpu_complex", - "test_dispatch_meta_outplace_lu_xpu_complex", "test_dispatch_meta_outplace_lu_xpu_float64", - "test_dispatch_meta_outplace_matmul_xpu_complex", "test_dispatch_meta_outplace_matmul_xpu_float64", - "test_dispatch_meta_outplace_mm_xpu_complex", "test_dispatch_meta_outplace_mm_xpu_float64", - "test_dispatch_meta_outplace_mv_xpu_complex", "test_dispatch_meta_outplace_mv_xpu_float64", "test_dispatch_meta_outplace_nn_functional_bilinear_xpu_float64", - "test_dispatch_meta_outplace_nn_functional_linear_xpu_complex", "test_dispatch_meta_outplace_nn_functional_linear_xpu_float64", "test_dispatch_meta_outplace_nn_functional_multi_head_attention_forward_xpu_float64", "test_dispatch_meta_outplace_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_dispatch_meta_outplace_pca_lowrank_xpu_complex", "test_dispatch_meta_outplace_pca_lowrank_xpu_float64", - "test_dispatch_meta_outplace_pinverse_xpu_complex", "test_dispatch_meta_outplace_pinverse_xpu_float64", - "test_dispatch_meta_outplace_qr_xpu_complex", "test_dispatch_meta_outplace_qr_xpu_float64", - "test_dispatch_meta_outplace_svd_lowrank_xpu_complex", "test_dispatch_meta_outplace_svd_lowrank_xpu_float64", - "test_dispatch_meta_outplace_svd_xpu_complex", "test_dispatch_meta_outplace_svd_xpu_float64", - "test_dispatch_meta_outplace_tensordot_xpu_complex", "test_dispatch_meta_outplace_tensordot_xpu_float64", - "test_dispatch_meta_outplace_triangular_solve_xpu_complex", "test_dispatch_meta_outplace_triangular_solve_xpu_float64", "test_dispatch_symbolic_meta_inplace_addbmm_xpu_float64", - 
"test_dispatch_symbolic_meta_inplace_addmm_decomposed_xpu_complex", "test_dispatch_symbolic_meta_inplace_addmm_decomposed_xpu_float64", - "test_dispatch_symbolic_meta_inplace_addmm_xpu_complex", "test_dispatch_symbolic_meta_inplace_addmm_xpu_float64", - "test_dispatch_symbolic_meta_inplace_addmv_xpu_complex", "test_dispatch_symbolic_meta_inplace_addmv_xpu_float64", - "test_dispatch_symbolic_meta_inplace_baddbmm_xpu_complex", "test_dispatch_symbolic_meta_inplace_baddbmm_xpu_float64", - "test_dispatch_symbolic_meta_outplace___rmatmul___xpu_complex", "test_dispatch_symbolic_meta_outplace___rmatmul___xpu_float64", "test_dispatch_symbolic_meta_outplace_addbmm_xpu_float64", - "test_dispatch_symbolic_meta_outplace_addmm_decomposed_xpu_complex", "test_dispatch_symbolic_meta_outplace_addmm_decomposed_xpu_float64", - "test_dispatch_symbolic_meta_outplace_addmm_xpu_complex", "test_dispatch_symbolic_meta_outplace_addmm_xpu_float64", - "test_dispatch_symbolic_meta_outplace_addmv_xpu_complex", "test_dispatch_symbolic_meta_outplace_addmv_xpu_float64", - "test_dispatch_symbolic_meta_outplace_baddbmm_xpu_complex", "test_dispatch_symbolic_meta_outplace_baddbmm_xpu_float64", - "test_dispatch_symbolic_meta_outplace_bmm_xpu_complex", "test_dispatch_symbolic_meta_outplace_bmm_xpu_float64", "test_dispatch_symbolic_meta_outplace_cdist_xpu_float64", - "test_dispatch_symbolic_meta_outplace_cholesky_inverse_xpu_complex", "test_dispatch_symbolic_meta_outplace_cholesky_inverse_xpu_float64", - "test_dispatch_symbolic_meta_outplace_cholesky_solve_xpu_complex", "test_dispatch_symbolic_meta_outplace_cholesky_solve_xpu_float64", - "test_dispatch_symbolic_meta_outplace_cholesky_xpu_complex", "test_dispatch_symbolic_meta_outplace_cholesky_xpu_float64", - "test_dispatch_symbolic_meta_outplace_corrcoef_xpu_complex", "test_dispatch_symbolic_meta_outplace_corrcoef_xpu_float64", - "test_dispatch_symbolic_meta_outplace_cov_xpu_complex", "test_dispatch_symbolic_meta_outplace_cov_xpu_float64", - "test_dispatch_symbolic_meta_outplace_einsum_xpu_complex", "test_dispatch_symbolic_meta_outplace_einsum_xpu_float64", - "test_dispatch_symbolic_meta_outplace_geqrf_xpu_complex", "test_dispatch_symbolic_meta_outplace_geqrf_xpu_float64", - "test_dispatch_symbolic_meta_outplace_inner_xpu_complex", "test_dispatch_symbolic_meta_outplace_inner_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_cholesky_ex_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_cholesky_ex_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_cholesky_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_cholesky_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_det_xpu_complex", - "test_dispatch_symbolic_meta_outplace_linalg_eig_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_eig_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_eigh_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_eigh_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_eigvals_xpu_complex", - "test_dispatch_symbolic_meta_outplace_linalg_eigvalsh_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_eigvalsh_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_inv_ex_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_inv_ex_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_inv_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_inv_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_ldl_factor_ex_xpu_complex", 
"test_dispatch_symbolic_meta_outplace_linalg_ldl_factor_ex_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_ldl_factor_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_ldl_factor_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_ldl_solve_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_ldl_solve_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_lstsq_grad_oriented_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_lstsq_grad_oriented_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_lstsq_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_lstsq_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_lu_factor_xpu_complex", - "test_dispatch_symbolic_meta_outplace_linalg_lu_solve_xpu_complex", - "test_dispatch_symbolic_meta_outplace_linalg_matrix_power_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_matrix_power_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_matrix_rank_hermitian_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_matrix_rank_hermitian_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_matrix_rank_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_matrix_rank_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_multi_dot_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_multi_dot_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_pinv_hermitian_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_pinv_hermitian_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_pinv_singular_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_pinv_singular_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_pinv_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_pinv_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_qr_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_qr_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_slogdet_xpu_complex", - "test_dispatch_symbolic_meta_outplace_linalg_solve_ex_xpu_complex", - "test_dispatch_symbolic_meta_outplace_linalg_solve_xpu_complex", - "test_dispatch_symbolic_meta_outplace_linalg_svd_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_svd_xpu_float64", - "test_dispatch_symbolic_meta_outplace_linalg_tensorinv_xpu_complex", "test_dispatch_symbolic_meta_outplace_linalg_tensorinv_xpu_float64", - "test_dispatch_symbolic_meta_outplace_logdet_xpu_complex", "test_dispatch_symbolic_meta_outplace_logdet_xpu_float64", - "test_dispatch_symbolic_meta_outplace_lu_solve_xpu_complex", - "test_dispatch_symbolic_meta_outplace_lu_xpu_complex", "test_dispatch_symbolic_meta_outplace_lu_xpu_float64", - "test_dispatch_symbolic_meta_outplace_matmul_xpu_complex", "test_dispatch_symbolic_meta_outplace_matmul_xpu_float64", - "test_dispatch_symbolic_meta_outplace_mm_xpu_complex", "test_dispatch_symbolic_meta_outplace_mm_xpu_float64", - "test_dispatch_symbolic_meta_outplace_mv_xpu_complex", "test_dispatch_symbolic_meta_outplace_mv_xpu_float64", "test_dispatch_symbolic_meta_outplace_nn_functional_bilinear_xpu_float64", - "test_dispatch_symbolic_meta_outplace_nn_functional_linear_xpu_complex", "test_dispatch_symbolic_meta_outplace_nn_functional_linear_xpu_float64", "test_dispatch_symbolic_meta_outplace_nn_functional_multi_head_attention_forward_xpu_float64", "test_dispatch_symbolic_meta_outplace_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_dispatch_symbolic_meta_outplace_pca_lowrank_xpu_complex", 
"test_dispatch_symbolic_meta_outplace_pca_lowrank_xpu_float64", - "test_dispatch_symbolic_meta_outplace_pinverse_xpu_complex", "test_dispatch_symbolic_meta_outplace_pinverse_xpu_float64", - "test_dispatch_symbolic_meta_outplace_qr_xpu_complex", "test_dispatch_symbolic_meta_outplace_qr_xpu_float64", - "test_dispatch_symbolic_meta_outplace_svd_lowrank_xpu_complex", "test_dispatch_symbolic_meta_outplace_svd_lowrank_xpu_float64", - "test_dispatch_symbolic_meta_outplace_svd_xpu_complex", "test_dispatch_symbolic_meta_outplace_svd_xpu_float64", - "test_dispatch_symbolic_meta_outplace_tensordot_xpu_complex", "test_dispatch_symbolic_meta_outplace_tensordot_xpu_float64", - "test_dispatch_symbolic_meta_outplace_triangular_solve_xpu_complex", "test_dispatch_symbolic_meta_outplace_triangular_solve_xpu_float64", "test_meta_inplace_addbmm_xpu_float64", - "test_meta_inplace_addmm_decomposed_xpu_complex", "test_meta_inplace_addmm_decomposed_xpu_float64", - "test_meta_inplace_addmm_xpu_complex", "test_meta_inplace_addmm_xpu_float64", - "test_meta_inplace_addmv_xpu_complex", "test_meta_inplace_addmv_xpu_float64", - "test_meta_inplace_baddbmm_xpu_complex", "test_meta_inplace_baddbmm_xpu_float64", - "test_meta_outplace___rmatmul___xpu_complex", "test_meta_outplace___rmatmul___xpu_float64", "test_meta_outplace_addbmm_xpu_float64", - "test_meta_outplace_addmm_decomposed_xpu_complex", "test_meta_outplace_addmm_decomposed_xpu_float64", - "test_meta_outplace_addmm_xpu_complex", "test_meta_outplace_addmm_xpu_float64", - "test_meta_outplace_addmv_xpu_complex", "test_meta_outplace_addmv_xpu_float64", - "test_meta_outplace_baddbmm_xpu_complex", "test_meta_outplace_baddbmm_xpu_float64", - "test_meta_outplace_bmm_xpu_complex", "test_meta_outplace_bmm_xpu_float64", "test_meta_outplace_cdist_xpu_float64", - "test_meta_outplace_cholesky_inverse_xpu_complex", "test_meta_outplace_cholesky_inverse_xpu_float64", - "test_meta_outplace_cholesky_solve_xpu_complex", "test_meta_outplace_cholesky_solve_xpu_float64", - "test_meta_outplace_cholesky_xpu_complex", "test_meta_outplace_cholesky_xpu_float64", - "test_meta_outplace_corrcoef_xpu_complex", "test_meta_outplace_corrcoef_xpu_float64", - "test_meta_outplace_cov_xpu_complex", "test_meta_outplace_cov_xpu_float64", - "test_meta_outplace_einsum_xpu_complex", "test_meta_outplace_einsum_xpu_float64", - "test_meta_outplace_geqrf_xpu_complex", "test_meta_outplace_geqrf_xpu_float64", - "test_meta_outplace_inner_xpu_complex", "test_meta_outplace_inner_xpu_float64", - "test_meta_outplace_linalg_cholesky_ex_xpu_complex", "test_meta_outplace_linalg_cholesky_ex_xpu_float64", - "test_meta_outplace_linalg_cholesky_xpu_complex", "test_meta_outplace_linalg_cholesky_xpu_float64", - "test_meta_outplace_linalg_det_xpu_complex", - "test_meta_outplace_linalg_eig_xpu_complex", "test_meta_outplace_linalg_eig_xpu_float64", - "test_meta_outplace_linalg_eigh_xpu_complex", "test_meta_outplace_linalg_eigh_xpu_float64", - "test_meta_outplace_linalg_eigvals_xpu_complex", - "test_meta_outplace_linalg_eigvalsh_xpu_complex", "test_meta_outplace_linalg_eigvalsh_xpu_float64", - "test_meta_outplace_linalg_inv_ex_xpu_complex", "test_meta_outplace_linalg_inv_ex_xpu_float64", - "test_meta_outplace_linalg_inv_xpu_complex", "test_meta_outplace_linalg_inv_xpu_float64", - "test_meta_outplace_linalg_ldl_factor_ex_xpu_complex", "test_meta_outplace_linalg_ldl_factor_ex_xpu_float64", - "test_meta_outplace_linalg_ldl_factor_xpu_complex", "test_meta_outplace_linalg_ldl_factor_xpu_float64", - 
"test_meta_outplace_linalg_ldl_solve_xpu_complex", "test_meta_outplace_linalg_ldl_solve_xpu_float64", - "test_meta_outplace_linalg_lstsq_grad_oriented_xpu_complex", "test_meta_outplace_linalg_lstsq_grad_oriented_xpu_float64", - "test_meta_outplace_linalg_lstsq_xpu_complex", "test_meta_outplace_linalg_lstsq_xpu_float64", - "test_meta_outplace_linalg_lu_factor_xpu_complex", - "test_meta_outplace_linalg_lu_solve_xpu_complex", - "test_meta_outplace_linalg_matrix_power_xpu_complex", "test_meta_outplace_linalg_matrix_power_xpu_float64", - "test_meta_outplace_linalg_matrix_rank_hermitian_xpu_complex", "test_meta_outplace_linalg_matrix_rank_hermitian_xpu_float64", - "test_meta_outplace_linalg_matrix_rank_xpu_complex", "test_meta_outplace_linalg_matrix_rank_xpu_float64", - "test_meta_outplace_linalg_multi_dot_xpu_complex", "test_meta_outplace_linalg_multi_dot_xpu_float64", - "test_meta_outplace_linalg_pinv_hermitian_xpu_complex", "test_meta_outplace_linalg_pinv_hermitian_xpu_float64", - "test_meta_outplace_linalg_pinv_singular_xpu_complex", "test_meta_outplace_linalg_pinv_singular_xpu_float64", - "test_meta_outplace_linalg_pinv_xpu_complex", "test_meta_outplace_linalg_pinv_xpu_float64", - "test_meta_outplace_linalg_qr_xpu_complex", "test_meta_outplace_linalg_qr_xpu_float64", - "test_meta_outplace_linalg_slogdet_xpu_complex", - "test_meta_outplace_linalg_solve_ex_xpu_complex", - "test_meta_outplace_linalg_solve_xpu_complex", - "test_meta_outplace_linalg_svd_xpu_complex", "test_meta_outplace_linalg_svd_xpu_float64", - "test_meta_outplace_linalg_tensorinv_xpu_complex", "test_meta_outplace_linalg_tensorinv_xpu_float64", - "test_meta_outplace_logdet_xpu_complex", "test_meta_outplace_logdet_xpu_float64", - "test_meta_outplace_lu_solve_xpu_complex", - "test_meta_outplace_lu_xpu_complex", "test_meta_outplace_lu_xpu_float64", - "test_meta_outplace_matmul_xpu_complex", "test_meta_outplace_matmul_xpu_float64", - "test_meta_outplace_mm_xpu_complex", "test_meta_outplace_mm_xpu_float64", - "test_meta_outplace_mv_xpu_complex", "test_meta_outplace_mv_xpu_float64", "test_meta_outplace_nn_functional_bilinear_xpu_float64", - "test_meta_outplace_nn_functional_linear_xpu_complex", "test_meta_outplace_nn_functional_linear_xpu_float64", "test_meta_outplace_nn_functional_multi_head_attention_forward_xpu_float64", "test_meta_outplace_nn_functional_scaled_dot_product_attention_xpu_float64", - "test_meta_outplace_pca_lowrank_xpu_complex", "test_meta_outplace_pca_lowrank_xpu_float64", - "test_meta_outplace_pinverse_xpu_complex", "test_meta_outplace_pinverse_xpu_float64", - "test_meta_outplace_qr_xpu_complex", "test_meta_outplace_qr_xpu_float64", - "test_meta_outplace_svd_lowrank_xpu_complex", "test_meta_outplace_svd_lowrank_xpu_float64", - "test_meta_outplace_svd_xpu_complex", "test_meta_outplace_svd_xpu_float64", - "test_meta_outplace_tensordot_xpu_complex", "test_meta_outplace_tensordot_xpu_float64", - "test_meta_outplace_triangular_solve_xpu_complex", "test_meta_outplace_triangular_solve_xpu_float64", # RuntimeError: Short is not supported in oneDNN! "test_dispatch_meta_inplace_addbmm_xpu_int16", @@ -2994,7 +2083,7 @@ "test_distributions_xpu.py": None, "test_optim_xpu.py": ( # oneDNN issues - # RuntimeError: Double and complex datatype matmul is not supported in oneDNN + # RuntimeError: Double datatype matmul is not supported in oneDNN "test_foreach_matches_forloop_ASGD_xpu_float64", "test_foreach_matches_forloop_Adadelta_xpu_float64", "test_foreach_matches_forloop_Adafactor_xpu_float64",