
Commit b520970

Dev/skotapati/copy broadcasting (#350)
* Handle broadcasting by expanding src tensor in Copy.mm
* Unblock linalg_matrix_power
* Improved formatting
1 parent: c30946a

File tree

2 files changed: 8 additions & 4 deletions


aten/src/ATen/native/mps/operations/Copy.mm

Lines changed: 8 additions & 3 deletions
@@ -300,22 +300,27 @@ void copy_blit_mps(void* dst, const void* src, size_t size) {
   TORCH_CHECK(dst.defined(), "dst is undefined");
   TORCH_CHECK(src.defined(), "src is undefined");
 
+  bool needs_broadcasting = false;
+
   if (src.numel() == 0 || dst.is_same(src)) {
     return dst;
   }
   if (dst.numel() == 0) {
     dst.resize_as_(src);
   }
+  if (dst.dim() > src.dim()) {
+    needs_broadcasting = true;
+  }
 
   if (src.device().type() == at::kMPS && dst.device().type() == at::kCPU) {
-    return copy_from_mps_(dst, src, non_blocking);
+    return copy_from_mps_(dst, needs_broadcasting ? src.expand_as(dst) : src, non_blocking);
   }
   if (src.device().type() == at::kCPU && dst.device().type() == at::kMPS) {
-    return copy_to_mps_(dst, src, non_blocking);
+    return copy_to_mps_(dst, needs_broadcasting ? src.expand_as(dst) : src, non_blocking);
   }
 
   if (src.device().type() == at::kMPS && dst.device().type() == at::kMPS) {
-    return copy_kernel_mps(dst, src, non_blocking);
+    return copy_kernel_mps(dst, needs_broadcasting ? src.expand_as(dst) : src, non_blocking);
   }
   TORCH_INTERNAL_ASSERT(
       src.device().type() == DeviceType::MPS,
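
For context, a minimal sketch of the behavior this change enables (a hypothetical repro, assuming a macOS build where MPS is available): copying a lower-rank source into a higher-rank MPS destination now expands the source to the destination's shape before dispatching the copy.

    import torch

    # Hypothetical repro sketch, assuming MPS is available.
    # Before this commit, copy_ between tensors of different ranks could
    # fail on the MPS paths because src was not expanded to dst's shape.
    if torch.backends.mps.is_available():
        dst = torch.zeros(2, 3, device="mps")
        src = torch.ones(3)       # rank 1: dst.dim() > src.dim()
        dst.copy_(src)            # now takes the src.expand_as(dst) path
        print(dst.cpu())          # tensor([[1., 1., 1.], [1., 1., 1.]])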

test/test_mps.py

Lines changed: 0 additions & 1 deletion
@@ -10200,7 +10200,6 @@ class TestConsistency(TestCaseMPS):
     # All the entries in this list should be removed
     BLOCKLIST = {
         # Functions that hard crash
-        'linalg.matrix_power': [torch.float32],
         'resize_': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
         'resize_as_': [torch.float16, torch.float32],
         'topk': [torch.int16, torch.int32, torch.int64, torch.uint8],
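
Removing this blocklist entry means linalg.matrix_power now runs in the MPS consistency suite; per the commit message, it was previously blocked because it hard-crashed on the copy path fixed above. A minimal sketch of the unblocked call (again assuming MPS is available):

    import torch

    # Sketch of the op this commit unblocks, assuming MPS is available.
    if torch.backends.mps.is_available():
        a = torch.randn(3, 3, device="mps")
        p = torch.linalg.matrix_power(a, 3)   # a @ a @ a
        expected = a @ a @ a
        print(torch.allclose(p.cpu(), expected.cpu(), atol=1e-4))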
