Skip to content

Commit 3c58c0b

Browse files
Hard fail for multi-result op if unreduced axes among results are not all the same.
PiperOrigin-RevId: 807627730
1 parent c18df47 commit 3c58c0b

File tree

4 files changed

+27
-8
lines changed

4 files changed

+27
-8
lines changed

shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -963,6 +963,10 @@ bool differentOperandShardingFromFirstResult(Operation* op) {
963963
});
964964
}
965965

966+
ArrayRef<AxisRefAttr> getUnreducedAxes(TensorShardingAttr sharding) {
967+
return sharding ? sharding.getUnreducedAxes() : ArrayRef<AxisRefAttr>();
968+
}
969+
966970
void insertExplicitReshardsOnOp(Operation* op,
967971
ArrayRef<TensorShardingAttr> inShardings,
968972
ArrayRef<TensorShardingAttr> outShardings,

shardy/dialect/sdy/transforms/export/explicit_reshards_util.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ std::optional<ArrayRef<AxisRefAttr>> getFactorSharding(
8282
// operand shardings. If `op` does not have any results, returns false;
8383
bool differentOperandShardingFromFirstResult(Operation* op);
8484

85+
// Returns unreduced axes of given `sharding`. If `sharding` is null, returns
86+
// empty axes.
87+
ArrayRef<AxisRefAttr> getUnreducedAxes(TensorShardingAttr sharding);
88+
8589
// Inserts explicit reshards on the operands and results of `op` such that the
8690
// sharding of `op` is compatible with its sharding rule.
8791
//

shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,17 @@ void insertAllReduceOnOpIfUnreducedToReplicated(
333333
return;
334334
}
335335

336+
TensorShardingAttr firstResultSharding = getSharding(op->getResult(0));
337+
if (op->getNumResults() > 1) {
338+
ArrayRef<AxisRefAttr> firstResultUnreducedAxes =
339+
getUnreducedAxes(firstResultSharding);
340+
for (OpResult result : op->getResults().drop_front()) {
341+
SDY_CHECK(firstResultUnreducedAxes ==
342+
getUnreducedAxes(getSharding(result)))
343+
<< "Unreduced axes mismatch between results for multi-result op.";
344+
}
345+
}
346+
336347
// For each operand that has unreduced axes, insert an all-reduce if
337348
// any of the unreduced axes isn't unreduced in the target sharding.
338349
//
@@ -341,9 +352,8 @@ void insertAllReduceOnOpIfUnreducedToReplicated(
341352
rewriter.setInsertionPoint(op);
342353
for (OpOperand& operand : op->getOpOperands()) {
343354
if (TensorShardingAttr inSharding = getSharding(operand.get())) {
344-
insertAllReduceIfUnreducedToReplicated(operand, inSharding,
345-
getSharding(op->getResult(0)),
346-
symbolTable, rewriter);
355+
insertAllReduceIfUnreducedToReplicated(
356+
operand, inSharding, firstResultSharding, symbolTable, rewriter);
347357
}
348358
}
349359
}

shardy/dialect/sdy/transforms/export/test/insert_explicit_reshards/unreduced.mlir

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,17 @@ func.func @reduce_multiple_results_unreduced(
134134
%arg0: tensor<2x64x13xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"x", "y"}, {}, {}]>},
135135
%arg1: tensor<2x64x13xi32> {sdy.sharding = #sdy.sharding<@mesh, [{"x", "y"}, {}, {}]>})
136136
-> (tensor<64xf32> {sdy.sharding = #sdy.sharding<@mesh, [{}], unreduced={"x"}>},
137-
tensor<64xi32> {sdy.sharding = #sdy.sharding<@mesh, [{}], unreduced={"y"}>}) {
137+
tensor<64xi32> {sdy.sharding = #sdy.sharding<@mesh, [{}], unreduced={"x":(1)2}>}) {
138138
%0 = stablehlo.constant dense<0.000000e+00> : tensor<f32>
139139
%1 = stablehlo.constant dense<0> : tensor<i32>
140140
// CHECK: %[[REDUCE:.*]]:2 = stablehlo.reduce(%arg0 init: %cst), (%arg1 init: %c) across dimensions = [0, 2]
141-
// CHECK-SAME: {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{}], unreduced={"x"}>, <@mesh, [{}], unreduced={"y"}>]>}
141+
// CHECK-SAME: {sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{}], unreduced={"x"}>, <@mesh, [{}], unreduced={"x"}>]>}
142142
// CHECK: %[[ALL_REDUCE1:.*]] = sdy.all_reduce {"y"} %[[REDUCE]]#0 out_sharding=<@mesh, [{}], unreduced={"x"}> : tensor<64xf32>
143-
// CHECK-NEXT: %[[ALL_REDUCE2:.*]] = sdy.all_reduce {"x"} %[[REDUCE]]#1 out_sharding=<@mesh, [{}], unreduced={"y"}> : tensor<64xi32>
144-
// CHECK-NEXT: return %[[ALL_REDUCE1]], %[[ALL_REDUCE2]] : tensor<64xf32>, tensor<64xi32>
143+
// CHECK-NEXT: %[[ALL_REDUCE2:.*]] = sdy.all_reduce {"y"} %[[REDUCE]]#1 out_sharding=<@mesh, [{}], unreduced={"x"}> : tensor<64xi32>
144+
// CHECK-NEXT: %[[ALL_REDUCE3:.*]] = sdy.all_reduce {"x":(2)2} %[[ALL_REDUCE2]] out_sharding=<@mesh, [{}], unreduced={"x":(1)2}> : tensor<64xi32>
145+
// CHECK-NEXT: return %[[ALL_REDUCE1]], %[[ALL_REDUCE3]] : tensor<64xf32>, tensor<64xi32>
145146
%2:2 = stablehlo.reduce(%arg0 init: %0), (%arg1 init: %1) across dimensions = [0, 2]
146-
{sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{}], unreduced={"x"}>, <@mesh, [{}], unreduced={"y"}>]>} :
147+
{sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{}], unreduced={"x"}>, <@mesh, [{}], unreduced={"x"}>]>} :
147148
(tensor<2x64x13xf32>, tensor<2x64x13xi32>, tensor<f32>, tensor<i32>) -> (tensor<64xf32>, tensor<64xi32>)
148149
reducer(%arg2: tensor<f32>, %arg4: tensor<f32>) (%arg3: tensor<i32>, %arg5: tensor<i32>) {
149150
%3 = stablehlo.add %arg2, %arg4 : tensor<f32>

0 commit comments

Comments (0)