Enable PropagationEdgesAttr verification for sdy.data_flow_edge operations.

Varcho · copybara-github · commit 36c83a53518e · 2025-08-07T15:07:58.000-07:00
PiperOrigin-RevId: 792322564
diff --git a/shardy/dialect/sdy/ir/verifiers.cc b/shardy/dialect/sdy/ir/verifiers.cc
@@ -1043,12 +1043,6 @@ SmallVector<TensorShardingAttr> getShardingsReferenceByPropagationEdge(
 
 LogicalResult verifyPropagationEdgesShardingAttr(
     PropagationEdgesAttr propagationEdges, Operation* op) {
-  // TODO(b/429645141): add PropagationEdgesAttr verification for
-  // `DataFlowEdgeOp`
-  if (isa<DataFlowEdgeOp>(op)) {
-    return success();
-  }
-
   SmallVector<TensorShardingAttr> shardings =
       getShardingsReferenceByPropagationEdge(propagationEdges, op);
 
diff --git a/shardy/dialect/sdy/transforms/propagation/debugging/source_sharding.cc b/shardy/dialect/sdy/transforms/propagation/debugging/source_sharding.cc
@@ -361,46 +361,71 @@ PropagationEdgesAttr createPropagationEdges(Operation* op,
                                             MLIRContext* context) {
   Builder builder(context);
   StepToAxisPropagationDetailsMap perStepEdgesForAxis;
+
+  // Build a temporary map to keep track of the edge with the minimum step
+  // index for each source/target/axis combination. Due to sharding constraints
+  // we have logic to specifically add edges outside of propagation. If similar
+  // edges are then re-included during propagation, we need to filter them out.
+  llvm::DenseMap<
+      AxisRefAttr,
+      llvm::DenseMap<EdgeValueRefAttr,
+                     llvm::DenseMap<EdgeValueRefAttr, PropagationEdge>>>
+      minStepEdges;
   for (const auto& [axisRef, edges] : axisToEdges) {
     for (const PropagationEdge& edge : edges) {
-      auto source =
+      auto sourceAttr =
           EdgeValueRefAttr::get(context, edge.source.type, edge.source.index);
-      auto target =
+      auto targetAttr =
           EdgeValueRefAttr::get(context, edge.target.type, edge.target.index);
-      perStepEdgesForAxis[edge.propagationStep][axisRef][source].insert(target);
+      auto& sourceMap = minStepEdges[axisRef];
+      auto& targetMap = sourceMap[sourceAttr];
+      auto [it, inserted] = targetMap.try_emplace(targetAttr, edge);
+      if (!inserted) {
+        if (edge.propagationStep < it->second.propagationStep) {
+          it->second = edge;
+        }
+      }
+    }
+  }
+
+  // Regroup the edges (with the minimum step index per Axis) by step index.
+  for (const auto& [axisRef, sourceMap] : minStepEdges) {
+    for (const auto& [sourceAttr, targetMap] : sourceMap) {
+      for (const auto& [targetAttr, edge] : targetMap) {
+        perStepEdgesForAxis[edge.propagationStep][axisRef][sourceAttr].insert(
+            targetAttr);
+      }
     }
   }
 
   SmallVector<PropagationOneStepAttr> perStepEdges;
   for (const auto& [step, edgesForAxis] : perStepEdgesForAxis) {
-    SmallVector<AxisToPropagationDetailsAttr> axis_entries;
+    SmallVector<AxisToPropagationDetailsAttr> axisEntries;
     for (const auto& [axisRef, edges] : edgesForAxis) {
-      // There should only be one source in the edge map.
-      assert(edges.size() == 1);
-      EdgeValueRefAttr source = edges.begin()->first;
-      DenseSet<EdgeValueRefAttr> targets = edges.begin()->second;
-      // Sort the targets for deterministic ordering in the output attr.
-      SmallVector<EdgeValueRefAttr> targetsArray(targets.begin(),
-                                                 targets.end());
-      llvm::stable_sort(targetsArray,
-                        [](EdgeValueRefAttr a, EdgeValueRefAttr b) {
-                          if (a.getType() == b.getType()) {
-                            return a.getIndex() < b.getIndex();
-                          }
-                          return a.getType() < b.getType();
-                        });
-      AxisToPropagationDetailsAttr axisToPropagationDetails =
-          AxisToPropagationDetailsAttr::get(context, axisRef, source,
-                                            targetsArray);
-      axis_entries.push_back(axisToPropagationDetails);
+      for (const auto& [source, targets] : edges) {
+        // Sort the targets for deterministic ordering in the output attr.
+        SmallVector<EdgeValueRefAttr> targetsArray(targets.begin(),
+                                                   targets.end());
+        llvm::stable_sort(targetsArray,
+                          [](EdgeValueRefAttr a, EdgeValueRefAttr b) {
+                            if (a.getType() == b.getType()) {
+                              return a.getIndex() < b.getIndex();
+                            }
+                            return a.getType() < b.getType();
+                          });
+        AxisToPropagationDetailsAttr axisToPropagationDetails =
+            AxisToPropagationDetailsAttr::get(context, axisRef, source,
+                                              targetsArray);
+        axisEntries.push_back(axisToPropagationDetails);
+      }
     }
     // Sort the axes by name for deterministic ordering in the output attr.
-    llvm::stable_sort(axis_entries, [](AxisToPropagationDetailsAttr a,
-                                       AxisToPropagationDetailsAttr b) {
+    llvm::stable_sort(axisEntries, [](AxisToPropagationDetailsAttr a,
+                                      AxisToPropagationDetailsAttr b) {
       return a.getAxisName() < b.getAxisName();
     });
     perStepEdges.push_back(
-        PropagationOneStepAttr::get(context, step, axis_entries));
+        PropagationOneStepAttr::get(context, step, axisEntries));
   }
 
   // Sort the edges by step index.
@@ -666,6 +691,47 @@ void prepareFuncResultToEdgesHandler(
   }
 }
 
+// Seeds step-0 edges for sharding constraints and manual computation inputs.
+void prepareShardingConstraintToEdgesHandler(
+    ModuleOp moduleOp, OperationToEdgesMap& operationToEdgesMap) {
+  moduleOp.walk([&](ShardingConstraintOp shardingConstraintOp) {
+    auto sharding = shardingConstraintOp.getSharding();
+    if (!sharding) {
+      return;
+    }
+    for (DimensionShardingAttr dimSharding : sharding.getDimShardings()) {
+      for (AxisRefAttr axisRef : dimSharding.getAxes()) {
+        operationToEdgesMap[shardingConstraintOp][axisRef].push_back(
+            PropagationEdge{/*source=*/EdgeNode{EdgeNodeType::RESULT, 0},
+                            /*target=*/EdgeNode{EdgeNodeType::OPERAND, 0},
+                            /*propagationStep=*/0});
+      }
+    }
+  });
+
+  // `ManualComputationOp` in-shardings act as sharding constraints (the
+  // ApplyShardingConstraints pass treats them as such), so seed edges for them.
+  moduleOp.walk([&](ManualComputationOp manualComputationOp) {
+    int64_t i = 0;
+    for (const auto& sharding :
+         manualComputationOp.getInShardings().getShardings()) {
+      [[maybe_unused]] auto edgeOp =
+          DataFlowEdgeOp::lookup(manualComputationOp.getBody().getArgument(i));
+      assert(edgeOp && "block argument has no data flow edge");
+      for (DimensionShardingAttr dimSharding : sharding.getDimShardings()) {
+        for (AxisRefAttr axisRef : dimSharding.getAxes()) {
+          // The ManualComputationOp "owns" this initial edge.
+          operationToEdgesMap[manualComputationOp][axisRef].push_back(
+              PropagationEdge{/*source=*/EdgeNode{EdgeNodeType::OPERAND, i},
+                              /*target=*/EdgeNode{EdgeNodeType::RESULT, i},
+                              /*propagationStep=*/0});
+        }
+      }
+      i++;
+    }
+  });
+}
+
 OriginSharding lookUpValueOriginSharding(
     Value value, AxisRefAttr axisRef,
     const ValueToOriginShardingMap& valueToOriginShardingMap) {
@@ -772,6 +838,8 @@ void SourceShardingHandler::prepareHandler(ModuleOp moduleOp) {
   }
   if (mappings->debugPropagationEdgeSharding) {
     prepareFuncResultToEdgesHandler(moduleOp, mappings->funcResultToEdgesMap);
+    prepareShardingConstraintToEdgesHandler(moduleOp,
+                                            mappings->operationToEdgesMap);
   }
   if (mappings->debugShardingOrigins ||
       mappings->debugPropagationEdgeSharding) {
diff --git a/shardy/dialect/sdy/transforms/propagation/debugging/test/edge_shardings.mlir b/shardy/dialect/sdy/transforms/propagation/debugging/test/edge_shardings.mlir
@@ -167,8 +167,7 @@ sdy.mesh @mesh = <["a"=2, "b"=2]>
 // CHECK-LABEL: manual_computation_multiple_results
 // CHECK-SAME:    %arg0: tensor<32x32xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"b", ?}, {"a", ?}]>})
 // CHECK-SAME:    -> (tensor<16x32xf32> {sdy.propagation_edges = #sdy.propagation_edges<[
-// CHECK-SAME:                               {step-0 = [{"a" = operand-0 -> [result-0]}, {"b" = operand-0 -> [result-0]}]},
-// CHECK-SAME:                               {step-6 = [{"a" = operand-0 -> [result-0]}]}]>,
+// CHECK-SAME:                               {step-0 = [{"a" = operand-0 -> [result-0]}, {"b" = operand-0 -> [result-0]}]}]>,
 // CHECK-SAME:                           sdy.sharding = #sdy.sharding<@mesh, [{?}, {"a", ?}]>},
 // CHECK-SAME:        tensor<32x32xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"b", ?}, {"a", ?}]>}) {
 func.func @manual_computation_multiple_results(%arg0: tensor<32x32xf32>) -> (tensor<16x32xf32>, tensor<32x32xf32>) {
@@ -184,6 +183,8 @@ func.func @manual_computation_multiple_results(%arg0: tensor<32x32xf32>) -> (ten
   // CHECK-SAME:       #sdy.propagation_edges<[
   // CHECK-SAME:           {step-1 = [{"b" = result-0 -> [operand-0]}]},
   // CHECK-SAME:           {step-5 = [{"a" = result-0 -> [operand-0]}]}]>],
+  // CHECK-SAME:   sdy.propagation_edges = #sdy.propagation_edges<[
+  // CHECK-SAME:       {step-0 = [{"b" = operand-0 -> [result-0]}]}]>,
   // CHECK-SAME:   sdy.result_propagation_edges = [
   // CHECK-SAME:       #sdy.propagation_edges<[
   // CHECK-SAME:           {step-3 = [{"a" = operand-0 -> [result-0]}]}]>,
@@ -250,20 +251,18 @@ func.func @sub_axes_merging_reshape(
 
 sdy.mesh @mesh = <["a"=2, "b"=2, "c"=8]>
 
-// TODO(b/434949739): Describe how the propagation edge is created due to the
-// apply-sharding-constraints pass.
 // CHECK-LABEL: two_sharding_constraint
 // CHECK-SAME:    %arg0: tensor<8x8xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"a", ?}, {"b", ?}]>}
 // CHECK-SAME:    -> (tensor<8x8xf32> {sdy.propagation_edges = #sdy.propagation_edges<[{step-0 = [{"b" = operand-0 -> [result-0]}]}, {step-6 = [{"a" = operand-0 -> [result-0]}]}]>,
 // CHECK-SAME:                         sdy.sharding = #sdy.sharding<@mesh, [{"a", ?}, {"b", ?}]>}) {
 func.func @two_sharding_constraint(%arg0: tensor<8x8xf32>) -> tensor<8x8xf32> {
   // CHECK-NEXT: %[[SC_1:.*]] = sdy.sharding_constraint %arg0 <@mesh, [{"a"}, {"b", ?}]> {
-  // CHECK-SAME:   sdy.propagation_edges = #sdy.propagation_edges<[{step-1 = [{"a" = result-0 -> [operand-0]}]}, {step-5 = [{"b" = result-0 -> [operand-0]}]}]>} : tensor<8x8xf32>
+  // CHECK-SAME:   sdy.propagation_edges = #sdy.propagation_edges<[{step-0 = [{"a" = result-0 -> [operand-0]}]}, {step-5 = [{"b" = result-0 -> [operand-0]}]}]>} : tensor<8x8xf32>
   // CHECK-NEXT: %[[ADD:.*]] = stablehlo.add %[[SC_1]], %[[SC_1]] {
-  // CHECK-SAME:   sdy.propagation_edges = #sdy.propagation_edges<[{step-2 = [{"a" = operand-0 -> [result-0]}]}, {step-4 = [{"b" = result-0 -> [operand-0, operand-1]}]}]>,
+  // CHECK-SAME:   sdy.propagation_edges = #sdy.propagation_edges<[{step-2 = [{"a" = operand-0 -> [result-0]}]}, {step-4 = [{"b" = result-0 -> [operand-0, operand-1]}]}]>
   // CHECK-SAME:   sdy.sharding = #sdy.sharding_per_value<[<@mesh, [{"a", ?}, {"b", ?}]>]>} : tensor<8x8xf32>
   // CHECK-NEXT: %[[SC_2:.*]] = sdy.sharding_constraint %[[ADD]] <@mesh, [{"a", ?}, {"b"}]> {
-  // CHECK-SAME:   sdy.propagation_edges = #sdy.propagation_edges<[{step-3 = [{"a" = operand-0 -> [result-0]}, {"b" = result-0 -> [operand-0]}]}]>} : tensor<8x8xf32>
+  // CHECK-SAME:   sdy.propagation_edges = #sdy.propagation_edges<[{step-0 = [{"b" = result-0 -> [operand-0]}]}, {step-3 = [{"a" = operand-0 -> [result-0]}]}]>} : tensor<8x8xf32>
   // CHECK-NEXT: return %[[SC_2]]
   %0 = sdy.sharding_constraint %arg0 <@mesh, [{"a"}, {?}]> : tensor<8x8xf32>
   %1 = stablehlo.add %0, %0 : tensor<8x8xf32>
@@ -275,17 +274,17 @@ func.func @two_sharding_constraint(%arg0: tensor<8x8xf32>) -> tensor<8x8xf32> {
 
 sdy.mesh @mesh = <["a"=2, "b"=2, "c"=8]>
 
-// TODO(b/434949739): Describe how the propagation edge is created due to the
-// apply-sharding-constraints pass.
 // CHECK-LABEL: push_sharding_constraints_to_func_results
 // CHECK-SAME:   %arg0: tensor<8xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"a"}]>},
 // CHECK-SAME:   %arg1: tensor<8xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"a"}]>})
-// CHECK-SAME:   -> (tensor<8xf32> {sdy.propagation_edges = #sdy.propagation_edges<[{step-0 = [{"a" = operand-0 -> [result-0]}]}, {step-1 = [{"a" = operand-0 -> [result-0]}]}]>,
+// CHECK-SAME:   -> (tensor<8xf32> {sdy.propagation_edges = #sdy.propagation_edges<[{step-0 = [{"a" = operand-0 -> [result-0]}]}]>,
 // CHECK-SAME:                      sdy.sharding = #sdy.sharding<@mesh, [{"a", ?}]>},
 // CHECK-SAME:       tensor<8xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"a", ?}]>}) {
 func.func @push_sharding_constraints_to_func_results(
   %arg0: tensor<8xf32>, %arg1: tensor<8xf32>
   ) -> (tensor<8xf32>, tensor<8xf32>) {
+  // CHECK: %[[C1:.*]] = sdy.sharding_constraint %arg0 <@mesh, [{"a"}]> {sdy.propagation_edges = #sdy.propagation_edges<[{step-0 = [{"a" = result-0 -> [operand-0]}]}]>} : tensor<8xf32>
+  // CHECK: %[[C2:.*]] = sdy.sharding_constraint %arg1 <@mesh, [{"a"}]> {sdy.propagation_edges = #sdy.propagation_edges<[{step-0 = [{"a" = result-0 -> [operand-0]}]}]>} : tensor<8xf32>
   %1 = sdy.sharding_constraint %arg0 <@mesh, [{"a"}]> : tensor<8xf32>
   %2 = sdy.sharding_constraint %arg1 <@mesh, [{"a"}]> : tensor<8xf32>
   return %1, %2 : tensor<8xf32>, tensor<8xf32>