Add Iota operation to direct bindings (#4879)

rdspring1 · web-flow · commit 3db1b12cc7c7 · 2025-08-03T12:52:25.000-07:00
## PR List - #4876 - #4877 - #4878 - #4879 **<< This PR.** - #4880 - #4881 - #4882 - #4883 - #4884
diff --git a/python/python_direct/ops.cpp b/python/python_direct/ops.cpp
@@ -2281,6 +2281,37 @@ TensorView
 )");
 }
 
+void bindTensorFactoryOps(py::module_& ops) { // registers tensor-creation ops on `ops`
+  ops.def(
+      "iota",
+      [](Val* length, Val* start, Val* step, PrimDataType dtype)
+          -> TensorView* { return iota(length, start, step, dtype); },
+      py::arg("length"),
+      py::arg("start").none(true) = py::none(), // Python None reaches iota() as nullptr
+      py::arg("step").none(true) = py::none(), // Python None reaches iota() as nullptr
+      py::arg("dtype") = DataType::Int,
+      R"(
+Create a tensor with values start + i * step for i in [0, length).
+
+Parameters
+----------
+length : Val
+    The length of the tensor.
+start : Val, optional
+    The start of the tensor. When the default is None, start is set to zero.
+step : Val, optional
+    The step of the tensor. When the default is None, step is set to one.
+dtype : PrimDataType, optional
+    The data type of the tensor. Default is DataType::Int.
+
+Returns
+-------
+TensorView
+    The tensor with values start + i * step for i in [0, length).
+)",
+      py::return_value_policy::reference); // Python does not take ownership of the result
+}
+
 } // namespace
 
 void bindOperations(py::module& nvfuser) {
@@ -2290,12 +2321,13 @@ void bindOperations(py::module& nvfuser) {
   bindBinaryOps(nvf_ops);
   bindTernaryOps(nvf_ops);
   bindReductionOps(nvf_ops);
+  bindScanOps(nvf_ops);
   bindCastOps(nvf_ops);
   bindMatmulOps(nvf_ops);
   bindMetadataOps(nvf_ops);
   bindTensorUtilityOps(nvf_ops);
   bindIndexingOps(nvf_ops);
-  bindScanOps(nvf_ops);
+  bindTensorFactoryOps(nvf_ops);
 }
 
 } // namespace nvfuser::python
diff --git a/python/python_direct/python_translate.cpp b/python/python_direct/python_translate.cpp
@@ -981,6 +981,29 @@ class PythonTranslator : public OptInConstDispatch {
         {lsop->out()});
   }
 
+  // Map IotaOp to python frontend: prints an fd.ops.iota call with keyword arguments
+  void handle(const IotaOp* iop) final {
+    NVF_ERROR(iop != nullptr);
+    TensorView* out_tv = iop->output(0)->as<TensorView>();
+    visited_vals_.insert(out_tv); // record the op's output in visited_vals_
+
+    dispatch(iop->length()); // NOTE(review): operands are dispatched before the op is printed,
+    dispatch(iop->start()); // presumably so their definitions appear first -- confirm
+    dispatch(iop->step());
+
+    static const auto default_args = std::make_tuple( // function-local static: built once
+        KeywordArgument<decltype(iop->length())>{"length", std::nullopt},
+        KeywordArgument<decltype(iop->start())>{"start", nullptr},
+        KeywordArgument<decltype(iop->step())>{"step", nullptr},
+        KeywordArgument<DataType>{"dtype", DataType::Int});
+    printer_.generateKwargsOperation( // NOTE(review): presumably omits args equal to their default -- confirm
+        "fd.ops.iota",
+        std::make_tuple(), // empty tuple; presumably the positional-argument slot -- confirm
+        default_args,
+        std::make_tuple(iop->length(), iop->start(), iop->step(), iop->dtype()),
+        {out_tv});
+  }
+
   // Map IndexSelectOp to IndexSelectOpRecord
   void handle(const IndexSelectOp* isop) final {
     NVF_ERROR(isop != nullptr);
diff --git a/tests/python/direct/test_python_frontend.py b/tests/python/direct/test_python_frontend.py
@@ -887,3 +887,26 @@ def fusion_func(fd: FusionDefinition) -> None:
     nvf_out, _ = nvfuser_direct_test.exec_nvfuser(fusion_func, inputs)
     for out in nvf_out:
         nvfuser_direct_test.assertTrue(out.allclose(x[:, 1:, 2:]))
+
+
+def test_iota(nvfuser_direct_test):
+    inputs = [
+        (2, 0, 2, DataType.Int),
+        (3, 100, 1, DataType.Int32),
+    ]
+
+    def fusion_func(fd: FusionDefinition):
+        for input in inputs:
+            c0 = fd.define_scalar(input[0])
+            c1 = None if input[1] is None else fd.define_scalar(input[1])
+            c2 = None if input[2] is None else fd.define_scalar(input[2])
+            dt = input[3]
+            t3 = fd.ops.iota(c0, c1, c2, dt)
+            fd.add_output(t3)
+
+    nvf_out, _ = nvfuser_direct_test.exec_nvfuser(fusion_func, [])
+
+    eager_out1 = torch.tensor([0, 2], dtype=torch.long, device="cuda")
+    eager_out2 = torch.tensor([100, 101, 102], dtype=torch.int, device="cuda")
+    nvfuser_direct_test.assertEqual(eager_out1, nvf_out[0])
+    nvfuser_direct_test.assertEqual(eager_out2, nvf_out[1])
diff --git a/tests/python/opinfo/opinfos.py b/tests/python/opinfo/opinfos.py
@@ -1326,6 +1326,7 @@ def torch_reshape_sym_fn(input_tensor, output_shaped_tensor):
         ArgumentType.ConstantScalar,
         ArgumentType.Constant,
     ),
+    supports_direct_bindings=True,
 )
 tensor_creation_ops.append(iota_opinfo)
 

Original file line number	Diff line number	Diff line change
`@@ -1326,6 +1326,7 @@ def torch_reshape_sym_fn(input_tensor, output_shaped_tensor):`
`1326`	`1326`	`ArgumentType.ConstantScalar,`
`1327`	`1327`	`ArgumentType.Constant,`
`1328`	`1328`	`),`
	`1329`	`+ supports_direct_bindings=True,`
`1329`	`1330`	`)`
`1330`	`1331`	`tensor_creation_ops.append(iota_opinfo)`
`1331`	`1332`