@@ -3349,7 +3349,8 @@ TensorDomain::TensorDomain(
33493349 std::vector<IterDomain*> loop_domain,
33503350 std::optional<std::vector<IterDomain*>> alternate_loop_domain,
33513351 std::vector<std::optional<bool >> contiguity,
3352- std::vector<IterDomain*> additional_ids)
3352+ std::vector<IterDomain*> additional_ids,
3353+ bool skip_checks)
33533354 : Val(passkey, ValType::TensorDomain, DataType::Null),
33543355 root_domain_(std::move(root_domain)),
33553356 logical_domain_(std::move(logical_domain)),
@@ -3366,18 +3367,21 @@ TensorDomain::TensorDomain(
33663367 NVF_CHECK (
33673368 loop_domain_.empty () == logical_domain_.empty (),
33683369 " logical domain and loop domain can only be both empty or neither empty" );
3369- validateLoopDomain (logical_domain_, loop_domain_, additional_ids_);
3370- if (!root_domain_.empty ()) {
3371- ir_utils::validateDomainEquivalence (
3372- logical_domain_, root_domain_, additional_ids_);
3373- }
3374- if (!allocation_domain_.empty ()) {
3375- ir_utils::validateDomainEquivalence (
3376- logical_domain_, allocation_domain_, additional_ids_);
3377- }
3378- if (alternate_loop_domain_.has_value ()) {
3379- validateLoopDomain (
3380- logical_domain_, alternate_loop_domain_.value (), additional_ids_);
3370+
3371+ if (!skip_checks) {
3372+ validateLoopDomain (logical_domain_, loop_domain_, additional_ids_);
3373+ if (!root_domain_.empty ()) {
3374+ ir_utils::validateDomainEquivalence (
3375+ logical_domain_, root_domain_, additional_ids_);
3376+ }
3377+ if (!allocation_domain_.empty ()) {
3378+ ir_utils::validateDomainEquivalence (
3379+ logical_domain_, allocation_domain_, additional_ids_);
3380+ }
3381+ if (alternate_loop_domain_.has_value ()) {
3382+ validateLoopDomain (
3383+ logical_domain_, alternate_loop_domain_.value (), additional_ids_);
3384+ }
33813385 }
33823386
33833387 // resetDomains initializes other member variables, required by clang-tidy
@@ -6551,4 +6555,60 @@ std::vector<PolymorphicValue> CutlassNvfp4GroupedMmaOp::evaluate(
65516555
65526556NVFUSER_DEFINE_CLONE_AND_CREATE (CutlassNvfp4GroupedMmaOp)
65536557
// Constructs the expression node for a grouped block-scaling-factor layout op.
//
// Registers five inputs in this fixed order: input, expert_offsets,
// sf_offsets, k, g. Registers `output` as the only output and stores `layout`
// as a data attribute.
//
// NOTE(review): the accessors used by toString() (in(), expertOffsets(),
// scalingFactorOffsets(), layout()) are declared outside this view; they
// presumably index into the inputs/attributes by position, so the
// registration order below should be kept in sync with them - confirm against
// the header.
// NOTE(review): no argument is validated here (e.g. for nullness); confirm
// whether callers guarantee valid pointers.
6558+ GroupedBlockScalingFactorLayoutOp::GroupedBlockScalingFactorLayoutOp(
6559+ IrBuilderPasskey passkey,
6560+ Val* output,
6561+ Val* input,
6562+ Val* expert_offsets,
6563+ Val* sf_offsets,
6564+ BlockScalingFactorLayout layout,
6565+ Val* k,
6566+ Val* g)
6567+ : Expr(passkey) {
// Inputs, in positional order.
6568+ addInput (input);
6569+ addInput (expert_offsets);
6570+ addInput (sf_offsets);
6571+ addInput (k);
6572+ addInput (g);
// Single output, followed by the layout stored as a data attribute.
6573+ addOutput (output);
6574+ addDataAttribute (layout);
6575+ }
6576+
// Returns a multi-line textual representation of this op.
//
// The first line is output(0) at `indent_size`. The operation name follows one
// level deeper, and the named arguments (input, expert_offsets, sf_offsets,
// layout) one level deeper again, one per line.
//
// The layout is printed as "Block128x4" when layout() equals
// BlockScalingFactorLayout::Block128x4, and as "Unknown" for any other value.
//
// NOTE(review): the `k` and `g` inputs registered by the constructor are not
// printed here - confirm whether that omission is intentional.
6577+ std::string GroupedBlockScalingFactorLayoutOp::toString (int indent_size) const {
6578+ std::stringstream ss;
6579+ indent (ss, indent_size) << output (0 )->toString () << " \n " ;
// The operation name is written one level deeper than the output line.
6580+ indent_size++;
6581+ indent (ss, indent_size) << " = grouped_block_scaling_factor_layout(\n " ;
// The arguments are written one further level deeper.
6582+ indent_size++;
6583+ indent (ss, indent_size) << " input = " << in ()->toString () << " ,\n " ;
6584+ indent (ss, indent_size) << " expert_offsets = " << expertOffsets ()->toString ()
6585+ << " ,\n " ;
6586+ indent (ss, indent_size) << " sf_offsets = "
6587+ << scalingFactorOffsets ()->toString () << " ,\n " ;
6588+ indent (ss, indent_size) << " layout = "
6589+ << (layout () == BlockScalingFactorLayout::Block128x4
6590+ ? " Block128x4"
6591+ : " Unknown" )
6592+ << " \n " ;
// Only one of the two increments is undone, so the closing parenthesis is
// written at the operation-name level, not at the original `indent_size`.
6593+ indent_size--;
6594+ indent (ss, indent_size) << " )\n " ;
6595+ return ss.str ();
6596+ }
6597+
// Inline printing is not supported for this op.
//
// The body unconditionally fails an NVF_CHECK with a fixed message, so no
// string is ever returned and `indent_size` is unused.
// NOTE(review): NVF_CHECK is defined outside this view; presumably a failed
// check throws - confirm. The sibling evaluate() reports its unconditional
// failure with NVF_THROW instead.
6598+ std::string GroupedBlockScalingFactorLayoutOp::toInlineString (
6599+ int indent_size) const {
6600+ NVF_CHECK (
6601+ false , " GroupedBlockScalingFactorLayoutOp can not be printed inline" );
6602+ }
6603+
// Evaluation of this op is not implemented.
//
// The body unconditionally invokes NVF_THROW with a "not yet implemented"
// message; neither `ee` nor `inputs` is read and no value is returned.
// NOTE(review): NVF_THROW is defined outside this view; presumably it raises
// an exception - confirm.
6604+ std::vector<PolymorphicValue> GroupedBlockScalingFactorLayoutOp::evaluate (
6605+ const ExpressionEvaluator& ee,
6606+ const std::vector<PolymorphicValue>& inputs) const {
6607+ // This is a placeholder implementation - the actual implementation
6608+ // would depend on the specific block scaling factor layout operation
6609+ NVF_THROW (" GroupedBlockScalingFactorLayoutOp evaluation not yet implemented" );
6610+ }
6611+
// NOTE(review): this macro is defined outside this view; judging by its name
// it presumably emits the clone/create definitions for
// GroupedBlockScalingFactorLayoutOp - confirm against the macro definition.
6612+ NVFUSER_DEFINE_CLONE_AND_CREATE (GroupedBlockScalingFactorLayoutOp)
6613+
65546614} // namespace nvfuser
0 commit comments