|
| 1 | +//===----------------------------------------------------------------------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// Also available under a BSD-style license. See LICENSE. |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | + |
| 10 | +#include "PassDetail.h" |
| 11 | + |
| 12 | +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" |
| 13 | +#include "torch-mlir/Dialect/Torch/IR/TorchOps.h" |
| 14 | +#include "torch-mlir/Dialect/Torch/Transforms/Passes.h" |
| 15 | +#include "torch-mlir/Dialect/Torch/Utils/Utils.h" |
| 16 | + |
| 17 | +using namespace mlir; |
| 18 | +using namespace mlir::torch; |
| 19 | +using namespace mlir::torch::Torch; |
| 20 | + |
| 21 | +namespace { |
| 22 | + |
| 23 | +template <typename SrcOp> |
| 24 | +class QuantizeOperands : public OpRewritePattern<SrcOp> { |
| 25 | +public: |
| 26 | + using OpRewritePattern<SrcOp>::OpRewritePattern; |
| 27 | + |
| 28 | + LogicalResult matchAndRewrite(SrcOp op, |
| 29 | + PatternRewriter &rewriter) const override { |
| 30 | + llvm::SmallVector<Value> operands(op->getOperands()); |
| 31 | + |
| 32 | + bool dequanted = false; |
| 33 | + for (auto &operand : operands) { |
| 34 | + if (auto dequant = operand.getDefiningOp<AtenDequantizeTensorOp>()) { |
| 35 | + operand = dequant.getOperand(); |
| 36 | + dequanted = true; |
| 37 | + } |
| 38 | + if (auto dequant = operand.getDefiningOp<AtenDequantizeSelfOp>()) { |
| 39 | + operand = dequant.getOperand(); |
| 40 | + dequanted = true; |
| 41 | + } |
| 42 | + } |
| 43 | + |
| 44 | + if (!dequanted) { |
| 45 | + return rewriter.notifyMatchFailure(op, "no dequantizations found"); |
| 46 | + } |
| 47 | + |
| 48 | + rewriter.replaceOpWithNewOp<SrcOp>(op, op.getType(), operands); |
| 49 | + return success(); |
| 50 | + } |
| 51 | +}; |
| 52 | + |
| 53 | +template <typename SrcOp> class QuantizeBias : public OpRewritePattern<SrcOp> { |
| 54 | +public: |
| 55 | + using OpRewritePattern<SrcOp>::OpRewritePattern; |
| 56 | + |
| 57 | + LogicalResult matchAndRewrite(SrcOp op, |
| 58 | + PatternRewriter &rewriter) const override { |
| 59 | + llvm::SmallVector<Value> operands(op->getOperands()); |
| 60 | + if (operands.size() < 3) |
| 61 | + return failure(); |
| 62 | + |
| 63 | + Value bias = operands[2]; |
| 64 | + if (bias.getDefiningOp<AtenDequantizeTensorOp>()) |
| 65 | + return failure(); |
| 66 | + |
| 67 | + Value lhsScale; |
| 68 | + if (auto qLhs = |
| 69 | + operands[0].getDefiningOp<Aten_MakePerTensorQuantizedTensorOp>()) |
| 70 | + lhsScale = qLhs.getScale(); |
| 71 | + |
| 72 | + Value rhsScale; |
| 73 | + if (auto qRhs = |
| 74 | + operands[1].getDefiningOp<Aten_MakePerTensorQuantizedTensorOp>()) |
| 75 | + rhsScale = qRhs.getScale(); |
| 76 | + |
| 77 | + if (!rhsScale || !lhsScale) |
| 78 | + return failure(); |
| 79 | + |
| 80 | + auto biasTy = bias.getType().cast<ValueTensorType>(); |
| 81 | + auto biasETy = biasTy.getOptionalDtype(); |
| 82 | + if (!biasETy || !isa<mlir::FloatType>(biasETy)) |
| 83 | + return failure(); |
| 84 | + |
| 85 | + Value biasScale = rewriter.create<AtenMulFloatOp>( |
| 86 | + op.getLoc(), lhsScale.getType(), lhsScale, rhsScale); |
| 87 | + |
| 88 | + Value zero = rewriter.create<Torch::ConstantIntOp>( |
| 89 | + op.getLoc(), rewriter.getType<Torch::IntType>(), |
| 90 | + rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0)); |
| 91 | + |
| 92 | + auto qi32Ty = rewriter.getType<QInt32Type>(); |
| 93 | + auto newBiasTy = |
| 94 | + rewriter.getType<ValueTensorType>(biasTy.getOptionalSizes(), qi32Ty); |
| 95 | + Value dtype = getDtypeIntValueForType(rewriter, op.getLoc(), qi32Ty); |
| 96 | + bias = rewriter.create<AtenQuantizePerTensorOp>( |
| 97 | + op.getLoc(), newBiasTy, bias, biasScale, zero, dtype); |
| 98 | + |
| 99 | + operands[2] = bias; |
| 100 | + rewriter.replaceOpWithNewOp<SrcOp>(op, op.getType(), operands); |
| 101 | + return success(); |
| 102 | + } |
| 103 | +}; |
| 104 | + |
| 105 | +template <typename SrcOp> |
| 106 | +class QuantizeAccumulator : public OpRewritePattern<SrcOp> { |
| 107 | +public: |
| 108 | + using OpRewritePattern<SrcOp>::OpRewritePattern; |
| 109 | + |
| 110 | + LogicalResult matchAndRewrite(SrcOp op, |
| 111 | + PatternRewriter &rewriter) const override { |
| 112 | + auto lhs = op.getOperand(0); |
| 113 | + auto rhs = op.getOperand(1); |
| 114 | + |
| 115 | + auto resultTy = dyn_cast_or_null<ValueTensorType>(op.getType()); |
| 116 | + if (!resultTy || !resultTy.hasDtype()) |
| 117 | + return failure(); |
| 118 | + |
| 119 | + Type resultETy = resultTy.getDtype(); |
| 120 | + if (!resultETy.isa<mlir::FloatType>()) |
| 121 | + return failure(); |
| 122 | + |
| 123 | + Value lhsScale; |
| 124 | + if (auto defining = |
| 125 | + lhs.template getDefiningOp<Aten_MakePerTensorQuantizedTensorOp>()) { |
| 126 | + lhsScale = defining.getScale(); |
| 127 | + } |
| 128 | + |
| 129 | + Value rhsScale; |
| 130 | + if (auto defining = |
| 131 | + rhs.template getDefiningOp<Aten_MakePerTensorQuantizedTensorOp>()) { |
| 132 | + rhsScale = defining.getScale(); |
| 133 | + } |
| 134 | + |
| 135 | + if (!lhsScale || !rhsScale) |
| 136 | + return failure(); |
| 137 | + |
| 138 | + // Quantize the bias input to the expected result: |
| 139 | + Value zero = rewriter.create<Torch::ConstantIntOp>( |
| 140 | + op.getLoc(), rewriter.getType<Torch::IntType>(), |
| 141 | + rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0)); |
| 142 | + |
| 143 | + auto qi32Ty = rewriter.getType<QInt32Type>(); |
| 144 | + Value biasScale = rewriter.create<AtenMulFloatOp>( |
| 145 | + op.getLoc(), lhsScale.getType(), lhsScale, rhsScale); |
| 146 | + |
| 147 | + // Update the quantied type: |
| 148 | + llvm::SmallVector<Value> operands(op.getOperands()); |
| 149 | + |
| 150 | + auto newResultTy = |
| 151 | + rewriter.getType<ValueTensorType>(resultTy.getOptionalSizes(), qi32Ty); |
| 152 | + auto conv = rewriter.create<SrcOp>(op.getLoc(), newResultTy, operands); |
| 153 | + |
| 154 | + // Attach the quantize information to the resulting quint32: |
| 155 | + auto intReprTy = rewriter.getType<ValueTensorType>( |
| 156 | + resultTy.getOptionalSizes(), |
| 157 | + rewriter.getIntegerType(32, IntegerType::Signed)); |
| 158 | + auto intRepr = rewriter.create<AtenIntReprOp>(op.getLoc(), intReprTy, conv); |
| 159 | + |
| 160 | + auto quantTy = |
| 161 | + rewriter.getType<ValueTensorType>(resultTy.getOptionalSizes(), qi32Ty); |
| 162 | + auto quant = rewriter.create<Aten_MakePerTensorQuantizedTensorOp>( |
| 163 | + op.getLoc(), quantTy, intRepr, biasScale, zero); |
| 164 | + auto dequant = |
| 165 | + rewriter.create<AtenDequantizeTensorOp>(op.getLoc(), resultTy, quant); |
| 166 | + rewriter.replaceOp(op, dequant); |
| 167 | + |
| 168 | + return success(); |
| 169 | + } |
| 170 | +}; |
| 171 | + |
| 172 | +template <typename SrcOp> class RemoveUnused : public OpRewritePattern<SrcOp> { |
| 173 | +public: |
| 174 | + using OpRewritePattern<SrcOp>::OpRewritePattern; |
| 175 | + |
| 176 | + LogicalResult matchAndRewrite(SrcOp op, |
| 177 | + PatternRewriter &rewriter) const override { |
| 178 | + auto result = op.getResult(); |
| 179 | + if (result.use_empty()) { |
| 180 | + op.erase(); |
| 181 | + return success(); |
| 182 | + } |
| 183 | + return failure(); |
| 184 | + } |
| 185 | +}; |
| 186 | + |
| 187 | +class FuseQuantizedOpsPass : public FuseQuantizedOpsBase<FuseQuantizedOpsPass> { |
| 188 | +public: |
| 189 | + void runOnOperation() override { |
| 190 | + MLIRContext *context = &getContext(); |
| 191 | + RewritePatternSet patterns(context); |
| 192 | + patterns |
| 193 | + .insert<RemoveUnused<AtenDequantizeSelfOp>, |
| 194 | + RemoveUnused<AtenDequantizeTensorOp>, |
| 195 | + RemoveUnused<AtenQuantizePerTensorOp>, |
| 196 | + QuantizeOperands<AtenConvolutionOp>, QuantizeOperands<AtenMmOp>, |
| 197 | + QuantizeAccumulator<AtenConvolutionOp>, |
| 198 | + QuantizeAccumulator<AtenMmOp>, QuantizeBias<AtenConvolutionOp>>( |
| 199 | + context); |
| 200 | + |
| 201 | + GreedyRewriteConfig config; |
| 202 | + if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns), |
| 203 | + config))) { |
| 204 | + return signalPassFailure(); |
| 205 | + } |
| 206 | + } |
| 207 | +}; |
| 208 | + |
| 209 | +} // namespace |
| 210 | + |
/// Factory for the quantized-op fusion pass declared in
/// torch-mlir/Dialect/Torch/Transforms/Passes.h; runs on func.func.
std::unique_ptr<OperationPass<func::FuncOp>>
mlir::torch::Torch::createFuseQuantizedOpsPass() {
  return std::make_unique<FuseQuantizedOpsPass>();
}
0 commit comments