//===- TosaToLinalg.cpp - Lowering Tosa to Linalg Dialect -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // These rewriters lower from the Tosa to the Linalg dialect. // //===----------------------------------------------------------------------===// #include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include using namespace mlir; static SmallVector getNParallelLoopsAttrs(unsigned nParallelLoops) { return SmallVector(nParallelLoops, getParallelIteratorTypeName()); } template static arith::ConstantOp createConstFromIntAttribute(Operation *op, std::string attrName, Type requiredAttrType, OpBuilder &rewriter) { auto castedN = static_cast( op->getAttr(attrName).cast().getValue().getSExtValue()); return rewriter.create( op->getLoc(), IntegerAttr::get(requiredAttrType, castedN)); } template static void getValuesFromIntArrayAttribute(ArrayAttr attr, SmallVector &arrayValues) { for (Attribute val : attr.getValue()) { arrayValues.push_back(val.cast().getValue().getSExtValue()); } } template static mlir::SelectOp clampHelper(Location loc, Value arg, arith::ConstantOp min, arith::ConstantOp max, P pred, OpBuilder &rewriter) { auto smallerThanMin = rewriter.create(loc, pred, arg, min); auto minOrArg = 
rewriter.create(loc, smallerThanMin, min, arg); auto largerThanMax = rewriter.create(loc, pred, max, arg); return rewriter.create(loc, largerThanMax, max, minOrArg); } static SmallVector filterDynamicDims(SmallVector dynDims) { SmallVector filteredDims; for (auto dim : dynDims) if (dim) filteredDims.push_back(dim); return filteredDims; } static Value createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args, ArrayRef resultTypes, PatternRewriter &rewriter) { Location loc = op->getLoc(); auto elementTy = op->getOperand(0).getType().cast().getElementType(); // tosa::AbsOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); if (isa(op) && elementTy.isa()) { auto zero = rewriter.create( loc, rewriter.getZeroAttr(elementTy)); auto cmp = rewriter.create(loc, arith::CmpIPredicate::sgt, args[0], zero); auto neg = rewriter.create(loc, zero, args[0]); return rewriter.create(loc, cmp, args[0], neg); } // tosa::AddOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::SubOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::MulOp if (isa(op) && elementTy.isa()) { if (dyn_cast(op).shift() != 0) { (void)rewriter.notifyMatchFailure(op, "Cannot have shift value for float"); return nullptr; } return rewriter.create(loc, resultTypes, args); } // tosa::DivOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::ReciprocalOp if (isa(op) && elementTy.isa()) { auto one = rewriter.create(loc, FloatAttr::get(elementTy, 1)); return rewriter.create(loc, resultTypes, one, args[0]); } if (isa(op) && elementTy.isa()) { Value a = args[0]; Value b = args[1]; auto shift = op->getAttr("shift").cast().getValue().getSExtValue(); if (shift > 0) { auto shiftConst = rewriter.create(loc, shift, 
/*bitwidth=*/8); if (!a.getType().isInteger(32)) a = rewriter.create(loc, rewriter.getI32Type(), a); if (!b.getType().isInteger(32)) b = rewriter.create(loc, rewriter.getI32Type(), b); auto result = rewriter.create( loc, rewriter.getI32Type(), a, b, shiftConst, rewriter.getBoolAttr(false)); if (elementTy.isInteger(32)) return result; return rewriter.create(loc, elementTy, result); } int aWidth = a.getType().getIntOrFloatBitWidth(); int bWidth = b.getType().getIntOrFloatBitWidth(); int cWidth = resultTypes[0].getIntOrFloatBitWidth(); if (aWidth < cWidth) a = rewriter.create(loc, resultTypes[0], a); if (bWidth < cWidth) b = rewriter.create(loc, resultTypes[0], b); return rewriter.create(loc, resultTypes, a, b); } // tosa::NegateOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); if (isa(op) && elementTy.isa() && !cast(op).quantization_info()) { auto constant = rewriter.create(loc, IntegerAttr::get(elementTy, 0)); return rewriter.create(loc, resultTypes, constant, args[0]); } if (isa(op) && elementTy.isa() && cast(op).quantization_info()) { auto quantizationInfo = cast(op).quantization_info(); int32_t inputBitWidth = elementTy.getIntOrFloatBitWidth(); int64_t inZp = quantizationInfo.getValue().input_zp().getValue().getSExtValue(); int64_t outZp = quantizationInfo.getValue().output_zp().getValue().getSExtValue(); // Compute the maximum value that can occur in the intermediate buffer. int64_t zpAdd = inZp + outZp; int64_t maxValue = APInt::getSignedMaxValue(inputBitWidth).getSExtValue() + std::abs(zpAdd) + 1; // Convert that maximum value into the maximum bitwidth needed to represent // it. We assume 48-bit numbers may be supported further in the pipeline. 
int intermediateBitWidth = 64; if (maxValue <= APInt::getSignedMaxValue(16).getSExtValue()) { intermediateBitWidth = 16; } else if (maxValue <= APInt::getSignedMaxValue(32).getSExtValue()) { intermediateBitWidth = 32; } else if (maxValue <= APInt::getSignedMaxValue(48).getSExtValue()) { intermediateBitWidth = 48; } Type intermediateType = rewriter.getIntegerType(intermediateBitWidth); Value zpAddValue = rewriter.create( loc, rewriter.getIntegerAttr(intermediateType, zpAdd)); // The negation can be applied by doing: // outputValue = inZp + outZp - inputValue auto ext = rewriter.create(loc, intermediateType, args[0]); auto sub = rewriter.create(loc, zpAddValue, ext); // Clamp to the negation range. auto min = rewriter.create( loc, APInt::getSignedMinValue(inputBitWidth).getSExtValue(), intermediateType); auto max = rewriter.create( loc, APInt::getSignedMaxValue(inputBitWidth).getSExtValue(), intermediateType); auto clamp = clampHelper( loc, sub, min, max, arith::CmpIPredicate::slt, rewriter); // Truncate to the final value. 
return rewriter.create(loc, elementTy, clamp); } // tosa::BitwiseAndOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::BitwiseOrOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::BitwiseNotOp if (isa(op) && elementTy.isa()) { auto allOnesAttr = rewriter.getIntegerAttr( elementTy, APInt::getAllOnes(elementTy.getIntOrFloatBitWidth())); auto allOnes = rewriter.create(loc, allOnesAttr); return rewriter.create(loc, resultTypes, args[0], allOnes); } // tosa::BitwiseXOrOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::LogicalLeftShiftOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::LogicalRightShiftOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::ArithmeticRightShiftOp if (isa(op) && elementTy.isa()) { auto result = rewriter.create(loc, resultTypes, args); auto round = op->getAttr("round").cast().getValue(); if (!round) { return result; } Type i1Ty = IntegerType::get(rewriter.getContext(), /*width=*/1); auto one = rewriter.create(loc, IntegerAttr::get(elementTy, 1)); auto zero = rewriter.create(loc, IntegerAttr::get(elementTy, 0)); auto i1one = rewriter.create(loc, IntegerAttr::get(i1Ty, 1)); // Checking that input2 != 0 auto shiftValueGreaterThanZero = rewriter.create( loc, arith::CmpIPredicate::sgt, args[1], zero); // Checking for the last bit of input1 to be 1 auto subtract = rewriter.create(loc, resultTypes, args[1], one); auto shifted = rewriter.create(loc, resultTypes, args[0], subtract) ->getResults(); auto truncated = rewriter.create(loc, i1Ty, shifted, mlir::None); auto isInputOdd = rewriter.create(loc, i1Ty, truncated, i1one); auto shouldRound = rewriter.create( loc, i1Ty, shiftValueGreaterThanZero, isInputOdd); auto extended = rewriter.create(loc, resultTypes, shouldRound); return rewriter.create(loc, resultTypes, result, extended); } // tosa::ClzOp if 
(isa(op) && elementTy.isa()) { int bitWidth = elementTy.getIntOrFloatBitWidth(); auto zero = rewriter.create(loc, IntegerAttr::get(elementTy, 0)); auto leadingZeros = rewriter.create( loc, IntegerAttr::get(elementTy, bitWidth)); SmallVector operands = {args[0], leadingZeros, zero}; SmallVector types = {elementTy, elementTy, elementTy}; auto whileOp = rewriter.create(loc, types, operands); Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, types); Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, types); // The conditional block of the while loop. { rewriter.setInsertionPointToStart(&whileOp.getBefore().front()); Value input = before->getArgument(0); Value zero = before->getArgument(2); Value inputLargerThanZero = rewriter.create( loc, arith::CmpIPredicate::ne, input, zero); rewriter.create(loc, inputLargerThanZero, before->getArguments()); } // The body of the while loop: shift right until reaching a value of 0. { rewriter.setInsertionPointToStart(&whileOp.getAfter().front()); Value input = after->getArgument(0); Value leadingZeros = after->getArgument(1); auto one = rewriter.create( loc, IntegerAttr::get(elementTy, 1)); auto shifted = rewriter.create(loc, resultTypes, input, one); auto leadingZerosMinusOne = rewriter.create(loc, resultTypes, leadingZeros, one); rewriter.create( loc, ValueRange({shifted, leadingZerosMinusOne, after->getArgument(2)})); } rewriter.setInsertionPointAfter(whileOp); return whileOp->getResult(1); } // tosa::LogicalAnd if (isa(op) && elementTy.isInteger(1)) return rewriter.create(loc, resultTypes, args); // tosa::LogicalNot if (isa(op) && elementTy.isInteger(1)) { auto one = rewriter.create( loc, rewriter.getIntegerAttr(elementTy, 1)); return rewriter.create(loc, resultTypes, args[0], one); } // tosa::LogicalOr if (isa(op) && elementTy.isInteger(1)) return rewriter.create(loc, resultTypes, args); // tosa::LogicalXor if (isa(op) && elementTy.isInteger(1)) return rewriter.create(loc, resultTypes, args); // tosa::PowOp 
if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::RsqrtOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::LogOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::ExpOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::TanhOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::GreaterOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, arith::CmpFPredicate::OGT, args[0], args[1]); if (isa(op) && elementTy.isSignlessInteger()) return rewriter.create(loc, arith::CmpIPredicate::sgt, args[0], args[1]); // tosa::GreaterEqualOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, arith::CmpFPredicate::OGE, args[0], args[1]); if (isa(op) && elementTy.isSignlessInteger()) return rewriter.create(loc, arith::CmpIPredicate::sge, args[0], args[1]); // tosa::EqualOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, arith::CmpFPredicate::OEQ, args[0], args[1]); if (isa(op) && elementTy.isSignlessInteger()) return rewriter.create(loc, arith::CmpIPredicate::eq, args[0], args[1]); // tosa::SelectOp if (isa(op)) { elementTy = op->getOperand(1).getType().cast().getElementType(); if (elementTy.isa() || elementTy.isa()) return rewriter.create(loc, args[0], args[1], args[2]); } // tosa::MaximumOp if (isa(op) && elementTy.isa()) { auto predicate = rewriter.create( loc, arith::CmpFPredicate::OGT, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } if (isa(op) && elementTy.isSignlessInteger()) { auto predicate = rewriter.create( loc, arith::CmpIPredicate::sgt, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } // tosa::MinimumOp if (isa(op) && elementTy.isa()) { auto predicate = rewriter.create( loc, arith::CmpFPredicate::OLT, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } if (isa(op) && 
elementTy.isSignlessInteger()) { auto predicate = rewriter.create( loc, arith::CmpIPredicate::slt, args[0], args[1]); return rewriter.create(loc, predicate, args[0], args[1]); } // tosa::CeilOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::FloorOp if (isa(op) && elementTy.isa()) return rewriter.create(loc, resultTypes, args); // tosa::ClampOp if (isa(op) && elementTy.isa()) { auto min = rewriter.create(loc, elementTy, op->getAttr("min_fp")); auto max = rewriter.create(loc, elementTy, op->getAttr("max_fp")); return clampHelper(loc, args[0], min, max, arith::CmpFPredicate::OLT, rewriter); } if (isa(op) && elementTy.isa()) { auto intTy = elementTy.cast(); int32_t min = static_cast( op->getAttr("min_int").cast().getValue().getSExtValue()); int32_t max = static_cast( op->getAttr("max_int").cast().getValue().getSExtValue()); if (intTy.isUnsignedInteger()) { min = std::max(min, 0); max = std::min( max, APInt::getMaxValue(intTy.getIntOrFloatBitWidth()).getSExtValue()); } else { min = std::max( min, APInt::getSignedMinValue(intTy.getIntOrFloatBitWidth()) .getSExtValue()); max = std::min( max, APInt::getSignedMaxValue(intTy.getIntOrFloatBitWidth()) .getSExtValue()); } auto minVal = rewriter.create( loc, min, intTy.getIntOrFloatBitWidth()); auto maxVal = rewriter.create( loc, max, intTy.getIntOrFloatBitWidth()); return clampHelper(loc, args[0], minVal, maxVal, arith::CmpIPredicate::slt, rewriter); } // tosa::ReluNOp if (isa(op) && elementTy.isa()) { auto zero = rewriter.create(loc, FloatAttr::get(elementTy, 0)); auto n = rewriter.create(loc, elementTy, op->getAttr("max_fp")); return clampHelper(loc, args[0], zero, n, arith::CmpFPredicate::OLT, rewriter); } if (isa(op) && elementTy.isa()) { auto zero = rewriter.create(loc, IntegerAttr::get(elementTy, 0)); auto n = createConstFromIntAttribute(op, "max_int", elementTy, rewriter); return clampHelper(loc, args[0], zero, n, arith::CmpIPredicate::slt, rewriter); } // tosa::SigmoidOp if 
(isa(op) && elementTy.isa()) { auto one = rewriter.create(loc, FloatAttr::get(elementTy, 1)); auto negate = rewriter.create(loc, resultTypes, args[0]); auto exp = rewriter.create(loc, resultTypes, negate); auto added = rewriter.create(loc, resultTypes, exp, one); return rewriter.create(loc, resultTypes, one, added); } // tosa::CastOp if (isa(op)) { Type srcTy = elementTy; Type dstTy = resultTypes.front(); bool bitExtend = srcTy.getIntOrFloatBitWidth() < dstTy.getIntOrFloatBitWidth(); if (srcTy == dstTy) return args.front(); if (srcTy.isa() && dstTy.isa() && bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); if (srcTy.isa() && dstTy.isa() && !bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); // 1-bit integers need to be treated as signless. if (srcTy.isInteger(1) && arith::UIToFPOp::areCastCompatible(srcTy, dstTy)) return rewriter.create(loc, resultTypes, args, mlir::None); if (srcTy.isInteger(1) && dstTy.isa() && bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); // Unsigned integers need an unrealized cast so that they can be passed // to UIToFP. if (srcTy.isUnsignedInteger() && dstTy.isa()) { auto unrealizedCast = rewriter .create( loc, rewriter.getIntegerType(srcTy.getIntOrFloatBitWidth()), args[0]) .getResult(0); return rewriter.create(loc, resultTypes[0], unrealizedCast); } // All other si-to-fp conversions should be handled by SIToFP. if (arith::SIToFPOp::areCastCompatible(srcTy, dstTy)) return rewriter.create(loc, resultTypes, args, mlir::None); // Casting to boolean, floats need to only be checked as not-equal to zero. 
if (srcTy.isa() && dstTy.isInteger(1)) { Value zero = rewriter.create( loc, rewriter.getFloatAttr(srcTy, 0.0)); return rewriter.create(loc, arith::CmpFPredicate::UNE, args.front(), zero); } if (arith::FPToSIOp::areCastCompatible(srcTy, dstTy)) { auto zero = rewriter.create( loc, rewriter.getF32FloatAttr(0.0f)); auto half = rewriter.create( loc, rewriter.getF32FloatAttr(0.5f)); auto intMin = rewriter.create( loc, rewriter.getF32FloatAttr( APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue())); auto intMax = rewriter.create( loc, rewriter.getF32FloatAttr( APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue())); auto added = rewriter.create(loc, args[0], half); auto subbed = rewriter.create(loc, args[0], half); auto negative = rewriter.create( loc, arith::CmpFPredicate::OLT, args[0], zero); auto rounded = rewriter.create(loc, negative, subbed, added); auto clamped = clampHelper( loc, rounded, intMin, intMax, arith::CmpFPredicate::OLT, rewriter); return rewriter.create(loc, dstTy, clamped); } // Casting to boolean, integers need to only be checked as not-equal to // zero. 
if (srcTy.isa() && dstTy.isInteger(1)) { Value zero = rewriter.create( loc, 0, srcTy.getIntOrFloatBitWidth()); return rewriter.create(loc, arith::CmpIPredicate::ne, args.front(), zero); } if (srcTy.isa() && dstTy.isa() && bitExtend) return rewriter.create(loc, resultTypes, args, mlir::None); if (srcTy.isa() && dstTy.isa() && !bitExtend) { auto intMin = rewriter.create( loc, APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue(), srcTy.getIntOrFloatBitWidth()); auto intMax = rewriter.create( loc, APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth()) .getSExtValue(), srcTy.getIntOrFloatBitWidth()); auto clamped = clampHelper( loc, args[0], intMin, intMax, arith::CmpIPredicate::slt, rewriter); return rewriter.create(loc, dstTy, clamped); } } (void)rewriter.notifyMatchFailure( op, "unhandled op for linalg body calculation for elementwise op"); return nullptr; } static LogicalResult elementwiseMatchAndRewriteHelper(Operation *operation, PatternRewriter &rewriter) { auto loc = operation->getLoc(); assert(operation->getNumResults() == 1 && "All TOSA elementwise ops should only return a single result."); auto results = operation->getResults(); auto resultTy = operation->getResult(0).getType().dyn_cast(); if (!resultTy) return rewriter.notifyMatchFailure(operation, "All results must be a shaped type"); unsigned rank = resultTy.getRank(); // Construct the indexing maps needed for linalg.generic ops. 
SmallVector bodyArgTypes; for (Value in : operation->getOperands()) bodyArgTypes.emplace_back(getElementTypeOrSelf(in.getType())); SmallVector opResultTypes; SmallVector initTensors; SmallVector dynDims; dynDims.resize(results.front().getType().cast().getRank()); for (auto arg : operation->getOperands()) { auto operandTy = arg.getType().cast(); for (int i = 0; i < operandTy.getRank(); i++) { if (operandTy.isDynamicDim(i) && !dynDims[i]) dynDims[i] = rewriter.create(loc, arg, i); } } SmallVector filteredDims = filterDynamicDims(dynDims); for (auto result : results) { auto resultTy = result.getType().template cast(); initTensors.push_back(rewriter.create( loc, filteredDims, resultTy.getShape(), resultTy.getElementType())); opResultTypes.push_back(result.getType()); } auto bodyResultTypes = llvm::to_vector<4>(llvm::map_range( initTensors, [](Value v) { return getElementTypeOrSelf(v); })); SmallVector operands; SmallVector indexingMaps; indexingMaps.reserve(operation->getNumOperands() + bodyResultTypes.size()); // Input indexing maps may be broadcasted. 
for (Value operand : operation->getOperands()) { ShapedType type = operand.getType().cast(); if (type.getShape() == resultTy.getShape()) { operands.push_back(operand); indexingMaps.push_back(rewriter.getMultiDimIdentityMap(rank)); continue; } SmallVector newShape; SmallVector affineExprs; newShape.reserve(type.getRank()); for (auto it : llvm::enumerate(type.getShape())) { if (it.value() == resultTy.getDimSize(it.index())) { newShape.push_back(it.value()); affineExprs.push_back( mlir::getAffineDimExpr(it.index(), rewriter.getContext())); } } if (newShape.size() != rank) { operand = rewriter.create( loc, RankedTensorType::get(newShape, type.getElementType()), operand, rewriter.getI64ArrayAttr(newShape)); } operands.push_back(operand); indexingMaps.push_back(AffineMap::get( /*dimCount=*/type.getRank(), /*symbolCount=*/0, affineExprs, rewriter.getContext())); } indexingMaps.append(operation->getNumResults(), rewriter.getMultiDimIdentityMap(rank)); bool didEncounterError = false; auto linalgOp = rewriter.create( loc, opResultTypes, operands, initTensors, indexingMaps, getNParallelLoopsAttrs(rank), [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { Value opResult = createLinalgBodyCalculationForElementwiseOp( operation, blockArgs.take_front(operation->getNumOperands()), bodyResultTypes, rewriter); if (!opResult) { didEncounterError = true; return; } nestedBuilder.create(loc, opResult); }); if (didEncounterError) return failure(); rewriter.replaceOp(operation, linalgOp->getResults()); return success(); } // Returns the constant initial value for a given reduction operation. The // attribute type varies depending on the element type required. 
static Attribute createInitialValueForReduceOp(Operation *op, Type elementTy,
                                               PatternRewriter &rewriter) {
  // Classify the element type once; each operation below picks the identity
  // of its reduction for that class. An empty Attribute means "unsupported".
  const bool isFloat = elementTy.isa<FloatType>();
  const bool isInt = elementTy.isa<IntegerType>();
  const bool isBool = elementTy.isInteger(1);

  // Largest finite float of the element type, optionally negated.
  auto largestFloat = [&](bool negative) -> Attribute {
    return rewriter.getFloatAttr(
        elementTy,
        APFloat::getLargest(elementTy.cast<FloatType>().getFloatSemantics(),
                            negative));
  };
  // Signed integer extremes of the element type.
  auto signedMax = [&]() -> Attribute {
    return rewriter.getIntegerAttr(
        elementTy,
        APInt::getSignedMaxValue(elementTy.getIntOrFloatBitWidth()));
  };
  auto signedMin = [&]() -> Attribute {
    return rewriter.getIntegerAttr(
        elementTy,
        APInt::getSignedMinValue(elementTy.getIntOrFloatBitWidth()));
  };

  // Sum starts at zero.
  if (isa<tosa::ReduceSumOp>(op)) {
    if (isFloat)
      return rewriter.getFloatAttr(elementTy, 0.0);
    if (isInt)
      return rewriter.getIntegerAttr(elementTy, 0);
    return {};
  }

  // Product starts at one.
  if (isa<tosa::ReduceProdOp>(op)) {
    if (isFloat)
      return rewriter.getFloatAttr(elementTy, 1.0);
    if (isInt)
      return rewriter.getIntegerAttr(elementTy, 1);
    return {};
  }

  // Minimum starts at the largest representable value.
  if (isa<tosa::ReduceMinOp>(op)) {
    if (isFloat)
      return largestFloat(/*negative=*/false);
    if (isInt)
      return signedMax();
    return {};
  }

  // Maximum starts at the smallest representable value.
  if (isa<tosa::ReduceMaxOp>(op)) {
    if (isFloat)
      return largestFloat(/*negative=*/true);
    if (isInt)
      return signedMin();
    return {};
  }

  // Logical "all" starts at true, logical "any" at false.
  if (isa<tosa::ReduceAllOp>(op)) {
    if (isBool)
      return rewriter.getIntegerAttr(elementTy, APInt::getAllOnes(1));
    return {};
  }

  if (isa<tosa::ReduceAnyOp>(op)) {
    if (isBool)
      return rewriter.getIntegerAttr(elementTy, APInt::getZero(1));
    return {};
  }

  // Argmax uses the same starting value as a maximum reduction.
  if (isa<tosa::ArgMaxOp>(op)) {
    if (isFloat)
      return largestFloat(/*negative=*/true);
    if (isInt)
      return signedMin();
    return {};
  }

  return {};
}

// Creates the body calculation for a reduction. The operations vary depending
// on the input type.
static Value createLinalgBodyCalculationForReduceOp(Operation *op,
                                                    ValueRange args,
                                                    Type elementTy,
                                                    PatternRewriter &rewriter) {
  // `args` holds the two scalars to combine. A null Value is returned when the
  // operation / element type pair has no lowering here.
  Location loc = op->getLoc();
  const bool isFloat = elementTy.isa<FloatType>();
  const bool isInt = elementTy.isa<IntegerType>();
  const bool isBool = elementTy.isInteger(1);

  if (isa<tosa::ReduceSumOp>(op)) {
    if (isFloat)
      return rewriter.create<arith::AddFOp>(loc, args);
    if (isInt)
      return rewriter.create<arith::AddIOp>(loc, args);
    return {};
  }

  if (isa<tosa::ReduceProdOp>(op)) {
    if (isFloat)
      return rewriter.create<arith::MulFOp>(loc, args);
    if (isInt)
      return rewriter.create<arith::MulIOp>(loc, args);
    return {};
  }

  // Min and max are a comparison followed by a select between the operands.
  if (isa<tosa::ReduceMinOp>(op)) {
    if (isFloat) {
      auto isLess = rewriter.create<arith::CmpFOp>(
          loc, arith::CmpFPredicate::OLT, args[0], args[1]);
      return rewriter.create<mlir::SelectOp>(loc, isLess, args[0], args[1]);
    }
    if (isInt) {
      auto isLess = rewriter.create<arith::CmpIOp>(
          loc, arith::CmpIPredicate::slt, args[0], args[1]);
      return rewriter.create<mlir::SelectOp>(loc, isLess, args[0], args[1]);
    }
    return {};
  }

  if (isa<tosa::ReduceMaxOp>(op)) {
    if (isFloat) {
      auto isGreater = rewriter.create<arith::CmpFOp>(
          loc, arith::CmpFPredicate::OGT, args[0], args[1]);
      return rewriter.create<mlir::SelectOp>(loc, isGreater, args[0], args[1]);
    }
    if (isInt) {
      auto isGreater = rewriter.create<arith::CmpIOp>(
          loc, arith::CmpIPredicate::sgt, args[0], args[1]);
      return rewriter.create<mlir::SelectOp>(loc, isGreater, args[0], args[1]);
    }
    return {};
  }

  // Boolean reductions only apply to 1-bit integers.
  if (isa<tosa::ReduceAllOp>(op)) {
    if (isBool)
      return rewriter.create<arith::AndIOp>(loc, args);
    return {};
  }

  if (isa<tosa::ReduceAnyOp>(op)) {
    if (isBool)
      return rewriter.create<arith::OrIOp>(loc, args);
    return {};
  }

  return {};
}

// Performs the match and rewrite for reduction operations. This includes
// declaring a correctly sized initial value, and the linalg.generic operation
// that reduces across the specified axis.
static LogicalResult reduceMatchAndRewriteHelper(Operation *op, uint64_t axis, PatternRewriter &rewriter) { auto loc = op->getLoc(); auto inputTy = op->getOperand(0).getType().template cast(); auto resultTy = op->getResult(0).getType().template cast(); auto elementTy = resultTy.getElementType(); Value input = op->getOperand(0); llvm::SmallVector reduceShape; for (unsigned i = 0; i < inputTy.getRank(); i++) { if (axis != i) reduceShape.push_back(inputTy.getDimSize(i)); } Type reduceTy = RankedTensorType::get(reduceShape, resultTy.getElementType()); // First fill the output buffer with the init value. auto initTensor = rewriter .create(loc, ArrayRef({}), reduceShape, resultTy.getElementType()) .result(); auto fillValueAttr = createInitialValueForReduceOp(op, elementTy, rewriter); if (!fillValueAttr) return rewriter.notifyMatchFailure( op, "No initial value found for reduction operation"); auto fillValue = rewriter.create(loc, fillValueAttr); auto filledTensor = rewriter.create(loc, fillValue, initTensor).result(); SmallVector srcExprs; SmallVector dstExprs; SmallVector iteratorTypes; for (unsigned int i = 0, rank = inputTy.getRank(); i != rank; ++i) { srcExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext())); iteratorTypes.push_back(axis == i ? 
getReductionIteratorTypeName() : getParallelIteratorTypeName()); if (axis != i) dstExprs.push_back(mlir::getAffineDimExpr(i, rewriter.getContext())); } bool didEncounterError = false; auto maps = AffineMap::inferFromExprList({srcExprs, dstExprs}); auto linalgOp = rewriter.create( loc, reduceTy, input, filledTensor, maps, iteratorTypes, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange blockArgs) { auto result = createLinalgBodyCalculationForReduceOp( op, blockArgs, elementTy, rewriter); if (result) didEncounterError = true; nestedBuilder.create(loc, result); }); if (!didEncounterError) return failure(); rewriter.replaceOpWithNewOp(op, resultTy, linalgOp.getResults()); return success(); } static bool findIntermediateShape(ArrayRef lhsShape, ArrayRef rhsShape, SmallVector &intermediateShape, bool isDynamic) { if (isDynamic) { // TODO (natashaknk): Make dynamic intermediate shape not always be rank-1 intermediateShape = {-1}; return true; } if (lhsShape.empty() || rhsShape.empty()) { intermediateShape = {}; return true; } unsigned currLhsDim = 0, currRhsDim = 0; while (currLhsDim < lhsShape.size() && currRhsDim < rhsShape.size()) { int64_t rhsSize = rhsShape[currRhsDim]; int64_t lhsSize = lhsShape[currLhsDim]; while (lhsSize != rhsSize && currLhsDim < lhsShape.size() && currRhsDim < rhsShape.size()) { if (lhsSize < rhsSize) { currLhsDim++; lhsSize *= lhsShape[currLhsDim]; } else { currRhsDim++; rhsSize *= rhsShape[currRhsDim]; } } if (lhsSize == rhsSize) { intermediateShape.push_back(lhsSize); } currRhsDim++; currLhsDim++; } // If the iterators didn't reach the end and their leftover dimensions are not // equal to 1 an intermediate shape was not found. 
while (currLhsDim < lhsShape.size()) { if (lhsShape[currLhsDim++] != 1) { return false; } } while (currRhsDim < rhsShape.size()) { if (rhsShape[currRhsDim++] != 1) { return false; } } return true; } static bool createReassociationMapsForCollapse( PatternRewriter &rewriter, ArrayRef srcShape, ArrayRef dstShape, SmallVector &reassociationMap, bool isDynamic) { // If the shape is dynamic, create a map for collapsing into one dimension. if (isDynamic) { SmallVector exprs; for (int i = 0, s = srcShape.size(); i < s; ++i) exprs.push_back(rewriter.getAffineDimExpr(i)); reassociationMap = {exprs}; return true; } if (dstShape.empty()) { reassociationMap = {}; return true; } reassociationMap.resize(dstShape.size()); unsigned currSrcDim = 0, currDstDim = 0; while (currSrcDim < srcShape.size() && currDstDim < dstShape.size()) { int64_t dstSize = dstShape[currDstDim]; int64_t srcSize = srcShape[currSrcDim]; while (srcSize < dstSize && currSrcDim < srcShape.size()) { reassociationMap[currDstDim].push_back( rewriter.getAffineDimExpr(currSrcDim++)); srcSize *= srcShape[currSrcDim]; } if (srcSize == dstSize) { reassociationMap[currDstDim].push_back( rewriter.getAffineDimExpr(currSrcDim++)); // If the next dim in collapsedShape is not 1, treat subsequent dims in // expandedShape which are 1 to be collapsed. if (currDstDim == dstShape.size() - 1 || dstShape[currDstDim + 1] != 1) { while (currSrcDim < srcShape.size() && srcShape[currSrcDim] == 1) { reassociationMap[currDstDim].push_back( rewriter.getAffineDimExpr(currSrcDim++)); } } } currDstDim++; } // If both iterators didn't reach the end, we have leftover dimentions which // implies that we have a mismatch in shape. 
if (currSrcDim != srcShape.size() || currDstDim != dstShape.size()) { return false; } return true; } namespace { template class PointwiseConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(SrcOp op, PatternRewriter &rewriter) const final { return elementwiseMatchAndRewriteHelper(op, rewriter); } }; class ReshapeConverterCollapse : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { ShapedType operandTy = adaptor.input1().getType().cast(); ShapedType resultTy = reshape.getType().template cast(); bool isDynamic = !operandTy.hasStaticShape(); if (isDynamic && resultTy.getRank() != 1) { return rewriter.notifyMatchFailure( reshape, "Cannot collapse dynamic dims to more than one dimension"); } if (operandTy == resultTy) { rewriter.replaceOp(reshape, adaptor.getOperands()[0]); return success(); } SmallVector reassociationMap; if (!createReassociationMapsForCollapse(rewriter, operandTy.getShape(), resultTy.getShape(), reassociationMap, isDynamic)) { return rewriter.notifyMatchFailure( reshape, "tosa.reshape Attempting to collapse into an incompatible shape"); } SmallVector intermediateShape; if (!findIntermediateShape(operandTy.getShape(), resultTy.getShape(), intermediateShape, isDynamic)) { return rewriter.notifyMatchFailure( reshape, "tosa.reshape Cannot collapse into given shape"); } rewriter.replaceOpWithNewOp( reshape, resultTy, adaptor.getOperands()[0], reassociationMap); return success(); } }; class ReshapeConverterExpand : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { ShapedType operandTy = adaptor.input1().getType().cast(); ShapedType resultTy = reshape.getType().template cast(); 
bool isDynamic = !operandTy.hasStaticShape(); if (operandTy == resultTy) { rewriter.replaceOp(reshape, adaptor.getOperands()[0]); return success(); } if (isDynamic && operandTy.getRank() != 1) { return rewriter.notifyMatchFailure( reshape, "Cannot expand dynamic dims from more than one dimension"); } SmallVector reassociationMap; if (!createReassociationMapsForCollapse(rewriter, resultTy.getShape(), operandTy.getShape(), reassociationMap, isDynamic)) { return rewriter.notifyMatchFailure( reshape, "tosa.reshape Attempting to expand into an incompatible shape"); } SmallVector intermediateShape; if (!findIntermediateShape(operandTy.getShape(), resultTy.getShape(), intermediateShape, isDynamic) || intermediateShape != operandTy.getShape()) { return rewriter.notifyMatchFailure( reshape, "tosa.reshape Cannot expand into given shape"); } rewriter.replaceOpWithNewOp( reshape, resultTy, adaptor.getOperands()[0], reassociationMap); return success(); } }; class ReshapeConverterCollapseExpand : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(tosa::ReshapeOp reshape, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { ShapedType operandTy = adaptor.input1().getType().cast(); ShapedType resultTy = reshape.getType().template cast(); bool isDynamic = !operandTy.hasStaticShape(); if (operandTy == resultTy) { rewriter.replaceOp(reshape, adaptor.getOperands()[0]); return success(); } SmallVector intermediateShape; if (!findIntermediateShape(resultTy.getShape(), operandTy.getShape(), intermediateShape, isDynamic)) { return rewriter.notifyMatchFailure( reshape, "tosa.reshape Cannot identify an intermediate shape between " "the given two shapes"); } Value collapse = rewriter.create( reshape.getLoc(), RankedTensorType::get(intermediateShape, reshape.getType().getElementType()), adaptor.input1()); Value expand = rewriter.create(reshape.getLoc(), resultTy, collapse); rewriter.replaceOp(reshape, expand); 
return success(); } }; class TransposeConverter : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(tosa::TransposeOp op, PatternRewriter &rewriter) const final { DenseIntElementsAttr perms; if (!matchPattern(op.perms(), m_Constant(&perms))) { return failure(); } auto loc = op.getLoc(); auto input = op->getOperand(0); auto resultTy = op.getType().cast(); SmallVector dynDims; dynDims.resize(op->getResult(0).getType().cast().getRank()); SmallVector inputExprs; inputExprs.resize(resultTy.getRank()); auto operandTy = input.getType().cast(); for (auto permutation : llvm::enumerate(perms.getValues())) { auto index = permutation.index(); auto value = permutation.value().getZExtValue(); if (!operandTy.hasRank() || operandTy.isDynamicDim(index)) { dynDims[value] = rewriter.create(loc, input, index); } inputExprs[value] = rewriter.getAffineDimExpr(index); } SmallVector filteredDims = filterDynamicDims(dynDims); auto initTensor = rewriter.create( loc, filteredDims, resultTy.getShape(), resultTy.getElementType()); SmallVector affineMaps = { AffineMap::get(resultTy.getRank(), /*symbolCount=*/0, inputExprs, rewriter.getContext()), rewriter.getMultiDimIdentityMap(resultTy.getRank())}; rewriter.replaceOpWithNewOp