//===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ShrinkWrapping class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ShrinkWrapping.h"
#include "bolt/Core/MCPlus.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include <numeric>
#include <stack>

#define DEBUG_TYPE "shrinkwrapping"

using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
namespace bolt {

void CalleeSavedAnalysis::analyzeSaves() {
  ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
  StackReachingUses &SRU = Info.getStackReachingUses();
  auto &InsnToBB = Info.getInsnToBBMap();
  BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);

  LLVM_DEBUG(dbgs() << "Checking spill locations\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
    const MCInst *Prev = nullptr;
    for (MCInst &Inst : BB) {
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Blacklist weird stores we don't understand
        if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
            FIE->IsStoreFromReg) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        if (!FIE->IsStore || !FIE->IsStoreFromReg ||
            BlacklistedRegs[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is defined locally, it is not a callee-saved reg
        if (RD.isReachedBy(FIE->RegOrImm,
                           Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is accessed in another function, we are
        // probably dealing with a parameter passed in a stack -- do not mess
        // with it
        if (SRU.isStoreUsed(*FIE,
                            Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)),
            /*IncludeLocalAccesses=*/false) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // If this stack position is loaded elsewhere in another reg, we can't
        // update it, so blacklist it.
        if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
                                                  : SRU.expr_begin(BB))) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        // Ignore regs with multiple saves
        if (CalleeSaved[FIE->RegOrImm]) {
          BlacklistedRegs.set(FIE->RegOrImm);
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }

        CalleeSaved.set(FIE->RegOrImm);
        SaveFIEByReg[FIE->RegOrImm] = &*FIE;
        SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
        BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
        OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
        LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
                          << FIE->RegOrImm << "\n");
      }
      Prev = &Inst;
    }
  }
}

void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If stack offsets between saves/store don't agree with each other,
        // we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}

std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getSavedReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
  std::vector<MCInst *> Results;
  for (BinaryBasicBlock &BB : BF)
    for (MCInst &Inst : BB)
      if (getRestoredReg(Inst) == Reg)
        Results.push_back(&Inst);
  return Results;
}

CalleeSavedAnalysis::~CalleeSavedAnalysis() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      BC.MIB->removeAnnotation(Inst, getSaveTag());
      BC.MIB->removeAnnotation(Inst, getRestoreTag());
    }
  }
}

void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
  if (BlacklistedRegions[Offset] < Size)
    BlacklistedRegions[Offset] = Size;
}

bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
  for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
      return true;
  return false;
}

bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
                                                     int64_t Size) {
  bool HasConflict = false;
  for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
    std::pair<const int64_t, int64_t> &Elem = *Iter;
    if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
        (Offset != Elem.first || Size != Elem.second)) {
      Iter = AvailableRegions.erase(Iter);
      HasConflict = true;
      continue;
    }
    ++Iter;
  }
  if (HasConflict) {
    blacklistRegion(Offset, Size);
    return true;
  }
  return false;
}

void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}

void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode());
  unsigned NumDefs = InstInfo.getNumDefs();
  bool UsesFP = false;
  for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E;
       ++I) {
    MCOperand &Operand = Point.getOperand(I);
    if (!Operand.isReg())
      continue;
    if (Operand.getReg() == BC.MIB->getFramePointer()) {
      UsesFP = true;
      break;
    }
  }
  if (!UsesFP)
    return;

  // Setting up evaluation
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateSimple(Point, Output, SP, FP))
    return;

  // If the value is the same of FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}

void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed in a stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if its blacklisted or if
      // it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as available stack slot which we know how
      // to move it.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}

void StackLayoutModifier::classifyCFIs() {
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *&BB : BF.layout()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        LLVM_FALLTHROUGH;
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}

void StackLayoutModifier::scheduleChange(
    MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
  auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
      Inst, getTodoTag(), AllocatorId);
  WList.push_back(Item);
}

bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
  if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
    return false;

  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;

  return canCollapseRegion(FIE->StackOffset);
}

bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  if (CollapsedRegions.count(RegionAddr))
    return true;

  // Check if it is possible to readjust all accesses below RegionAddr
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
  ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
  if (!FIE)
    return false;
  int64_t RegionAddr = FIE->StackOffset;
  int64_t RegionSz = FIE->Size;
  return collapseRegion(DeletedPush, RegionAddr, RegionSz);
}

bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
        continue;
      BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
    }
  }
}

bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
  if (!IsInitialized)
    initialize();
  if (!IsSimple)
    return false;

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  if (InsertedRegions.count(RegionAddr))
    return true;

  // Check if we are going to screw up stack accesses at call sites that
  // pass parameters via stack
  if (!BlacklistedRegions.empty())
    return false;

  return true;
}

bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in collapseRegion
  // This is the value of SP before the allocation the user wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}

void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        Success = BC.MIB->createRestoreFromStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        Success = BC.MIB->createSaveToStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
      assert(Success);
    }
  }
}

void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}

std::atomic_uint64_t ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic_uint64_t ShrinkWrapping::SpillsMovedPushPopMode{0};

using BBIterTy = BinaryBasicBlock::iterator;

void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I = BV.find_first(); I != -1; I = BV.find_next(I)) {
        if (I == 0)
          continue;
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I = BV.find_first(); I > 0; I = BV.find_next(I))
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}

void ShrinkWrapping::pruneUnwantedCSRs() {
  BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;
    if (ParamRegs[I]) {
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (UsesByReg[I].empty()) {
      LLVM_DEBUG(
          dbgs()
          << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
          << "\n");
      CSA.CalleeSaved.reset(I);
      continue;
    }
    if (!CSA.HasRestores[I]) {
      LLVM_DEBUG(
          dbgs() << "Dismissing Callee-Saved Reg because it does not have "
                    "restores:"
                 << I << "\n");
      CSA.CalleeSaved.reset(I);
    }
  }
}

void ShrinkWrapping::computeSaveLocations() {
  SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J = UsesByReg[I].find_first(); J > 0;
           J = UsesByReg[I].find_next(J))
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        SavePos[I].insert(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J = UsesByReg[I].find_first(); J > 0;
               J = UsesByReg[I].find_next(J)) {
            dbgs() << "Idx " << J << ": ";
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(),
                                        std::numeric_limits<uint64_t>::max());
  BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr);
  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    for (MCInst *Pos : SavePos[I]) {
      BinaryBasicBlock *BB = InsnToBB[Pos];
      uint64_t Count = BB->getExecutionCount();
      if (Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
          Count < BestSaveCount[I]) {
        BestSavePos[I] = Pos;
        BestSaveCount[I] = Count;
      }
    }
  }
}

void ShrinkWrapping::computeDomOrder() {
  std::vector<MCPhysReg> Order;
  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    Order.push_back(I);
  }

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  auto &InsnToBB = Info.getInsnToBBMap();
  std::sort(Order.begin(), Order.end(),
            [&](const MCPhysReg &A, const MCPhysReg &B) {
              BinaryBasicBlock *BBA =
                  BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr;
              BinaryBasicBlock *BBB =
                  BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr;
              if (BBA == BBB)
                return A < B;
              if (!BBA && BBB)
                return false;
              if (BBA && !BBB)
                return true;
              if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B]))
                return true;
              if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A]))
                return false;
              return A < B;
            });

  for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
    DomOrder[Order[I]] = I;
}

bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
                                       uint64_t &TotalEstimatedWin) {
  const uint64_t CurSavingCost = CSA.SavingCost[CSR];
  if (!CSA.CalleeSaved[CSR])
    return false;

  uint64_t BestCount = BestSaveCount[CSR];
  BestPosSave = BestSavePos[CSR];
  bool ShouldMove = false;
  if (BestCount != std::numeric_limits<uint64_t>::max() &&
      BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) {
    LLVM_DEBUG({
      auto &InsnToBB = Info.getInsnToBBMap();
      dbgs() << "Better position for saves found in func " << BF.getPrintName()
             << " count << " << BF.getKnownExecutionCount() << "\n";
      dbgs() << "Reg: " << CSR
             << "; New BB: " << InsnToBB[BestPosSave]->getName()
             << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
    });
    TotalEstimatedWin += CurSavingCost - BestCount;
    ShouldMove = true;
  }

  if (!ShouldMove)
    return false;
  if (!BestPosSave) {
    LLVM_DEBUG({
      dbgs() << "Dropping opportunity because we don't know where to put "
                "stores -- total est. freq reduc: "
             << TotalEstimatedWin << "\n";
    });
    return false;
  }
  return true;
}

/// Auxiliar function used to create basic blocks for critical edges and update
/// the dominance frontier with these new locations
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating with blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBBs
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates instructions
      // and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier inplace
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}

SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  bool CannotPlace = false;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // into edges transitioning to the dominance frontier, otherwise we pull these
  // restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR))
      CannotPlace = true;
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which indicates
    // the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      LLVM_DEBUG(
          dbgs() << "Bailing on restore placement to avoid LP splitting\n");
      Frontier.clear();
      return Frontier;
    }
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *&BB : BF.layout()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  if (CannotPlace) {
    LLVM_DEBUG({
      dbgs() << "Dropping opportunity because restore placement failed"
                " -- total est. freq reduc: "
             << TotalEstimatedWin << "\n";
    });
    Frontier.clear();
    return Frontier;
  }
  return Frontier;
}

bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
                                          int64_t SaveOffset) {
  if (FA.requiresAlignment(BF)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " is not using push/pops due to function "
                "alignment requirements.\n";
    });
    return false;
  }
  for (MCInst *Save : CSA.getSavesByReg(CSR)) {
    if (!SLM.canCollapseRegion(Save)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
      return false;
    }
  }
  // Abort if one of the restores for this CSR is not a POP.
  for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
    if (!BC.MIB->isPop(*Load)) {
      LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
      return false;
    }
  }

  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Abort if we are inserting a push into an entry BB (offset -8) and this
  // func sets up a frame pointer.
  if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
      SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
    LLVM_DEBUG({
      dbgs() << "Reg " << CSR
             << " cannot insert region or we are "
                "trying to insert a push into entry bb.\n";
    });
    return false;
  }
  return true;
}

SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) {
      if (SPT.getStateBefore(*RIt)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(*RIt);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &*RIt;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}

void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *&BB : BF.layout()) {
    std::vector<MCInst *> CFIs;
    for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}

bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
  if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
      CSA.getRestoredReg(Inst) == CSR)
    return false;
  BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getTouchedRegs(Inst, BV);
  return BV[CSR];
}

void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}

void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;

  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t TotalEstimatedWin = 0;
    if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin))
      continue;
    SmallVector<ProgramPoint, 4> RestorePoints =
        doRestorePlacement(BestPosSave, I, TotalEstimatedWin);
    if (RestorePoints.empty())
      continue;

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
        MCInst &Inst = *I;
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}

namespace {
/// Helper function to identify whether two basic blocks created by splitting
/// a critical edge have the same contents.
bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
                            const BinaryBasicBlock &B) {
  if (A.succ_size() != B.succ_size())
    return false;
  if (A.succ_size() != 1)
    return false;

  if (*A.succ_begin() != *B.succ_begin())
    return false;

  if (A.size() != B.size())
    return false;

  // Compare instructions
  auto I = A.begin(), E = A.end();
  auto OtherI = B.begin(), OtherE = B.end();
  while (I != E && OtherI != OtherE) {
    if (I->getOpcode() != OtherI->getOpcode())
      return false;
    if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
          return true;
        }))
      return false;
    ++I;
    ++OtherI;
  }
  return true;
}
} // namespace

bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    if (!BB.getName().startswith(".LSplitEdge"))
      continue;
    for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) {
      BinaryBasicBlock &RBB = *RIter;
      if (&RBB == &BB)
        break;
      if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}

namespace {

// A special StackPointerTracking that compensates for our future plans
// in removing/adding insn.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  Optional<unsigned> AnnotationIndex;

protected:
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};

} // end anonymous namespace

void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
                                      int SPValPop) {
  MCInst *SavePoint = nullptr;
  for (BinaryBasicBlock &BB : BF) {
    for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter;
         ++InstIter) {
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg,
                                 Reg, SrcImm, StackPtrReg, StackOffset, Size,
                                 IsSimple, IsIndexed))
        continue;
      if (Reg != CSR || !IsStore || !IsSimple)
        continue;
      SavePoint = &*InstIter;
      break;
    }
    if (SavePoint)
      break;
  }
  assert(SavePoint);
  LLVM_DEBUG({
    dbgs() << "Now using as save point for reg " << CSR << " :";
    SavePoint->dump();
  });
  bool PrevAffectedZone = false;
  BinaryBasicBlock *PrevBB = nullptr;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  for (BinaryBasicBlock *BB : BF.layout()) {
    if (BB->size() == 0)
      continue;
    const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
    const bool InAffectedZoneAtBegin =
        (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
    bool InAffectedZone = InAffectedZoneAtBegin;
    for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
      const bool CurZone = DA.count(*InstIter, *SavePoint);
      if (InAffectedZone != CurZone) {
        auto InsertionIter = InstIter;
        ++InsertionIter;
        InAffectedZone = CurZone;
        if (InAffectedZone)
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
                                            SPValPop);
        else
          InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
                                            SPValPush);
        --InstIter;
      }
    }
    // Are we at the first basic block or hot-cold split point?
    if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
    } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
      if (InAffectedZoneAtBegin)
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
      else
        insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
    }
    PrevAffectedZone = InAffectedZoneAtEnd;
    PrevBB = BB;
  }
}

void ShrinkWrapping::rebuildCFIForSP() {
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
        BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
    }
  }

  int PrevSPVal = -8;
  BinaryBasicBlock *PrevBB = nullptr;
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (BinaryBasicBlock *BB : BF.layout()) {
    if (BB->size() == 0)
      continue;
    const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
    const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
    int SPVal = SPValAtBegin;
    for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
      const int CurVal = SPT.getStateAt(*Iter)->first;
      if (SPVal != CurVal) {
        auto InsertionIter = Iter;
        ++InsertionIter;
        Iter = BF.addCFIInstruction(
            BB, InsertionIter,
            MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
        SPVal = CurVal;
      }
    }
    if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
      BF.addCFIInstruction(
          BB, BB->begin(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    else if (SPValAtBegin != PrevSPVal)
      BF.addCFIInstruction(
          PrevBB, PrevBB->end(),
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
    PrevSPVal = SPValAtEnd;
    PrevBB = BB;
  }

  for (BinaryBasicBlock &BB : BF)
    for (auto I = BB.begin(); I != BB.end();)
      if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
        I = BB.eraseInstruction(I);
      else
        ++I;
}

MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
                                         const FrameIndexEntry &FIE,
                                         bool CreatePushOrPop) {
  MCInst NewInst;
  if (SPVal != StackPointerTracking::SUPERPOSITION &&
      SPVal != StackPointerTracking::EMPTY) {
    if (FIE.IsLoad) {
      if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
                                          FIE.StackOffset - SPVal, FIE.RegOrImm,
                                          FIE.Size)) {
        errs() << "createRestoreFromStack: not supported on this platform\n";
        abort();
      }
    } else {
      if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
                                     FIE.StackOffset - SPVal, FIE.RegOrImm,
                                     FIE.Size)) {
        errs() << "createSaveToStack: not supported on this platform\n";
        abort();
      }
    }
    if (CreatePushOrPop)
      BC.MIB->changeToPushOrPop(NewInst);
    return NewInst;
  }
  assert(FPVal != StackPointerTracking::SUPERPOSITION &&
         FPVal != StackPointerTracking::EMPTY);

  if (FIE.IsLoad) {
    if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
                                        FIE.StackOffset - FPVal, FIE.RegOrImm,
                                        FIE.Size)) {
      errs() << "createRestoreFromStack: not supported on this platform\n";
      abort();
    }
  } else {
    if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
                                   FIE.StackOffset - FPVal, FIE.RegOrImm,
                                   FIE.Size)) {
      errs() << "createSaveToStack: not supported on this platform\n";
      abort();
    }
  }
  return NewInst;
}

void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
  const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
  if (UpdatedCFIs.count(CFI))
    return;

  switch (CFI->getOperation()) {
  case MCCFIInstruction::OpDefCfa:
  case MCCFIInstruction::OpDefCfaRegister:
  case MCCFIInstruction::OpDefCfaOffset:
    CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
    break;
  case MCCFIInstruction::OpOffset:
  default:
    break;
  }

  UpdatedCFIs.insert(CFI);
}

BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
                                                BBIterTy Pos, unsigned Reg,
                                                bool isPush, int Sz,
                                                int64_t NewOffset) {
  if (isPush) {
    for (uint32_t Idx : DeletedPushCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createOffset(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
      ++Pos;
    }
  } else {
    for (uint32_t Idx : DeletedPopCFIs[Reg]) {
      Pos = BF.addCFIPseudo(&BB, Pos, Idx);
      updateCFIInstOffset(*Pos++, NewOffset);
    }
    if (HasDeletedOffsetCFIs[Reg]) {
      Pos = BF.addCFIInstruction(
          &BB, Pos,
          MCCFIInstruction::createSameValue(
              nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
      ++Pos;
    }
  }
  return Pos;
}

BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
                                          BinaryBasicBlock *CurBB,
                                          const WorklistItem &Item,
                                          int64_t SPVal, int64_t FPVal) {
  // Trigger CFI reconstruction for this CSR if necessary - writing to
  // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
  if ((Item.FIEToInsert.IsStore &&
       !DeletedPushCFIs[Item.AffectedReg].empty()) ||
      (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
      HasDeletedOffsetCFIs[Item.AffectedReg]) {
    if (Item.Action == WorklistItem::InsertPushOrPop) {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
      else
        PopOffsetByReg[Item.AffectedReg] = SPVal;
    } else {
      if (Item.FIEToInsert.IsStore)
        PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
      else
        PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
    }
  }

  LLVM_DEBUG({
    dbgs() << "Creating stack access with SPVal = " << SPVal
           << "; stack offset = " << Item.FIEToInsert.StackOffset
           << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
           << "\n";
  });
  MCInst NewInst =
      createStackAccess(SPVal, FPVal, Item.FIEToInsert,
                        Item.Action == WorklistItem::InsertPushOrPop);
  if (InsertionPoint != CurBB->end()) {
    LLVM_DEBUG({
      dbgs() << "Adding before Inst: ";
      InsertionPoint->dump();
      dbgs() << "the following inst: ";
      NewInst.dump();
    });
    BBIterTy Iter =
        CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
    return ++Iter;
  }
  CurBB->addInstruction(std::move(NewInst));
  LLVM_DEBUG(dbgs() << "Adding to BB!\n");
  return CurBB->end();
}

BBIterTy ShrinkWrapping::processInsertionsList(
    BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
    std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
  bool HasInsertions = false;
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;
    HasInsertions = true;
    break;
  }

  if (!HasInsertions)
    return InsertionPoint;

  assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
           SPVal != StackPointerTracking::EMPTY) ||
          (FPVal != StackPointerTracking::SUPERPOSITION &&
           FPVal != StackPointerTracking::EMPTY)) &&
         "Cannot insert if we have no idea of the stack state here");

  // Revert the effect of PSPT for this location, we want SP Value before
  // insertions
  if (InsertionPoint == CurBB->end()) {
    for (WorklistItem &Item : TodoList) {
      if (Item.Action != WorklistItem::InsertPushOrPop)
        continue;
      if (Item.FIEToInsert.IsStore)
        SPVal += Item.FIEToInsert.Size;
      if (Item.FIEToInsert.IsLoad)
        SPVal -= Item.FIEToInsert.Size;
    }
  }

  // Reorder POPs to obey the correct dominance relation between them
  std::stable_sort(TodoList.begin(), TodoList.end(),
                   [&](const WorklistItem &A, const WorklistItem &B) {
                     if ((A.Action != WorklistItem::InsertPushOrPop ||
                          !A.FIEToInsert.IsLoad) &&
                         (B.Action != WorklistItem::InsertPushOrPop ||
                          !B.FIEToInsert.IsLoad))
                       return false;
                     if ((A.Action != WorklistItem::InsertPushOrPop ||
                          !A.FIEToInsert.IsLoad))
                       return true;
                     if ((B.Action != WorklistItem::InsertPushOrPop ||
                          !B.FIEToInsert.IsLoad))
                       return false;
                     return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
                   });

  // Process insertions
  for (WorklistItem &Item : TodoList) {
    if (Item.Action == WorklistItem::Erase ||
        Item.Action == WorklistItem::ChangeToAdjustment)
      continue;

    InsertionPoint =
        processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsStore)
      SPVal -= Item.FIEToInsert.Size;
    if (Item.Action == WorklistItem::InsertPushOrPop &&
        Item.FIEToInsert.IsLoad)
      SPVal += Item.FIEToInsert.Size;
  }
  return InsertionPoint;
}

bool ShrinkWrapping::processInsertions() {
  PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
  PSPT.run();

  bool Changes = false;
  for (BinaryBasicBlock &BB : BF) {
    // Process insertions before some inst.
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      MCInst &Inst = *I;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      Changes = true;
      std::vector<WorklistItem> List = *TodoList;
      LLVM_DEBUG({
        dbgs() << "Now processing insertions in " << BB.getName()
               << " before inst: ";
        Inst.dump();
      });
      auto Iter = I;
      std::pair<int, int> SPTState =
          *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
      I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
    }
    // Process insertions at the end of bb
    auto WRI = Todo.find(&BB);
    if (WRI != Todo.end()) {
      std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
      processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
                            SPTState.second);
      Changes = true;
    }
  }
  return Changes;
}

void ShrinkWrapping::processDeletions() {
  LivenessAnalysis &LA = Info.getLivenessAnalysis();
  for (BinaryBasicBlock &BB : BF) {
    for (auto II = BB.begin(); II != BB.end(); ++II) {
      MCInst &Inst = *II;
      auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
          Inst, getAnnotationIndex());
      if (!TodoList)
        continue;
      // Process all deletions
      for (WorklistItem &Item : *TodoList) {
        if (Item.Action != WorklistItem::Erase &&
            Item.Action != WorklistItem::ChangeToAdjustment)
          continue;

        if (Item.Action == WorklistItem::ChangeToAdjustment) {
          // Is flag reg alive across this func?
          bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
          if (int Sz = BC.MIB->getPushSize(Inst)) {
            BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
            continue;
          }
          if (int Sz = BC.MIB->getPopSize(Inst)) {
            BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
            continue;
          }
        }

        LLVM_DEBUG({
          dbgs() << "Erasing: ";
          BC.printInstruction(dbgs(), Inst);
        });
        II = std::prev(BB.eraseInstruction(II));
        break;
      }
    }
  }
}

void ShrinkWrapping::rebuildCFI() {
  const bool FP = Info.getStackPointerTracking().HasFramePointer;
  Info.invalidateAll();
  if (!FP) {
    rebuildCFIForSP();
    Info.invalidateAll();
  }
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
      continue;
    const int64_t SPValPush = PushOffsetByReg[I];
    const int64_t SPValPop = PopOffsetByReg[I];
    insertUpdatedCFI(I, SPValPush, SPValPop);
    Info.invalidateAll();
  }
}

bool ShrinkWrapping::perform() {
  HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
  PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
  DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);

  if (BF.checkForAmbiguousJumpTables()) {
    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
                      << ".\n");
    // We could call disambiguateJumpTables here, but it is probably not worth
    // the cost (of duplicating potentially large jump tables that could regress
    // dcache misses). Moreover, ambiguous JTs are rare and coming from code
    // written in assembly language. Just bail.
    return false;
  }
  SLM.initialize();
  CSA.compute();
  classifyCSRUses();
  pruneUnwantedCSRs();
  computeSaveLocations();
  computeDomOrder();
  moveSaveRestores();
  LLVM_DEBUG({
    dbgs() << "Func before shrink-wrapping: \n";
    BF.dump();
  });
  SLM.performChanges();
  // Early exit if processInsertions doesn't detect any todo items
  if (!processInsertions())
    return false;
  processDeletions();
  if (foldIdenticalSplitEdges()) {
    const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
    (void)Stats;
    LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
                      << " redundant split edge BBs (" << Stats.second
                      << " bytes) for " << BF.getPrintName() << "\n");
  }
  rebuildCFI();
  // We may have split edges, creating BBs that need correct branching
  BF.fixBranches();
  LLVM_DEBUG({
    dbgs() << "Func after shrink-wrapping: \n";
    BF.dump();
  });
  return true;
}

void ShrinkWrapping::printStats() {
  outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
         << " spills inserting load/stores and " << SpillsMovedPushPopMode
         << " spills inserting push/pops\n";
}

// Operators necessary as a result of using MCAnnotation
raw_ostream &operator<<(raw_ostream &OS,
                        const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
  OS << "SWTodo[";
  const char *Sep = "";
  for (const ShrinkWrapping::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case ShrinkWrapping::WorklistItem::Erase:
      OS << "Erase";
      break;
    case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
      OS << "ChangeToAdjustment";
      break;
    case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
      OS << "InsertLoadOrStore";
      break;
    case ShrinkWrapping::WorklistItem::InsertPushOrPop:
      OS << "InsertPushOrPop";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

raw_ostream &
operator<<(raw_ostream &OS,
           const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
  OS << "SLMTodo[";
  const char *Sep = "";
  for (const StackLayoutModifier::WorklistItem &Item : Vec) {
    OS << Sep;
    switch (Item.Action) {
    case StackLayoutModifier::WorklistItem::None:
      OS << "None";
      break;
    case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
      OS << "AdjustLoadStoreOffset";
      break;
    case StackLayoutModifier::WorklistItem::AdjustCFI:
      OS << "AdjustCFI";
      break;
    }
    Sep = ", ";
  }
  OS << "]";
  return OS;
}

bool operator==(const ShrinkWrapping::WorklistItem &A,
                const ShrinkWrapping::WorklistItem &B) {
  return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
          A.Adjustment == B.Adjustment &&
          A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
          A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
          A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
          A.FIEToInsert.Size == B.FIEToInsert.Size &&
          A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
          A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
}

bool operator==(const StackLayoutModifier::WorklistItem &A,
                const StackLayoutModifier::WorklistItem &B) {
  return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
}

} // end namespace bolt
} // end namespace llvm
