/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef NS_WINDOWS_DLL_INTERCEPTOR_H_
#define NS_WINDOWS_DLL_INTERCEPTOR_H_

#include "mozilla/Assertions.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/UniquePtr.h"
#include "nsWindowsHelpers.h"

#include <wchar.h>
#include <windows.h>
#include <winternl.h>

/*
 * Simple function interception.
 *
 * We have two separate mechanisms for intercepting a function: We can use the
 * built-in nop space, if it exists, or we can create a detour.
 *
 * Using the built-in nop space works as follows: On x86-32, DLL functions
 * begin with a two-byte nop (mov edi, edi) and are preceded by five bytes of
 * NOP instructions.
 *
 * When we detect a function with this prelude, we do the following:
 *
 * 1. Write a long jump to our interceptor function into the five bytes of NOPs
 *    before the function.
 *
 * 2. Write a short jump -5 into the two-byte nop at the beginning of the function.
 *
 * This mechanism is nice because it's thread-safe.  It's even safe to do if
 * another thread is currently running the function we're modifying!
 *
 * When the WindowsDllNopSpacePatcher is destroyed, we overwrite the short jump
 * but not the long jump, so re-intercepting the same function won't work,
 * because its prelude won't match.
 *
 *
 * Unfortunately nop space patching doesn't work on functions which don't have
 * this magic prelude (and in particular, x86-64 never has the prelude).  So
 * when we can't use the built-in nop space, we fall back to using a detour,
 * which works as follows:
 *
 * 1. Save first N bytes of OrigFunction to trampoline, where N is a
 *    number of bytes >= 5 that are instruction aligned.
 *
 * 2. Replace first 5 bytes of OrigFunction with a jump to the Hook
 *    function.
 *
 * 3. After N bytes of the trampoline, add a jump to OrigFunction+N to
 *    continue original program flow.
 *
 * 4. Hook function needs to call the trampoline during its execution,
 *    to invoke the original function (so address of trampoline is
 *    returned).
 *
 * When the WindowsDllDetourPatcher object is destructed, OrigFunction is
 * patched again to jump directly to the trampoline instead of going through
 * the hook function. As such, re-intercepting the same function won't work, as
 * jump instructions are not supported.
 *
 * Note that this is not thread-safe.  Sad day.
 *
 */

#include <stdint.h>

// Copies the next NBYTES bytes of the original function into the trampoline
// and advances both cursors by that amount.
//
// The macro expands in the caller's scope and relies on these locals being
// visible there: `tramp` / `nTrampBytes` (destination buffer and write
// position) and `origBytes` / `nOrigBytes` (source buffer and read position).
//
// NBYTES is evaluated exactly once, so passing an expression with side
// effects keeps the copy length and both cursor advances in agreement.
#define COPY_CODES(NBYTES)  do {    \
  const int copyCodesLen_ = (NBYTES);    \
  memcpy(&tramp[nTrampBytes], &origBytes[nOrigBytes], copyCodesLen_);    \
  nOrigBytes += copyCodesLen_;             \
  nTrampBytes += copyCodesLen_;            \
} while (0)

namespace mozilla {
namespace internal {

class AutoVirtualProtect
{
public:
  AutoVirtualProtect(void* aFunc, size_t aSize, DWORD aProtect)
    : mFunc(aFunc), mSize(aSize), mNewProtect(aProtect), mOldProtect(0),
      mSuccess(false)
  {}

  ~AutoVirtualProtect()
  {
    if (mSuccess) {
      VirtualProtectEx(GetCurrentProcess(), mFunc, mSize, mOldProtect,
                       &mOldProtect);
    }
  }

  bool Protect()
  {
    mSuccess = !!VirtualProtectEx(GetCurrentProcess(), mFunc, mSize,
                                  mNewProtect, &mOldProtect);
    if (!mSuccess) {
      // printf("VirtualProtectEx failed! %d\n", GetLastError());
    }
    return mSuccess;
  }

private:
  void* const mFunc;
  size_t const mSize;
  DWORD const mNewProtect;
  DWORD mOldProtect;
  bool mSuccess;
};

/**
 * Hooks DLL functions by using the hot-patch "nop space": five bytes of
 * NOP/INT3 padding in front of the function plus a two-byte `mov edi, edi`
 * at its entry.  Only implemented for x86-32; on other architectures Init()
 * does nothing and AddHook() always fails.
 *
 * Typical use: Init() with the module name, then AddHook() per function.
 * On destruction (x86-32 only) the two-byte entry of every patched function
 * is restored to `mov edi, edi`; the long jump written into the padding is
 * left in place.
 */
class WindowsDllNopSpacePatcher
{
  typedef uint8_t* byteptr_t;

  // Module whose exports are hooked; null until Init() succeeds.
  HMODULE mModule;

  // Dumb array for remembering the addresses of functions we've patched.
  // (This should be nsTArray, but non-XPCOM code uses this class.)
  static const size_t maxPatchedFns = 16;
  byteptr_t mPatchedFns[maxPatchedFns];
  size_t mPatchedFnsLen;

public:
  WindowsDllNopSpacePatcher()
    : mModule(0)
    , mPatchedFnsLen(0)
  {}

#if defined(_M_IX86)
  ~WindowsDllNopSpacePatcher()
  {
    // Restore the mov edi, edi to the beginning of each function we patched.

    for (size_t i = 0; i < mPatchedFnsLen; i++) {
      byteptr_t fn = mPatchedFns[i];

      // Ensure we can write to the code.
      AutoVirtualProtect protect(fn, 2, PAGE_EXECUTE_READWRITE);
      if (!protect.Protect()) {
        continue;
      }

      // mov edi, edi
      *((uint16_t*)fn) = 0xff8b;

      // I don't think this is actually necessary, but it can't hurt.
      FlushInstructionCache(GetCurrentProcess(),
                            /* ignored */ nullptr,
                            /* ignored */ 0);
    }
  }

  /**
   * Loads (or takes a reference on) the module whose functions will be
   * hooked.  Leaves the patcher uninitialized -- so that AddHook() fails --
   * when nop space patching is considered incompatible with the process or
   * when the module cannot be loaded.
   */
  void Init(const char* aModuleName)
  {
    if (!IsCompatible()) {
#if defined(MOZILLA_INTERNAL_API)
      NS_WARNING("NOP space patching is unavailable for compatibility reasons");
#endif
      return;
    }

    mModule = LoadLibraryExA(aModuleName, nullptr, 0);
    if (!mModule) {
      //printf("LoadLibraryEx for '%s' failed\n", aModuleName);
      return;
    }
  }

  /**
   * NVIDIA Optimus drivers utilize Microsoft Detours 2.x to patch functions
   * in our address space. There is a bug in Detours 2.x that causes it to
   * patch at the wrong address when attempting to detour code that is already
   * NOP space patched. This function is an effort to detect the presence of
   * this NVIDIA code in our address space and disable NOP space patching if it
   * is. We also check AppInit_DLLs since this is the mechanism that the Optimus
   * drivers use to inject into our process.
   *
   * @return false if a known-incompatible DLL is loaded, or if user32 is not
   *         loaded yet and AppInit_DLLs lists nvinit; true otherwise
   *         (including when the registry cannot be read).
   */
  static bool IsCompatible()
  {
    // These DLLs are known to have bad interactions with this style of patching
    const wchar_t* kIncompatibleDLLs[] = {
      L"detoured.dll",
      L"_etoured.dll",
      L"nvd3d9wrap.dll",
      L"nvdxgiwrap.dll"
    };
    // See if the infringing DLLs are already loaded
    for (unsigned int i = 0; i < mozilla::ArrayLength(kIncompatibleDLLs); ++i) {
      if (GetModuleHandleW(kIncompatibleDLLs[i])) {
        return false;
      }
    }
    if (GetModuleHandleW(L"user32.dll")) {
      // user32 is loaded but the infringing DLLs are not, assume we're safe to
      // proceed.
      return true;
    }
    // If user32 has not loaded yet, check AppInit_DLLs to ensure that Optimus
    // won't be loaded once user32 is initialized.
    HKEY hkey = NULL;
    if (!RegOpenKeyExW(HKEY_LOCAL_MACHINE,
          L"SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Windows",
          0, KEY_QUERY_VALUE, &hkey)) {
      nsAutoRegKey key(hkey);
      DWORD numBytes = 0;
      const wchar_t kAppInitDLLs[] = L"AppInit_DLLs";
      // Query for required buffer size
      LONG status = RegQueryValueExW(hkey, kAppInitDLLs, nullptr,
                                     nullptr, nullptr, &numBytes);
      mozilla::UniquePtr<wchar_t[]> data;
      if (!status) {
        // Allocate the buffer and query for the actual data.  The size is
        // rounded up to whole wchar_t units so that an odd byte count cannot
        // overrun the buffer, and one extra element is reserved for a
        // terminator because the registry does not guarantee that string
        // values are stored NUL-terminated.
        const size_t numChars =
          (numBytes + sizeof(wchar_t) - 1) / sizeof(wchar_t) + 1;
        data = mozilla::MakeUnique<wchar_t[]>(numChars);
        status = RegQueryValueExW(hkey, kAppInitDLLs, nullptr,
                                  nullptr, (LPBYTE)data.get(), &numBytes);
        if (!status) {
          // On success numBytes is the number of bytes actually written,
          // which cannot exceed the size we passed in, so this index is
          // always inside the buffer.
          data[numBytes / sizeof(wchar_t)] = L'\0';
        }
      }
      if (!status) {
        // For each token, split up the filename components and then check the
        // name of the file.
        const wchar_t kDelimiters[] = L", ";
        wchar_t* tokenContext = nullptr;
        wchar_t* token = wcstok_s(data.get(), kDelimiters, &tokenContext);
        while (token) {
          wchar_t fname[_MAX_FNAME] = {0};
          if (!_wsplitpath_s(token, nullptr, 0, nullptr, 0,
                             fname, mozilla::ArrayLength(fname),
                             nullptr, 0)) {
            // nvinit.dll is responsible for bootstrapping the DLL injection, so
            // that is the library that we check for here
            const wchar_t kNvInitName[] = L"nvinit";
            // The length includes the terminator, so this is a
            // case-insensitive comparison of the whole base name.
            if (!_wcsnicmp(fname, kNvInitName,
                           mozilla::ArrayLength(kNvInitName))) {
              return false;
            }
          }
          token = wcstok_s(nullptr, kDelimiters, &tokenContext);
        }
      }
    }
    return true;
  }

  /**
   * Hooks the export aName of the module given to Init().
   *
   * @param aName     exported function name
   * @param aHookDest address of the replacement function
   * @param aOrigFunc receives the address to call to reach the original
   *                  function (set by WriteHook on success)
   * @return true if the hook was written; false if the patcher is not
   *         initialized, patching is incompatible, the export is missing,
   *         the memory cannot be made writable, or the function does not
   *         have the expected prelude.
   */
  bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc)
  {
    if (!mModule) {
      return false;
    }

    if (!IsCompatible()) {
#if defined(MOZILLA_INTERNAL_API)
      NS_WARNING("NOP space patching is unavailable for compatibility reasons");
#endif
      return false;
    }

    MOZ_RELEASE_ASSERT(mPatchedFnsLen < maxPatchedFns, "No room for the hook");

    byteptr_t fn = reinterpret_cast<byteptr_t>(GetProcAddress(mModule, aName));
    if (!fn) {
      //printf ("GetProcAddress failed\n");
      return false;
    }

    fn = ResolveRedirectedAddress(fn);

    // Ensure we can read and write starting at fn - 5 (for the long jmp we're
    // going to write) and ending at fn + 2 (for the short jmp up to the long
    // jmp). These bytes may span two pages with different protection.
    AutoVirtualProtect protectBefore(fn - 5, 5, PAGE_EXECUTE_READWRITE);
    AutoVirtualProtect protectAfter(fn, 2, PAGE_EXECUTE_READWRITE);
    if (!protectBefore.Protect() || !protectAfter.Protect()) {
      return false;
    }

    bool rv = WriteHook(fn, aHookDest, aOrigFunc);

    if (rv) {
      // Remember the function so the destructor can restore its entry.
      mPatchedFns[mPatchedFnsLen] = fn;
      mPatchedFnsLen++;
    }

    return rv;
  }

  /**
   * Writes the hook for the function at aFn.  The caller must already have
   * made [aFn - 5, aFn + 2) readable and writable.
   *
   * @return false, without modifying anything, if the bytes around aFn do
   *         not match the expected nop-space prelude.
   */
  bool WriteHook(byteptr_t aFn, intptr_t aHookDest, void** aOrigFunc)
  {
    // Check that the 5 bytes before aFn are NOP's or INT 3's,
    // and that the 2 bytes after aFn are mov(edi, edi).
    //
    // It's safe to read aFn[-5] because we set it to PAGE_EXECUTE_READWRITE
    // before calling WriteHook.

    for (int i = -5; i <= -1; i++) {
      if (aFn[i] != 0x90 && aFn[i] != 0xcc) { // nop or int 3
        return false;
      }
    }

    // mov edi, edi.  Yes, there are two ways to encode the same thing:
    //
    //   0x89ff == mov r/m, r
    //   0x8bff == mov r, r/m
    //
    // where "r" is register and "r/m" is register or memory.  Windows seems to
    // use 8bff; I include 89ff out of paranoia.
    if ((aFn[0] != 0x8b && aFn[0] != 0x89) || aFn[1] != 0xff) {
      return false;
    }

    // Write a long jump into the space above the function.  The jump
    // occupies aFn[-5..-1], so its displacement is relative to aFn.
    aFn[-5] = 0xe9; // jmp
    *((intptr_t*)(aFn - 4)) = aHookDest - (uintptr_t)(aFn); // target displacement

    // Set aOrigFunc here, because after this point, aHookDest might be called,
    // and aHookDest might use the aOrigFunc pointer.
    *aOrigFunc = aFn + 2;

    // Short jump up into our long jump.
    *((uint16_t*)(aFn)) = 0xf9eb; // jmp $-5

    // I think this routine is safe without this, but it can't hurt.
    FlushInstructionCache(GetCurrentProcess(),
                          /* ignored */ nullptr,
                          /* ignored */ 0);

    return true;
  }

private:
  /**
   * Follows a redirecting stub at the function entry, if one is recognized,
   * and returns the address of the code it leads to; otherwise returns
   * aOriginalFunction unchanged.
   */
  static byteptr_t ResolveRedirectedAddress(const byteptr_t aOriginalFunction)
  {
    // If function entry is jmp rel8 stub to the internal implementation, we
    // resolve redirected address from the jump target.
    if (aOriginalFunction[0] == 0xeb) {
      int8_t offset = (int8_t)(aOriginalFunction[1]);
      if (offset <= 0) {
        // Bail out for negative offset: probably already patched by some
        // third-party code.
        return aOriginalFunction;
      }

      // Every byte that the short jump skips over must be a NOP.
      for (int8_t i = 0; i < offset; i++) {
        if (aOriginalFunction[2 + i] != 0x90) {
          // Bail out on insufficient nop space.
          return aOriginalFunction;
        }
      }

      return aOriginalFunction + 2 + offset;
    }

    // If function entry is jmp [disp32] such as used by kernel32,
    // we resolve redirected address from import table.
    if (aOriginalFunction[0] == 0xff && aOriginalFunction[1] == 0x25) {
      return (byteptr_t)(**((uint32_t**) (aOriginalFunction + 2)));
    }

    return aOriginalFunction;
  }
#else
  void Init(const char* aModuleName)
  {
    // Not implemented except on x86-32.
  }

  bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc)
  {
    // Not implemented except on x86-32.
    return false;
  }
#endif
};

class WindowsDllDetourPatcher
{
  typedef unsigned char* byteptr_t;
public:
  // Starts with no module loaded and no hook memory allocated; Init() must
  // be called before hooks can be added.
  WindowsDllDetourPatcher()
    : mModule(nullptr)
    , mHookPage(nullptr)
    , mMaxHooks(0)
    , mCurHooks(0)
  {}

  // Disables every hook counted by mCurHooks.  The hook memory is laid out as
  // consecutive kHookSize-byte slots, each starting with the encoded address
  // of the patched function followed by the trampoline code.  The patched
  // function is rewritten so that it jumps straight into its trampoline
  // instead of into the hook function.
  ~WindowsDllDetourPatcher()
  {
    byteptr_t slot = mHookPage;
    for (int hook = 0; hook < mCurHooks; ++hook, slot += kHookSize) {
      // Number of bytes at the start of the patched function that cover the
      // jump operand we are about to rewrite.
#if defined(_M_IX86)
      const size_t patchLen = 1 + sizeof(intptr_t);
#elif defined(_M_X64)
      const size_t patchLen = 2 + sizeof(intptr_t);
#else
#error "Unknown processor type"
#endif
      byteptr_t patchedFn = (byteptr_t)DecodePointer(*((byteptr_t*)slot));

      // The original code must be writable before we touch it; skip this
      // hook if that cannot be arranged.
      AutoVirtualProtect unlock(patchedFn, patchLen, PAGE_EXECUTE_READWRITE);
      if (!unlock.Protect()) {
        continue;
      }

      // The trampoline code begins right after the stored pointer.
      const intptr_t trampolineEntry = (intptr_t)(slot + sizeof(void*));
#if defined(_M_IX86)
      // Only rewrite if the JMP from CreateTrampoline is still where we
      // expect it to be.
      if (patchedFn[0] == 0xE9) {
        // rel32 displacement, measured from the end of the 5-byte JMP
        *((intptr_t*)(patchedFn + 1)) =
          trampolineEntry - (intptr_t)(patchedFn + 5);
      }
#elif defined(_M_X64)
      // Only rewrite if the MOV R11 from CreateTrampoline is still where we
      // expect it to be.
      if (patchedFn[0] == 0x49 && patchedFn[1] == 0xBB) {
        // absolute 64-bit immediate
        *((intptr_t*)(patchedFn + 2)) = trampolineEntry;
      }
#else
#error "Unknown processor type"
#endif
    }
  }

  void Init(const char* aModuleName, int aNumHooks = 0)
  {
    if (mModule) {
      return;
    }

    mModule = LoadLibraryExA(aModuleName, nullptr, 0);
    if (!mModule) {
      //printf("LoadLibraryEx for '%s' failed\n", aModuleName);
      return;
    }

    int hooksPerPage = 4096 / kHookSize;
    if (aNumHooks == 0) {
      aNumHooks = hooksPerPage;
    }

    mMaxHooks = aNumHooks + (hooksPerPage % aNumHooks);

    mHookPage = (byteptr_t)VirtualAllocEx(GetCurrentProcess(), nullptr,
                                          mMaxHooks * kHookSize,
                                          MEM_COMMIT | MEM_RESERVE,
                                          PAGE_EXECUTE_READ);
    if (!mHookPage) {
      mModule = 0;
      return;
    }
  }

  // True once Init() has loaded the module and allocated the hook memory.
  bool Initialized() { return mModule != nullptr; }

  // Hooks the export aName of the module given to Init() so that calls go to
  // aHookDest.  On success *aOrigFunc receives the trampoline through which
  // the original function can still be called.
  //
  // Returns false if the patcher is not initialized, the export cannot be
  // found, or no trampoline could be created.
  bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc)
  {
    if (!mModule) {
      return false;
    }

    void* exported = (void*)GetProcAddress(mModule, aName);
    if (!exported) {
      //printf ("GetProcAddress failed\n");
      return false;
    }

    void* target = ResolveRedirectedAddress((byteptr_t)exported);

    // CreateTrampoline reports failure by leaving *aOrigFunc null.
    CreateTrampoline(target, aHookDest, aOrigFunc);
    //printf ("CreateTrampoline failed\n") would go here when it is null.
    return *aOrigFunc != nullptr;
  }

protected:
  // Size of a memory page in bytes, as assumed by this class.
  const static int kPageSize = 4096;
  // Size in bytes of one hook slot inside mHookPage.  A slot holds the
  // encoded address of the hooked function followed by its trampoline code.
  const static int kHookSize = 128;

  // Module whose exports are hooked; null while uninitialized.
  HMODULE mModule;
  // Start of the memory holding the hook slots (allocated in Init()).
  byteptr_t mHookPage;
  // Number of slots allocated in mHookPage.
  int mMaxHooks;
  // Number of slots the destructor walks when disabling hooks.
  int mCurHooks;

  // rex bits: a REX prefix has kRexOpcode in its high nibble and the W, R, X
  // and B flags in its low nibble.
  static const BYTE kMaskHighNibble = 0xF0;
  static const BYTE kRexOpcode = 0x40;
  static const BYTE kMaskRexW = 0x08;
  static const BYTE kMaskRexR = 0x04;
  static const BYTE kMaskRexX = 0x02;
  static const BYTE kMaskRexB = 0x01;

  // mod r/m bits: the byte is laid out as mod (2 bits), reg (3 bits),
  // r/m (3 bits).  The kMod* values are the possible contents of the mod
  // field, already shifted into place.
  static const BYTE kRegFieldShift = 3;
  static const BYTE kMaskMod = 0xC0;
  static const BYTE kMaskReg = 0x38;
  static const BYTE kMaskRm = 0x07;
  // r/m value meaning that a SIB byte follows the ModR/M byte.
  static const BYTE kRmNeedSib = 0x04;
  static const BYTE kModReg = 0xC0;
  static const BYTE kModDisp32 = 0x80;
  static const BYTE kModDisp8 = 0x40;
  static const BYTE kModNoRegDisp = 0x00;
  // r/m value that, combined with kModNoRegDisp, selects a bare 32-bit
  // displacement.
  static const BYTE kRmNoRegDispDisp32 = 0x05;

  // sib bits
  static const BYTE kMaskSibScale = 0xC0;
  static const BYTE kMaskSibIndex = 0x38;
  static const BYTE kMaskSibBase = 0x07;
  static const BYTE kSibBaseEbp = 0x05;

  // Register bit IDs.
  static const BYTE kRegAx = 0x0;
  static const BYTE kRegCx = 0x1;
  static const BYTE kRegDx = 0x2;
  static const BYTE kRegBx = 0x3;
  static const BYTE kRegSp = 0x4;
  static const BYTE kRegBp = 0x5;
  static const BYTE kRegSi = 0x6;
  static const BYTE kRegDi = 0x7;

  // Special ModR/M codes.  These indicate operands that cannot be simply
  // memcpy-ed.  They are negative so that they can be told apart from the
  // byte counts that CountModRmSib normally returns.
  // Operand is a 64-bit RIP-relative address.
  static const int kModOperand64 = -2;
  // Operand is not yet handled by our trampoline.
  static const int kModUnknown = -1;

  /**
   * Returns the number of bytes taken by the ModR/M byte, SIB (if present)
   * and the instruction's operand.  In special cases, the special MODRM codes
   * above are returned.
   * aModRm points to the ModR/M byte of the instruction.
   * On return, aSubOpcode (if present) is filled with the subopcode/register
   * code found in the ModR/M byte.
   */
  int CountModRmSib(const BYTE *aModRm, BYTE* aSubOpcode = nullptr)
  {
    if (!aModRm) {
      MOZ_ASSERT(aModRm, "Missing ModRM byte");
      return kModUnknown;
    }
    int numBytes = 1; // Start with 1 for mod r/m byte itself
    switch (*aModRm & kMaskMod) {
      case kModReg:
        return numBytes;
      case kModDisp8:
        numBytes += 1;
        break;
      case kModDisp32:
        numBytes += 4;
        break;
      case kModNoRegDisp:
        if ((*aModRm & kMaskRm) == kRmNoRegDispDisp32) {
#if defined(_M_X64)
          if (aSubOpcode) {
            *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
          }
          return kModOperand64;
#else
          // On IA-32, all ModR/M instruction modes address memory relative to 0
          numBytes += 4;
#endif
        } else if (((*aModRm & kMaskRm) == kRmNeedSib &&
             (*(aModRm + 1) & kMaskSibBase) == kSibBaseEbp)) {
          numBytes += 4;
        }
        break;
      default:
        // This should not be reachable
        MOZ_ASSERT_UNREACHABLE("Impossible value for modr/m byte mod bits");
        return kModUnknown;
    }
    if ((*aModRm & kMaskRm) == kRmNeedSib) {
      // SIB byte
      numBytes += 1;
    }
    if (aSubOpcode) {
      *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
    }
    return numBytes;
  }

#if defined(_M_X64)
  // To patch for JMP and JE

  // Kinds of control transfer that JumpPatch can emit.
  enum JumpType {
    Je,
    Jne,
    Jmp,
    Call
  };

  // Describes one absolute 64-bit jump or call that has to be written into a
  // code buffer: where in the buffer it goes, where it leads, and what kind
  // of transfer it is.
  struct JumpPatch {
    JumpPatch()
      : mHookOffset(0)
      , mJumpAddress(0)
      , mType(JumpType::Jmp)
    {}

    JumpPatch(size_t aOffset, intptr_t aAddress, JumpType aType = JumpType::Jmp)
      : mHookOffset(aOffset)
      , mJumpAddress(aAddress)
      , mType(aType)
    {}

    // Writes the machine code for this patch into aCode, starting at
    // mHookOffset, and returns the offset of the first byte after it.
    //
    //   Jmp:  14 bytes -- JMP [RIP+0] followed by the 8-byte target.
    //   Je:   16 bytes -- JNE over the 14-byte jump, then the jump.
    //   Jne:  16 bytes -- JE over the 14-byte jump, then the jump.
    //   Call: 16 bytes -- CALL [RIP+2], a short JMP over the target, then
    //         the 8-byte target.
    size_t GenerateJump(uint8_t* aCode)
    {
      uint8_t* cursor = aCode + mHookOffset;
      const int64_t target = mJumpAddress;

      // A conditional jump is built from the inverted condition, which skips
      // the unconditional absolute jump emitted below.
      switch (mType) {
        case JumpType::Je:
          *cursor++ = 0x75;   // JNE RIP+14
          *cursor++ = 14;
          break;
        case JumpType::Jne:
          *cursor++ = 0x74;   // JE RIP+14
          *cursor++ = 14;
          break;
        default:
          break;
      }

      // Near call/jmp, absolute indirect, address given in r/m32
      if (mType == JumpType::Call) {
        // CALL [RIP+2]: the target is stored 2 bytes past the end of the
        // CALL instruction, right after the short jump.
        const int32_t callDisp = 2;
        cursor[0] = 0xff;
        cursor[1] = 0x15;
        memcpy(cursor + 2, &callDisp, sizeof(callDisp));
        cursor[6] = 0xeb;     // JMP +8 (jump over mJumpAddress)
        cursor[7] = 8;
        memcpy(cursor + 8, &target, sizeof(target));
        return static_cast<size_t>(cursor - aCode) + 16;
      }

      // JMP [RIP+0]: the target immediately follows the instruction.
      const int32_t jmpDisp = 0;
      cursor[0] = 0xff;
      cursor[1] = 0x25;
      memcpy(cursor + 2, &jmpDisp, sizeof(jmpDisp));
      memcpy(cursor + 6, &target, sizeof(target));
      return static_cast<size_t>(cursor - aCode) + 14;
    }

    size_t mHookOffset;     // where in the code buffer the patch is written
    intptr_t mJumpAddress;  // absolute destination address
    JumpType mType;
  };

#endif

  // Bit flags, one per instruction-prefix group, that CountPrefixBytes
  // combines to report which groups it encountered.
  enum ePrefixGroupBits
  {
    eNoPrefixes   = 0x0,
    ePrefixGroup1 = 0x1,
    ePrefixGroup2 = 0x2,
    ePrefixGroup3 = 0x4,
    ePrefixGroup4 = 0x8
  };

  // Counts the instruction-prefix bytes found at aBytes[aBytesIndex] onwards.
  //
  // aOutGroupBits receives the combination of ePrefixGroupBits flags for the
  // prefix groups that were seen.
  //
  // Returns the number of prefix bytes (0 if the first byte is not a
  // recognized prefix), or -1 if two prefixes of the same group occur.
  int CountPrefixBytes(byteptr_t aBytes, const int aBytesIndex,
                       unsigned char* aOutGroupBits)
  {
    *aOutGroupBits = eNoPrefixes;

    for (int cursor = aBytesIndex; ; ++cursor) {
      // Classify the current byte; anything that is not a known prefix ends
      // the scan.
      unsigned char group;
      switch (aBytes[cursor]) {
        case 0xF0: // LOCK
        case 0xF2: // REPNZ
        case 0xF3: // REP / REPZ
          group = ePrefixGroup1;
          break;

        case 0x2E: // CS override / branch not taken
        case 0x36: // SS override
        case 0x3E: // DS override / branch taken
        case 0x64: // FS override
        case 0x65: // GS override
          group = ePrefixGroup2;
          break;

        case 0x66: // operand size override
          group = ePrefixGroup3;
          break;

        case 0x67: // Address size override
          group = ePrefixGroup4;
          break;

        default:
          return cursor - aBytesIndex;
      }

      // At most one prefix per group is accepted.
      if (*aOutGroupBits & group) {
        return -1;
      }
      *aOutGroupBits |= group;
    }
  }

  // Assembles a ModR/M byte.  aModBits must already be positioned in the top
  // two bits (one of the kMod* values); aReg and aRm are plain 3-bit register
  // IDs for the reg and r/m fields respectively.
  BYTE BuildModRmByte(BYTE aModBits, BYTE aReg, BYTE aRm)
  {
    const int regField = aReg << kRegFieldShift;

    // Every argument has to fit inside its own field.
    MOZ_ASSERT((aRm & kMaskRm) == aRm);
    MOZ_ASSERT((aModBits & kMaskMod) == aModBits);
    MOZ_ASSERT((regField & kMaskReg) == regField);

    return aModBits | regField | aRm;
  }

  void CreateTrampoline(void* aOrigFunction, intptr_t aDest, void** aOutTramp)
  {
    *aOutTramp = nullptr;

    AutoVirtualProtect protectHookPage(mHookPage, mMaxHooks * kHookSize,
                                       PAGE_EXECUTE_READWRITE);
    if (!protectHookPage.Protect()) {
      return;
    }

    byteptr_t tramp = FindTrampolineSpace();
    if (!tramp) {
      return;
    }

    // We keep the address of the original function in the first bytes of
    // the trampoline buffer
    *((void**)tramp) = EncodePointer(aOrigFunction);
    tramp += sizeof(void*);

    byteptr_t origBytes = (byteptr_t)aOrigFunction;

    // # of bytes of the original function that we can overwrite.
    int nOrigBytes = 0;

#if defined(_M_IX86)
    int pJmp32 = -1;
    while (nOrigBytes < 5) {
      // Understand some simple instructions that might be found in a
      // prologue; we might need to extend this as necessary.
      //
      // Note!  If we ever need to understand jump instructions, we'll
      // need to rewrite the displacement argument.
      unsigned char prefixGroups;
      int numPrefixBytes = CountPrefixBytes(origBytes, nOrigBytes, &prefixGroups);
      if (numPrefixBytes < 0 || (prefixGroups & (ePrefixGroup3 | ePrefixGroup4))) {
        // Either the prefix sequence was bad, or there are prefixes that
        // we don't currently support (groups 3 and 4)
        MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
        return;
      }
      nOrigBytes += numPrefixBytes;
      if (origBytes[nOrigBytes] >= 0x88 &&
          origBytes[nOrigBytes] <= 0x8B) {
        // various MOVs
        ++nOrigBytes;
        int len = CountModRmSib(origBytes + nOrigBytes);
        if (len < 0) {
          MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
          return;
        }
        nOrigBytes += len;
      } else if (origBytes[nOrigBytes] == 0xA1) {
        // MOV eax, [seg:offset]
        nOrigBytes += 5;
      } else if (origBytes[nOrigBytes] == 0xB8) {
        // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
        nOrigBytes += 5;
      } else if (origBytes[nOrigBytes] == 0x33 &&
                 (origBytes[nOrigBytes+1] & kMaskMod) == kModReg) {
        // XOR r32, r32
        nOrigBytes += 2;
      } else if ((origBytes[nOrigBytes] & 0xf8) == 0x40) {
        // INC r32
        nOrigBytes += 1;
      } else if (origBytes[nOrigBytes] == 0x83) {
        // ADD|ODR|ADC|SBB|AND|SUB|XOR|CMP r/m, imm8
        unsigned char b = origBytes[nOrigBytes + 1];
        if ((b & 0xc0) == 0xc0) {
          // ADD|ODR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
          nOrigBytes += 3;
        } else {
          // bail
          MOZ_ASSERT_UNREACHABLE("Unrecognized bit opcode sequence");
          return;
        }
      } else if (origBytes[nOrigBytes] == 0x68) {
        // PUSH with 4-byte operand
        nOrigBytes += 5;
      } else if ((origBytes[nOrigBytes] & 0xf0) == 0x50) {
        // 1-byte PUSH/POP
        nOrigBytes++;
      } else if (origBytes[nOrigBytes] == 0x6A) {
        // PUSH imm8
        nOrigBytes += 2;
      } else if (origBytes[nOrigBytes] == 0xe9) {
        pJmp32 = nOrigBytes;
        // jmp 32bit offset
        nOrigBytes += 5;
      } else if (origBytes[nOrigBytes] == 0xff &&
                 origBytes[nOrigBytes + 1] == 0x25) {
        // jmp [disp32]
        nOrigBytes += 6;
      } else if (origBytes[nOrigBytes] == 0xc2) {
        // ret imm16.  We can't handle this but it happens.  We don't ASSERT but we do fail to hook.
#if defined(MOZILLA_INTERNAL_API)
        NS_WARNING("Cannot hook method -- RET opcode found");
#endif
        return;
      } else {
        //printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n", origBytes[nOrigBytes]);
        MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
        return;
      }
    }

    // The trampoline is a copy of the instructions that we just traced,
    // followed by a jump that we add below.
    memcpy(tramp, aOrigFunction, nOrigBytes);
#elif defined(_M_X64)
    // The number of bytes used by the trampoline.
    int nTrampBytes = 0;
    bool foundJmp = false;

    while (nOrigBytes < 13) {
      // If we found JMP 32bit offset, we require that the next bytes must
      // be NOP or INT3.  There is no reason to copy them.
      // TODO: This used to trigger for Je as well.  Now that I allow
      // instructions after CALL and JE, I don't think I need that.
      // The only real value of this condition is that if code follows a JMP
      // then it's _probably_ the target of a JMP somewhere else and we
      // will be overwriting it, which would be tragic.  This seems
      // highly unlikely.
      if (foundJmp) {
        if (origBytes[nOrigBytes] == 0x90 || origBytes[nOrigBytes] == 0xcc) {
          nOrigBytes++;
          continue;
        }
        MOZ_ASSERT_UNREACHABLE("Opcode sequence includes commands after JMP");
        return;
      }
      if (origBytes[nOrigBytes] == 0x0f) {
        COPY_CODES(1);
        if (origBytes[nOrigBytes] == 0x1f) {
          // nop (multibyte)
          COPY_CODES(1);
          if ((origBytes[nOrigBytes] & 0xc0) == 0x40 &&
              (origBytes[nOrigBytes] & 0x7) == 0x04) {
            COPY_CODES(3);
          } else {
            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
            return;
          }
        } else if (origBytes[nOrigBytes] == 0x05) {
          // syscall
          COPY_CODES(1);
        } else if (origBytes[nOrigBytes] == 0x10 ||
                   origBytes[nOrigBytes] == 0x11) {
          // SSE: movups xmm, xmm/m128
          //      movups xmm/m128, xmm
          COPY_CODES(1);
          int nModRmSibBytes = CountModRmSib(&origBytes[nOrigBytes]);
          if (nModRmSibBytes < 0) {
            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
            return;
          } else {
            COPY_CODES(nModRmSibBytes);
          }
        } else if (origBytes[nOrigBytes] == 0x84) {
          // je rel32
          JumpPatch jump(nTrampBytes - 1,  // overwrite the 0x0f we copied above
                          (intptr_t)(origBytes + nOrigBytes + 5 +
                                     *(reinterpret_cast<int32_t*>(origBytes + nOrigBytes + 1))),
                          JumpType::Je);
          nTrampBytes = jump.GenerateJump(tramp);
          nOrigBytes += 5;
        } else {
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
      } else if (origBytes[nOrigBytes] == 0x40 ||
                 origBytes[nOrigBytes] == 0x41) {
        // Plain REX or REX.B
        COPY_CODES(1);
        if ((origBytes[nOrigBytes] & 0xf0) == 0x50) {
          // push/pop with Rx register
          COPY_CODES(1);
        } else if (origBytes[nOrigBytes] >= 0xb8 && origBytes[nOrigBytes] <= 0xbf) {
          // mov r32, imm32
          COPY_CODES(5);
        } else {
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
      } else if (origBytes[nOrigBytes] == 0x44) {
        // REX.R
        COPY_CODES(1);

        // TODO: Combine with the "0x89" case below in the REX.W section
        if (origBytes[nOrigBytes] == 0x89) {
          // mov r/m32, r32
          COPY_CODES(1);
          int len = CountModRmSib(origBytes + nOrigBytes);
          if (len < 0) {
            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
            return;
          }
          COPY_CODES(len);
        } else {
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
       } else if (origBytes[nOrigBytes] == 0x45) {
        // REX.R & REX.B
        COPY_CODES(1);

        if (origBytes[nOrigBytes] == 0x33) {
          // xor r32, r32
          COPY_CODES(2);
        } else {
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
      } else if ((origBytes[nOrigBytes] & 0xfa) == 0x48) {
        // REX.W | REX.WR | REX.WRB | REX.WB
        COPY_CODES(1);

        if (origBytes[nOrigBytes] == 0x81 &&
            (origBytes[nOrigBytes + 1] & 0xf8) == 0xe8) {
          // sub r, dword
          COPY_CODES(6);
        } else if (origBytes[nOrigBytes] == 0x83 &&
                   (origBytes[nOrigBytes + 1] & 0xf8) == 0xe8) {
          // sub r, byte
          COPY_CODES(3);
        } else if (origBytes[nOrigBytes] == 0x83 &&
                   (origBytes[nOrigBytes + 1] & (kMaskMod|kMaskReg)) == kModReg) {
          // add r, byte
          COPY_CODES(3);
        } else if (origBytes[nOrigBytes] == 0x83 &&
                   (origBytes[nOrigBytes + 1] & 0xf8) == 0x60) {
          // and [r+d], imm8
          COPY_CODES(5);
        } else if (origBytes[nOrigBytes] == 0x2b &&
                   (origBytes[nOrigBytes + 1] & kMaskMod) == kModReg) {
          // sub r64, r64
          COPY_CODES(2);
        } else if (origBytes[nOrigBytes] == 0x85) {
          // 85 /r => TEST r/m32, r32
          if ((origBytes[nOrigBytes + 1] & 0xc0) == 0xc0) {
            COPY_CODES(2);
          } else {
            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
            return;
          }
        } else if ((origBytes[nOrigBytes] & 0xfd) == 0x89) {
          // MOV r/m64, r64 | MOV r64, r/m64
          BYTE reg;
          int len = CountModRmSib(origBytes + nOrigBytes + 1, &reg);
          if (len < 0) {
            MOZ_ASSERT(len == kModOperand64);
            if (len != kModOperand64) {
              return;
            }
            nOrigBytes += 2;   // skip the MOV and MOD R/M bytes

            // The instruction MOVs 64-bit data from a RIP-relative memory
            // address (determined with a 32-bit offset from RIP) into a
            // 64-bit register.
            int64_t* absAddr =
              reinterpret_cast<int64_t*>(origBytes + nOrigBytes + 4 +
                                         *reinterpret_cast<int32_t*>(origBytes + nOrigBytes));
            nOrigBytes += 4;

            if (reg == kRegAx) {
              // Destination is RAX.  Encode instruction as MOVABS with a
              // 64-bit absolute address as its immediate operand.
              tramp[nTrampBytes] = 0xa1;
              ++nTrampBytes;
              int64_t** trampOperandPtr = reinterpret_cast<int64_t**>(tramp + nTrampBytes);
              *trampOperandPtr = absAddr;
              nTrampBytes += 8;
            } else {
              // The MOV must be done in two steps.  First, we MOVABS the
              // absolute 64-bit address into our target register.
              // Then, we MOV from that address into the register
              // using register-indirect addressing.
              tramp[nTrampBytes] = 0xb8 + reg;
              ++nTrampBytes;
              int64_t** trampOperandPtr = reinterpret_cast<int64_t**>(tramp + nTrampBytes);
              *trampOperandPtr = absAddr;
              nTrampBytes += 8;
              tramp[nTrampBytes] = 0x48;
              tramp[nTrampBytes+1] = 0x8b;
              tramp[nTrampBytes+2] = BuildModRmByte(kModNoRegDisp, reg, reg);
              nTrampBytes += 3;
            }
          } else {
            COPY_CODES(len+1);
          }
        } else if (origBytes[nOrigBytes] == 0xc7) {
          // MOV r/m64, imm32
          if (origBytes[nOrigBytes + 1] == 0x44) {
            // MOV [r64+disp8], imm32
            // ModR/M + SIB + disp8 + imm32
            COPY_CODES(8);
          } else {
            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
            return;
          }
        } else if (origBytes[nOrigBytes] == 0xff) {
          // JMP /4
          if ((origBytes[nOrigBytes + 1] & 0xc0) == 0x0 &&
              (origBytes[nOrigBytes + 1] & 0x07) == 0x5) {
            // [rip+disp32]
            // convert JMP 32bit offset to JMP 64bit direct
            JumpPatch jump(nTrampBytes - 1,  // overwrite the REX.W/REX.WR we copied above
                           *reinterpret_cast<intptr_t*>(origBytes + nOrigBytes + 6 +
                                                        *reinterpret_cast<int32_t*>(origBytes + nOrigBytes + 2)),
                           JumpType::Jmp);
            nTrampBytes = jump.GenerateJump(tramp);
            nOrigBytes += 6;
            foundJmp = true;
          } else {
            // not support yet!
            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
            return;
          }
        } else if (origBytes[nOrigBytes] == 0x63 &&
                   (origBytes[nOrigBytes + 1] & kMaskMod) == kModReg) {
          // movsxd r64, r32 (move + sign extend)
          COPY_CODES(2);
        } else {
          // not support yet!
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
      } else if (origBytes[nOrigBytes] == 0x66) {
        // operand override prefix
        COPY_CODES(1);
        // This is the same as the x86 version
        if (origBytes[nOrigBytes] >= 0x88 && origBytes[nOrigBytes] <= 0x8B) {
          // various MOVs
          unsigned char b = origBytes[nOrigBytes + 1];
          if (((b & 0xc0) == 0xc0) ||
              (((b & 0xc0) == 0x00) &&
               ((b & 0x07) != 0x04) && ((b & 0x07) != 0x05))) {
            // REG=r, R/M=r or REG=r, R/M=[r]
            COPY_CODES(2);
          } else if ((b & 0xc0) == 0x40) {
            if ((b & 0x07) == 0x04) {
              // REG=r, R/M=[SIB + disp8]
              COPY_CODES(4);
            } else {
              // REG=r, R/M=[r + disp8]
              COPY_CODES(3);
            }
          } else {
            // complex MOV, bail
            MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
            return;
          }
        }
      } else if ((origBytes[nOrigBytes] & 0xf0) == 0x50) {
        // 1-byte push/pop
        COPY_CODES(1);
      } else if (origBytes[nOrigBytes] == 0x65) {
        // GS prefix
        //
        // The entry of GetKeyState on Windows 10 has the following code.
        // 65 48 8b 04 25 30 00 00 00    mov   rax,qword ptr gs:[30h]
        // (GS prefix + REX + MOV (0x8b) ...)
        if (origBytes[nOrigBytes + 1] == 0x48 &&
            (origBytes[nOrigBytes + 2] >= 0x88 && origBytes[nOrigBytes + 2] <= 0x8b)) {
          COPY_CODES(3);
          int len = CountModRmSib(origBytes + nOrigBytes);
          if (len < 0) {
            // no way to support this yet.
            MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
            return;
          }
          COPY_CODES(len);
        } else {
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
      } else if (origBytes[nOrigBytes] == 0x80 &&
                 origBytes[nOrigBytes + 1] == 0x3d) {
        // cmp byte ptr [rip-relative address], imm8
        // We'll compute the absolute address and do the cmp in r11

        // push r11 (to save the old value)
        tramp[nTrampBytes] = 0x49;
        ++nTrampBytes;
        tramp[nTrampBytes] = 0x53;
        ++nTrampBytes;

        byteptr_t absAddr =
          reinterpret_cast<byteptr_t>(origBytes + nOrigBytes + 7 +
                                      *reinterpret_cast<int32_t*>(origBytes + nOrigBytes + 2));
        nOrigBytes += 6;

        // mov r11, absolute address
        tramp[nTrampBytes] = 0x49;
        ++nTrampBytes;
        tramp[nTrampBytes] = 0xbb;
        ++nTrampBytes;

        *reinterpret_cast<byteptr_t*>(tramp + nTrampBytes) = absAddr;
        nTrampBytes += 8;

        // cmp byte ptr [r11],...
        tramp[nTrampBytes] = 0x41;
        ++nTrampBytes;
        tramp[nTrampBytes] = 0x80;
        ++nTrampBytes;
        tramp[nTrampBytes] = 0x3b;
        ++nTrampBytes;

        // ...imm8
        COPY_CODES(1);

        // pop r11 (doesn't affect the flags from the cmp)
        tramp[nTrampBytes] = 0x49;
        ++nTrampBytes;
        tramp[nTrampBytes] = 0x5b;
        ++nTrampBytes;
      } else if (origBytes[nOrigBytes] == 0x90) {
        // nop
        COPY_CODES(1);
      } else if ((origBytes[nOrigBytes] & 0xf8) == 0xb8) {
        // MOV r32, imm32
        COPY_CODES(5);
      } else if (origBytes[nOrigBytes] == 0x33) {
        // xor r32, r/m32
        COPY_CODES(2);
      } else if (origBytes[nOrigBytes] == 0xf6) {
        // test r/m8, imm8 (used by ntdll on Windows 10 x64)
        // (no flags are affected by near jmp since there is no task switch,
        // so it is ok for a jmp to be written immediately after a test)
        BYTE subOpcode = 0;
        int nModRmSibBytes = CountModRmSib(&origBytes[nOrigBytes + 1], &subOpcode);
        if (nModRmSibBytes < 0 || subOpcode != 0) {
          // Unsupported
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
        COPY_CODES(2 + nModRmSibBytes);
      } else if (origBytes[nOrigBytes] == 0x85) {
        // test r/m32, r32
        int nModRmSibBytes = CountModRmSib(&origBytes[nOrigBytes + 1]);
        if (nModRmSibBytes < 0) {
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
        COPY_CODES(1 + nModRmSibBytes);
      } else if (origBytes[nOrigBytes] == 0xd1 &&
                  (origBytes[nOrigBytes+1] & kMaskMod) == kModReg) {
        // bit shifts/rotates : (SA|SH|RO|RC)(R|L) r32
        // (e.g. 0xd1 0xe0 is SAL, 0xd1 0xc8 is ROR)
        COPY_CODES(2);
      } else if (origBytes[nOrigBytes] == 0xc3) {
        // ret
        COPY_CODES(1);
      } else if (origBytes[nOrigBytes] == 0xcc) {
        // int 3
        COPY_CODES(1);
      } else if (origBytes[nOrigBytes] == 0xe8 ||
                 origBytes[nOrigBytes] == 0xe9) {
        // CALL (0xe8) or JMP (0xe9) 32bit offset
        foundJmp = origBytes[nOrigBytes] == 0xe9;
        JumpPatch jump(nTrampBytes,
                       (intptr_t)(origBytes + nOrigBytes + 5 +
                                  *(reinterpret_cast<int32_t*>(origBytes + nOrigBytes + 1))),
                       origBytes[nOrigBytes] == 0xe8 ? JumpType::Call : JumpType::Jmp);
        nTrampBytes = jump.GenerateJump(tramp);
        nOrigBytes += 5;
      } else if (origBytes[nOrigBytes] == 0x74 || // je rel8 (0x74)
                 origBytes[nOrigBytes] == 0x75) { // jne rel8 (0x75)
        char offset = origBytes[nOrigBytes + 1];
        auto jumpType = JumpType::Je;
        if (origBytes[nOrigBytes] == 0x75)
          jumpType = JumpType::Jne;
        JumpPatch jump(nTrampBytes,
          (intptr_t)(origBytes + nOrigBytes + 2 + offset), jumpType);
        nTrampBytes = jump.GenerateJump(tramp);
        nOrigBytes += 2;
      } else if (origBytes[nOrigBytes] == 0xff) {
        if ((origBytes[nOrigBytes + 1] & (kMaskMod|kMaskReg)) == 0xf0) {
          // push r64
          COPY_CODES(2);
        } else if (origBytes[nOrigBytes + 1] == 0x25) {
          // jmp absolute indirect m32
          foundJmp = true;
          int32_t offset = *(reinterpret_cast<int32_t*>(origBytes + nOrigBytes + 2));
          int64_t* ptrToJmpDest = reinterpret_cast<int64_t*>(origBytes + nOrigBytes + 6 + offset);
          intptr_t jmpDest = static_cast<intptr_t>(*ptrToJmpDest);
          JumpPatch jump(nTrampBytes, jmpDest, JumpType::Jmp);
          nTrampBytes = jump.GenerateJump(tramp);
          nOrigBytes += 6;
        } else if ((origBytes[nOrigBytes + 1] & (kMaskMod|kMaskReg)) == BuildModRmByte(kModReg, 2, 0)) {
          // CALL reg (ff nn)
          COPY_CODES(2);
        } else {
          MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
          return;
        }
      } else {
        MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
        return;
      }
    }
#else
#error "Unknown processor type"
#endif

    if (nOrigBytes > 100) {
      //printf ("Too big!");
      return;
    }

    // target address of the final jmp instruction in the trampoline
    byteptr_t trampDest = origBytes + nOrigBytes;

#if defined(_M_IX86)
    if (pJmp32 >= 0) {
      // Jump directly to the original target of the jump instead of jumping to the
      // original function.
      // Adjust jump target displacement to jump location in the trampoline.
      *((intptr_t*)(tramp + pJmp32 + 1)) += origBytes - tramp;
    } else {
      tramp[nOrigBytes] = 0xE9; // jmp
      *((intptr_t*)(tramp + nOrigBytes + 1)) =
        (intptr_t)trampDest - (intptr_t)(tramp + nOrigBytes + 5); // target displacement
    }
#elif defined(_M_X64)
    // If we found a JMP, we don't need to add another instruction. However,
    // if we found a _conditional_ jump or a CALL (or no control operations
    // at all) then we still need to run the rest of aOrigFunction.
    if (!foundJmp) {
      JumpPatch patch(nTrampBytes, reinterpret_cast<intptr_t>(trampDest));
      patch.GenerateJump(tramp);
    }
#endif

    // The trampoline is now valid.
    *aOutTramp = tramp;

    // ensure we can modify the original code
    AutoVirtualProtect protect(aOrigFunction, nOrigBytes, PAGE_EXECUTE_READWRITE);
    if (!protect.Protect()) {
      return;
    }

#if defined(_M_IX86)
    // now modify the original bytes
    origBytes[0] = 0xE9; // jmp
    *((intptr_t*)(origBytes + 1)) =
      aDest - (intptr_t)(origBytes + 5); // target displacement
#elif defined(_M_X64)
    // mov r11, address
    origBytes[0] = 0x49;
    origBytes[1] = 0xbb;

    *((intptr_t*)(origBytes + 2)) = aDest;

    // jmp r11
    origBytes[10] = 0x41;
    origBytes[11] = 0xff;
    origBytes[12] = 0xe3;
#endif
  }

  /**
   * Reserves the next unused trampoline slot.
   *
   * Slots are handed out in order: the slot for index mCurHooks starts at
   * mHookPage + mCurHooks * kHookSize, and mCurHooks is advanced once the
   * slot has been claimed.
   *
   * @return the start of the reserved slot, or 0 (null) when mCurHooks has
   *         already reached mMaxHooks.
   */
  byteptr_t FindTrampolineSpace()
  {
    if (mCurHooks < mMaxHooks) {
      // Compute the slot address from the current index, then consume the
      // index so the next caller gets the following slot.
      byteptr_t slot = mHookPage + mCurHooks * kHookSize;
      ++mCurHooks;
      return slot;
    }

    // Every slot has already been handed out.
    return 0;
  }

  static void* ResolveRedirectedAddress(const byteptr_t aOriginalFunction)
  {
    // If function entry is jmp rel8 stub to the internal implementation, we
    // resolve redirected address from the jump target.
    if (aOriginalFunction[0] == 0xeb) {
      int8_t offset = (int8_t)(aOriginalFunction[1]);
      if (offset <= 0) {
        // Bail out for negative offset: probably already patched by some
        // third-party code.
        return aOriginalFunction;
      }

      for (int8_t i = 0; i < offset; i++) {
        if (aOriginalFunction[2 + i] != 0x90) {
          // Bail out on insufficient nop space.
          return aOriginalFunction;
        }
      }

      return aOriginalFunction + 2 + offset;
    }

#if defined(_M_IX86)
    // If function entry is jmp [disp32] such as used by kernel32,
    // we resolve redirected address from import table.
    if (aOriginalFunction[0] == 0xff && aOriginalFunction[1] == 0x25) {
      return (void*)(**((uint32_t**) (aOriginalFunction + 2)));
    }
#elif defined(_M_X64)
    if (aOriginalFunction[0] == 0xe9) {
      // require for TestDllInterceptor with --disable-optimize
      int32_t offset = *((int32_t*)(aOriginalFunction + 1));
      return aOriginalFunction + 5 + offset;
    }
#endif

    return aOriginalFunction;
  }
};

} // namespace internal

/**
 * Front end for hooking functions of a single DLL.
 *
 * Combines the two patchers declared in the internal namespace: AddHook()
 * first tries the nop space patcher and falls back to the detour patcher,
 * while AddDetour() goes straight to the detour patcher.  The detour patcher
 * is only initialized the first time AddDetour() needs it.
 */
class WindowsDllInterceptor
{
  internal::WindowsDllNopSpacePatcher mNopSpacePatcher;
  internal::WindowsDllDetourPatcher mDetourPatcher;

  // Module name given to Init(); null until Init() has been called.
  const char* mModuleName;
  // Hook count given to Init(); forwarded to the detour patcher's Init().
  int mNHooks;

public:
  WindowsDllInterceptor()
    : mModuleName(nullptr)
    , mNHooks(0)
  {}

  /**
   * Records the module to intercept and initializes the nop space patcher.
   * Calls made after a module name has been recorded are ignored.
   *
   * @param aModuleName name of the DLL whose functions will be hooked.
   * @param aNumHooks   value later passed to the detour patcher's Init().
   */
  void Init(const char* aModuleName, int aNumHooks = 0)
  {
    if (!mModuleName) {
      mNHooks = aNumHooks;
      mModuleName = aModuleName;
      mNopSpacePatcher.Init(aModuleName);

      // mDetourPatcher is deliberately left alone here: it allocates memory
      // and might never be needed, so AddDetour() sets it up on demand.
    }
  }

  /**
   * Hook/detour the method aName from the DLL we set in Init so that it calls
   * aHookDest instead.  Returns the original method pointer in aOrigFunc
   * and returns true if successful.
   *
   * A nop space patch is used if possible, with a detour as the fallback.
   * This should be the preferred method for adding hooks.  Returns false
   * without attempting anything when Init has not recorded a module name.
   *
   * IMPORTANT: If you use this method, please add your case to the
   * TestDllInterceptor in order to detect future failures.  Even if this
   * succeeds now, updates to the hooked DLL could cause it to fail in
   * the future.
   */
  bool AddHook(const char* aName, intptr_t aHookDest, void** aOrigFunc)
  {
    // Short-circuit evaluation keeps the order: module check, then the nop
    // space patch, and the detour only when the nop space patch failed.
    return mModuleName &&
           (mNopSpacePatcher.AddHook(aName, aHookDest, aOrigFunc) ||
            AddDetour(aName, aHookDest, aOrigFunc));
  }

  /**
   * Detour the method aName from the DLL we set in Init so that it calls
   * aHookDest instead.  Returns the original method pointer in aOrigFunc
   * and returns true if successful.
   *
   * Generally, code should not call this method directly.  Use AddHook unless
   * there is a specific need to avoid nop space patches.  Returns false
   * without attempting anything when Init has not recorded a module name.
   *
   * IMPORTANT: If you use this method, please add your case to the
   * TestDllInterceptor in order to detect future failures.  Even if this
   * succeeds now, updates to the detoured DLL could cause it to fail in
   * the future.
   */
  bool AddDetour(const char* aName, intptr_t aHookDest, void** aOrigFunc)
  {
    if (mModuleName) {
      // First detour request: bring up the detour patcher now.
      if (!mDetourPatcher.Initialized()) {
        mDetourPatcher.Init(mModuleName, mNHooks);
      }

      return mDetourPatcher.AddHook(aName, aHookDest, aOrigFunc);
    }

    return false;
  }
};

} // namespace mozilla

#endif /* NS_WINDOWS_DLL_INTERCEPTOR_H_ */
