/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE. */

#include "cl_common.hpp"

#include "platform/context.hpp"
#include "platform/command.hpp"
#include "platform/memory.hpp"
#include <cmath>

#ifdef _WIN32
#include <d3d10_1.h>
#include "cl_d3d9_amd.hpp"
#include "cl_d3d10_amd.hpp"
#include "cl_d3d11_amd.hpp"
#endif  //_WIN32

#include <cstring>

/*! \addtogroup API
 *  @{
 *
 *  \addtogroup CL_MemObjs
 *
 *  Memory objects are categorized into two types: buffer objects, and image
 *  objects. A buffer object stores a one-dimensional collection of elements
 *  whereas an image object is used to store a two- or three- dimensional
 *  texture, frame-buffer or image.
 *
 *  Elements of a buffer object can be a scalar data type (such as an int,
 *  float), vector data type, or a user-defined structure. An image object is
 *  used to represent a buffer that can be used as a texture or a frame-buffer.
 *  The elements of an image object are selected from a list of predefined
 *  image formats. The minimum number of elements in a memory object is one.
 *
 *  @{
 *
 *  \addtogroup CL_CreatingBuffer
 *
 *  @{
 */

/*! \brief Helper function to validate cl_mem_flags
 *
 *  \param flags is the bit-field to validate.
 *
 *  \param chkReadWrite selects whether CL_MEM_KERNEL_READ_AND_WRITE takes part
 *  in the access qualifier check. When true, the bit is accepted on its own or
 *  combined with CL_MEM_READ_WRITE; when false, the bit is ignored by that
 *  check.
 *
 *  \return true if flags are valid, otherwise - false
 */
static bool validateFlags(cl_mem_flags flags, bool chkReadWrite = false) {
  // Device access qualifiers: when any is present it must be exactly one of
  // the accepted combinations.
  cl_bitfield accessMask = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
  if (chkReadWrite) {
    accessMask |= CL_MEM_KERNEL_READ_AND_WRITE;
  }

  const cl_bitfield access = flags & accessMask;
  if (access != 0) {
    bool accessOk = (access == CL_MEM_READ_WRITE) || (access == CL_MEM_WRITE_ONLY) ||
        (access == CL_MEM_READ_ONLY);
    if (!accessOk && chkReadWrite) {
      accessOk = (access == CL_MEM_KERNEL_READ_AND_WRITE) ||
          (access == (CL_MEM_KERNEL_READ_AND_WRITE | CL_MEM_READ_WRITE));
    }
    if (!accessOk) {
      return false;
    }
  }

  const bool useHostPtr = (flags & CL_MEM_USE_HOST_PTR) != 0;
  const bool allocHostPtr = (flags & CL_MEM_ALLOC_HOST_PTR) != 0;
  const bool copyHostPtr = (flags & CL_MEM_COPY_HOST_PTR) != 0;

  // CL_MEM_USE_HOST_PTR cannot be combined with either ALLOC_HOST_PTR or COPY_HOST_PTR
  if (useHostPtr && (allocHostPtr || copyHostPtr)) {
    return false;
  }

  // External physical memory excludes every host pointer flag as well as
  // the READ_WRITE and READ_ONLY access qualifiers
  if ((flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) != 0) {
    const bool readable = (flags & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY)) != 0;
    if (useHostPtr || copyHostPtr || allocHostPtr || readable) {
      return false;
    }
  }

  // Bus addressable memory cannot live in host memory
  if (((flags & CL_MEM_BUS_ADDRESSABLE_AMD) != 0) && (useHostPtr || allocHostPtr)) {
    return false;
  }

  return true;
}

/*! \brief Helper function to validate cl_image_desc
 *
 *  \return true if cl_image_desc parameters are valid, otherwise - false
 *
 *  image_type describes the image type and must be either CL_MEM_OBJECT_IMAGE1D,
 *  CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY,
 *  CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY or CL_MEM_OBJECT_IMAGE3D.
 *
 *  image_width is the width of the image in pixels. For a 2D image and
 *  image array, the image width must be <= CL_DEVICE_IMAGE2D_MAX_WIDTH.
 *  For a 3D image, the image width must be <= CL_DEVICE_IMAGE3D_MAX_WIDTH.
 *  For a 1D image buffer, the image width must be <= CL_DEVICE_IMAGE_MAX_BUFFER_SIZE.
 *  For a 1D image and 1D image array, the image width must be
 *  <= CL_DEVICE_IMAGE2D_MAX_WIDTH.
 *
 *  image_height is height of the image in pixels. This is only used if
 *  the image is a 2D, 3D or 2D image array. For a 2D image or image array,
 *  the image height must be <= CL_DEVICE_IMAGE2D_MAX_HEIGHT. For a 3D image,
 *  the image height must be <= CL_DEVICE_IMAGE3D_MAX_HEIGHT.
 *
 *  image_depth is the depth of the image in pixels. This is only used if
 *  the image is a 3D image and must be a value > 1 and
 *  <= CL_DEVICE_IMAGE3D_MAX_DEPTH.
 *
 *  image_array_size is the number of images in the image array. This is only
 *  used if the image is a 1D or 2D image array. The values for
 *  image_array_size, if specified, must be between 1 and
 *  CL_DEVICE_IMAGE_MAX_ARRAY_SIZE.
 *
 *  image_row_pitch is the scan-line pitch in bytes. This must be 0 if
 *  host_ptr is NULL and can be either 0 or >= image_width * size of element in
 *  bytes if host_ptr is not NULL. If host_ptr is not NULL and image_row_pitch = 0,
 *  image_row_pitch is calculated as image_width * size of element in bytes.
 *  If image_row_pitch is not 0, it must be a multiple of the image element
 *  size in bytes.
 *
 *  image_slice_pitch is the size in bytes of each 2D slice in the 3D image or
 *  the size in bytes of each image in a 1D or 2D image array. This must be 0
 *  if host_ptr is NULL. If host_ptr is not NULL, image_slice_pitch can be either
 *  0 or >= image_row_pitch * image_height for a 2D image array or 3D image and
 *  can be either 0 or >= image_row_pitch for a 1D image array. If host_ptr is
 *  not NULL and image_slice_pitch = 0, image_slice_pitch is calculated as
 *  image_row_pitch * image_height for a 2D image array or 3D image and
 *  image_row_pitch for a 1D image array. If image_slice_pitch is not 0, it must
 *  be a multiple of the image_row_pitch.
 *
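 *  num_samples must be 0. num_mip_levels must be 0 unless at least one device
 *  in the context reports the mipmap image extension and host_ptr is NULL.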
 *
 *  buffer refers to a valid buffer memory object if image_type is
 *  CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. For a 1D image
 *  buffer object, the image pixels are taken from the buffer object’s
 *  data store. When the contents of a buffer object’s data store are modified,
 *  those changes are reflected in the contents of the 1D image buffer object
 *  and vice-versa at corresponding synchronization points. The image_width *
 *  size of element in bytes must be <= size of buffer object data store.
 */
static bool validateImageDescriptor(const std::vector<amd::Device*>& devices,
                                    const amd::Image::Format imageFormat, const cl_image_desc* desc,
                                    void* hostPtr, size_t& imageRowPitch, size_t& imageSlicePitch) {
  // imageRowPitch and imageSlicePitch are outputs. Once the descriptor's basic
  // checks pass they receive the caller supplied pitches, and any pitch left
  // at 0 is replaced by the value computed below.
  if (desc == NULL) {
    return false;
  }

  // Check if any device supports mipmaps
  bool mipMapSupport = false;
  for (auto& dev : devices) {
    if (dev->settings().checkExtension(ClKhrMipMapImage)) {
      mipMapSupport = true;
      break;
    }
  }

  // Mipmaps need a supporting device and cannot be combined with a host pointer
  if ((desc->num_mip_levels != 0) && (!mipMapSupport || (hostPtr != NULL))) {
    return false;
  }

  if (desc->num_samples != 0) {
    return false;
  }

  amd::Buffer* buffer = NULL;
  size_t elemSize = imageFormat.getElementSize();
  bool imageBuffer = false;

  // Only a 1D image buffer and a 2D image may be backed by another memory
  // object, and that object has to be a buffer.
  if (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
      (desc->mem_object != NULL && desc->image_type == CL_MEM_OBJECT_IMAGE2D)) {
    if (desc->mem_object == NULL) {
      return false;
    }
    buffer = as_amd(desc->mem_object)->asBuffer();
    if (buffer == NULL) {
      return false;
    }
    // image_height is not used for a 1D image buffer and is normally left at 0,
    // which would zero the product and disable the size check. Count one row.
    const size_t rows =
        (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) ? 1 : desc->image_height;
    if ((desc->image_width * rows * elemSize) > buffer->getSize()) {
      return false;
    }
    imageBuffer = true;
  } else if (desc->mem_object != NULL) {
    return false;
  }

  imageRowPitch = desc->image_row_pitch;
  imageSlicePitch = desc->image_slice_pitch;

  switch (desc->image_type) {
    case CL_MEM_OBJECT_IMAGE3D:
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
      // check slice pitch: it may only be given together with a host pointer
      if (hostPtr == NULL) {
        if (imageSlicePitch != 0) {
          return false;
        }
      }
    // Fall through to process pitch...
    case CL_MEM_OBJECT_IMAGE2D:
    case CL_MEM_OBJECT_IMAGE1D:
      // check row pitch rules
      if (hostPtr == NULL && !imageBuffer) {
        if (imageRowPitch != 0) {
          return false;
        }
      } else if (imageRowPitch != 0) {
        if ((imageRowPitch < desc->image_width * elemSize) || ((imageRowPitch % elemSize) != 0)) {
          return false;
        }
      }
      if (imageRowPitch == 0) {
        if (desc->mem_object != nullptr) {
          // Buffer backed image: derive the pitch from the width aligned to
          // the first device's image pitch alignment
          imageRowPitch = amd::alignUp(desc->image_width,
                                       devices[0]->info().imagePitchAlignment_) * elemSize;
        } else {
          imageRowPitch = desc->image_width * elemSize;
        }
      }
      break;
    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
      break;
    default:
      return false;
      break;
  }

  // Extra slice validation for three dimensional images
  if ((desc->image_type == CL_MEM_OBJECT_IMAGE3D) ||
      (desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)) {
    if (imageSlicePitch != 0) {
      // A zero row pitch (e.g. zero image width) cannot carry a slice pitch
      // and would make the modulo below divide by zero.
      if ((imageRowPitch == 0) || (imageSlicePitch < (imageRowPitch * desc->image_height)) ||
          ((imageSlicePitch % imageRowPitch) != 0)) {
        return false;
      }
    }
    if (imageSlicePitch == 0) {
      imageSlicePitch = imageRowPitch * desc->image_height;
    }
  } else if (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
    if (imageSlicePitch != 0) {
      // Same zero row pitch guard as above
      if ((imageRowPitch == 0) || ((imageSlicePitch % imageRowPitch) != 0)) {
        return false;
      }
    }
    if (imageSlicePitch == 0) {
      imageSlicePitch = imageRowPitch;
    }
  }

  return true;
}

class ImageViewRef : public amd::EmbeddedObject {
 private:
  amd::Image* ref_;
  // Do not copy image view references.
  ImageViewRef& operator=(const ImageViewRef& sref);

 public:
  explicit ImageViewRef() : ref_(NULL) {}
  ~ImageViewRef() {
    if (ref_ != NULL) {
      ref_->release();
    }
  }

  ImageViewRef& operator=(amd::Image* sref) {
    ref_ = sref;
    return *this;
  }
  amd::Image* operator()() const { return ref_; }
};

/*! \brief Create a buffer object.
 *
 *  \param context is a valid OpenCL context used to create the buffer object.
 *
 *  \param flags is a bit-field that is used to specify allocation and usage
 *  information such as the memory arena that should be used to allocate the
 *  buffer object and how it will be used.
 *
 *  \param size is the size in bytes of the buffer memory object to be
 *  allocated.
 *
 *  \param host_ptr is a pointer to the buffer data that may already be
 *  allocated by the application. The size of the buffer that host_ptr points
 *  to must be >= \a size bytes. Passing in a pointer to an already allocated
 *  buffer on the host and using it as a buffer object allows applications to
 *  share data efficiently with kernels and the host.
 *
 *  \param errcode_ret will return an appropriate error code. If \a errcode_ret
 *  is NULL, no error code is returned.
 *
 *  \return A valid non-zero buffer object and \a errcode_ret is set to
 *  CL_SUCCESS if the buffer object is created successfully or a NULL value
 *  with one of the following error values returned in \a errcode_ret:
 *  - CL_INVALID_CONTEXT if \a context is not a valid context.
 *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
 *  - CL_INVALID_BUFFER_SIZE if \a size is 0 or is greater than
 *    CL_DEVICE_MAX_MEM_ALLOC_SIZE value.
 *  - CL_INVALID_HOST_PTR if host_ptr is NULL and CL_MEM_USE_HOST_PTR or
 *    CL_MEM_COPY_HOST_PTR are set in \a flags or if \a host_ptr is not NULL but
 *    CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in \a flags.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
 *    for buffer object.
 *  - CL_INVALID_OPERATION if the buffer object cannot be created for all
 *    devices in \a context.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY_RET(cl_mem, clCreateBuffer, (cl_context context, cl_mem_flags flags, size_t size,
                                           void* host_ptr, cl_int* errcode_ret)) {
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    return NULL;
  }
  // check flags for validity
  if (!validateFlags(flags)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    LogWarning("invalid parameter \"flags\"");
    return (cl_mem)0;
  }
  // check size
  if (size == 0) {
    *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE;
    LogWarning("invalid parameter \"size = 0\"");
    return (cl_mem)0;
  }
  // The size is accepted when at least one device can allocate it, or when
  // the buffer is requested in host memory.
  const std::vector<amd::Device*>& devices = as_amd(context)->devices();
  bool sizePass = false;
  for (auto& dev : devices) {
    if ((dev->info().maxMemAllocSize_ >= size) ||
        (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) {
      sizePass = true;
      break;
    }
  }
  if (!sizePass) {
    *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE;
    LogWarning("invalid parameter \"size\"");
    return (cl_mem)0;
  }

  // check host_ptr consistency
  if (host_ptr == NULL) {
    if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_EXTERNAL_PHYSICAL_AMD)) {
      *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
      LogWarning("invalid parameter \"host_ptr\"");
      return (cl_mem)0;
    }
  } else {
    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_EXTERNAL_PHYSICAL_AMD))) {
      *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
      LogWarning("invalid parameter \"host_ptr\"");
      return (cl_mem)0;
    }

    if (flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) {
      flags |= CL_MEM_WRITE_ONLY;

      // For external physical memory host_ptr carries a cl_bus_address_amd
      // descriptor; both addresses must be non-zero and page aligned.
      cl_bus_address_amd* bus_address = reinterpret_cast<cl_bus_address_amd*>(host_ptr);

      if (bus_address->surface_bus_address == 0) {
        *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
        LogWarning("invalid parameter \"surface bus address\"");
        return static_cast<cl_mem>(NULL);
      }

      if (bus_address->surface_bus_address & (amd::Os::pageSize() - 1)) {
        *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
        LogWarning("invalid parameter \"surface bus address\"");
        return static_cast<cl_mem>(NULL);
      }

      if (bus_address->marker_bus_address == 0) {
        *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
        LogWarning("invalid parameter \"marker bus address\"");
        return static_cast<cl_mem>(NULL);
      }

      if (bus_address->marker_bus_address & (amd::Os::pageSize() - 1)) {
        *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
        LogWarning("invalid parameter \"marker bus address\"");
        return static_cast<cl_mem>(NULL);
      }
    }
  }

  // check extensions flag consistency
  if ((flags & CL_MEM_USE_PERSISTENT_MEM_AMD) &&
      (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_EXTERNAL_PHYSICAL_AMD |
                CL_MEM_BUS_ADDRESSABLE_AMD))) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    LogWarning("conflicting flags CL_MEM_USE_PERSISTENT_MEM_AMD and host memory specific flags");
    return (cl_mem)0;
  }

  // Bus addressable and external physical buffers are rounded up to whole pages
  if ((flags & CL_MEM_EXTERNAL_PHYSICAL_AMD) || (flags & CL_MEM_BUS_ADDRESSABLE_AMD)) {
    size = (size + (amd::Os::pageSize() - 1)) & (~(amd::Os::pageSize() - 1));
  }

  amd::Context& amdContext = *as_amd(context);
  amd::Memory* mem = NULL;
  // check if the ptr is in the svm space, if yes, we need return SVM buffer
  amd::Memory* svmMem = amd::MemObjMap::FindMemObj(host_ptr);
  if ((NULL != svmMem) && (flags & CL_MEM_USE_HOST_PTR)) {
    size_t svmSize = svmMem->getSize();
    size_t offset = static_cast<address>(host_ptr) - static_cast<address>(svmMem->getSvmPtr());
    // The requested range must fit inside the SVM allocation. The comparison
    // is written so that it cannot wrap, and the failure is reported through
    // errcode_ret like every other error path of this function.
    if ((offset > svmSize) || (size > (svmSize - offset))) {
      *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE;
      LogWarning("invalid parameter \"size\"");
      return (cl_mem)0;
    }
    mem = new (amdContext) amd::Buffer(*svmMem, flags, offset, size);
    svmMem->setHostMem(host_ptr);
  } else {
    mem = new (amdContext) amd::Buffer(amdContext, flags, size);
  }

  if (mem == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return (cl_mem)0;
  }

  if (!mem->create(host_ptr)) {
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    mem->release();
    return NULL;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return as_cl(mem);
}
RUNTIME_EXIT

/*! \brief Create a buffer object that refers to a region of an existing
 *  buffer object.
 *
 *  \param mem is the buffer object the region is taken from.
 *
 *  \param flags is a bit-field with the allocation and usage information for
 *  the new object. When it is 0, the flags of \a mem are used.
 *
 *  \param buffer_create_type must be CL_BUFFER_CREATE_TYPE_REGION.
 *
 *  \param buffer_create_info points to a cl_buffer_region structure giving the
 *  origin and size in bytes of the region.
 *
 *  \param errcode_ret will return an appropriate error code. If \a errcode_ret
 *  is NULL, no error code is returned.
 *
 *  \return A valid non-zero buffer object and \a errcode_ret is set to
 *  CL_SUCCESS if the object is created successfully or a NULL value with one
 *  of the following error values returned in \a errcode_ret:
 *  - CL_INVALID_MEM_OBJECT if \a mem is not a valid buffer object.
 *  - CL_INVALID_VALUE if \a flags are not valid, if \a buffer_create_type is
 *    not CL_BUFFER_CREATE_TYPE_REGION, if \a buffer_create_info is NULL or if
 *    \a mem was created with CL_MEM_EXTERNAL_PHYSICAL_AMD or
 *    CL_MEM_BUS_ADDRESSABLE_AMD.
 *  - CL_MISALIGNED_SUB_BUFFER_OFFSET if the region origin is not aligned to
 *    the base address alignment of any device in the context of \a mem.
 *  - CL_INVALID_BUFFER_SIZE if the region size is 0 or the region does not
 *    fit in \a mem.
 *  - CL_OUT_OF_HOST_MEMORY if the object cannot be allocated on the host.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if creating the memory object fails.
 */
RUNTIME_ENTRY_RET(cl_mem, clCreateSubBuffer,
                  (cl_mem mem, cl_mem_flags flags, cl_buffer_create_type buffer_create_type,
                   const void* buffer_create_info, cl_int* errcode_ret)) {
  if (!is_valid(mem) || as_amd(mem)->asBuffer() == NULL) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    return NULL;
  }
  amd::Buffer& buffer = *as_amd(mem)->asBuffer();

  // check flags for validity
  if (!validateFlags(flags) || (buffer_create_type != CL_BUFFER_CREATE_TYPE_REGION)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }

  if (buffer.getMemFlags() & (CL_MEM_EXTERNAL_PHYSICAL_AMD | CL_MEM_BUS_ADDRESSABLE_AMD)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }

  // The region description is dereferenced below, so it must be present
  if (buffer_create_info == NULL) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }
  const cl_buffer_region* region = static_cast<const cl_buffer_region*>(buffer_create_info);

  // Check sub buffer offset alignment
  bool alignmentPass = false;
  const std::vector<amd::Device*>& devices = buffer.getContext().devices();
  for (auto& dev : devices) {
    // memBaseAddrAlign_ is shifted right by 3 to convert it to bytes
    cl_uint deviceAlignmentBytes = dev->info().memBaseAddrAlign_ >> 3;
    if (region->origin == amd::alignDown(region->origin, deviceAlignmentBytes)) {
      alignmentPass = true;
      break;  // one matching device is enough
    }
  }

  // Return an error if the offset is misaligned on all devices
  if (!alignmentPass) {
    *not_null(errcode_ret) = CL_MISALIGNED_SUB_BUFFER_OFFSET;
    return NULL;
  }

  // check size; the comparison is arranged so that origin + size cannot wrap
  const size_t parentSize = buffer.getSize();
  if ((region->size == 0) || (region->origin > parentSize) ||
      (region->size > (parentSize - region->origin))) {
    *not_null(errcode_ret) = CL_INVALID_BUFFER_SIZE;
    return NULL;
  }

  amd::Memory* mem = new (buffer.getContext())
      amd::Buffer(buffer, (flags) ? flags : buffer.getMemFlags(), region->origin, region->size);
  if (mem == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return NULL;
  }

  if (!mem->create(NULL)) {
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    mem->release();
    return NULL;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return as_cl(mem);
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_ReadWriteBuffer
 *  @{
 */

/*! \brief Enqueue a command to read from a buffer object to host memory.
 *
 *  \param command_queue refers to the command-queue in which the read / write
 *  command will be queued. \a command_queue and \a buffer must be created with
 *  the same OpenCL context.
 *
 *  \param buffer refers to a valid buffer object.
 *
 *  \param blocking_read indicates if the read operation is blocking or
 *  nonblocking. If \a blocking_read is CL_TRUE i.e. the read command is
 *  blocking, clEnqueueReadBuffer does not return until the buffer data has been
 *  read and copied into memory pointed to by ptr.
 *  If \a blocking_read is CL_FALSE i.e. the read command is non-blocking,
 *  clEnqueueReadBuffer queues a non-blocking read command and returns. The
 *  contents of the buffer that ptr points to cannot be used until the read
 *  command has completed. The \a event argument returns an event object which
 *  can be used to query the execution status of the read command. When the read
 *  command has completed, the contents of the buffer that ptr points to can be
 *  used by the application.
 *
 *  \param offset is the offset in bytes in the buffer object to read from or
 *  write to.
 *
 *  \param cb is the size in bytes of data being read or written.
 *
 *  \param ptr is the pointer to buffer in host memory where data is to be read
 *  into or to be written from.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL,
 *  then this particular command does not wait on  any event to complete.
 *  If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular read
 *  command and can be used to query or queue a wait for this particular command
 *  to complete. \a event can be NULL in which case it will not be possible for
 *  the application to query the status of this command or queue a wait for this
 *  command to complete.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue and
 *    \a buffer are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a buffer is not a valid buffer object.
 *  - CL_INVALID_VALUE if the region being read or written specified by (offset,
 *    cb) is out of bounds or if \a ptr is a NULL value.
 *  - CL_INVALID_OPERATION if \a clEnqueueReadBuffer is called on buffer which
 *    has been created with CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and \a
 *    num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY(cl_int, clEnqueueReadBuffer,
              (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset,
               size_t cb, void* ptr, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  // Argument checks run in a fixed order, which decides the error code that
  // is reported when several arguments are invalid at once.
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // The handle must be valid and must refer to a buffer
  amd::Buffer* const source = is_valid(buffer) ? as_amd(buffer)->asBuffer() : nullptr;
  if (source == nullptr) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Buffers created host write-only or host no-access cannot be read back
  const cl_mem_flags hostReadDenied = CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS;
  if ((source->getMemFlags() & hostReadDenied) != 0) {
    return CL_INVALID_OPERATION;
  }

  amd::HostQueue* const hostQueue = as_amd(command_queue)->asHostQueue();
  if (hostQueue == nullptr) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // Queue and buffer must share a context
  if (hostQueue->context() != source->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  if (ptr == nullptr) {
    return CL_INVALID_VALUE;
  }

  // Describe the byte range as a one dimensional region
  amd::Coord3D origin(offset, 0, 0);
  amd::Coord3D extent(cb, 1, 1);
  if (!source->validateRegion(origin, extent)) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList waitList;
  const cl_int status =
      amd::clSetEventWaitList(waitList, *hostQueue, num_events_in_wait_list, event_wait_list);
  if (status != CL_SUCCESS) {
    return status;
  }

  amd::ReadMemoryCommand* const readCmd = new amd::ReadMemoryCommand(
      *hostQueue, CL_COMMAND_READ_BUFFER, waitList, *source, origin, extent, ptr);
  if (readCmd == nullptr) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!readCmd->validateMemory()) {
    delete readCmd;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  readCmd->enqueue();
  if (blocking_read) {
    readCmd->awaitCompletion();
  }

  // Hand the event to the caller, or release the command when no event was
  // requested
  *not_null(event) = as_cl(&readCmd->event());
  if (event == nullptr) {
    readCmd->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueue a command to write to  a  buffer  object  from  host memory.
 *
 *  \param command_queue refers to the command-queue in which the  read / write
 *  command will be queued. \a command_queue and \a buffer must be created with
 *  the same OpenCL context.
 *
 *  \param buffer refers to a valid buffer object.
 *
 *  \param blocking_write indicates if  the  write  operation  is  blocking  or
 *  non-blocking. If \a blocking_write is CL_TRUE,  the  OpenCL  implementation
 *  copies the data referred to by \a ptr and enqueues the write  operation  in
 *  the command-queue. The memory pointed to by \a ptr can  be  reused  by  the
 *  application after the clEnqueueWriteBuffer call returns. If
 *  \a blocking_write is CL_FALSE, the OpenCL implementation will use \a ptr to
 *  perform a nonblocking write. As the write is non-blocking the implementation
 *  can return immediately. The memory pointed to by \a ptr cannot be reused by
 *  the application after the call returns. The \a event  argument  returns  an
 *  event object which can be used to query the execution status of  the  write
 *  command. When the write command has completed, the  memory  pointed  to  by
 *  \a ptr can then be reused by the application
 *
 *  \param offset is the offset in bytes in the buffer object to read  from  or
 *  write to.
 *
 *  \param cb is the size in bytes of data being read or written.
 *
 *  \param ptr is the pointer to buffer in host memory where data is to be read
 *  into or to be written from.
 *
 *  \param num_events_in_wait_list specifies the number  of  event  objects  in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete  before  this
 *  particular command can be executed.      If  \a  event_wait_list  is  NULL,
 *  then this particular command does  not  wait  on  any  event  to  complete.
 *  If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular write
 *  command and can be used to query or queue a wait for this particular command
 *  to complete. \a event can be NULL in which case it will not be possible for
 *  the application to query the status of this command or queue a wait for this
 *  command to complete.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue and
 *    \a buffer are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a buffer is not a valid buffer object.
 *  - CL_INVALID_VALUE if the region being read or written specified by (offset,
 *    cb) is out of bounds or if \a ptr is a NULL value.
 *  - CL_INVALID_OPERATION if \a clEnqueueWriteBuffer is called on buffer which
 *    has been created with CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and \a
 *    num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clEnqueueWriteBuffer,
              (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset,
               size_t cb, const void* ptr, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  // Argument checks run in a fixed order, which decides the error code that
  // is reported when several arguments are invalid at once.
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // The handle must be valid and must refer to a buffer
  amd::Buffer* const target = is_valid(buffer) ? as_amd(buffer)->asBuffer() : nullptr;
  if (target == nullptr) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Buffers created host read-only or host no-access cannot be written
  const cl_mem_flags hostWriteDenied = CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
  if ((target->getMemFlags() & hostWriteDenied) != 0) {
    return CL_INVALID_OPERATION;
  }

  amd::HostQueue* const hostQueue = as_amd(command_queue)->asHostQueue();
  if (hostQueue == nullptr) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // Queue and buffer must share a context
  if (hostQueue->context() != target->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  if (ptr == nullptr) {
    return CL_INVALID_VALUE;
  }

  // Describe the byte range as a one dimensional region
  amd::Coord3D origin(offset, 0, 0);
  amd::Coord3D extent(cb, 1, 1);
  if (!target->validateRegion(origin, extent)) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList waitList;
  const cl_int status =
      amd::clSetEventWaitList(waitList, *hostQueue, num_events_in_wait_list, event_wait_list);
  if (status != CL_SUCCESS) {
    return status;
  }

  amd::WriteMemoryCommand* const writeCmd = new amd::WriteMemoryCommand(
      *hostQueue, CL_COMMAND_WRITE_BUFFER, waitList, *target, origin, extent, ptr);
  if (writeCmd == nullptr) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!writeCmd->validateMemory()) {
    delete writeCmd;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  writeCmd->enqueue();
  if (blocking_write) {
    writeCmd->awaitCompletion();
  }

  // Hand the event to the caller, or release the command when no event was
  // requested
  *not_null(event) = as_cl(&writeCmd->event());
  if (event == nullptr) {
    writeCmd->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueues a command to copy a buffer object to another
 *
 *  \param command_queue refers to the command-queue in which the copy command
 *  will be queued. The OpenCL context associated with \a command_queue,
 *  \a src_buffer and \a dst_buffer must be the same.
 *
 *  \param src_buffer is the source buffer object.
 *
 *  \param dst_buffer is the destination buffer object.
 *
 *  \param src_offset refers to the offset where to begin reading data in
 *  \a src_buffer.
 *
 *  \param dst_offset refers to the offset where to begin copying data in
 *  \a dst_buffer.
 *
 *  \param cb refers to the size in bytes to copy.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL,
 *  then this particular command does not wait on  any event to complete.
 *  If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular copy
 *  command and can be used to query or queue a wait for this particular command
 *  to complete. \a event can be NULL in which case it will not be possible for
 *  the application to query the status of this command or queue and wait for
 *  this command to complete. clEnqueueBarrier can be used instead.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue,
 *    \a src_buffer and \a dst_buffer are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a src_buffer and \a dst_buffer are not valid
 *    buffer objects.
 *  - CL_INVALID_VALUE if \a src_offset, \a dst_offset, \a cb, \a src_offset +
 *    \a cb or \a dst_offset + \a cb require accessing elements outside the
 *    buffer memory objects.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
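 *  - CL_MEM_COPY_OVERLAP if \a src_buffer and \a dst_buffer are the same buffer
 *    object and the source and destination regions overlap.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
 *    for the data store associated with \a src_buffer or \a dst_buffer.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.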
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clEnqueueCopyBuffer,
              (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
               size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  // The queue handle is inspected first, so an invalid queue is reported
  // before any problem with the memory objects.
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // Both handles must be valid memory objects...
  if (!(is_valid(src_buffer) && is_valid(dst_buffer))) {
    return CL_INVALID_MEM_OBJECT;
  }
  // ...and asBuffer() must yield a buffer for each of them.
  amd::Buffer* const source = as_amd(src_buffer)->asBuffer();
  amd::Buffer* const destination = as_amd(dst_buffer)->asBuffer();
  if (source == nullptr || destination == nullptr) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Commands can only be submitted through a host queue.
  amd::HostQueue* const queuePtr = as_amd(command_queue)->asHostQueue();
  if (queuePtr == nullptr) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queuePtr;

  // The queue and both buffers have to share one context.
  if (hostQueue.context() != source->getContext()) {
    return CL_INVALID_CONTEXT;
  }
  if (hostQueue.context() != destination->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // Express the linear byte ranges as 1D regions.
  amd::Coord3D srcBegin(src_offset, 0, 0);
  amd::Coord3D dstBegin(dst_offset, 0, 0);
  amd::Coord3D extent(cb, 1, 1);

  // Source range is validated first; the destination is only looked at when
  // the source range was accepted.
  if (!source->validateRegion(srcBegin, extent)) {
    return CL_INVALID_VALUE;
  }
  if (!destination->validateRegion(dstBegin, extent)) {
    return CL_INVALID_VALUE;
  }

  // Copying within a single buffer is rejected when either range starts
  // inside the other one.
  if (source == destination) {
    const bool dstStartsInSrc = (src_offset <= dst_offset) && (dst_offset < src_offset + cb);
    const bool srcStartsInDst = (dst_offset <= src_offset) && (src_offset < dst_offset + cb);
    if (dstStartsInSrc || srcStartsInDst) {
      return CL_MEM_COPY_OVERLAP;
    }
  }

  // Translate the caller's wait list; any error is passed straight through.
  amd::Command::EventWaitList eventWaitList;
  const cl_int waitListStatus = amd::clSetEventWaitList(
      eventWaitList, hostQueue, num_events_in_wait_list, event_wait_list);
  if (waitListStatus != CL_SUCCESS) {
    return waitListStatus;
  }

  amd::CopyMemoryCommand* const copyCmd =
      new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_BUFFER, eventWaitList, *source,
                                 *destination, srcBegin, dstBegin, extent);
  if (copyCmd == nullptr) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution; the command has not
  // been enqueued yet, so it is destroyed directly on failure.
  if (!copyCmd->validateMemory()) {
    delete copyCmd;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  copyCmd->enqueue();

  // Hand the event to the caller; when no event was requested the reference
  // held by this function is dropped instead.
  *not_null(event) = as_cl(&copyCmd->event());
  if (event == nullptr) {
    copyCmd->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief clEnqueueReadBufferRect enqueues commands to read a 2D or 3D rectangular
 *  region from a buffer object to host memory.
 *
 *  \param command_queue refers to the command-queue in which the read / write
 *  command will be queued. command_queue and buffer must be created with the same
 *  OpenCL context. buffer refers to a valid buffer object.
 *
 *  \param blocking_read indicates if the read operations are blocking or
 *  nonblocking.
 *  If \a blocking_read is CL_TRUE i.e. the read command is blocking,
 *  clEnqueueReadBufferRect does not return until the buffer data has been read
 *  and copied into memory pointed to by ptr.
 *  If blocking_read is CL_FALSE i.e. the read command is non-blocking,
 *  clEnqueueReadBufferRect queues a non-blocking read command and returns.
 *  The contents of the buffer that ptr points to cannot be used until
 *  the read command has completed. The event argument returns an event object
 *  which can be used to query the execution status of the read command.
 *  When the read command has completed, the contents of the buffer that
 *  ptr points to can be used by the application.
 *
 *  \param buffer_origin defines the (x, y, z) offset in the memory region associated
 *  with buffer. For a 2D rectangle region, the z value given by buffer_origin[2]
 *  should be 0. The offset in bytes is computed as
 *  buffer_origin[2] * buffer_slice_pitch + buffer_origin[1] * buffer_row_pitch +
 *  buffer_origin[0].
 *
 *  \param host_origin defines the (x, y, z) offset in the memory region pointed to
 *  by ptr. For a 2D rectangle region, the z value given by host_origin[2]
 *  should be 0. The offset in bytes is computed as
 *  host_origin[2] * host_slice_pitch + host_origin[1] * host_row_pitch +
 *  host_origin[0].
 *
 *  \param region defines the (width, height, depth) in bytes of the 2D or 3D
 *  rectangle being read or written.
 *  For a 2D rectangle copy, the depth value given by region[2] should be 1.
 *
 *  \param buffer_row_pitch is the length of each row in bytes to be used for
 *  the memory region associated with buffer. If \a buffer_row_pitch is 0,
 *  \a buffer_row_pitch is computed as region[0].
 *
 *  \param buffer_slice_pitch is the length of each 2D slice in bytes to be used
 *  for the memory region associated with buffer. If \a buffer_slice_pitch is 0,
 *  \a buffer_slice_pitch is computed as region[1] * \a buffer_row_pitch.
 *
 *  \param host_row_pitch is the length of each row in bytes to be used for
 *  the memory region pointed to by ptr. If \a host_row_pitch is 0, \a host_row_pitch
 *  is computed as region[0].
 *
 *  \param host_slice_pitch is the length of each 2D slice in bytes to be used
 *  for the memory region pointed to by ptr. If \a host_slice_pitch is 0,
 *  \a host_slice_pitch is computed as region[1] * \a host_row_pitch.
 *  ptr is the pointer to buffer in host memory where data is to be read into
 *  or to be written from.
 *
 *  \param event_wait_list and \a num_events_in_wait_list specify events that
 *  need to complete before this particular command can be executed.
 *  If \a event_wait_list is NULL, then this particular command does not wait on any
 *  event to complete. If \a event_wait_list is NULL, \a num_events_in_wait_list
 *  must be 0. If \a event_wait_list is not NULL, the list of events pointed to
 *  by \a event_wait_list must be valid and \a num_events_in_wait_list
 *  must be greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points. The context associated with events in
 *  \a event_wait_list and \a command_queue must be the same.
 *
 *  \param event returns an event object that identifies this particular
 *  read / write command and can be used to query or queue a wait for this
 *  particular command to complete. event can be NULL in which case it will not
 *  be possible for the application to query the status of this command or queue a
 *  wait for this command to complete.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise,
 *  it returns one of the following errors:
 *   - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
 *   - CL_INVALID_CONTEXT if the context associated with command_queue and
 *     buffer are not the same or if the context associated with \a command_queue
 *      and events in event_wait_list are not the same.
 *   - CL_INVALID_MEM_OBJECT if buffer is not a valid buffer object.
 *   - CL_INVALID_VALUE if the region being read or written specified by
 *     (buffer_origin, region) is out of bounds.
 *   - CL_INVALID_VALUE if ptr is a NULL value.
 *   - CL_INVALID_OPERATION if \a clEnqueueReadBufferRect is called on buffer which
 *     has been created with CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS.
 *   - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and
 *     \a num_events_in_wait_list > 0, or event_wait_list is not NULL and
 *     \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *     are not valid events.
 *   - CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset
 *     specified when the sub-buffer object is created is not aligned to
 *     CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue.
 *   - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
 *     for data store associated with buffer.
 *   - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
 *     by the OpenCL implementation on the device.
 *   - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *     required by the OpenCL implementation on the host.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY(cl_int, clEnqueueReadBufferRect,
              (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
               const size_t* buffer_origin, const size_t* host_origin, const size_t* region,
               size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch,
               size_t host_slice_pitch, void* ptr, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  // Validate command queue
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // Validate opencl buffer
  if (!is_valid(buffer)) {
    return CL_INVALID_MEM_OBJECT;
  }
  amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer();
  if (srcBuffer == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Host reads are not permitted on buffers created with
  // CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS
  if (srcBuffer->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
    return CL_INVALID_OPERATION;
  }

  // Commands can only be submitted through a host queue
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  // The queue and the buffer must belong to the same context
  if (hostQueue.context() != srcBuffer->getContext()) {
    return CL_INVALID_CONTEXT;
  }
  // Make sure we have a valid system memory pointer
  if (ptr == NULL) {
    return CL_INVALID_VALUE;
  }

  // The origin and region arrays are handed to BufferRect::create() and
  // region is indexed directly further down, so a NULL array must be
  // rejected here instead of being dereferenced
  if (buffer_origin == NULL || host_origin == NULL || region == NULL) {
    return CL_INVALID_VALUE;
  }

  // Create buffer rectangle info structure
  amd::BufferRect bufRect;
  amd::BufferRect hostRect;

  if (!bufRect.create(buffer_origin, region, buffer_row_pitch, buffer_slice_pitch) ||
      !hostRect.create(host_origin, region, host_row_pitch, host_slice_pitch)) {
    return CL_INVALID_VALUE;
  }

  // Validate the linear range [start_, end_) of the rectangle against the
  // buffer as a 1D region
  amd::Coord3D srcStart(bufRect.start_, 0, 0);
  amd::Coord3D srcEnd(bufRect.end_, 1, 1);

  if (!srcBuffer->validateRegion(srcStart, srcEnd)) {
    return CL_INVALID_VALUE;
  }

  // Translate the caller's wait list; any error is passed straight through
  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  amd::Coord3D size(region[0], region[1], region[2]);
  amd::ReadMemoryCommand* command =
      new amd::ReadMemoryCommand(hostQueue, CL_COMMAND_READ_BUFFER_RECT, eventWaitList, *srcBuffer,
                                 srcStart, size, ptr, bufRect, hostRect);
  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();
  // A blocking read only returns once the command has completed
  if (blocking_read) {
    command->awaitCompletion();
  }

  // Hand the event to the caller; when no event was requested the reference
  // held by this function is dropped instead
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief clEnqueueWriteBufferRect enqueues commands to write a 2D or 3D
 *  rectangular region to a buffer object from host memory.
 *
 *  \param command_queue refers to the command-queue in which the read / write
 *  command will be queued. command_queue and buffer must be created with the same
 *  OpenCL context. buffer refers to a valid buffer object.
 *
 *  \param blocking_write indicates if the write operations are blocking or
 *  nonblocking.
 *  If \a blocking_write is CL_TRUE, the OpenCL implementation copies the data
 *  referred to by ptr and enqueues the write operation in the command-queue.
 *  The memory pointed to by ptr can be reused by the application after
 *  the clEnqueueWriteBufferRect call returns.
 *  If \a blocking_write is CL_FALSE, the OpenCL implementation will use ptr to
 *  perform a nonblocking write. As the write is non-blocking the implementation
 *  can return immediately. The memory pointed to by ptr cannot be reused by
 *  the application after the call returns. The event argument returns
 *  an event object which can be used to query the execution status of the write
 *  command. When the write command has completed, the memory pointed to by ptr
 *  can then be reused by the application.
 *
 *  \param buffer_origin defines the (x, y, z) offset in the memory region associated
 *  with buffer. For a 2D rectangle region, the z value given by buffer_origin[2]
 *  should be 0. The offset in bytes is computed as
 *  buffer_origin[2] * buffer_slice_pitch + buffer_origin[1] * buffer_row_pitch +
 *  buffer_origin[0].
 *
 *  \param host_origin defines the (x, y, z) offset in the memory region pointed to
 *  by ptr. For a 2D rectangle region, the z value given by host_origin[2]
 *  should be 0. The offset in bytes is computed as
 *  host_origin[2] * host_slice_pitch + host_origin[1] * host_row_pitch +
 *  host_origin[0].
 *
 *  \param region defines the (width, height, depth) in bytes of the 2D or 3D
 *  rectangle being read or written.
 *  For a 2D rectangle copy, the depth value given by region[2] should be 1.
 *
 *  \param buffer_row_pitch is the length of each row in bytes to be used for
 *  the memory region associated with buffer. If \a buffer_row_pitch is 0,
 *  \a buffer_row_pitch is computed as region[0].
 *
 *  \param buffer_slice_pitch is the length of each 2D slice in bytes to be used
 *  for the memory region associated with buffer. If \a buffer_slice_pitch is 0,
 *  \a buffer_slice_pitch is computed as region[1] * \a buffer_row_pitch.
 *
 *  \param host_row_pitch is the length of each row in bytes to be used for
 *  the memory region pointed to by ptr. If \a host_row_pitch is 0, \a host_row_pitch
 *  is computed as region[0].
 *
 *  \param host_slice_pitch is the length of each 2D slice in bytes to be used
 *  for the memory region pointed to by ptr. If \a host_slice_pitch is 0,
 *  \a host_slice_pitch is computed as region[1] * \a host_row_pitch.
 *  ptr is the pointer to buffer in host memory where data is to be read into
 *  or to be written from.
 *
 *  \param event_wait_list and \a num_events_in_wait_list specify events that
 *  need to complete before this particular command can be executed.
 *  If \a event_wait_list is NULL, then this particular command does not wait on any
 *  event to complete. If \a event_wait_list is NULL, \a num_events_in_wait_list
 *  must be 0. If \a event_wait_list is not NULL, the list of events pointed to
 *  by \a event_wait_list must be valid and \a num_events_in_wait_list
 *  must be greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points. The context associated with events in
 *  \a event_wait_list and \a command_queue must be the same.
 *
 *  \param event returns an event object that identifies this particular
 *  read / write command and can be used to query or queue a wait for this
 *  particular command to complete. event can be NULL in which case it will not
 *  be possible for the application to query the status of this command or queue a
 *  wait for this command to complete.
 *
 *  clEnqueueReadBufferRect and clEnqueueWriteBufferRect
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise,
 *  it returns one of the following errors:
 *   - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
 *   - CL_INVALID_CONTEXT if the context associated with command_queue and
 *     buffer are not the same or if the context associated with \a command_queue
 *      and events in event_wait_list are not the same.
 *   - CL_INVALID_MEM_OBJECT if buffer is not a valid buffer object.
 *   - CL_INVALID_VALUE if the region being read or written specified by
 *     (buffer_origin, region) is out of bounds.
 *   - CL_INVALID_VALUE if ptr is a NULL value.
 *   - CL_INVALID_OPERATION if \a clEnqueueWriteBufferRect is called on buffer
 *     which has been created with CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS.
 *   - CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and
 *     \a num_events_in_wait_list > 0, or event_wait_list is not NULL and
 *     \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *     are not valid events.
 *   - CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset
 *     specified when the sub-buffer object is created is not aligned to
 *     CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue.
 *   - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
 *     for data store associated with buffer.
 *   - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
 *     by the OpenCL implementation on the device.
 *   - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *     required by the OpenCL implementation on the host.
 */
RUNTIME_ENTRY(cl_int, clEnqueueWriteBufferRect,
              (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
               const size_t* buffer_origin, const size_t* host_origin, const size_t* region,
               size_t buffer_row_pitch, size_t buffer_slice_pitch, size_t host_row_pitch,
               size_t host_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  // Validate command queue
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // Validate opencl buffer
  if (!is_valid(buffer)) {
    return CL_INVALID_MEM_OBJECT;
  }
  amd::Buffer* dstBuffer = as_amd(buffer)->asBuffer();
  if (dstBuffer == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Host writes are not permitted on buffers created with
  // CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS
  if (dstBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) {
    return CL_INVALID_OPERATION;
  }

  // Commands can only be submitted through a host queue
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  // The queue and the buffer must belong to the same context
  if (hostQueue.context() != dstBuffer->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // Make sure we have a valid system memory pointer
  if (ptr == NULL) {
    return CL_INVALID_VALUE;
  }

  // The origin and region arrays are handed to BufferRect::create() and
  // region is indexed directly further down, so a NULL array must be
  // rejected here instead of being dereferenced
  if (buffer_origin == NULL || host_origin == NULL || region == NULL) {
    return CL_INVALID_VALUE;
  }

  // Create buffer rectangle info structure
  amd::BufferRect bufRect;
  amd::BufferRect hostRect;

  if (!bufRect.create(buffer_origin, region, buffer_row_pitch, buffer_slice_pitch) ||
      !hostRect.create(host_origin, region, host_row_pitch, host_slice_pitch)) {
    return CL_INVALID_VALUE;
  }

  // Validate the linear range [start_, end_) of the rectangle against the
  // buffer as a 1D region
  amd::Coord3D dstStart(bufRect.start_, 0, 0);
  amd::Coord3D dstEnd(bufRect.end_, 1, 1);

  if (!dstBuffer->validateRegion(dstStart, dstEnd)) {
    return CL_INVALID_VALUE;
  }

  // Translate the caller's wait list; any error is passed straight through
  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  amd::Coord3D size(region[0], region[1], region[2]);
  amd::WriteMemoryCommand* command =
      new amd::WriteMemoryCommand(hostQueue, CL_COMMAND_WRITE_BUFFER_RECT, eventWaitList,
                                  *dstBuffer, dstStart, size, ptr, bufRect, hostRect);
  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();
  // A blocking write only returns once the command has completed
  if (blocking_write) {
    command->awaitCompletion();
  }

  // Hand the event to the caller; when no event was requested the reference
  // held by this function is dropped instead
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueues a command to copy a 2D or 3D rectangular region from
 *  the buffer object identified by \a src_buffer to a 2D or 3D region
 *  in the buffer object identified by \a dst_buffer.
 *
 *  \param command_queue refers to the command-queue in which the copy command
 *  will be queued. The OpenCL context associated with command_queue,
 *  \a src_buffer and \a dst_buffer must be the same.
 *
 *  \param src_origin defines the (x, y, z) offset in the memory region
 *  associated with \a src_buffer. For a 2D rectangle region, the z value given
 *  by src_origin[2] should be 0. The offset in bytes is computed as
 *  src_origin[2] * src_slice_pitch + src_origin[1] * src_row_pitch + src_origin[0].
 *
 *  \param dst_origin defines the (x, y, z) offset in the memory region
 *  associated with \a dst_buffer. For a 2D rectangle region, the z value given
 *  by dst_origin[2] should be 0. The offset in bytes is computed as
 *  dst_origin[2] * dst_slice_pitch + dst_origin[1] * dst_row_pitch + dst_origin[0].
 *
 *  \param region defines the (width, height, depth) in bytes of the 2D or 3D
 *  rectangle being copied. For a 2D rectangle, the depth value given by
 *  region[2] should be 1.
 *
 *  \param src_row_pitch is the length of each row in bytes to be used for
 *  the memory region associated with src_buffer. If src_row_pitch is 0,
 *  src_row_pitch is computed as region[0].
 *
 *  \param src_slice_pitch is the length of each 2D slice in bytes to be used
 *  for the memory region associated with src_buffer. If src_slice_pitch is 0,
 *  src_slice_pitch is computed as region[1] * src_row_pitch.
 *
 *  \param dst_row_pitch is the length of each row in bytes to be used for
 *  the memory region associated with dst_buffer. If dst_row_pitch is 0,
 *  dst_row_pitch is computed as region[0].
 *
 *  \param dst_slice_pitch is the length of each 2D slice in bytes to be used
 *  for the memory region associated with dst_buffer. If dst_slice_pitch is 0,
 *  dst_slice_pitch is computed as region[1] * dst_row_pitch.
 *
 *  \param event_wait_list and num_events_in_wait_list specify events that
 *  need to complete before this particular command can be executed.
 *  If event_wait_list is NULL, then this particular command does not wait on
 *  any event to complete. If event_wait_list is NULL, num_events_in_wait_list
 *  must be 0. If event_wait_list is not NULL, the list of events pointed to by
 *  event_wait_list must be valid and num_events_in_wait_list must be greater
 *  than 0. The events specified in event_wait_list act as synchronization
 *  points. The context associated with events in event_wait_list and
 *  command_queue must be the same.
 *
 *  \param event returns an event object that identifies this particular copy
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue
 *  a wait for this command to complete. clEnqueueBarrier can be used instead.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise,
 *  it returns one of the following errors:
 *   - CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
 *   - CL_INVALID_CONTEXT if the context associated with command_queue,
 *     \a src_buffer and \a dst_buffer are not the same or if the context
 *     associated with \a command_queue and in \a event_wait_list are not the same.
 *   - CL_INVALID_MEM_OBJECT if \a src_buffer and \a dst_buffer are not valid
 *     buffer objects.
 *   - CL_INVALID_VALUE if (\a src_origin, \a region) or (\a dst_origin,
 *     \a region) require accessing elements outside the \a src_buffer and
 *     \a dst_buffer buffer objects respectively.
 *   - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *     \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *     \a num_events_in_wait_list is 0, or if event objects in
 *     \a event_wait_list are not valid events.
 *   - CL_MEM_COPY_OVERLAP if \a src_buffer and \a dst_buffer are the same
 *     buffer object and the source and destination regions overlap.
 *   - CL_MISALIGNED_SUB_BUFFER_OFFSET if \a src_buffer is a sub-buffer object
 *     and offset specified when the sub-buffer object is created is
 *     not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device
 *     associated with queue.
 *   - CL_MISALIGNED_SUB_BUFFER_OFFSET if dst_buffer is a sub-buffer object
 *     and offset specified when the sub-buffer object is created is not
 *     aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated
 *     with queue.
 *   - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
 *     memory for data store associated with src_buffer or dst_buffer.
 *   - CL_OUT_OF_RESOURCES if there is a failure to allocate resources
 *     required by the OpenCL implementation on the device.
 *   - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *     required by the OpenCL implementation on the host
 *
 */
RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferRect,
              (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer,
               const size_t* src_origin, const size_t* dst_origin, const size_t* region,
               size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
               size_t dst_slice_pitch, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  // Validate command queue
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // Validate both opencl buffers
  if (!is_valid(src_buffer) || !is_valid(dst_buffer)) {
    return CL_INVALID_MEM_OBJECT;
  }
  amd::Buffer* srcBuffer = as_amd(src_buffer)->asBuffer();
  amd::Buffer* dstBuffer = as_amd(dst_buffer)->asBuffer();
  if (srcBuffer == NULL || dstBuffer == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Commands can only be submitted through a host queue
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  // The queue and both buffers must belong to the same context
  if (hostQueue.context() != srcBuffer->getContext() ||
      hostQueue.context() != dstBuffer->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // The origin and region arrays are handed to BufferRect::create() and are
  // indexed directly further down, so a NULL array must be rejected here
  // instead of being dereferenced
  if (src_origin == NULL || dst_origin == NULL || region == NULL) {
    return CL_INVALID_VALUE;
  }

  // Create buffer rectangle info structure
  amd::BufferRect srcRect;
  amd::BufferRect dstRect;

  if (!srcRect.create(src_origin, region, src_row_pitch, src_slice_pitch) ||
      !dstRect.create(dst_origin, region, dst_row_pitch, dst_slice_pitch)) {
    return CL_INVALID_VALUE;
  }

  // Validate the linear range [start_, end_) of each rectangle against its
  // buffer as a 1D region
  amd::Coord3D srcStart(srcRect.start_, 0, 0);
  amd::Coord3D dstStart(dstRect.start_, 0, 0);
  amd::Coord3D srcEnd(srcRect.end_, 1, 1);
  amd::Coord3D dstEnd(dstRect.end_, 1, 1);

  if (!srcBuffer->validateRegion(srcStart, srcEnd) ||
      !dstBuffer->validateRegion(dstStart, dstEnd)) {
    return CL_INVALID_VALUE;
  }

  // Check if regions overlap each other: within one buffer the rectangles
  // intersect when the origins are closer than the region extent along every
  // axis. The distance is computed in size_t so that no value is truncated on
  // platforms where long is narrower than size_t and no signed overflow can
  // occur.
  // NOTE(review): this coordinate-wise test compares origins only and does not
  // take differing source/destination pitches into account - confirm that is
  // intended.
  const auto axisDistance = [](size_t a, size_t b) -> size_t {
    return (a > b) ? (a - b) : (b - a);
  };
  if ((srcBuffer == dstBuffer) &&
      (axisDistance(src_origin[0], dst_origin[0]) < region[0]) &&
      (axisDistance(src_origin[1], dst_origin[1]) < region[1]) &&
      (axisDistance(src_origin[2], dst_origin[2]) < region[2])) {
    return CL_MEM_COPY_OVERLAP;
  }

  // Translate the caller's wait list; any error is passed straight through
  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  amd::Coord3D size(region[0], region[1], region[2]);
  amd::CopyMemoryCommand* command =
      new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_BUFFER_RECT, eventWaitList, *srcBuffer,
                                 *dstBuffer, srcStart, dstStart, size, srcRect, dstRect);

  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();

  // Hand the event to the caller; when no event was requested the reference
  // held by this function is dropped instead
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_MemoryCallback
 *  @{
 */

/*! \brief Registers a user callback function that will be called when the
 *   memory object is deleted and its resources freed.
 *
 * Each call to clSetMemObjectDestructorCallback registers the specified user
 * callback function on a callback stack associated with memobj. The registered
 * user callback functions are called in the reverse order in which they were
 * registered. The user callback functions are called and then the memory
 * object’s resources are freed and the memory object is deleted.
 * This provides a mechanism for the application (and libraries) using memobj
 * to be notified when the memory referenced by host_ptr, specified when
 * the memory object is created and used as the storage bits for the memory
 * object, can be reused or freed.
 *
 * \a memobj is a valid memory object.
 * \a pfn_notify is the callback function that can be registered by the
 *    application. This callback function may be called asynchronously by the
 *    OpenCL implementation. It is the application’s responsibility to ensure
 *    that the callback function is thread-safe. The parameters to this callback
 *    function are:
 *      - memobj is the memory object being deleted.
 *      - user_data is a pointer to user supplied data.
 *    If pfn_notify is NULL, no callback function is registered for memobj and
 *    CL_INVALID_VALUE is returned.
 * \a user_data will be passed as the user_data argument when pfn_notify is
 *    called. user_data can be NULL.
 *
 * \return CL_SUCCESS if the function is executed successfully. Otherwise it
 * returns one of the following errors:
 *   - CL_INVALID_MEM_OBJECT if memobj is not a valid memory object.
 *   - CL_INVALID_VALUE if pfn_notify is NULL.
 *   - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *     required by the OpenCL implementation on the host.
 *
 * NOTE: When the user callback function is called by the implementation, the
 * contents of the memory region pointed to by host_ptr (if the memory object is
 * created with CL_MEM_USE_HOST_PTR) are undefined. The callback function is
 * typically used by the application to either free or reuse the memory region
 * pointed to by host_ptr. The behavior of calling expensive system routines,
 * OpenCL API calls to create contexts or command-queues, or blocking OpenCL
 * operations from the following list below, in a callback is undefined.
 *
 *  \version 1.1r17
 */
RUNTIME_ENTRY(cl_int, clSetMemObjectDestructorCallback,
              (cl_mem memobj, void(CL_CALLBACK* pfn_notify)(cl_mem memobj, void* user_data),
               void* user_data)) {
  // The handle is checked before the callback pointer, so an invalid memory
  // object is reported even when pfn_notify is missing as well.
  if (!is_valid(memobj)) {
    return CL_INVALID_MEM_OBJECT;
  }

  // A callback function is mandatory.
  if (pfn_notify == nullptr) {
    return CL_INVALID_VALUE;
  }

  // A failed registration is reported as a host allocation failure.
  const bool registered = as_amd(memobj)->setDestructorCallback(pfn_notify, user_data);
  return registered ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_RetRelMemory
 *  @{
 */

/*! \brief Increment the \a memobj reference count.
 *
 *  \return CL_SUCCESS if the function is executed successfully or
 *  CL_INVALID_MEM_OBJECT if \a memobj is not a valid memory object.
 *
 *  clCreateBuffer and clCreateImage{2D|3D} perform an implicit retain.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clRetainMemObject, (cl_mem memobj)) {
  // Only a valid handle is retained; anything else is reported as an invalid
  // memory object.
  if (is_valid(memobj)) {
    as_amd(memobj)->retain();
    return CL_SUCCESS;
  }
  return CL_INVALID_MEM_OBJECT;
}
RUNTIME_EXIT

/*! \brief Decrement the \a memobj reference count.
 *
 *  After the \a memobj reference count becomes zero and commands queued for
 *  execution on a command-queue(s) that use \a memobj have finished, the
 *  memory object is deleted.
 *
 *  \return CL_SUCCESS if the function is executed successfully or
 *  CL_INVALID_MEM_OBJECT if \a memobj is not a valid memory object.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clReleaseMemObject, (cl_mem memobj)) {
  // Only a valid handle is released; anything else is reported as an invalid
  // memory object.
  if (is_valid(memobj)) {
    as_amd(memobj)->release();
    return CL_SUCCESS;
  }
  return CL_INVALID_MEM_OBJECT;
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_CreatingImage
 *  @{
 */

/*! \brief Create a (1D, or 2D) image object.
 *
 *  \param context is a valid OpenCL context on which the image object is to be
 *  created.
 *
 *  \param flags is a bit-field that is used to specify allocation and usage
 *  information about the image memory object being created.
 *
 *  \param image_format is a pointer to a structure that describes format
 *  properties of the image to be allocated.
 *
 *  \param image_width is the width of the image in pixels. Must be greater
 *  than or equal to 1.
 *
 *  \param image_height is the height of the image in pixels. Must be greater
 *  than or equal to 1.
 *
 *  \param image_row_pitch is the scan-line pitch in bytes. This must be 0 if
 *  \a host_ptr is NULL and can be either 0 or >= \a image_width * size of
 *  element in bytes if \a host_ptr is not NULL. If \a host_ptr is not NULL and
 *  \a image_row_pitch = 0, \a image_row_pitch is calculated as
 *  \a image_width * size of element in bytes.
 *
 *  \param host_ptr is a pointer to the image data that may already be allocated
 *  by the application. The size of the buffer that \a host_ptr points to must
 *  be >= \a image_row_pitch * \a image_height. The size of each element in
 *  bytes must be a power of 2. Passing in a pointer to an already allocated
 *  buffer on the host and using it as a memory object allows applications to
 *  share data efficiently with kernels and the host.
 *
 *  \param errcode_ret will return an appropriate error code. If \a errcode_ret
 *  is NULL, no error code is returned.
 *
 *  \return A valid non-zero image object and errcode_ret is set to CL_SUCCESS
 *  if the image object is created successfully. It returns a NULL value with
 *  one of the following error values returned in \a errcode_ret:
 *  - CL_INVALID_CONTEXT if \a context is not a valid context.
 *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
 *  - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if values specified in \a image_format
 *    are not valid or if \a image_format is NULL.
 *  - CL_INVALID_IMAGE_SIZE if \a image_width or \a image_height are 0 or if
 *    they exceed values specified in CL_DEVICE_IMAGE2D_MAX_WIDTH or
 *    CL_DEVICE_IMAGE2D_MAX_HEIGHT respectively or if values specified by
 *    \a image_row_pitch do not follow rules described in the argument
 *    description above.
 *  - CL_INVALID_HOST_PTR if \a host_ptr is NULL and CL_MEM_USE_HOST_PTR or
 *    CL_MEM_COPY_HOST_PTR are set in \a flags or if \a host_ptr is not NULL
 *    but CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in \a flags.
 *  - CL_IMAGE_FORMAT_NOT_SUPPORTED if the \a image_format is not supported.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
 *    for image object.
 *  - CL_INVALID_OPERATION if the image object as specified by the
 *    \a image_format, \a flags and dimensions cannot be created for all devices
 *    in context that support images or if there are no devices in context that
 *    support images.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY_RET(cl_mem, clCreateImage2D,
                  (cl_context context, cl_mem_flags flags, const cl_image_format* image_format,
                   size_t image_width, size_t image_height, size_t image_row_pitch, void* host_ptr,
                   cl_int* errcode_ret)) {
  // The checks below run in a fixed order; the first one that fails decides
  // the error code reported through errcode_ret.
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    LogWarning("invalid parameter \"context\"");
    return (cl_mem)0;
  }

  // The memory flags must form a valid combination.
  if (!validateFlags(flags)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    LogWarning("invalid parameter \"flags\"");
    return (cl_mem)0;
  }

  // A format descriptor is mandatory ...
  if (image_format == nullptr) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter \"image_format\"");
    return (cl_mem)0;
  }

  // ... and it has to describe a valid format ...
  const amd::Image::Format imageFormat(*image_format);
  if (!imageFormat.isValid()) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter \"image_format\"");
    return (cl_mem)0;
  }

  // ... that the context supports.
  amd::Context& amdContext = *as_amd(context);
  if (!imageFormat.isSupported(amdContext)) {
    *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED;
    LogWarning("invalid parameter \"image_format\"");
    return (cl_mem)0;
  }

  // Both dimensions must be at least 1.
  if (image_width == 0 || image_height == 0) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
    LogWarning("invalid parameter \"image_width\" or \"image_height\"");
    return (cl_mem)0;
  }

  // Look for an image-capable device whose 2D limits cover the requested size.
  // One such device is enough; the search stops at the first match.
  bool hasImageDevice = false;
  bool dimsFit = false;
  for (amd::Device* device : amdContext.devices()) {
    const auto& caps = device->info();
    if (!caps.imageSupport_) {
      continue;
    }
    hasImageDevice = true;
    if (image_width <= caps.image2DMaxWidth_ && image_height <= caps.image2DMaxHeight_) {
      dimsFit = true;
      break;
    }
  }
  if (!hasImageDevice) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    LogWarning("there are no devices in context to support images");
    return (cl_mem)0;
  }
  if (!dimsFit) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
    LogWarning("invalid parameter \"image_width\" or \"image_height\"");
    return (cl_mem)0;
  }

  // Row pitch rules: a non-zero pitch is only legal together with a host
  // pointer, and then it must cover a full row and be a whole number of
  // elements.
  if (image_row_pitch != 0) {
    bool pitchOk = false;
    if (host_ptr != nullptr) {
      const size_t texelBytes = imageFormat.getElementSize();
      pitchOk = (image_row_pitch >= image_width * texelBytes) &&
          ((image_row_pitch % texelBytes) == 0);
    }
    if (!pitchOk) {
      *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
      LogWarning("invalid parameter \"image_row_pitch\"");
      return (cl_mem)0;
    }
  }

  // host_ptr must be supplied exactly when one of the host-pointer flags is set.
  const bool usesHostPtr = (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) != 0;
  if ((host_ptr != nullptr) != usesHostPtr) {
    *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
    LogWarning("invalid parameter \"host_ptr\"");
    return (cl_mem)0;
  }

  // CL_IMAGE_FORMAT_NOT_SUPPORTED ???

  // Derive the tightly packed row pitch when the caller did not give one.
  if (image_row_pitch == 0) {
    image_row_pitch = image_width * imageFormat.getElementSize();
  }

  amd::Image* newImage =
      new (amdContext) amd::Image(amdContext, CL_MEM_OBJECT_IMAGE2D, flags, imageFormat,
                                  image_width, image_height, 1, image_row_pitch, 0);
  if (newImage == nullptr) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    LogWarning("cannot allocate resources");
    return (cl_mem)0;
  }

  // A failed create() is reported as CL_MEM_OBJECT_ALLOCATION_FAILURE and the
  // reference taken on the new object is dropped.
  if (!newImage->create(host_ptr)) {
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    newImage->release();
    return (cl_mem)0;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return (cl_mem)as_cl<amd::Memory>(newImage);
}
RUNTIME_EXIT

/*! \brief Create a 3D image object.
 *
 *  \param context is a valid OpenCL context on which the image object is to be
 *  created.
 *
 *  \param flags is a bit-field that is used to specify allocation and usage
 *  information about the image memory object being created.
 *
 *  \param image_format is a pointer to a structure that describes format
 *  properties of the image to be allocated.
 *
 *  \param image_width is the width of the image in pixels. Must be greater
 *  than or equal to 1.
 *
 *  \param image_height is the height of the image in pixels. Must be greater
 *  than or equal to 1.
 *
 *  \param image_depth is the depth of the image in pixels. This must be a
 *  value > 1.
 *
 *  \param image_row_pitch is the scan-line pitch in bytes. This must be 0 if
 *  \a host_ptr is NULL and can be either 0 or >= \a image_width * size of
 *  element in bytes if \a host_ptr is not NULL. If \a host_ptr is not NULL and
 *  \a image_row_pitch = 0, \a image_row_pitch is calculated as
 *  \a image_width * size of element in bytes.
 *
 *  \param image_slice_pitch is the size in bytes of each 2D slice in the 3D
 *  image. This must be 0 if \a host_ptr is NULL and can be either 0 or >=
 *  \a image_row_pitch * \a image_height if \a host_ptr is not NULL.
 *  If \a host_ptr is not NULL and \a image_slice_pitch = 0,
 *  \a image_slice_pitch is calculated as \a image_row_pitch * \a image_height.
 *
 *  \param host_ptr is a pointer to the image data that may already be allocated
 *  by the application. The size of the buffer that \a host_ptr points to must
 *  be >= \a image_row_pitch * \a image_height * \a image_depth. The size of
 *  each element in bytes must be a power of 2. Passing in a pointer to an
 *  already allocated buffer on the host and using it as a memory object allows
 *  applications to share data efficiently with kernels and the host.
 *
 *  \param errcode_ret will return an appropriate error code. If \a errcode_ret
 *  is NULL, no error code is returned.
 *
 *  \return valid non-zero image object created and the \a errcode_ret is set to
 *  CL_SUCCESS if the image object is created successfully. It returns a NULL
 *  value with one of the following error values returned in \a errcode_ret:
 *  - CL_INVALID_CONTEXT if \a context is not a valid context.
 *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
 *  - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if values specified in \a image_format
 *    are not valid or if \a image_format is NULL.
 *  - CL_INVALID_IMAGE_SIZE if \a image_width, \a image_height or \a image_depth
 *    are 0 or if they exceed values specified in CL_DEVICE_IMAGE3D_MAX_WIDTH,
 *    CL_DEVICE_IMAGE3D_MAX_HEIGHT or CL_DEVICE_IMAGE3D_MAX_DEPTH respectively
 *    or if values specified by \a image_row_pitch and \a image_slice_pitch do
 *    not follow rules described in the argument description above.
 *  - CL_INVALID_HOST_PTR if \a host_ptr is NULL and CL_MEM_USE_HOST_PTR or
 *    CL_MEM_COPY_HOST_PTR are set in \a flags or if \a host_ptr is not NULL but
 *    CL_MEM_COPY_HOST_PTR or CL_MEM_USE_HOST_PTR are not set in \a flags.
 *  - CL_IMAGE_FORMAT_NOT_SUPPORTED if the \a image_format is not supported.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
 *    for image object.
 *  - CL_INVALID_OPERATION if the image object as specified by the
 *    \a image_format, \a flags and dimensions cannot be created for all devices
 *    in context that support images, or if there are no devices in context that
 *    support images.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY_RET(cl_mem, clCreateImage3D,
                  (cl_context context, cl_mem_flags flags, const cl_image_format* image_format,
                   size_t image_width, size_t image_height, size_t image_depth,
                   size_t image_row_pitch, size_t image_slice_pitch, void* host_ptr,
                   cl_int* errcode_ret)) {
  // The checks below run in a fixed order; the first one that fails decides
  // the error code reported through errcode_ret.
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    LogWarning("invalid parameter \"context\"");
    return (cl_mem)0;
  }
  // check flags for validity
  if (!validateFlags(flags)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    LogWarning("invalid parameter \"flags\"");
    return (cl_mem)0;
  }
  // check format
  if (image_format == NULL) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter \"image_format\"");
    return (cl_mem)0;
  }
  amd::Image::Format imageFormat(*image_format);

  if (!imageFormat.isValid()) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter \"image_format\"");
    return (cl_mem)0;
  }

  amd::Context& amdContext = *as_amd(context);
  if (!imageFormat.isSupported(amdContext)) {
    *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED;
    LogWarning("invalid parameter \"image_format\"");
    return (cl_mem)0;
  }
  // check size parameters: width and height must be >= 1, depth must be > 1
  if (image_width == 0 || image_height == 0 || image_depth <= 1) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
    LogWarning("invalid size parameter(s)");
    return (cl_mem)0;
  }
  // Look for an image-capable device whose 3D limits cover the requested size.
  // One such device is enough; the search stops at the first match.
  const std::vector<amd::Device*>& devices = as_amd(context)->devices();
  bool supportPass = false;
  bool sizePass = false;
  for (auto& dev : devices) {
    if (dev->info().imageSupport_) {
      supportPass = true;
      if ((dev->info().image3DMaxWidth_ >= image_width) &&
          (dev->info().image3DMaxHeight_ >= image_height) &&
          (dev->info().image3DMaxDepth_ >= image_depth)) {
        sizePass = true;
        break;
      }
    }
  }
  if (!supportPass) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    LogWarning("there are no devices in context to support images");
    return (cl_mem)0;
  }
  if (!sizePass) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
    LogWarning("invalid size parameter(s)");
    return (cl_mem)0;
  }
  // check row pitch rules: a non-zero pitch requires a host pointer, must
  // cover a full row and must be a whole number of elements
  if (host_ptr == NULL) {
    if (image_row_pitch) {
      *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
      LogWarning("invalid parameter \"image_row_pitch\"");
      return (cl_mem)0;
    }
  } else if (image_row_pitch) {
    size_t elemSize = imageFormat.getElementSize();
    if ((image_row_pitch < image_width * elemSize) || (image_row_pitch % elemSize)) {
      *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
      LogWarning("invalid parameter \"image_row_pitch\"");
      return (cl_mem)0;
    }
  }
  // check slice pitch
  if (host_ptr == NULL) {
    if (image_slice_pitch) {
      *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
      LogWarning("invalid parameter \"image_slice_pitch\"");
      return (cl_mem)0;
    }
  } else if (image_slice_pitch) {
    // The caller may pass a slice pitch while leaving image_row_pitch at 0.
    // Validate against the row pitch that will actually be used (the tightly
    // packed default computed further below) so the modulo never divides by
    // zero.
    const size_t effectiveRowPitch =
        image_row_pitch ? image_row_pitch : image_width * imageFormat.getElementSize();
    if ((effectiveRowPitch == 0) || (image_slice_pitch < effectiveRowPitch * image_height) ||
        (image_slice_pitch % effectiveRowPitch)) {
      *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
      LogWarning("invalid parameter \"image_slice_pitch\"");
      return (cl_mem)0;
    }
  }
  // check host_ptr consistency: host_ptr must be supplied exactly when one of
  // the host-pointer flags is set
  if (host_ptr == NULL) {
    if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) {
      *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
      LogWarning("invalid parameter \"host_ptr\"");
      return (cl_mem)0;
    }
  } else {
    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) {
      *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
      LogWarning("invalid parameter \"host_ptr\"");
      return (cl_mem)0;
    }
  }

  // CL_IMAGE_FORMAT_NOT_SUPPORTED ???

  // Derive tightly packed pitches for whatever the caller left at 0.
  if (image_row_pitch == 0) {
    image_row_pitch = image_width * imageFormat.getElementSize();
  }
  if (image_slice_pitch == 0) {
    image_slice_pitch = image_row_pitch * image_height;
  }

  amd::Image* image = new (amdContext)
      amd::Image(amdContext, CL_MEM_OBJECT_IMAGE3D, flags, imageFormat, image_width, image_height,
                 image_depth, image_row_pitch, image_slice_pitch);
  if (image == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    LogWarning("cannot allocate resources");
    return (cl_mem)0;
  }

  // A failed create() is reported as CL_MEM_OBJECT_ALLOCATION_FAILURE and the
  // reference taken on the new object is dropped.
  if (!image->create(host_ptr)) {
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    image->release();
    return (cl_mem)0;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return (cl_mem)as_cl<amd::Memory>(image);
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_QueryImageFormat
 *  @{
 */

/*! \brief Get the list of supported image formats.
 *
 *  \param context is a valid OpenCL context on which the image object(s) will
 *  be created.
 *
 *  \param flags is a bit-field that is used to specify allocation and usage
 *  information about the image memory object being created.
 *
 *  \param image_type describes the image type and must be either
 *  CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE2D,
 *  CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_ARRAY or
 *  CL_MEM_OBJECT_IMAGE2D_ARRAY.
 *
 *  \param num_entries specifies the number of entries that can be returned in
 *  the memory location given by \a image_formats.
 *
 *  \param image_formats is a pointer to a memory location where the list of
 *  supported image formats are returned. Each entry describes a cl_image_format
 *  structure supported by the runtime. If \a image_formats is NULL, it is
 *  ignored.
 *
 *  \param num_image_formats is the actual number of supported image formats for
 *  a specific context and values specified by \a flags. If \a num_image_formats
 *  is NULL, it is ignored.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function is executed successfully
 *  - CL_INVALID_CONTEXT if \a context is not a valid context
 *  - CL_INVALID_VALUE if \a flags or \a image_type are not valid, or if
 *    \a num_entries is 0 and \a image_formats is not NULL
 *
 *  \version 1.2r08
 */
RUNTIME_ENTRY(cl_int, clGetSupportedImageFormats,
              (cl_context context, cl_mem_flags flags, cl_mem_object_type image_type,
               cl_uint num_entries, cl_image_format* image_formats, cl_uint* num_image_formats)) {
  if (!is_valid(context)) {
    LogWarning("invalid parameter \"context\"");
    return CL_INVALID_CONTEXT;
  }

  // The memory flags must form a valid combination.
  if (!validateFlags(flags, true)) {
    LogWarning("invalid parameter \"flags\"");
    return CL_INVALID_VALUE;
  }

  // Only the image object types are accepted.
  const bool isImageType =
      (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) || (image_type == CL_MEM_OBJECT_IMAGE1D) ||
      (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) || (image_type == CL_MEM_OBJECT_IMAGE2D) ||
      (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) || (image_type == CL_MEM_OBJECT_IMAGE3D);
  if (!isImageType) {
    LogWarning("invalid parameter \"image_type\"");
    return CL_INVALID_VALUE;
  }

  // An output array without any capacity is a caller error.
  if (image_formats != nullptr && num_entries == 0) {
    LogWarning("invalid parameter \"num_entries\"");
    return CL_INVALID_VALUE;
  }

  const amd::Context& amdContext = *as_amd(context);

  // Both outputs are optional; fill in whichever the caller asked for.
  if (image_formats != nullptr) {
    amd::Image::getSupportedFormats(amdContext, image_type, num_entries, image_formats, flags);
  }
  if (num_image_formats != nullptr) {
    *num_image_formats = amd::Image::numSupportedFormats(amdContext, image_type, flags);
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT


/*! @}
 *  \addtogroup CL_ReadWriteImage
 *  @{
 */

/*! \brief Enqueue a command to read from a 2D or 3D image object to host memory
 *
 *  \param command_queue refers to the command-queue in which the read
 *  command will be queued. \a command_queue and \a image must be created with
 *  the same OpenCL context.
 *
 *  \param image refers to a valid 2D or 3D image object.
 *
 *  \param blocking_read indicates if the read is blocking or nonblocking. If
 *  \a blocking_read is CL_TRUE i.e. the read command is blocking,
 *  clEnqueueReadImage does not return until the buffer data has been read and
 *  copied into memory pointed to by \a ptr. If \a blocking_read is CL_FALSE
 *  i.e. the read command is non-blocking, clEnqueueReadImage queues a
 *  non-blocking read command and returns. The contents of the buffer that
 *  \a ptr points to cannot be used until the read command has completed.
 *  The \a event argument returns an event object which can be used to query the
 *  execution status of the read command. When the read command has completed,
 *  the contents of the buffer that ptr points to can be used by the application
 *
 *  \param origin defines the (x, y, z) offset in the image from where to read
 *  or write. If image is a 2D image object, the z value given by origin[2] must
 *  be 0.
 *
 *  \param region defines the (width, height, depth) of the 2D or 3D rectangle
 *  being read or written. If image is a 2D image object, the depth value given
 *  by region[2] must be 1.
 *
 *  \param row_pitch in clEnqueueReadImage is the length of each row in bytes.
 *  This value must be greater than or equal to the element size in bytes
 *  multiplied by width. If \a row_pitch is set to 0, the appropriate row pitch
 *  is calculated based on the size of each element in bytes multiplied by
 *  width.
 *
 *  \param slice_pitch in clEnqueueReadImage is the size in bytes of the 2D
 *  slice of the 3D region of a 3D image being read. This must be 0 if image
 *  is a 2D image. Otherwise this value must be greater than or equal to
 *  row_pitch * height. If \a slice_pitch is set to 0, the appropriate slice
 *  pitch is calculated based on the \a row_pitch * \a height.
 *
 *  \param ptr is the pointer to a buffer in host memory where the image data
 *  read from \a image is to be stored.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL, then this
 *  particular command does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular read
 *  command and can be used to query or queue a wait for this particular command
 *  to complete. \a event can be NULL in which case it will not be possible for
 *  the application to query the status of this command or queue a wait for this
 *  command to complete.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue and
 *    \a image are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
 *  - CL_INVALID_VALUE if the region being read specified by \a origin and
 *    \a region is out of bounds or if \a ptr is a NULL value.
 *  - CL_INVALID_VALUE if \a image is a 2D image object and \a origin[2] is not
 *    equal to 0 or \a region[2] is not equal to 1 or \a slice_pitch is not
 *    equal to 0.
 *  - CL_INVALID_OPERATION if \a clEnqueueReadImage is called on image which
 *    has been created with CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_INVALID_VALUE if blocking_read is CL_FALSE and \a event is NULL.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY(cl_int, clEnqueueReadImage,
              (cl_command_queue command_queue, cl_mem image, cl_bool blocking_read,
               const size_t* origin, const size_t* region, size_t row_pitch, size_t slice_pitch,
               void* ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
               cl_event* event)) {
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // The memory object must be valid and must actually be an image.
  if (!is_valid(image)) {
    return CL_INVALID_MEM_OBJECT;
  }
  amd::Image* srcImage = as_amd(image)->asImage();
  if (srcImage == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // The host may not read images created write-only or no-access for the host.
  if (srcImage->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
    return CL_INVALID_OPERATION;
  }

  // Depth-stencil images are rejected for host reads.
  if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) {
    return CL_INVALID_OPERATION;
  }

  // Only host command queues accept this command.
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  if (hostQueue.context() != srcImage->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // origin and region are dereferenced right below, so a NULL in either of
  // them is rejected here together with a NULL destination pointer.
  if (ptr == NULL || origin == NULL || region == NULL) {
    return CL_INVALID_VALUE;
  }

  amd::Coord3D srcOrigin(origin[0], origin[1], origin[2]);
  amd::Coord3D srcRegion(region[0], region[1], region[2]);

  ImageViewRef mip;
  if (srcImage->getMipLevels() > 1) {
    // Create a view for the specified mip level; the level is taken from the
    // origin element that follows the image's own dimensions.
    mip = srcImage->createView(srcImage->getContext(), srcImage->getImageFormat(), NULL,
                               origin[srcImage->getDims()]);
    if (mip() == NULL) {
      return CL_OUT_OF_HOST_MEMORY;
    }
    // Reset the mip level value to 0, since a view was created
    if (srcImage->getDims() < 3) {
      srcOrigin.c[srcImage->getDims()] = 0;
    }
    srcImage = mip();
  }

  // The region must lie inside the image and the pitches must be consistent
  // with the region's width and height.
  if (!srcImage->validateRegion(srcOrigin, srcRegion) ||
      !srcImage->isRowSliceValid(row_pitch, slice_pitch, region[0], region[1])) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  amd::ReadMemoryCommand* command =
      new amd::ReadMemoryCommand(hostQueue, CL_COMMAND_READ_IMAGE, eventWaitList, *srcImage,
                                 srcOrigin, srcRegion, ptr, row_pitch, slice_pitch);

  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();
  if (blocking_read) {
    command->awaitCompletion();
  }

  // Hand the command's event to the caller when one was requested; otherwise
  // drop the reference held on the command.
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueue a command to write to a 2D or 3D image object from host
 *  memory
 *
 *  \param command_queue refers to the command-queue in which the write
 *  command will be queued. \a command_queue and \a image must be created with
 *  the same OpenCL context.
 *
 *  \param image refers to a valid 2D or 3D image object.
 *
 *  \param blocking_write indicates if the write operation is blocking or
 *  nonblocking. If blocking_write is CL_TRUE, the OpenCL implementation copies
 *  the data referred to by \a ptr and enqueues the write command in the
 *  command-queue. The memory pointed to by ptr can be reused by the application
 *  after the clEnqueueWriteImage call returns. If blocking_write is CL_FALSE,
 *  the OpenCL implementation will use ptr to perform a nonblocking write. As
 *  the write is non-blocking the implementation can return immediately. The
 *  memory pointed to by ptr cannot be reused by the application after the call
 *  returns. The event argument returns an event object which can be used to
 *  query the execution status of the write command. When the write command has
 *  completed, the memory pointed to by ptr can then be reused by the
 *  application.
 *
 *  \param origin defines the (x, y, z) offset in the image from where to read
 *  or write. If image is a 2D image object, the z value given by origin[2] must
 *  be 0.
 *
 *  \param region defines the (width, height, depth) of the 2D or 3D rectangle
 *  being read or written. If image is a 2D image object, the depth value given
 *  by region[2] must be 1.
 *
 *  \param input_row_pitch is the length of each row in bytes.
 *  This value must be greater than or equal to the element size in bytes
 *  multiplied by width. If \a input_row_pitch is set to 0, the appropriate row
 *  pitch is calculated based on the size of each element in bytes multiplied
 *  by width.
 *
 *  \param input_slice_pitch is the size
 *  in bytes of the 2D slice of the 3D region of a 3D image being read or
 *  written respectively. This must be 0 if image is a 2D image. This value
 *  must be greater than or equal to input_row_pitch * height. If
 *  \a input_slice_pitch is  set to 0, the appropriate slice pitch is calculated
 *  based on the  \a input_row_pitch * \a height.
 *
 *  \param ptr is the pointer to a buffer in host memory from which the image
 *  data is to be written to \a image.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL, then this
 *  particular command does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular write
 *  command and can be used to query or queue a wait for this particular command
 *  to complete. \a event can be NULL in which case it will not be possible for
 *  the application to query the status of this command or queue a wait for this
 *  command to complete.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue and
 *    \a image are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
 *  - CL_INVALID_VALUE if the region being written specified by \a origin and
 *    \a region is out of bounds or if \a ptr is a NULL value.
 *  - CL_INVALID_VALUE if \a image is a 2D image object and \a origin[2] is not
 *    equal to 0 or \a region[2] is not equal to 1 or \a slice_pitch is not
 *    equal to 0.
 *  - CL_INVALID_OPERATION if \a clEnqueueWriteImage is called on image which
 *    has been created with CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_INVALID_VALUE if blocking_write is CL_FALSE and \a event is NULL.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clEnqueueWriteImage,
              (cl_command_queue command_queue, cl_mem image, cl_bool blocking_write,
               const size_t* origin, const size_t* region, size_t input_row_pitch,
               size_t input_slice_pitch, const void* ptr, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // The memory object must be valid and must actually be an image.
  if (!is_valid(image)) {
    return CL_INVALID_MEM_OBJECT;
  }
  amd::Image* dstImage = as_amd(image)->asImage();
  if (dstImage == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // The host may not write images created read-only or no-access for the host.
  if (dstImage->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) {
    return CL_INVALID_OPERATION;
  }

  // Depth-stencil images are rejected for host writes.
  if (dstImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) {
    return CL_INVALID_OPERATION;
  }

  // Only host command queues accept this command.
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  if (hostQueue.context() != dstImage->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // origin and region are dereferenced right below, so a NULL in either of
  // them is rejected here together with a NULL source pointer.
  if (ptr == NULL || origin == NULL || region == NULL) {
    return CL_INVALID_VALUE;
  }

  amd::Coord3D dstOrigin(origin[0], origin[1], origin[2]);
  amd::Coord3D dstRegion(region[0], region[1], region[2]);
  ImageViewRef mip;
  if (dstImage->getMipLevels() > 1) {
    // Create a view for the specified mip level; the level is taken from the
    // origin element that follows the image's own dimensions.
    mip = dstImage->createView(dstImage->getContext(), dstImage->getImageFormat(), NULL,
                               origin[dstImage->getDims()]);
    if (mip() == NULL) {
      return CL_OUT_OF_HOST_MEMORY;
    }
    // Reset the mip level value to 0, since a view was created
    if (dstImage->getDims() < 3) {
      dstOrigin.c[dstImage->getDims()] = 0;
    }
    dstImage = mip();
  }

  // The region must lie inside the image and the pitches must be consistent
  // with the region's width and height.
  if (!dstImage->validateRegion(dstOrigin, dstRegion) ||
      !dstImage->isRowSliceValid(input_row_pitch, input_slice_pitch, region[0], region[1])) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  amd::WriteMemoryCommand* command =
      new amd::WriteMemoryCommand(hostQueue, CL_COMMAND_WRITE_IMAGE, eventWaitList, *dstImage,
                                  dstOrigin, dstRegion, ptr, input_row_pitch, input_slice_pitch);

  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();
  if (blocking_write) {
    command->awaitCompletion();
  }

  // Hand the command's event to the caller when one was requested; otherwise
  // drop the reference held on the command.
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueue a command to copy image objects.
 *
 *  \param command_queue refers to the command-queue in which the copy command
 *  will be queued. The OpenCL context associated with \a command_queue,
 *  \a src_image and \a dst_image must be the same.
 *
 *  \param src_image is the source image object.
 *
 *  \param dst_image is the destination image object.
 *
 *  \param src_origin defines the starting (x, y, z) location in \a src_image
 *  from where to start the data copy.  If \a src_image is a 2D image object,
 *  the z value given by \a src_origin[2] must be 0.
 *
 *  \param dst_origin defines the starting (x, y, z) location in \a dst_image
 *  from where to start the data copy. If \a dst_image is a 2D image object,
 *  the z value given by \a dst_origin[2] must be 0.
 *
 *  \param region defines the (width, height, depth) of the 2D or 3D rectangle
 *  to copy. If \a src_image or \a dst_image is a 2D image object, the depth
 *  value given by \a region[2] must be 1.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL, then
 *  this particular command does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0. If
 *  \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular copy
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. \a event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue
 *  a wait for this command to complete. clEnqueueBarrier can be used instead.
 *  It is currently a requirement that the \a src_image and \a dst_image image
 *  memory objects for clEnqueueCopyImage must have the exact image format
 *  (i.e. channel order and channel data type must match).
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue,
 *    \a src_image and \a dst_image are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a src_image and \a dst_image are not valid image
 *    objects.
 *  - CL_IMAGE_FORMAT_MISMATCH if src_image and dst_image do not use the same
 *    image format.
 *  - CL_INVALID_VALUE if the 2D or 3D rectangular region specified by
 *    \a src_origin and \a src_origin + \a region refers to a region outside
 *    \a src_image, or if the 2D or 3D rectangular region specified by
 *    \a dst_origin and \a dst_origin + \a region refers to a region outside
 *    \a dst_image.
 *  - CL_INVALID_VALUE if \a src_image is a 2D image object and \a src_origin[2]
 *    is not equal to 0 or \a region[2] is not equal to 1.
 *  - CL_INVALID_VALUE if \a dst_image is a 2D image object and \a dst_origin[2]
 *    is not equal to 0 or \a region[2] is not equal to 1.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clEnqueueCopyImage,
              (cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image,
               const size_t* src_origin, const size_t* dst_origin, const size_t* region,
               cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) {
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  if (!is_valid(src_image) || !is_valid(dst_image)) {
    return CL_INVALID_MEM_OBJECT;
  }
  amd::Image* srcImage = as_amd(src_image)->asImage();
  amd::Image* dstImage = as_amd(dst_image)->asImage();
  // asImage() may return NULL when the memory object is not an image (the
  // other image entry points in this file check for it); reject such handles
  // here, before either pointer is dereferenced below.
  if (srcImage == NULL || dstImage == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Only host queues can accept this command.
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  // The queue and both images must share one context.
  if (hostQueue.context() != srcImage->getContext() ||
      hostQueue.context() != dstImage->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // Source and destination must use the same image format.
  if (srcImage->getImageFormat() != dstImage->getImageFormat()) {
    return CL_IMAGE_FORMAT_MISMATCH;
  }

  // Depth-stencil images are not accepted by this entry point.
  if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) {
    return CL_INVALID_OPERATION;
  }

  amd::Coord3D srcOrigin(src_origin[0], src_origin[1], src_origin[2]);
  amd::Coord3D dstOrigin(dst_origin[0], dst_origin[1], dst_origin[2]);
  amd::Coord3D copyRegion(region[0], region[1], region[2]);

  ImageViewRef srcMip;
  if (srcImage->getMipLevels() > 1) {
    // Create a view for the specified mip level
    srcMip = srcImage->createView(srcImage->getContext(), srcImage->getImageFormat(), NULL,
                                  src_origin[srcImage->getDims()]);
    if (srcMip() == NULL) {
      return CL_OUT_OF_HOST_MEMORY;
    }
    // Reset the mip level value to 0, since a view was created
    if (srcImage->getDims() < 3) {
      srcOrigin.c[srcImage->getDims()] = 0;
    }
    srcImage = srcMip();
  }

  if (!srcImage->validateRegion(srcOrigin, copyRegion)) {
    return CL_INVALID_VALUE;
  }

  ImageViewRef dstMip;
  if (dstImage->getMipLevels() > 1) {
    // Create a view for the specified mip level
    dstMip = dstImage->createView(dstImage->getContext(), dstImage->getImageFormat(), NULL,
                                  dst_origin[dstImage->getDims()]);
    if (dstMip() == NULL) {
      return CL_OUT_OF_HOST_MEMORY;
    }
    // Reset the mip level value to 0, since a view was created
    if (dstImage->getDims() < 3) {
      dstOrigin.c[dstImage->getDims()] = 0;
    }
    dstImage = dstMip();
  }

  if (!dstImage->validateRegion(dstOrigin, copyRegion)) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  if (src_image == dst_image) {
    // Two boxes of identical extent overlap only when their half-open ranges
    // intersect on EVERY axis; an intersection on a single axis (e.g. same
    // columns but disjoint rows) is a legal copy within one image.
    const bool overlapX =
        (src_origin[0] <= dst_origin[0] && dst_origin[0] < src_origin[0] + region[0]) ||
        (dst_origin[0] <= src_origin[0] && src_origin[0] < dst_origin[0] + region[0]);
    const bool overlapY =
        (src_origin[1] <= dst_origin[1] && dst_origin[1] < src_origin[1] + region[1]) ||
        (dst_origin[1] <= src_origin[1] && src_origin[1] < dst_origin[1] + region[1]);
    // The third axis only takes part for images with more than two dimensions.
    bool overlapZ = true;
    if (srcImage->getDims() > 2) {
      overlapZ =
          (src_origin[2] <= dst_origin[2] && dst_origin[2] < src_origin[2] + region[2]) ||
          (dst_origin[2] <= src_origin[2] && src_origin[2] < dst_origin[2] + region[2]);
    }
    // NOTE(review): copies between different mip levels of the same image are
    // not exempted here - confirm whether they should be.
    if (overlapX && overlapY && overlapZ) {
      return CL_MEM_COPY_OVERLAP;
    }
  }

  amd::CopyMemoryCommand* command =
      new amd::CopyMemoryCommand(hostQueue, CL_COMMAND_COPY_IMAGE, eventWaitList, *srcImage,
                                 *dstImage, srcOrigin, dstOrigin, copyRegion);

  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();

  // Hand the event to the caller; if it was not requested, drop our reference.
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_CopyingImageBuffer
 *  @{
 */

/*! \brief Enqueue a command to copy an image object to a buffer object.
 *
 *  \param command_queue must be a valid command-queue. The OpenCL context
 *  associated with \a command_queue, \a src_image and \a dst_buffer must be
 *  the same.
 *
 *  \param src_image is a valid image object.
 *
 *  \param dst_buffer is a valid buffer object.
 *
 *  \param src_origin defines the (x, y, z) offset in the image from where to
 *  copy. If \a src_image is a 2D image object, the z value given by
 *  \a src_origin[2] must be 0.
 *
 *  \param region defines the (width, height, depth) of the 2D or 3D rectangle
 *  to copy. If \a src_image is a 2D image object, the depth value given by
 *  \a region[2] must be 1.
 *
 *  \param dst_offset refers to the offset where to begin copying data in
 *  \a dst_buffer. The size in bytes of the region to be copied referred to as
 *  \a dst_cb is computed as width * height * depth * bytes/image element if
 *  \a src_image is a 3D image object and is computed as
 *  width * height * bytes/image element if \a src_image is a 2D image object.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL, then this
 *  particular command does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular copy
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. \a event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue a
 *  wait for this command to complete. clEnqueueBarrier can be used instead.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue,
 *    \a src_image and \a dst_buffer are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a src_image is not a valid image object or
 *    \a dst_buffer is not a valid buffer object.
 *  - CL_INVALID_VALUE if the 2D or 3D rectangular region specified by
 *    \a src_origin and \a src_origin + \a region refers to a region outside
 *    \a src_image, or if the region specified by \a dst_offset and
 *    \a dst_offset + \a dst_cb refers to a region outside \a dst_buffer.
 *  - CL_INVALID_VALUE if \a src_image is a 2D image object and \a src_origin[2]
 *    is not equal to 0 or \a region[2] is not equal to 1.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clEnqueueCopyImageToBuffer,
              (cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer,
               const size_t* src_origin, const size_t* region, size_t dst_offset,
               cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) {
  // Handle validation: the queue is checked first, then both memory objects.
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  if (!(is_valid(src_image) && is_valid(dst_buffer))) {
    return CL_INVALID_MEM_OBJECT;
  }

  // The source has to be an image and the destination a buffer.
  amd::Image* fromImage = as_amd(src_image)->asImage();
  amd::Buffer* toBuffer = as_amd(dst_buffer)->asBuffer();
  if (nullptr == fromImage || nullptr == toBuffer) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Only host queues can accept this command.
  amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
  if (hostQueue == nullptr) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // The queue and both memory objects must share one context.
  if (hostQueue->context() != fromImage->getContext()) {
    return CL_INVALID_CONTEXT;
  }
  if (hostQueue->context() != toBuffer->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // Depth-stencil images are not accepted by this entry point.
  if (CL_DEPTH_STENCIL == fromImage->getImageFormat().image_channel_order) {
    return CL_INVALID_OPERATION;
  }

  amd::Coord3D imageOrigin(src_origin[0], src_origin[1], src_origin[2]);
  amd::Coord3D imageRegion(region[0], region[1], region[2]);
  amd::Coord3D bufferOffset(dst_offset, 0, 0);
  // Bytes landing in the buffer: element count of the region times element size.
  amd::Coord3D bufferBytes(
      region[0] * region[1] * region[2] * fromImage->getImageFormat().getElementSize(), 0, 0);

  // For mipmapped images the element of src_origin just past the image
  // dimensions selects the mip level; the copy then reads from a view of it.
  ImageViewRef levelView;
  if (fromImage->getMipLevels() > 1) {
    const auto dims = fromImage->getDims();
    levelView = fromImage->createView(fromImage->getContext(), fromImage->getImageFormat(), NULL,
                                      src_origin[dims]);
    if (nullptr == levelView()) {
      return CL_OUT_OF_HOST_MEMORY;
    }
    // The view already addresses the requested level, so clear that coordinate.
    if (dims < 3) {
      imageOrigin.c[dims] = 0;
    }
    fromImage = levelView();
  }

  // Both the image region and the byte range in the buffer must be in bounds.
  const bool boundsOk = fromImage->validateRegion(imageOrigin, imageRegion) &&
                        toBuffer->validateRegion(bufferOffset, bufferBytes);
  if (!boundsOk) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList waitList;
  const cl_int status =
      amd::clSetEventWaitList(waitList, *hostQueue, num_events_in_wait_list, event_wait_list);
  if (CL_SUCCESS != status) {
    return status;
  }

  amd::CopyMemoryCommand* copyCmd = new amd::CopyMemoryCommand(
      *hostQueue, CL_COMMAND_COPY_IMAGE_TO_BUFFER, waitList, *fromImage, *toBuffer, imageOrigin,
      bufferOffset, imageRegion);
  if (nullptr == copyCmd) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!copyCmd->validateMemory()) {
    delete copyCmd;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  copyCmd->enqueue();

  // Hand the event to the caller; if it was not requested, drop our reference.
  *not_null(event) = as_cl(&copyCmd->event());
  if (nullptr == event) {
    copyCmd->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueue a command to copy a buffer object to an image object.
 *
 *  \param command_queue must be a valid command-queue. The OpenCL context
 *  associated with \a command_queue, \a src_buffer and \a dst_image must be
 *  the same.
 *
 *  \param src_buffer is a valid buffer object.
 *
 *  \param dst_image is a valid image object.
 *
 *  \param src_offset refers to the offset where to begin copying data in
 *  \a src_buffer.
 *
 *  \param dst_origin defines the (x, y, z) offset in the image from where to
 *  copy. If \a dst_image is a 2D image object, the z value given by
 *  \a dst_origin[2] must be 0.
 *
 *  \param region defines the (width, height, depth) of the 2D or 3D rectangle
 *  to copy. If dst_image is a 2D image object, the depth value given by
 *  \a region[2] must be 1. The size in bytes of the region to be copied from
 *  \a src_buffer referred to as \a src_cb is computed as
 *  width * height * depth * bytes/image element if \a dst_image is a 3D image
 *  object and is computed as width * height * bytes/image element if
 *  \a dst_image is a 2D image object.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL, then
 *  this particular command does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular copy
 *  command and can be used to query or queue a wait for this particular command
 *  to complete. \a event can be NULL in which case it will not be possible for
 *  the application to query the status of this command or queue a wait for
 *  this command to complete. clEnqueueBarrier can be used instead.
 *
 *  \return CL_SUCCESS if the function is executed successfully. Otherwise it
 *  returns one of the following errors:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue,
 *    \a src_buffer and \a dst_image are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a src_buffer is not a valid buffer object or
 *    \a dst_image is not a valid image object.
 *  - CL_INVALID_VALUE if the 2D or 3D rectangular region specified by
 *    \a dst_origin and \a dst_origin + \a region refers to a region outside
 *    \a dst_image, or if the region specified by \a src_offset and
 *    \a src_offset + \a src_cb refers to a region outside \a src_buffer.
 *  - CL_INVALID_VALUE if \a dst_image is a 2D image object and \a dst_origin[2]
 *    is not equal to 0 or \a region[2] is not equal to 1.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in
 *    \a event_wait_list are not valid events.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clEnqueueCopyBufferToImage,
              (cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image,
               size_t src_offset, const size_t* dst_origin, const size_t* region,
               cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) {
  // Handle validation: the queue is checked first, then both memory objects.
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  if (!(is_valid(src_buffer) && is_valid(dst_image))) {
    return CL_INVALID_MEM_OBJECT;
  }

  // The source has to be a buffer and the destination an image.
  amd::Buffer* fromBuffer = as_amd(src_buffer)->asBuffer();
  amd::Image* toImage = as_amd(dst_image)->asImage();
  if (nullptr == fromBuffer || nullptr == toImage) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Only host queues can accept this command.
  amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
  if (hostQueue == nullptr) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // The queue and both memory objects must share one context.
  if (hostQueue->context() != fromBuffer->getContext()) {
    return CL_INVALID_CONTEXT;
  }
  if (hostQueue->context() != toImage->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // Depth-stencil images are not accepted by this entry point.
  if (CL_DEPTH_STENCIL == toImage->getImageFormat().image_channel_order) {
    return CL_INVALID_OPERATION;
  }

  amd::Coord3D imageOrigin(dst_origin[0], dst_origin[1], dst_origin[2]);
  amd::Coord3D imageRegion(region[0], region[1], region[2]);
  amd::Coord3D bufferOffset(src_offset, 0, 0);
  // Bytes read from the buffer: element count of the region times element size.
  amd::Coord3D bufferBytes(
      region[0] * region[1] * region[2] * toImage->getImageFormat().getElementSize(), 0, 0);

  // For mipmapped images the element of dst_origin just past the image
  // dimensions selects the mip level; the copy then writes into a view of it.
  ImageViewRef levelView;
  if (toImage->getMipLevels() > 1) {
    const auto dims = toImage->getDims();
    levelView = toImage->createView(toImage->getContext(), toImage->getImageFormat(), NULL,
                                    dst_origin[dims]);
    if (nullptr == levelView()) {
      return CL_OUT_OF_HOST_MEMORY;
    }
    // The view already addresses the requested level, so clear that coordinate.
    if (dims < 3) {
      imageOrigin.c[dims] = 0;
    }
    toImage = levelView();
  }

  // Both the byte range in the buffer and the image region must be in bounds.
  const bool boundsOk = fromBuffer->validateRegion(bufferOffset, bufferBytes) &&
                        toImage->validateRegion(imageOrigin, imageRegion);
  if (!boundsOk) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList waitList;
  const cl_int status =
      amd::clSetEventWaitList(waitList, *hostQueue, num_events_in_wait_list, event_wait_list);
  if (CL_SUCCESS != status) {
    return status;
  }

  amd::CopyMemoryCommand* copyCmd = new amd::CopyMemoryCommand(
      *hostQueue, CL_COMMAND_COPY_BUFFER_TO_IMAGE, waitList, *fromBuffer, *toImage, bufferOffset,
      imageOrigin, imageRegion);
  if (nullptr == copyCmd) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!copyCmd->validateMemory()) {
    delete copyCmd;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  copyCmd->enqueue();

  // Hand the event to the caller; if it was not requested, drop our reference.
  *not_null(event) = as_cl(&copyCmd->event());
  if (nullptr == event) {
    copyCmd->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_MapUnmap
 *  @{
 */

/*! \brief Enqueue a command to map a region of a buffer object into the
 *  host address.
 *
 *  \param command_queue must be a valid command-queue.
 *
 *  \param blocking_map indicates if the map operation is blocking or
 *  non-blocking. If \a blocking_map is CL_TRUE, clEnqueueMapBuffer does not
 *  return until the specified region in \a buffer can be mapped. If
 *  \a blocking_map is CL_FALSE i.e. map operation is non-blocking, the pointer
 *  to the mapped region returned by clEnqueueMapBuffer cannot be used until the
 *  map command has completed. The event argument returns an event object which
 *  can be used to query the execution status of the map command. When the map
 *  command is completed, the application can access the contents of the mapped
 *  region using the pointer returned by clEnqueueMapBuffer.
 *
 *  \param map_flags is a bit-field and can be set to CL_MAP_READ to indicate
 *  that the region specified by (\a offset, \a cb) in the buffer object is
 *  being mapped for reading, and/or CL_MAP_WRITE to indicate that the region
 *  specified by (\a offset, \a cb) in the buffer object is being mapped for
 *  writing.
 *
 *  \param buffer is a valid buffer object. The OpenCL context associated with
 *  \a command_queue and \a buffer must be the same.
 *
 *  \param offset is the offset in bytes of the region in the buffer object
 *  that is being mapped
 *
 *  \param cb is the size in bytes of the region in the buffer object that
 *  is being mapped.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL, then
 *  this particular command does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0. If
 *  \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. \a event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue
 *  a wait for this command to complete.
 *
 *  \param errcode_ret will return an appropriate error code. If \a errcode_ret
 *  is NULL, no error code is returned.
 *
 *  \return A pointer to the mapped region if \a buffer is a memory object
 *  created with clCreateBuffer and the region specified by (\a offset, \a cb)
 *  is a valid region in the buffer object and is successfully mapped into the
 *  host address space. The \a errcode_ret is set to CL_SUCCESS.
 *  A NULL pointer is returned otherwise with one of the following error values
 *  returned in \a errcode_ret:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if context associated with \a command_queue and
 *    \a buffer are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a buffer is not a valid buffer object.
 *  - CL_INVALID_OPERATION if buffer has been created with
 *    CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS and CL_MAP_READ
 *    is set in map_flags or if buffer has been created with
 *    CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS and CL_MAP_WRITE or
 *    CL_MAP_WRITE_INVALIDATE_REGION is set in map_flags.
 *  - CL_INVALID_VALUE if region being mapped given by (\a offset, \a cb) is out
 *    of bounds or if values specified in \a map_flags are not valid.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in
 *    \a event_wait_list are not valid events.
 *  - CL_MAP_FAILURE if there is a failure to map the specified region in the
 *    host address space.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 *  The pointer returned maps a region starting at \a offset and is at least
 *  \a cb bytes in size. The result of a memory access outside this region is
 *  undefined.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY_RET(void*, clEnqueueMapBuffer,
                  (cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map,
                   cl_map_flags map_flags, size_t offset, size_t cb,
                   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
                   cl_event* event, cl_int* errcode_ret)) {
  // Every failure path below stores an error code through errcode_ret (when
  // the caller supplied one) and returns NULL.
  if (!is_valid(command_queue)) {
    *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE;
    return NULL;
  }

  if (!is_valid(buffer)) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    return NULL;
  }
  // The memory object has to be a buffer.
  amd::Buffer* srcBuffer = as_amd(buffer)->asBuffer();
  if (srcBuffer == NULL) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    return NULL;
  }

  // Only host queues can accept this command. Bail out here: the queue
  // pointer is dereferenced immediately below.
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE;
    return NULL;
  }
  amd::HostQueue& hostQueue = *queue;

  if (hostQueue.context() != srcBuffer->getContext()) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    return NULL;
  }

  // Mapping for read is not allowed on host-write-only / host-no-access buffers
  if ((srcBuffer->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) &&
      (map_flags & CL_MAP_READ)) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    return NULL;
  }

  // Mapping for write is not allowed on host-read-only / host-no-access buffers
  if ((srcBuffer->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) &&
      (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    return NULL;
  }

  // Buffers created with CL_MEM_EXTERNAL_PHYSICAL_AMD are rejected here
  if (srcBuffer->getMemFlags() & CL_MEM_EXTERNAL_PHYSICAL_AMD) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    return NULL;
  }

  amd::Coord3D srcOffset(offset);
  amd::Coord3D srcSize(cb);

  // The (offset, cb) range must lie inside the buffer
  if (!srcBuffer->validateRegion(srcOffset, srcSize)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }

  // Wait for possible pending operations
  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    *not_null(errcode_ret) = err;
    return (void*)0;
  }

  // Make sure we have memory for the command execution
  device::Memory* mem = srcBuffer->getDeviceMemory(hostQueue.device());
  if (NULL == mem) {
    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", srcBuffer->getSize());
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    return NULL;
  }
  // Attempt to allocate the map target now (whether blocking or non-blocking)
  void* mapPtr = mem->allocMapTarget(srcOffset, srcSize, map_flags);
  if (NULL == mapPtr) {
    *not_null(errcode_ret) = CL_MAP_FAILURE;
    return NULL;
  }

  // Allocate a map command for the queue thread
  amd::MapMemoryCommand* command = new amd::MapMemoryCommand(
      hostQueue, CL_COMMAND_MAP_BUFFER, eventWaitList, *srcBuffer, map_flags,
      blocking_map ? true : false, srcOffset, srcSize, nullptr, nullptr, mapPtr);
  if (command == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return NULL;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    return NULL;
  }

  if (srcBuffer->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
    // [Windows VidMM restriction]
    // Runtime can't map persistent memory if it's still busy or
    // even wasn't submitted to HW from the worker thread yet
    hostQueue.finish();
  }

  // Send the map command for processing
  command->enqueue();

  // A blocking map has to wait for completion
  if (blocking_map) {
    command->awaitCompletion();
  }

  // Save the command event if the application has requested it;
  // otherwise drop our reference to the command
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }

  // Report success and record the new mapping on the buffer
  *not_null(errcode_ret) = CL_SUCCESS;
  srcBuffer->incMapCount();
  return mapPtr;
}
RUNTIME_EXIT

/*! \brief Enqueue a command to map a region in an image object given into
 *  the host address.
 *
 *  \param command_queue must be a valid command-queue.
 *
 *  \param image is a valid image object. The OpenCL context associated with
 *  \a command_queue and \a image must be the same.
 *
 *  \param blocking_map indicates if the map operation is blocking or
 *  non-blocking. If \a blocking_map is CL_TRUE, clEnqueueMapImage does not
 *  return until the specified region in image is mapped. If \a blocking_map is
 *  CL_FALSE i.e. map operation is non-blocking, the pointer to the mapped
 *  region returned by clEnqueueMapImage cannot be used until the map command
 *  has completed. The event argument returns an event object which can be used
 *  to query the execution status of the map command. When the map command is
 *  completed, the application can access the contents of the mapped region
 *  using the pointer returned by clEnqueueMapImage.
 *
 *  \param map_flags is a bit-field and can be set to CL_MAP_READ to indicate
 *  that the region specified by (\a origin, \a region) in the image object is
 *  being mapped for reading, and/or CL_MAP_WRITE to indicate that the region
 *  specified by (\a origin, \a region) in the image object is being mapped for
 *  writing.
 *
 *  \param origin defines the (x, y, z) offset in pixels in the image or (x, y)
 *  offset and the image index in the image array. If image is a 2D image
 *  object, origin[2] must be 0. If image is a 1D image or 1D image buffer
 *  object, origin[1] and origin[2] must be 0. If image is a 1D image array
 *  object, origin[2] must be 0. If image is a 1D image array object, origin[1]
 *  describes the image index in the 1D image array. If image is a 2D image
 *  array object, origin[2] describes the image index in the 2D image array.
 *
 *  \param region defines the (width, height, depth) in pixels of the 1D, 2D or
 *  3D rectangle or the (width, height) in pixels of the 1D or 2D
 *  rectangle and the image index of an image array. If image is a 2D image
 *  object, region[2] must be 1. If image is a 1D image or 1D image buffer
 *  object, region[1] and region[2] must be 1. If image is a 1D image array
 *  object, region[1] and region[2] must be 1. If image is a 2D image array
 *  object, region[2] must be 1.
 *
 *  \param origin define the (x, y, z) offset of the 2D or 3D rectangle region
 *  that is to be mapped. If image is a 2D image object, the z value given by
 *  \a origin[2] must be 0.
 *
 *  \param region define the (width, height, depth) of the 2D or 3D rectangle
 *  region that is to be mapped. If image is a 2D image object, the depth value
 *  given by \a region[2] must be 1.
 *
 *  \param image_row_pitch returns the scan-line pitch in bytes for the mapped
 *  region. This must be a non-NULL value.
 *
 *  \param image_slice_pitch returns the size in bytes of each 2D slice for the
 *  mapped region. For a 2D image this argument is ignored. For a 3D image this
 *  must be a non-NULL value.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before
 *  clEnqueueMapImage can be executed. If \a event_wait_list is NULL, then
 *  clEnqueueMapImage does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0. If
 *  \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular command
 *  and can be used to query or queue a wait for this particular command to
 *  complete. \a event can be NULL in which case it will not be possible for the
 *  application to query the status of this command or queue a wait for this
 *  command to complete
 *
 *  \param errcode_ret will return an appropriate error code. If \a errcode_ret
 *  is NULL, no error code is returned.
 *
 *  \return A pointer to the mapped region if  image  is  a memory object
 *  created  with  clCreateImage {2D|3D},  and the 2D or 3D rectangle specified
 *  by  origin  and  region is a valid region in the image object  and can be
 *  mapped into the host address space.
 *  The \a errcode_ret is set to CL_SUCCESS. A NULL pointer is returned
 *  otherwise with one of the following error values returned in \a errcode_ret:
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue.
 *  - CL_INVALID_CONTEXT if context associated with \a command_queue and
 *    \a image are not the same.
 *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
 *  - CL_INVALID_VALUE if region being mapped given by
 *    (\a origin, \a origin + \a region) is out of bounds or if values
 *    specified in \a map_flags are not valid.
 *  - CL_INVALID_VALUE if values in origin and region do not follow rules
 *    described in the argument description for origin and region.
 *  - CL_INVALID_VALUE if \a image is a 2D image object and \a origin[2] is not
 *    equal to 0 or \a region[2] is not equal to 1.
 *  - CL_INVALID_VALUE if \a image_row_pitch is NULL.
 *  - CL_INVALID_VALUE if \a image is a 3D image object and \a image_slice_pitch
 *    is NULL.
 *  - CL_INVALID_IMAGE_FORMAT if image format (image channel order and data
 *    type) for image are not supported by device associated with queue.
 *  - CL_INVALID_OPERATION if \a image has been created with
 *    CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS and CL_MAP_READ
 *    is set in \a map_flags, or if \a image has been created with
 *    CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS and CL_MAP_WRITE or
 *    CL_MAP_WRITE_INVALIDATE_REGION is set in \a map_flags.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_MEM_OBJECT_MAP_FAILURE  if there is a failure to map the  specified
 *    region in the host address space.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *
 * The pointer returned maps a 2D or 3D region starting at origin and is
 * at least (\a image_row_pitch * \a region[1] + \a region[0]) pixels in size
 * for a 2D image, and is at least (\a image_slice_pitch * \a region[2] +
 * \a image_row_pitch * \a region[1] + \a region[0]) pixels in size for a 3D
 * image. The result of a memory access outside this region is undefined.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY_RET(void*, clEnqueueMapImage,
                  (cl_command_queue command_queue, cl_mem image, cl_bool blocking_map,
                   cl_map_flags map_flags, const size_t* origin, const size_t* region,
                   size_t* image_row_pitch, size_t* image_slice_pitch,
                   cl_uint num_events_in_wait_list, const cl_event* event_wait_list,
                   cl_event* event, cl_int* errcode_ret)) {
  // Handle validation: both the queue and the image must be valid objects.
  if (!is_valid(command_queue)) {
    *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE;
    return NULL;
  }

  if (!is_valid(image)) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    return NULL;
  }
  // The memory object must actually be an image.
  amd::Image* srcImage = as_amd(image)->asImage();
  if (srcImage == NULL) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    return NULL;
  }

  // Depth-stencil images are rejected for mapping.
  if (srcImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    return NULL;
  }

  // Only host queues can accept this command. Return immediately on failure:
  // the queue pointer is dereferenced right below.
  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    *not_null(errcode_ret) = CL_INVALID_COMMAND_QUEUE;
    return NULL;
  }
  amd::HostQueue& hostQueue = *queue;

  if (hostQueue.context() != srcImage->getContext()) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    return NULL;
  }

  // Host access flags of the image must permit the requested map mode.
  if ((srcImage->getMemFlags() & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) &&
      (map_flags & CL_MAP_READ)) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    return NULL;
  }

  if ((srcImage->getMemFlags() & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) &&
      (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    return NULL;
  }

  // origin and region are indexed unconditionally below, so a NULL pointer
  // must be reported as an invalid value instead of being dereferenced.
  if ((origin == NULL) || (region == NULL)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }

  // Unused trailing region components must be 1 for 1D and 2D images.
  if ((srcImage->getDims() == 1) && ((region[1] != 1) || (region[2] != 1))) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }

  if ((srcImage->getDims() == 2) && (region[2] != 1)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }

  amd::Coord3D srcOrigin(origin[0], origin[1], origin[2]);
  amd::Coord3D srcRegion(region[0], region[1], region[2]);

  // NOTE(review): once a mip view has been created below, the early error
  // returns that follow do not undo the incMapCount()/retain() performed
  // here - confirm whether those paths need cleanup.
  ImageViewRef mip;
  if (srcImage->getMipLevels() > 1) {
    // Create a view for the specified mip level; the level is read from the
    // origin element that follows the image's own dimensions.
    mip = srcImage->createView(srcImage->getContext(), srcImage->getImageFormat(), hostQueue.vdev(),
                               origin[srcImage->getDims()]);
    if (mip() == NULL) {
      *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
      return NULL;
    }
    // Reset the mip level value to 0, since a view was created
    if (srcImage->getDims() < 3) {
      srcOrigin.c[srcImage->getDims()] = 0;
    }
    srcImage->incMapCount();
    srcImage = mip();
    // Retain this view until unmap is done
    srcImage->retain();
  }

  if (!srcImage->validateRegion(srcOrigin, srcRegion)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return NULL;
  }

  // Wait for possible pending operations
  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    *not_null(errcode_ret) = err;
    return NULL;
  }

  // Make sure we have memory for the command execution
  device::Memory* mem = srcImage->getDeviceMemory(hostQueue.device());
  if (NULL == mem) {
    LogPrintfError("Can't allocate memory size - 0x%08X bytes!", srcImage->getSize());
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    return NULL;
  }
  // Attempt to allocate the map target now (whether blocking or non-blocking)
  void* mapPtr = mem->allocMapTarget(srcOrigin, srcRegion, map_flags,
                                     image_row_pitch, image_slice_pitch);
  if (NULL == mapPtr) {
    *not_null(errcode_ret) = CL_MAP_FAILURE;
    return NULL;
  }

  // Allocate a map command for the queue thread
  amd::MapMemoryCommand* command = new amd::MapMemoryCommand(
      hostQueue, CL_COMMAND_MAP_IMAGE, eventWaitList, *srcImage, map_flags,
      blocking_map ? true : false, srcOrigin, srcRegion, nullptr, nullptr, mapPtr);
  if (command == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return NULL;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    return NULL;
  }

  if (srcImage->getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
    // [Windows VidMM restriction]
    // Runtime can't map persistent memory if it's still busy or
    // even wasn't submitted to HW from the worker thread yet
    hostQueue.finish();
  }

  // Send the map command for processing
  command->enqueue();

  // A blocking map has to wait for completion
  if (blocking_map) {
    command->awaitCompletion();
  }

  // Save the command event if application has requested it; otherwise drop
  // the reference to the command here.
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  srcImage->incMapCount();

  return mapPtr;
}
RUNTIME_EXIT

/*! \brief Enqueue a command to unmap a previously mapped region of a memory
 *  object.
 *
 *  Reads or writes from the host using the pointer returned by
 *  clEnqueueMapBuffer or clEnqueueMapImage are considered to be complete.
 *
 *  \param command_queue must be a valid command-queue.
 *
 *  \param memobj is a valid memory object. The OpenCL context associated with
 *  \a command_queue and \a memobj must be the same.
 *
 *  \param mapped_ptr is the host address returned by a previous call to
 *  clEnqueueMapBuffer or clEnqueueMapImage for \a memobj.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before
 *  clEnqueueUnmapMemObject can be executed. If \a event_wait_list is NULL,
 *  then clEnqueueUnmapMemObject does not wait on any event to complete. If
 *  \a event_wait_list is NULL, \a num_events_in_wait_list must be 0. If
 *  \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0.  The events specified in \a event_wait_list act as
 *  synchronization points.
 *
 *  \param event returns an event object that identifies this particular command
 *  and can be used to query or queue a wait for this particular command to
 *  complete. \a event can be NULL in which case it will not be possible for the
 *  application to query the status of this command or queue a wait for this
 *  command to complete. clEnqueueBarrier can be used instead.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
 *  - CL_INVALID_MEM_OBJECT if \a memobj is not a valid memory object.
 *  - CL_INVALID_VALUE if \a mapped_ptr is not a valid pointer returned by
 *    clEnqueueMapBuffer or clEnqueueMapImage for \a memobj.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or if \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the runtime.
 *  - CL_INVALID_CONTEXT if context associated with \a command_queue and
 *    \a memobj are not the same.
 *
 * clEnqueueMapBuffer and clEnqueueMapImage increments the mapped count of the
 * memory object. Multiple calls to clEnqueueMapBuffer or clEnqueueMapImage on
 * the same memory object will increment this mapped count by appropriate number
 * of calls. clEnqueueUnmapMemObject decrements the mapped count of the memory
 * object. clEnqueueMapBuffer and clEnqueueMapImage act as synchronization
 * points for a region of the memory object being mapped.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clEnqueueUnmapMemObject,
              (cl_command_queue command_queue, cl_mem memobj, void* mapped_ptr,
               cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event)) {
  // Both handles have to be valid before they are converted to runtime objects.
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  if (!is_valid(memobj)) {
    return CL_INVALID_MEM_OBJECT;
  }

  amd::Memory* memory = as_amd(memobj);

  // The command can only be submitted to a host queue.
  amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
  if (hostQueue == NULL) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // Queue and memory object must belong to the same context.
  if (hostQueue->context() != memory->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // Translate the caller's wait list into the runtime representation.
  amd::Command::EventWaitList waitList;
  cl_int status = amd::clSetEventWaitList(waitList, *hostQueue, num_events_in_wait_list,
                                          event_wait_list);
  if (status != CL_SUCCESS) {
    return status;
  }

  amd::UnmapMemoryCommand* unmapCmd = new amd::UnmapMemoryCommand(
      *hostQueue, CL_COMMAND_UNMAP_MEM_OBJECT, waitList, *memory, mapped_ptr);
  if (unmapCmd == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // The command needs backing memory before it can be submitted.
  if (!unmapCmd->validateMemory()) {
    delete unmapCmd;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  // A persistently mapped allocation turns the unmap into a blocking call;
  // the state is sampled before the command is enqueued.
  device::Memory* devMemory = memory->getDeviceMemory(hostQueue->device());
  const bool mustWait = devMemory->isPersistentMapped() ? true : false;

  memory->decMapCount();
  unmapCmd->enqueue();

  if (mustWait) {
    LogInfo("blocking wait in unmapping function");
    unmapCmd->awaitCompletion();
  }

  // Hand the event to the caller, or drop the command reference when no
  // event was requested.
  *not_null(event) = as_cl(&unmapCmd->event());
  if (event == NULL) {
    unmapCmd->release();
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! @}
 *  \addtogroup CL_MemObjQuery
 *  @{
 */

/*! \brief Get information that is common to all memory objects (buffer and
 *  image objects)
 *
 *  \param memobj specifies the memory object being queried.
 *
 *  \param param_name specifies the information to query.
 *
 *  \param param_value is a pointer to memory where the appropriate result being
 *  queried is returned. If \a param_value is NULL, it is ignored.
 *
 *  \param param_value_size is used to specify the size in bytes of memory
 *  pointed to by \a param_value. This size must be >= size of return type.
 *
 *  \param param_value_size_ret returns the actual size in bytes of data being
 *  queried by \a param_value. If \a param_value_size_ret is NULL, it is
 *  ignored.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
 *    specified by \a param_value_size is < size of return type.
 *  - CL_INVALID_MEM_OBJECT if \a memobj is not a valid memory object.
 *
 *  \version 1.0r33
 */
RUNTIME_ENTRY(cl_int, clGetMemObjectInfo,
              (cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void* param_value,
               size_t* param_value_size_ret)) {
  if (!is_valid(memobj)) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Every recognised query copies its value out through amd::clGetInfo();
  // anything that falls out of the switch is reported as CL_INVALID_VALUE.
  switch (param_name) {
    case CL_MEM_TYPE: {
      cl_mem_object_type type = as_amd(memobj)->getType();
      return amd::clGetInfo(type, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_FLAGS: {
      cl_mem_flags flags = as_amd(memobj)->getMemFlags();
      return amd::clGetInfo(flags, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_SIZE: {
      size_t size = as_amd(memobj)->getSize();
      return amd::clGetInfo(size, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_HOST_PTR: {
      // The host pointer is only reported for CL_MEM_USE_HOST_PTR objects.
      amd::Memory* memory = as_amd(memobj);
      const void* hostPtr =
          (memory->getMemFlags() & CL_MEM_USE_HOST_PTR) ? memory->getHostMem() : NULL;
      return amd::clGetInfo(hostPtr, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_MAP_COUNT: {
      cl_uint count = as_amd(memobj)->mapCount();
      return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_REFERENCE_COUNT: {
      cl_uint count = as_amd(memobj)->referenceCount();
      return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_CONTEXT: {
      cl_context context = as_cl(&as_amd(memobj)->getContext());
      return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_ASSOCIATED_MEMOBJECT: {
      // A parent that has an SVM pointer and no parent of its own is not
      // reported as an associated memory object.
      amd::Memory* amdParent = as_amd(memobj)->parent();
      if ((NULL != amdParent) && (NULL != amdParent->getSvmPtr()) &&
          (NULL == amdParent->parent())) {
        amdParent = NULL;
      }
      cl_mem parent = as_cl(amdParent);
      return amd::clGetInfo(parent, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_OFFSET: {
      size_t mem_offset = as_amd(memobj)->getOrigin();
      return amd::clGetInfo(mem_offset, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_USES_SVM_POINTER: {
      cl_bool usesSvmPointer = as_amd(memobj)->usesSvmPointer();
      return amd::clGetInfo(usesSvmPointer, param_value_size, param_value, param_value_size_ret);
    }
#ifdef _WIN32
    case CL_MEM_D3D10_RESOURCE_KHR: {
      // Initialised so that an interop object which is not a D3D10 object
      // yields a null resource instead of an indeterminate pointer.
      ID3D10Resource* pRes = nullptr;

      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      if (interop) {
        amd::D3D10Object* d3d10obj = interop->asD3D10Object();
        if (d3d10obj) {
          // Prefer the original resource; fall back to the current one.
          pRes = d3d10obj->getD3D10ResOrig();
          if (!pRes) {
            pRes = d3d10obj->getD3D10Resource();
          }
        }
        return amd::clGetInfo(pRes, param_value_size, param_value, param_value_size_ret);
      }
      break;
    }
    case CL_MEM_D3D11_RESOURCE_KHR: {
      // Initialised so that an interop object which is not a D3D11 object
      // yields a null resource instead of an indeterminate pointer.
      ID3D11Resource* pRes = nullptr;

      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      if (interop) {
        amd::D3D11Object* d3d11obj = interop->asD3D11Object();
        if (d3d11obj) {
          // Prefer the original resource; fall back to the current one.
          pRes = d3d11obj->getD3D11ResOrig();
          if (!pRes) {
            pRes = d3d11obj->getD3D11Resource();
          }
        }
        return amd::clGetInfo(pRes, param_value_size, param_value, param_value_size_ret);
      }
      break;
    }
    case CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR: {
      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      if (interop) {
        amd::D3D9Object* d3d9obj = interop->asD3D9Object();
        if (d3d9obj)
          return amd::clGetInfo(d3d9obj->getSurfInfo(), param_value_size, param_value,
                                param_value_size_ret);
        else
          return CL_INVALID_MEM_OBJECT;
      } else
        return CL_INVALID_MEM_OBJECT;
      break;
    }
    case CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR: {
      // Initialised so that an interop object which is not a D3D9 object
      // yields 0 instead of an indeterminate value.
      cl_dx9_media_adapter_type_khr adapterType = 0;

      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      if (interop) {
        amd::D3D9Object* d3d9obj = interop->asD3D9Object();
        if (d3d9obj) {
          adapterType = d3d9obj->getAdapterType();
        }
        return amd::clGetInfo(adapterType, param_value_size, param_value, param_value_size_ret);
      }
      break;
    }
#endif  //_WIN32
    default:
      break;
  }

  return CL_INVALID_VALUE;
}
RUNTIME_EXIT

/*! \brief Get information specific to an image object.
 *
 *  \param memobj specifies the image object being queried.
 *
 *  \param param_name specifies the information to query.
 *
 *  \param param_value is a pointer to memory where the appropriate result being
 *  queried is returned. If \a param_value is NULL, it is ignored.
 *
 *  \param param_value_size is used to specify the size in bytes of memory
 *  pointed to by \a param_value.  This size must be >= size of return type.
 *
 *  \param param_value_size_ret returns the actual size in bytes of data being
 *  queried by \a param_value. If \a param_value_size_ret is NULL, it is
 *  ignored.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function is executed successfully
 *  - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
 *    specified by \a param_value_size is < size of return type and
 *    \a param_value is not NULL.
 *  - CL_INVALID_MEM_OBJECT if \a memobj is not a valid image object.
 *
 *  \version 1.2r09
 */
RUNTIME_ENTRY(cl_int, clGetImageInfo,
              (cl_mem memobj, cl_image_info param_name, size_t param_value_size, void* param_value,
               size_t* param_value_size_ret)) {
  // The handle must be a valid memory object that is also an image.
  amd::Image* image = is_valid(memobj) ? as_amd(memobj)->asImage() : NULL;
  if (image == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Each recognised query returns directly; unknown names end up at the
  // CL_INVALID_VALUE return after the switch.
  switch (param_name) {
    case CL_IMAGE_FORMAT: {
      cl_image_format fmt = image->getImageFormat();
      return amd::clGetInfo(fmt, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_ELEMENT_SIZE: {
      size_t bytesPerElement = image->getImageFormat().getElementSize();
      return amd::clGetInfo(bytesPerElement, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_ROW_PITCH: {
      size_t pitch = image->getRowPitch();
      return amd::clGetInfo(pitch, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_SLICE_PITCH: {
      size_t pitch = image->getSlicePitch();
      return amd::clGetInfo(pitch, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_WIDTH: {
      size_t w = image->getWidth();
      return amd::clGetInfo(w, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_HEIGHT: {
      // All 1D image flavours report a height of 0.
      size_t h = image->getHeight();
      switch (image->getType()) {
        case CL_MEM_OBJECT_IMAGE1D:
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        case CL_MEM_OBJECT_IMAGE1D_BUFFER:
          h = 0;
          break;
        default:
          break;
      }
      return amd::clGetInfo(h, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_DEPTH: {
      // 1D and 2D images, including their array and buffer flavours, report
      // a depth of 0.
      size_t d = image->getDepth();
      switch (image->getType()) {
        case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        case CL_MEM_OBJECT_IMAGE1D:
        case CL_MEM_OBJECT_IMAGE2D:
          d = 0;
          break;
        default:
          break;
      }
      return amd::clGetInfo(d, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_ARRAY_SIZE: {
      // The array size is taken from the height of a 1D array and from the
      // depth of a 2D array; every other image type reports 0.
      size_t layers = 0;
      switch (image->getType()) {
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
          layers = image->getHeight();
          break;
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
          layers = image->getDepth();
          break;
        default:
          break;
      }
      return amd::clGetInfo(layers, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_BUFFER: {
      // Walk up the parent chain until a buffer (or the end of the chain) is found.
      amd::Memory* ancestor = image->parent();
      for (; ancestor && (ancestor->asBuffer() == NULL); ancestor = ancestor->parent()) {
      }
      cl_mem buffer = as_cl(ancestor);
      return amd::clGetInfo(buffer, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_NUM_MIP_LEVELS: {
      cl_uint levels = image->getMipLevels();
      return amd::clGetInfo(levels, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_NUM_SAMPLES: {
      // Always reported as 0.
      cl_uint samples = 0;
      return amd::clGetInfo(samples, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_BYTE_PITCH_AMD: {
      size_t pitch = image->getBytePitch();
      return amd::clGetInfo(pitch, param_value_size, param_value, param_value_size_ret);
    }
#ifdef _WIN32
    // The interop queries below fail with CL_INVALID_MEM_OBJECT when the
    // object has no interop object or one of a different D3D type.
    case CL_IMAGE_D3D10_SUBRESOURCE_KHR: {
      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      amd::D3D10Object* d3d10 = interop ? interop->asD3D10Object() : NULL;
      if (!d3d10) {
        return CL_INVALID_MEM_OBJECT;
      }
      UINT subresource = d3d10->getSubresource();
      return amd::clGetInfo(subresource, param_value_size, param_value, param_value_size_ret);
    }
    case CL_IMAGE_D3D11_SUBRESOURCE_KHR: {
      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      amd::D3D11Object* d3d11 = interop ? interop->asD3D11Object() : NULL;
      if (!d3d11) {
        return CL_INVALID_MEM_OBJECT;
      }
      UINT subresource = d3d11->getSubresource();
      return amd::clGetInfo(subresource, param_value_size, param_value, param_value_size_ret);
    }
    case CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR: {
      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      amd::D3D9Object* d3d9 = interop ? interop->asD3D9Object() : NULL;
      if (!d3d9) {
        return CL_INVALID_MEM_OBJECT;
      }
      return amd::clGetInfo(d3d9->getSurfInfo(), param_value_size, param_value,
                            param_value_size_ret);
    }
    case CL_IMAGE_DX9_MEDIA_PLANE_KHR: {
      amd::InteropObject* interop = ((amd::Memory*)as_amd(memobj))->getInteropObj();
      amd::D3D9Object* d3d9 = interop ? interop->asD3D9Object() : NULL;
      if (!d3d9) {
        return CL_INVALID_MEM_OBJECT;
      }
      cl_uint planeIndex = d3d9->getPlane();
      return amd::clGetInfo(planeIndex, param_value_size, param_value, param_value_size_ret);
    }
#endif  //_WIN32
    default:
      break;
  }
  return CL_INVALID_VALUE;
}
RUNTIME_EXIT

/*! \brief creates a 1D image, 1D image buffer, 1D image array, 2D image,
 *  2D image array and 3D image object
 *
 *  \param context is a valid OpenCL context on which the image object is
 *  to be created.
 *
 *  \param flags is a bit-field that is used to specify allocation and usage
 *  information about the image memory object being created and is described
 *  in table 5.3. If value specified for flags is 0, the default is used which
 *  is CL_MEM_READ_WRITE.
 *
 *  \param image_format is a pointer to a structure that describes format
 *  properties of the image to be allocated. Refer to section 5.3.1.1 for
 *  a detailed description of the image format descriptor.
 *
 *  \param image_desc is a pointer to a structure that describes type and
 *  dimensions of the image to be allocated. Refer to section 5.3.1.2 for
 *  a detailed description of the image descriptor.
 *
 *  \param host_ptr is a pointer to the image data that may already be
 *  allocated by the application. Refer to table below for a description of
 *  how large the buffer that host_ptr points to must be.
 *      CL_MEM_OBJECT_IMAGE1D >= image_row_pitch
 *      CL_MEM_OBJECT_IMAGE1D_BUFFER >= image_row_pitch
 *      CL_MEM_OBJECT_IMAGE2D >= image_row_pitch * image_height
 *      CL_MEM_OBJECT_IMAGE3D >= image_slice_pitch * image_depth
 *      CL_MEM_OBJECT_IMAGE1D_ARRAY >= image_slice_pitch * image_array_size
 *      CL_MEM_OBJECT_IMAGE2D_ARRAY >= image_slice_pitch * image_array_size
 *  For a 3D image or 2D image array, the image data specified by \a host_ptr
 *  is stored as a linear sequence of adjacent 2D image slices or 2D images
 *  respectively. Each 2D image is a linear sequence of adjacent scanlines.
 *  Each scanline is a linear sequence of image elements.
 *  For a 2D image, the image data specified by \a host_ptr is stored
 *  as a linear sequence of adjacent scanlines. Each scanline is a linear
 *  sequence of image elements.
 *  For a 1D image array, the image data specified by \a host_ptr is stored
 *  as a linear sequence of adjacent 1D images respectively. Each 1D image
 *  or 1D image buffer is a single scanline which is a linear sequence of
 *  adjacent elements.
 *
 *  \param errcode_ret will return an appropriate error code.
 *  If \a errcode_ret is NULL, no error code is returned.
 *
 *  \return a valid non-zero image object created and the \a errcode_ret is
 *  set to CL_SUCCESS if the image object is created successfully. Otherwise,
 *  it returns a NULL value with one of the following error values
 *  returned in \a errcode_ret:
 *  - CL_INVALID_CONTEXT if \a context is not a valid context.
 *  - CL_INVALID_VALUE if values specified in \a flags are not valid.
 *  - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if values specified in \a image_format
 *    are not valid or if \a image_format is NULL.
 *  - CL_INVALID_IMAGE_DESCRIPTOR if values specified in \a image_desc are
 *    not valid or if \a image_desc is NULL.
 *  - CL_INVALID_HOST_PTR if \a host_ptr in \a image_desc is NULL and
 *    CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR are set in \a flags or
 *    if \a host_ptr is not NULL, but CL_MEM_COPY_HOST_PTR or
 *    CL_MEM_USE_HOST_PTR are not set in \a flags.
 *  - CL_INVALID_VALUE if a 1D image buffer is being created and
 *    the buffer object was created with CL_MEM_WRITE_ONLY and \a flags
 *    specifies CL_MEM_READ_WRITE or CL_MEM_READ_ONLY, or if the buffer object
 *    was created with CL_MEM_READ_ONLY and \a flags specifies
 *    CL_MEM_READ_WRITE or CL_MEM_WRITE_ONLY, or if \a flags specifies
 *    CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR or CL_MEM_COPY_HOST_PTR.
 *  - CL_IMAGE_FORMAT_NOT_SUPPORTED if the image_format is not supported.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory
 *    for image object.
 *  - CL_INVALID_OPERATION if there are no devices in \a context that support
 *    images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in table 4.3 is CL_FALSE).
 *  - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
 *    by the OpenCL implementation on the device.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
 *    by the OpenCL implementation on the host.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY_RET(cl_mem, clCreateImage,
                  (cl_context context, cl_mem_flags flags, const cl_image_format* image_format,
                   const cl_image_desc* image_desc, void* host_ptr, cl_int* errcode_ret)) {
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    LogWarning("invalid parameter: context");
    return (cl_mem)0;
  }
  // check flags for validity
  if (!validateFlags(flags)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    LogWarning("invalid parameter: flags");
    return (cl_mem)0;
  }
  // check format
  if (image_format == NULL) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter: image_format");
    return (cl_mem)0;
  }

  const amd::Image::Format imageFormat(*image_format);
  if (!imageFormat.isValid()) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter: image_format");
    return (cl_mem)0;
  }

  // The descriptor is dereferenced by every check from here on, so a NULL
  // pointer has to be rejected before the first access.
  if (image_desc == NULL) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR;
    LogWarning("invalid parameter: image_desc");
    return (cl_mem)0;
  }

  amd::Context& amdContext = *as_amd(context);

  if (!imageFormat.isSupported(amdContext, image_desc->image_type)) {
    *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED;
    LogWarning("invalid parameter: image_format");
    return (cl_mem)0;
  }

  // check host_ptr consistency: host_ptr must be provided if and only if
  // CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR is requested
  if (host_ptr == NULL) {
    if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) {
      *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
      LogWarning("invalid parameter: host_ptr");
      return (cl_mem)0;
    }
  } else {
    if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) {
      *not_null(errcode_ret) = CL_INVALID_HOST_PTR;
      LogWarning("invalid parameter: host_ptr");
      return (cl_mem)0;
    }
  }

  // At least one device of the context has to support images
  const std::vector<amd::Device*>& devices = amdContext.devices();
  bool supportPass = false;
  for (auto& dev : devices) {
    if (dev->info().imageSupport_) {
      supportPass = true;
      break;
    }
  }

  if (!supportPass) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    LogWarning("there are no devices in context to support images");
    return (cl_mem)0;
  }

  if (!amd::Image::validateDimensions(devices, image_desc->image_type, image_desc->image_width,
                                      image_desc->image_height, image_desc->image_depth,
                                      image_desc->image_array_size)) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_SIZE;
    LogWarning("invalid parameter: image dimensions exceeding max");
    return (cl_mem)0;
  }

  // validateImageDescriptor() fills in the row and slice pitch that are
  // passed to the image constructors below
  size_t imageRowPitch = 0;
  size_t imageSlicePitch = 0;
  if (!validateImageDescriptor(devices, imageFormat, image_desc, host_ptr, imageRowPitch,
                               imageSlicePitch)) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR;
    LogWarning("invalid parameter: image_desc");
    return (cl_mem)0;
  }

  // Validate mip level: count how many times the largest dimension can be
  // halved before reaching zero and reject requests for more levels than that
  if (image_desc->num_mip_levels != 0) {
    size_t maxDim = std::max(image_desc->image_width, image_desc->image_height);
    maxDim = std::max(maxDim, image_desc->image_depth);
    uint mipLevels;
    for (mipLevels = 0; maxDim > 0; maxDim >>= 1, mipLevels++)
      ;
    if (mipLevels < image_desc->num_mip_levels) {
      *not_null(errcode_ret) = CL_INVALID_MIP_LEVEL;
      LogWarning("Invalid mip level");
      return (cl_mem)0;
    }
  }
  amd::Image* image = NULL;

  switch (image_desc->image_type) {
    case CL_MEM_OBJECT_IMAGE1D:
      image = new (amdContext)
          amd::Image(amdContext, CL_MEM_OBJECT_IMAGE1D, flags, imageFormat, image_desc->image_width,
                     1, 1, imageRowPitch, 0, image_desc->num_mip_levels);
      break;
    case CL_MEM_OBJECT_IMAGE2D:
      if (image_desc->mem_object != NULL) {
        // 2D image created on top of an existing buffer object.
        // asBuffer() yields NULL for a memory object that is not a buffer.
        amd::Buffer* parent = as_amd(image_desc->mem_object)->asBuffer();
        if (parent == NULL) {
          *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR;
          LogWarning("invalid parameter: image_desc");
          return (cl_mem)0;
        }
        amd::Buffer& buffer = *parent;
        if (&amdContext != &buffer.getContext()) {
          *not_null(errcode_ret) = CL_INVALID_CONTEXT;
          LogWarning("invalid parameter: context");
          return (cl_mem)0;
        }

        // host_ptr is not supported, the buffer object is used instead.
        if ((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) != 0) {
          *not_null(errcode_ret) = CL_INVALID_VALUE;
          LogWarning("invalid parameter: flags");
          return (cl_mem)0;
        }

        // Pick the largest row pitch alignment reported by any device
        cl_uint pitchAlignment = 0;
        for (auto& dev : devices) {
          if (pitchAlignment < dev->info().imagePitchAlignment_) {
            pitchAlignment = dev->info().imagePitchAlignment_;
          }
        }
        // An alignment of 0 is treated as "no requirement"; testing for it
        // first also keeps the modulo from dividing by zero.
        if ((pitchAlignment != 0) && ((imageRowPitch % pitchAlignment) != 0)) {
          *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
          LogWarning("invalid parameter: image row pitch alignment");
          return (cl_mem)0;
        }

        // flags == 0 means the image inherits the access flags of the buffer
        image = new (amdContext) amd::Image(
            buffer, CL_MEM_OBJECT_IMAGE2D, (flags != 0) ? flags : buffer.getMemFlags(), imageFormat,
            image_desc->image_width, image_desc->image_height, 1, imageRowPitch, imageSlicePitch);
      } else {
        image = new (amdContext) amd::Image(amdContext, CL_MEM_OBJECT_IMAGE2D, flags, imageFormat,
                                            image_desc->image_width, image_desc->image_height, 1,
                                            imageRowPitch, 0, image_desc->num_mip_levels);
      }
      break;
    case CL_MEM_OBJECT_IMAGE3D:
      image = new (amdContext)
          amd::Image(amdContext, CL_MEM_OBJECT_IMAGE3D, flags, imageFormat, image_desc->image_width,
                     image_desc->image_height, image_desc->image_depth, imageRowPitch,
                     imageSlicePitch, image_desc->num_mip_levels);
      break;
    case CL_MEM_OBJECT_IMAGE1D_BUFFER: {
      // A 1D image buffer always needs a backing buffer object; reject a
      // missing handle or a memory object that is not a buffer.
      amd::Buffer* parent =
          (image_desc->mem_object != NULL) ? as_amd(image_desc->mem_object)->asBuffer() : NULL;
      if (parent == NULL) {
        *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR;
        LogWarning("invalid parameter: image_desc");
        return (cl_mem)0;
      }
      amd::Buffer& buffer = *parent;
      if (&amdContext != &buffer.getContext()) {
        *not_null(errcode_ret) = CL_INVALID_CONTEXT;
        LogWarning("invalid parameter: context");
        return (cl_mem)0;
      }

      // host_ptr is not supported, the buffer object is used instead.
      if ((flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)) != 0) {
        *not_null(errcode_ret) = CL_INVALID_VALUE;
        LogWarning("invalid parameter: flags");
        return (cl_mem)0;
      }

      // flags == 0 means the image inherits the access flags of the buffer
      image = new (amdContext) amd::Image(
          buffer, CL_MEM_OBJECT_IMAGE1D_BUFFER, (flags != 0) ? flags : buffer.getMemFlags(),
          imageFormat, image_desc->image_width, 1, 1, imageRowPitch, imageSlicePitch);
    } break;
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
      image =
          new (amdContext) amd::Image(amdContext, CL_MEM_OBJECT_IMAGE1D_ARRAY, flags, imageFormat,
                                      image_desc->image_width, image_desc->image_array_size, 1,
                                      imageRowPitch, imageSlicePitch, image_desc->num_mip_levels);
      break;
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
      image = new (amdContext) amd::Image(
          amdContext, CL_MEM_OBJECT_IMAGE2D_ARRAY, flags, imageFormat, image_desc->image_width,
          image_desc->image_height, image_desc->image_array_size, imageRowPitch, imageSlicePitch,
          image_desc->num_mip_levels);
      break;
    default: {
      // Unknown image type: nothing was allocated, return a null handle
      *not_null(errcode_ret) = CL_INVALID_IMAGE_DESCRIPTOR;
      LogWarning("invalid parameter: image_desc");
      return (cl_mem)0;
    }
  }

  if (image == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    LogWarning("cannot allocate resources");
    return (cl_mem)0;
  }

  if (!image->create(host_ptr)) {
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    image->release();
    return (cl_mem)0;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return (cl_mem)as_cl<amd::Memory>(image);
}
RUNTIME_EXIT

/*! \brief Enqueues a command to fill a buffer object with
 *  a pattern of a given pattern size.
 *
 *  \param command_queue refers to the command-queue in which
 *  the fill command will be queued. The OpenCL context associated with
 *  command_queue and buffer must be the same.
 *
 *  \param buffer is a valid buffer object.
 *
 *  \param pattern is a pointer to the data pattern of size pattern_size
 *  in bytes. pattern will be used to fill a region in buffer starting
 *  at offset and is cb bytes in size. The data pattern must be a scalar or
 *  vector integer or floating-point data type supported by OpenCL
 *  as described in sections 6.1.1 and 6.1.2. For example, if buffer is
 *  to be filled with a pattern of float4 values, then pattern will be
 *  a pointer to a cl_float4 value and pattern_size will be sizeof(cl_float4).
 *  The maximum value of pattern_size is the size of the largest integer or
 *  floating-point vector data type supported by the OpenCL device.
 *
 *  \param offset is the location in bytes of the region being filled
 *  in buffer and must be a multiple of pattern_size. size is the size
 *  in bytes of region being filled in buffer and must be a multiple
 *  of pattern_size.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL,
 *  then this particular command does not wait on any event to complete.
 *  If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points. The context associated with events in
 *  \a event_wait_list and \a command_queue must be the same.
 *  The memory associated with \a event_wait_list can be reused or
 *  freed after the function returns.
 *
 *  \param event returns an event object that identifies this particular command
 *  and can be used to query or queue a wait for this particular command to
 *  complete. \a event can be NULL in which case it will not be possible for the
 *  application to query the status of this command or queue a wait for this
 *  command to complete. clEnqueueBarrierWithWaitList can be used instead.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_CONTEXT if context associated with \a command_queue and
 *    \a buffer are not the same or if the \a context associated with
 *    \a command_queue and \a events in \a event_wait_list are not the same.
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
 *  - CL_INVALID_MEM_OBJECT if \a memobj is not a valid memory object.
 *  - CL_INVALID_VALUE if pattern is NULL or if pattern_size is 0 or if
 *    \a pattern_size is not one of {1, 2, 4, 8, 16, 32, 64, 128}.
 *  - CL_INVALID_VALUE if \a offset or \a offset + \a size require accessing
 *    elements outside the \a buffer object respectively.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or if \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
 *    by the OpenCL implementation on the device.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *    required by the OpenCL implementation on the host.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY(cl_int, clEnqueueFillBuffer,
              (cl_command_queue command_queue, cl_mem buffer, const void* pattern,
               size_t pattern_size, size_t offset, size_t size, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  if (!is_valid(buffer)) {
    return CL_INVALID_MEM_OBJECT;
  }

  // Only buffer objects can be filled through this entry point
  amd::Buffer* fillBuffer = as_amd(buffer)->asBuffer();
  if (fillBuffer == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  // The pattern must be present, non-empty, no larger than the maximum fill
  // pattern size and a power of two in size
  if ((pattern == NULL) || (pattern_size == 0) ||
      (pattern_size > amd::FillMemoryCommand::MaxFillPatterSize) ||
      ((pattern_size & (pattern_size - 1)) != 0)) {
    return CL_INVALID_VALUE;
  }

  // Both offset and size must be a multiple of pattern_size
  if (((offset % pattern_size) != 0) || ((size % pattern_size) != 0)) {
    return CL_INVALID_VALUE;
  }

  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  if (hostQueue.context() != fillBuffer->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  amd::Coord3D fillOffset(offset, 0, 0);
  amd::Coord3D fillSize(size, 1, 1);
  // surface takes [pitch, width, height]
  amd::Coord3D surface(size, size, 1);
  if (!fillBuffer->validateRegion(fillOffset, fillSize)) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  amd::FillMemoryCommand* command =
      new amd::FillMemoryCommand(hostQueue, CL_COMMAND_FILL_BUFFER, eventWaitList, *fillBuffer,
                                 pattern, pattern_size, fillOffset, fillSize, surface);

  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();

  // Hand the event to the caller if requested; otherwise drop our reference
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief enqueues a command to fill an image object with
 *  a specified color.
 *
 *  \param command_queue refers to the command-queue in which
 *  the fill command will be queued. The OpenCL context associated with
 *  command_queue and image must be the same.
 *
 *  \param image is a valid image object.
 *
 *  \param fill_color is the fill color. The fill color is a four
 *  component RGBA floating-point color value if the image channel data type
 *  is not an unnormalized signed and unsigned integer type, is a four
 *  component signed integer value if the image channel data type is
 *  an unnormalized signed integer type and is a four component unsigned
 *  integer value if the image channel data type is an unnormalized
 *  unsigned integer type. The fill color will be converted to
 *  the appropriate image channel format and order associated with image
 *  as described in sections 6.11.13 and 8.3.
 *
 *  \param origin defines the (x, y, z) offset in pixels in the image
 *  or (x, y) offset and the image index in the image array. If image is
 *  a 2D image object, origin[2] must be 0. If image is a 1D image or 1D
 *  image buffer object, origin[1] and origin[2] must be 0. If image is
 *  a 1D image array object, origin[2] must be 0. If image is a 1D image array
 *  object, origin[1] describes the image index in the 1D image array.
 *  If image is a 2D image array object, origin[2] describes the image index
 *  in the 2D image array.
 *
 *  \param region defines the (width, height, depth) in pixels of
 *  the 1D, 2D or 3D rectangle or the (width, height) in pixels of
 *  the 1D or 2D rectangle and the image index of an image array. If image is
 *  a 2D image object, region[2] must be 1. If image is a 1D image or
 *  1D image buffer object, region[1] and region[2] must be 1. If image is
 *  a 1D image array object, region[1] and region[2] must be 1.
 *  If image is a 2D image array object, region[2] must be 1.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL,
 *  then this particular command does not wait on any event to complete.
 *  If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points. The context associated with events in
 *  \a event_wait_list and \a command_queue must be the same.
 *  The memory associated with \a event_wait_list can be reused or
 *  freed after the function returns.
 *
 *  \param event returns an event object that identifies this particular command
 *  and can be used to query or queue a wait for this particular command to
 *  complete. \a event can be NULL in which case it will not be possible for
 *  the application to query the status of this command or queue a wait for this
 *  command to complete. clEnqueueBarrierWithWaitList can be used instead.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_CONTEXT if context associated with \a command_queue and
 *    \a buffer are not the same or if the \a context associated with
 *    \a command_queue and \a events in \a event_wait_list are not the same.
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
 *  - CL_INVALID_MEM_OBJECT if \a memobj is not a valid memory object.
 *  - CL_INVALID_VALUE if fill_color is NULL.
 *  - CL_INVALID_VALUE if the region being filled as specified by origin and
 *    region is out of bounds.
 *  - CL_INVALID_VALUE if values in origin and region do not follow rules
 *    described in the argument description for origin and region.
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or if \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified
 *    or compute row and/or slice pitch) for image are not supported by device
 *    associated with queue.
 *  - CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and
 *    data type) for image are not supported by device associated with queue.
 *  - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
 *    by the OpenCL implementation on the device.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *    required by the OpenCL implementation on the host.
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY(cl_int, clEnqueueFillImage,
              (cl_command_queue command_queue, cl_mem image, const void* fill_color,
               const size_t* origin, const size_t* region, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  if (!is_valid(image)) {
    return CL_INVALID_MEM_OBJECT;
  }

  if (fill_color == NULL) {
    return CL_INVALID_VALUE;
  }

  // origin and region are indexed unconditionally below, so NULL must be
  // rejected up front
  if ((origin == NULL) || (region == NULL)) {
    return CL_INVALID_VALUE;
  }

  // Only image objects can be filled through this entry point
  amd::Image* fillImage = as_amd(image)->asImage();
  if (fillImage == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  amd::HostQueue* queue = as_amd(command_queue)->asHostQueue();
  if (NULL == queue) {
    return CL_INVALID_COMMAND_QUEUE;
  }
  amd::HostQueue& hostQueue = *queue;

  if (hostQueue.context() != fillImage->getContext()) {
    return CL_INVALID_CONTEXT;
  }

  // Depth-stencil images are rejected by this entry point
  if (fillImage->getImageFormat().image_channel_order == CL_DEPTH_STENCIL) {
    return CL_INVALID_OPERATION;
  }

  amd::Coord3D fillOrigin(origin[0], origin[1], origin[2]);
  amd::Coord3D fillRegion(region[0], region[1], region[2]);
  // surface takes [pitch, width, height]
  amd::Coord3D surface(region[0], region[0], region[2]);

  ImageViewRef mip;
  if (fillImage->getMipLevels() > 1) {
    // Create a view for the specified mip level; the level is read from the
    // origin element that follows the image's own dimensions
    mip = fillImage->createView(fillImage->getContext(), fillImage->getImageFormat(), nullptr,
                                origin[fillImage->getDims()]);
    if (mip() == nullptr) {
      return CL_OUT_OF_HOST_MEMORY;
    }
    // Reset the mip level value to 0, since a view was created
    if (fillImage->getDims() < 3) {
      fillOrigin.c[fillImage->getDims()] = 0;
    }
    fillImage = mip();
  }

  if (!fillImage->validateRegion(fillOrigin, fillRegion)) {
    return CL_INVALID_VALUE;
  }

  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  amd::FillMemoryCommand* command = new amd::FillMemoryCommand(
      hostQueue, CL_COMMAND_FILL_IMAGE, eventWaitList, *fillImage, fill_color,
      sizeof(cl_float4),  // @note color size is always 16 bytes value
      fillOrigin, fillRegion, surface);

  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Make sure we have memory for the command execution
  if (!command->validateMemory()) {
    delete command;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  command->enqueue();

  // Hand the event to the caller if requested; otherwise drop our reference
  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueues a command to indicate which device a set of memory objects
 *  should be associated with. Typically, memory objects are implicitly
 *  migrated to a device for which enqueued commands, using the memory object,
 *  are targeted. \a clEnqueueMigrateMemObjects allows this migration to be
 *  explicitly performed ahead of the dependent commands. This allows a user to
 *  preemptively change the association of a memory object, through regular
 *  command queue scheduling, in order to prepare for another upcoming
 *  command. This also permits an application to overlap the placement of
 *  memory objects with other unrelated operations before these memory objects
 *  are needed potentially hiding transfer latencies. Once the event, returned
 *  from \a clEnqueueMigrateMemObjects, has been marked \a CL_COMPLETE
 *  the memory objects specified in \a mem_objects have been successfully
 *  migrated to the device associated with \a command_queue. The migrated memory
 *  object shall remain resident on the device until another command is enqueued
 *  that either implicitly or explicitly migrates it away.
 *  \a clEnqueueMigrateMemObjects can also be used to direct the initial
 *  placement of a memory object, after creation, possibly avoiding the initial
 *  overhead of instantiating the object on the first enqueued command to use it.
 *  The user is responsible for managing the event dependencies, associated with
 *  this command, in order to avoid overlapping access to memory objects.
 *  Improperly specified event dependencies passed to
 *  \a clEnqueueMigrateMemObjects could result in undefined results.
 *
 *  \param command_queue is a valid command-queue. The specified set of memory
 *  objects in \a mem_objects will be migrated to the OpenCL device associated
 *  with \a command_queue or to the host if the \a CL_MIGRATE_MEM_OBJECT_HOST
 *  has been specified.
 *
 *  \param num_mem_objects is the number of memory objects specified in
 *  \a mem_objects. \a mem_objects is a pointer to a list of memory objects.
 *
 *  \param flags is a bit-field that is used to specify migration options.
 *  The following table describes the possible values for flags.
 *  cl_mem_migration flags      Description
 *  CL_MIGRATE_MEM_OBJECT_HOST  This flag indicates that the specified set
 *                              of memory objects are to be migrated to the
 *                              host, regardless of the target command-queue.
 *  CL_MIGRATE_MEM_OBJECT_      This flag indicates that the contents of the set
 *  CONTENT_UNDEFINED           of memory objects are undefined after migration.
 *                              The specified set of memory objects are migrated
 *                              to the device associated with \a command_queue
 *                              without incurring the overhead of migrating
 *                              their contents.
 *
 *  \param num_events_in_wait_list specifies the number of event objects in
 *  \a event_wait_list.
 *
 *  \param event_wait_list specifies events that need to complete before this
 *  particular command can be executed. If \a event_wait_list is NULL,
 *  then this particular command does not wait on any event to complete.
 *  If \a event_wait_list is NULL, \a num_events_in_wait_list must be 0.
 *  If \a event_wait_list is not NULL, the list of events pointed to by
 *  \a event_wait_list must be valid and \a num_events_in_wait_list must be
 *  greater than 0. The events specified in \a event_wait_list act as
 *  synchronization points. The context associated with events in
 *  \a event_wait_list and \a command_queue must be the same.
 *  The memory associated with \a event_wait_list can be reused or
 *  freed after the function returns.
 *
 *  \return One of the following values:
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_COMMAND_QUEUE if \a command_queue is not a valid command-queue
 *  - CL_INVALID_CONTEXT if the context associated with \a command_queue
 *    and memory objects in \a mem_objects are not the same or if the context
 *    associated with \a command_queue and events in \a event_wait_list
 *    are not the same.
 *  - CL_INVALID_MEM_OBJECT if any of the memory objects in \a mem_objects
 *    is not a valid memory object.
 *  - CL_INVALID_VALUE if \a num_mem_objects is zero or
 *    if \a mem_objects is NULL.
 *  - CL_INVALID_VALUE if flags is not 0 or any of the values described
 *    in the table above
 *  - CL_INVALID_EVENT_WAIT_LIST if \a event_wait_list is NULL and
 *    \a num_events_in_wait_list > 0, or if \a event_wait_list is not NULL and
 *    \a num_events_in_wait_list is 0, or if event objects in \a event_wait_list
 *    are not valid events.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
 *    memory for the specified set of memory objects in \a mem_objects.
 *  - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
 *    by the OpenCL implementation on the device.
 *  - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
 *    required by the OpenCL implementation on the host.
 *
 *  \version 1.2r15
 */
RUNTIME_ENTRY(cl_int, clEnqueueMigrateMemObjects,
              (cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem* mem_objects,
               cl_mem_migration_flags flags, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  // The handle must be valid and must refer to a host queue
  amd::HostQueue* targetQueue =
      is_valid(command_queue) ? as_amd(command_queue)->asHostQueue() : NULL;
  if (targetQueue == NULL) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  // An empty or missing object list is an error
  if ((mem_objects == NULL) || (num_mem_objects == 0)) {
    return CL_INVALID_VALUE;
  }

  // Any bit outside the two known migration flags is rejected
  const cl_mem_migration_flags knownFlags =
      CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;
  if ((flags & ~knownFlags) != 0) {
    return CL_INVALID_VALUE;
  }

  // Collect the runtime objects; each one has to be a valid handle that
  // belongs to the queue's context
  std::vector<amd::Memory*> migrateList;
  const cl_mem* const listEnd = mem_objects + num_mem_objects;
  for (const cl_mem* handle = mem_objects; handle != listEnd; ++handle) {
    if (!is_valid(*handle)) {
      return CL_INVALID_MEM_OBJECT;
    }
    amd::Memory* amdMemory = as_amd(*handle);
    if (targetQueue->context() != amdMemory->getContext()) {
      return CL_INVALID_CONTEXT;
    }
    migrateList.push_back(amdMemory);
  }

  amd::Command::EventWaitList waitList;
  const cl_int status = amd::clSetEventWaitList(waitList, *targetQueue, num_events_in_wait_list,
                                                event_wait_list);
  if (status != CL_SUCCESS) {
    return status;
  }

  amd::MigrateMemObjectsCommand* migrateCmd = new amd::MigrateMemObjectsCommand(
      *targetQueue, CL_COMMAND_MIGRATE_MEM_OBJECTS, waitList, migrateList, flags);
  if (migrateCmd == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // The command needs its memory in place before it can be queued
  if (!migrateCmd->validateMemory()) {
    delete migrateCmd;
    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
  }

  migrateCmd->enqueue();

  // Hand the event to the caller if requested; otherwise drop our reference
  *not_null(event) = as_cl(&migrateCmd->event());
  if (event == NULL) {
    migrateCmd->release();
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Create a new image object that is a view of \a image using
 *  \a image_format (AMD extension).
 *
 *  \param context is the OpenCL context the view is created in.
 *
 *  \param image is the source image object.
 *
 *  \param image_format is the format requested for the view.
 *
 *  \param errcode_ret receives the error code; it may be NULL, in which case
 *  no error code is returned.
 *
 *  \return The new image object with \a errcode_ret set to CL_SUCCESS, or a
 *  NULL handle with one of the following errors:
 *  - CL_INVALID_CONTEXT if \a context is not a valid context.
 *  - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR if \a image_format is NULL or not
 *    valid, or if the view could not be created.
 *  - CL_IMAGE_FORMAT_NOT_SUPPORTED if \a image_format is not supported by
 *    \a context.
 *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
 */
RUNTIME_ENTRY_RET(cl_mem, clConvertImageAMD,
                  (cl_context context, cl_mem image, const cl_image_format* image_format,
                   cl_int* errcode_ret)) {
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    LogWarning("invalid parameter: context");
    return (cl_mem)0;
  }
  // check format
  if (image_format == NULL) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter: image_format");
    return (cl_mem)0;
  }
  const amd::Image::Format imageFormat(*image_format);
  if (!imageFormat.isValid()) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("invalid parameter: image_format");
    return (cl_mem)0;
  }

  amd::Context& amdContext = *as_amd(context);
  if (!imageFormat.isSupported(amdContext)) {
    *not_null(errcode_ret) = CL_IMAGE_FORMAT_NOT_SUPPORTED;
    LogWarning("invalid parameter: image_format");
    return (cl_mem)0;
  }

  // The handle must be validated before it is dereferenced; the checks are
  // placed here (after the format checks) so that the precedence of the
  // pre-existing error codes is unchanged.
  if (!is_valid(image)) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    LogWarning("invalid parameter: image");
    return (cl_mem)0;
  }
  // asImage() yields NULL for memory objects that are not images.
  amd::Image* amdImage = as_amd(image)->asImage();
  if (amdImage == NULL) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    LogWarning("invalid parameter: image");
    return (cl_mem)0;
  }

  amd::Image* converted_image = amdImage->createView(amdContext, imageFormat, NULL);

  if (converted_image == NULL) {
    *not_null(errcode_ret) = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
    LogWarning("cannot allocate resources");
    return (cl_mem)0;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return (cl_mem)as_cl<amd::Memory>(converted_image);
}
RUNTIME_EXIT

/*! \brief Create a buffer object on top of an existing image object
 *  (AMD extension).
 *
 *  \param context is the OpenCL context the buffer is created in.
 *
 *  \param image is the image object the buffer is created from.
 *
 *  \param errcode_ret receives the error code; it may be NULL, in which case
 *  no error code is returned.
 *
 *  \return The new buffer object with \a errcode_ret set to CL_SUCCESS, or a
 *  NULL handle with one of the following errors:
 *  - CL_INVALID_CONTEXT if \a context is not a valid context.
 *  - CL_INVALID_OPERATION if no device in \a context reports
 *    buffer-from-image support.
 *  - CL_INVALID_MEM_OBJECT if \a image is not a valid image object.
 *  - CL_OUT_OF_HOST_MEMORY if the buffer object could not be allocated.
 *  - CL_MEM_OBJECT_ALLOCATION_FAILURE if creating the buffer failed.
 */
RUNTIME_ENTRY_RET(cl_mem, clCreateBufferFromImageAMD,
                  (cl_context context, cl_mem image, cl_int* errcode_ret)) {
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    LogWarning("invalid parameter: context");
    return (cl_mem)0;
  }

  // At least one device in the context has to support buffer-from-image.
  amd::Context& amdContext = *as_amd(context);
  const std::vector<amd::Device*>& devices = amdContext.devices();
  bool supportPass = false;
  for (auto& dev : devices) {
    if (dev->info().bufferFromImageSupport_) {
      supportPass = true;
      break;
    }
  }

  if (!supportPass) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    LogWarning("there are no devices in context to support buffer from image");
    return (cl_mem)0;
  }

  // Validate the handle BEFORE dereferencing it: calling asImage() through an
  // invalid handle would crash before the error could be reported.
  if (!is_valid(image)) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    return (cl_mem)0;
  }
  // asImage() yields NULL for memory objects that are not images.
  amd::Image* amdImage = as_amd(image)->asImage();
  if (amdImage == NULL) {
    *not_null(errcode_ret) = CL_INVALID_MEM_OBJECT;
    return (cl_mem)0;
  }

  // The buffer is constructed from the image itself and sized to the image.
  amd::Memory* mem = new (amdContext) amd::Buffer(*amdImage, 0, 0, amdImage->getSize());
  if (mem == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return (cl_mem)0;
  }

  if (!mem->create()) {
    *not_null(errcode_ret) = CL_MEM_OBJECT_ALLOCATION_FAILURE;
    mem->release();
    return (cl_mem)0;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return (cl_mem)as_cl<amd::Memory>(mem);
}
RUNTIME_EXIT

/*! @}
 *  @}
 *  @}
 */
