//
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
//
#include "cl_common.hpp"
#include "cl_profile_amd.h"
#include "platform/context.hpp"
#include "platform/command.hpp"
#include "platform/perfctr.hpp"
#include "device/device.hpp"
#include <cstring>

/*! \addtogroup API
 *  @{
 *
 *  \addtogroup AMD_Extensions
 *  @{
 *
 */

/*! \brief Creates a new HW performance counter
 *   for the specified OpenCL context.
 *
 *  \param device must be a valid OpenCL device.
 *
 *  \param block_index index of the HW block to configure.
 *
 *  \param counter_index index of the hardware counter
 *  within the block to configure.
 *
 *  \param event_index Event you wish to count with
 *  the counter specified by block_index + counter_index
 *
 *  \param perf_counter the created perfcounter object
 *
 *  \param errcode_ret A non zero value if OpenCL failed to create PerfCounter
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_DEVICE if the specified context is invalid.
 *  - CL_INVALID_OPERATION if we couldn't create the object
 *
 *  \return Created perfcounter object
 */
RUNTIME_ENTRY_RET(cl_perfcounter_amd, clCreatePerfCounterAMD,
                  (cl_device_id device, cl_perfcounter_property* properties, cl_int* errcode_ret)) {
  // Make sure we have a valid device object
  if (!is_valid(device)) {
    *not_null(errcode_ret) = CL_INVALID_DEVICE;
    return NULL;
  }

  // Make sure we have a valid pointer to the performance counter properties
  if (NULL == properties) {
    return NULL;
  }

  amd::PerfCounter::Properties perfProperties;
  size_t size = 0;
  while (properties[size] != CL_PERFCOUNTER_NONE) {
    if (properties[size] < CL_PERFCOUNTER_LAST) {
      perfProperties[properties[size]] = static_cast<ulong>(properties[size + 1]);
      size += 2;
    } else {
      return NULL;
    }
  }

  // Create the device perf counter
  amd::PerfCounter* perfCounter = new amd::PerfCounter(*as_amd(device), perfProperties);

  if (perfCounter == NULL) {
    *not_null(errcode_ret) = CL_INVALID_OPERATION;
    return NULL;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return as_cl(perfCounter);
}
RUNTIME_EXIT

/*! \brief Destroy a performance counter object.
 *
 *  \param perf_counter the perfcounter object for release
 *
 *  \return A non zero value if OpenCL failed to release PerfCounter
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_OPERATION if we failed to release the object
 */
RUNTIME_ENTRY(cl_int, clReleasePerfCounterAMD, (cl_perfcounter_amd perf_counter)) {
  if (!is_valid(perf_counter)) {
    return CL_INVALID_OPERATION;
  }
  as_amd(perf_counter)->release();
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Increments the perfcounter object reference count.
 *
 *  \param perf_counter the perfcounter object for retain
 *
 *  \return A non zero value if OpenCL failed to retain PerfCounter
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_OPERATION if we failed to release the object
 */
RUNTIME_ENTRY(cl_int, clRetainPerfCounterAMD, (cl_perfcounter_amd perf_counter)) {
  if (!is_valid(perf_counter)) {
    return CL_INVALID_OPERATION;
  }
  as_amd(perf_counter)->retain();
  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueues the begin command for the specified counters.
 *
 *  \param command_queue must be a valid OpenCL command queue.
 *
 *  \param num_perf_counters the number of perfcounter objects in the array.
 *
 *  \param perf_counters specifies an array of perfcounter objects.
 *
 *  \param event_wait_list specify [is a pointer to] events that need to
 *  complete before this particular command can be executed.
 *  If \a event_wait_list is NULL, then this particular command does not wait
 *  on any event to complete. If \a event_wait_list is NULL,
 *  \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
 *  the list of events pointed to by \a event_wait_list must be valid and
 *  \a num_events_in_wait_list must be greater than 0. The events specified in
 *  \a event_wait_list act as synchronization points.
 *
 *  \param num_events_in_wait_list specify the number of events in
 *  \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It  must be
 *  greater than 0 if \a event_wait_list is not NULL.
 *
 *  \param event returns an event object that identifies this particular
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. \a event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue a
 *  wait for this command to complete.
 *
 *  \return A non zero value if OpenCL failed to release PerfCounter
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_OPERATION if we failed to enqueue the begin operation
 *  - CL_INVALID_COMMAND_QUEUE if the queue is
 */
RUNTIME_ENTRY(cl_int, clEnqueueBeginPerfCounterAMD,
              (cl_command_queue command_queue, cl_uint num_perf_counters,
               cl_perfcounter_amd* perf_counters, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  if ((num_perf_counters == 0) || (perf_counters == NULL)) {
    return CL_INVALID_OPERATION;
  }

  amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
  if (NULL == hostQueue) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  amd::PerfCounterCommand::PerfCounterList counters;

  // Place all counters into the list
  for (cl_uint i = 0; i < num_perf_counters; ++i) {
    amd::PerfCounter* amdPerf = as_amd(perf_counters[i]);
    if (&hostQueue->device() == &amdPerf->device()) {
      counters.push_back(amdPerf);
    } else {
      return CL_INVALID_DEVICE;
    }
  }

  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  // Create a new command for the performance counters
  amd::PerfCounterCommand* command = new amd::PerfCounterCommand(
      *hostQueue, eventWaitList, counters, amd::PerfCounterCommand::Begin);
  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Submit the command to the device
  command->enqueue();

  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Enqueues the end command for the specified counters.
 *
 *  \param command_queue must be a valid OpenCL command queue.
 *
 *  \param num_perf_counters the number of perfcounter objects in the array.
 *
 *  \param perf_counters specifies an array of perfcounter objects.
 *
 *  \param event_wait_list specify [is a pointer to] events that need to
 *  complete before this particular command can be executed.
 *  If \a event_wait_list is NULL, then this particular command does not wait
 *  on any event to complete. If \a event_wait_list is NULL,
 *  \a num_events_in_wait_list must be 0. If \a event_wait_list is not NULL,
 *  the list of events pointed to by \a event_wait_list must be valid and
 *  \a num_events_in_wait_list must be greater than 0. The events specified in
 *  \a event_wait_list act as synchronization points.
 *
 *  \param num_events_in_wait_list specify the number of events in
 *  \a event_wait_list. It must be 0 if \a event_wait_list is NULL. It  must be
 *  greater than 0 if \a event_wait_list is not NULL.
 *
 *  \param event returns an event object that identifies this particular
 *  command and can be used to query or queue a wait for this particular
 *  command to complete. \a event can be NULL in which case it will not be
 *  possible for the application to query the status of this command or queue a
 *  wait for this command to complete.
 *
 *  \return A non zero value if OpenCL failed to release PerfCounter
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_INVALID_OPERATION if we failed to enqueue the end operation
 */
RUNTIME_ENTRY(cl_int, clEnqueueEndPerfCounterAMD,
              (cl_command_queue command_queue, cl_uint num_perf_counters,
               cl_perfcounter_amd* perf_counters, cl_uint num_events_in_wait_list,
               const cl_event* event_wait_list, cl_event* event)) {
  if (!is_valid(command_queue)) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  if ((num_perf_counters == 0) || (perf_counters == NULL)) {
    return CL_INVALID_OPERATION;
  }

  amd::HostQueue* hostQueue = as_amd(command_queue)->asHostQueue();
  if (NULL == hostQueue) {
    return CL_INVALID_COMMAND_QUEUE;
  }

  amd::PerfCounterCommand::PerfCounterList counters;

  // Place all counters into the list
  for (cl_uint i = 0; i < num_perf_counters; ++i) {
    amd::PerfCounter* amdPerf = as_amd(perf_counters[i]);
    if (&hostQueue->device() == &amdPerf->device()) {
      counters.push_back(amdPerf);
    } else {
      return CL_INVALID_DEVICE;
    }
  }

  amd::Command::EventWaitList eventWaitList;
  cl_int err = amd::clSetEventWaitList(eventWaitList, *hostQueue, num_events_in_wait_list,
                                       event_wait_list);
  if (err != CL_SUCCESS) {
    return err;
  }

  // Create a new command for the performance counters
  amd::PerfCounterCommand* command = new amd::PerfCounterCommand(
      *hostQueue, eventWaitList, counters, amd::PerfCounterCommand::End);
  if (command == NULL) {
    return CL_OUT_OF_HOST_MEMORY;
  }

  // Submit the command to the device
  command->enqueue();

  *not_null(event) = as_cl(&command->event());
  if (event == NULL) {
    command->release();
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT

/*! \brief Retrieves the results from the counter objects.
 *
 *  \param num_perf_counter the perfcounter object for the information query.
 *
 *  \param perf_counters specifies an array of perfcounter objects.
 *
 *  \param wait_event specifies the wait event, returned in
 *  the clEnqueueEndPerfCounterAMD.
 *
 *  \param wait true if OpenCL should wait for the perfcounter data.
 *
 *  \param values must be a valid pointer to an array of 64-bit values
 *  and the array size must be equal to num_perf_counters.
 *
 *  \return
 *  - CL_SUCCESS if the function is executed successfully.
 *  - CL_PROFILING_INFO_NOT_AVAILABLE if event isn't finished.
 *  - CL_INVALID_OPERATION if we failed to get the data
 */
RUNTIME_ENTRY(cl_int, clGetPerfCounterInfoAMD,
              (cl_perfcounter_amd perf_counter, cl_perfcounter_info param_name,
               size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
  // Check if we have a valid performance counter
  if (!is_valid(perf_counter)) {
    return CL_INVALID_OPERATION;
  }

  // Find the kernel, associated with the specified device
  const device::PerfCounter* devCounter = as_amd(perf_counter)->getDeviceCounter();

  // Make sure we found a valid performance counter
  if (devCounter == NULL) {
    return CL_INVALID_OPERATION;
  }

  // Get the corresponded parameters
  switch (param_name) {
    case CL_PERFCOUNTER_REFERENCE_COUNT: {
      cl_uint count = as_amd(perf_counter)->referenceCount();
      // Return the reference counter
      return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PERFCOUNTER_GPU_BLOCK_INDEX:
    case CL_PERFCOUNTER_GPU_COUNTER_INDEX:
    case CL_PERFCOUNTER_GPU_EVENT_INDEX: {
      cl_ulong data = devCounter->getInfo(param_name);
      // Return the device performance counter information
      return amd::clGetInfo(data, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PERFCOUNTER_DATA: {
      cl_ulong data = devCounter->getInfo(param_name);
      if (static_cast<cl_ulong>(0xffffffffffffffffULL) == data) {
        return CL_PROFILING_INFO_NOT_AVAILABLE;
      }
      // Return the device performance counter result
      return amd::clGetInfo(data, param_value_size, param_value, param_value_size_ret);
    }
    default:
      return CL_INVALID_VALUE;
  }

  return CL_SUCCESS;
}
RUNTIME_EXIT

RUNTIME_ENTRY(cl_int, clSetDeviceClockModeAMD,
              (cl_device_id device, cl_set_device_clock_mode_input_amd set_clock_mode_input,
               cl_set_device_clock_mode_output_amd* set_clock_mode_output)) {
  // Make sure we have a valid device object
  if (!is_valid(device)) {
    return CL_INVALID_DEVICE;
  }
  if (set_clock_mode_input.clock_mode >= CL_DEVICE_CLOCK_MODE_COUNT_AMD) {
    return CL_INVALID_VALUE;
  }
  amd::Device* amdDevice = as_amd(device);
  bool ret = amdDevice->SetClockMode(set_clock_mode_input, set_clock_mode_output);
  return (ret == true)? CL_SUCCESS : CL_INVALID_OPERATION;
}
RUNTIME_EXIT

/*! @}
 *  @}
 */
