--- /home/grodgers/git/aomp/TEMP/support/lib/hostcall/CMakeLists.txt 2019-05-21 11:05:53.435504820 -0500 +++ lib/CMakeLists.txt 2019-05-21 13:40:44.104325720 -0500 @@ -17,8 +17,10 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../atmi/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include) -file(GLOB SOURCES "src/*.cpp") +file(GLOB SOURCES "src/*.cpp" "src/*.c") add_library(amd_hostcall SHARED ${SOURCES}) set_target_properties(amd_hostcall PROPERTIES CXX_STANDARD 11 @@ -39,6 +41,7 @@ set(INCLUDE_INSTALL_DIR include) set(LIB_INSTALL_DIR lib) +set(LIBDEBUG_INSTALL_DIR lib-debug) set(SHARE_INSTALL_DIR share) set(CONFIG_INSTALL_DIR cmake) @@ -46,6 +49,9 @@ EXPORT amd_hostcall_export DESTINATION ${LIB_INSTALL_DIR}) +install(TARGETS amd_hostcall LIBRARY DESTINATION + ${LIBDEBUG_INSTALL_DIR}) + install(EXPORT amd_hostcall_export DESTINATION ${CONFIG_INSTALL_DIR}) --- /home/grodgers/git/aomp/TEMP/support/lib/hostcall/include/amd_hostcall.h 2019-05-21 11:05:53.733498607 -0500 +++ lib/include/amd_hostcall.h 2019-05-22 13:59:06.637991402 -0500 @@ -258,6 +258,22 @@ void amd_hostcall_enable_debug(void); +#include "hsa/hsa_ext_amd.h" +AMD_HOSTCALL_API +unsigned long atmi_hostcall_assign_buffer(unsigned int minpackets, + hsa_queue_t * this_Q, hsa_amd_memory_pool_t finegrain_pool, + uint32_t device_id, int* is_new_buffer); + +AMD_HOSTCALL_API +hsa_status_t atmi_hostcall_init(); + +AMD_HOSTCALL_API +hsa_status_t atmi_hostcall_terminate(); + +AMD_HOSTCALL_API +hsa_status_t +atmi_hostcall_version_check(unsigned int device_vrm); + #ifdef __cplusplus } // extern "C" #endif --- /home/grodgers/git/aomp/TEMP/support/lib/hostcall/src/hostcall.cpp 2019-05-21 11:05:53.918494749 -0500 +++ lib/src/hostcall.cpp 2019-05-22 12:27:58.992184171 -0500 @@ -97,6 +97,8 @@ #endif } +#if 0 +// wait_on_signal replaced with single call to hsa_signal_wait_acquire static uint64_t wait_on_signal(signal_t doorbell, uint64_t timeout, uint64_t old_value) { @@ -121,6 +123,7 @@ return old_value; #endif } +#endif /** \brief Locked reference to critical data. * @@ -215,6 +218,8 @@ return buffer->payloads + get_ptr_index(ptr, buffer->index_size); } +static bool hostcall_version_checked; + void amd_hostcall_consumer_t::process_packets(buffer_t *buffer, uint64_t ready_stack) const @@ -242,7 +247,16 @@ auto header = get_header(buffer, iter); next = header->next; - auto service = header->service; + // Check if device hostcall or stubs are ahead of this host runtime + uint service = ((uint) header->service <<16 ) >> 16; + if (!hostcall_version_checked) { + uint device_vrm = ((uint) header->service >> 16 ); + hsa_status_t err = + atmi_hostcall_version_check(device_vrm); + if ( err != HSA_STATUS_SUCCESS ) + std::quick_exit(EXIT_FAILURE); + hostcall_version_checked = true; + } WHEN_DEBUG(std::cout << "packet service: " << (uint32_t)service << std::endl); @@ -274,7 +288,6 @@ uint64_t *slot = payload->slots[wi]; handler(slot); } - __atomic_store_n(&header->control, reset_ready_flag(header->control), std::memory_order_release); } @@ -294,7 +307,11 @@ uint64_t timeout = 1024 * 1024; while (true) { - signal_value = wait_on_signal(doorbell, timeout, signal_value); + + hsa_signal_t hs{doorbell.handle}; + signal_value = hsa_signal_wait_acquire(hs, HSA_SIGNAL_CONDITION_NE, + signal_value, timeout, HSA_WAIT_STATE_BLOCKED); + if (signal_value == SIGNAL_DONE) { return; } --- /home/grodgers/git/aomp/TEMP/hostcall/ockl/src/hostcall_impl.cl 2019-05-21 10:33:08.084304858 -0500 +++ libdevice/src/hostcall_invoke.cl 2019-05-22 11:18:16.497954349 -0500 @@ -300,11 +300,12 @@ * be linked into kernel objects that are loaded after this library. */ __ockl_hostcall_result_t -__hostcall_transaction(void *_buffer, uint service_id, +hostcall_invoke(uint service_id, ulong arg0, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5, ulong arg6, ulong arg7) { - __global buffer_t *buffer = (__global buffer_t *)_buffer; + __constant size_t* argptr = (__constant size_t *)__builtin_amdgcn_implicitarg_ptr(); + __global buffer_t *buffer = (__global buffer_t *)argptr[3]; ulong packet_ptr = pop_free_stack(buffer); __global header_t *header = get_header(buffer, packet_ptr); __global payload_t *payload = get_payload(buffer, packet_ptr);