// RUN: %hc %s -o %t.out && %t.out #include #include #define GRID_SIZE (16) // globalVar would be agent-allocated global variable with program linkage // add an initial value to prevent a bug in HLC [[hc]] float tableGlobal[GRID_SIZE] = { 0.1 }; using namespace hc; bool test2() { bool ret = true; // array which would be copied into the global variable array float tableInput[GRID_SIZE] { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 }; // array to store the outputs from the kernel array_view tableOutput1(GRID_SIZE); // array to store the result copied from device memory float tableOutput2[GRID_SIZE] { 0 }; // use get_symbol_address() and hc::accelerator::memcpySymbol() to copy testValue to globalVar // get the default accelerator accelerator acc = accelerator(); void* tableGlobalDevicePtr = GET_SYMBOL_ADDRESS(acc, tableGlobal); acc.memcpy_symbol(tableGlobalDevicePtr, tableInput, sizeof(float) * GRID_SIZE); // dispatch a kernel which reads from globalVar and stores result to table1 extent<1> ex(GRID_SIZE); completion_future fut = parallel_for_each(ex, [=](hc::index<1>& idx) __attribute__((hc)) { tableOutput1(idx) = tableGlobal[idx[0]]; }); // wait for the kernel to be completed fut.wait(); // copy data from device -> host acc.memcpy_symbol(tableGlobalDevicePtr, tableOutput2, sizeof(float) * GRID_SIZE, 0, hcMemcpyDeviceToHost); // read out the outputs, it should agree with testValue for (int i = 0; i < GRID_SIZE; ++i) { ret &= (tableInput[i] == tableOutput1[i]); ret &= (tableInput[i] == tableOutput2[i]); } return ret; } int main() { bool ret = true; ret &= test2(); return !(ret == true); }