// RUN: %hc %s -o %t.out && %t.out #include #include // loop to deliberately slow down kernel execution #define LOOP_COUNT (1024 * 512) #define TEST_DEBUG (0) // Number of async-ops changes if flush-opt is enabled since we may need to add extra ops in some places. #define HCC_OPT_FLUSH 1 /// test implicit synchronization of array_view and kernel dispatches /// /// in this test case, there are NO kernel dependencies because all kernels /// read from the same read-only array_view instances, and write to DIFFERENT /// output array_view instances. template void test1D() { // dependency graph // pfe1: av1 + av2 -> av3 // pfe2: av1 + av2 -> av4 // pfe3: av1 + av2 -> av5 // pfe1, pfe2, pfe3 are all independent std::vector table1(grid_size); std::vector table2(grid_size); std::vector table3(grid_size); std::vector table4(grid_size); std::vector table5(grid_size); for (int i = 0; i < grid_size; ++i) { table1[i] = i; table2[i] = i; } hc::array_view av1(grid_size, table1); hc::array_view av2(grid_size, table2); hc::array_view av3(grid_size, table3); hc::array_view av4(grid_size, table4); hc::array_view av5(grid_size, table5); #if TEST_DEBUG std::cout << "launch pfe1\n"; #endif hc::parallel_for_each(hc::extent<1>(grid_size), [=](hc::index<1>& idx) [[hc]] { // av3 = i * 2 for (int i = 0; i < LOOP_COUNT; ++i) av3(idx) = av1(idx) + av2(idx); }); #if TEST_DEBUG std::cout << "after pfe1\n"; #endif #if TEST_DEBUG std::cout << "launch pfe2\n"; #endif // this kernel dispatch shall NOT implicitly wait for the previous one to complete // because the array_view written is NOT used by the previous kernels hc::parallel_for_each(hc::extent<1>(grid_size), [=](hc::index<1>& idx) [[hc]] { // av4 = i * 2 for (int i = 0; i < LOOP_COUNT; ++i) av4(idx) = av1(idx) + av2(idx); }); #if TEST_DEBUG std::cout << "after pfe2\n"; #endif #if TEST_DEBUG std::cout << "launch pfe3\n"; #endif // this kernel dispatch shall NOT implicitly wait for the previous one to complete // because the array_view written is NOT used by the previous kernels hc::parallel_for_each(hc::extent<1>(grid_size), [=](hc::index<1>& idx) [[hc]] { // av5 = i * 2 for (int i = 0; i < LOOP_COUNT; ++i) av5(idx) = av1(idx) + av2(idx); }); #if TEST_DEBUG std::cout << "after pfe3 get_pending_async_ops=" << hc::accelerator().get_default_view().get_pending_async_ops() << "\n"; #endif const int expectedPendingOps = HCC_OPT_FLUSH ? 5 : 3; // now there must be 3 pending async operations for the accelerator_view assert (hc::accelerator().get_default_view().get_pending_async_ops() == expectedPendingOps); // for this test case we deliberately NOT wait on kernels // we want to check when array_view instances go to destruction // would all dependent kernels be waited or not } int main() { bool ret = true; hc::accelerator_view av = hc::accelerator().get_default_view(); test1D<32, 16>(); assert(av.get_pending_async_ops() == 0); test1D<64, 8>(); assert(av.get_pending_async_ops() == 0); test1D<128, 32>(); assert(av.get_pending_async_ops() == 0); test1D<256, 64>(); assert(av.get_pending_async_ops() == 0); test1D<1024, 256>(); assert(av.get_pending_async_ops() == 0); return !(ret == true); }