// RUN: %hc %s -o %t.out && %t.out #include #include #include #include #define WAVEFRONT_SIZE (64) // as of now, all HSA agents have wavefront size of 64 #define GRID_SIZE (WAVEFRONT_SIZE * (WAVEFRONT_SIZE + 1)) #define TEST_DEBUG (0) // A test case to verify HSAIL builtin function // - __any // - __all // test __any bool test__any() { using namespace hc; bool ret = true; // initialize test data // test is a table of size WAVEFRONT_SIZE * WAVEFRONT_SIZE std::vector test(GRID_SIZE); std::random_device rd; std::uniform_int_distribution int_dist(0, WAVEFRONT_SIZE - 1); // for each block of WAVEFRONT_SIZE, we randomly set 1s inside the block // the number of 1s in the block equals to the index of the block // (the 1st block of WAVEFRONT_SIZE has 0 1s, the 2nd block of WAVEFRONT_SIZE has 1 1, and so on) for (int i = 0; i <= WAVEFRONT_SIZE; ++i) { for (int j = 0; j < WAVEFRONT_SIZE; ++j) { if (j < i) { test[i * WAVEFRONT_SIZE + j] = 1; } else { test[i * WAVEFRONT_SIZE + j] = 0; } } // randomly shuffle items in the block for (int j = 0; j < WAVEFRONT_SIZE * 10; ++j) { int k1 = int_dist(rd); int k2 = int_dist(rd); if (k1 != k2) { test[i * WAVEFRONT_SIZE + k1] ^= test[i * WAVEFRONT_SIZE + k2] ^= test[i * WAVEFRONT_SIZE + k1] ^= test[i * WAVEFRONT_SIZE + k2]; } } } for (int i = 0; i <= WAVEFRONT_SIZE; ++i) { for (int j = 0; j < WAVEFRONT_SIZE; ++j) { #if TEST_DEBUG std::cout << test[i * WAVEFRONT_SIZE + j] << " "; #endif } #if TEST_DEBUG std::cout << "\n"; #endif } array test_GPU(GRID_SIZE); copy(test.begin(), test_GPU); array output_GPU(GRID_SIZE); extent<1> ex(GRID_SIZE); parallel_for_each(ex, [&](hc::index<1>& idx) [[hc]] { output_GPU(idx) = __any(test_GPU(idx)); }).wait(); // verify result std::vector output = output_GPU; for (int i = 0; i <= WAVEFRONT_SIZE; ++i) { // in test__any(), only the first wavefront would have result 0 // all other wavefronts would have non-zero result because at least // one thread have non-zero predicate, so __any would let all threads // have non-zero results for (int j = 0; j < WAVEFRONT_SIZE; ++j) { ret &= ((i == 0) ? (output[i * WAVEFRONT_SIZE + j] == 0) : (output[i * WAVEFRONT_SIZE + j] != 0)); #if TEST_DEBUG std::cout << output[i * WAVEFRONT_SIZE + j] << " "; #endif } #if TEST_DEBUG std::cout << "\n"; #endif } return ret; } // test __all bool test__all() { using namespace hc; bool ret = true; // initialize test data // test is a table of size WAVEFRONT_SIZE * WAVEFRONT_SIZE std::vector test(GRID_SIZE); std::random_device rd; std::uniform_int_distribution int_dist(0, WAVEFRONT_SIZE - 1); // for each block of WAVEFRONT_SIZE, we randomly set 1s inside the block // the number of 1s in the block equals to the index of the block // (the 1st block of WAVEFRONT_SIZE has 0 1s, the 2nd block of WAVEFRONT_SIZE has 1 1, and so on) for (int i = 0; i <= WAVEFRONT_SIZE; ++i) { for (int j = 0; j < WAVEFRONT_SIZE; ++j) { if (j < i) { test[i * WAVEFRONT_SIZE + j] = 1; } else { test[i * WAVEFRONT_SIZE + j] = 0; } } // randomly shuffle items in the block for (int j = 0; j < WAVEFRONT_SIZE * 10; ++j) { int k1 = int_dist(rd); int k2 = int_dist(rd); if (k1 != k2) { test[i * WAVEFRONT_SIZE + k1] ^= test[i * WAVEFRONT_SIZE + k2] ^= test[i * WAVEFRONT_SIZE + k1] ^= test[i * WAVEFRONT_SIZE + k2]; } } } for (int i = 0; i <= WAVEFRONT_SIZE; ++i) { for (int j = 0; j < WAVEFRONT_SIZE; ++j) { #if TEST_DEBUG std::cout << test[i * WAVEFRONT_SIZE + j] << " "; #endif } #if TEST_DEBUG std::cout << "\n"; #endif } array test_GPU(GRID_SIZE); copy(test.begin(), test_GPU); array output_GPU(GRID_SIZE); extent<1> ex(GRID_SIZE); parallel_for_each(ex, [&](hc::index<1>& idx) [[hc]] { output_GPU(idx) = __all(test_GPU(idx)); }); // verify result std::vector output = output_GPU; for (int i = 0; i <= WAVEFRONT_SIZE; ++i) { // in test__all(), only the last wavefront would have result 1 // all other wavefronts would have zero result because at least // one thread have non-zero predicate, so __all would let all threads // have results 0 for (int j = 0; j < WAVEFRONT_SIZE; ++j) { ret &= ((i == WAVEFRONT_SIZE) ? (output[i * WAVEFRONT_SIZE + j] != 0) : (output[i * WAVEFRONT_SIZE + j] == 0)); #if TEST_DEBUG std::cout << output[i * WAVEFRONT_SIZE + j] << " "; #endif } #if TEST_DEBUG std::cout << "\n"; #endif } return ret; } int main() { bool ret = true; ret &= test__any(); ret &= test__all(); return !(ret == true); }