// Make a unique directory to save the dumped IR.
// RUN: mkdir -p %T/indirect-func-arg
// RUN: %hc %s -o %t.out -Xlinker -dump-llvm -Xlinker -dump-dir=%T/indirect-func-arg %target_all_gpus
// RUN: %llvm-dis %T/indirect-func-arg/dump-gfx803.opt.bc -f -o - | %FileCheck %s
// RUN: %t.out
// XFAIL: *
// The hardcoded codegen pattern checked below no longer holds now that function calls are enabled.

#include <hc.hpp>
#include <vector>

#define GRID_SIZE (1024)

// CHECK-LABEL: define weak_odr amdgpu_kernel void @"_ZZ4mainEN3$_019__cxxamp_trampolineEPjiii"(i32* %0, i32 %1, i32 %2, i32 %3) local_unnamed_addr #19
// Aggregate of eight ints that f() receives by value. The constructor, which
// carries the [[hc]] attribute, writes 1 into the first element and leaves the
// remaining seven elements unset.
struct A {
  int x[8];
  A() [[hc]] { this->x[0] = 1; }
};

// Reads one int through an untyped pointer and returns it.
// `p` must point at a readable int.
int g(void *p) [[hc]] {
  int *as_int = static_cast<int *>(p);
  return *as_int;
}
  
// Stores the address of the by-value argument `a` in slot 0 of a local array
// of untyped pointers, then passes the pointer selected by `i` to g(), which
// reads an int through it.
//
// The FileCheck directives embedded in the body match the optimized IR for
// this exact statement sequence: they reject a bitcast of the pointer array to
// a pointer-to-%struct.A pointer, and expect an addrspacecast of the argument's
// address followed by a GEP to element 0 and a store into it. Do not reshape
// the statements without revisiting those directives.
int f(A a, int i) [[hc]] {
  void *p[10];
  // CHECK-NOT:  bitcast [10 x i8*] addrspace(5)* %{{[^ ]+}} to %struct.A addrspace(5)* addrspace(5)*
  // The following addrspacecast and GEP are created by SROA.
  // CHECK: %[[a:[^ ]+]] = addrspacecast i8 addrspace(5)* %{{[^ ]+}} to i8*
  // CHECK: %[[p:[^ ]+]] = getelementptr inbounds [10 x i8*], [10 x i8*] addrspace(5)* %{{[^ ]+}}, i32 0, i32 0
  // CHECK: store i8* %[[a]], i8* addrspace(5)* %[[p]]
  p[0]  = (void*)&a;
  // Only p[0] has been initialized; the caller in main() passes i == 0, so
  // this reads a.x[0] through the stored pointer.
  return g(p[i]);
} 
 
// Runs f() once per element of a GRID_SIZE-wide extent, stores each result in
// `table`, and then checks on the host that every element equals 1.
// Returns 0 when all elements match and 1 otherwise.
int main() {
  using namespace hc;
  array<unsigned int, 1> table(GRID_SIZE);
  extent<1> ex(GRID_SIZE);
  int i = 0;
  // The lambda is kept in its original form: its captures and signature feed
  // the mangled trampoline name that the label directive above struct A matches.
  auto k = [&table, i](hc::index<1>& idx) [[hc]]{
    A a;
    table(idx) = f(a, i);
  };
  parallel_for_each(ex, k).wait();

  // Convert the array into a std::vector and inspect every element.
  std::vector<unsigned int> result = table;
  bool all_ones = true;
  for (int n = 0; n < GRID_SIZE; ++n) {
    if (result[n] != 1) {
      all_ones = false;
    }
  }

  return all_ones ? 0 : 1;
}

