// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

#define N 1000

int test_amdgcn_target_tid_threads() {

  int arr[N];

#pragma omp target
  for (int i = 0; i < N; i++) {
    arr[i] = 1;
  }

  return arr[0];
}

int test_amdgcn_target_tid_threads_simd() {

  int arr[N];

#pragma omp target simd
  for (int i = 0; i < N; i++) {
    arr[i] = 1;
  }
  return arr[0];
}

#endif
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l16
// CHECK-SAME: ([1000 x i32]* nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca [1000 x i32]*, align 8, addrspace(5)
// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[ARR_ADDR_ASCAST:%.*]] = addrspacecast [1000 x i32]* addrspace(5)* [[ARR_ADDR]] to [1000 x i32]**
// CHECK-NEXT:    [[I_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[I]] to i32*
// CHECK-NEXT:    store [1000 x i32]* [[ARR]], [1000 x i32]** [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* addrspacecast ([[STRUCT_IDENT_T:%.*]] addrspace(1)* @[[GLOB1:[0-9]+]] to %struct.ident_t*), i8 1, i1 true, i1 true)
// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK:       user_code.entry:
// CHECK-NEXT:    store i32 0, i32* [[I_ASCAST]], align 4
// CHECK-NEXT:    br label [[FOR_COND:%.*]]
// CHECK:       for.cond:
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I_ASCAST]], align 4
// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
// CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK:       for.body:
// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[I_ASCAST]], align 4
// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX]], align 4
// CHECK-NEXT:    br label [[FOR_INC:%.*]]
// CHECK:       for.inc:
// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[I_ASCAST]], align 4
// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK-NEXT:    store i32 [[INC]], i32* [[I_ASCAST]], align 4
// CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK:       worker.exit:
// CHECK-NEXT:    ret void
// CHECK:       for.end:
// CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* addrspacecast ([[STRUCT_IDENT_T]] addrspace(1)* @[[GLOB1]] to %struct.ident_t*), i8 1, i1 true)
// CHECK-NEXT:    ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l28
// CHECK-SAME: ([1000 x i32]* nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[ARR_ADDR:%.*]] = alloca [1000 x i32]*, align 8, addrspace(5)
// CHECK-NEXT:    [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT:    [[ARR_ADDR_ASCAST:%.*]] = addrspacecast [1000 x i32]* addrspace(5)* [[ARR_ADDR]] to [1000 x i32]**
// CHECK-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[TMP]] to i32*
// CHECK-NEXT:    [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[DOTOMP_IV]] to i32*
// CHECK-NEXT:    [[I_ASCAST:%.*]] = addrspacecast i32 addrspace(5)* [[I]] to i32*
// CHECK-NEXT:    store [1000 x i32]* [[ARR]], [1000 x i32]** [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[ARR_ADDR_ASCAST]], align 8
// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* addrspacecast ([[STRUCT_IDENT_T:%.*]] addrspace(1)* @[[GLOB1]] to %struct.ident_t*), i8 2, i1 false, i1 false)
// CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK:       user_code.entry:
// CHECK-NEXT:    store i32 0, i32* [[DOTOMP_IV_ASCAST]], align 4
// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK:       omp.inner.for.cond:
// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group !10
// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
// CHECK-NEXT:    br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK:       omp.inner.for.body:
// CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group !10
// CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK-NEXT:    store i32 [[ADD]], i32* [[I_ASCAST]], align 4, !llvm.access.group !10
// CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[I_ASCAST]], align 4, !llvm.access.group !10
// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10
// CHECK-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK:       omp.body.continue:
// CHECK-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK:       omp.inner.for.inc:
// CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group !10
// CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
// CHECK-NEXT:    store i32 [[ADD1]], i32* [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group !10
// CHECK-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK:       worker.exit:
// CHECK-NEXT:    ret void
// CHECK:       omp.inner.for.end:
// CHECK-NEXT:    store i32 1000, i32* [[I_ASCAST]], align 4
// CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* addrspacecast ([[STRUCT_IDENT_T]] addrspace(1)* @[[GLOB1]] to %struct.ident_t*), i8 2, i1 false)
// CHECK-NEXT:    ret void
//
