#!/bin/bash
#
# hc-kernel-assemble kernel-bitcode kernel-object [options]
#
# Embeds GPU kernel code into an object file.
#
# Arguments:
#   $1  kernel bitcode produced by the front end
#   $2  output object; if it already exists it is treated as the host object
#       and the kernel object is linked into it
#
# Options (may appear anywhere on the command line):
#   --amdgpu-target=<arch>  add a GPU target to lower the kernel for
#   --early_finalize        lower the IR to ISA now (sets AMDGPU_OBJ_CODEGEN=1)
#   --amdgpu-func-calls     forward --amdgpu-func-calls to clamp-device
#
# Environment:
#   KMDBSCRIPT              "1" enables bash tracing (set -x)
#   KMDUMPLLVM              "1" dumps the input IR to ./dump.kernel_input.ll
#   AMDGPU_OBJ_CODEGEN      "1" has the same effect as --early_finalize
#   HCC_EXTRA_GPU_ARCH      extra GPU arch to add to the target list
#   HC_REPLACE_GPU_FE_LLVM  path to an IR file that replaces the front-end IR
#
# NOTE: comparisons below intentionally use `[ ... ]` rather than `[[ ... ]]`:
# `shopt -s nocasematch` is enabled further down and would make `[[ == ]]`
# comparisons case-insensitive.

# enable bash debugging
KMDBSCRIPT="${KMDBSCRIPT:=0}"

# dump the LLVM bitcode
KMDUMPLLVM="${KMDUMPLLVM:=0}"

AMDGPU_OBJ_CODEGEN="${AMDGPU_OBJ_CODEGEN:=0}"

# flag for function calls enabled
AMDGPU_FUNC_CALLS="0"

# inject additional GPU archs through an env var
HCC_EXTRA_GPU_ARCH="${HCC_EXTRA_GPU_ARCH:=0}"

if [ "$KMDBSCRIPT" == "1" ]; then
  set -x
fi

BINDIR=$(dirname "$0")
CLANG="$BINDIR/hcc"
LLVM_LINK="$BINDIR/llvm-link"
OPT="$BINDIR/opt"
CLAMP_DEVICE="$BINDIR/clamp-device"
LLVM_AS="$BINDIR/llvm-as"
LLVM_DIS="$BINDIR/llvm-dis"
CLAMP_ASM="$BINDIR/clamp-assemble"
LIBPATH="$BINDIR/../lib"
CLANG_OFFLOAD_BUNDLER="$BINDIR/clang-offload-bundler"

# Add error handling functions (exit_with_code, check_exit_status)
. "$BINDIR/error-check"

# In the build directory, HCC compiler tools are placed under compiler/bin/
# and header files under include/. In the install directory, HCC compiler
# tools are placed under bin/ and header files under include/.
# Passing both -I$BINDIR/../include and -I$BINDIR/../../include ensures the
# headers can always be found in either layout.
CXXFLAGS="-std=c++amp -I$BINDIR/../include -I$BINDIR/../../include -fPIC"

# Set additional CXXFLAGS based on CMAKE_BUILD_TYPE (matched case-insensitively)
shopt -s nocasematch
CMAKE_BUILD_TYPE="@CMAKE_BUILD_TYPE@"
case "$CMAKE_BUILD_TYPE" in
  release)
    CXXFLAGS+=" -O3"
    ;;
  relwithdebinfo)
    CXXFLAGS+=" -O2 -g"
    ;;
  minsizerel)
    CXXFLAGS+=" -Os"
    ;;
  debug)
    CXXFLAGS+=" -g"
    ;;
  *)
    # unknown or empty build type: leave CXXFLAGS untouched
    ;;
esac

if [ "$#" -lt 2 ]; then
  echo "Usage: $0 kernel-bitcode object" >&2
  exit_with_code -1
fi

AMDGPU_TARGET_ARRAY=()
if [ "$HCC_EXTRA_GPU_ARCH" != "0" ]; then
  AMDGPU_TARGET_ARRAY+=("$HCC_EXTRA_GPU_ARCH")
fi

# Scan every argument (including $1 and $2) for the recognised options;
# anything else is ignored here.
for ARG in "$@"; do
  case "$ARG" in
    ######################
    # Parse AMDGPU target
    ######################
    --amdgpu-target=*)
      AMDGPU_TARGET_ARRAY+=("${ARG#*=}")
      ;;
    --early_finalize)
      AMDGPU_OBJ_CODEGEN=1
      ;;
    --amdgpu-func-calls)
      AMDGPU_FUNC_CALLS="1"
      ;;
  esac
done

# inject a new GPU IR to replace the IR from the FE
HC_REPLACE_GPU_FE_LLVM="${HC_REPLACE_GPU_FE_LLVM:=0}"

KERNEL_INPUT=$1
if [ "$HC_REPLACE_GPU_FE_LLVM" != "0" ]; then
  KERNEL_INPUT=$HC_REPLACE_GPU_FE_LLVM
  echo "Replacing GPU input IR from clang with $KERNEL_INPUT"
fi

if [ ! -f "$KERNEL_INPUT" ]; then
  echo "kernel-bitcode $KERNEL_INPUT is not valid" >&2
  exit_with_code -1
fi

CO="-c -o"

# Without a scratch directory every later path would resolve against "/",
# so stop immediately if mktemp fails.
TEMP_DIR=$(mktemp -d)
check_exit_status mktemp
BASENAME=$(basename "$2")
TEMP_NAME="$TEMP_DIR/$BASENAME"

# ELF section names for IR and ISA
KERNEL_IR_SECTION=".kernel_ir"
KERNEL_SECTION=".kernel"

# hip-kernel-assemble goes after hip-host-assemble, so attempt to link the
# object from the host: if the output already exists, stash it as the host
# object to be merged with the kernel object at the end.
if [ -f "$2" ]; then
  mv "$2" "$TEMP_DIR/$BASENAME.tmp.o"
  check_exit_status mv
fi

cp "$KERNEL_INPUT" "$TEMP_NAME.bc"
check_exit_status cp

if [ "$KMDUMPLLVM" == "1" ]; then
  "$LLVM_DIS" "$TEMP_NAME.bc" -o "$TEMP_NAME.ll"
  cp "$TEMP_NAME.ll" ./dump.kernel_input.ll
fi

if [ "$AMDGPU_OBJ_CODEGEN" == 1 ]; then
  # invoke the backend to finalize the IR

  # touch an empty object for the host part, to accommodate the rule required
  # by clang-offload-bundler
  touch "$TEMP_DIR/__empty.o"

  # arguments for clang-offload-bundler, extended once per GPU target below
  CLANG_OFFLOAD_BUNDLER_INPUTS_ARGS="-inputs=$TEMP_DIR/__empty.o"
  CLANG_OFFLOAD_BUNDLER_TARGETS_ARGS="-targets=host-@CMAKE_SYSTEM_PROCESSOR@-unknown-linux"

  # PIDs of the background clamp-device jobs, one per target
  pids=()

  # for each GPU target, lower to GCN ISA in HSACO format (jobs run in parallel)
  for AMDGPU_TARGET in "${AMDGPU_TARGET_ARRAY[@]}"; do
    clamp_device_args=(
      "$KERNEL_INPUT"
      "$TEMP_DIR/kernel-$AMDGPU_TARGET.hsaco"
      "--amdgpu-target=$AMDGPU_TARGET"
      --early-finalize
    )
    if [ "$AMDGPU_FUNC_CALLS" == "1" ]; then
      clamp_device_args+=(--amdgpu-func-calls)
    fi

    "$CLAMP_DEVICE" "${clamp_device_args[@]}" &

    # Record exactly one PID per job. Re-appending the existing list here
    # would make the loop below wait on the same PID more than once.
    pids+=("$!")

    # augment arguments to clang-offload-bundler
    CLANG_OFFLOAD_BUNDLER_INPUTS_ARGS+=",$TEMP_DIR/kernel-$AMDGPU_TARGET.hsaco"
    CLANG_OFFLOAD_BUNDLER_TARGETS_ARGS+=",hcc-amdgcn-amd-amdhsa--$AMDGPU_TARGET"
  done

  # collect all the error codes from forked processes
  ret=0
  for pid in "${pids[@]}"; do
    wait "$pid" || ret=$((ret + $?))
  done

  if [ "$ret" != 0 ]; then
    exit_with_code -1
  fi

  # invoke clang-offload-bundler to create the kernel bundle
  "$CLANG_OFFLOAD_BUNDLER" -type=o \
    "$CLANG_OFFLOAD_BUNDLER_INPUTS_ARGS" \
    "$CLANG_OFFLOAD_BUNDLER_TARGETS_ARGS" \
    "-outputs=$TEMP_DIR/kernel.bundle"
  check_exit_status "$CLANG_OFFLOAD_BUNDLER"

  # clamp-assemble is run from inside TEMP_DIR so that its input is the bare
  # name "kernel.bundle".
  # NOTE(review): presumably the input file name ends up in the generated
  # object (e.g. as symbol names) -- confirm against clamp-assemble.
  OLD_PWD=$PWD
  cd "$TEMP_DIR"
  check_exit_status cd
  "$CLAMP_ASM" "kernel.bundle" "$TEMP_NAME.camp.o" "$KERNEL_SECTION"
  check_exit_status "$CLAMP_ASM"
  cd "$OLD_PWD"
else
  # Embed the IR itself. The link is intentionally not checked: it is left
  # behind after a run, so on a re-run `ln` fails because "$1.bc" already
  # exists and the existing link is reused.
  ln -s "$KERNEL_INPUT" "$1.bc"
  "$CLAMP_ASM" "$1.bc" "$TEMP_NAME.camp.o" "$KERNEL_IR_SECTION"
  check_exit_status "$CLAMP_ASM"
fi

# Merge with the stashed host object if there was one, otherwise the kernel
# object becomes the output.
if [ -f "$TEMP_DIR/$BASENAME.tmp.o" ]; then
  ld -r --allow-multiple-definition "$TEMP_DIR/$BASENAME.tmp.o" "$TEMP_NAME.camp.o" -o "$2"
  check_exit_status ld
else
  mv "$TEMP_NAME.camp.o" "$2"
  check_exit_status mv
fi

rm -rf "$TEMP_DIR"