#!/bin/bash
# Compiles an LLVM bitcode file to GCN ISA and stores the result as an HSACO
# code object.
#
# Pipeline: llvm-link (input + ROCm device libs) -> opt (HCC-specific passes)
#           -> llc (object code) -> ld.lld (skipped when ThinLTO is enabled).
#
# Positional arguments:
#   $1 = input LLVM bitcode file name
#   $2 = HSACO code object file name
#
# Options (recognised anywhere on the command line):
#   --amdgpu-target=(GPU family name)  selects AMDGPU target
#   --early-finalize                   sets the early-finalization flag and
#                                      turns ThinLTO off
#   --amdgpu-func-calls                enables function calls in opt and llc
#   --hcc-extra-libs=LIB               appends LIB to HCC_EXTRA_LIBRARIES
#   --dump-isa                         same as KMDUMPISA=1
#   --dump-llvm                        same as KMDUMPLLVM=1
#   --dump-dir=DIR                     same as KMDUMPDIR=DIR
#
# Environment variables read:
#   KMDBSCRIPT, KMDUMPDIR, KMDUMPLLVM, KMDUMPISA, KMOPTOPT, KMOPTLLC,
#   KMHACKLLVM, KMTHINLTO, HCC_EXTRA_LIBRARIES, HCC_EXTRA_LIBRARIES_GFX<nnn>
#
# Exit status: 1 on bad usage / missing input / missing device libs, the
# failing tool's status when a pipeline stage fails, 0 otherwise.
# Intermediate files are only removed when the whole pipeline succeeds.

# enable bash debugging
KMDBSCRIPT="${KMDBSCRIPT:=0}"
if [[ "$KMDBSCRIPT" == "1" ]]; then
  set -x
fi

# directory where files are dumped
KMDUMPDIR="${KMDUMPDIR:=.}"

# dump the LLVM bitcode
KMDUMPLLVM="${KMDUMPLLVM:=0}"

# dump the isa
KMDUMPISA="${KMDUMPISA:=0}"

# pass extra options to OPT
# KMOPTOPT can be used to pass last-minute options to opt in the backend
# if not set, then "-O3" is passed to opt
KMOPTOPT="${KMOPTOPT:="-O3"}"

# pass extra options to LLC
# KMOPTLLC can be used to pass last-minute options to llc in the backend
# if not set, then "-O2" is passed to llc
KMOPTLLC="${KMOPTLLC:="-O2"}"

# enable LLVM hijacking
KMHACKLLVM="${KMHACKLLVM:=0}"

# enable ThinLTO
KMTHINLTO="${KMTHINLTO:=0}"

# flag for early finalization (set by --early-finalize; not read again in
# this script)
AMDGPU_OBJ_CODEGEN="0"

# flag for function calls enabled
AMDGPU_FUNC_CALLS="0"

# Abort the script when a pipeline stage failed, propagating its status.
#   $1 = exit status of the stage that just ran
#   $2 = stage description inserted into the error message
check_stage() {
  if [[ "$1" -ne 0 ]]; then
    echo "Generating AMD GCN kernel failed in $2 for target: $AMDGPU_TARGET" >&2
    exit "$1"
  fi
}

# check command line arguments
if [[ "$#" -lt 2 ]]; then
  echo "Usage: $0 input_LLVM output_hsaco_kernel [--amdgpu-target=(GPU family name)]" >&2
  echo "  --amdgpu-target=(GPU family name)" >&2
  echo "           selects AMDGPU target" >&2
  exit 1
fi

if [[ ! -f "$1" ]]; then
  echo "input LLVM IR $1 is not valid" >&2
  exit 1
fi

# LLVM tools are expected next to this script (as invoked, symlinks are not
# resolved here).
BINDIR="$(dirname -- "$0")"
AS="$BINDIR/llvm-as"
OPT="$BINDIR/opt"
LLC="$BINDIR/llc"
LINK="$BINDIR/llvm-link"
LIB="$BINDIR/../lib"
LLD="$BINDIR/ld.lld"

################
# Determine the ROCm device libs path
################

HCC_BIN_PATH=$(dirname -- "$(readlink -f -- "$BASH_SOURCE")")
# Semicolon-separated candidate directories; the last entry is filled in when
# this template is configured.
ROCM_DEVICE_LIBS_SEARCH_PATHS="$HCC_BIN_PATH/../../rocdl/lib;$HCC_BIN_PATH/../rocdl/lib;@ROCM_DEVICE_LIB_PATHS@"

ROCM_LIB=""
# Split on ';' only, so candidate directories containing spaces or glob
# characters are kept intact.
IFS=';' read -r -a SEARCH_PATHS <<< "$ROCM_DEVICE_LIBS_SEARCH_PATHS"
for SEARCH_PATH in "${SEARCH_PATHS[@]}"; do
  # Empty entries (e.g. ";;" or an empty configured list) are not candidates.
  [[ -n "$SEARCH_PATH" ]] || continue
  if [[ -f "$SEARCH_PATH/ocml.amdgcn.bc" ]]; then
    ROCM_LIB="$(readlink -f -- "$SEARCH_PATH")"
    break
  fi
done

if [[ ! -f "$ROCM_LIB/ocml.amdgcn.bc" ]]; then
  echo "ROCm Device Libs file ocml.amdgcn.bc is missing from $ROCM_LIB" >&2
  exit 1
fi

################
# Parse options
################

# Iterate over "$@" directly so each argument stays one word. The positional
# input/output names are scanned too; they simply match no pattern.
for ARG in "$@"; do
  case "$ARG" in
    --amdgpu-target=*)
      AMDGPU_TARGET="${ARG#*=}"
      ;;
    --early-finalize)
      AMDGPU_OBJ_CODEGEN="1"
      KMTHINLTO="0"
      ;;
    --amdgpu-func-calls)
      AMDGPU_FUNC_CALLS="1"
      ;;
    --hcc-extra-libs=*)
      HCC_EXTRA_LIBRARIES="$HCC_EXTRA_LIBRARIES ${ARG#*=}"
      ;;
    --dump-isa)
      KMDUMPISA=1
      ;;
    --dump-llvm)
      KMDUMPLLVM=1
      ;;
    --dump-dir=*)
      KMDUMPDIR="${ARG#*=}"
      ;;
  esac
done

# hijack LLVM #1: replace the input with an assembled ./hack.input.ll
if [[ "$KMHACKLLVM" == "1" && -e ./hack.input.ll ]]; then
  echo "Use ./hack.input.ll to hijack $1"
  "$AS" ./hack.input.ll
  cp ./hack.input.bc "$1"
fi

# dump the (possibly hijacked) input alongside the other dumps
if [[ "$KMDUMPLLVM" == "1" ]]; then
  cp "$1" "$KMDUMPDIR/dump.input.bc"
fi

# select appropriate ROCm-Device-Libs per AMDGPU_TARGET
HCC_EXTRA_ARCH_FILE=""
case "$AMDGPU_TARGET" in
  gfx700 | gfx701 | gfx801 | gfx802 | gfx803 | gfx900 | gfx901 | gfx906 | gfx908)
    ISA_NUMBER="${AMDGPU_TARGET#gfx}"
    OCLC_ISA_VERSION_LIB="$ROCM_LIB/oclc_isa_version_$ISA_NUMBER.amdgcn.bc"
    # Per-target extra libraries come from HCC_EXTRA_LIBRARIES_GFX<nnn>.
    EXTRA_LIBS_VAR="HCC_EXTRA_LIBRARIES_GFX$ISA_NUMBER"
    HCC_EXTRA_ARCH_FILE="${!EXTRA_LIBS_VAR}"
    ;;
esac
# Any other (or missing) target proceeds without a per-target ISA version
# library, as before.

case "$AMDGPU_TARGET" in
  gfx906 | gfx908)
    KMOPTLLC+=" -mattr=+sram-ecc"
    ;;
esac

HCC_BC_LIBS=(
  "$ROCM_LIB/hc.amdgcn.bc"
  "$ROCM_LIB/hip.amdgcn.bc"
  "$ROCM_LIB/opencl.amdgcn.bc"
  "$ROCM_LIB/ocml.amdgcn.bc"
  "$ROCM_LIB/ockl.amdgcn.bc"
)
if [[ -n "$OCLC_ISA_VERSION_LIB" ]]; then
  HCC_BC_LIBS+=("$OCLC_ISA_VERSION_LIB")
fi
HCC_BC_LIBS+=(
  "$ROCM_LIB/oclc_finite_only_off.amdgcn.bc"
  "$ROCM_LIB/oclc_daz_opt_off.amdgcn.bc"
  "$ROCM_LIB/oclc_correctly_rounded_sqrt_on.amdgcn.bc"
  "$ROCM_LIB/oclc_unsafe_math_off.amdgcn.bc"
)
if [[ -f "$ROCM_LIB/oclc_wavefrontsize64_on.amdgcn.bc" ]]; then
  HCC_BC_LIBS+=("$ROCM_LIB/oclc_wavefrontsize64_on.amdgcn.bc")
fi

# include libraries specified through the HCC_EXTRA_LIBRARIES environment
# variable; these are whitespace-separated lists, so splitting is intended.
# shellcheck disable=SC2206
HCC_BC_LIBS+=($HCC_EXTRA_LIBRARIES $HCC_EXTRA_ARCH_FILE)

"$LINK" -suppress-warnings -o "$2.linked.bc" "$1" "${HCC_BC_LIBS[@]}"
check_stage $? "llvm-link with ROCm-Device-Libs"

if [[ "$KMDUMPLLVM" == "1" ]]; then
  cp "$2.linked.bc" "$KMDUMPDIR/dump.linked.bc"
fi

# Invoke HCC-specific opt passes.
# KMOPTOPT is an option list and is split into words on purpose.
# shellcheck disable=SC2086
"$OPT" $KMOPTOPT -mtriple amdgcn-amd-amdhsa -mcpu="$AMDGPU_TARGET" \
  -load "$LIB/LLVMSelectAcceleratorCode@CMAKE_SHARED_LIBRARY_SUFFIX@" \
  -load "$LIB/LLVMPromotePointerKernArgsToGlobal@CMAKE_SHARED_LIBRARY_SUFFIX@" \
  -select-accelerator-code -sac-enable-function-calls="$AMDGPU_FUNC_CALLS" \
  -promote-pointer-kernargs-to-global \
  -infer-address-spaces \
  -verify < "$2.linked.bc" -o "$2.opt.bc"
check_stage $? "HCC-specific opt passes"

# hijack LLVM #2: replace the optimized bitcode with an assembled
# ./hack-<target>.opt.ll
if [[ "$KMHACKLLVM" == "1" && -e "./hack-$AMDGPU_TARGET.opt.ll" ]]; then
  echo "Use ./hack-$AMDGPU_TARGET.opt.ll to hijack $2.opt.bc"
  "$AS" "./hack-$AMDGPU_TARGET.opt.ll"
  cp "./hack-$AMDGPU_TARGET.opt.bc" "$2.opt.bc"
fi

if [[ "$KMDUMPLLVM" == "1" ]]; then
  cp "$2.opt.bc" "$KMDUMPDIR/dump-$AMDGPU_TARGET.opt.bc"
fi

# Disable code object v3, generate code object v2 for now
CODE_OBJECT_FORMAT="-mattr=-code-object-v3"

# With ThinLTO the llc output is written straight to $2; otherwise it goes to
# an intermediate .isabin that ld.lld turns into the final code object below.
if [[ "$KMTHINLTO" == "1" ]]; then
  LLC_OBJECT="$2"
else
  LLC_OBJECT="$2.isabin"
fi

# KMOPTLLC is an option list and is split into words on purpose.
# shellcheck disable=SC2086
"$LLC" -mtriple amdgcn-amd-amdhsa -mcpu="$AMDGPU_TARGET" "$CODE_OBJECT_FORMAT" \
  $KMOPTLLC -amdgpu-function-calls="$AMDGPU_FUNC_CALLS" -filetype=obj \
  -o "$LLC_OBJECT" "$2.opt.bc"
check_stage $? "llc"

if [[ "$KMDUMPISA" == "1" ]]; then
  cp "$LLC_OBJECT" "$KMDUMPDIR/dump-$AMDGPU_TARGET.isabin"
  # Second llc run emits textual assembly for the dump; its status is not
  # checked.
  # shellcheck disable=SC2086
  "$LLC" -mtriple amdgcn-amd-amdhsa -mcpu="$AMDGPU_TARGET" "$CODE_OBJECT_FORMAT" \
    $KMOPTLLC -amdgpu-function-calls="$AMDGPU_FUNC_CALLS" -filetype=asm \
    -o "$2.isa" "$2.opt.bc"
  mv "$2.isa" "$KMDUMPDIR/dump-$AMDGPU_TARGET.isa"
fi

# ThinLTO does not perform LLD here inside clamp-device.in.
# It performs LLD in clamp-link.in after all parallel kernels
# running clamp-device are finished for each target, linking the
# isabin files into a single hsaco. The default path has a single kernel.
if [[ "$KMTHINLTO" != "1" ]]; then
  "$LLD" -shared "$2.isabin" -o "$2"
  check_stage $? "ld.lld"

  if [[ "$KMDUMPISA" == "1" ]]; then
    cp "$2" "$KMDUMPDIR/dump-$AMDGPU_TARGET.hsaco"
  fi
  rm -f "$2.isabin"
fi

# remove temp files
rm -f "$2.promote.bc" "$2.linked.bc" "$2.opt.bc" "$2.isa"