#!/usr/bin/perl
#
# Extract the device kernels embedded in an hcc executable.
#
# The script locates the ".kernel" section of the input file with objdump,
# walks the clang offload bundles stored back to back in that section, writes
# each bundle to "<input>-NNN.bundle", unbundles every hcc-amdgcn code object
# into "<input>-NNN-<target>.hsaco" and disassembles it with llvm-objdump into
# "<input>-NNN-<target>.isa".
#
# Layout of one bundle as parsed below:
#   24 bytes  magic string "__CLANG_OFFLOAD_BUNDLE__"
#    8 bytes  number of bundle entries
#   per entry: 8 bytes offset, 8 bytes size, 8 bytes triple length, triple
#
# NOTE(review): the "-targets=host-..." string contains a
# CMAKE_SYSTEM_PROCESSOR token wrapped in at-signs, which looks like a CMake
# configure_file() placeholder; presumably this file is a template that is
# substituted at build time, so that token is kept verbatim.

use strict;
use warnings;
use File::Copy;
use File::Spec;
use File::Basename;
use Cwd 'realpath';
use Getopt::Std;
use List::Util qw(max);

# Print the command line help and exit.
# NOTE(review): the first usage line ends in "-i " with no argument
# placeholder; it may have been lost at some point -- confirm.
sub usage {
    print("Usage: $0 [OPTION]... -i \n");
    print("Extract the device kernels from an hcc executable.\n\n");
    print("-h \t\t\t\tshow this help message\n");
    print("-i \t\t\t\tinput file\n");
    exit;
}

# Read one 8-byte unsigned integer from $fh and return it; die with
# $error_message on a short or failed read.
# The value is decoded as little-endian (the byte order used by the
# clang-offload-bundler binary format) from two 32-bit halves, so the result
# does not depend on the host byte order or on 64-bit "Q" support in perl.
sub read_u64 {
    my ($fh, $error_message) = @_;
    my $raw;
    my $read_bytes = read($fh, $raw, 8);
    (defined $read_bytes && $read_bytes == 8) or die($error_message);
    my ($low, $high) = unpack("VV", $raw);
    return $high * 4294967296 + $low;
}

# Return (size, file offset) of the section named exactly ".kernel" in $file,
# or (0, 0) when objdump reports no such section.
# objdump is started without a shell and its section table is parsed here, so
# a file name that merely contains "kernel" or shell metacharacters cannot
# confuse the lookup.
sub find_kernel_section {
    my ($file) = @_;
    no warnings 'portable';    # hex() of values above 0xffffffff is intended
    open(my $objdump_fh, '-|', 'objdump', '-h', $file)
        or die("Fail to run objdump: $!\n");
    my ($size, $offset) = (0, 0);
    my $found = 0;
    # Read all of the output (instead of stopping at the first hit) so that
    # objdump is not terminated by a closed pipe.
    while (my $line = <$objdump_fh>) {
        next if $found;
        my @fields = split(' ', $line);
        next unless @fields >= 6;
        # Section rows look like: Idx Name Size VMA LMA File-off Algn
        my ($index, $name, $hex_size, $hex_offset) = @fields[0, 1, 2, 5];
        next unless $index =~ /^\d+$/ && $name eq '.kernel';
        next unless $hex_size =~ /^[0-9a-fA-F]+$/
                 && $hex_offset =~ /^[0-9a-fA-F]+$/;
        ($size, $offset) = (hex($hex_size), hex($hex_offset));
        $found = 1;
    }
    close($objdump_fh);
    return ($size, $offset);
}

# Run @command without a shell and store its standard output in $output_file.
# Returns true only when the command exited with status 0 and all output was
# written successfully.
sub run_to_file {
    my ($output_file, @command) = @_;
    open(my $output_fh, '>', $output_file) or return 0;
    binmode($output_fh);
    my $command_fh;
    if (!open($command_fh, '-|', @command)) {
        close($output_fh);
        return 0;
    }
    binmode($command_fh);
    my $ok = 1;
    my $chunk;
    while (1) {
        my $count = read($command_fh, $chunk, 65536);
        if (!defined $count) {
            $ok = 0;
            last;
        }
        last if $count == 0;
        print {$output_fh} $chunk or $ok = 0;
    }
    # close() on a pipe handle returns false when the child exited non-zero
    close($command_fh) or $ok = 0;
    close($output_fh) or $ok = 0;
    return $ok;
}

my $debug = 0;

# use clang offload bundler (instead of "dd")
# to extract device object from the bundle
my $use_clang_offload_bundler = 1;

my %options = ();
getopts('hi:', \%options);

if (!%options || defined $options{h}) {
    usage();
}

# get the directory of this script
my $tools_path_prefix = dirname(realpath($0));
if (defined $ENV{'HCC_HOME'}) {
    $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin");
}
my $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump");
my $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler");

my $input_file;
defined $options{i} || die("input not specified");
$input_file = $options{i};
(-f $input_file) || die("can't find $input_file");

my ($kernel_section_size, $kernel_section_offset) = find_kernel_section($input_file);
my $kernel_section_end = $kernel_section_offset + $kernel_section_size;

$kernel_section_size or die("No .kernel section found\n");

if ($debug) {
    print "kernel section size: $kernel_section_size\n";
    print "kernel section offset: $kernel_section_offset\n";
    print "kernel section end: $kernel_section_end\n";
}

# parse kernel bundle header
# ("or" instead of "||" so that a failed open is actually detected)
open(my $input_fh, '<', $input_file) or die("can't open $input_file: $!\n");
binmode($input_fh);

my $current_blob_offset = $kernel_section_offset;
my $num_blobs = 0;

while (1) {
    if ($debug) {
        print "Current blob offset: $current_blob_offset\n";
    }

    if ($current_blob_offset >= $kernel_section_end) {
        if ($debug) {
            print "reached end of kernel section\n";
        }
        last;
    }

    seek($input_fh, $current_blob_offset, 0)
        or die("Fail to seek to blob offset $current_blob_offset: $!\n");

    # skip OFFLOAD_BUNDLER_MAGIC_STR
    my $magic_str;
    my $read_bytes = read($input_fh, $magic_str, 24);
    if (!defined $read_bytes || $read_bytes != 24
        || $magic_str ne "__CLANG_OFFLOAD_BUNDLE__") {
        # didn't detect the bundle magic string
        if ($debug) {
            print "Offload bundle magic string not detected\n";
        }
        last;
    }

    # read number of bundles
    my $num_bundles = read_u64($input_fh, "Fail to parse number of bundles\n");
    if ($debug) {
        print "Blob $num_blobs, number of bundles: $num_bundles\n";
    }

    # detected GPU targets
    my @asic_target_array;

    # offset and size of the entry that lies furthest into the blob; together
    # they give the total length of this blob
    my $last_bundle_offset = 0;
    my $last_bundle_size = 0;

    # strings for creating new files
    my $file_blob_number = sprintf("%03d", $num_blobs);
    my $filename_prefix = "${input_file}-${file_blob_number}";

    # The first output/target pair sends the host object to /dev/null.
    my $clang_offloadbundler_outputs="-outputs=/dev/null";
    my $clang_offloadbundler_targets="-targets=host-@CMAKE_SYSTEM_PROCESSOR@-unknown-linux";

    for (my $iter = 0; $iter < $num_bundles; $iter++) {
        # read bundle offset
        my $offset = read_u64($input_fh, "Fail to parse bundle offset\n");
        $last_bundle_offset = max($last_bundle_offset, $offset);

        # read bundle size
        my $size = read_u64($input_fh, "Fail to parse bundle size\n");
        if ($last_bundle_offset == $offset) {
            $last_bundle_size = $size;
        }

        # read triple size
        my $triple_size = read_u64($input_fh, "Fail to parse triple size\n");

        # triple
        my $triple;
        $read_bytes = read($input_fh, $triple, $triple_size);
        (defined $read_bytes && $read_bytes == $triple_size)
            or die("Fail to parse triple\n");

        if ($debug) {
            print("\t bundle $iter: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n");
        }

        # Only process GPU targets, skip host targets
        next unless $triple =~ /^hcc-amdgcn-amd-amdhsa--/;

        # 23 is the length of the "hcc-amdgcn-amd-amdhsa--" prefix
        my $asic_target = substr($triple, 23);

        # augment arguments for clang-offload-bundler
        my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
        $clang_offloadbundler_outputs = "${clang_offloadbundler_outputs},${hsaco_file_name}";
        $clang_offloadbundler_targets = "${clang_offloadbundler_targets},${triple}";

        # add into asic_target_array
        push(@asic_target_array, $asic_target);

        if (!$use_clang_offload_bundler) {
            my $offset_for_hsaco = $current_blob_offset + $offset;
            # list form: no shell, so the file names are passed through as-is
            my @dd_command = ("dd", "if=${input_file}", "of=${hsaco_file_name}",
                              "skip=$offset_for_hsaco", "count=$size",
                              "bs=1", "status=none");
            if ($debug) {
                print("extract code bundle with dd: " . join(" ", @dd_command) . "\n");
            }
            system(@dd_command) == 0 or die("Fail to extract code bundle with dd\n");
        }
    }

    # extract the code blob
    my $blob_filename = "${filename_prefix}.bundle";
    my $write_bytes = $last_bundle_offset + $last_bundle_size;

    # A zero-length blob would never advance $current_blob_offset below and
    # the loop would not terminate, so treat it as malformed input.
    $write_bytes > 0
        or die("Malformed kernel bundle at offset $current_blob_offset: no bundle entries\n");

    system("dd", "if=$input_file", "of=$blob_filename",
           "skip=$current_blob_offset", "count=$write_bytes",
           "bs=1", "status=none") == 0
        or die("Extracting kernel bundle file failed: $?");

    if ($use_clang_offload_bundler) {
        if (-f $clang_offload_bundler) {
            # use clang-offload-bundler to unbundle HSACO
            my @command = ($clang_offload_bundler, "-unbundle", "-type=o",
                           "-inputs=${blob_filename}",
                           $clang_offloadbundler_outputs,
                           $clang_offloadbundler_targets);
            if ($debug) {
                print("clang offload bundler command: " . join(" ", @command) . "\n");
            }
            system(@command) == 0 or die("Fail to execute clang-offload-bundler");
        }
        else {
            die("Can't find clang-offload-bundler\n");
        }
    }

    if (-f $llvm_objdump) {
        for my $asic_target (@asic_target_array) {
            my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
            my $isa_file_name = "${filename_prefix}-${asic_target}.isa";

            # use llvm-objdump to dump out GCN ISA
            run_to_file($isa_file_name, $llvm_objdump, "-disassemble",
                        "-mcpu=$asic_target", $hsaco_file_name)
                or die("Fail to disassemble AMDGPU ISA for target " . $asic_target);

            if ($debug) {
                print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . "\n");
            }
        }
    }
    else {
        die("Can't find llvm-objdump\n");
    }

    # the next blob, if any, starts right after the last entry of this one
    $current_blob_offset = $current_blob_offset + $last_bundle_offset + $last_bundle_size;
    $num_blobs++;
}

close($input_fh);

$num_blobs or die("No device code found.\n");

exit(0);