#!/usr/bin/perl
use strict;
use File::Copy;
use File::Spec;
use File::Basename;
use File::Which;
use Cwd 'realpath';
use Getopt::Std;
use List::Util qw(max);
# Print the command-line help text to standard output and terminate the
# script. Takes no arguments and never returns to the caller.
sub usage {
    my @help_lines = (
        "Usage: $0 [OPTION]... -i \n",
        "Extract the device kernels from an hcc executable.\n\n",
        "-h \t\t\t\tshow this help message\n",
        "-i \t\t\t\tinput file\n",
    );
    print(@help_lines);
    exit;
}
# When true, the rest of the script prints diagnostic traces while it works.
my $debug = 0;
# When true, device objects are unbundled with clang-offload-bundler;
# when false, they are carved out of the input file with "dd" instead.
my $use_clang_offload_bundler = 1;

# Command-line switches: -h (help flag) and -i <file> (input file).
my %options=();
getopts('hi:', \%options);

# Show the help text when no option was given at all or -h was requested.
usage() if (!%options || defined $options{h});

# The input file is mandatory and must be an existing regular file.
my $input_file;
die("input not specified") unless defined $options{i};
$input_file = $options{i};
die("can't find $input_file") unless (-f $input_file);
# Locate llvm-objdump and clang-offload-bundler.
#
# Search order for the tools directory:
#   1. $HCC_HOME/bin when HCC_HOME is set in the environment;
#   2. otherwise the directory containing this script;
#   3. otherwise (no llvm-objdump next to the script) ../../hcc/bin
#      relative to the script directory.
# Any tool still missing after that is looked up on PATH; the script dies
# when a tool cannot be found at all.
my $tools_path_prefix;
my $llvm_objdump;
my $clang_offload_bundler;
if (defined $ENV{'HCC_HOME'}) {
    $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin");
}
else {
    $tools_path_prefix = dirname(realpath($0));
    # only llvm-objdump decides whether the fallback directory is used
    unless (-f File::Spec->catfile($tools_path_prefix, "llvm-objdump")) {
        $tools_path_prefix = File::Spec->catfile($tools_path_prefix, "/../../hcc/bin");
    }
}
$llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump");
$clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler");

# Fall back to PATH, one tool at a time, in the same order as the checks above.
foreach my $tool ([\$llvm_objdump, "llvm-objdump"],
                  [\$clang_offload_bundler, "clang-offload-bundler"]) {
    my ($path_ref, $tool_name) = @{$tool};
    next if (-f ${$path_ref});
    ${$path_ref} = which($tool_name);
    (-f ${$path_ref}) or die("Can't find $tool_name\n");
}
# Look up one section in the "objdump -h" listing of $object_file.
#
# Parameters:
#   $object_file  - path of the file handed to objdump
#   $section_name - section name to find; compared for exact equality
# Returns:
#   (size, file offset) of the first matching section, both as numbers,
#   or (0, 0) when the listing contains no such section.
# Dies when objdump cannot be started.
#
# objdump is started without a shell (list-form pipe open), so the path is
# never interpreted as shell syntax. Only section rows (first column is a
# numeric index) are considered, which keeps the "<file>: file format"
# banner and the per-section flag lines from being mistaken for a match.
sub lookup_section_size_and_offset {
    my ($object_file, $section_name) = @_;
    open(my $objdump_fh, '-|', 'objdump', '-h', $object_file)
        or die("Can't run objdump on $object_file: $!\n");
    # drain the whole listing before closing so objdump is not cut short
    my @listing = <$objdump_fh>;
    close($objdump_fh);
    foreach my $line (@listing) {
        # section row columns: Idx Name Size VMA LMA File-off Algn
        my @fields = split(' ', $line);
        next unless (@fields >= 6 && $fields[0] =~ /^\d+$/);
        next unless ($fields[1] eq $section_name);
        return (hex($fields[2]), hex($fields[5]));
    }
    return (0, 0);
}

# kernel section information for HCC
my $kernel_section_name = ".kernel";
my $kernel_triple = "hcc-amdgcn-amd-amdhsa--";
my $kernel_blob_alignment = 1;
my ($kernel_section_size, $kernel_section_offset) =
    lookup_section_size_and_offset($input_file, $kernel_section_name);
if (!$kernel_section_size) {
    # If there isn't a section created by HCC,
    # try to detect a kernel section created by HIP-Clang
    $kernel_section_name = ".hip_fatbin";
    $kernel_triple = "hip-amdgcn-amd-amdhsa-";
    $kernel_blob_alignment = 8;
    ($kernel_section_size, $kernel_section_offset) =
        lookup_section_size_and_offset($input_file, $kernel_section_name);
    $kernel_section_size or die("No kernel section found\n");
}
# file offset of the first byte after the kernel section
my $kernel_section_end = $kernel_section_offset + $kernel_section_size;
if ($debug) {
    print "kernel section size: $kernel_section_size\n";
    print "kernel section offset: $kernel_section_offset\n";
    print "kernel section end: $kernel_section_end\n";
}
# Read one unsigned 64-bit bundle header field from $fh and return it.
#
# Parameters:
#   $fh   - binary-mode file handle positioned at the field
#   $what - field description used in the error message
# Dies with "Fail to parse <what>" when fewer than 8 bytes can be read.
sub read_bundle_header_u64 {
    my ($fh, $what) = @_;
    my $raw;
    my $got = read($fh, $raw, 8);
    (defined $got && $got == 8) or die("Fail to parse $what\n");
    # "Q<" decodes little-endian independent of the host byte order.
    # NOTE(review): the clang offload bundle header is understood to be
    # little-endian; confirm against the bundler version in use.
    return unpack("Q<", $raw);
}

# parse kernel bundle header
#
# Walk the kernel section blob by blob. For every blob that starts with the
# offload bundle magic string: read its header, copy the whole blob to
# "<input>-NNN.bundle", extract one .hsaco file per GPU target and
# disassemble each of them into a matching .isa file.
#
# Three-argument open with the low-precedence "or", so that a failed open
# is actually reported.
open(my $input_fh, '<', $input_file) or die("Can't open $input_file: $!\n");
binmode($input_fh);
my $current_blob_offset = $kernel_section_offset;
my $num_blobs = 0;
while (1) {
    # adjust the offset to the blob alignment
    $current_blob_offset = int(($current_blob_offset + ($kernel_blob_alignment - 1)) / $kernel_blob_alignment) * $kernel_blob_alignment;
    if ($debug) {
        print "Current blob offset: $current_blob_offset\n";
    }
    if ($current_blob_offset >= $kernel_section_end) {
        if ($debug) {
            print "reached end of kernel section\n";
        }
        last;
    }
    seek($input_fh, $current_blob_offset, 0)
        or die("Fail to seek to offset $current_blob_offset in $input_file: $!\n");

    # skip OFFLOAD_BUNDLER_MAGIC_STR
    my $magic_str;
    my $read_bytes = read($input_fh, $magic_str, 24);
    if (!defined $read_bytes || ($read_bytes != 24) || ($magic_str ne "__CLANG_OFFLOAD_BUNDLE__")) {
        # didn't detect the bundle magic string
        if ($debug) {
            print "Offload bundle magic string not detected\n";
        }
        last;
    }

    # read number of bundles
    my $num_bundles = read_bundle_header_u64($input_fh, "number of bundles");
    if ($debug) {
        print "Blob $num_blobs, number of bundles: $num_bundles\n";
    }

    # detected GPU targets
    my @asic_target_array;
    # offset/size of the bundle placed furthest into the blob; together
    # they give the total length of the blob
    my $last_bundle_offset = 0;
    my $last_bundle_size = 0;

    # strings for creating new files
    my $file_blob_number = sprintf("%03d", $num_blobs);
    my $filename_prefix = "${input_file}-${file_blob_number}";
    # The host entry is always listed first and its output is discarded.
    # NOTE(review): the @CMAKE_SYSTEM_PROCESSOR@ token looks like a
    # placeholder substituted at build configuration time - confirm.
    my $clang_offloadbundler_outputs="-outputs=/dev/null";
    my $clang_offloadbundler_targets="-targets=host-@CMAKE_SYSTEM_PROCESSOR@-unknown-linux";

    for (my $bundle_index = 0; $bundle_index < $num_bundles; $bundle_index++) {
        # per-bundle header entry: offset, size, triple size, triple
        my $offset = read_bundle_header_u64($input_fh, "bundle offset");
        $last_bundle_offset = max($last_bundle_offset, $offset);

        my $size = read_bundle_header_u64($input_fh, "bundle size");
        if ($last_bundle_offset == $offset) {
            $last_bundle_size = $size;
        }

        my $triple_size = read_bundle_header_u64($input_fh, "triple size");

        my $triple;
        $read_bytes = read($input_fh, $triple, $triple_size);
        (defined $read_bytes && $read_bytes == $triple_size) or die("Fail to parse triple\n");

        if ($debug) {
            print("\t bundle $bundle_index: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n");
        }

        # Only process GPU targets, skip host targets.
        # A literal prefix comparison is used so that no character of the
        # triple is ever interpreted as regex syntax.
        next unless (index($triple, $kernel_triple) == 0);

        my $asic_target = substr($triple, length($kernel_triple));

        # augment arguments for clang-offload-bundler
        my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
        $clang_offloadbundler_outputs = "${clang_offloadbundler_outputs},${hsaco_file_name}";
        $clang_offloadbundler_targets = "${clang_offloadbundler_targets},${triple}";

        push(@asic_target_array, $asic_target);

        if (!$use_clang_offload_bundler) {
            # bundle offsets are relative to the start of the blob
            my $offset_for_hsaco = $current_blob_offset + $offset;
            # list form: dd is run without a shell, so file names with
            # spaces or shell metacharacters are passed through verbatim
            my @dd_command = ("dd", "if=${input_file}", "of=${hsaco_file_name}",
                              "skip=$offset_for_hsaco", "count=$size", "bs=1", "status=none");
            if ($debug) {
                print("extract code bundle with dd: " . join(" ", @dd_command) . "\n");
            }
            system(@dd_command) == 0
                or die("Fail to extract code bundle with dd\n");
        }
    }

    # extract the code blob
    my $blob_filename = "${filename_prefix}.bundle";
    my $write_bytes = $last_bundle_offset + $last_bundle_size;
    # A zero-length blob would leave the offset unchanged and the loop
    # would never advance; treat it as a corrupt bundle instead.
    $write_bytes > 0
        or die("Malformed offload bundle at offset $current_blob_offset: no bundle data\n");
    system("dd", "if=$input_file", "of=$blob_filename", "skip=$current_blob_offset",
           "count=$write_bytes", "bs=1", "status=none") == 0
        or die("Extracting kernel bundle file failed: $?");

    if ($use_clang_offload_bundler) {
        # use clang-offload-bundler to unbundle HSACO
        my @command = ($clang_offload_bundler, "-unbundle", "-type=o",
                       "-inputs=${blob_filename}",
                       $clang_offloadbundler_outputs, $clang_offloadbundler_targets);
        if ($debug) {
            print("clang offload bundler command: " . join(" ", @command) . "\n");
        }
        system(@command) == 0
            or die("Fail to execute clang-offload-bundler");
    }

    foreach my $asic_target (@asic_target_array) {
        my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
        my $isa_file_name = "${filename_prefix}-${asic_target}.isa";
        my $failure = "Fail to disassemble AMDGPU ISA for target " . $asic_target;

        # use llvm-objdump to dump out GCN ISA; its standard output is
        # copied into the .isa file through a pipe so no shell is needed
        open(my $isa_fh, '>', $isa_file_name)
            or die("$failure: can't write $isa_file_name: $!\n");
        open(my $disasm_fh, '-|', $llvm_objdump, "-disassemble", "-mcpu=$asic_target", $hsaco_file_name)
            or die("$failure: can't run $llvm_objdump: $!\n");
        while (my $line = <$disasm_fh>) {
            print {$isa_fh} $line or die("$failure: can't write $isa_file_name: $!\n");
        }
        # close on a pipe returns false when the command exited non-zero
        close($disasm_fh) or die($failure);
        close($isa_fh) or die("$failure: can't write $isa_file_name: $!\n");

        if ($debug) {
            print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . "\n");
        }
    }

    # the next blob (if any) starts right after this one, before alignment
    $current_blob_offset = $current_blob_offset + $write_bytes;
    $num_blobs++;
}
close($input_fh);
$num_blobs or die("No device code found.\n");
exit(0);