//////////////////////////////////////////////////////////////////////////////// // // The University of Illinois/NCSA // Open Source License (NCSA) // // Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // // AMD Research and AMD HSA Software Development // // Advanced Micro Devices, Inc. // // www.amd.com // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to // deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: // // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // - Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimers in // the documentation and/or other materials provided with the distribution. // - Neither the names of Advanced Micro Devices, Inc, // nor the names of its contributors may be used to endorse or promote // products derived from this Software without specific prior written // permission. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS WITH THE SOFTWARE. // //////////////////////////////////////////////////////////////////////////////// #include "core/inc/amd_topology.h" #include "core/inc/amd_filter_device.h" #include #include #include #include #include #include #include #ifndef NDBEUG #include #endif #include "hsakmt.h" #include "core/inc/runtime.h" #include "core/inc/amd_cpu_agent.h" #include "core/inc/amd_gpu_agent.h" #include "core/inc/amd_memory_region.h" #include "core/util/utils.h" extern r_debug _amdgpu_r_debug; namespace rocr { namespace AMD { // Minimum acceptable KFD version numbers static const uint kKfdVersionMajor = 0; static const uint kKfdVersionMinor = 99; // Query for user preference and use that to determine Xnack mode of ROCm system. // Return true if Xnack mode is ON or false if OFF. Xnack mode of a system is // orthogonal to devices that do not support Xnack mode. It is legal for a // system with Xnack ON to have devices that do not support Xnack functionality. bool BindXnackMode() { // Get users' preference for Xnack mode of ROCm platform HSAint32 mode; mode = core::Runtime::runtime_singleton_->flag().xnack(); bool config_xnack = (core::Runtime::runtime_singleton_->flag().xnack() != Flag::XNACK_REQUEST::XNACK_UNCHANGED); // Indicate to driver users' preference for Xnack mode // Call to driver can fail and is a supported feature HSAKMT_STATUS status = HSAKMT_STATUS_ERROR; if (config_xnack) { status = hsaKmtSetXNACKMode(mode); if (status == HSAKMT_STATUS_SUCCESS) { return mode; } } // Get Xnack mode of devices bound by driver. This could happen // when a call to SET Xnack mode fails or user has no particular // preference status = hsaKmtGetXNACKMode((HSAint32*)&mode); if(status != HSAKMT_STATUS_SUCCESS) { debug_print("KFD does not support xnack mode query.\nROCr must assume xnack is disabled.\n"); return false; } return mode; } CpuAgent* DiscoverCpu(HSAuint32 node_id, HsaNodeProperties& node_prop) { if (node_prop.NumCPUCores == 0) { return nullptr; } CpuAgent* cpu = new CpuAgent(node_id, node_prop); core::Runtime::runtime_singleton_->RegisterAgent(cpu); return cpu; } GpuAgent* DiscoverGpu(HSAuint32 node_id, HsaNodeProperties& node_prop, bool xnack_mode) { GpuAgent* gpu = nullptr; if (node_prop.NumFComputeCores == 0) { // Ignore non GPUs. return nullptr; } try { gpu = new GpuAgent(node_id, node_prop, xnack_mode, core::Runtime::runtime_singleton_->gpu_agents().size()); const HsaVersionInfo& kfd_version = core::Runtime::runtime_singleton_->KfdVersion().version; // Check for sramecc incompatibility due to sramecc not being reported correctly in kfd before // 1.4. if (gpu->isa()->IsSrameccSupported() && (kfd_version.KernelInterfaceMajorVersion <= 1 && kfd_version.KernelInterfaceMinorVersion < 4)) { // gfx906 has both sramecc modes in use. Suppress the device. if ((gpu->isa()->GetProcessorName() == "gfx906") && core::Runtime::runtime_singleton_->flag().check_sramecc_validity()) { char name[64]; gpu->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, name); name[63] = '\0'; fprintf(stderr, "HSA Error: Incompatible kernel and userspace, %s disabled. Upgrade amdgpu.\n", name); delete gpu; return nullptr; } // gfx908 always has sramecc set to on in vbios. Set mode bit to on and recreate the device. if (gpu->isa()->GetProcessorName() == "gfx908") { node_prop.Capability.ui32.SRAM_EDCSupport = 1; delete gpu; gpu = new GpuAgent(node_id, node_prop, xnack_mode, core::Runtime::runtime_singleton_->gpu_agents().size()); } } } catch (const hsa_exception& e) { if(e.error_code() == HSA_STATUS_ERROR_INVALID_ISA) { ifdebug { if (!strIsEmpty(e.what())) debug_print("Warning: %s\n", e.what()); } // Ignore unrecognized GPUs. return nullptr; } else { // Rethrow remaining exceptions. throw; } } core::Runtime::runtime_singleton_->RegisterAgent(gpu); return gpu; } void RegisterLinkInfo(uint32_t node_id, uint32_t num_link) { // Register connectivity links for this agent to the runtime. if (num_link == 0) { return; } std::vector links(num_link); if (HSAKMT_STATUS_SUCCESS != hsaKmtGetNodeIoLinkProperties(node_id, num_link, &links[0])) { return; } for (HsaIoLinkProperties io_link : links) { // Populate link info with thunk property. hsa_amd_memory_pool_link_info_t link_info = {0}; switch (io_link.IoLinkType) { case HSA_IOLINKTYPE_HYPERTRANSPORT: link_info.link_type = HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT; link_info.atomic_support_32bit = true; link_info.atomic_support_64bit = true; link_info.coherent_support = true; break; case HSA_IOLINKTYPE_PCIEXPRESS: link_info.link_type = HSA_AMD_LINK_INFO_TYPE_PCIE; link_info.atomic_support_32bit = true; link_info.atomic_support_64bit = true; link_info.coherent_support = true; break; case HSA_IOLINK_TYPE_QPI_1_1: link_info.link_type = HSA_AMD_LINK_INFO_TYPE_QPI; link_info.atomic_support_32bit = true; link_info.atomic_support_64bit = true; link_info.coherent_support = true; break; case HSA_IOLINK_TYPE_INFINIBAND: link_info.link_type = HSA_AMD_LINK_INFO_TYPE_INFINBAND; debug_print("IOLINK is missing atomic and coherency defaults.\n"); break; case HSA_IOLINK_TYPE_XGMI: link_info.link_type = HSA_AMD_LINK_INFO_TYPE_XGMI; link_info.atomic_support_32bit = true; link_info.atomic_support_64bit = true; link_info.coherent_support = true; break; default: debug_print("Unrecognized IOLINK type.\n"); break; } // KFD is reporting wrong override status for XGMI. Disallow override for bringup. if (io_link.Flags.ui32.Override == 1) { if (io_link.Flags.ui32.NoPeerToPeerDMA == 1) { // Ignore this link since peer to peer is not allowed. continue; } link_info.atomic_support_32bit = (io_link.Flags.ui32.NoAtomics32bit == 0); link_info.atomic_support_64bit = (io_link.Flags.ui32.NoAtomics64bit == 0); link_info.coherent_support = (io_link.Flags.ui32.NonCoherent == 0); } link_info.max_bandwidth = io_link.MaximumBandwidth; link_info.max_latency = io_link.MaximumLatency; link_info.min_bandwidth = io_link.MinimumBandwidth; link_info.min_latency = io_link.MinimumLatency; link_info.numa_distance = io_link.Weight; core::Runtime::runtime_singleton_->RegisterLinkInfo( io_link.NodeFrom, io_link.NodeTo, io_link.Weight, link_info); } } /** * Process the list of Gpus that are surfaced to user */ static void SurfaceGpuList(std::vector& gpu_list, bool xnack_mode) { // Process user visible Gpu devices int32_t invalidIdx = -1; int32_t list_sz = gpu_list.size(); HsaNodeProperties node_prop = {0}; for (int32_t idx = 0; idx < list_sz; idx++) { if (gpu_list[idx] == invalidIdx) { break; } // Obtain properties of the node HSAKMT_STATUS err_val = hsaKmtGetNodeProperties(gpu_list[idx], &node_prop); assert(err_val == HSAKMT_STATUS_SUCCESS && "Error in getting Node Properties"); // Instantiate a Gpu device. The IO links // of this node have already been registered assert((node_prop.NumFComputeCores != 0) && "Improper node used for GPU device discovery."); DiscoverGpu(gpu_list[idx], node_prop, xnack_mode); } } /// @brief Calls Kfd thunk to get the snapshot of the topology of the system, /// which includes associations between, node, devices, memory and caches. void BuildTopology() { HsaVersionInfo kfd_version; if (hsaKmtGetVersion(&kfd_version) != HSAKMT_STATUS_SUCCESS) { return; } if (kfd_version.KernelInterfaceMajorVersion == kKfdVersionMajor && kfd_version.KernelInterfaceMinorVersion < kKfdVersionMinor) { return; } // Disable KFD event support when using open source KFD if (kfd_version.KernelInterfaceMajorVersion == 1 && kfd_version.KernelInterfaceMinorVersion == 0) { core::g_use_interrupt_wait = false; } core::Runtime::runtime_singleton_->KfdVersion(kfd_version); HsaSystemProperties props; hsaKmtReleaseSystemProperties(); if (hsaKmtAcquireSystemProperties(&props) != HSAKMT_STATUS_SUCCESS) { return; } core::Runtime::runtime_singleton_->SetLinkCount(props.NumNodes); // Query if env ROCR_VISIBLE_DEVICES is defined. If defined // determine number and order of GPU devices to be surfaced RvdFilter rvdFilter; int32_t invalidIdx = -1; uint32_t visibleCnt = 0; std::vector gpu_usr_list; bool filter = RvdFilter::FilterDevices(); if (filter) { rvdFilter.BuildRvdTokenList(); rvdFilter.BuildDeviceUuidList(props.NumNodes); visibleCnt = rvdFilter.BuildUsrDeviceList(); for (int32_t idx = 0; idx < visibleCnt; idx++) { gpu_usr_list.push_back(invalidIdx); } } // Discover agents on every node in the platform. int32_t kfdIdx = 0; for (HSAuint32 node_id = 0; node_id < props.NumNodes; node_id++) { HsaNodeProperties node_prop = {0}; if (hsaKmtGetNodeProperties(node_id, &node_prop) != HSAKMT_STATUS_SUCCESS) { continue; } // Instantiate a Cpu device const CpuAgent* cpu = DiscoverCpu(node_id, node_prop); assert(((node_prop.NumCPUCores == 0) || (cpu != nullptr)) && "CPU device failed discovery."); // Current node is either a dGpu or Apu and might belong // to user visible list. Process node if present in usr // visible list, continue if not found if (node_prop.NumFComputeCores != 0) { if (filter) { int32_t devRank = rvdFilter.GetUsrDeviceRank(kfdIdx); if (devRank != (-1)) { gpu_usr_list[devRank] = node_id; } } else { gpu_usr_list.push_back(node_id); } kfdIdx++; } // Register IO links of node without regard to // it being visible to user or not. It is not // possible to access links of nodes that are // not visible RegisterLinkInfo(node_id, node_prop.NumIOLinks); } // Determine the Xnack mode to be bound for system bool xnack_mode = BindXnackMode(); // Instantiate ROCr objects to encapsulate Gpu devices SurfaceGpuList(gpu_usr_list, xnack_mode); // Parse HSA_CU_MASK with GPU and CU count limits. uint32_t maxGpu = core::Runtime::runtime_singleton_->gpu_agents().size(); uint32_t maxCu = 0; uint32_t cus; for (auto& gpu : core::Runtime::runtime_singleton_->gpu_agents()) { gpu->GetInfo((hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cus); maxCu = Max(maxCu, cus); } const_cast(core::Runtime::runtime_singleton_->flag()).parse_masks(maxGpu, maxCu); } bool Load() { // Open connection to kernel driver. if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS) { return false; } MAKE_NAMED_SCOPE_GUARD(kfd, [&]() { hsaKmtCloseKFD(); }); // Register runtime and optionally enable the debugger HSAKMT_STATUS err = hsaKmtRuntimeEnable(&_amdgpu_r_debug, core::Runtime::runtime_singleton_->flag().debug()); if ((err != HSAKMT_STATUS_SUCCESS) && (err != HSAKMT_STATUS_NOT_SUPPORTED)) return false; core::Runtime::runtime_singleton_->KfdVersion(err != HSAKMT_STATUS_NOT_SUPPORTED); // Build topology table. BuildTopology(); kfd.Dismiss(); return true; } bool Unload() { hsaKmtRuntimeDisable(); hsaKmtReleaseSystemProperties(); // Close connection to kernel driver. hsaKmtCloseKFD(); return true; } } // namespace amd } // namespace rocr