// This file is part of BOINC. // http://boinc.berkeley.edu // Copyright (C) 2012 University of California // // BOINC is free software; you can redistribute it and/or modify it // under the terms of the GNU Lesser General Public License // as published by the Free Software Foundation, // either version 3 of the License, or (at your option) any later version. // // BOINC is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public License // along with BOINC. If not, see . // Detection of GPUs using OpenCL #define TEST_OTHER_COPROC_LOGIC 0 #ifdef _WIN32 #include "boinc_win.h" #ifdef _MSC_VER #define snprintf _snprintf #endif #else #ifdef __APPLE__ // Suppress obsolete warning when building for OS 10.3.9 #define DLOPEN_NO_WARN #include #endif #include "config.h" #include #endif #include #include using std::vector; using std::string; #include "coproc.h" #include "str_replace.h" #include "util.h" #include "client_msgs.h" #include "client_state.h" #include "gpu_detect.h" #ifdef _WIN32 HMODULE opencl_lib = NULL; typedef cl_int (__stdcall *CL_PLATFORMIDS) (cl_uint, cl_platform_id*, cl_uint*); typedef cl_int (__stdcall *CL_PLATFORMINFO) (cl_platform_id, cl_platform_info, size_t, void*, size_t*); typedef cl_int (__stdcall *CL_DEVICEIDS)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*); typedef cl_int (__stdcall *CL_INFO) (cl_device_id, cl_device_info, size_t, void*, size_t*); CL_PLATFORMIDS p_clGetPlatformIDs = NULL; CL_PLATFORMINFO p_clGetPlatformInfo = NULL; CL_DEVICEIDS p_clGetDeviceIDs = NULL; CL_INFO p_clGetDeviceInfo = NULL; #else void* opencl_lib = NULL; cl_int (*p_clGetPlatformIDs)( cl_uint, // num_entries, cl_platform_id*, // platforms cl_uint * // num_platforms ); cl_int (*p_clGetPlatformInfo)( cl_platform_id, // platform cl_platform_info, // param_name size_t, // param_value_size void*, // param_value size_t* // param_value_size_ret ); cl_int (*p_clGetDeviceIDs)( cl_platform_id, // platform cl_device_type, // device_type cl_uint, // num_entries cl_device_id*, // devices cl_uint* // num_devices ); cl_int (*p_clGetDeviceInfo)( cl_device_id, // device cl_device_info, // param_name size_t, // param_value_size void*, // param_value size_t* // param_value_size_ret ); #endif static bool is_AMD(char *vendor) { if (strstr(vendor, "ATI")) return true; if (strstr(vendor, "AMD")) return true; if (strstr(vendor, "Advanced Micro Devices, Inc.")) return true; return false; } static bool is_NVIDIA(char* vendor) { if (strstr(vendor, "NVIDIA")) return true; return false; } static bool is_intel(char* vendor) { if (strcasestr(vendor, "intel")) return true; return false; } // If "loose", tolerate small diff // static int opencl_compare(OPENCL_DEVICE_PROP& c1, OPENCL_DEVICE_PROP& c2, bool loose) { if (c1.opencl_device_version_int > c2.opencl_device_version_int) return 1; if (c1.opencl_device_version_int < c2.opencl_device_version_int) return -1; if (loose) { if (c1.global_mem_size > 1.4*c2.global_mem_size) return 1; if (c1.global_mem_size < .7*c2.global_mem_size) return -1; return 0; } if (c1.global_mem_size > c2.global_mem_size) return 1; if (c1.global_mem_size < c2.global_mem_size) return -1; if (c1.peak_flops > c2.peak_flops) return 1; if (c1.peak_flops < c2.peak_flops) return -1; return 0; } #ifdef __APPLE__ static bool compare_pci_slots(int NVIDIA_GPU_Index1, int NVIDIA_GPU_Index2) { if (NVIDIA_GPU_Index1 >= (int)nvidia_gpus.size()) return false; // Should never happen if (NVIDIA_GPU_Index2 >= (int)nvidia_gpus.size()) return false; // Should never happen return ( nvidia_gpus[NVIDIA_GPU_Index1].pci_info.bus_id < nvidia_gpus[NVIDIA_GPU_Index2].pci_info.bus_id ); } // Test OS version number on all versions of OS X without using deprecated Gestalt // compareOSVersionTo(x, y) returns: // -1 if the OS version we are running on is less than x.y // 0 if the OS version we are running on is equal to x.y // +1 if the OS version we are running on is lgreater than x.y static int compareOSVersionTo(int toMajor, int toMinor) { static SInt32 major = -1; static SInt32 minor = -1; if (major < 0) { char vers[100], *p1 = NULL; FILE *f; vers[0] = '\0'; f = popen("sw_vers -productVersion", "r"); if (f) { fscanf(f, "%s", vers); pclose(f); } if (vers[0] == '\0') { fprintf(stderr, "popen(\"sw_vers -productVersion\" failed\n"); fflush(stderr); return 0; } // Extract the major system version number major = atoi(vers); // Extract the minor system version number p1 = strchr(vers, '.'); minor = atoi(p1+1); } if (major < toMajor) return -1; if (major > toMajor) return 1; // if (major == toMajor) compare minor version numbers if (minor < toMinor) return -1; if (minor > toMinor) return 1; return 0; } #endif // OpenCL interfaces are documented here: // http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/ and // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/ void COPROCS::get_opencl( vector& warnings ) { cl_int ciErrNum; cl_platform_id platforms[MAX_OPENCL_PLATFORMS]; cl_uint num_platforms, platform_index, num_devices, device_index; cl_device_id devices[MAX_COPROC_INSTANCES]; char platform_version[256]; char platform_vendor[256]; char buf[256]; OPENCL_DEVICE_PROP prop; int current_CUDA_index; int current_CAL_index; int min_CAL_target; int num_CAL_devices = (int)ati_gpus.size(); vectordevnums_pci_slot_sort; vector::iterator it; int max_other_coprocs = MAX_RSC-1; // coprocs[0] is reserved for CPU string s; if (cc_config.no_opencl) { return; } #ifdef _WIN32 opencl_lib = LoadLibrary("OpenCL.dll"); if (!opencl_lib) { warnings.push_back("No OpenCL library found"); return; } p_clGetPlatformIDs = (CL_PLATFORMIDS)GetProcAddress( opencl_lib, "clGetPlatformIDs" ); p_clGetPlatformInfo = (CL_PLATFORMINFO)GetProcAddress( opencl_lib, "clGetPlatformInfo" ); p_clGetDeviceIDs = (CL_DEVICEIDS)GetProcAddress( opencl_lib, "clGetDeviceIDs" ); p_clGetDeviceInfo = (CL_INFO)GetProcAddress( opencl_lib, "clGetDeviceInfo" ); #else #ifdef __APPLE__ opencl_lib = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_NOW); #else opencl_lib = dlopen("libOpenCL.so", RTLD_NOW); if (!opencl_lib) { opencl_lib = dlopen("libOpenCL.so.1", RTLD_NOW); } #endif if (!opencl_lib) { sprintf(buf, "OpenCL: %s", dlerror()); warnings.push_back(buf); return; } p_clGetPlatformIDs = (cl_int(*)(cl_uint, cl_platform_id*, cl_uint*)) dlsym( opencl_lib, "clGetPlatformIDs" ); p_clGetPlatformInfo = (cl_int(*)(cl_platform_id, cl_platform_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetPlatformInfo" ); p_clGetDeviceIDs = (cl_int(*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*)) dlsym( opencl_lib, "clGetDeviceIDs" ); p_clGetDeviceInfo = (cl_int(*)(cl_device_id, cl_device_info, size_t, void*, size_t*)) dlsym( opencl_lib, "clGetDeviceInfo" ); #endif if (!p_clGetPlatformIDs) { warnings.push_back("clGetPlatformIDs() missing from OpenCL library"); goto leave; } if (!p_clGetPlatformInfo) { warnings.push_back("clGetPlatformInfo() missing from OpenCL library"); goto leave; } if (!p_clGetDeviceIDs) { warnings.push_back("clGetDeviceIDs() missing from OpenCL library"); goto leave; } if (!p_clGetDeviceInfo) { warnings.push_back("clGetDeviceInfo() missing from OpenCL library"); goto leave; } ciErrNum = (*p_clGetPlatformIDs)(MAX_OPENCL_PLATFORMS, platforms, &num_platforms); if ((ciErrNum != CL_SUCCESS) || (num_platforms == 0)) { warnings.push_back("clGetPlatformIDs() failed to return any OpenCL platforms"); goto leave; } if (nvidia_gpus.size()) { for (int i=0; i<(int)nvidia_gpus.size(); ++i) { devnums_pci_slot_sort.push_back(i); } #ifdef __APPLE__ std::stable_sort( devnums_pci_slot_sort.begin(), devnums_pci_slot_sort.end(), compare_pci_slots ); #endif } for (platform_index=0; platform_index 0)) { while (1) { int numToMatch = 0; for (int i=0; i= min_CAL_target) { ++numToMatch; } } if (numToMatch == (int)num_devices) break; if (numToMatch < (int)num_devices) { warnings.push_back( "Could not match ATI OpenCL and CAL GPUs: ignoring CAL." ); // If we can't match ATI OpenCL and CAL GPUs, ignore CAL // and keep OpenCL because AMD has deprecated CAL. ati_gpus.clear(); ati.have_cal = false; num_CAL_devices = 0; break; } ++min_CAL_target; } } for (device_index=0; device_index= (int)(nvidia_gpus.size())) { snprintf(buf, sizeof(buf), "OpenCL NVIDIA index #%d does not match any CUDA device", device_index ); warnings.push_back(buf); // Newer versions of CUDA driver don't support older NVIDIA GPUs if (nvidia.cuda_version >= 6050) { prop.device_num = (int)(nvidia_opencls.size()); current_CUDA_index = saved_CUDA_index; prop.warn_bad_cuda = true; break; } else { // Older CUDA drivers should report all NVIDIA GPUs reported by OpenCL goto leave; // Should never happen } } if (!strcmp(prop.name, nvidia_gpus[devnums_pci_slot_sort[current_CUDA_index]].prop.name) ) { cuda_match_found = true; prop.device_num = devnums_pci_slot_sort[current_CUDA_index]; break; // We have a match } // This CUDA GPU is not recognized by OpenCL, // so try the next // ++current_CUDA_index; } } else { prop.device_num = (int)(nvidia_opencls.size()); } prop.opencl_device_index = device_index; if (cuda_match_found) { prop.peak_flops = nvidia_gpus[prop.device_num].peak_flops; } else { COPROC_NVIDIA c; c.opencl_prop = prop; c.set_peak_flops(); if (c.bad_gpu_peak_flops("NVIDIA OpenCL", s)) { warnings.push_back(s); } prop.peak_flops = c.peak_flops; } if (cuda_match_found) { // Assumes OpenCL device_num and CUDA device_num now match // prop.opencl_available_ram = nvidia_gpus[prop.device_num].available_ram; } else { prop.opencl_available_ram = prop.global_mem_size; } // Build nvidia_opencls vector in device_num order for (it=nvidia_opencls.begin(); it != nvidia_opencls.end(); ++it) { if (it->device_num > prop.device_num) break; } nvidia_opencls.insert(it, prop); if (cuda_match_found) ++current_CUDA_index; } //////////// AMD / ATI ////////////// else if (is_AMD(prop.vendor)) { prop.opencl_device_index = device_index; if (ati.have_cal) { // AMD OpenCL does not recognize all AMD GPUs returned by // CAL but we assume that OpenCL and CAL return devices in // the same order. See additional comments earlier in // this source file for more details. // while (1) { if (current_CAL_index >= num_CAL_devices) { snprintf(buf, sizeof(buf), "OpenCL ATI device #%d does not match any CAL device", device_index ); warnings.push_back(buf); goto leave; // Should never happen } if ((int)ati_gpus[current_CAL_index].attribs.target >= min_CAL_target) { break; // We have a match } // This CAL GPU is not recognized by OpenCL, // so try the next // ++current_CAL_index; } prop.device_num = current_CAL_index++; // Always use GPU model name from CAL if // available for ATI / AMD GPUs because // (we believe) it is more user-friendly. // safe_strcpy(prop.name, ati_gpus[prop.device_num].name); // Work around a bug in OpenCL which returns only // 1/2 of total global RAM size: use the value from CAL. // This bug applies only to ATI GPUs, not to NVIDIA // See also further workaround code for Macs. // prop.global_mem_size = ati_gpus[prop.device_num].attribs.localRAM * MEGA; prop.peak_flops = ati_gpus[prop.device_num].peak_flops; } else { // ! ati.have_cal prop.device_num = (int)(ati_opencls.size()); COPROC_ATI c; c.opencl_prop = prop; c.set_peak_flops(); if (c.bad_gpu_peak_flops("AMD OpenCL", s)) { warnings.push_back(s); } prop.peak_flops = c.peak_flops; } if (ati_gpus.size()) { prop.opencl_available_ram = ati_gpus[prop.device_num].available_ram; } else { prop.opencl_available_ram = prop.global_mem_size; } ati_opencls.push_back(prop); } //////////// INTEL GPU ////////////// else if (is_intel(prop.vendor)) { prop.device_num = (int)(intel_gpu_opencls.size()); prop.opencl_device_index = device_index; COPROC_INTEL c; c.opencl_prop = prop; c.is_used = COPROC_UNUSED; c.available_ram = prop.global_mem_size; safe_strcpy(c.name, prop.name); safe_strcpy(c.version, prop.opencl_driver_version); c.set_peak_flops(); if (c.bad_gpu_peak_flops("Intel OpenCL", s)) { warnings.push_back(s); } prop.peak_flops = c.peak_flops; prop.opencl_available_ram = prop.global_mem_size; intel_gpu_opencls.push_back(prop); // At present Intel GPUs only support OpenCL // and do not have a native GPGPU framework, // so treat each detected Intel OpenCL GPU device as // a native device. // intel_gpus.push_back(c); } else { //////////// OTHER GPU OR ACCELERATOR ////////////// // Put each coprocessor instance into a separate other_opencls element // opencl_device_index is passed to project apps via init_data.xml // to differentiate among OpenCL devices from the same vendor. It is // used by boinc_get_opencl_ids() to select the correct OpenCL device. int opencl_device_index = 0; for (unsigned int coproc_index=0; coproc_index GPU_MAX_PEAK_FLOPS) { char buf2[256]; sprintf(buf2, "OpenCL generic: bad peak FLOPS; Max units %d, max freq %d MHz", prop.max_compute_units, prop.max_clock_frequency ); warnings.push_back(buf2); prop.peak_flops = GPU_DEFAULT_PEAK_FLOPS; } other_opencls.push_back(prop); } } } // Neither nvidia.count, ati.count nor intel_gpu.count have been set yet, // so we can't test have_nvidia(), have_ati() or have_intel_gpu() here. // if ((nvidia_opencls.size() > 0) || nvidia.have_cuda) max_other_coprocs--; if ((ati_opencls.size() > 0) || ati.have_cal) max_other_coprocs--; if (intel_gpu_opencls.size() > 0) max_other_coprocs--; if ((int)other_opencls.size() > max_other_coprocs) { warnings.push_back("Too many OpenCL device types found"); } #ifdef __APPLE__ // Work around a bug in OpenCL which returns only // 1/2 of total global RAM size. // This bug applies only to ATI GPUs, not to NVIDIA // This has already been fixed on latest Catalyst // drivers, but Mac does not use Catalyst drivers. if (ati_opencls.size() > 0) { // This problem seems to be fixed in OS 10.7 if (compareOSVersionTo(10, 7) < 0) { opencl_get_ati_mem_size_from_opengl(warnings); } } #endif if ((nvidia_opencls.size() == 0) && (ati_opencls.size() == 0) && (intel_gpu_opencls.size() == 0) && (cpu_opencls.size() == 0) && (other_opencls.size() == 0) ) { warnings.push_back( "OpenCL library present but no OpenCL-capable devices found" ); } leave: #ifdef _WIN32 if (opencl_lib) FreeLibrary(opencl_lib); #else if (opencl_lib) dlclose(opencl_lib); #endif } void COPROCS::correlate_opencl( bool use_all, IGNORE_GPU_INSTANCE& ignore_gpu_instance ) { if (nvidia_opencls.size() > 0) { if (nvidia.have_cuda) { // If CUDA already found the "best" NVIDIA GPU nvidia.merge_opencl( nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU] ); } else { nvidia.find_best_opencls( use_all, nvidia_opencls, ignore_gpu_instance[PROC_TYPE_NVIDIA_GPU] ); nvidia.prop.totalGlobalMem = nvidia.opencl_prop.global_mem_size; nvidia.available_ram = nvidia.opencl_prop.global_mem_size; nvidia.prop.clockRate = nvidia.opencl_prop.max_clock_frequency * 1000; safe_strcpy(nvidia.prop.name, nvidia.opencl_prop.name); } } if (ati_opencls.size() > 0) { if (ati.have_cal) { // If CAL already found the "best" CAL GPU ati.merge_opencl(ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]); } else { ati.find_best_opencls(use_all, ati_opencls, ignore_gpu_instance[PROC_TYPE_AMD_GPU]); ati.attribs.localRAM = ati.opencl_prop.global_mem_size/MEGA; ati.available_ram = ati.opencl_prop.global_mem_size; ati.attribs.engineClock = ati.opencl_prop.max_clock_frequency; safe_strcpy(ati.name, ati.opencl_prop.name); } } if (intel_gpu_opencls.size() > 0) { intel_gpu.find_best_opencls(use_all, intel_gpu_opencls, ignore_gpu_instance[PROC_TYPE_INTEL_GPU]); intel_gpu.available_ram = intel_gpu.opencl_prop.global_mem_size; safe_strcpy(intel_gpu.name, intel_gpu.opencl_prop.name); } } cl_int COPROCS::get_opencl_info( OPENCL_DEVICE_PROP& prop, cl_uint device_index, vector&warnings ) { cl_int ciErrNum; char buf[256]; ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_NAME, sizeof(prop.name), prop.name, NULL); if ((ciErrNum != CL_SUCCESS) || (prop.name[0] == 0)) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get name for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR, sizeof(prop.vendor), prop.vendor, NULL); if ((ciErrNum != CL_SUCCESS) || (prop.vendor[0] == 0)) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get vendor for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VENDOR_ID, sizeof(prop.vendor_id), &prop.vendor_id, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get vendor ID for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_AVAILABLE, sizeof(prop.available), &prop.available, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get availability for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_HALF_FP_CONFIG, sizeof(prop.half_fp_config), &prop.half_fp_config, NULL ); if (ciErrNum != CL_SUCCESS) { if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) { prop.half_fp_config = 0; // Not supported by OpenCL 1.0 } else { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get half-precision floating point capabilities for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(prop.single_fp_config), &prop.single_fp_config, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get single-precision floating point capabilities for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(prop.double_fp_config), &prop.double_fp_config, NULL ); if (ciErrNum != CL_SUCCESS) { if ((ciErrNum == CL_INVALID_VALUE) || (ciErrNum == CL_INVALID_OPERATION)) { prop.double_fp_config = 0; // Not supported by OpenCL 1.0 } else { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get double-precision floating point capabilities for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_ENDIAN_LITTLE, sizeof(prop.endian_little), &prop.endian_little, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get little or big endian for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(prop.execution_capabilities), &prop.execution_capabilities, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get execution capabilities for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_EXTENSIONS, sizeof(prop.extensions), prop.extensions, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get device extensions for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(prop.global_mem_size), &prop.global_mem_size, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get global memory size for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(prop.local_mem_size), &prop.local_mem_size, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get local memory size for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(prop.max_clock_frequency), &prop.max_clock_frequency, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get max clock frequency for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)( prop.device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(prop.max_compute_units), &prop.max_compute_units, NULL ); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get max compute units for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_VERSION, sizeof(prop.opencl_device_version), prop.opencl_device_version, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get OpenCL version supported by device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DRIVER_VERSION, sizeof(prop.opencl_driver_version), prop.opencl_driver_version, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get OpenCL driver version for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } // Nvidia Specific Extensions if (strstr(prop.extensions, "cl_nv_device_attribute_query") != NULL) { ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(prop.nv_compute_capability_major), &prop.nv_compute_capability_major, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(prop.nv_compute_capability_minor), &prop.nv_compute_capability_minor, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } } // AMD Specific Extensions if (strstr(prop.extensions, "cl_amd_device_attribute_query") != NULL) { ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_BOARD_NAME_AMD, sizeof(buf), buf, NULL); if (strlen(buf) && ciErrNum == CL_SUCCESS) { safe_strcpy(prop.name, buf); } else if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get AMD Board Name for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, sizeof(prop.amd_simd_per_compute_unit), &prop.amd_simd_per_compute_unit, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_WIDTH_AMD, sizeof(prop.amd_simd_width), &prop.amd_simd_width, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get CL_DEVICE_SIMD_WIDTH_AMD for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } ciErrNum = (*p_clGetDeviceInfo)(prop.device_id, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, sizeof(prop.amd_simd_instruction_width), &prop.amd_simd_instruction_width, NULL); if (ciErrNum != CL_SUCCESS) { snprintf(buf, sizeof(buf), "clGetDeviceInfo failed to get CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD for device %d", (int)device_index ); warnings.push_back(buf); return ciErrNum; } } return CL_SUCCESS; } // This is called for ATI GPUs with CAL or NVIDIA GPUs with CUDA, to merge // the OpenCL info into the CAL or CUDA data for the "best" CAL or CUDA GPU. // This assumes that, for each GPU, we have previously correlated its CAL // or CUDA device_num with its opencl_device_index. // void COPROC::merge_opencl( vector &opencls, vector& ignore_dev ) { unsigned int i, j; for (i=0; i &opencls, vector& ignore_dev ) { unsigned int i; // identify the most capable ATI, NVIDIA or Intel OpenCL GPU // bool first = true; for (i=0; i 0) { is_best = true; } if (is_best) { // fill in what info we have opencl_prop = opencls[i]; device_num = opencls[i].device_num; peak_flops = opencls[i].peak_flops; have_opencl = true; } } // see which other instances are equivalent, and set the count, // device_nums, opencl_device_count and opencl_device_ids fields // count = 0; opencl_device_count = 0; for (i=0; i #include #include #include #include static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID); void COPROCS::opencl_get_ati_mem_size_from_opengl(vector& warnings) { CGLRendererInfoObj info; long i, j; GLint numRenderers = 0, rv = 0, deviceVRAM, rendererID; cl_ulong deviceMemSize; CGLError theErr2 = kCGLNoError; CGLContextObj curr_ctx = CGLGetCurrentContext (); // save current CGL context int ati_gpu_index = 0; GLint rendererIDs[32]; CFDataRef modelName[32]; char opencl_name[256], iokit_name[256], buf[256]; char *p; if (log_flags.coproc_debug) { for (i=0; i<32; ++i) { rendererIDs[i] = 0; modelName[i] = NULL; CGOpenGLDisplayMask myMask = 1 << i; CGDirectDisplayID displayID = CGOpenGLDisplayMaskToDisplayID(myMask); theErr2 = CGLQueryRendererInfo(myMask, &info, &numRenderers); if ((displayID != kCGNullDirectDisplay) && (theErr2 == kCGLNoError)) { // Get the I/O Kit service port for the display // io_registry_entry_t dspPort = CGDisplayIOServicePort(displayID); // Deprecated in OS 10.9 io_registry_entry_t dspPort = IOServicePortFromCGDisplayID(displayID); for (j = 0; j < numRenderers; j++) { // find accelerated renderer (assume only one) CGLDescribeRenderer (info, j, kCGLRPAcceleratedCompute, &rv); if (true == rv) { // if openCL-capable // what is the renderer ID CGLDescribeRenderer (info, j, kCGLRPRendererID, &rendererIDs[i]); modelName[i] = (CFDataRef)IORegistryEntrySearchCFProperty( dspPort, kIOServicePlane, CFSTR("model"), kCFAllocatorDefault, kIORegistryIterateRecursively | kIORegistryIterateParents ); } if (modelName[i] != NULL) break; } } } } // End if (log_flags.coproc_debug) { theErr2 = CGLQueryRendererInfo( 0xffffffff, &info, &numRenderers); if (theErr2 == kCGLNoError) { CGLDescribeRenderer (info, 0, kCGLRPRendererCount, &numRenderers); for (i = 0; i < numRenderers; i++) { if (ati_gpu_index >= (int)ati_opencls.size()) { break; } CGLDescribeRenderer (info, i, kCGLRPAcceleratedCompute, &rv); if (true == rv) { // if openCL-capable // what is the renderer ID CGLDescribeRenderer (info, i, kCGLRPRendererID, &rendererID); // what is the VRAM? CGLError notAvail = CGLDescribeRenderer (info, i, kCGLRPVideoMemoryMegabytes, &deviceVRAM); if (notAvail == kCGLNoError) { deviceMemSize = ((cl_ulong)deviceVRAM) * (1024L*1024L); } else { // kCGLRPVideoMemoryMegabytes is not available before OS 10.7 #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" // kCGLRPVideoMemory=120 is deprecated in OS 10.7 and may not be // defined in later SDKs, so use a literal value here instead // CGLDescribeRenderer (info, i, kCGLRPVideoMemory, &deviceVRAM); CGLDescribeRenderer (info, i, (CGLRendererProperty)120, &deviceVRAM); deviceMemSize = deviceVRAM; #pragma clang diagnostic pop } // build context and context specific info CGLPixelFormatAttribute attribs[] = { kCGLPFARendererID, (CGLPixelFormatAttribute)rendererID, kCGLPFAAllowOfflineRenderers, (CGLPixelFormatAttribute)0 }; CGLPixelFormatObj pixelFormat = NULL; GLint numPixelFormats = 0; CGLContextObj cglContext; CGLChoosePixelFormat (attribs, &pixelFormat, &numPixelFormats); if (pixelFormat) { CGLCreateContext(pixelFormat, NULL, &cglContext); CGLDestroyPixelFormat (pixelFormat); CGLSetCurrentContext (cglContext); if (cglContext) { // get vendor string from renderer const GLubyte * strVend = glGetString (GL_VENDOR); if (is_AMD((char *)strVend)) { ati_opencls[ati_gpu_index].global_mem_size = deviceMemSize; ati_opencls[ati_gpu_index].opencl_available_ram = deviceMemSize; if (log_flags.coproc_debug) { // For some GPUs, one API returns "ATI" but the other API returns // "AMD" in the model name, so we normalize both to "AMD" strlcpy(opencl_name, ati_opencls[ati_gpu_index].name, sizeof(opencl_name)); if ((p = strstr(opencl_name, "ATI")) != NULL) { *++p='M'; *++p='D'; } for (j=0; j<32; j++) { if ((rendererID == rendererIDs[j]) && (modelName[j] != NULL)) { break; } } if (j < 32) { strlcpy(iokit_name, (char *)CFDataGetBytePtr(modelName[j]), sizeof(iokit_name)); if ((p = strstr(iokit_name, "ATI")) != NULL) { *++p='M'; *++p='D'; } if (strcmp(iokit_name, opencl_name)) { snprintf(buf, sizeof(buf), "opencl_get_ati_mem_size_from_opengl model name mismatch: %s vs %s\n", ati_opencls[ati_gpu_index].name, (char *)CFDataGetBytePtr(modelName[j]) ); warnings.push_back(buf); } } else { // Could not get model name from IOKit, so use renderer name const GLubyte * strRend = glGetString (GL_RENDERER); if (strRend != NULL) { strlcpy(iokit_name, (char *)strRend, sizeof(iokit_name)); if ((p = strstr(iokit_name, "ATI")) != NULL) { *++p='M'; *++p='D'; } } if ((strRend == NULL) || (!strstr(iokit_name, opencl_name))) { snprintf(buf, sizeof(buf), "opencl_get_ati_mem_size_from_opengl model name to renderer mismatch: %s vs %s\n", strRend, ati_opencls[ati_gpu_index].name ); warnings.push_back(buf); } } } // End if (log_flags.coproc_debug) { ati_gpu_index++; } // End if ATI / AMD GPU CGLDestroyContext (cglContext); } else { warnings.push_back( "opencl_get_ati_mem_size_from_opengl failed to create context\n" ); } } else { warnings.push_back( "opencl_get_ati_mem_size_from_opengl failed to create PixelFormat\n" ); } } // End if kCGLRPAcceleratedCompute attribute } // End loop: for (i = 0; i < numRenderers; i++) CGLDestroyRendererInfo (info); } if (log_flags.coproc_debug) { for (j=0; j<32; j++) { if (modelName[j] != NULL) { CFRelease(modelName[j]); } } } CGLSetCurrentContext (curr_ctx); // restore current CGL context } // The following replaces CGDisplayIOServicePort which is deprecated in OS 10.9 // //======================================================================== // GLFW 3.1 OS X - www.glfw.org //------------------------------------------------------------------------ // Copyright (c) 2002-2006 Marcus Geelnard // Copyright (c) 2006-2010 Camilla Berglund // // This software is provided 'as-is', without any express or implied // warranty. In no event will the authors be held liable for any damages // arising from the use of this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. If you use this software // in a product, an acknowledgment in the product documentation would // be appreciated but is not required. // // 2. Altered source versions must be plainly marked as such, and must not // be misrepresented as being the original software. // // 3. This notice may not be removed or altered from any source // distribution. // //======================================================================== // Returns the io_service_t corresponding to a CG display ID, or 0 on failure. // The io_service_t should be released with IOObjectRelease when not needed. // static io_service_t IOServicePortFromCGDisplayID(CGDirectDisplayID displayID) { io_iterator_t iter; io_service_t serv, servicePort = 0; CFMutableDictionaryRef matching = IOServiceMatching("IODisplayConnect"); // releases matching for us kern_return_t err = IOServiceGetMatchingServices(kIOMasterPortDefault, matching, &iter); if (err) return 0; while ((serv = IOIteratorNext(iter)) != 0) { CFDictionaryRef info; CFIndex vendorID, productID, serialNumber; CFNumberRef vendorIDRef, productIDRef, serialNumberRef; Boolean success; info = IODisplayCreateInfoDictionary(serv, kIODisplayOnlyPreferredName); vendorIDRef = (CFNumberRef)CFDictionaryGetValue(info, CFSTR(kDisplayVendorID)); productIDRef = (CFNumberRef)CFDictionaryGetValue(info, CFSTR(kDisplayProductID)); serialNumberRef = (CFNumberRef)CFDictionaryGetValue(info, CFSTR(kDisplaySerialNumber)); success = CFNumberGetValue(vendorIDRef, kCFNumberCFIndexType, &vendorID); success &= CFNumberGetValue(productIDRef, kCFNumberCFIndexType, &productID); success &= CFNumberGetValue(serialNumberRef, kCFNumberCFIndexType, &serialNumber); if (!success) { CFRelease(info); continue; } // If the vendor and product id along with the serial don't match // then we are not looking at the correct monitor. // NOTE: The serial number is important in cases where two monitors // are the exact same. if (CGDisplayVendorNumber(displayID) != vendorID || CGDisplayModelNumber(displayID) != productID || CGDisplaySerialNumber(displayID) != serialNumber) { CFRelease(info); continue; } // The VendorID, Product ID, and the Serial Number all Match Up! // Therefore we have found the appropriate display io_service servicePort = serv; CFRelease(info); break; } IOObjectRelease(iter); return servicePort; } #endif// __APPLE__