/* * Copyright © 2012-2017 Inria. All rights reserved. * See COPYING in top-level directory. */ #include #include #include /* private headers allowed for convenience because this plugin is built within hwloc */ #include #include #include struct hwloc_nvml_backend_data_s { unsigned nr_devices; /* -1 when unknown yet, first callback will setup */ struct hwloc_nvml_device_info_s { char name[64]; char serial[64]; char uuid[64]; unsigned pcidomain, pcibus, pcidev, pcifunc; float maxlinkspeed; } * devices; }; static void hwloc_nvml_query_devices(struct hwloc_nvml_backend_data_s *data) { nvmlReturn_t ret; unsigned nb, i; /* mark the number of devices as 0 in case we fail below, * so that we don't try again later. */ data->nr_devices = 0; ret = nvmlInit(); if (NVML_SUCCESS != ret) goto out; ret = nvmlDeviceGetCount(&nb); if (NVML_SUCCESS != ret) goto out_with_init; /* allocate structs */ data->devices = malloc(nb * sizeof(*data->devices)); if (!data->devices) goto out_with_init; for(i=0; idevices[data->nr_devices]; nvmlPciInfo_t pci; nvmlDevice_t device; ret = nvmlDeviceGetHandleByIndex(i, &device); assert(ret == NVML_SUCCESS); ret = nvmlDeviceGetPciInfo(device, &pci); if (NVML_SUCCESS != ret) continue; info->pcidomain = pci.domain; info->pcibus = pci.bus; info->pcidev = pci.device; info->pcifunc = 0; info->name[0] = '\0'; ret = nvmlDeviceGetName(device, info->name, sizeof(info->name)); /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */ info->serial[0] = '\0'; ret = nvmlDeviceGetSerial(device, info->serial, sizeof(info->serial)); info->uuid[0] = '\0'; ret = nvmlDeviceGetUUID(device, info->uuid, sizeof(info->uuid)); info->maxlinkspeed = 0.0f; #if HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION { unsigned maxwidth = 0, maxgen = 0; float lanespeed; nvmlDeviceGetMaxPcieLinkWidth(device, &maxwidth); nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen); /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane */ lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */ info->maxlinkspeed = lanespeed * maxwidth / 8; /* GB/s */ } #endif /* validate this device */ data->nr_devices++; } out_with_init: nvmlShutdown(); out: return; } static int hwloc_nvml_backend_notify_new_object(struct hwloc_backend *backend, struct hwloc_backend *caller __hwloc_attribute_unused, struct hwloc_obj *pcidev) { struct hwloc_topology *topology = backend->topology; struct hwloc_nvml_backend_data_s *data = backend->private_data; unsigned i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno NVML detection (not thissystem)\n"); return 0; } if (HWLOC_OBJ_PCI_DEVICE != pcidev->type) return 0; if (data->nr_devices == (unsigned) -1) { /* first call, lookup all devices */ hwloc_nvml_query_devices(data); /* if it fails, data->nr_devices = 0 so we won't do anything below and in next callbacks */ } if (!data->nr_devices) /* found no devices */ return 0; /* now the devices array is ready to use */ for(i=0; inr_devices; i++) { struct hwloc_nvml_device_info_s *info = &data->devices[i]; hwloc_obj_t osdev; char buffer[64]; if (info->pcidomain != pcidev->attr->pcidev.domain) continue; if (info->pcibus != pcidev->attr->pcidev.bus) continue; if (info->pcidev != pcidev->attr->pcidev.dev) continue; if (info->pcifunc != pcidev->attr->pcidev.func) continue; osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(buffer, sizeof(buffer), "nvml%u", i); osdev->name = strdup(buffer); osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "NVML"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); hwloc_obj_add_info(osdev, "GPUModel", info->name); if (info->serial[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIASerial", info->serial); if (info->uuid[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIAUUID", info->uuid); hwloc_insert_object_by_parent(topology, pcidev, osdev); if (info->maxlinkspeed != 0.0f) /* we found the max link speed, replace the current link speed found by pci (or none) */ pcidev->attr->pcidev.linkspeed = info->maxlinkspeed; return 1; } return 0; } static void hwloc_nvml_backend_disable(struct hwloc_backend *backend) { struct hwloc_nvml_backend_data_s *data = backend->private_data; free(data->devices); free(data); } static struct hwloc_backend * hwloc_nvml_component_instantiate(struct hwloc_disc_component *component, const void *_data1 __hwloc_attribute_unused, const void *_data2 __hwloc_attribute_unused, const void *_data3 __hwloc_attribute_unused) { struct hwloc_backend *backend; struct hwloc_nvml_backend_data_s *data; /* thissystem may not be fully initialized yet, we'll check flags in discover() */ backend = hwloc_backend_alloc(component); if (!backend) return NULL; data = malloc(sizeof(*data)); if (!data) { free(backend); return NULL; } /* the first callback will initialize those */ data->nr_devices = (unsigned) -1; /* unknown yet */ data->devices = NULL; backend->private_data = data; backend->disable = hwloc_nvml_backend_disable; backend->notify_new_object = hwloc_nvml_backend_notify_new_object; return backend; } static struct hwloc_disc_component hwloc_nvml_disc_component = { HWLOC_DISC_COMPONENT_TYPE_MISC, "nvml", HWLOC_DISC_COMPONENT_TYPE_GLOBAL, hwloc_nvml_component_instantiate, 5, /* after pci, and after cuda since likely less useful */ NULL }; static int hwloc_nvml_component_init(unsigned long flags) { if (flags) return -1; if (hwloc_plugin_check_namespace("nvml", "hwloc_backend_alloc") < 0) return -1; return 0; } #ifdef HWLOC_INSIDE_PLUGIN HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; #endif const struct hwloc_component hwloc_nvml_component = { HWLOC_COMPONENT_ABI, hwloc_nvml_component_init, NULL, HWLOC_COMPONENT_TYPE_DISC, 0, &hwloc_nvml_disc_component };