Source code for sleepguard.monitors.gpu

# Python Modules
import logging

from typing import Literal

from sleepguard.monitors import debug
# 3rd Party Modules

# Project Modules
from sleepguard.monitors.base import UtilizationMonitor


log = logging.getLogger(__name__)


GpuType = Literal["amd", "nvidia"]


[docs] class GpuUtilizationMonitor(UtilizationMonitor): """ Keeps track of the average GPU percent utilization over the given ``threshold_period``. """ def __init__( self, poll_period: float, log_history_size: int, threshold: float, *args, **kwargs ): super().__init__(poll_period, log_history_size, threshold, *args, **kwargs) self.device_types = self.get_device_types() if not self.device_types: raise ValueError("No GPU devices found.") else: log.debug(f"Found GPU devices: {self.device_types}")
[docs] @staticmethod def get_device_types() -> list[GpuType]: """ Get the list of available GPU types installed on the system. :return: """ devices: list[GpuType] = [] try: import GPUtil if GPUtil.getGPUs(): devices.append("nvidia") except Exception as e: log.debug(f"Unable to find any NVIDIA GPUs: {e}") try: import pyadl if pyadl.ADLManager.getInstance().getDevices(): devices.append("amd") except Exception as e: log.debug(f"Unable to find any AMD GPUs: {e}") return devices
[docs] @debug def get_instantaneous_value(self) -> float: """ Returns the max utilization over all the detected GPU devices. :return: """ values = [self._get_utilization(t) for t in self.device_types] or [0] return max(values)
def _get_utilization(self, device_type: GpuType) -> float: if device_type == "amd": return self._get_amd_utilization() elif device_type == "nvidia": return self._get_nvidia_utilization() else: raise NotImplementedError(f"Unsupported gpu type: {device_type}") @staticmethod def _get_amd_utilization() -> float: import pyadl devices = pyadl.ADLManager.getInstance().getDevices() return max([d.getCurrentUsage() for d in devices]) @staticmethod def _get_nvidia_utilization() -> float: import GPUtil gpus = GPUtil.getGPUs() return max([gpu.load * 100 for gpu in gpus])