"description":"Nvidia GPU Metrics based on the prometheus metrics from github.com/utkuozdemir/nvidia_gpu_exporter",
"editable":true,
"fiscalYearStartMonth":0,
"graphTooltip":0,
"id":1,
"links":[],
"panels":[
{
  "datasource": {
    "type": "prometheus",
    "uid": "prom"
  },
  "description": "The official product name of the GPU. This is an alphanumeric string. For all products.",
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "thresholds"
      },
      "decimals": 2,
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          }
        ]
      },
      "unit": "none"
    },
    "overrides": []
  },
  "gridPos": {
    "h": 3,
    "w": 4,
    "x": 0,
    "y": 0
  },
  "id": 23,
  "options": {
    "colorMode": "value",
    "graphMode": "none",
    "justifyMode": "auto",
    "orientation": "auto",
    "percentChangeColorMode": "standard",
    "reduceOptions": {
      "calcs": [
        "last"
      ],
      "fields": "",
      "values": false
    },
    "showPercentChange": false,
    "text": {},
    "textMode": "name",
    "wideLayout": true
  },
  "pluginVersion": "11.4.0",
  "targets": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}",
      "instant": true,
      "interval": "",
      "legendFormat": "{{name}}",
      "refId": "A"
    }
  ],
  "title": "Name",
  "type": "stat"
},
{
  "datasource": {
    "type": "prometheus",
    "uid": "prom"
  },
  "description": "The current performance state for the GPU. States range from P0 (maximum performance) to P12 (minimum performance).",
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "thresholds"
      },
      "decimals": 0,
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          }
        ]
      },
      "unit": "prefix:P"
    },
    "overrides": []
  },
  "gridPos": {
    "h": 3,
    "w": 2,
    "x": 4,
    "y": 0
  },
  "id": 22,
  "options": {
    "colorMode": "value",
    "graphMode": "none",
    "justifyMode": "auto",
    "orientation": "auto",
    "percentChangeColorMode": "standard",
    "reduceOptions": {
      "calcs": [
        "last"
      ],
      "fields": "",
      "values": false
    },
    "showPercentChange": false,
    "text": {},
    "textMode": "value",
    "wideLayout": true
  },
  "pluginVersion": "11.4.0",
  "targets": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_pstate{uuid=\"$gpu\"}",
      "interval": "",
      "legendFormat": "",
      "refId": "A"
    }
  ],
  "title": "P-State",
  "type": "stat"
},
{
  "datasource": {
    "type": "prometheus",
    "uid": "prom"
  },
  "description": "The version of the installed NVIDIA display driver. This is an alphanumeric string.",
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "thresholds"
      },
      "decimals": 2,
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          }
        ]
      },
      "unit": "none"
    },
    "overrides": []
  },
  "gridPos": {
    "h": 2,
    "w": 3,
    "x": 0,
    "y": 3
  },
  "id": 14,
  "options": {
    "colorMode": "value",
    "graphMode": "none",
    "justifyMode": "auto",
    "orientation": "auto",
    "percentChangeColorMode": "standard",
    "reduceOptions": {
      "calcs": [
        "last"
      ],
      "fields": "",
      "values": false
    },
    "showPercentChange": false,
    "text": {},
    "textMode": "name",
    "wideLayout": true
  },
  "pluginVersion": "11.4.0",
  "targets": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}",
      "instant": true,
      "interval": "",
      "legendFormat": "{{driver_version}}",
      "refId": "A"
    }
  ],
  "title": "Driver Version",
  "type": "stat"
},
{
  "datasource": {
    "type": "prometheus",
    "uid": "prom"
  },
  "description": "The BIOS of the GPU board.",
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "thresholds"
      },
      "decimals": 2,
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          }
        ]
      },
      "unit": "none"
    },
    "overrides": []
  },
  "gridPos": {
    "h": 2,
    "w": 3,
    "x": 3,
    "y": 3
  },
  "id": 34,
  "options": {
    "colorMode": "value",
    "graphMode": "none",
    "justifyMode": "auto",
    "orientation": "auto",
    "percentChangeColorMode": "standard",
    "reduceOptions": {
      "calcs": [
        "last"
      ],
      "fields": "",
      "values": false
    },
    "showPercentChange": false,
    "text": {},
    "textMode": "name",
    "wideLayout": true
  },
  "pluginVersion": "11.4.0",
  "targets": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}",
      "instant": true,
      "interval": "",
      "legendFormat": "{{vbios_version}}",
      "refId": "A"
    }
  ],
  "title": "Vbios Version",
  "type": "stat"
},
{
  "datasource": {
    "type": "prometheus",
    "uid": "prom"
  },
  "description": "Information about factors that are reducing the frequency of clocks. If all throttle reasons are returned as \"Not Active\" it means that clocks are running as high as possible.",
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "thresholds"
      },
      "decimals": 0,
      "mappings": [
        {
          "options": {
            "0": {
              "text": "Not Active"
            },
            "1": {
              "text": "Active"
            }
          },
          "type": "value"
        }
      ],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          }
        ]
      },
      "unit": "none"
    },
    "overrides": []
  },
  "gridPos": {
    "h": 5,
    "w": 6,
    "x": 0,
    "y": 5
  },
  "id": 32,
  "options": {
    "displayMode": "gradient",
    "legend": {
      "calcs": [],
      "displayMode": "list",
      "placement": "bottom",
      "showLegend": false
    },
    "maxVizHeight": 300,
    "minVizHeight": 16,
    "minVizWidth": 8,
    "namePlacement": "auto",
    "orientation": "horizontal",
    "reduceOptions": {
      "calcs": [
        "last"
      ],
      "fields": "",
      "values": false
    },
    "showUnfilled": true,
    "sizing": "auto",
    "text": {},
    "valueMode": "color"
  },
  "pluginVersion": "11.4.0",
  "targets": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_clocks_event_reasons_gpu_idle{uuid=\"$gpu\"} or nvidia_smi_clocks_throttle_reasons_gpu_idle{uuid=\"$gpu\"}",
      "instant": false,
      "interval": "",
      "legendFormat": "Idle",
      "refId": "A"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_clocks_event_reasons_hw_thermal_slowdown{uuid=\"$gpu\"} or nvidia_smi_clocks_throttle_reasons_hw_thermal_slowdown{uuid=\"$gpu\"}",
      "hide": false,
      "interval": "",
      "legendFormat": "HW Thermal Slowdown",
      "refId": "B"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_clocks_event_reasons_sw_power_cap{uuid=\"$gpu\"} or nvidia_smi_clocks_throttle_reasons_sw_power_cap{uuid=\"$gpu\"}",
      "hide": false,
      "interval": "",
      "legendFormat": "SW Power Cap",
      "refId": "C"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_clocks_event_reasons_applications_clocks_setting{uuid=\"$gpu\"} or nvidia_smi_clocks_throttle_reasons_applications_clocks_setting{uuid=\"$gpu\"}",
      "hide": false,
      "interval": "",
      "legendFormat": "App Clocks Setting",
      "refId": "D"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_clocks_event_reasons_hw_power_brake_slowdown{uuid=\"$gpu\"} or nvidia_smi_clocks_throttle_reasons_hw_power_brake_slowdown{uuid=\"$gpu\"}",
      "hide": false,
      "interval": "",
      "legendFormat": "HW Power Brake",
      "refId": "E"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_clocks_event_reasons_sw_thermal_slowdown{uuid=\"$gpu\"} or nvidia_smi_clocks_throttle_reasons_sw_thermal_slowdown{uuid=\"$gpu\"}",
      "hide": false,
      "interval": "",
      "legendFormat": "SW Thermal Slowdown",
      "refId": "F"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prom"
      },
      "exemplar": true,
      "expr": "nvidia_smi_clocks_event_reasons_sync_boost{uuid=\"$gpu\"} or nvidia_smi_clocks_throttle_reasons_sync_boost{uuid=\"$gpu\"}",
      "hide": false,
      "interval": "",
      "legendFormat": "Sync Boost",
      "refId": "G"
    }
  ],
  "title": "Throttle Reasons",
  "type": "bargauge"
},
{
"datasource":{
"type":"prometheus",
"uid":"prom"
},
"description":"Current frequency of graphics (shader) clock\n/\nMaximum frequency of graphics (shader) clock.\n",
"description":"Percent of time over the past sample period during which global (device) memory was being read or written.\nThe sample period may be between 1 second and 1/6 second depending on the product.",
"description":"Percent of time over the past sample period during which one or more kernels was executing on the GPU.\nThe sample period may be between 1 second and 1/6 second depending on the product.",
"description":"The last measured power draw for the entire board, in watts. Only available if power management is supported. This reading is accurate to within +/- 5 watts",
"description":"The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. This value may exceed 100% in certain cases. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.\n",