Compare commits

3 Commits

4 changed files with 4638 additions and 1922 deletions

1558
gorush.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

888
nvidia_dcgm.json Normal file
View File

@@ -0,0 +1,888 @@
{
"annotations": {
"list": [
{
"$$hashKey": "object:192",
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "This dashboard displays metrics from the DCGM Exporter on a Kubernetes (1.19+) cluster.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 52,
"links": [],
"panels": [
{
"datasource": {
"uid": "$datasource"
},
"description": "Average temperature per GPU",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 83
},
{
"color": "red",
"value": 87
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 14,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "avg by (instance, gpu) (\r\n DCGM_FI_DEV_GPU_TEMP{instance=~\"$instance\", gpu=~\"$gpu\", pod=~\"$pod\"}\r\n)\r\n",
"interval": "",
"legendFormat": "GPU: {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Avg. Temp",
"type": "gauge"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "#E0B400",
"mode": "thresholds"
},
"mappings": [],
"max": 2400,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "#EAB839",
"value": 1800.0001
},
{
"color": "red",
"value": 2200
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 16,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"sum"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto",
"text": {}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "sum by (instance, gpu) (\r\n DCGM_FI_DEV_POWER_USAGE{\r\n instance=~\"$instance\",\r\n gpu=~\"$gpu\",\r\n pod=~\"$pod\"\r\n }\r\n)\r\n",
"instant": true,
"interval": "",
"legendFormat": "GPU: {{gpu}}",
"range": false,
"refId": "A"
}
],
"title": "GPU Power Total",
"type": "gauge"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "celsius"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 12,
"options": {
"dataLinks": [],
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "max by (instance, gpu) (\r\n DCGM_FI_DEV_GPU_TEMP{instance=~\"$instance\", gpu=~\"$gpu\", pod=~\"$pod\"}\r\n)\r\n",
"instant": false,
"interval": "",
"legendFormat": "GPU {{gpu}}",
"refId": "A"
}
],
"title": "GPU Temperature",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 100,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 6,
"options": {
"dataLinks": [],
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Max",
"sortDesc": false
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "max by (instance, gpu) (\r\n DCGM_FI_DEV_GPU_UTIL{\r\n instance=~\"$instance\",\r\n gpu=~\"$gpu\",\r\n pod=~\"$pod\"\r\n }\r\n)\r\n",
"interval": "",
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Utilization",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "watt"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 10,
"options": {
"dataLinks": [],
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "max by (instance, gpu) (\r\n DCGM_FI_DEV_POWER_USAGE{\r\n instance=~\"$instance\",\r\n gpu=~\"$gpu\",\r\n pod=~\"$pod\"\r\n }\r\n)\r\n",
"interval": "",
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Power Usage",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "decmbytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 18,
"options": {
"dataLinks": [],
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "max by (instance, gpu) (\r\n DCGM_FI_DEV_FB_USED{\r\n instance=~\"$instance\",\r\n gpu=~\"$gpu\",\r\n pod=~\"$pod\"\r\n }\r\n)\r\n",
"interval": "",
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU Framebuffer Mem Used",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "hertz"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 2,
"options": {
"dataLinks": [],
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true,
"sortBy": "Name",
"sortDesc": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "max by (instance, gpu) (\r\n DCGM_FI_DEV_SM_CLOCK{\r\n instance=~\"$instance\",\r\n gpu=~\"$gpu\",\r\n pod=~\"$pod\"\r\n }\r\n) * 1000000\r\n",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "GPU SM Clocks",
"type": "timeseries"
},
{
"datasource": {
"uid": "$datasource"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 4,
"options": {
"dataLinks": [],
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "right",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"uid": "$datasource"
},
"editorMode": "code",
"expr": "max by (instance, gpu) (\r\n DCGM_FI_PROF_PIPE_TENSOR_ACTIVE{\r\n instance=~\"$instance\",\r\n gpu=~\"$gpu\",\r\n pod=~\"$pod\"\r\n }\r\n)\r\n",
"interval": "",
"legendFormat": "GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"title": "Tensor Core Utilization",
"type": "timeseries"
}
],
"preload": false,
"refresh": false,
"schemaVersion": 41,
"tags": [],
"templating": {
"list": [
{
"current": {
"text": "Mimir",
"value": "prom"
},
"includeAll": false,
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"current": {
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": "$datasource",
"definition": "label_values(DCGM_FI_DEV_GPU_TEMP, instance)",
"includeAll": true,
"multi": true,
"name": "instance",
"options": [],
"query": "label_values(DCGM_FI_DEV_GPU_TEMP, instance)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
},
{
"current": {
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": "$datasource",
"definition": "label_values(DCGM_FI_DEV_GPU_TEMP, gpu)",
"includeAll": true,
"multi": true,
"name": "gpu",
"options": [],
"query": "label_values(DCGM_FI_DEV_GPU_TEMP, gpu)",
"refresh": 1,
"regex": "",
"sort": 1,
"type": "query"
},
{
"current": {
"text": "All",
"value": "$__all"
},
"definition": "label_values(DCGM_FI_DEV_GPU_TEMP{instance=~\"$instance\"},pod)",
"includeAll": true,
"label": "pod",
"name": "pod",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(DCGM_FI_DEV_GPU_TEMP{instance=~\"$instance\"},pod)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "NVIDIA DCGM Exporter Dashboard",
"uid": "nvidia_dcgm_exporter",
"version": 34
}

View File

@@ -18,7 +18,7 @@
"editable": true, "editable": true,
"fiscalYearStartMonth": 0, "fiscalYearStartMonth": 0,
"graphTooltip": 0, "graphTooltip": 0,
"id": 18, "id": 47,
"links": [], "links": [],
"panels": [ "panels": [
{ {
@@ -43,7 +43,7 @@
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
"mode": "palette-classic" "mode": "palette-classic-by-name"
}, },
"custom": { "custom": {
"axisBorderShow": false, "axisBorderShow": false,
@@ -91,7 +91,8 @@
"value": 80 "value": 80
} }
] ]
} },
"unit": "bytes"
}, },
"overrides": [] "overrides": []
}, },
@@ -140,6 +141,7 @@
"type": "prometheus", "type": "prometheus",
"uid": "prom" "uid": "prom"
}, },
"description": "старт tusd",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@@ -164,7 +166,7 @@
"overrides": [] "overrides": []
}, },
"gridPos": { "gridPos": {
"h": 6, "h": 7,
"w": 14, "w": 14,
"x": 10, "x": 10,
"y": 1 "y": 1
@@ -201,107 +203,6 @@
"title": "Process Start Time (hours)", "title": "Process Start Time (hours)",
"type": "gauge" "type": "gauge"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"description": "Показывает производительность GC и его частоту. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 14,
"x": 10,
"y": 7
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"editorMode": "code",
"expr": "rate(go_gc_duration_seconds_sum{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\"}[5m])",
"legendFormat": "{{instance}} pod : {{pod}} :GC Time Rate (s/sec)",
"range": true,
"refId": "A"
},
{
"editorMode": "code",
"expr": "go_memstats_next_gc_bytes{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\"}",
"legendFormat": "{{instance}} pod : {{pod}} : Next GC Threshold (bytes)",
"range": true,
"refId": "B"
}
],
"title": "GC Duration & Next GC",
"type": "timeseries"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@@ -373,6 +274,197 @@
"title": "Open Connections", "title": "Open Connections",
"type": "gauge" "type": "gauge"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"description": "доля времени, которое процесс тратит на GC (CPU-time на GC)",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 7,
"x": 10,
"y": 8
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"editorMode": "code",
"expr": "rate(go_gc_duration_seconds_sum{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\"}[5m]) * 100",
"legendFormat": "{{instance}} pod : {{pod}}",
"range": true,
"refId": "A"
}
],
"title": "GC Duration",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"description": "Объём heap-памяти, при достижении которого Go runtime запустит следующий garbage collection.\nНе отражает текущее потребление памяти.",
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "dark-orange",
"mode": "fixed"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 7,
"x": 17,
"y": 8
},
"id": 28,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.0",
"targets": [
{
"editorMode": "code",
"expr": "go_memstats_next_gc_bytes{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\"}",
"legendFormat": "{{instance}} pod : {{pod}}",
"range": true,
"refId": "A"
}
],
"title": "Go next GC heap target",
"type": "timeseries"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@@ -504,7 +596,7 @@
"h": 12, "h": 12,
"w": 14, "w": 14,
"x": 10, "x": 10,
"y": 14 "y": 15
}, },
"id": 9, "id": 9,
"options": { "options": {
@@ -602,7 +694,7 @@
"overrides": [] "overrides": []
}, },
"gridPos": { "gridPos": {
"h": 8, "h": 9,
"w": 10, "w": 10,
"x": 0, "x": 0,
"y": 18 "y": 18
@@ -699,7 +791,7 @@
"h": 6, "h": 6,
"w": 24, "w": 24,
"x": 0, "x": 0,
"y": 26 "y": 27
}, },
"id": 10, "id": 10,
"options": { "options": {
@@ -755,6 +847,7 @@
"type": "prometheus", "type": "prometheus",
"uid": "prom" "uid": "prom"
}, },
"description": "использование CPU by TUSD",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@@ -814,7 +907,7 @@
"h": 7, "h": 7,
"w": 8, "w": 8,
"x": 0, "x": 0,
"y": 32 "y": 33
}, },
"id": 4, "id": 4,
"options": { "options": {
@@ -908,7 +1001,7 @@
"h": 7, "h": 7,
"w": 7, "w": 7,
"x": 8, "x": 8,
"y": 32 "y": 33
}, },
"id": 6, "id": 6,
"options": { "options": {
@@ -1001,7 +1094,8 @@
"value": 80 "value": 80
} }
] ]
} },
"unit": "bytes"
}, },
"overrides": [] "overrides": []
}, },
@@ -1009,7 +1103,7 @@
"h": 7, "h": 7,
"w": 9, "w": 9,
"x": 15, "x": 15,
"y": 32 "y": 33
}, },
"id": 8, "id": 8,
"options": { "options": {
@@ -1051,7 +1145,7 @@
"h": 1, "h": 1,
"w": 24, "w": 24,
"x": 0, "x": 0,
"y": 39 "y": 40
}, },
"id": 20, "id": 20,
"panels": [], "panels": [],
@@ -1114,7 +1208,8 @@
"value": 80 "value": 80
} }
] ]
} },
"unit": "ops"
}, },
"overrides": [] "overrides": []
}, },
@@ -1122,7 +1217,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 40 "y": 41
}, },
"id": 22, "id": 22,
"options": { "options": {
@@ -1207,7 +1302,8 @@
"value": 80 "value": 80
} }
] ]
} },
"unit": "ops"
}, },
"overrides": [] "overrides": []
}, },
@@ -1215,7 +1311,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 40 "y": 41
}, },
"id": 21, "id": 21,
"options": { "options": {
@@ -1301,7 +1397,8 @@
"value": 80 "value": 80
} }
] ]
} },
"unit": "ms"
}, },
"overrides": [] "overrides": []
}, },
@@ -1309,7 +1406,7 @@
"h": 5, "h": 5,
"w": 24, "w": 24,
"x": 0, "x": 0,
"y": 48 "y": 49
}, },
"id": 11, "id": 11,
"options": { "options": {
@@ -1351,7 +1448,7 @@
"h": 1, "h": 1,
"w": 24, "w": 24,
"x": 0, "x": 0,
"y": 53 "y": 54
}, },
"id": 23, "id": 23,
"panels": [], "panels": [],
@@ -1420,9 +1517,9 @@
}, },
"gridPos": { "gridPos": {
"h": 8, "h": 8,
"w": 18, "w": 11,
"x": 0, "x": 0,
"y": 54 "y": 55
}, },
"id": 26, "id": 26,
"options": { "options": {
@@ -1464,6 +1561,78 @@
"title": "S3 Upload Semaphore Demand vs Limit", "title": "S3 Upload Semaphore Demand vs Limit",
"type": "timeseries" "type": "timeseries"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"description": "99-й перцентиль длительности операций записи на диск в S3.\nПоказывает худшие 1% disk write операций и используется для выявления деградаций и длинных хвостов задержек.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 7,
"x": 11,
"y": 55
},
"id": 13,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"text": {
"titleSize": 11,
"valueSize": 50
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"editorMode": "code",
"expr": "tusd_s3_disk_write_duration_ms{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\", quantile=\"0.99\"}",
"hide": false,
"instant": false,
"legendFormat": "{{instance}} pod : {{pod}} : tusd Disk Write Time {{quantile}}",
"range": true,
"refId": "C"
}
],
"title": "S3 disk write latency (p99)",
"type": "stat"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@@ -1487,7 +1656,8 @@
"value": 80 "value": 80
} }
] ]
} },
"unit": "ms"
}, },
"overrides": [] "overrides": []
}, },
@@ -1495,7 +1665,7 @@
"h": 8, "h": 8,
"w": 6, "w": 6,
"x": 18, "x": 18,
"y": 54 "y": 55
}, },
"id": 25, "id": 25,
"options": { "options": {
@@ -1590,7 +1760,7 @@
"h": 9, "h": 9,
"w": 24, "w": 24,
"x": 0, "x": 0,
"y": 62 "y": 63
}, },
"id": 24, "id": 24,
"options": { "options": {
@@ -1619,112 +1789,13 @@
"title": "Request Duration (Avg, ms)", "title": "Request Duration (Avg, ms)",
"type": "timeseries" "type": "timeseries"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"description": "Среднюю продолжительность записи на диск",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 15,
"w": 24,
"x": 0,
"y": 71
},
"id": 13,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": true,
"text": {
"titleSize": 15,
"valueSize": 25
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"editorMode": "code",
"expr": "tusd_s3_disk_write_duration_ms_sum{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\"}",
"legendFormat": "{{instance}} pod : {{pod}} : Disk Write Duration sum (ms)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"editorMode": "code",
"expr": "tusd_s3_disk_write_duration_ms_count{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\"}",
"hide": false,
"instant": false,
"legendFormat": "{{instance}} pod : {{pod}} : Disk Write Duration count (ms)",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "prom"
},
"editorMode": "code",
"expr": "tusd_s3_disk_write_duration_ms{job=\"integrations/tusd\", instance=~\"$Instance\", pod=~\"$pod\"}",
"hide": false,
"instant": false,
"legendFormat": "{{instance}} pod : {{pod}} : tusd Disk Write Time {{quantile}}",
"range": true,
"refId": "C"
}
],
"title": "S3 Disk operations",
"type": "stat"
},
{ {
"collapsed": false, "collapsed": false,
"gridPos": { "gridPos": {
"h": 1, "h": 1,
"w": 24, "w": 24,
"x": 0, "x": 0,
"y": 86 "y": 72
}, },
"id": 19, "id": 19,
"panels": [], "panels": [],
@@ -1795,7 +1866,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 87 "y": 73
}, },
"id": 16, "id": 16,
"options": { "options": {
@@ -1888,7 +1959,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 87 "y": 73
}, },
"id": 17, "id": 17,
"options": { "options": {
@@ -1981,7 +2052,7 @@
"h": 5, "h": 5,
"w": 24, "w": 24,
"x": 0, "x": 0,
"y": 95 "y": 81
}, },
"id": 27, "id": 27,
"options": { "options": {
@@ -2041,7 +2112,9 @@
{ {
"current": { "current": {
"text": "All", "text": "All",
"value": "$__all" "value": [
"$__all"
]
}, },
"definition": "label_values({cluster=~\"$cluster\", job=\"integrations/tusd\"},instance)", "definition": "label_values({cluster=~\"$cluster\", job=\"integrations/tusd\"},instance)",
"includeAll": true, "includeAll": true,
@@ -2060,7 +2133,9 @@
{ {
"current": { "current": {
"text": "All", "text": "All",
"value": "$__all" "value": [
"$__all"
]
}, },
"definition": "label_values({cluster=~\"$cluster\", job=\"integrations/tusd\"},pod)", "definition": "label_values({cluster=~\"$cluster\", job=\"integrations/tusd\"},pod)",
"includeAll": true, "includeAll": true,
@@ -2085,6 +2160,6 @@
"timepicker": {}, "timepicker": {},
"timezone": "browser", "timezone": "browser",
"title": "Tusd Metrics", "title": "Tusd Metrics",
"uid": "tusd-metric-new-try", "uid": "eeeebd69-5bb9-4715-feeewwds-544",
"version": 3 "version": 17
} }