Add Prometheus and Grafana to Docker Compose (#221)

* Add Prometheus and Grafana to Docker Compose

* Expose relay's Prometheus metrics port

* Use Docker network references intead of localhost

When you have containers on the same network they don't communicate
over localhost, they instead refer to their container names

* Move dashboard components into deployment folder

* Update folder structure for Grafana and Prometheus config files

The new folder structure more closely matches the expected defaults
by Grafana and Prometheus, which allows us to clean up the paths
in our docker-compose file a bit.

* Add documentation about Prometheus and Grafana

* Refer to Prometheus server instead of node
This commit is contained in:
Hernando Castano
2020-07-21 06:16:53 -04:00
committed by Bastian Köcher
parent bdf6901ce2
commit 663eb57323
5 changed files with 7 additions and 441 deletions
@@ -1,410 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"links": [],
"panels": [
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {
"align": null
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"interval": "5s",
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
}
},
"pluginVersion": "7.0.6",
"targets": [
{
"expr": "best_block_numbers",
"format": "time_series",
"instant": true,
"interval": "",
"intervalFactor": 1,
"legendFormat": "Best known block on {{node}} node",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Best blocks relay knows of",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 12,
"y": 0
},
"hiddenSeries": false,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "system_average_load",
"interval": "",
"legendFormat": "Average system load in last {{over}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "System load average",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 6,
"x": 18,
"y": 0
},
"id": 12,
"options": {
"orientation": "auto",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"pluginVersion": "7.0.6",
"targets": [
{
"expr": "process_cpu_usage_percentage",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Relay process CPU usage (1 CPU = 100)",
"type": "gauge"
},
{
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 12,
"x": 0,
"y": 6
},
"id": 4,
"options": {
"displayMode": "gradient",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
},
"showUnfilled": true
},
"pluginVersion": "7.0.6",
"targets": [
{
"expr": "blocks_in_state",
"instant": true,
"interval": "",
"legendFormat": "{{state}}",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Blocks in state on relay",
"type": "bargauge"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 9
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "process_memory_usage_bytes / 1024 / 1024",
"interval": "",
"legendFormat": "Process memory, MB",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Memory used by relay process",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "5s",
"schemaVersion": 25,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "RelayDashboard",
"uid": "oj3y7vnGz",
"version": 1
}
@@ -1,6 +0,0 @@
- name: 'default'
orgId: 1
folder: ''
type: file
options:
path: '/etc/grafana/provisioning/dashboards/grafana-dashboard.json'
@@ -1,9 +0,0 @@
datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
url: http://localhost:9090
editable: false
is_default: true
version: 1
@@ -1,9 +0,0 @@
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'relay_node'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
- targets: ['127.0.0.1:9616']