From c0541055d6b997671c34ab5f1050ef0a5cd14bba Mon Sep 17 00:00:00 2001
From: Vladimir Denisov
Date: Mon, 3 Sep 2018 11:17:58 +0300
Subject: [PATCH 1/3] Update monitoring/docker-compose.yml

Node exporter inside the container does not have access to the host's network
metrics, so we mount /proc into the container read-only (including PID 1's
/proc/1/net/dev).
---
 monitoring/docker-compose.yml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml
index 3c83847..7212fc1 100644
--- a/monitoring/docker-compose.yml
+++ b/monitoring/docker-compose.yml
@@ -9,9 +9,18 @@ services:
   # Runs on your node(s) and forwards node(host) metrics to Prometheus.
   master-nodeexporter:
-    image: prom/node-exporter:v0.14.0
+    image: prom/node-exporter:v0.15.2
     expose:
       - 9100
+    volumes:
+      - /proc:/host/proc:ro
+      - /proc/1/net/dev:/host/proc/net/dev:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.sysfs=/host/sys'
+      - '--collector.filesystem.ignored-mount-points="^/(sys|proc|dev|host|etc)($$|/)"'
     restart: always
     labels:
       container_group: monitoring

From 4dfc9dff147596d61b672f51a606909db641b69e Mon Sep 17 00:00:00 2001
From: Vladimir Denisov
Date: Thu, 13 Sep 2018 10:31:57 +0300
Subject: [PATCH 2/3] Changed Prometheus version, updated related section in
 monitoring/docker-compose.yml, updated Prometheus alerting rules

---
 monitoring/docker-compose.yml                 |  17 ++-
 monitoring/prometheus/prometheus.yml          | 114 +++++++++++------
 .../rules/alert.rules_container-groups        |  18 +--
 .../prometheus/rules/alert.rules_containers   |  78 ++++++------
 monitoring/prometheus/rules/alert.rules_nodes | 119 +++++++++---------
 monitoring/prometheus/rules/alert.rules_sites |  20 +--
 6 files changed, 201 insertions(+), 165 deletions(-)
 mode change 100644 => 100755 monitoring/prometheus/prometheus.yml
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_container-groups
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_containers
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_nodes
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_sites

diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml
index 7212fc1..009f56a 100644
--- a/monitoring/docker-compose.yml
+++ b/monitoring/docker-compose.yml
@@ -55,7 +55,7 @@ services:
   # Storage and search backend. Gets all metrics from cAdvisor and Nodeexporter and is the backend that Grafana runs on.
prometheus: - image: prom/prometheus:v1.7.1 + image: prom/prometheus:v2.3.2 expose: - 9090 volumes: @@ -66,14 +66,13 @@ services: - ./prometheus:/etc/prometheus restart: always command: - - "-config.file=/etc/prometheus/prometheus.yml" - - "-storage.local.path=/prometheus" - - "-web.console.libraries=/etc/prometheus/console_libraries" - - "-web.console.templates=/etc/prometheus/consoles" - - "-web.listen-address=:9090" - - "-alertmanager.url=http://alertmanager:9093" - - "-storage.local.memory-chunks=300000" - - "-storage.local.retention=744h" + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--query.lookback-delta=25s" + - "--web.console.libraries=/etc/prometheus/console_libraries" + - "--web.console.templates=/etc/prometheus/consoles" + - "--web.listen-address=:9090" + - "--storage.tsdb.retention=744h" labels: container_group: monitoring logging: diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml old mode 100644 new mode 100755 index 1c6967f..39fdc7a --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -1,7 +1,7 @@ # my global config global: - scrape_interval: 15s # By default, scrape targets every 15 seconds. - evaluation_interval: 15s # By default, scrape targets every 15 seconds. + scrape_interval: 10s # By default, scrape targets every 15 seconds. + evaluation_interval: 10s # By default, scrape targets every 15 seconds. # scrape_timeout is set to the global default (10s). # Attach these labels to any time series or alerts when communicating with @@ -21,19 +21,54 @@ rule_files: scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'node' - scrape_interval: 10s - scrape_timeout: 5s - static_configs: - - targets: ['master-nodeexporter:9100'] - labels: {'host': 'host1'} + - job_name: 'netdata' - - job_name: 'containers' - scrape_interval: 10s - scrape_timeout: 5s + metrics_path: /api/v1/allmetrics + params: + format: [prometheus] +# honor_labels: true static_configs: - - targets: ['master-cadvisor:8080'] - labels: {'host': 'host1'} + - targets: ['netdata:19999'] + metric_relabel_configs: + - source_labels: [chart] + regex: cgroup_(.+)_(.+_.+)\..* + target_label: container_group + replacement: '$1' + + - source_labels: [chart] + regex: cgroup_(.+)_(.+_.+)\..* + target_label: container_name + replacement: '$2' + +# - source_labels: [__name__] +# regex: (.*) +# replacement: ${1} +# target_label: container_name + + + # - job_name: 'node' + # scrape_interval: 10s + # scrape_timeout: 5s + # static_configs: + # - targets: ['master-nodeexporter:9100'] + # labels: {'host': 'host1'} + + # - job_name: 'containers' + # scrape_interval: 10s + # scrape_timeout: 5s + # static_configs: + # - targets: ['master-cadvisor:8080'] + # labels: {'host': 'host1'} + + # - job_name: 'job-count' + # scrape_interval: 10s + # scrape_timeout: 5s + # metrics_path: /services/admin/metrics + # static_configs: + # - targets: ['dev-backend-mypos:1337'] + # labels: {'host': 'host1'} + +# http://myposdev.vallab.ru/services/metrics #Will/23.08.16: dcom restart prometheus suffices to load config/rule changes @@ -45,32 +80,33 @@ scrape_configs: labels: {'host': 'host1'} # see https://github.com/prometheus/blackbox_exporter relabel - - job_name: 'service' - scrape_interval: 60s - scrape_timeout: 15s - metrics_path: /probe - # if your target is https, you either need to install cert in blackbox proble container - # or add below line to ignore verify - # 
tls_config:
-  #     insecure_skip_verify: true
-    params:
-      module: [http_2xx]  # Look for a HTTP 200 response.
-    file_sd_configs:
-      - files:
-        - /etc/prometheus/service.yml
-    relabel_configs:
-      - source_labels: [__address__]
-        regex: (.*)
-        target_label: __param_target
-        replacement: ${1}
-      - source_labels: [__address__]
-        regex: (.*)
-        target_label: service_url
-        replacement: ${1}
-      - source_labels: []
-        regex: .*
-        target_label: __address__
-        replacement: blackboxprober:9115
+  # - job_name: 'service'
+  #   scrape_interval: 10s
+  #   scrape_timeout: 10s
+  #   metrics_path: /probe
+  #   # if your target is https, you either need to install cert in blackbox proble container
+  #   # or add below line to ignore verify
+  #   # tls_config:
+  #   #   insecure_skip_verify: true
+  #   params:
+  #     # module: [http_2xx]  # Look for a HTTP 200 response.
+  #     module: [icmp]  # Look for a ping.
+  #   file_sd_configs:
+  #     - files:
+  #       - /etc/prometheus/service.yml
+  #   relabel_configs:
+  #     - source_labels: [__address__]
+  #       regex: (.*)
+  #       target_label: __param_target
+  #       replacement: ${1}
+  #     - source_labels: [__address__]
+  #       regex: (.*)
+  #       target_label: service_url
+  #       replacement: ${1}
+  #     - source_labels: []
+  #       regex: .*
+  #       target_label: __address__
+  #       replacement: blackboxprober:9115
 # - job_name: 'couchdb'
 #   scrape_interval: 10s
diff --git a/monitoring/prometheus/rules/alert.rules_container-groups b/monitoring/prometheus/rules/alert.rules_container-groups
old mode 100644
new mode 100755
index d8cf81b..754b7db
--- a/monitoring/prometheus/rules/alert.rules_container-groups
+++ b/monitoring/prometheus/rules/alert.rules_container-groups
@@ -1,8 +1,10 @@
-
-ALERT monitoring_container_group_missing_members
-  IF count(rate(container_last_seen{name=~".+",container_group=~"monitoring"}[5m])) by (container_group,host) < 10
-  FOR 5m
-  ANNOTATIONS {
-    summary = "CONTAINER GROUP WARNING: CONTAINER GROUP '{{ $labels.container_group }}' on '{{ $labels.host }}'",
-    description = "{{ $labels.container_group }} is missing containers. Container count is {{ $value }}/13.",
-  }
+groups:
+- name: container_groups
+  rules:
+  - alert: monitoring_container_group_missing_members
+    expr: count(rate(container_last_seen{name=~".+",container_group=~"monitoring"}[5m])) by (container_group,host) < 6
+    for: 5m
+    annotations:
+      summary: "CONTAINER GROUP WARNING: CONTAINER GROUP '{{ $labels.container_group }}' on '{{ $labels.host }}'"
+      description: "{{ $labels.container_group }} is missing containers. Container count is {{ $value }}/13."
+
diff --git a/monitoring/prometheus/rules/alert.rules_containers b/monitoring/prometheus/rules/alert.rules_containers
old mode 100644
new mode 100755
index 41d732f..29d6c4c
--- a/monitoring/prometheus/rules/alert.rules_containers
+++ b/monitoring/prometheus/rules/alert.rules_containers
@@ -1,45 +1,41 @@
+groups:
+- name: containers
+  rules:
+  - alert: high_cpu_usage_on_container
+    expr: sum(rate(container_cpu_usage_seconds_total{name=~".+"}[5m])) by (name,host) * 100 > 50
+    for: 5m
+    annotations:
+      summary: "HIGH CPU USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'"
+      description: "{{ $labels.name }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%."
+
+  - alert: elasticsearch_eating_memory
+    expr: sum(container_memory_rss{name=~"logging_elasticsearch_1"}) by (host, name) > 1200000000
+    for: 5m
+    annotations:
+      summary: "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'"
+      description: "{{ $labels.name }} is eating up a LOT of memory. 
Memory consumption of {{ $labels.name }} is at {{ humanize $value}}." -ALERT high_cpu_usage_on_container - IF sum(rate(container_cpu_usage_seconds_total{name=~".+"}[5m])) by (name,host) * 100 > 50 - FOR 5m - ANNOTATIONS { - summary = "HIGH CPU USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%.", - } + - alert: prometheus_eating_memory + expr: sum(container_memory_rss{name=~"monitoring_prometheus_1"}) by (host, name) > 1200000000 + for: 5m + annotations: + summary: "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'" + description: "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}." -ALERT elasticsearch_eating_memory - IF sum(container_memory_rss{name=~"logging_elasticsearch_1"}) by (host, name) > 1200000000 - FOR 5m - ANNOTATIONS { - summary = "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}.", - } + - alert: container_eating_memory + expr: sum(container_memory_rss{name=~".+",name!="logging_elasticsearch_1",name!="monitoring_prometheus_1"}) by (host,name) > 700000000 + for: 5m + annotations: + summary: "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'" + description: "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}." -ALERT prometheus_eating_memory - IF sum(container_memory_rss{name=~"monitoring_prometheus_1"}) by (host, name) > 1200000000 - FOR 5m - ANNOTATIONS { - summary = "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}.", - } - -ALERT container_eating_memory - IF sum(container_memory_rss{name=~".+",name!="logging_elasticsearch_1",name!="monitoring_prometheus_1"}) by (host,name) > 700000000 - FOR 5m - ANNOTATIONS { - summary = "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is eating up a LOT of memory. 
Memory consumption of {{ $labels.name }} is at {{ humanize $value}}.",
-  }
-
-
-ALERT container_down
-IF (absent(container_memory_usage_bytes{name="logging_elasticsearch_1"})
-    or absent(container_memory_usage_bytes{name="monitoring_prometheus_1"})
-   )
-  FOR 5m
-  LABELS { severity = "Critical" }
-  ANNOTATIONS {
-    summary= "CONTAINER '{{ $labels.name }}' down",
-    description = "container with name '{{ $labels.name }}' is down for more than 5 minutes"
-  }
+  - alert: container_down
+    expr: (absent(container_memory_usage_bytes{name="logging_elasticsearch_1"}) or absent(container_memory_usage_bytes{name="monitoring_prometheus_1"}))
+    for: 5m
+    labels:
+      severity: Critical
+    annotations:
+      summary: "CONTAINER '{{ $labels.name }}' down"
+      description: "container with name '{{ $labels.name }}' is down for more than 5 minutes"
+
diff --git a/monitoring/prometheus/rules/alert.rules_nodes b/monitoring/prometheus/rules/alert.rules_nodes
old mode 100644
new mode 100755
index 970b1d5..9c40874
--- a/monitoring/prometheus/rules/alert.rules_nodes
+++ b/monitoring/prometheus/rules/alert.rules_nodes
@@ -1,60 +1,59 @@
-
-ALERT monitoring_service_down
-  IF up == 0
-  FOR 5m
-  ANNOTATIONS {
-    summary = "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'",
-    description = "The monitoring service '{{ $labels.job }}' is down.",
-  }
-
-ALERT high_load_on_node
-  IF node_load5 > 2
-  FOR 3m
-  ANNOTATIONS {
-    summary = "HIGH LOAD WARINING: NODE '{{ $labels.host }}'",
-    description = "{{ $labels.host}} is under high load. Load is {{ humanize $value }}.",
-  }
-
-ALERT node_running_out_of_memory
-  IF node_memory_MemAvailable < 1500000000
-  FOR 5m
-  ANNOTATIONS {
-    summary = "LOW MEMORY WARING: NODE '{{ $labels.host }}'",
-    description = "Less than 1.5GB of free memory. Free memory at {{ humanize $value }} GB.",
-  }
-
-ALERT node_running_out_of_disk_space
-  IF node_filesystem_free{mountpoint="/etc/hostname"} < 40000000000
-  FOR 5m
-  ANNOTATIONS {
-    summary = "LOW DISK SPACE WARING: NODE '{{ $labels.host }}'",
-    description = "Less than 40GB of free disk space. Free disk space at {{ humanize $value }} GB.",
-  }
-
-ALERT node_low_disk_space
-  IF 100 * min(node_filesystem_avail/node_filesystem_size{mountpoint=~"/etc/hostname|^/mnt.*|^/data.*"}) by (device, fstype, host, instance, job) < 10.0
-  FOR 5m
-  LABELS { severity = "Warning" }
-  ANNOTATIONS {
-    summary = "LOW DISK SPACE WARING: NODE '{{ $labels.host }}'",
-    description = "Warning, node '{{ $labels.host }}' has less than 10% of free disk space on device {{ $labels.device }} with mount point {{ $labels.mountpoint }}. Available disk space at {{ humanize $value }}%",
-  }
-
-ALERT node_restarted
-  IF node_time - node_boot_time < 300
-  FOR 1m
-  LABELS { severity = "Warning" }
-  ANNOTATIONS {
-    summary = "NODE RESTARTED WARNING: NODE '{{ $labels.host }}'",
-    description = "Warning: Node '{{ $labels.host }}' was restarted at {{ $value }} seconds ago.",
-  }
-
-################################### TEST ALERT
-
-ALERT TESTING_high_load_on_node
-  IF node_load1 > 0
-  FOR 1s
-  ANNOTATIONS {
-    summary = "+++ TESTING ++++ TESTING ++++ TESTING +++ ::: HIGH LOAD WARNING: NODE '{{ $labels.host }}'",
-    description = "{{ $labels.host}} is under high load. Load is {{ humanize $value }}.",
-  }
+groups:
+- name: nodes
+  rules:
+  - alert: monitoring_service_down
+    expr: up == 0
+    for: 5m
+    annotations:
+      summary: "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'"
+      description: "The monitoring service '{{ $labels.job }}' is down."
+
+
+  - alert: high_load_on_node
+    expr: node_load5 > 2
+    for: 3m
+    annotations:
+      summary: "HIGH LOAD WARNING: NODE '{{ $labels.host }}'"
+      description: "{{ $labels.host}} is under high load. Load is {{ humanize $value }}."
+
+
+  - alert: node_running_out_of_memory
+    expr: node_memory_MemAvailable < 1500000000
+    for: 5m
+    annotations:
+      summary: "LOW MEMORY WARNING: NODE '{{ $labels.host }}'"
+      description: "Less than 1.5GB of free memory. Free memory at {{ humanize $value }} GB."
+
+
+  - alert: node_running_out_of_disk_space
+    expr: node_filesystem_free{mountpoint="/etc/hostname"} < 40000000000
+    for: 5m
+    annotations:
+      summary: "LOW DISK SPACE WARNING: NODE '{{ $labels.host }}'"
+      description: Less than 40GB of free disk space. Free disk space at {{ humanize $value }} GB.
+
+
+  - alert: node_low_disk_space
+    expr: 100 * min(node_filesystem_avail/node_filesystem_size{mountpoint=~"/etc/hostname|^/mnt.*|^/data.*"}) by (device, fstype, host, instance, job) < 10.0
+    for: 5m
+    annotations:
+      summary: "LOW DISK SPACE WARNING: NODE '{{ $labels.host }}'"
+      description: "Warning, node '{{ $labels.host }}' has less than 10% of free disk space on device {{ $labels.device }} with mount point {{ $labels.mountpoint }}. Available disk space at {{ humanize $value }}%"
+
+  - alert: node_restarted
+    expr: node_time - node_boot_time < 300
+    for: 1m
+    annotations:
+      summary: "NODE RESTARTED WARNING: NODE '{{ $labels.host }}'"
+      description: "Warning: Node '{{ $labels.host }}' was restarted {{ $value }} seconds ago."
+
+
+################################### TEST ALERT
+
+  - alert: TESTING_high_load_on_node
+    expr: node_load1 > 0
+    for: 1s
+    annotations:
+      summary: "+++ TESTING ++++ TESTING ++++ TESTING +++ ::: HIGH LOAD WARNING: NODE '{{ $labels.host }}'"
+      description: "{{ $labels.host}} is under high load. Load is {{ humanize $value }}."
+
diff --git a/monitoring/prometheus/rules/alert.rules_sites b/monitoring/prometheus/rules/alert.rules_sites
old mode 100644
new mode 100755
index b362399..8b0c5d8
--- a/monitoring/prometheus/rules/alert.rules_sites
+++ b/monitoring/prometheus/rules/alert.rules_sites
@@ -1,9 +1,13 @@
-ALERT service_endpoint_down
-  IF probe_success{job='service'} == 0
-  FOR 3m
-  LABELS { severity = "Critical" }
-  ANNOTATIONS {
-    summary = "ENGOPS SERVICE NON-OPERATIONAL",
-    description = "Service {{ $labels.service_name }} with endpoint {{ $labels.service_url }} is not reachable. Please check whether the instance is running",
-  }
+groups:
+- name: sites
+  rules:
+  - alert: service_endpoint_down
+    expr: probe_success{job='service'} == 0
+    for: 3m
+    labels:
+      severity: Critical
+    annotations:
+      summary: ENGOPS SERVICE NON-OPERATIONAL
+      description: Service {{ $labels.service_name }} with endpoint {{ $labels.service_url }} is not reachable. Please check whether the instance is running
+

From cf018a51a851ffa1c015f2205ad97639bcb72387 Mon Sep 17 00:00:00 2001
From: Vladimir Denisov
Date: Thu, 13 Sep 2018 10:36:43 +0300
Subject: [PATCH 3/3] Revert to the previous version of prometheus.yml

---
 monitoring/prometheus/prometheus.yml | 114 +++++++++------------
 1 file changed, 39 insertions(+), 75 deletions(-)

diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml
index 39fdc7a..1c6967f 100755
--- a/monitoring/prometheus/prometheus.yml
+++ b/monitoring/prometheus/prometheus.yml
@@ -1,7 +1,7 @@
 # my global config
 global:
-  scrape_interval: 10s # By default, scrape targets every 15 seconds.
- evaluation_interval: 10s # By default, scrape targets every 15 seconds. + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. # scrape_timeout is set to the global default (10s). # Attach these labels to any time series or alerts when communicating with @@ -21,54 +21,19 @@ rule_files: scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'netdata' - - metrics_path: /api/v1/allmetrics - params: - format: [prometheus] -# honor_labels: true + - job_name: 'node' + scrape_interval: 10s + scrape_timeout: 5s static_configs: - - targets: ['netdata:19999'] - metric_relabel_configs: - - source_labels: [chart] - regex: cgroup_(.+)_(.+_.+)\..* - target_label: container_group - replacement: '$1' - - - source_labels: [chart] - regex: cgroup_(.+)_(.+_.+)\..* - target_label: container_name - replacement: '$2' - -# - source_labels: [__name__] -# regex: (.*) -# replacement: ${1} -# target_label: container_name - - - # - job_name: 'node' - # scrape_interval: 10s - # scrape_timeout: 5s - # static_configs: - # - targets: ['master-nodeexporter:9100'] - # labels: {'host': 'host1'} - - # - job_name: 'containers' - # scrape_interval: 10s - # scrape_timeout: 5s - # static_configs: - # - targets: ['master-cadvisor:8080'] - # labels: {'host': 'host1'} - - # - job_name: 'job-count' - # scrape_interval: 10s - # scrape_timeout: 5s - # metrics_path: /services/admin/metrics - # static_configs: - # - targets: ['dev-backend-mypos:1337'] - # labels: {'host': 'host1'} + - targets: ['master-nodeexporter:9100'] + labels: {'host': 'host1'} -# http://myposdev.vallab.ru/services/metrics + - job_name: 'containers' + scrape_interval: 10s + scrape_timeout: 5s + static_configs: + - targets: ['master-cadvisor:8080'] + labels: {'host': 'host1'} #Will/23.08.16: dcom restart prometheus suffices to load config/rule changes @@ -80,33 +45,32 @@ scrape_configs: labels: {'host': 'host1'} # see https://github.com/prometheus/blackbox_exporter relabel - # - job_name: 'service' - # scrape_interval: 10s - # scrape_timeout: 10s - # metrics_path: /probe - # # if your target is https, you either need to install cert in blackbox proble container - # # or add below line to ignore verify - # # tls_config: - # # insecure_skip_verify: true - # params: - # # module: [http_2xx] # Look for a HTTP 200 response. - # module: [icmp] # Look for a ping. - # file_sd_configs: - # - files: - # - /etc/prometheus/service.yml - # relabel_configs: - # - source_labels: [__address__] - # regex: (.*) - # target_label: __param_target - # replacement: ${1} - # - source_labels: [__address__] - # regex: (.*) - # target_label: service_url - # replacement: ${1} - # - source_labels: [] - # regex: .* - # target_label: __address__ - # replacement: blackboxprober:9115 + - job_name: 'service' + scrape_interval: 60s + scrape_timeout: 15s + metrics_path: /probe + # if your target is https, you either need to install cert in blackbox proble container + # or add below line to ignore verify + # tls_config: + # insecure_skip_verify: true + params: + module: [http_2xx] # Look for a HTTP 200 response. 
+ file_sd_configs: + - files: + - /etc/prometheus/service.yml + relabel_configs: + - source_labels: [__address__] + regex: (.*) + target_label: __param_target + replacement: ${1} + - source_labels: [__address__] + regex: (.*) + target_label: service_url + replacement: ${1} + - source_labels: [] + regex: .* + target_label: __address__ + replacement: blackboxprober:9115 # - job_name: 'couchdb' # scrape_interval: 10s
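Reviewer note on the Prometheus 2.x migration in PATCH 2/3: the compose command drops the old v1 flag -alertmanager.url=http://alertmanager:9093, and Prometheus 2.x no longer accepts that flag at all; Alertmanager endpoints have to be declared in prometheus.yml instead. None of the patches shown adds such a section, so the migrated rules would fire without ever reaching Alertmanager. Below is a minimal sketch of the missing block, not part of this series; it assumes the Alertmanager container is still reachable as alertmanager:9093, the same address the removed flag pointed at.

# Minimal sketch for prometheus.yml (assumption: Alertmanager still listens on alertmanager:9093,
# matching the -alertmanager.url flag that was removed from docker-compose.yml).
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

Before restarting the stack, the converted rule files and the main config can be sanity-checked with the 2.x tooling, e.g. promtool check rules rules/alert.rules_nodes and promtool check config /etc/prometheus/prometheus.yml from inside the prom/prometheus:v2.3.2 image.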