From c0541055d6b997671c34ab5f1050ef0a5cd14bba Mon Sep 17 00:00:00 2001
From: Vladimir Denisov
Date: Mon, 3 Sep 2018 11:17:58 +0300
Subject: [PATCH 1/3] Update monitoring/docker-compose.yml

Node exporter inside the container does not have access to the host's network
metrics, so we mount /proc into the container read-only (including PID 1's
/proc/1/net/dev).
---
 monitoring/docker-compose.yml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml
index 3c83847..7212fc1 100644
--- a/monitoring/docker-compose.yml
+++ b/monitoring/docker-compose.yml
@@ -9,9 +9,18 @@ services:
   # Runs on your node(s) and forwards node(host) metrics to Prometheus.
   master-nodeexporter:
-    image: prom/node-exporter:v0.14.0
+    image: prom/node-exporter:v0.15.2
     expose:
       - 9100
+    volumes:
+      - /proc:/host/proc:ro
+      - /proc/1/net/dev:/host/proc/net/dev:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.sysfs=/host/sys'
+      - '--collector.filesystem.ignored-mount-points="^/(sys|proc|dev|host|etc)($$|/)"'
     restart: always
     labels:
       container_group: monitoring

From 4dfc9dff147596d61b672f51a606909db641b69e Mon Sep 17 00:00:00 2001
From: Vladimir Denisov
Date: Thu, 13 Sep 2018 10:31:57 +0300
Subject: [PATCH 2/3] Changed Prometheus version, updated related section in
 monitoring/docker-compose.yml, updated Prometheus alerting rules

---
 monitoring/docker-compose.yml                 |  17 ++-
 monitoring/prometheus/prometheus.yml          | 114 +++++++++++------
 .../rules/alert.rules_container-groups        |  18 +--
 .../prometheus/rules/alert.rules_containers   |  78 ++++++------
 monitoring/prometheus/rules/alert.rules_nodes | 119 +++++++++---------
 monitoring/prometheus/rules/alert.rules_sites |  20 +--
 6 files changed, 201 insertions(+), 165 deletions(-)
 mode change 100644 => 100755 monitoring/prometheus/prometheus.yml
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_container-groups
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_containers
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_nodes
 mode change 100644 => 100755 monitoring/prometheus/rules/alert.rules_sites

diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml
index 7212fc1..009f56a 100644
--- a/monitoring/docker-compose.yml
+++ b/monitoring/docker-compose.yml
@@ -55,7 +55,7 @@ services:
   # Storage and search backend. Gets all metrics from cAdvisor and Nodeexporter and is the backend that Grafana runs on.
prometheus: - image: prom/prometheus:v1.7.1 + image: prom/prometheus:v2.3.2 expose: - 9090 volumes: @@ -66,14 +66,13 @@ services: - ./prometheus:/etc/prometheus restart: always command: - - "-config.file=/etc/prometheus/prometheus.yml" - - "-storage.local.path=/prometheus" - - "-web.console.libraries=/etc/prometheus/console_libraries" - - "-web.console.templates=/etc/prometheus/consoles" - - "-web.listen-address=:9090" - - "-alertmanager.url=http://alertmanager:9093" - - "-storage.local.memory-chunks=300000" - - "-storage.local.retention=744h" + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--query.lookback-delta=25s" + - "--web.console.libraries=/etc/prometheus/console_libraries" + - "--web.console.templates=/etc/prometheus/consoles" + - "--web.listen-address=:9090" + - "--storage.tsdb.retention=744h" labels: container_group: monitoring logging: diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml old mode 100644 new mode 100755 index 1c6967f..39fdc7a --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -1,7 +1,7 @@ # my global config global: - scrape_interval: 15s # By default, scrape targets every 15 seconds. - evaluation_interval: 15s # By default, scrape targets every 15 seconds. + scrape_interval: 10s # By default, scrape targets every 15 seconds. + evaluation_interval: 10s # By default, scrape targets every 15 seconds. # scrape_timeout is set to the global default (10s). # Attach these labels to any time series or alerts when communicating with @@ -21,19 +21,54 @@ rule_files: scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'node' - scrape_interval: 10s - scrape_timeout: 5s - static_configs: - - targets: ['master-nodeexporter:9100'] - labels: {'host': 'host1'} + - job_name: 'netdata' - - job_name: 'containers' - scrape_interval: 10s - scrape_timeout: 5s + metrics_path: /api/v1/allmetrics + params: + format: [prometheus] +# honor_labels: true static_configs: - - targets: ['master-cadvisor:8080'] - labels: {'host': 'host1'} + - targets: ['netdata:19999'] + metric_relabel_configs: + - source_labels: [chart] + regex: cgroup_(.+)_(.+_.+)\..* + target_label: container_group + replacement: '$1' + + - source_labels: [chart] + regex: cgroup_(.+)_(.+_.+)\..* + target_label: container_name + replacement: '$2' + +# - source_labels: [__name__] +# regex: (.*) +# replacement: ${1} +# target_label: container_name + + + # - job_name: 'node' + # scrape_interval: 10s + # scrape_timeout: 5s + # static_configs: + # - targets: ['master-nodeexporter:9100'] + # labels: {'host': 'host1'} + + # - job_name: 'containers' + # scrape_interval: 10s + # scrape_timeout: 5s + # static_configs: + # - targets: ['master-cadvisor:8080'] + # labels: {'host': 'host1'} + + # - job_name: 'job-count' + # scrape_interval: 10s + # scrape_timeout: 5s + # metrics_path: /services/admin/metrics + # static_configs: + # - targets: ['dev-backend-mypos:1337'] + # labels: {'host': 'host1'} + +# http://myposdev.vallab.ru/services/metrics #Will/23.08.16: dcom restart prometheus suffices to load config/rule changes @@ -45,32 +80,33 @@ scrape_configs: labels: {'host': 'host1'} # see https://github.com/prometheus/blackbox_exporter relabel - - job_name: 'service' - scrape_interval: 60s - scrape_timeout: 15s - metrics_path: /probe - # if your target is https, you either need to install cert in blackbox proble container - # or add below line to ignore verify - # 
tls_config:
-  #     insecure_skip_verify: true
-    params:
-      module: [http_2xx]  # Look for a HTTP 200 response.
-    file_sd_configs:
-      - files:
-        - /etc/prometheus/service.yml
-    relabel_configs:
-      - source_labels: [__address__]
-        regex: (.*)
-        target_label: __param_target
-        replacement: ${1}
-      - source_labels: [__address__]
-        regex: (.*)
-        target_label: service_url
-        replacement: ${1}
-      - source_labels: []
-        regex: .*
-        target_label: __address__
-        replacement: blackboxprober:9115
+  # - job_name: 'service'
+  #   scrape_interval: 10s
+  #   scrape_timeout: 10s
+  #   metrics_path: /probe
+  #   # if your target is https, you either need to install cert in blackbox proble container
+  #   # or add below line to ignore verify
+  #   # tls_config:
+  #   #   insecure_skip_verify: true
+  #   params:
+  #     # module: [http_2xx]  # Look for a HTTP 200 response.
+  #     module: [icmp]  # Look for a ping.
+  #   file_sd_configs:
+  #     - files:
+  #       - /etc/prometheus/service.yml
+  #   relabel_configs:
+  #     - source_labels: [__address__]
+  #       regex: (.*)
+  #       target_label: __param_target
+  #       replacement: ${1}
+  #     - source_labels: [__address__]
+  #       regex: (.*)
+  #       target_label: service_url
+  #       replacement: ${1}
+  #     - source_labels: []
+  #       regex: .*
+  #       target_label: __address__
+  #       replacement: blackboxprober:9115
 # - job_name: 'couchdb'
 #   scrape_interval: 10s
diff --git a/monitoring/prometheus/rules/alert.rules_container-groups b/monitoring/prometheus/rules/alert.rules_container-groups
old mode 100644
new mode 100755
index d8cf81b..754b7db
--- a/monitoring/prometheus/rules/alert.rules_container-groups
+++ b/monitoring/prometheus/rules/alert.rules_container-groups
@@ -1,8 +1,10 @@
-
-ALERT monitoring_container_group_missing_members
-  IF count(rate(container_last_seen{name=~".+",container_group=~"monitoring"}[5m])) by (container_group,host) < 10
-  FOR 5m
-  ANNOTATIONS {
-    summary = "CONTAINER GROUP WARNING: CONTAINER GROUP '{{ $labels.container_group }}' on '{{ $labels.host }}'",
-    description = "{{ $labels.container_group }} is missing containers. Container count is {{ $value }}/13.",
-  }
+groups:
+- name: container_groups
+  rules:
+  - alert: monitoring_container_group_missing_members
+    expr: count(rate(container_last_seen{name=~".+",container_group=~"monitoring"}[5m])) by (container_group,host) < 6
+    for: 5m
+    annotations:
+      summary: "CONTAINER GROUP WARNING: CONTAINER GROUP '{{ $labels.container_group }}' on '{{ $labels.host }}'"
+      description: "{{ $labels.container_group }} is missing containers. Container count is {{ $value }}/13."
+
diff --git a/monitoring/prometheus/rules/alert.rules_containers b/monitoring/prometheus/rules/alert.rules_containers
old mode 100644
new mode 100755
index 41d732f..29d6c4c
--- a/monitoring/prometheus/rules/alert.rules_containers
+++ b/monitoring/prometheus/rules/alert.rules_containers
@@ -1,45 +1,41 @@
+groups:
+- name: containers
+  rules:
+  - alert: high_cpu_usage_on_container
+    expr: sum(rate(container_cpu_usage_seconds_total{name=~".+"}[5m])) by (name,host) * 100 > 50
+    for: 5m
+    annotations:
+      summary: "HIGH CPU USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'"
+      description: "{{ $labels.name }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%."
+
+  - alert: elasticsearch_eating_memory
+    expr: sum(container_memory_rss{name=~"logging_elasticsearch_1"}) by (host, name) > 1200000000
+    for: 5m
+    annotations:
+      summary: "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'"
+      description: "{{ $labels.name }} is eating up a LOT of memory. 
Memory consumption of {{ $labels.name }} is at {{ humanize $value}}." -ALERT high_cpu_usage_on_container - IF sum(rate(container_cpu_usage_seconds_total{name=~".+"}[5m])) by (name,host) * 100 > 50 - FOR 5m - ANNOTATIONS { - summary = "HIGH CPU USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%.", - } + - alert: prometheus_eating_memory + expr: sum(container_memory_rss{name=~"monitoring_prometheus_1"}) by (host, name) > 1200000000 + for: 5m + annotations: + summary: "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'" + description: "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}." -ALERT elasticsearch_eating_memory - IF sum(container_memory_rss{name=~"logging_elasticsearch_1"}) by (host, name) > 1200000000 - FOR 5m - ANNOTATIONS { - summary = "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}.", - } + - alert: container_eating_memory + expr: sum(container_memory_rss{name=~".+",name!="logging_elasticsearch_1",name!="monitoring_prometheus_1"}) by (host,name) > 700000000 + for: 5m + annotations: + summary: "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'" + description: "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}." -ALERT prometheus_eating_memory - IF sum(container_memory_rss{name=~"monitoring_prometheus_1"}) by (host, name) > 1200000000 - FOR 5m - ANNOTATIONS { - summary = "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is eating up a LOT of memory. Memory consumption of {{ $labels.name }} is at {{ humanize $value}}.", - } - -ALERT container_eating_memory - IF sum(container_memory_rss{name=~".+",name!="logging_elasticsearch_1",name!="monitoring_prometheus_1"}) by (host,name) > 700000000 - FOR 5m - ANNOTATIONS { - summary = "HIGH MEMORY USAGE WARNING: CONTAINER '{{ $labels.name }}' on '{{ $labels.host }}'", - description = "{{ $labels.name }} is eating up a LOT of memory. 
Memory consumption of {{ $labels.name }} is at {{ humanize $value}}.",
-  }
-
-
-ALERT container_down
-IF (absent(container_memory_usage_bytes{name="logging_elasticsearch_1"})
-    or absent(container_memory_usage_bytes{name="monitoring_prometheus_1"})
-   )
-  FOR 5m
-  LABELS { severity = "Critical" }
-  ANNOTATIONS {
-    summary= "CONTAINER '{{ $labels.name }}' down",
-    description = "container with name '{{ $labels.name }}' is down for more than 5 minutes"
-  }
+  - alert: container_down
+    expr: (absent(container_memory_usage_bytes{name="logging_elasticsearch_1"}) or absent(container_memory_usage_bytes{name="monitoring_prometheus_1"}))
+    for: 5m
+    labels:
+      severity: Critical
+    annotations:
+      summary: "CONTAINER '{{ $labels.name }}' down"
+      description: "container with name '{{ $labels.name }}' is down for more than 5 minutes"
+
diff --git a/monitoring/prometheus/rules/alert.rules_nodes b/monitoring/prometheus/rules/alert.rules_nodes
old mode 100644
new mode 100755
index 970b1d5..9c40874
--- a/monitoring/prometheus/rules/alert.rules_nodes
+++ b/monitoring/prometheus/rules/alert.rules_nodes
@@ -1,60 +1,59 @@
-
-ALERT monitoring_service_down
-  IF up == 0
-  FOR 5m
-  ANNOTATIONS {
-    summary = "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'",
-    description = "The monitoring service '{{ $labels.job }}' is down.",
-  }
-
-ALERT high_load_on_node
-  IF node_load5 > 2
-  FOR 3m
-  ANNOTATIONS {
-    summary = "HIGH LOAD WARINING: NODE '{{ $labels.host }}'",
-    description = "{{ $labels.host}} is under high load. Load is {{ humanize $value }}.",
-  }
-
-ALERT node_running_out_of_memory
-  IF node_memory_MemAvailable < 1500000000
-  FOR 5m
-  ANNOTATIONS {
-    summary = "LOW MEMORY WARING: NODE '{{ $labels.host }}'",
-    description = "Less than 1.5GB of free memory. Free memory at {{ humanize $value }} GB.",
-  }
-
-ALERT node_running_out_of_disk_space
-  IF node_filesystem_free{mountpoint="/etc/hostname"} < 40000000000
-  FOR 5m
-  ANNOTATIONS {
-    summary = "LOW DISK SPACE WARING: NODE '{{ $labels.host }}'",
-    description = "Less than 40GB of free disk space. Free disk space at {{ humanize $value }} GB.",
-  }
-
-ALERT node_low_disk_space
-  IF 100 * min(node_filesystem_avail/node_filesystem_size{mountpoint=~"/etc/hostname|^/mnt.*|^/data.*"}) by (device, fstype, host, instance, job) < 10.0
-  FOR 5m
-  LABELS { severity = "Warning" }
-  ANNOTATIONS {
-    summary = "LOW DISK SPACE WARING: NODE '{{ $labels.host }}'",
-    description = "Warning, node '{{ $labels.host }}' has less than 10% of free disk space on device {{ $labels.device }} with mount point {{ $labels.mountpoint }}. Available disk space at {{ humanize $value }}%",
-  }
-
-ALERT node_restarted
-  IF node_time - node_boot_time < 300
-  FOR 1m
-  LABELS { severity = "Warning" }
-  ANNOTATIONS {
-    summary = "NODE RESTARTED WARNING: NODE '{{ $labels.host }}'",
-    description = "Warning: Node '{{ $labels.host }}' was restarted at {{ $value }} seconds ago.",
-  }
-
-################################### TEST ALERT
-
-ALERT TESTING_high_load_on_node
-  IF node_load1 > 0
-  FOR 1s
-  ANNOTATIONS {
-    summary = "+++ TESTING ++++ TESTING ++++ TESTING +++ ::: HIGH LOAD WARNING: NODE '{{ $labels.host }}'",
-    description = "{{ $labels.host}} is under high load. Load is {{ humanize $value }}.",
-  }
+groups:
+- name: nodes
+  rules:
+  - alert: monitoring_service_down
+    expr: up == 0
+    for: 5m
+    annotations:
+      summary: "MONITORING SERVICE DOWN WARNING: NODE '{{ $labels.host }}'"
+      description: "The monitoring service '{{ $labels.job }}' is down."
+
+
+  - alert: high_load_on_node
+    expr: node_load5 > 2
+    for: 3m
+    annotations:
+      summary: "HIGH LOAD WARNING: NODE '{{ $labels.host }}'"
+      description: "{{ $labels.host}} is under high load. Load is {{ humanize $value }}."
+
+
+  - alert: node_running_out_of_memory
+    expr: node_memory_MemAvailable < 1500000000
+    for: 5m
+    annotations:
+      summary: "LOW MEMORY WARNING: NODE '{{ $labels.host }}'"
+      description: "Less than 1.5GB of free memory. Free memory at {{ humanize $value }} GB."
+
+
+  - alert: node_running_out_of_disk_space
+    expr: node_filesystem_free{mountpoint="/etc/hostname"} < 40000000000
+    for: 5m
+    annotations:
+      summary: "LOW DISK SPACE WARNING: NODE '{{ $labels.host }}'"
+      description: Less than 40GB of free disk space. Free disk space at {{ humanize $value }} GB.
+
+
+  - alert: node_low_disk_space
+    expr: 100 * min(node_filesystem_avail/node_filesystem_size{mountpoint=~"/etc/hostname|^/mnt.*|^/data.*"}) by (device, fstype, host, instance, job) < 10.0
+    for: 5m
+    annotations:
+      summary: "LOW DISK SPACE WARNING: NODE '{{ $labels.host }}'"
+      description: "Warning, node '{{ $labels.host }}' has less than 10% of free disk space on device {{ $labels.device }} with mount point {{ $labels.mountpoint }}. Available disk space at {{ humanize $value }}%"
+
+  - alert: node_restarted
+    expr: node_time - node_boot_time < 300
+    for: 1m
+    annotations:
+      summary: "NODE RESTARTED WARNING: NODE '{{ $labels.host }}'"
+      description: "Warning: Node '{{ $labels.host }}' was restarted {{ $value }} seconds ago."
+
+
+################################### TEST ALERT
+
+  - alert: TESTING_high_load_on_node
+    expr: node_load1 > 0
+    for: 1s
+    annotations:
+      summary: "+++ TESTING ++++ TESTING ++++ TESTING +++ ::: HIGH LOAD WARNING: NODE '{{ $labels.host }}'"
+      description: "{{ $labels.host}} is under high load. Load is {{ humanize $value }}."
+
diff --git a/monitoring/prometheus/rules/alert.rules_sites b/monitoring/prometheus/rules/alert.rules_sites
old mode 100644
new mode 100755
index b362399..8b0c5d8
--- a/monitoring/prometheus/rules/alert.rules_sites
+++ b/monitoring/prometheus/rules/alert.rules_sites
@@ -1,9 +1,13 @@
-ALERT service_endpoint_down
-  IF probe_success{job='service'} == 0
-  FOR 3m
-  LABELS { severity = "Critical" }
-  ANNOTATIONS {
-    summary = "ENGOPS SERVICE NON-OPERATIONAL",
-    description = "Service {{ $labels.service_name }} with endpoint {{ $labels.service_url }} is not reachable. Please check whether the instance is running",
-  }
+groups:
+- name: sites
+  rules:
+  - alert: service_endpoint_down
+    expr: probe_success{job='service'} == 0
+    for: 3m
+    labels:
+      severity: Critical
+    annotations:
+      summary: ENGOPS SERVICE NON-OPERATIONAL
+      description: Service {{ $labels.service_name }} with endpoint {{ $labels.service_url }} is not reachable. Please check whether the instance is running
+

From cf018a51a851ffa1c015f2205ad97639bcb72387 Mon Sep 17 00:00:00 2001
From: Vladimir Denisov
Date: Thu, 13 Sep 2018 10:36:43 +0300
Subject: [PATCH 3/3] Revert to the previous version of prometheus.yml

---
 monitoring/prometheus/prometheus.yml | 114 +++++++++------------
 1 file changed, 39 insertions(+), 75 deletions(-)

diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml
index 39fdc7a..1c6967f 100755
--- a/monitoring/prometheus/prometheus.yml
+++ b/monitoring/prometheus/prometheus.yml
@@ -1,7 +1,7 @@
 # my global config
 global:
-  scrape_interval: 10s # By default, scrape targets every 15 seconds.
- evaluation_interval: 10s # By default, scrape targets every 15 seconds. + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. # scrape_timeout is set to the global default (10s). # Attach these labels to any time series or alerts when communicating with @@ -21,54 +21,19 @@ rule_files: scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'netdata' - - metrics_path: /api/v1/allmetrics - params: - format: [prometheus] -# honor_labels: true + - job_name: 'node' + scrape_interval: 10s + scrape_timeout: 5s static_configs: - - targets: ['netdata:19999'] - metric_relabel_configs: - - source_labels: [chart] - regex: cgroup_(.+)_(.+_.+)\..* - target_label: container_group - replacement: '$1' - - - source_labels: [chart] - regex: cgroup_(.+)_(.+_.+)\..* - target_label: container_name - replacement: '$2' - -# - source_labels: [__name__] -# regex: (.*) -# replacement: ${1} -# target_label: container_name - - - # - job_name: 'node' - # scrape_interval: 10s - # scrape_timeout: 5s - # static_configs: - # - targets: ['master-nodeexporter:9100'] - # labels: {'host': 'host1'} - - # - job_name: 'containers' - # scrape_interval: 10s - # scrape_timeout: 5s - # static_configs: - # - targets: ['master-cadvisor:8080'] - # labels: {'host': 'host1'} - - # - job_name: 'job-count' - # scrape_interval: 10s - # scrape_timeout: 5s - # metrics_path: /services/admin/metrics - # static_configs: - # - targets: ['dev-backend-mypos:1337'] - # labels: {'host': 'host1'} + - targets: ['master-nodeexporter:9100'] + labels: {'host': 'host1'} -# http://myposdev.vallab.ru/services/metrics + - job_name: 'containers' + scrape_interval: 10s + scrape_timeout: 5s + static_configs: + - targets: ['master-cadvisor:8080'] + labels: {'host': 'host1'} #Will/23.08.16: dcom restart prometheus suffices to load config/rule changes @@ -80,33 +45,32 @@ scrape_configs: labels: {'host': 'host1'} # see https://github.com/prometheus/blackbox_exporter relabel - # - job_name: 'service' - # scrape_interval: 10s - # scrape_timeout: 10s - # metrics_path: /probe - # # if your target is https, you either need to install cert in blackbox proble container - # # or add below line to ignore verify - # # tls_config: - # # insecure_skip_verify: true - # params: - # # module: [http_2xx] # Look for a HTTP 200 response. - # module: [icmp] # Look for a ping. - # file_sd_configs: - # - files: - # - /etc/prometheus/service.yml - # relabel_configs: - # - source_labels: [__address__] - # regex: (.*) - # target_label: __param_target - # replacement: ${1} - # - source_labels: [__address__] - # regex: (.*) - # target_label: service_url - # replacement: ${1} - # - source_labels: [] - # regex: .* - # target_label: __address__ - # replacement: blackboxprober:9115 + - job_name: 'service' + scrape_interval: 60s + scrape_timeout: 15s + metrics_path: /probe + # if your target is https, you either need to install cert in blackbox proble container + # or add below line to ignore verify + # tls_config: + # insecure_skip_verify: true + params: + module: [http_2xx] # Look for a HTTP 200 response. 
+ file_sd_configs: + - files: + - /etc/prometheus/service.yml + relabel_configs: + - source_labels: [__address__] + regex: (.*) + target_label: __param_target + replacement: ${1} + - source_labels: [__address__] + regex: (.*) + target_label: service_url + replacement: ${1} + - source_labels: [] + regex: .* + target_label: __address__ + replacement: blackboxprober:9115 # - job_name: 'couchdb' # scrape_interval: 10s
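Reviewer note on the Prometheus 2.x migration in PATCH 2/3: the compose command drops the old v1 flag -alertmanager.url=http://alertmanager:9093, and Prometheus 2.x no longer accepts that flag at all; Alertmanager endpoints have to be declared in prometheus.yml instead. None of the patches shown adds such a section, so the migrated rules would fire without ever reaching Alertmanager. Below is a minimal sketch of the missing block, not part of this series; it assumes the Alertmanager container is still reachable as alertmanager:9093, the same address the removed flag pointed at.

# Minimal sketch for prometheus.yml (assumption: Alertmanager still listens on alertmanager:9093,
# matching the -alertmanager.url flag that was removed from docker-compose.yml).
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

Before restarting the stack, the converted rule files and the main config can be sanity-checked with the 2.x tooling, e.g. promtool check rules rules/alert.rules_nodes and promtool check config /etc/prometheus/prometheus.yml from inside the prom/prometheus:v2.3.2 image.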