Compare commits
4 Commits
adf2c8d555
...
fb0915841d
Author | SHA1 | Date | |
---|---|---|---|
fb0915841d | |||
d8a3b0ab75 | |||
adfba06008 | |||
190f1480d2 |
@ -11,6 +11,9 @@ metrics or alerting.
|
||||
alertmanager for receiving alerts from prometheus and routing them
|
||||
to the correct configured receivers.
|
||||
|
||||
- [`cadvisor`](roles/cadvisor/README.md): Run and configure cAdvisor, Google's
|
||||
container performance and resource usage collection and aggregation daemon.
|
||||
|
||||
- [`grafana`](roles/grafana/README.md): a popular visualization and
|
||||
dashboard creation tool able to use various datasources.
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
namespace: finallycoffee
|
||||
name: observability
|
||||
version: 0.1.1
|
||||
version: 0.1.2
|
||||
readme: README.md
|
||||
authors:
|
||||
- transcaffeine <transcaffeine@finally.coffee>
|
||||
@ -18,3 +18,4 @@ tags:
|
||||
- prometheus
|
||||
- victoriametrics
|
||||
- grafana
|
||||
- alertmanager
|
||||
|
@ -1,11 +1,4 @@
|
||||
---
|
||||
alertmanager_user: alertmanager
|
||||
alertmanager_version: 0.27.0
|
||||
alertmanager_base_path: /opt/alertmanager
|
||||
alertmanager_config_path: "{{ alertmanager_base_path }}/config"
|
||||
alertmanager_config_file: "{{ alertmanager_config_path }}/alertmanager.yml"
|
||||
alertmanager_data_path: "{{ alertmanager_base_path }}/data"
|
||||
|
||||
alertmanager_container_name: alertmanager
|
||||
alertmanager_container_image_name: alertmanager
|
||||
alertmanager_container_image_namespace: prometheus/
|
||||
@ -22,7 +15,7 @@ alertmanager_container_image_reference: >-
|
||||
alertmanager_container_image_repository + ':'
|
||||
+ (alertmanager_container_image_tag | default('v' + alertmanager_version))
|
||||
}}
|
||||
|
||||
alertmanager_container_image_source: pull
|
||||
alertmanager_container_image_force_pull: "{{ alertmanager_container_image_tag is defined }}"
|
||||
|
||||
alertmanager_container_default_volumes:
|
||||
@ -32,8 +25,5 @@ alertmanager_container_volumes: >-
|
||||
{{ alertmanager_container_default_volumes
|
||||
+ alertmanager_container_extra_volumes | default([]) }}
|
||||
alertmanager_container_restart_policy: "unless-stopped"
|
||||
|
||||
alertmanager_config:
|
||||
global: {}
|
||||
route: {}
|
||||
receivers: []
|
||||
alertmanager_container_state: >-2
|
||||
{{ (alertmanager_state == 'present') | ternary('started', 'absent') }}
|
15
roles/alertmanager/defaults/main/main.yml
Normal file
15
roles/alertmanager/defaults/main/main.yml
Normal file
@ -0,0 +1,15 @@
|
||||
---
|
||||
alertmanager_user: alertmanager
|
||||
alertmanager_version: 0.27.0
|
||||
alertmanager_state: present
|
||||
alertmanager_deployment_method: docker
|
||||
alertmanager_base_path: /opt/alertmanager
|
||||
alertmanager_config_path: "{{ alertmanager_base_path }}/config"
|
||||
alertmanager_config_file: "{{ alertmanager_config_path }}/alertmanager.yml"
|
||||
alertmanager_data_path: "{{ alertmanager_base_path }}/data"
|
||||
|
||||
|
||||
alertmanager_config:
|
||||
global: {}
|
||||
route: {}
|
||||
receivers: []
|
@ -1,8 +1,8 @@
|
||||
---
|
||||
|
||||
- name: Ensure alertmanager is restarted
|
||||
community.docker.docker_container:
|
||||
name: "{{ alertmanager_container_name }}"
|
||||
state: started
|
||||
state: "{{ alertmanager_container_state }}"
|
||||
restart: true
|
||||
listen: restart-alertmanager
|
||||
when: alertmanager_deployment_method == 'docker'
|
||||
|
10
roles/alertmanager/meta/main.yml
Normal file
10
roles/alertmanager/meta/main.yml
Normal file
@ -0,0 +1,10 @@
|
||||
---
|
||||
allow_duplicates: true
|
||||
dependencies: []
|
||||
galaxy_info:
|
||||
role_name: alertmanager
|
||||
description: Deploy and configure prometheus alertmanager
|
||||
galaxy_tags:
|
||||
- prometheus
|
||||
- alertmanager
|
||||
- observability
|
21
roles/alertmanager/tasks/deploy-docker.yml
Normal file
21
roles/alertmanager/tasks/deploy-docker.yml
Normal file
@ -0,0 +1,21 @@
|
||||
---
|
||||
- name: Ensure container image is {{ alertmanager_state }} on host
|
||||
community.docker.docker_image:
|
||||
name: "{{ alertmanager_container_image_reference }}"
|
||||
state: "{{ alertmanager_state }}"
|
||||
source: "{{ alertmanager_container_image_source }}"
|
||||
force_source: "{{ alertmanager_container_image_force_pull | bool }}"
|
||||
|
||||
- name: Ensure container '{{ alertmanager_container_name }}' is {{ alertmanager_container_state }}
|
||||
community.docker.docker_container:
|
||||
name: "{{ alertmanager_container_name }}"
|
||||
image: "{{ alertmanager_container_image_reference }}"
|
||||
env: "{{ alertmanager_container_env | default(omit) }}"
|
||||
user: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
|
||||
ports: "{{ alertmanager_container_ports | default(omit) }}"
|
||||
volumes: "{{ alertmanager_container_volumes | default(omit) }}"
|
||||
networks: "{{ alertmanager_container_networks | default(omit) }}"
|
||||
purge_networks: "{{ alertmanager_container_purge_networks | default(omit) }}"
|
||||
etc_hosts: "{{ alertmanager_container_etc_hosts | default(omit) }}"
|
||||
restart_policy: "{{ alertmanager_container_restart_policy }}"
|
||||
state: "{{ alertmanager_container_state }}"
|
@ -1,16 +1,29 @@
|
||||
---
|
||||
- name: Ensure state is valid
|
||||
ansible.builtin.fail:
|
||||
msg: >-2
|
||||
Invalid state '{{ alertmanager_state }}'! Valid
|
||||
states are {{ alertmanager_states | join(', ') }}.
|
||||
when: alertmanager_state not in alertmanager_states
|
||||
|
||||
- name: Ensure alertmanager user '{{ alertmanager_user }}' exists
|
||||
- name: Ensure deployment method is valid
|
||||
ansible.builtin.fail:
|
||||
msg: >-2
|
||||
Invalid deployment method {{ alertmanager_deployment_method }}!
|
||||
Supported deployment methods are {{ alertmanager_deployment_methods | join(', ') }}.
|
||||
when: alertmanager_deployment_method not in alertmanager_deployment_methods
|
||||
|
||||
- name: Ensure alertmanager user '{{ alertmanager_user }}' is {{ alertmanager_state }}
|
||||
ansible.builtin.user:
|
||||
name: "{{ alertmanager_user }}"
|
||||
state: present
|
||||
state: "{{ alertmanager_state }}"
|
||||
system: true
|
||||
register: alertmanager_user_info
|
||||
|
||||
- name: Ensure mounts are created
|
||||
- name: Ensure mounts are {{ alertmanager_state }}
|
||||
ansible.builtin.file:
|
||||
dest: "{{ item.path }}"
|
||||
state: directory
|
||||
state: "{{ (alertmanager_state == 'present') | ternary('directory', 'absent') }}"
|
||||
owner: "{{ item.owner | default(alertmanager_user_info.uid | default(alertmanager_user)) }}"
|
||||
# Prefer an explicit per-mount `group`; fall back to `item.owner` (previous behaviour), then to the service user's group.
group: "{{ item.group | default(item.owner | default(alertmanager_user_info.group | default(alertmanager_user))) }}"
|
||||
mode: "{{ item.mode | default('0755') }}"
|
||||
@ -24,28 +37,12 @@
|
||||
dest: "{{ alertmanager_config_file }}"
|
||||
content: "{{ alertmanager_config | to_nice_yaml }}"
|
||||
owner: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
|
||||
owner: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
|
||||
group: "{{ alertmanager_user_info.group | default(alertmanager_user) }}"
|
||||
mode: "0640"
|
||||
when: alertmanager_state == 'present'
|
||||
notify:
|
||||
- restart-alertmanager
|
||||
|
||||
- name: Ensure container image is present on host
|
||||
community.docker.docker_image:
|
||||
name: "{{ alertmanager_container_image_reference }}"
|
||||
state: present
|
||||
source: pull
|
||||
force_source: "{{ alertmanager_container_image_force_pull | bool }}"
|
||||
|
||||
- name: Ensure container '{{ alertmanager_container_name }}' is running
|
||||
community.docker.docker_container:
|
||||
name: "{{ alertmanager_container_name }}"
|
||||
image: "{{ alertmanager_container_image_reference }}"
|
||||
env: "{{ alertmanager_container_env | default(omit) }}"
|
||||
user: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
|
||||
ports: "{{ alertmanager_container_ports | default(omit) }}"
|
||||
volumes: "{{ alertmanager_container_volumes | default(omit) }}"
|
||||
networks: "{{ alertmanager_container_networks | default(omit) }}"
|
||||
purge_networks: "{{ alertmanager_container_purge_networks | default(omit) }}"
|
||||
etc_hosts: "{{ alertmanager_container_etc_hosts | default(omit) }}"
|
||||
restart_policy: "{{ alertmanager_container_restart_policy }}"
|
||||
state: started
|
||||
- name: Deploy alertmanager using {{ alertmanager_deployment_method }}
|
||||
ansible.builtin.include_tasks:
|
||||
file: "deploy-{{ alertmanager_deployment_method }}.yml"
|
||||
|
6
roles/alertmanager/vars/main.yml
Normal file
6
roles/alertmanager/vars/main.yml
Normal file
@ -0,0 +1,6 @@
|
||||
---
|
||||
alertmanager_states:
|
||||
- present
|
||||
- absent
|
||||
alertmanager_deployment_methods:
|
||||
- docker
|
@ -1,52 +0,0 @@
|
||||
---
|
||||
cadvisor_version: 0.49.1
|
||||
|
||||
cadvisor_container_name: cadvisor
|
||||
cadvisor_container_image_name: gcr.io/cadvisor/cadvisor
|
||||
cadvisor_container_image_tag: ~
|
||||
cadvisor_container_image_ref: >-
|
||||
{{ cadvisor_container_image_name }}:{{ cadvisor_container_image_tag | default('v' + cadvisor_version, True) }}
|
||||
cadvisor_container_volumes: >-
|
||||
{{ cadvisor_container_base_volumes + cadvisor_container_extra_volumes | default([], True) }}
|
||||
cadvisor_container_extra_volumes: ~
|
||||
cadvisor_container_env: ~
|
||||
cadvisor_container_labels: "{{ cadvisor_container_base_labels | combine(cadvisor_container_extra_labels) }}"
|
||||
cadvisor_container_extra_labels: {}
|
||||
cadvisor_container_ports: ~
|
||||
cadvisor_container_networks: ~
|
||||
cadvisor_container_etc_hosts: ~
|
||||
cadvisor_container_devices: [ "/dev/kmsg:/dev/kmsg:rwm" ]
|
||||
cadvisor_container_privileged: yes
|
||||
cadvisor_container_pid_mode: "host"
|
||||
cadvisor_container_userns_mode: "host"
|
||||
cadvisor_container_capabilities: ~
|
||||
cadvisor_container_restart_policy: unless-stopped
|
||||
cadvisor_container_command: >-2
|
||||
{{ ["--docker_only=false"]
|
||||
+ (["--disable_metrics=" + cadvisor_disabled_metrics | join( ',' )]
|
||||
if cadvisor_disabled_metrics | default(false, True) else [])
|
||||
+ (["--enable_metrics=" + cadvisor_force_enable_metrics | join( ',' )]
|
||||
if cadvisor_force_enable_metrics | default(false, True) else [])
|
||||
}}
|
||||
cadvisor_container_base_labels:
|
||||
version: "{{ cadvisor_version }}"
|
||||
cadvisor_container_base_volumes:
|
||||
- "/:/rootfs:ro"
|
||||
- "/var/run:/var/run:ro"
|
||||
- "/sys:/sys:ro"
|
||||
- "/var/lib/docker/:/var/lib/docker:ro"
|
||||
- "/dev/disk/:/dev/disk:ro"
|
||||
|
||||
cadvisor_disabled_metrics:
|
||||
- advtcp
|
||||
- cpu_topology
|
||||
- cpuset
|
||||
- hugetlb
|
||||
- memory_numa
|
||||
- process
|
||||
- referenced_memory
|
||||
- resctrl
|
||||
- sched
|
||||
- tcp
|
||||
- udp
|
||||
cadvisor_force_enable_metrics: []
|
56
roles/cadvisor/defaults/main/container.yml
Normal file
56
roles/cadvisor/defaults/main/container.yml
Normal file
@ -0,0 +1,56 @@
|
||||
---
|
||||
cadvisor_container_image_registry: gcr.io
|
||||
cadvisor_container_image_namespace: cadvisor
|
||||
cadvisor_container_image_name: cadvisor
|
||||
cadvisor_container_image: >-2
|
||||
{{
|
||||
[
|
||||
cadvisor_container_image_registry,
|
||||
cadvisor_container_image_namespace,
|
||||
cadvisor_container_image_name,
|
||||
] | flatten | join('/')
|
||||
}}
|
||||
cadvisor_container_image_tag: ~
|
||||
cadvisor_container_image_ref: >-2
|
||||
{{ cadvisor_container_image }}:{{ cadvisor_container_image_tag | default('v' + cadvisor_version, true) }}
|
||||
cadvisor_container_image_source: pull
|
||||
# Force a re-pull whenever a custom image tag is configured (non-empty), since such tags may be moving (e.g. 'latest').
# Note: the tag must not be piped through `| bool` - that yields false for any ordinary tag string.
cadvisor_container_image_force_source: >-2
|
||||
{{ (cadvisor_container_image_tag | default('', true) | string | length) > 0 }}
|
||||
|
||||
cadvisor_container_state: >-2
|
||||
{{ (cadvisor_state == 'present') | ternary('started', 'absent') }}
|
||||
|
||||
cadvisor_container_name: cadvisor
|
||||
cadvisor_container_volumes: >-2
|
||||
{{ cadvisor_container_base_volumes + cadvisor_container_extra_volumes | default([], true) }}
|
||||
cadvisor_container_extra_volumes: ~
|
||||
cadvisor_container_env: ~
|
||||
cadvisor_container_labels: >-2
|
||||
{{ cadvisor_container_base_labels | combine(cadvisor_container_extra_labels) }}
|
||||
cadvisor_container_extra_labels: {}
|
||||
cadvisor_container_ports: ~
|
||||
cadvisor_container_networks: ~
|
||||
cadvisor_container_etc_hosts: ~
|
||||
cadvisor_container_devices:
|
||||
- "/dev/kmsg:/dev/kmsg:rwm"
|
||||
cadvisor_container_privileged: true
|
||||
cadvisor_container_pid_mode: "host"
|
||||
cadvisor_container_userns_mode: "host"
|
||||
cadvisor_container_capabilities: ~
|
||||
cadvisor_container_restart_policy: "unless-stopped"
|
||||
cadvisor_container_command: >-2
|
||||
{{ ["--docker_only=false"]
|
||||
+ (["--disable_metrics=" + cadvisor_disabled_metrics | join( ',' )]
|
||||
if cadvisor_disabled_metrics | default(false, true) else [])
|
||||
+ (["--enable_metrics=" + cadvisor_force_enable_metrics | join( ',' )]
|
||||
if cadvisor_force_enable_metrics | default(false, true) else [])
|
||||
}}
|
||||
cadvisor_container_base_labels:
|
||||
version: "{{ cadvisor_version }}"
|
||||
cadvisor_container_base_volumes:
|
||||
- "/:/rootfs:ro"
|
||||
- "/var/run:/var/run:ro"
|
||||
- "/sys:/sys:ro"
|
||||
- "/var/lib/docker/:/var/lib/docker:ro"
|
||||
- "/dev/disk/:/dev/disk:ro"
|
||||
|
18
roles/cadvisor/defaults/main/main.yml
Normal file
18
roles/cadvisor/defaults/main/main.yml
Normal file
@ -0,0 +1,18 @@
|
||||
---
|
||||
cadvisor_version: "0.50.0"
|
||||
cadvisor_state: present
|
||||
cadvisor_deployment_method: docker
|
||||
|
||||
cadvisor_disabled_metrics:
|
||||
- advtcp
|
||||
- cpu_topology
|
||||
- cpuset
|
||||
- hugetlb
|
||||
- memory_numa
|
||||
- process
|
||||
- referenced_memory
|
||||
- resctrl
|
||||
- sched
|
||||
- tcp
|
||||
- udp
|
||||
cadvisor_force_enable_metrics: []
|
11
roles/cadvisor/meta/main.yml
Normal file
11
roles/cadvisor/meta/main.yml
Normal file
@ -0,0 +1,11 @@
|
||||
---
|
||||
allow_duplicates: true
|
||||
dependencies: []
|
||||
galaxy_info:
|
||||
role_name: cadvisor
|
||||
description: Deploy cadvisor (Container Advisor), a container performance and resource usage aggregation daemon
|
||||
galaxy_tags:
|
||||
- cadvisor
|
||||
- observability
|
||||
- container
|
||||
- docker
|
25
roles/cadvisor/tasks/deploy-docker.yml
Normal file
25
roles/cadvisor/tasks/deploy-docker.yml
Normal file
@ -0,0 +1,25 @@
|
||||
---
|
||||
- name: Ensure cadvisor container image '{{ cadvisor_container_image_ref }}' is {{ cadvisor_state }}
|
||||
community.docker.docker_image:
|
||||
name: "{{ cadvisor_container_image_ref }}"
|
||||
state: "{{ cadvisor_state }}"
|
||||
source: "{{ cadvisor_container_image_source }}"
|
||||
force_source: "{{ cadvisor_container_image_force_source }}"
|
||||
|
||||
- name: Ensure cadvisor container '{{ cadvisor_container_name }}' is {{ cadvisor_container_state }}
|
||||
community.docker.docker_container:
|
||||
name: "{{ cadvisor_container_name }}"
|
||||
image: "{{ cadvisor_container_image_ref }}"
|
||||
env: "{{ cadvisor_container_env | default(omit, true) }}"
|
||||
ports: "{{ cadvisor_container_ports | default(omit, true) }}"
|
||||
labels: "{{ cadvisor_container_labels }}"
|
||||
devices: "{{ cadvisor_container_devices }}"
|
||||
volumes: "{{ cadvisor_container_volumes }}"
|
||||
networks: "{{ cadvisor_container_networks | default(omit, true) }}"
|
||||
etc_hosts: "{{ cadvisor_container_etc_hosts | default(omit, true) }}"
|
||||
privileged: "{{ cadvisor_container_privileged }}"
|
||||
command: "{{ cadvisor_container_command }}"
|
||||
pid_mode: "{{ cadvisor_container_pid_mode | default(omit, true) }}"
|
||||
userns_mode: "{{ cadvisor_container_userns_mode | default(omit, true) }}"
|
||||
restart_policy: "{{ cadvisor_container_restart_policy }}"
|
||||
state: "{{ cadvisor_container_state }}"
|
@ -1,26 +1,18 @@
|
||||
---
|
||||
- name: Ensure state is valid
|
||||
ansible.builtin.fail:
|
||||
msg: >-2
|
||||
Unknown state '{{ cadvisor_state }}'! Supported
|
||||
states are: {{ cadvisor_states | join(', ') }}.
|
||||
when: cadvisor_state not in cadvisor_states
|
||||
|
||||
- name: Ensure container image is present
|
||||
docker_image:
|
||||
name: "{{ cadvisor_container_image_ref }}"
|
||||
state: present
|
||||
source: pull
|
||||
force_source: "{{ cadvisor_container_image_tag|default(False, True) | bool }}"
|
||||
- name: Ensure deployment method is valid
|
||||
ansible.builtin.fail:
|
||||
msg: >-2
|
||||
Unknown deployment method '{{ cadvisor_deployment_method }}'! Supported
|
||||
deployment methods are: {{ cadvisor_deployment_methods | join(', ') }}.
|
||||
when: cadvisor_deployment_method not in cadvisor_deployment_methods
|
||||
|
||||
- name: Ensure cadvisor container is running
|
||||
docker_container:
|
||||
name: "{{ cadvisor_container_name }}"
|
||||
image: "{{ cadvisor_container_image_ref }}"
|
||||
env: "{{ cadvisor_container_env | default(omit, True) }}"
|
||||
ports: "{{ cadvisor_container_ports | default(omit, True) }}"
|
||||
labels: "{{ cadvisor_container_labels }}"
|
||||
devices: "{{ cadvisor_container_devices }}"
|
||||
volumes: "{{ cadvisor_container_volumes }}"
|
||||
networks: "{{ cadvisor_container_networks | default(omit, True) }}"
|
||||
etc_hosts: "{{ cadvisor_container_etc_hosts | default(omit, True) }}"
|
||||
privileged: "{{ cadvisor_container_privileged }}"
|
||||
command: "{{ cadvisor_container_command }}"
|
||||
pid_mode: "{{ cadvisor_container_pid_mode | default(omit, True) }}"
|
||||
userns_mode: "{{ cadvisor_container_userns_mode | default(omit, True) }}"
|
||||
restart_policy: "{{ cadvisor_container_restart_policy }}"
|
||||
state: started
|
||||
- name: Deploy using {{ cadvisor_deployment_method }}
|
||||
ansible.builtin.include_tasks:
|
||||
file: "deploy-{{ cadvisor_deployment_method }}.yml"
|
||||
|
6
roles/cadvisor/vars/main.yml
Normal file
6
roles/cadvisor/vars/main.yml
Normal file
@ -0,0 +1,6 @@
|
||||
---
|
||||
cadvisor_states:
|
||||
- present
|
||||
- absent
|
||||
cadvisor_deployment_methods:
|
||||
- docker
|
Loading…
Reference in New Issue
Block a user