Compare commits

...

4 Commits

16 changed files with 215 additions and 116 deletions

View File

@ -11,6 +11,9 @@ metrics or alerting.
alertmanager for receiving alerts from prometheus and routing them
to the correct configured receivers.
- [`cadvisor`](roles/cadvisor/README.md): Run and configure cAdvisor, googles'
container performance and resource usage collection and aggregation daemon.
- [`grafana`](roles/grafana/README.md): a popular visualization and
dashboard creation tool able to use various datasources.

View File

@ -1,6 +1,6 @@
namespace: finallycoffee
name: observability
version: 0.1.1
version: 0.1.2
readme: README.md
authors:
- transcaffeine <transcaffeine@finally.coffee>
@ -18,3 +18,4 @@ tags:
- prometheus
- victoriametrics
- grafana
- alertmanager

View File

@ -1,11 +1,4 @@
---
alertmanager_user: alertmanager
alertmanager_version: 0.27.0
alertmanager_base_path: /opt/alertmanager
alertmanager_config_path: "{{ alertmanager_base_path }}/config"
alertmanager_config_file: "{{ alertmanager_config_path }}/alertmanager.yml"
alertmanager_data_path: "{{ alertmanager_base_path }}/data"
alertmanager_container_name: alertmanager
alertmanager_container_image_name: alertmanager
alertmanager_container_image_namespace: prometheus/
@ -22,7 +15,7 @@ alertmanager_container_image_reference: >-
alertmanager_container_image_repository + ':'
+ (alertmanager_container_image_tag | default('v' + alertmanager_version))
}}
alertmanager_container_image_source: pull
alertmanager_container_image_force_pull: "{{ alertmanager_container_image_tag is defined }}"
alertmanager_container_default_volumes:
@ -32,8 +25,5 @@ alertmanager_container_volumes: >-
{{ alertmanager_container_default_volumes
+ alertmanager_container_extra_volumes | default([]) }}
alertmanager_container_restart_policy: "unless-stopped"
alertmanager_config:
global: {}
route: {}
receivers: []
alertmanager_container_state: >-2
{{ (alertmanager_state == 'present') | ternary('started', 'absent') }}

View File

@ -0,0 +1,15 @@
---
alertmanager_user: alertmanager
alertmanager_version: 0.27.0
alertmanager_state: present
alertmanager_deployment_method: docker
alertmanager_base_path: /opt/alertmanager
alertmanager_config_path: "{{ alertmanager_base_path }}/config"
alertmanager_config_file: "{{ alertmanager_config_path }}/alertmanager.yml"
alertmanager_data_path: "{{ alertmanager_base_path }}/data"
alertmanager_config:
global: {}
route: {}
receivers: []

View File

@ -1,8 +1,8 @@
---
- name: Ensure alertmanager is restarted
community.docker.docker_container:
name: "{{ alertmanager_container_name }}"
state: started
state: "{{ alertmanager_container_state }}"
restart: true
listen: restart-alertmanager
when: alertmanager_deployment_method == 'docker'

View File

@ -0,0 +1,10 @@
---
allow_duplicates: true
dependencies: []
galaxy_info:
role_name: alertmanager
description: Deploy and configure prometheus alertmanager
galaxy_tags:
- prometheus
- alertmanager
- observability

View File

@ -0,0 +1,21 @@
---
- name: Ensure container image is {{ alertmanager_state }} on host
community.docker.docker_image:
name: "{{ alertmanager_container_image_reference }}"
state: "{{ alertmanager_state }}"
source: "{{ alertmanager_container_image_source }}"
force_source: "{{ alertmanager_container_image_force_pull | bool }}"
- name: Ensure container '{{ alertmanager_container_name }}' is {{ alertmanager_container_state }}
community.docker.docker_container:
name: "{{ alertmanager_container_name }}"
image: "{{ alertmanager_container_image_reference }}"
env: "{{ alertmanager_container_env | default(omit) }}"
user: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
ports: "{{ alertmanager_container_ports | default(omit) }}"
volumes: "{{ alertmanager_container_volumes | default(omit) }}"
networks: "{{ alertmanager_container_networks | default(omit) }}"
purge_networks: "{{ alertmanager_container_purge_networks | default(omit) }}"
etc_hosts: "{{ alertmanager_container_etc_hosts | default(omit) }}"
restart_policy: "{{ alertmanager_container_restart_policy }}"
state: "{{ alertmanager_container_state }}"

View File

@ -1,16 +1,29 @@
---
- name: Ensure state is valid
ansible.builtin.fail:
msg: >-2
Invalid state '{{ alertmanager_state }}'! Valid
states are {{ alertmanager_states | join(', ') }}.
when: alertmanager_state not in alertmanager_states
- name: Ensure alertmanager user '{{ alertmanager_user }}' exists
- name: Ensure deployment method is valid
ansible.builtin.fail:
msg: >-2
Invalid deployment method {{ alertmanager_deployment_method }}!
Supported deployment methods are {{ alertmanager_deployment_methods | join(', ') }}.
when: alertmanager_deployment_method not in alertmanager_deployment_methods
- name: Ensure alertmanager user '{{ alertmanager_user }}' is {{ alertmanager_state }}
ansible.builtin.user:
name: "{{ alertmanager_user }}"
state: present
state: "{{ alertmanager_state }}"
system: true
register: alertmanager_user_info
- name: Ensure mounts are created
- name: Ensure mounts are {{ alertmanager_state }}
ansible.builtin.file:
dest: "{{ item.path }}"
state: directory
state: "{{ (alertmanager_state == 'present') | ternary('directory', 'absent') }}"
owner: "{{ item.owner | default(alertmanager_user_info.uid | default(alertmanager_user)) }}"
group: "{{ item.owner | default(alertmanager_user_info.group | default(alertmanager_user)) }}"
mode: "{{ item.mode | default('0755') }}"
@ -24,28 +37,12 @@
dest: "{{ alertmanager_config_file }}"
content: "{{ alertmanager_config | to_nice_yaml }}"
owner: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
owner: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
group: "{{ alertmanager_user_info.group | default(alertmanager_user) }}"
mode: "0640"
when: alertmanager_state == 'present'
notify:
- restart-alertmanager
- name: Ensure container image is present on host
community.docker.docker_image:
name: "{{ alertmanager_container_image_reference }}"
state: present
source: pull
force_source: "{{ alertmanager_container_image_force_pull | bool }}"
- name: Ensure container '{{ alertmanager_container_name }}' is running
community.docker.docker_container:
name: "{{ alertmanager_container_name }}"
image: "{{ alertmanager_container_image_reference }}"
env: "{{ alertmanager_container_env | default(omit) }}"
user: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
ports: "{{ alertmanager_container_ports | default(omit) }}"
volumes: "{{ alertmanager_container_volumes | default(omit) }}"
networks: "{{ alertmanager_container_networks | default(omit) }}"
purge_networks: "{{ alertmanager_container_purge_networks | default(omit) }}"
etc_hosts: "{{ alertmanager_container_etc_hosts | default(omit) }}"
restart_policy: "{{ alertmanager_container_restart_policy }}"
state: started
- name: Deploy alertmanager using {{ alertmanager_deployment_method }}
ansible.builtin.include_tasks:
file: "deploy-{{ alertmanager_deployment_method }}.yml"

View File

@ -0,0 +1,6 @@
---
alertmanager_states:
- present
- absent
alertmanager_deployment_methods:
- docker

View File

@ -1,52 +0,0 @@
---
cadvisor_version: 0.49.1
cadvisor_container_name: cadvisor
cadvisor_container_image_name: gcr.io/cadvisor/cadvisor
cadvisor_container_image_tag: ~
cadvisor_container_image_ref: >-
{{ cadvisor_container_image_name }}:{{ cadvisor_container_image_tag | default('v' + cadvisor_version, True) }}
cadvisor_container_volumes: >-
{{ cadvisor_container_base_volumes + cadvisor_container_extra_volumes | default([], True) }}
cadvisor_container_extra_volumes: ~
cadvisor_container_env: ~
cadvisor_container_labels: "{{ cadvisor_container_base_labels | combine(cadvisor_container_extra_labels) }}"
cadvisor_container_extra_labels: {}
cadvisor_container_ports: ~
cadvisor_container_networks: ~
cadvisor_container_etc_hosts: ~
cadvisor_container_devices: [ "/dev/kmsg:/dev/kmsg:rwm" ]
cadvisor_container_privileged: yes
cadvisor_container_pid_mode: "host"
cadvisor_container_userns_mode: "host"
cadvisor_container_capabilities: ~
cadvisor_container_restart_policy: unless-stopped
cadvisor_container_command: >-2
{{ ["--docker_only=false"]
+ (["--disable_metrics=" + cadvisor_disabled_metrics | join( ',' )]
if cadvisor_disabled_metrics | default(false, True) else [])
+ (["--enable_metrics=" + cadvisor_force_enable_metrics | join( ',' )]
if cadvisor_force_enable_metrics | default(false, True) else [])
}}
cadvisor_container_base_labels:
version: "{{ cadvisor_version }}"
cadvisor_container_base_volumes:
- "/:/rootfs:ro"
- "/var/run:/var/run:ro"
- "/sys:/sys:ro"
- "/var/lib/docker/:/var/lib/docker:ro"
- "/dev/disk/:/dev/disk:ro"
cadvisor_disabled_metrics:
- advtcp
- cpu_topology
- cpuset
- hugetlb
- memory_numa
- process
- referenced_memory
- resctrl
- sched
- tcp
- udp
cadvisor_force_enable_metrics: []

View File

@ -0,0 +1,56 @@
---
cadvisor_container_image_registry: gcr.io
cadvisor_container_image_namespace: cadvisor
cadvisor_container_image_name: cadvisor
cadvisor_container_image: >-2
{{
[
cadvisor_container_image_registry,
cadvisor_container_image_namespace,
cadvisor_container_image_name,
] | flatten | join('/')
}}
cadvisor_container_image_tag: ~
cadvisor_container_image_ref: >-2
{{ cadvisor_container_image }}:{{ cadvisor_container_image_tag | default('v' + cadvisor_version, true) }}
cadvisor_container_image_source: pull
cadvisor_container_image_force_source: >-2
{{ cadvisor_container_image_tag | default(false, true) | bool }}
cadvisor_container_state: >-2
{{ (cadvisor_state == 'present') | ternary('started', 'absent') }}
cadvisor_container_name: cadvisor
cadvisor_container_volumes: >-2
{{ cadvisor_container_base_volumes + cadvisor_container_extra_volumes | default([], true) }}
cadvisor_container_extra_volumes: ~
cadvisor_container_env: ~
cadvisor_container_labels: >-2
{{ cadvisor_container_base_labels | combine(cadvisor_container_extra_labels) }}
cadvisor_container_extra_labels: {}
cadvisor_container_ports: ~
cadvisor_container_networks: ~
cadvisor_container_etc_hosts: ~
cadvisor_container_devices:
- "/dev/kmsg:/dev/kmsg:rwm"
cadvisor_container_privileged: true
cadvisor_container_pid_mode: "host"
cadvisor_container_userns_mode: "host"
cadvisor_container_capabilities: ~
cadvisor_container_restart_policy: "unless-stopped"
cadvisor_container_command: >-2
{{ ["--docker_only=false"]
+ (["--disable_metrics=" + cadvisor_disabled_metrics | join( ',' )]
if cadvisor_disabled_metrics | default(false, true) else [])
+ (["--enable_metrics=" + cadvisor_force_enable_metrics | join( ',' )]
if cadvisor_force_enable_metrics | default(false, true) else [])
}}
cadvisor_container_base_labels:
version: "{{ cadvisor_version }}"
cadvisor_container_base_volumes:
- "/:/rootfs:ro"
- "/var/run:/var/run:ro"
- "/sys:/sys:ro"
- "/var/lib/docker/:/var/lib/docker:ro"
- "/dev/disk/:/dev/disk:ro"

View File

@ -0,0 +1,18 @@
---
cadvisor_version: "0.50.0"
cadvisor_state: present
cadvisor_deployment_method: docker
cadvisor_disabled_metrics:
- advtcp
- cpu_topology
- cpuset
- hugetlb
- memory_numa
- process
- referenced_memory
- resctrl
- sched
- tcp
- udp
cadvisor_force_enable_metrics: []

View File

@ -0,0 +1,11 @@
---
allow_duplicates: true
dependencies: []
galaxy_info:
role_name: cadvisor
description: Deploy cadvisor (Container Advisor), a container performance and resource usage aggregation daemon
galaxy_tags:
- cadvisor
- observability
- container
- docker

View File

@ -0,0 +1,25 @@
---
- name: Ensure cadvisor container image '{{ cadvisor_container_image_ref }}' is {{ cadvisor_state }}
community.docker.docker_image:
name: "{{ cadvisor_container_image_ref }}"
state: "{{ cadvisor_state }}"
source: "{{ cadvisor_container_image_source }}"
force_source: "{{ cadvisor_container_image_force_source }}"
- name: Ensure cadvisor container '{{ cadvisor_container_name }}' is {{ cadvisor_container_state }}
community.docker.docker_container:
name: "{{ cadvisor_container_name }}"
image: "{{ cadvisor_container_image_ref }}"
env: "{{ cadvisor_container_env | default(omit, true) }}"
ports: "{{ cadvisor_container_ports | default(omit, true) }}"
labels: "{{ cadvisor_container_labels }}"
devices: "{{ cadvisor_container_devices }}"
volumes: "{{ cadvisor_container_volumes }}"
networks: "{{ cadvisor_container_networks | default(omit, true) }}"
etc_hosts: "{{ cadvisor_container_etc_hosts | default(omit, true) }}"
privileged: "{{ cadvisor_container_privileged }}"
command: "{{ cadvisor_container_command }}"
pid_mode: "{{ cadvisor_container_pid_mode | default(omit, true) }}"
userns_mode: "{{ cadvisor_container_userns_mode | default(omit, true) }}"
restart_policy: "{{ cadvisor_container_restart_policy }}"
state: "{{ cadvisor_container_state }}"

View File

@ -1,26 +1,18 @@
---
- name: Ensure state is valid
ansible.builtin.fail:
msg: >-2
Unknown state '{{ cadvisor_state }}'! Supported
states are: {{ cadvisor_states | join(', ') }}.
when: cadvisor_state not in cadvisor_states
- name: Ensure container image is present
docker_image:
name: "{{ cadvisor_container_image_ref }}"
state: present
source: pull
force_source: "{{ cadvisor_container_image_tag|default(False, True) | bool }}"
- name: Ensure deployment method is valid
ansible.builtin.fail:
msg: >-2
Unknown deployment method '{{ cadvisor_deployment_method }}'! Supported
deployment methods are: {{ cadvisor_deployment_methods | join(', ') }}.
when: cadvisor_deployment_method not in cadvisor_deployment_methods
- name: Ensure cadvisor container is running
docker_container:
name: "{{ cadvisor_container_name }}"
image: "{{ cadvisor_container_image_ref }}"
env: "{{ cadvisor_container_env | default(omit, True) }}"
ports: "{{ cadvisor_container_ports | default(omit, True) }}"
labels: "{{ cadvisor_container_labels }}"
devices: "{{ cadvisor_container_devices }}"
volumes: "{{ cadvisor_container_volumes }}"
networks: "{{ cadvisor_container_networks | default(omit, True) }}"
etc_hosts: "{{ cadvisor_container_etc_hosts | default(omit, True) }}"
privileged: "{{ cadvisor_container_privileged }}"
command: "{{ cadvisor_container_command }}"
pid_mode: "{{ cadvisor_container_pid_mode | default(omit, True) }}"
userns_mode: "{{ cadvisor_container_userns_mode | default(omit, True) }}"
restart_policy: "{{ cadvisor_container_restart_policy }}"
state: started
- name: Deploy using {{ cadvisor_deployment_method }}
ansible.builtin.include_tasks:
file: "deploy-{{ cadvisor_deployment_method }}.yml"

View File

@ -0,0 +1,6 @@
---
cadvisor_states:
- present
- absent
cadvisor_deployment_methods:
- docker