8 Commits

24 changed files with 474 additions and 3 deletions

View File

@ -7,10 +7,19 @@ metrics or alerting.
## Roles
- [`alertmanager`](roles/alertmanager/README.md): Runs prometheus'
alertmanager for receiving alerts from prometheus and routing them
to the correct configured receivers.
- [`matrix-alertmanager`](roles/matrix-alertmanager/README.md): An alertmanager
receiver which posts alerts to a configured matrix channel
using alertmanager's webhooks.
- [`vmtsdb`](roles/vmtsdb/README.md): VictoriaMetrics time series database.
- [`vmalert`](roles/vmalert/README.md): VictoriaMetrics alerting and
ruling engine.
- [`postgres_exporter`](roles/postgres_exporter/README.md): Prometheus
exporter for postgres databases, in a docker container.

View File

@ -3,12 +3,11 @@ name: observability
# Collection metadata excerpt (the keys preceding `version` are outside this view).
# NOTE(review): this span comes from a diff hunk whose header reports 12 old vs
# 11 new lines, so some entries below are presumably removed/added sides of the
# change (e.g. the two author lines) - confirm which ones are current.
version: 0.0.1
readme: README.md
authors:
- Johanna Dorothea Reichmann <transcaffeine@finallycoffee.eu>
- transcaffeine <transcaffeine@finally.coffee>
description: Various ansible roles useful for automating infrastructure
dependencies:
# NOTE(review): verify that ansible-galaxy accepts the `^` range operator here;
# TODO confirm against the galaxy.yml `dependencies` documentation.
"community.docker": "^1.10.0"
license:
- CNPLv7+
license_file: LICENSE.md
# Files matching these patterns are left out of the built collection artifact.
build_ignore:
- '*.tar.gz'
repository: https://git.finally.coffee/finallycoffee/observability

3
meta/runtime.yml Normal file
View File

@ -0,0 +1,3 @@
---
# Ansible version range this collection declares itself compatible with.
requires_ansible: '>=2.12'

6
playbooks/vmalert.yml Normal file
View File

@ -0,0 +1,6 @@
---
# Applies the vmalert role.
# `vmalert_hosts` overrides the targeted host pattern (default: `vmalert`),
# `vmalert_become` toggles privilege escalation (default: off).
- name: Install vmalert using docker
  hosts: "{{ vmalert_hosts | default('vmalert') }}"
  become: "{{ vmalert_become | default(false) }}"
  roles:
    - finallycoffee.observability.vmalert

6
playbooks/vmtsdb.yml Normal file
View File

@ -0,0 +1,6 @@
---
# Applies the vmtsdb role.
# `vmtsdb_hosts` overrides the targeted host pattern (default: `vmtsdb`),
# `vmtsdb_become` toggles privilege escalation (default: off).
- name: Install vmtsdb using docker
  hosts: "{{ vmtsdb_hosts | default('vmtsdb') }}"
  become: "{{ vmtsdb_become | default(false) }}"
  roles:
    - finallycoffee.observability.vmtsdb

View File

@ -0,0 +1,10 @@
# `finallycoffee.observability.alertmanager` ansible role
## Description
This role configures and runs prometheus alertmanager in a docker container.
The config file is templated on the host and persisted in `alertmanager_config_file`.
The alertmanager config can be passed by setting `alertmanager_config`, which expects the same yaml
format as the "normal" alertmanager config file (with top-level keys `global`, `route` and `receivers`).

View File

@ -0,0 +1,40 @@
---
# Default variables of the alertmanager role.

# System user created by the role; owns the directories below.
alertmanager_user: alertmanager
# Used as image tag (prefixed with `v`) unless
# `alertmanager_container_image_tag` is set.
alertmanager_version: "0.25.0"

# Host paths
alertmanager_base_path: /opt/alertmanager
alertmanager_config_path: "{{ alertmanager_base_path }}/config"
alertmanager_config_file: "{{ alertmanager_config_path }}/alertmanager.yml"
alertmanager_data_path: "{{ alertmanager_base_path }}/data"

# Container image
alertmanager_container_name: alertmanager
alertmanager_container_image_name: alertmanager
alertmanager_container_image_namespace: prometheus/
alertmanager_container_image_registry: quay.io
# <registry>/<namespace><name>; the registry can be remapped through an
# optional `container_registries` lookup keyed by the registry name.
alertmanager_container_image_repository: >-
  {{
    (container_registries[alertmanager_container_image_registry]
      | default(alertmanager_container_image_registry))
    + '/' + (alertmanager_container_image_namespace | default(''))
    + alertmanager_container_image_name
  }}
alertmanager_container_image_reference: >-
  {{
    alertmanager_container_image_repository + ':'
    + (alertmanager_container_image_tag | default('v' + alertmanager_version))
  }}
# Re-pull the image only when an explicit tag was supplied.
alertmanager_container_image_force_pull: "{{ alertmanager_container_image_tag is defined }}"

# Volumes: the defaults below plus optional `alertmanager_container_extra_volumes`.
alertmanager_container_default_volumes:
  - "{{ alertmanager_config_file }}:/etc/alertmanager/alertmanager.yml:ro"
  - "{{ alertmanager_data_path }}:/alertmanager:rw"
alertmanager_container_volumes: >-
  {{ alertmanager_container_default_volumes
     + alertmanager_container_extra_volumes | default([]) }}
alertmanager_container_restart_policy: unless-stopped

# Rendered as YAML into `alertmanager_config_file`.
alertmanager_config:
  global: {}
  route: {}
  receivers: []

View File

@ -0,0 +1,8 @@
---
# Triggered through the `restart-alertmanager` topic; restarts the container
# identified by `alertmanager_container_name`.
- name: Ensure alertmanager is restarted
  listen: restart-alertmanager
  community.docker.docker_container:
    name: "{{ alertmanager_container_name }}"
    restart: true
    state: started

View File

@ -0,0 +1,51 @@
---
# Create the system account; the registered result is read by later tasks
# for its `uid` and `group` values.
- name: Ensure alertmanager user '{{ alertmanager_user }}' exists
  register: alertmanager_user_info
  ansible.builtin.user:
    name: "{{ alertmanager_user }}"
    system: true
    state: present
# Create the base, data and config directories.
# Each loop item may carry optional `owner`, `group` and `mode` overrides;
# otherwise the registered alertmanager user/group and mode 0755 are used.
- name: Ensure mounts are created
  ansible.builtin.file:
    path: "{{ item.path }}"
    state: directory
    owner: "{{ item.owner | default(alertmanager_user_info.uid | default(alertmanager_user)) }}"
    # Fixed: the group override previously read `item.owner`.
    group: "{{ item.group | default(alertmanager_user_info.group | default(alertmanager_user)) }}"
    mode: "{{ item.mode | default('0755') }}"
  loop:
    - path: "{{ alertmanager_base_path }}"
    - path: "{{ alertmanager_data_path }}"
    - path: "{{ alertmanager_config_path }}"
# Render `alertmanager_config` as YAML into the config file and request a
# container restart whenever the file changes.
- name: Ensure config file is templated
  ansible.builtin.copy:
    dest: "{{ alertmanager_config_file }}"
    content: "{{ alertmanager_config | to_nice_yaml }}"
    owner: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
    # Fixed: this key was a duplicated `owner`, leaving the group unmanaged.
    group: "{{ alertmanager_user_info.group | default(alertmanager_user) }}"
    mode: "0640"
  notify:
    - restart-alertmanager
# Pull the image; a re-pull is forced when
# `alertmanager_container_image_force_pull` evaluates to true.
- name: Ensure container image is present on host
  community.docker.docker_image:
    state: present
    source: pull
    name: "{{ alertmanager_container_image_reference }}"
    force_source: "{{ alertmanager_container_image_force_pull | bool }}"
# Start the alertmanager container. Settings whose variable is unset
# (env, ports, volumes, networks, purge_networks, etc_hosts) are omitted.
- name: Ensure container '{{ alertmanager_container_name }}' is running
  community.docker.docker_container:
    state: started
    name: "{{ alertmanager_container_name }}"
    image: "{{ alertmanager_container_image_reference }}"
    restart_policy: "{{ alertmanager_container_restart_policy }}"
    # uid from the registered user result, falling back to the user name
    user: "{{ alertmanager_user_info.uid | default(alertmanager_user) }}"
    env: "{{ alertmanager_container_env | default(omit) }}"
    volumes: "{{ alertmanager_container_volumes | default(omit) }}"
    ports: "{{ alertmanager_container_ports | default(omit) }}"
    etc_hosts: "{{ alertmanager_container_etc_hosts | default(omit) }}"
    networks: "{{ alertmanager_container_networks | default(omit) }}"
    purge_networks: "{{ alertmanager_container_purge_networks | default(omit) }}"

24
roles/cadvisor/README.md Normal file
View File

@ -0,0 +1,24 @@
# `finallycoffee.observability.cadvisor` ansible role
## Overview
Deploys [cadvisor](https://github.com/google/cadvisor/), a daemon
for collecting and exporting information about running (docker)
containers, as a docker container.
## Configuration
In order to scrape `/metrics` of running containers, it is recommended
to expose the default port of cadvisor to the host using
```yaml
cadvisor_container_ports:
  - "127.0.0.1:8080:8080"
```
so that cadvisor metrics are exposed at `http://127.0.0.1:8080/metrics`.
### Enabling/Disabling collection of metrics
By setting `cadvisor_disabled_metrics`, the collection of metrics
can be disabled. The default list of disabled metrics is quite extensive,
so when enabling a disabled-by-default metric, it is recommended to
use `cadvisor_force_enable_metrics` instead, as it's empty by default.

View File

@ -0,0 +1,53 @@
---
# Default variables of the cadvisor role.

cadvisor_version: "0.45.0"

# Container image; an empty tag falls back to `v<cadvisor_version>`.
cadvisor_container_name: cadvisor
cadvisor_container_image_name: gcr.io/cadvisor/cadvisor
cadvisor_container_image_tag: null
cadvisor_container_image_ref: >-
  {{ cadvisor_container_image_name }}:{{ cadvisor_container_image_tag | default('v' + cadvisor_version, True) }}

# Volumes: base volumes plus optional extra volumes.
cadvisor_container_base_volumes:
  - "/:/rootfs:ro"
  - "/var/run:/var/run:ro"
  - "/sys:/sys:ro"
  - "/var/lib/docker/:/var/lib/docker:ro"
  - "/dev/disk/:/dev/disk:ro"
cadvisor_container_extra_volumes: null
cadvisor_container_volumes: >-
  {{ cadvisor_container_base_volumes + cadvisor_container_extra_volumes | default([], True) }}

# Labels: base labels merged with extra labels.
cadvisor_container_base_labels:
  version: "{{ cadvisor_version }}"
cadvisor_container_extra_labels: {}
cadvisor_container_labels: "{{ cadvisor_container_base_labels | combine(cadvisor_container_extra_labels) }}"

# Optional container settings, unset by default.
cadvisor_container_env: null
cadvisor_container_ports: null
cadvisor_container_networks: null
cadvisor_container_etc_hosts: null
cadvisor_container_capabilities: null

cadvisor_container_devices:
  - "/dev/kmsg:/dev/kmsg:rwm"
cadvisor_container_privileged: true
cadvisor_container_pid_mode: "host"
cadvisor_container_userns_mode: "host"
cadvisor_container_restart_policy: unless-stopped

# Metrics listed here end up in `--disable_metrics=`.
cadvisor_disabled_metrics:
  - advtcp
  - cpu_topology
  - cpuset
  - hugetlb
  - memory_numa
  - process
  - referenced_memory
  - resctrl
  - sched
  - tcp
  - udp
# Metrics listed here end up in `--enable_metrics=`.
cadvisor_force_enable_metrics: []

# Command line: always `--docker_only=false`, plus the disable/enable flags
# when the corresponding list is non-empty.
cadvisor_container_command: >-
  {{ ["--docker_only=false"]
     + (["--disable_metrics=" + cadvisor_disabled_metrics | join( ',' )]
        if cadvisor_disabled_metrics | default(false, True) else [])
     + (["--enable_metrics=" + cadvisor_force_enable_metrics | join( ',' )]
        if cadvisor_force_enable_metrics | default(false, True) else [])
  }}

View File

@ -0,0 +1,26 @@
---
# Pull the cadvisor image. A re-pull is forced whenever a non-empty
# `cadvisor_container_image_tag` is configured.
# Fixed: the previous `<tag> | bool` evaluated to false for ordinary tag
# strings (e.g. "latest"), so the pull was never forced.
- name: Ensure container image is present
  community.docker.docker_image:
    name: "{{ cadvisor_container_image_ref }}"
    state: present
    source: pull
    force_source: "{{ (cadvisor_container_image_tag | default('', true) | string | length) > 0 }}"
# Start the cadvisor container. Optional settings are omitted when their
# variable is unset or empty.
# The module is referenced by its fully qualified name, matching the other
# roles of this collection.
- name: Ensure cadvisor container is running
  community.docker.docker_container:
    name: "{{ cadvisor_container_name }}"
    image: "{{ cadvisor_container_image_ref }}"
    env: "{{ cadvisor_container_env | default(omit, True) }}"
    ports: "{{ cadvisor_container_ports | default(omit, True) }}"
    labels: "{{ cadvisor_container_labels }}"
    devices: "{{ cadvisor_container_devices }}"
    volumes: "{{ cadvisor_container_volumes }}"
    networks: "{{ cadvisor_container_networks | default(omit, True) }}"
    etc_hosts: "{{ cadvisor_container_etc_hosts | default(omit, True) }}"
    privileged: "{{ cadvisor_container_privileged }}"
    # `cadvisor_container_capabilities` is declared in the role defaults but
    # was never handed to the module; it is omitted while unset.
    capabilities: "{{ cadvisor_container_capabilities | default(omit, True) }}"
    command: "{{ cadvisor_container_command }}"
    pid_mode: "{{ cadvisor_container_pid_mode | default(omit, True) }}"
    userns_mode: "{{ cadvisor_container_userns_mode | default(omit, True) }}"
    restart_policy: "{{ cadvisor_container_restart_policy }}"
    state: started

11
roles/vmalert/README.md Normal file
View File

@ -0,0 +1,11 @@
# `finallycoffee.observability.vmalert` ansible role
## Description
This role configures `vmalert` and runs it in the officially distributed docker container.
The default configuration file for recording rules is `vmalert_recording_config` and the default file for alerts is `vmalert_alert_config`. To set rules in a prometheus-like syntax, supply them to the role using `vmalert_alerts` or `vmalert_records`.
It is also possible to pass extra rule-files to load using `vmalert_rule_files`, though care must be taken to also mount them to the location in the container by populating `vmalert_container_volumes`.
`vmalert` runs with the `envflag.enable` flag by default, so configuration can be passed to vmalert as environment variables using `vmalert_container_env`, with the syntax described in the official VictoriaMetrics documentation.

View File

@ -0,0 +1,57 @@
---
# Default variables of the vmalert role.

# Desired state, `present` or `absent`; the tasks compare against `present`.
vmalert_state: present
vmalert_user: vmalert
vmalert_version: "1.87.5"

# Host paths
vmalert_base_path: "/opt/vmalert"
vmalert_config_path: "{{ vmalert_base_path }}/config"
vmalert_alert_config: "{{ vmalert_config_path }}/alerts.yml"
vmalert_recording_config: "{{ vmalert_config_path }}/records.yml"

# Rule groups, written under the top-level `groups` key of the alert and
# recording config files. They default to empty lists (previously `{}`)
# because `groups` is a list in the prometheus-like rule file syntax.
vmalert_alerts: []
vmalert_records: []

# Additional rule files to load; they must also be mounted into the container.
vmalert_rule_files: []
vmalert_default_rule_files:
  - "{{ vmalert_alert_config }}"
  - "{{ vmalert_recording_config }}"
vmalert_merged_rule_files: >-
  {{ vmalert_default_rule_files + vmalert_rule_files }}

# Container image: <server>[/<namespace>]/<container>:<tag>
vmalert_container_image_server: docker.io
vmalert_container_image_namespace: "victoriametrics"
vmalert_container_image_container: "vmalert"
vmalert_container_image_name: >-
  {{
    vmalert_container_image_server
    + ((vmalert_container_image_namespace is defined)
       | ternary('/' ~ vmalert_container_image_namespace, ''))
    + '/' + vmalert_container_image_container
  }}
# Optional; when set it replaces the tag derived from `vmalert_version`.
# vmalert_container_image_tag:
vmalert_container_image: >-
  {{ vmalert_container_image_name }}:{{ vmalert_container_image_tag | default('v' + vmalert_version, false) }}

# uid/group taken from the registered `vmalert_user_info` result when it is
# available, otherwise the plain user name.
vmalert_user_id: >-
  {{ (vmalert_user_info is defined and 'uid' in vmalert_user_info) | ternary(vmalert_user_info.uid, vmalert_user) }}
vmalert_group_id: >-
  {{ (vmalert_user_info is defined and 'group' in vmalert_user_info) | ternary(vmalert_user_info.group, vmalert_user) }}
vmalert_container_user: "{{ vmalert_user_id }}"
vmalert_container_group: "{{ vmalert_group_id }}"
vmalert_container_name: "vmalert"

# Command: the default flags followed by user supplied ones.
vmalert_container_command: []
vmalert_container_default_command:
  - "-enableTCP6"
  - "-envflag.enable"
vmalert_container_merged_command: >-
  {{ vmalert_container_default_command + (vmalert_container_command | default([], false)) }}

# Environment: defaults merged with (and overridable by) user supplied values.
vmalert_container_env: {}
vmalert_container_default_env:
  PATH: "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
  # comma separated list of all rule files
  rule: "{{ vmalert_merged_rule_files | join(',') }}"
vmalert_container_merged_env: >-
  {{ vmalert_container_default_env | combine(vmalert_container_env) }}

# Volumes: default mounts followed by user supplied ones.
vmalert_container_volumes: []
vmalert_container_default_volumes:
  - "{{ vmalert_config_path }}:{{ vmalert_config_path }}:z"
# Fixed: both values are lists, so they are concatenated; the `combine`
# filter used before only merges dictionaries.
vmalert_container_merged_volumes: >-
  {{ vmalert_container_default_volumes + vmalert_container_volumes }}
vmalert_container_restart_policy: "unless-stopped"

View File

View File

@ -0,0 +1,69 @@
---
# Create the system account without a home directory and register the
# result as `vmalert_user_info`.
# NOTE(review): the task name interpolates `vmalert_state`, but the module
# state is fixed to `present`.
- name: Ensure user {{ vmalert_user }} is {{ vmalert_state }}
  register: vmalert_user_info
  ansible.builtin.user:
    name: "{{ vmalert_user }}"
    system: true
    create_home: false
    state: present
# Create the base and config directories, or remove them when
# `vmalert_state` is not `present`. Items may override owner, group and mode.
- name: Ensure directories for vmalert are {{ vmalert_state }}
  ansible.builtin.file:
    state: "{{ (vmalert_state == 'present') | ternary('directory', 'absent') }}"
    path: "{{ item.path }}"
    mode: "{{ item.mode | default('0775') }}"
    owner: "{{ item.owner | default(vmalert_user_id) }}"
    group: "{{ item.group | default(vmalert_group_id) }}"
  loop_control:
    label: "{{ item.path }}"
  loop:
    - path: "{{ vmalert_base_path }}"
    - path: "{{ vmalert_config_path }}"
      mode: "0755"
# Write the alerting rules (`vmalert_alerts`) under a top-level `groups` key
# into `vmalert_alert_config`. Skipped unless `vmalert_state` is `present`.
# This task has no loop, so the former `item.*` lookups were dropped, and the
# file is written as a plain, non-executable data file.
- name: Ensure alert configuration is present
  ansible.builtin.copy:
    dest: "{{ vmalert_alert_config }}"
    content: |
      {{ ({ 'groups': vmalert_alerts })
         | to_nice_yaml(indent=2, width=1024, default_style='"') }}
    owner: "{{ vmalert_user_id }}"
    group: "{{ vmalert_group_id }}"
    mode: "0644"
  when: vmalert_state == 'present'
# Write the recording rules (`vmalert_records`) under a top-level `groups`
# key into `vmalert_recording_config`. Skipped unless `vmalert_state` is
# `present`.
# This task has no loop, so the former `item.*` lookups were dropped, and the
# file is written as a plain, non-executable data file.
- name: Ensure recording rule configuration is present
  ansible.builtin.copy:
    dest: "{{ vmalert_recording_config }}"
    content: |
      {{ ({ 'groups': vmalert_records })
         | to_nice_yaml(indent=2, width=1024, default_style='"') }}
    owner: "{{ vmalert_user_id }}"
    group: "{{ vmalert_group_id }}"
    mode: "0644"
  when: vmalert_state == 'present'
# Pull (or remove) the vmalert image according to `vmalert_state`.
# Fixed: the condition compared the image reference with 'present' (never
# true) and passed the tag string as value. A re-pull is now forced when the
# state is `present` and an explicit `vmalert_container_image_tag` is set.
- name: Ensure container image {{ vmalert_container_image }} is {{ vmalert_state }}
  community.docker.docker_image:
    name: "{{ vmalert_container_image }}"
    state: "{{ vmalert_state }}"
    source: "{{ (vmalert_state == 'present') | ternary('pull', omit) }}"
    force_source: >-
      {{ (vmalert_state == 'present')
         | ternary(vmalert_container_image_tag is defined, omit) }}
# Run the vmalert container, or remove it when `vmalert_state` is not
# `present`. Settings whose variable is unset are omitted.
- name: Ensure vmalert container is {{ vmalert_state }}
  community.docker.docker_container:
    state: "{{ (vmalert_state == 'present') | ternary('started', 'absent') }}"
    name: "{{ vmalert_container_name }}"
    image: "{{ vmalert_container_image }}"
    restart_policy: "{{ vmalert_container_restart_policy | default(omit) }}"
    # process identity
    user: "{{ vmalert_container_user }}"
    groups: "{{ vmalert_container_group }}"
    # runtime configuration
    command: "{{ vmalert_container_merged_command }}"
    env: "{{ vmalert_container_merged_env }}"
    volumes: "{{ vmalert_container_merged_volumes }}"
    labels: "{{ vmalert_container_labels | default(omit) }}"
    # networking
    ports: "{{ vmalert_container_ports | default(omit) }}"
    etc_hosts: "{{ vmalert_container_etc_hosts | default(omit) }}"
    networks: "{{ vmalert_container_networks | default(omit) }}"
    purge_networks: "{{ vmalert_container_purge_networks | default(omit) }}"

7
roles/vmtsdb/README.md Normal file
View File

@ -0,0 +1,7 @@
# `finallycoffee.observability.vmtsdb` ansible role
## Description
This role configures `vmtsdb`, the time-series database part of victoria metrics, run in a docker container.
Per default `enableTCP6` and `envflag.enable` flags are passed to victoriametrics, enabling configuration using `vmtsdb_container_env`, using the syntax found on the official victoriametrics documentation.

View File

@ -0,0 +1,45 @@
---
# Default variables of the vmtsdb role.

# Desired state, `present` or `absent`; the tasks compare against `present`.
vmtsdb_state: present
vmtsdb_user: vmtsdb
vmtsdb_version: "1.87.5"

# Host paths
vmtsdb_base_path: "/opt/vmtsdb"
vmtsdb_data_path: "{{ vmtsdb_base_path }}/data"

# Container image: <server>[/<namespace>]/<container>:<tag>
vmtsdb_container_image_server: docker.io
vmtsdb_container_image_namespace: "victoriametrics"
vmtsdb_container_image_container: "victoria-metrics"
vmtsdb_container_image_name: >-
  {{
    vmtsdb_container_image_server
    + ((vmtsdb_container_image_namespace is defined)
       | ternary('/' ~ vmtsdb_container_image_namespace, ''))
    + '/' + vmtsdb_container_image_container
  }}
# Optional; when set it replaces the tag derived from `vmtsdb_version`.
# vmtsdb_container_image_tag:
vmtsdb_container_image: >-
  {{ vmtsdb_container_image_name }}:{{ vmtsdb_container_image_tag | default('v' + vmtsdb_version, false) }}

# uid/group taken from the registered `vmtsdb_user_info` result when it is
# available, otherwise the plain user name.
vmtsdb_user_id: >-
  {{ (vmtsdb_user_info is defined and 'uid' in vmtsdb_user_info) | ternary(vmtsdb_user_info.uid, vmtsdb_user) }}
vmtsdb_group_id: >-
  {{ (vmtsdb_user_info is defined and 'group' in vmtsdb_user_info) | ternary(vmtsdb_user_info.group, vmtsdb_user) }}
vmtsdb_container_user: "{{ vmtsdb_user_id }}"
vmtsdb_container_group: "{{ vmtsdb_group_id }}"
vmtsdb_container_name: "vmtsdb"

# Command: the default flags followed by user supplied ones.
vmtsdb_container_command: []
vmtsdb_container_default_command:
  - "-enableTCP6"
  - "-envflag.enable"
vmtsdb_container_merged_command: >-
  {{ vmtsdb_container_default_command + (vmtsdb_container_command | default([], false)) }}

# Environment: defaults merged with (and overridable by) user supplied values.
vmtsdb_container_env: {}
vmtsdb_container_default_env:
  PATH: "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
vmtsdb_container_merged_env: >-
  {{ vmtsdb_container_default_env | combine(vmtsdb_container_env) }}

# Volumes: default mounts followed by user supplied ones.
vmtsdb_container_volumes: []
vmtsdb_container_default_volumes:
  - "{{ vmtsdb_data_path }}:/victoria-metrics-data:z"
# Fixed: both values are lists, so they are concatenated; the `combine`
# filter used before only merges dictionaries.
vmtsdb_container_merged_volumes: >-
  {{ vmtsdb_container_default_volumes + vmtsdb_container_volumes }}
vmtsdb_container_restart_policy: "unless-stopped"

View File

@ -0,0 +1,47 @@
---
# Create the system account without a home directory and register the
# result as `vmtsdb_user_info`.
# NOTE(review): the task name interpolates `vmtsdb_state`, but the module
# state is fixed to `present`.
- name: Ensure user {{ vmtsdb_user }} is {{ vmtsdb_state }}
  register: vmtsdb_user_info
  ansible.builtin.user:
    name: "{{ vmtsdb_user }}"
    system: true
    create_home: false
    state: present
# Create the base and data directories, or remove them when `vmtsdb_state`
# is not `present`. Items may override owner, group and mode.
- name: Ensure directories for vmtsdb are {{ vmtsdb_state }}
  ansible.builtin.file:
    state: "{{ (vmtsdb_state == 'present') | ternary('directory', 'absent') }}"
    path: "{{ item.path }}"
    mode: "{{ item.mode | default('0775') }}"
    owner: "{{ item.owner | default(vmtsdb_user_id) }}"
    group: "{{ item.group | default(vmtsdb_group_id) }}"
  loop_control:
    label: "{{ item.path }}"
  loop:
    - path: "{{ vmtsdb_base_path }}"
    - path: "{{ vmtsdb_data_path }}"
      mode: "0755"
# Pull (or remove) the vmtsdb image according to `vmtsdb_state`.
# Fixed: the condition compared the image reference with 'present' (never
# true) and passed the tag string as value. A re-pull is now forced when the
# state is `present` and an explicit `vmtsdb_container_image_tag` is set.
- name: Ensure container image {{ vmtsdb_container_image }} is {{ vmtsdb_state }}
  community.docker.docker_image:
    name: "{{ vmtsdb_container_image }}"
    state: "{{ vmtsdb_state }}"
    source: "{{ (vmtsdb_state == 'present') | ternary('pull', omit) }}"
    force_source: >-
      {{ (vmtsdb_state == 'present')
         | ternary(vmtsdb_container_image_tag is defined, omit) }}
# Run the vmtsdb container, or remove it when `vmtsdb_state` is not
# `present`. Settings whose variable is unset are omitted.
- name: Ensure vmtsdb container is {{ vmtsdb_state }}
  community.docker.docker_container:
    state: "{{ (vmtsdb_state == 'present') | ternary('started', 'absent') }}"
    name: "{{ vmtsdb_container_name }}"
    image: "{{ vmtsdb_container_image }}"
    restart_policy: "{{ vmtsdb_container_restart_policy | default(omit) }}"
    # process identity
    user: "{{ vmtsdb_container_user }}"
    groups: "{{ vmtsdb_container_group }}"
    # runtime configuration
    command: "{{ vmtsdb_container_merged_command }}"
    env: "{{ vmtsdb_container_merged_env }}"
    volumes: "{{ vmtsdb_container_merged_volumes }}"
    labels: "{{ vmtsdb_container_labels | default(omit) }}"
    # networking
    ports: "{{ vmtsdb_container_ports | default(omit) }}"
    etc_hosts: "{{ vmtsdb_container_etc_hosts | default(omit) }}"
    networks: "{{ vmtsdb_container_networks | default(omit) }}"
    purge_networks: "{{ vmtsdb_container_purge_networks | default(omit) }}"