8 Commits
0.1.0 ... 0.1.2

20 changed files with 383 additions and 76 deletions

View File

@ -11,6 +11,9 @@ metrics or alerting.
alertmanager for receiving alerts from prometheus and routing them alertmanager for receiving alerts from prometheus and routing them
to the correct configured receivers. to the correct configured receivers.
- [`cadvisor`](roles/cadvisor/README.md): Run and configure cAdvisor, googles'
container performance and resource usage collection and aggregation daemon.
- [`grafana`](roles/grafana/README.md): a popular visualization and - [`grafana`](roles/grafana/README.md): a popular visualization and
dashboard creation tool able to use various datasources. dashboard creation tool able to use various datasources.
@ -18,6 +21,8 @@ metrics or alerting.
manager receiver which posts alerts to a configured matrix channel manager receiver which posts alerts to a configured matrix channel
using alertmanagers' webhooks. using alertmanagers' webhooks.
- [`vmagent`](roles/vmagent/README.md): VictoriaMetrics agent
- [`vmtsdb`](roles/vmtsdb/README.md): VictoriaMetrics time series database. - [`vmtsdb`](roles/vmtsdb/README.md): VictoriaMetrics time series database.
- [`vmalert`](roles/vmalert/README.md): VictoriaMetrics alerting and - [`vmalert`](roles/vmalert/README.md): VictoriaMetrics alerting and

View File

@ -1,6 +1,6 @@
namespace: finallycoffee namespace: finallycoffee
name: observability name: observability
version: 0.1.0 version: 0.1.2
readme: README.md readme: README.md
authors: authors:
- transcaffeine <transcaffeine@finally.coffee> - transcaffeine <transcaffeine@finally.coffee>
@ -12,3 +12,9 @@ build_ignore:
- '*.tar.gz' - '*.tar.gz'
repository: https://git.finally.coffee/finallycoffee/observability repository: https://git.finally.coffee/finallycoffee/observability
issues: https://codeberg.org/finallycoffee/ansible-collection-observability/issues issues: https://codeberg.org/finallycoffee/ansible-collection-observability/issues
tags:
- observability
- monitoring
- prometheus
- victoriametrics
- grafana

6
playbooks/vmagent.yml Normal file
View File

@ -0,0 +1,6 @@
---
- name: Install and configure vmagent
hosts: "{{ vmagent_hosts | default('vmagent') }}"
become: "{{ vmagent_become | default(false) }}"
roles:
- role: finallycoffee.observability.vmagent

View File

@ -1,52 +0,0 @@
---
cadvisor_version: 0.49.1
cadvisor_container_name: cadvisor
cadvisor_container_image_name: gcr.io/cadvisor/cadvisor
cadvisor_container_image_tag: ~
cadvisor_container_image_ref: >-
{{ cadvisor_container_image_name }}:{{ cadvisor_container_image_tag | default('v' + cadvisor_version, True) }}
cadvisor_container_volumes: >-
{{ cadvisor_container_base_volumes + cadvisor_container_extra_volumes | default([], True) }}
cadvisor_container_extra_volumes: ~
cadvisor_container_env: ~
cadvisor_container_labels: "{{ cadvisor_container_base_labels | combine(cadvisor_container_extra_labels) }}"
cadvisor_container_extra_labels: {}
cadvisor_container_ports: ~
cadvisor_container_networks: ~
cadvisor_container_etc_hosts: ~
cadvisor_container_devices: [ "/dev/kmsg:/dev/kmsg:rwm" ]
cadvisor_container_privileged: yes
cadvisor_container_pid_mode: "host"
cadvisor_container_userns_mode: "host"
cadvisor_container_capabilities: ~
cadvisor_container_restart_policy: unless-stopped
cadvisor_container_command: >-2
{{ ["--docker_only=false"]
+ (["--disable_metrics=" + cadvisor_disabled_metrics | join( ',' )]
if cadvisor_disabled_metrics | default(false, True) else [])
+ (["--enable_metrics=" + cadvisor_force_enable_metrics | join( ',' )]
if cadvisor_force_enable_metrics | default(false, True) else [])
}}
cadvisor_container_base_labels:
version: "{{ cadvisor_version }}"
cadvisor_container_base_volumes:
- "/:/rootfs:ro"
- "/var/run:/var/run:ro"
- "/sys:/sys:ro"
- "/var/lib/docker/:/var/lib/docker:ro"
- "/dev/disk/:/dev/disk:ro"
cadvisor_disabled_metrics:
- advtcp
- cpu_topology
- cpuset
- hugetlb
- memory_numa
- process
- referenced_memory
- resctrl
- sched
- tcp
- udp
cadvisor_force_enable_metrics: []

View File

@ -0,0 +1,56 @@
---
cadvisor_container_image_registry: gcr.io
cadvisor_container_image_namespace: cadvisor
cadvisor_container_image_name: cadvisor
cadvisor_container_image: >-2
{{
[
cadvisor_container_image_registry,
cadvisor_container_image_namespace,
cadvisor_container_image_name,
] | flatten | join('/')
}}
cadvisor_container_image_tag: ~
cadvisor_container_image_ref: >-2
{{ cadvisor_container_image }}:{{ cadvisor_container_image_tag | default('v' + cadvisor_version, true) }}
cadvisor_container_image_source: pull
cadvisor_container_image_force_source: >-2
{{ cadvisor_container_image_tag | default(false, true) | bool }}
cadvisor_container_state: >-2
{{ (cadvisor_state == 'present') | ternary('started', 'absent') }}
cadvisor_container_name: cadvisor
cadvisor_container_volumes: >-2
{{ cadvisor_container_base_volumes + cadvisor_container_extra_volumes | default([], true) }}
cadvisor_container_extra_volumes: ~
cadvisor_container_env: ~
cadvisor_container_labels: >-2
{{ cadvisor_container_base_labels | combine(cadvisor_container_extra_labels) }}
cadvisor_container_extra_labels: {}
cadvisor_container_ports: ~
cadvisor_container_networks: ~
cadvisor_container_etc_hosts: ~
cadvisor_container_devices:
- "/dev/kmsg:/dev/kmsg:rwm"
cadvisor_container_privileged: true
cadvisor_container_pid_mode: "host"
cadvisor_container_userns_mode: "host"
cadvisor_container_capabilities: ~
cadvisor_container_restart_policy: "unless-stopped"
cadvisor_container_command: >-2
{{ ["--docker_only=false"]
+ (["--disable_metrics=" + cadvisor_disabled_metrics | join( ',' )]
if cadvisor_disabled_metrics | default(false, true) else [])
+ (["--enable_metrics=" + cadvisor_force_enable_metrics | join( ',' )]
if cadvisor_force_enable_metrics | default(false, true) else [])
}}
cadvisor_container_base_labels:
version: "{{ cadvisor_version }}"
cadvisor_container_base_volumes:
- "/:/rootfs:ro"
- "/var/run:/var/run:ro"
- "/sys:/sys:ro"
- "/var/lib/docker/:/var/lib/docker:ro"
- "/dev/disk/:/dev/disk:ro"

View File

@ -0,0 +1,18 @@
---
cadvisor_version: "0.50.0"
cadvisor_state: present
cadvisor_deployment_method: docker
cadvisor_disabled_metrics:
- advtcp
- cpu_topology
- cpuset
- hugetlb
- memory_numa
- process
- referenced_memory
- resctrl
- sched
- tcp
- udp
cadvisor_force_enable_metrics: []

View File

@ -0,0 +1,11 @@
---
allow_duplicates: true
dependencies: []
galaxy_info:
role_name: cadvisor
description: Deploy cadvisor (Container Advisor), a container performance and resource usage aggregation daemon
galaxy_tags:
- cadvisor
- observability
- container
- docker

View File

@ -0,0 +1,25 @@
---
- name: Ensure cadvisor container image '{{ cadvisor_container_image_ref }}' is {{ cadvisor_state }}
community.docker.docker_image:
name: "{{ cadvisor_container_image_ref }}"
state: "{{ cadvisor_state }}"
source: "{{ cadvisor_container_image_source }}"
force_source: "{{ cadvisor_container_image_force_source }}"
- name: Ensure cadvisor container '{{ cadvisor_container_name }}' is {{ cadvisor_container_state }}
community.docker.docker_container:
name: "{{ cadvisor_container_name }}"
image: "{{ cadvisor_container_image_ref }}"
env: "{{ cadvisor_container_env | default(omit, true) }}"
ports: "{{ cadvisor_container_ports | default(omit, true) }}"
labels: "{{ cadvisor_container_labels }}"
devices: "{{ cadvisor_container_devices }}"
volumes: "{{ cadvisor_container_volumes }}"
networks: "{{ cadvisor_container_networks | default(omit, true) }}"
etc_hosts: "{{ cadvisor_container_etc_hosts | default(omit, true) }}"
privileged: "{{ cadvisor_container_privileged }}"
command: "{{ cadvisor_container_command }}"
pid_mode: "{{ cadvisor_container_pid_mode | default(omit, true) }}"
userns_mode: "{{ cadvisor_container_userns_mode | default(omit, true) }}"
restart_policy: "{{ cadvisor_container_restart_policy }}"
state: "{{ cadvisor_container_state }}"

View File

@ -1,26 +1,18 @@
--- ---
- name: Ensure state is valid
ansible.builtin.fail:
msg: >-2
Unknown state '{{ cadvisor_state }}'! Supported
states are: {{ cadvisor_states | join(', ') }}.
when: cadvisor_state not in cadvisor_states
- name: Ensure container image is present - name: Ensure deployment method is valid
docker_image: ansible.builtin.fail:
name: "{{ cadvisor_container_image_ref }}" msg: >-2
state: present Unknown deployment method '{{ cadvisor_deployment_method }}'! Supported
source: pull deployment methods are: {{ cadvisor_deployment_methods | join(', ') }}.
force_source: "{{ cadvisor_container_image_tag|default(False, True) | bool }}" when: cadvisor_deployment_method not in cadvisor_deployment_methods
- name: Ensure cadvisor container is running - name: Deploy using {{ cadvisor_deployment_method }}
docker_container: ansible.builtin.include_tasks:
name: "{{ cadvisor_container_name }}" file: "deploy-{{ cadvisor_deployment_method }}.yml"
image: "{{ cadvisor_container_image_ref }}"
env: "{{ cadvisor_container_env | default(omit, True) }}"
ports: "{{ cadvisor_container_ports | default(omit, True) }}"
labels: "{{ cadvisor_container_labels }}"
devices: "{{ cadvisor_container_devices }}"
volumes: "{{ cadvisor_container_volumes }}"
networks: "{{ cadvisor_container_networks | default(omit, True) }}"
etc_hosts: "{{ cadvisor_container_etc_hosts | default(omit, True) }}"
privileged: "{{ cadvisor_container_privileged }}"
command: "{{ cadvisor_container_command }}"
pid_mode: "{{ cadvisor_container_pid_mode | default(omit, True) }}"
userns_mode: "{{ cadvisor_container_userns_mode | default(omit, True) }}"
restart_policy: "{{ cadvisor_container_restart_policy }}"
state: started

View File

@ -0,0 +1,6 @@
---
cadvisor_states:
- present
- absent
cadvisor_deployment_methods:
- docker

27
roles/vmagent/README.md Normal file
View File

@ -0,0 +1,27 @@
# `finallycoffee.observability.vmagent` ansible role
Install and configure the
[victoriametrics agent `vmagent`](https://docs.victoriametrics.com/vmagent/)
using the [supported deployment types (see `vars/main.yml#L5`)](vars/main.yml#L5).
## Configuration
Set scrape job configuration as complex data in `vmagent_config_scrape_configs`.
To tune the scrape interval, override `vmagent_config_global_scrape_interval`,
or modify / extend `vmagent_config` directly.
### Prometheus remote write api with basic auth
One of the more common methods of sending the collected metrics to a
central prometheus server. Set the following variables to archieve this:
```yaml
vmagent_flags:
remoteWrite_url: https://my.prometheus.instance.example.com/api/v1/write
remoteWrite_basicAuth_username: my_prom_user
remoteWrite_basicAuth_passwordFile: /path/to/password/file.key
```
For the full set of options, see either the
[vmagents' "Advanced usage" documentation](https://docs.victoriametrics.com/vmagent/#advanced-usage)
or run `vmagent -help` for the same output.

View File

@ -0,0 +1,17 @@
---
vmagent_config_global_scrape_interval: "30s"
vmagent_config_global_scrape_timeout: "10s"
vmagent_config_global_external_labels: {}
vmagent_config_scrape_configs: []
vmagent_config: ~
vmagent_base_config:
global:
scrape_interval: "{{ vmagent_config_global_scrape_interval }}"
scrape_timeout: "{{ vmagent_config_global_scrape_timeout }}"
external_labels: "{{ vmagent_config_global_external_labels }}"
scrape_configs: "{{ vmagent_config_scrape_configs }}"
vmagent_merged_config: >-2
{{ (vmagent_base_config | default({}, true))
| combine(vmagent_config | default({}, true), recursive=True) }}

View File

@ -0,0 +1,59 @@
---
vmagent_container_image_registry: "docker.io"
vmagent_container_image_namespace: "victoriametrics"
vmagent_container_image_name: "vmagent"
vmagent_container_image_tag: ~
vmagent_container_image: >-2
{{
([
vmagent_container_image_registry,
vmagent_container_image_namespace,
vmagent_container_image_name,
] | join('/'))
+ ':'
+ (vmagent_container_image_tag
| default('v' + vmagent_version, true))
}}
vmagent_container_image_source: pull
vmagent_container_image_force_source: >-2
{{ vmagent_container_image_tag | default(false, true) | bool }}
vmagent_container_image_network_retries: 3
vmagent_container_image_network_delay: 5
vmagent_container_name: vmagent
vmagent_container_user: ~
vmagent_container_ports: ~
vmagent_container_labels: ~
vmagent_container_command: >-2
{% for flag in vmagent_all_flags -%}
-{{ flag }}
{% endfor -%}
vmagent_container_networks: ~
vmagent_container_network_mode: ~
vmagent_container_etc_hosts: ~
vmagent_container_dns_servers: ~
vmagent_container_restart_policy: >-2
{{ (vmagent_deployment_type == 'docker')
| ternary('unless-stopped', 'on-failure') }}
vmagent_container_state: >-2
{{ (vmagent_state == 'present') | ternary('started', 'absent') }}
vmagent_container_base_volumes:
- "{{ vmagent_scrape_config_file }}:{{ vmagent_scrape_config_file }}:ro"
- "{{ vmagent_cache_path }}:{{ vmagent_cache_path }}:z"
vmagent_container_volumes: ~
vmagent_container_all_volumes: >-2
{{ (vmagent_container_base_volumes | default([], true))
+ (vmagent_container_volumes | default([], true)) }}
vmagent_container_base_env:
remoteWrite_tmpDataPath: "{{ vmagent_cache_path }}"
promscrape_config: "{{ vmagent_scrape_config_file }}"
vmagent_container_env: ~
vmagent_container_merged_env: >-2
{{ (vmagent_container_base_env | default({}, true))
| combine(vmagent_container_env | default({})) }}
vmagent_container_comparisons:
env: allow_more_present
image: strict
labels: allow_more_present

View File

@ -0,0 +1,16 @@
---
vmagent_user: vmagent
vmagent_version: "1.104.0"
vmagent_state: present
vmagent_deployment_method: "docker"
vmagent_scrape_config_file: "/etc/vmagent/scrape_config.yml"
vmagent_config_path: "{{ vmagent_scrape_config_file | dirname }}"
vmagent_cache_path: "/var/cache/vmagent"
vmagent_base_flags:
- "enableTCP6"
- "envflag.enable"
vmagent_flags: ~
vmagent_all_flags: >-2
{{ vmagent_base_flags + (vmagent_flags | default([], true)) }}

View File

@ -0,0 +1,7 @@
---
vmagent_user_groups: ~
vmagent_run_user_id: >-2
{{ vmagent_user_info.uid | default(vmagent_user) }}
vmagent_run_group_id: >-2
{{ vmagent_user_info.group | default(vmagent_user) }}

View File

@ -0,0 +1,9 @@
---
- name: Ensure vmagent container '{{ vmagent_container_name }}' is restarted
community.docker.docker_container:
name: "{{ vmagent_container_name }}"
state: "{{ vmagent_container_state }}"
restart: true
listen: "vmagent-reload"
ignore_errors: "{{ ansible_check_mode }}"
when: vmagent_deployment_method == 'docker'

View File

@ -0,0 +1,10 @@
---
allow_duplicates: true
dependencies: []
galaxy_info:
role_name: vmagent
description: Deploy and configure the victoriametrics agent `vmagent`
galaxy_tags:
- victoriametrics
- vmagent
- prometheus

View File

@ -0,0 +1,29 @@
---
- name: Ensure container image '{{ vmagent_container_image }}' is {{ vmagent_state }}
community.docker.docker_image:
name: "{{ vmagent_container_image }}"
state: "{{ vmagent_state }}"
source: "{{ vmagent_container_image_source }}"
force_source: "{{ vmagent_container_image_force_source }}"
register: vmagent_container_image_info
until: vmagent_container_image_info is success
retries: "{{ vmagent_container_image_network_retries }}"
delay: "{{ vmagent_container_image_network_delay }}"
- name: Ensure container '{{ vmagent_container_name }}' is {{ vmagent_container_state }}
community.docker.docker_container:
name: "{{ vmagent_container_name }}"
image: "{{ vmagent_container_image }}"
env: "{{ vmagent_container_merged_env }}"
user: "{{ vmagent_container_user }}"
ports: "{{ vmagent_container_ports | default(omit, true) }}"
labels: "{{ vmagent_container_labels | default(omit, true) }}"
command: "{{ vmagent_container_command }}"
volumes: "{{ vmagent_container_all_volumes }}"
networks: "{{ vmagent_container_networks | default(omit, true) }}"
etc_hosts: "{{ vmagent_container_etc_hosts | default(omit, true) }}"
dns_servers: "{{ vmagent_container_dns_servers | default(omit, true) }}"
network_mode: "{{ vmagent_container_network_mode | default(omit, true) }}"
restart_policy: "{{ vmagent_container_restart_policy | default(omit, true) }}"
comparisons: "{{ vmagent_container_comparisons | default(omit, true) }}"
state: "{{ vmagent_container_state }}"

View File

@ -0,0 +1,54 @@
---
- name: Check that `vmagent_state` is valid
ansible.builtin.fail:
msg: >-2
Unsupported state '{{ vmagent_state }}'! Supported states
are {{ vmagent_states | join(', ') }}.
when: vmagent_state not in vmagent_states
- name: Check that `vmagent_deployment_method` is valid
ansible.builtin.fail:
msg: >-2
Unsupported deployment method '{{ vmagent_deployment_method }}'!
Supported are: {{ vmagent_deployment_methods | join(', ') }}.
when: vmagent_deployment_method not in vmagent_deployment_methods
- name: Ensure vmagent user '{{ vmagent_user }}' is {{ vmagent_state }}
ansible.builtin.user:
name: "{{ vmagent_user }}"
state: "{{ vmagent_state }}"
system: "{{ vmagent_user_system | default(true, true) }}"
groups: "{{ vmagent_user_groups | default(omit, true) }}"
append: "{{ (vmagent_user_groups | default([], true)) | length > 0 }}"
create_home: "{{ vmagent_user_create_home | default(false, true) }}"
register: vmagent_user_info
- name: Ensure configuration file '{{ vmagent_scrape_config_file }}' is {{ vmagent_state }}
ansible.builtin.file:
path: "{{ vmagent_scrape_config_file }}"
state: "{{ vmagent_state }}"
when: vmagent_state == 'absent'
- name: Ensure config directory '{{ vmagent_config_path }}' is {{ vmagent_state }}
ansible.builtin.file:
path: "{{ vmagent_config_path }}"
state: >-2
{{ (vmagent_state == 'present') | ternary('directory', 'absent') }}
owner: "{{ vmagent_run_user_id }}"
group: "{{ vmagent_run_group_id }}"
mode: "0755"
- name: Ensure configuration file '{{ vmagent_scrape_config_file }}' is {{ vmagent_state }}
ansible.builtin.copy:
dest: "{{ vmagent_scrape_config_file }}"
content: "{{ vmagent_merged_config | to_nice_yaml(indent=4, width=1000) }}"
owner: "{{ vmagent_run_user_id }}"
group: "{{ vmagent_run_group_id }}"
mode: "0644"
when: vmagent_state == 'present'
notify:
- vmagent-reload
- name: Ensure vmagent is deployed using {{ vmagent_deployment_method }}
ansible.builtin.include_tasks:
file: "deploy-{{ vmagent_deployment_method }}.yml"

View File

@ -0,0 +1,6 @@
---
vmagent_states:
- present
- absent
vmagent_deployment_methods:
- docker