Use docker inspect for Synapse systemd health check and lower health interval
Switch the systemd ExecStartPost health check from `docker exec` + `curl` to polling `docker inspect` for the container's health status. This piggybacks on the container image's built-in HEALTHCHECK instead of duplicating it.

Also add a configurable container health interval (5s for Traefik setups, 15s otherwise) to speed up startup readiness detection without affecting non-Traefik deployments.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -322,6 +322,21 @@ matrix_synapse_container_labels_public_metrics_middleware_basic_auth_users: ''
|
||||
# another.label="here"
|
||||
matrix_synapse_container_labels_additional_labels: ''
|
||||
|
||||
# Specifies how often the container health check will run.
|
||||
#
|
||||
# The Synapse container image ships with a default HEALTHCHECK (curl to /health)
|
||||
# with an interval of 15s, timeout of 5s, and start period of 5s.
|
||||
#
|
||||
# For Traefik-based setups, it's important that the interval is short,
|
||||
# because Docker also waits one full interval before running the first health check (the "initial wait time").
|
||||
# This is a Docker (moby) bug: https://github.com/moby/moby/issues/33410
|
||||
# Without a successful healthcheck, Traefik will not register the service for reverse-proxying.
|
||||
# A shorter interval also lets our systemd ExecStartPost health check
|
||||
# (see matrix_synapse_systemd_healthcheck_enabled) detect readiness faster at startup.
|
||||
#
|
||||
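# For non-Traefik setups, we keep the image's default healthcheck interval (15s) to decrease overhead.
# The trade-off: the first health result (and thus systemd readiness) arrives no sooner than 15s after the container starts.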
|
||||
matrix_synapse_container_health_interval: "{{ '5s' if matrix_synapse_container_labels_traefik_enabled else '15s' }}"
|
||||
|
||||
# A list of extra arguments to pass to the container
|
||||
# Also see `matrix_synapse_container_arguments`
|
||||
matrix_synapse_container_extra_arguments: []
|
||||
@@ -359,25 +374,24 @@ matrix_synapse_goofys_systemd_required_services_list_auto: []
|
||||
matrix_synapse_goofys_systemd_required_services_list_custom: []
|
||||
|
||||
# Controls the post-start health check in the systemd service.
|
||||
# When enabled, ExecStartPost polls Synapse's /health endpoint via `docker exec` + `curl`,
|
||||
# When enabled, ExecStartPost polls Docker's container health status via `docker inspect`,
|
||||
# keeping the service in "activating (start-post)" state until Synapse is ready.
|
||||
# Services ordered with After=matrix-synapse.service will therefore wait until Synapse is actually ready.
|
||||
# This relies on the container image's built-in HEALTHCHECK (curl to /health),
|
||||
# with the interval controlled by matrix_synapse_container_health_interval.
|
||||
matrix_synapse_systemd_healthcheck_enabled: true
|
||||
matrix_synapse_systemd_healthcheck_max_retries: 60
|
||||
matrix_synapse_systemd_healthcheck_interval_seconds: 1
|
||||
|
||||
# The command used for the health check in ExecStartPost.
|
||||
# Uses `docker exec` + `curl` (available in the Synapse container image) to poll /health.
|
||||
# We intentionally don't rely on Docker's built-in container HEALTHCHECK (polling via `docker inspect`),
|
||||
# because its check interval (default: 15s) is too slow for our startup needs, and lowering it
|
||||
# would add unnecessary overhead for the entire container lifetime. We only need fast polling at startup.
|
||||
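# Polls `docker inspect` for the container's health status until it reports "healthy".
# Gives up and exits non-zero after matrix_synapse_systemd_healthcheck_max_retries attempts,
# sleeping matrix_synapse_systemd_healthcheck_interval_seconds between attempts.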
|
||||
matrix_synapse_systemd_healthcheck_command: >-
|
||||
{{ devture_systemd_docker_base_host_command_sh }} -c
|
||||
'for i in $(seq 1 {{ matrix_synapse_systemd_healthcheck_max_retries }}); do
|
||||
echo "[Attempt $i/{{ matrix_synapse_systemd_healthcheck_max_retries }}] Synapse systemd health check: checking readiness via /health..";
|
||||
{{ devture_systemd_docker_base_host_command_docker }} exec matrix-synapse
|
||||
curl -fSs http://localhost:8008/health > /dev/null 2>&1 && echo "[Attempt $i/{{ matrix_synapse_systemd_healthcheck_max_retries }}] Synapse systemd health check: passed" && exit 0;
|
||||
echo "[Attempt $i/{{ matrix_synapse_systemd_healthcheck_max_retries }}] Synapse systemd health check: not ready yet, retrying in {{ matrix_synapse_systemd_healthcheck_interval_seconds }}s..";
|
||||
echo "[Attempt $i/{{ matrix_synapse_systemd_healthcheck_max_retries }}] Synapse systemd health check: checking container health status..";
|
||||
status=$( {{ devture_systemd_docker_base_host_command_docker }} inspect --format={{ '"{{' }}.State.Health.Status{{ '}}"' }} matrix-synapse 2>/dev/null);
|
||||
if [ "$status" = "healthy" ]; then echo "[Attempt $i/{{ matrix_synapse_systemd_healthcheck_max_retries }}] Synapse systemd health check: passed" && exit 0; fi;
|
||||
echo "[Attempt $i/{{ matrix_synapse_systemd_healthcheck_max_retries }}] Synapse systemd health check: not ready yet (status: $status), retrying in {{ matrix_synapse_systemd_healthcheck_interval_seconds }}s..";
|
||||
sleep {{ matrix_synapse_systemd_healthcheck_interval_seconds }};
|
||||
done; echo "[Attempt $i/{{ matrix_synapse_systemd_healthcheck_max_retries }}] Synapse systemd health check: failed after {{ matrix_synapse_systemd_healthcheck_max_retries }} attempts"; exit 1'
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@ ExecStartPre={{ devture_systemd_docker_base_host_command_docker }} create \
|
||||
--read-only \
|
||||
--tmpfs=/tmp:rw,noexec,nosuid,size={{ matrix_synapse_tmp_directory_size_mb }}m \
|
||||
--network={{ matrix_synapse_container_network }} \
|
||||
--health-interval={{ matrix_synapse_container_health_interval }} \
|
||||
{% if matrix_synapse_container_client_api_host_bind_port %}
|
||||
-p {{ matrix_synapse_container_client_api_host_bind_port }}:{{ matrix_synapse_container_client_api_port }} \
|
||||
{% endif %}
|
||||
|
||||
Reference in New Issue
Block a user