Increase Synapse caches and enable cache-autotuning by default (#3017)

* Modify Synapse Cache Factor to use Auto Tune Synapse has the ability to as it calls in its config auto tune caches. This ability lets us set very high cache factors and then instead limit our resource use. Defaults for this commit are 1/10th of what Element apparently runs for EMS stuff and matrix.org on Cache Factor and upstream documentation defaults for auto tune. * Add vars to Synapse main.yml to control cache related config This commit adds various cache related vars to main.yml for Synapse. Some are auto tune and some are just adding explicit ways to control upstream vars. * Updated Auto Tune figures Autotuned figures have been bumped in consultation with other community members as to a reasonable level. Please note these defaults are more on the one of each workers side than they are on the monolith Side. * Fix YML Error The playbook is not happy with the previous state of this patch so this commit hopefully fixes it * Add to_json to various Synapse tuning related configs * Fix incorrect indication in homeserver.yaml.j2 * Minor cleanups * Synapse Cache Autotuning Documentation * Upgrade Synapse Cache Autotune to auto configure memory use * Update Synapse Tuning docs to reflect automatic memory use configuration * Fix Linting errors in synapses main.yml * Rename variables for consistency (matrix_synapse_caches_autotuning_* -> matrix_synapse_cache_autotuning_*) * Remove FIX ME comment about Synapse's `cache_autotuning` `docs/maintenance-synapse.md` and `roles/custom/matrix-synapse/defaults/main.yml` already contains documentation about these variables and the default values we set. * Improve "Tuning caches and cache autotuning" documentation for Synapse * Announce larger Synapse caches and cache auto-tuning --------- Co-authored-by: Slavi Pantaleev <slavi@devture.com>
2024-02-14 11:02:06 +01:00
parent f999947dfe
commit 9eab0292d4
5 changed files with 103 additions and 37 deletions
--- a/roles/custom/matrix-synapse/defaults/main.yml
+++ b/roles/custom/matrix-synapse/defaults/main.yml
@@ -547,8 +547,23 @@ matrix_synapse_event_cache_size: "100K"

 # Controls cache sizes for Synapse.
 # Raise this to increase cache sizes or lower it to potentially lower memory use.
-# To learn more, see: https://github.com/matrix-org/synapse/issues/3939
-matrix_synapse_caches_global_factor: 0.5
+# To learn more, see:
+# - https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching
+# - https://github.com/matrix-org/synapse/issues/3939
+# Defaults for cache timings are taken from https://tcpipuk.github.io/synapse/deployment/synapse.html
+# The idea behind these timings is that entries may be evicted soon, but are also kept around for a long time when they are not forced out.
+# Long cache lifetimes together with the low minimum TTL allow autotuning to be the primary eviction method, assuming the cache size limit is hit before any other cap.
+matrix_synapse_caches_global_factor: 10
+matrix_synapse_caches_expire_caches: true
+matrix_synapse_caches_cache_entry_ttl: "1080m"
+matrix_synapse_caches_sync_response_cache_duration: "2m"
+matrix_synapse_cache_autotuning_min_cache_ttl: "30s"
+# Cache autotuning limits scale with host RAM (math derived from our Postgres autotuning): max = RAM/8 capped at 2 GiB, target = RAM/16 capped at 1 GiB.
+# Both limits are whole numbers of bytes, because Synapse reads a size without a K/M/G suffix as bytes.
+# Override `matrix_synapse_memtotal_kb` to make Synapse assume less RAM, or simply set the two limits to static values.
+matrix_synapse_memtotal_kb: "{{ (ansible_memtotal_mb * 1024) | int }}"
+matrix_synapse_cache_autotuning_max_cache_memory_usage: "{{ [((matrix_synapse_memtotal_kb | int) * 1024) // 8, 2147483648] | min }}"
+matrix_synapse_cache_autotuning_target_cache_memory_usage: "{{ [((matrix_synapse_memtotal_kb | int) * 1024) // 16, 1073741824] | min }}"

 # Controls whether Synapse will federate at all.
 # Disable this to completely isolate your server from the rest of the Matrix network.
--- a/roles/custom/matrix-synapse/tasks/validate_config.yml
+++ b/roles/custom/matrix-synapse/tasks/validate_config.yml
@@ -89,6 +89,9 @@
    - {'old': 'matrix_synapse_send_federation', 'new': '<unnecessary - Synapse relies on federation_sender_instances now>'}
    - {'old': 'matrix_synapse_start_pushers', 'new': '<unnecessary - Synapse relies on pusher_instances now>'}
    - {'old': 'matrix_synapse_spam_checker', 'new': '<superseded by matrix_synapse_modules>'}
+    - {'old': 'matrix_synapse_caches_autotuning_max_cache_memory_usage', 'new': 'matrix_synapse_cache_autotuning_max_cache_memory_usage'}
+    - {'old': 'matrix_synapse_caches_autotuning_target_cache_memory_usage', 'new': 'matrix_synapse_cache_autotuning_target_cache_memory_usage'}
+    - {'old': 'matrix_synapse_caches_autotuning_min_cache_ttl', 'new': 'matrix_synapse_cache_autotuning_min_cache_ttl'}

 - name: (Deprecation) Catch and report renamed settings in matrix_synapse_configuration_extension_yaml
  ansible.builtin.fail:
--- a/roles/custom/matrix-synapse/templates/synapse/homeserver.yaml.j2
+++ b/roles/custom/matrix-synapse/templates/synapse/homeserver.yaml.j2
@@ -760,49 +760,48 @@ federation_domain_whitelist: {{ matrix_synapse_federation_domain_whitelist|to_js
 # The number of events to cache in memory. Not affected by
 # caches.global_factor.
 #
-event_cache_size: "{{ matrix_synapse_event_cache_size }}"
+event_cache_size: {{ matrix_synapse_event_cache_size | to_json }}

 caches:
-   # Controls the global cache factor, which is the default cache factor
-   # for all caches if a specific factor for that cache is not otherwise
-   # set.
-   #
-   # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
-   # variable. Setting by environment variable takes priority over
-   # setting through the config file.
-   #
-   # Defaults to 0.5, which will half the size of all caches.
-   #
-   global_factor: {{ matrix_synapse_caches_global_factor }}
-
-   # A dictionary of cache name to cache factor for that individual
-   # cache. Overrides the global cache factor for a given cache.
-   #
-   # These can also be set through environment variables comprised
-   # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
-   # letters and underscores. Setting by environment variable
-   # takes priority over setting through the config file.
-   # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
-   #
-   # Some caches have '*' and other characters that are not
-   # alphanumeric or underscores. These caches can be named with or
-   # without the special characters stripped. For example, to specify
-   # the cache factor for `*stateGroupCache*` via an environment
-   # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`.
-   #
-   per_cache_factors:
-     #get_users_who_share_room_with_user: 2.0
+  # Controls the global cache factor, which is the default cache factor
+  # for all caches if a specific factor for that cache is not otherwise
+  # set.
+  #
+  # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment
+  # variable. Setting by environment variable takes priority over
+  # setting through the config file.
+  #
+  # Synapse itself defaults to 0.5, which will halve the size of all caches.
+  #
+  global_factor: {{ matrix_synapse_caches_global_factor | to_json }}
+  # A dictionary of cache name to cache factor for that individual
+  # cache. Overrides the global cache factor for a given cache.
+  #
+  # These can also be set through environment variables comprised
+  # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
+  # letters and underscores. Setting by environment variable
+  # takes priority over setting through the config file.
+  # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
+  #
+  # Some caches have '*' and other characters that are not
+  # alphanumeric or underscores. These caches can be named with or
+  # without the special characters stripped. For example, to specify
+  # the cache factor for `*stateGroupCache*` via an environment
+  # variable would be `SYNAPSE_CACHE_FACTOR_STATEGROUPCACHE=2.0`.
+  #
+  per_cache_factors:
+    #get_users_who_share_room_with_user: 2.0

  # Controls whether cache entries are evicted after a specified time
  # period. Defaults to true. Uncomment to disable this feature.
  #
-  #expire_caches: false
+  expire_caches: {{ matrix_synapse_caches_expire_caches | to_json }}

  # If expire_caches is enabled, this flag controls how long an entry can
  # be in a cache without having been accessed before being evicted.
  # Defaults to 30m. Uncomment to set a different time to live for cache entries.
  #
-  #cache_entry_ttl: 30m
+  cache_entry_ttl: {{ matrix_synapse_caches_cache_entry_ttl | to_json }}

  # Controls how long the results of a /sync request are cached for after
  # a successful response is returned. A higher duration can help clients with
@@ -811,7 +810,12 @@ caches:
  # By default, this is zero, which means that sync responses are not cached
  # at all.
  #
-  #sync_response_cache_duration: 2m
+  sync_response_cache_duration: {{ matrix_synapse_caches_sync_response_cache_duration | to_json }}
+
+  cache_autotuning:
+    max_cache_memory_usage: {{ matrix_synapse_cache_autotuning_max_cache_memory_usage | to_json }}
+    target_cache_memory_usage: {{ matrix_synapse_cache_autotuning_target_cache_memory_usage | to_json }}
+    min_cache_ttl: {{ matrix_synapse_cache_autotuning_min_cache_ttl | to_json }}


 ## Database ##