Fix yaml[comments-indentation] in workers config and remove automation

This commit is contained in:
Slavi Pantaleev 2022-11-05 07:29:47 +02:00
parent da4a82c48b
commit a9e2607d80
3 changed files with 4 additions and 509 deletions

View File

@ -1,145 +0,0 @@
#!/usr/bin/awk
# Hackish approach to get a machine-readable list of current matrix
# synapse REST API endpoints from the official documentation at
# https://github.com/matrix-org/synapse/raw/master/docs/workers.md
#
# invoke in shell with:
# URL=https://github.com/matrix-org/synapse/raw/master/docs/workers.md
# curl -L ${URL} | awk -f workers-doc-to-yaml.awk -
# Append the given text to the global `worker_stanza` accumulator, which
# collects the output lines of the worker section currently being parsed.
# Nothing is returned; the caller supplies any separators and linefeeds.
function worker_stanza_append(text) {
    worker_stanza = (worker_stanza text)
}
# Heuristic test for an API endpoint line: leading spaces, then either a
# caret or a slash, and at least one further slash later in the line.
# Returns 1 when the line matches and 0 otherwise.
function line_is_endpoint_url(line) {
    if (line ~ /^ +[\^\/].*\//) {
        return 1
    }
    return 0
}
# Before any input is read: define the notice that the section-processing
# rule inserts ahead of endpoints which the documentation describes as
# conditional. Nothing is printed here.
BEGIN {
endpoint_conditional_comment = " # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually\n"
}
# Switch the parser on once the introductory text has been passed.
# After this point each synapse worker section is read as one record and
# each of its lines as one field.
$0 ~ /Available worker applications/ {
    # one field per line
    FS = "\n"
    # one record per markdown "### " section header
    RS = "\n### "
    # let the section-processing rule run from now on
    enable_parsing = 1
}
# Once parsing is active, process each markdown section as one record.
#
# For a section whose headline names a `synapse.app.X` worker this rule:
#   - records X in the global `workers` list (printed by the END rule),
#   - walks the remaining lines (fields 2..NF) and builds `worker_stanza`:
#       * indented "#" lines are copied as comments,
#       * endpoint lines become YAML list items, or commented-out items
#         once the section text has hinted at additional conditions,
#       * other text lines become comments,
#   - prints the stanza as `matrix_synapse_workers_X_endpoints:` when at
#     least one endpoint was seen, otherwise as a bracketed comment block.
enable_parsing {
    # Each worker section starts with a synapse.app.X headline
    if ($1 ~ /synapse\.app\./) {
        # get rid of the backticks and extract worker type from headline
        gsub("`", "", $1)
        # Use a regex literal with escaped dots so that only the literal
        # "synapse.app." prefix is stripped (an unescaped "." would match
        # any character), consistent with the guard above.
        gsub(/synapse\.app\./, "", $1)
        worker_type = $1
        # initialize empty worker stanza
        worker_stanza = ""
        # track if any endpoints are mentioned in a specific section
        worker_has_urls = 0
        # some endpoint descriptions contain flag terms
        endpoints_seem_conditional = 0
        # also, collect a list of available workers
        workers = (workers ? workers "\n" : "") " - " worker_type
        # loop through the lines (2 - number of fields in record)
        for (i = 2; i < NF + 1; i++) {
            # copy line for gsub replacements
            line = $i
            # End lines with a linefeed, except for fields NF-1 and NF.
            # NOTE(review): presumably the final field is the empty line
            # preceding the next section header, which makes NF-1 the last
            # real line - confirm against the input document.
            linefeed = (i < NF - 1) ? "\n" : ""
            # line starts with white-space and a hash: endpoint block headline
            if (line ~ /^ +#/) {
                # copy to output verbatim, normalizing white-space
                gsub(/^ +/, "", line)
                worker_stanza_append(" " line linefeed)
            } else if (line_is_endpoint_url(line)) {
                # mark section for special output formatting
                worker_has_urls = 1
                # remove leading white-space
                gsub(/^ +/, "", line)
                api_endpoint_regex = line
                # FIXME: https://github.com/matrix-org/synapse/issues/new
                # munge inconsistent media_repository endpoint notation
                if (api_endpoint_regex == "/_matrix/media/") {
                    api_endpoint_regex = "^" line
                }
                # FIXME: https://github.com/matrix-org/synapse/issues/7530
                # https://github.com/spantaleev/matrix-docker-ansible-deploy/pull/456#issuecomment-719015911
                if (api_endpoint_regex == "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$") {
                    worker_stanza_append(" # FIXME: possible bug with SSO and multiple generic workers\n")
                    worker_stanza_append(" # see https://github.com/matrix-org/synapse/issues/7530\n")
                    worker_stanza_append(" # " api_endpoint_regex linefeed)
                    # this endpoint is always emitted commented-out
                    continue
                }
                # disable endpoints which specify complications
                if (endpoints_seem_conditional) {
                    # only add notice if previous line didn't match
                    # (i starts at 2, so $(i - 1) is always a valid field)
                    if (!line_is_endpoint_url($(i - 1))) {
                        worker_stanza_append(endpoint_conditional_comment)
                    }
                    worker_stanza_append(" # " api_endpoint_regex linefeed)
                } else {
                    # output endpoint regex
                    worker_stanza_append(" - " api_endpoint_regex linefeed)
                }
            # white-space only line?
            } else if (line ~ /^ *$/) {
                # Blank lines are kept only for 3 < i < NF, i.e. never for
                # fields 2, 3 or the last field of the section.
                if (i > 3 && i < NF) {
                    worker_stanza_append(line linefeed)
                }
            # nothing of the above: the line is regular documentation text
            } else {
                # include this text line as comment
                worker_stanza_append(" # " line linefeed)
                # and take note of words hinting at additional conditions to be met;
                # once set, the flag stays on for the rest of the section
                if (line ~ /(^[Ii]f|care must be taken|can be handled for)/) {
                    endpoints_seem_conditional = 1
                }
            }
        }
        if (worker_has_urls) {
            print "\nmatrix_synapse_workers_" worker_type "_endpoints:"
            print worker_stanza
        } else {
            # include workers without endpoints as well for reference
            print "\n# " worker_type " worker (no API endpoints) ["
            print worker_stanza
            print "# ]"
        }
    }
}
# After all input has been consumed: print the YAML key for the list of
# available workers, then the collected worker names piped through
# `sort`.
END {
    avail_list_header = "\nmatrix_synapse_workers_avail_list:"
    sort_command = "sort"
    print avail_list_header
    print workers | sort_command
}
# vim: tabstop=4 shiftwidth=4 expandtab autoindent

View File

@ -1,7 +0,0 @@
#!/bin/sh
# Fetch the synapse worker documentation and extract endpoint URLs
# matrix-org/synapse master branch points to current stable release
# and put it between `workers:start` and `workers:end` tokens in ../vars/main.yml
#
# Requires: curl, mktemp and GNU awk (the `inplace` extension is gawk-only).

# Abort on the first failing command or unset variable, so that a failed
# download can never lead to the generated section being emptied.
set -eu

# Resolve helper script and target file relative to this script rather
# than the caller's working directory.
script_dir="$(cd "$(dirname "$0")" && pwd)"
doc_url="https://github.com/matrix-org/synapse/raw/master/docs/workers.md"

# Download to a temporary file first: in a `curl | awk` pipeline plain sh
# only reports awk's exit status, hiding download errors.
doc_file="$(mktemp)"
trap 'rm -f "$doc_file"' EXIT
# -f: treat HTTP errors as failures instead of parsing an error page
curl -fsSL -o "$doc_file" "$doc_url"

snippet="$(awk -f "$script_dir/workers-doc-to-yaml.awk" "$doc_file")"

# Refuse to replace the existing section with nothing.
if [ -z "$snippet" ]; then
    echo "error: no worker definitions could be extracted from $doc_url" >&2
    exit 1
fi

# Hand the snippet over through the environment: a value assigned with
# `-v` undergoes escape-sequence processing, which would alter any
# backslashes contained in the endpoint regular expressions.
snippet="$snippet" gawk -i inplace '/workers:start/{f=1;print;print ENVIRON["snippet"]}/workers:end/{f=0}!f' "$script_dir/../vars/main.yml"

View File

@ -111,16 +111,12 @@ matrix_synapse_known_worker_types: |
matrix_synapse_known_instance_map_eligible_worker_types:
- stream_writer
# The following section contains semi-automatically generated content.
# The following section contains content that had previously been generated by a script (`workers-doc-to-yaml.awk`) processing https://github.com/matrix-org/synapse/raw/master/docs/workers.md,
# but is now maintained manually due to:
# - the script being tripped up by the content and generating somewhat inaccurate definitions, which had to be fixed up manually.
# - the script being complicated and unmaintainable
### workers:start
matrix_synapse_workers_generic_worker_endpoints:
# This worker can handle API requests matching the following regular expressions.
# These endpoints can be routed to any worker. If a worker is set up to handle a
# stream then, for maximum efficiency, additional endpoints should be routed to that
# worker: refer to the [stream writers](#stream-writers) section below for further
# information.
# Sync requests
- ^/_matrix/client/(r0|v3)/sync$
- ^/_matrix/client/(api/v1|r0|v3)/events$
@ -191,317 +187,6 @@ matrix_synapse_workers_generic_worker_endpoints:
- ^/_matrix/client/(api/v1|r0|v3|unstable)/join/
- ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/
# These appear to be conditional and should not be enabled by default.
# We need to fix up our workers-doc-to-yaml.awk parsing script to exclude them.
# For now, they've been commented out manually.
# # Account data requests
# - ^/_matrix/client/(r0|v3|unstable)/.*/tags
# - ^/_matrix/client/(r0|v3|unstable)/.*/account_data
#
# # Receipts requests
# - ^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt
# - ^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers
#
# # Presence requests
# - ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/
# User directory search requests
# Any worker can handle these, but we have a dedicated user_dir worker for this,
# so we'd like for other generic workers to not try and capture these requests.
# - ^/_matrix/client/(r0|v3|unstable)/user_directory/search$
# Additionally, the following REST endpoints can be handled for GET requests:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(api/v1|r0|v3|unstable)/pushrules/
# Pagination requests can also be handled, but all requests for a given
# room must be routed to the same instance. Additionally, care must be taken to
# ensure that the purge history admin API is not used while pagination requests
# for the room are in flight:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$
# Additionally, the following endpoints should be included if Synapse is configured
# to use SSO (you only need to include the ones for whichever SSO provider you're
# using):
# for all SSO providers
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(api/v1|r0|v3|unstable)/login/sso/redirect
# ^/_synapse/client/pick_idp$
# ^/_synapse/client/pick_username
# ^/_synapse/client/new_user_consent$
# ^/_synapse/client/sso_register$
# OpenID Connect requests.
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_synapse/client/oidc/callback$
# SAML requests.
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_synapse/client/saml2/authn_response$
# CAS requests.
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(api/v1|r0|v3|unstable)/login/cas/ticket$
# Ensure that all SSO logins go to a single process.
# For multiple workers not handling the SSO endpoints properly, see
# [#7530](https://github.com/matrix-org/synapse/issues/7530) and
# [#9427](https://github.com/matrix-org/synapse/issues/9427).
# Note that a [HTTP listener](usage/configuration/config_documentation.md#listeners)
# with `client` and `federation` `resources` must be configured in the `worker_listeners`
# option in the worker config.
# #### Load balancing
# It is possible to run multiple instances of this worker app, with incoming requests
# being load-balanced between them by the reverse-proxy. However, different endpoints
# have different characteristics and so admins
# may wish to run multiple groups of workers handling different endpoints so that
# load balancing can be done in different ways.
# For `/sync` and `/initialSync` requests it will be more efficient if all
# requests from a particular user are routed to a single instance. Extracting a
# user ID from the access token or `Authorization` header is currently left as an
# exercise for the reader. Admins may additionally wish to separate out `/sync`
# requests that have a `since` query parameter from those that don't (and
# `/initialSync`), as requests that don't are known as "initial sync" that happens
# when a user logs in on a new device and can be *very* resource intensive, so
# isolating these requests will stop them from interfering with other users ongoing
# syncs.
# Federation and client requests can be balanced via simple round robin.
# The inbound federation transaction request `^/_matrix/federation/v1/send/`
# should be balanced by source IP so that transactions from the same remote server
# go to the same process.
# Registration/login requests can be handled separately purely to help ensure that
# unexpected load doesn't affect new logins and sign ups.
# Finally, event sending requests can be balanced by the room ID in the URI (or
# the full URI, or even just round robin), the room ID is the path component after
# `/rooms/`. If there is a large bridge connected that is sending or may send lots
# of events, then a dedicated set of workers can be provisioned to limit the
# effects of bursts of events from that bridge on events sent by normal users.
# #### Stream writers
# Additionally, the writing of specific streams (such as events) can be moved off
# of the main process to a particular worker.
# To enable this, the worker must have a
# [HTTP `replication` listener](usage/configuration/config_documentation.md#listeners) configured,
# have a `worker_name` and be listed in the `instance_map` config. The same worker
# can handle multiple streams, but unless otherwise documented, each stream can only
# have a single writer.
# For example, to move event persistence off to a dedicated worker, the shared
# configuration would include:
# ```yaml
# instance_map:
# event_persister1:
# host: localhost
# port: 8034
# stream_writers:
# events: event_persister1
# ```
# An example for a stream writer instance:
# ```yaml
# {{#include systemd-with-workers/workers/event_persister.yaml}}
# ```
# Some of the streams have associated endpoints which, for maximum efficiency, should
# be routed to the workers handling that stream. See below for the currently supported
# streams and the endpoints associated with them:
# ##### The `events` stream
# The `events` stream experimentally supports having multiple writers, where work
# is sharded between them by room ID. Note that you *must* restart all worker
# instances when adding or removing event persisters. An example `stream_writers`
# configuration with multiple writers:
# ```yaml
# stream_writers:
# events:
# - event_persister1
# - event_persister2
# ```
# ##### The `typing` stream
# The following endpoints should be routed directly to the worker configured as
# the stream writer for the `typing` stream:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/typing
# ##### The `to_device` stream
# The following endpoints should be routed directly to the worker configured as
# the stream writer for the `to_device` stream:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(r0|v3|unstable)/sendToDevice/
# ##### The `account_data` stream
# The following endpoints should be routed directly to the worker configured as
# the stream writer for the `account_data` stream:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(r0|v3|unstable)/.*/tags
# ^/_matrix/client/(r0|v3|unstable)/.*/account_data
# ##### The `receipts` stream
# The following endpoints should be routed directly to the worker configured as
# the stream writer for the `receipts` stream:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt
# ^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers
# ##### The `presence` stream
# The following endpoints should be routed directly to the worker configured as
# the stream writer for the `presence` stream:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/
# #### Background tasks
# There is also support for moving background tasks to a separate
# worker. Background tasks are run periodically or started via replication. Exactly
# which tasks are configured to run depends on your Synapse configuration (e.g. if
# stats is enabled). This worker doesn't handle any REST endpoints itself.
# To enable this, the worker must have a `worker_name` and can be configured to run
# background tasks. For example, to move background tasks to a dedicated worker,
# the shared configuration would include:
# ```yaml
# run_background_tasks_on: background_worker
# ```
# You might also wish to investigate the `update_user_directory_from_worker` and
# `media_instance_running_background_jobs` settings.
# An example for a dedicated background worker instance:
# ```yaml
# {{#include systemd-with-workers/workers/background_worker.yaml}}
# ```
# #### Updating the User Directory
# You can designate one generic worker to update the user directory.
# Specify its name in the shared configuration as follows:
# ```yaml
# update_user_directory_from_worker: worker_name
# ```
# This work cannot be load-balanced; please ensure the main process is restarted
# after setting this option in the shared configuration!
# User directory updates allow REST endpoints matching the following regular
# expressions to work:
# FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
# ^/_matrix/client/(r0|v3|unstable)/user_directory/search$
# The above endpoints can be routed to any worker, though you may choose to route
# it to the chosen user directory worker.
# This style of configuration supersedes the legacy `synapse.app.user_dir`
# worker application type.
# #### Notifying Application Services
# You can designate one generic worker to send output traffic to Application Services.
# Doesn't handle any REST endpoints itself, but you should specify its name in the
# shared configuration as follows:
# ```yaml
# notify_appservices_from_worker: worker_name
# ```
# This work cannot be load-balanced; please ensure the main process is restarted
# after setting this option in the shared configuration!
# This style of configuration supersedes the legacy `synapse.app.appservice`
# worker application type.
# pusher worker (no API endpoints) [
# Handles sending push notifications to sygnal and email. Doesn't handle any
# REST endpoints itself, but you should set `start_pushers: False` in the
# shared configuration file to stop the main synapse sending push notifications.
# To run multiple instances at once the `pusher_instances` option should list all
# pusher instances by their worker name, e.g.:
# ```yaml
# pusher_instances:
# - pusher_worker1
# - pusher_worker2
# ```
# An example for a pusher instance:
# ```yaml
# {{#include systemd-with-workers/workers/pusher_worker.yaml}}
# ```
# ]
# appservice worker (no API endpoints) [
# **Deprecated as of Synapse v1.59.** [Use `synapse.app.generic_worker` with the
# `notify_appservices_from_worker` option instead.](#notifying-application-services)
# Handles sending output traffic to Application Services. Doesn't handle any
# REST endpoints itself, but you should set `notify_appservices: False` in the
# shared configuration file to stop the main synapse sending appservice notifications.
# Note this worker cannot be load-balanced: only one instance should be active.
# ]
# federation_sender worker (no API endpoints) [
# Handles sending federation traffic to other servers. Doesn't handle any
# REST endpoints itself, but you should set `send_federation: False` in the
# shared configuration file to stop the main synapse sending this traffic.
# If running multiple federation senders then you must list each
# instance in the `federation_sender_instances` option by their `worker_name`.
# All instances must be stopped and started when adding or removing instances.
# For example:
# ```yaml
# federation_sender_instances:
# - federation_sender1
# - federation_sender2
# ```
# An example for a federation sender instance:
# ```yaml
# {{#include systemd-with-workers/workers/federation_sender.yaml}}
# ```
# ]
matrix_synapse_workers_media_repository_endpoints:
# Handles the media repository. It can handle all endpoints starting with:
@ -517,50 +202,12 @@ matrix_synapse_workers_media_repository_endpoints:
- ^/_synapse/admin/v1/quarantine_media/.*$
- ^/_synapse/admin/v1/users/.*/media$
# You should also set `enable_media_repo: False` in the shared configuration
# file to stop the main synapse running background jobs related to managing the
# media repository. Note that doing so will prevent the main process from being
# able to handle the above endpoints.
# In the `media_repository` worker configuration file, configure the
# [HTTP listener](usage/configuration/config_documentation.md#listeners) to
# expose the `media` resource. For example:
# ```yaml
# {{#include systemd-with-workers/workers/media_worker.yaml}}
# ```
# Note that if running multiple media repositories they must be on the same server
# and you must configure a single instance to run the background tasks, e.g.:
# ```yaml
# media_instance_running_background_jobs: "media-repository-1"
# ```
# Note that if a reverse proxy is used, then `/_matrix/media/` must be routed for both inbound client and federation requests (if they are handled separately).
matrix_synapse_workers_user_dir_endpoints:
# **Deprecated as of Synapse v1.59.** [Use `synapse.app.generic_worker` with the
# `update_user_directory_from_worker` option instead.](#updating-the-user-directory)
# Handles searches in the user directory. It can handle REST endpoints matching
# the following regular expressions:
- ^/_matrix/client/(r0|v3|unstable)/user_directory/search$
# When using this worker you must also set `update_user_directory: false` in the
# shared configuration file to stop the main synapse running background
# jobs related to updating the user directory.
# Above endpoint is not *required* to be routed to this worker. By default,
# `update_user_directory` is set to `true`, which means the main process
# will handle updates. All workers configured with `client` can handle the above
# endpoint as long as either this worker or the main process are configured to
# handle it, and are online.
# If `update_user_directory` is set to `false`, and this worker is not running,
# the above endpoint may give outdated results.
matrix_synapse_workers_avail_list:
- appservice
- federation_sender