Fix yaml[comments-indentation] in workers config and remove automation

2022-11-05 07:29:47 +02:00
parent da4a82c48b
commit a9e2607d80
3 changed files with 4 additions and 509 deletions
--- a/roles/custom/matrix-synapse/files/workers-doc-to-yaml.awk
+++ b/roles/custom/matrix-synapse/files/workers-doc-to-yaml.awk
@@ -1,145 +0,0 @@
-#!/usr/bin/awk
-# Hackish approach to get a machine-readable list of current matrix
-# synapse REST API endpoints from the official documentation at
-# https://github.com/matrix-org/synapse/raw/master/docs/workers.md
-#
-# invoke in shell with:
-# URL=https://github.com/matrix-org/synapse/raw/master/docs/workers.md
-# curl -L ${URL} | awk -f workers-doc-to-yaml.awk -
-
-function worker_stanza_append(string) {
-    worker_stanza = worker_stanza string
-}
-
-function line_is_endpoint_url(line) {
-    # probably API endpoint if it starts with white-space and ^ or /
-    return (line ~ /^ +[\^\/].*\//)
-}
-
-# Put YAML marker at beginning of file.
-BEGIN {
-    endpoint_conditional_comment = "  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually\n"
-}
-
-# Enable further processing after the introductory text.
-# Read each synapse worker section as record and its lines as fields.
-/Available worker applications/ {
-    enable_parsing = 1
-    # set record separator to markdown section header
-    RS = "\n### "
-    # set field separator to newline
-    FS = "\n"
-}
-
-# Once parsing is active, this will process each section as record.
-enable_parsing {
-    # Each worker section starts with a synapse.app.X headline
-    if ($1 ~ /synapse\.app\./) {
-
-        # get rid of the backticks and extract worker type from headline
-        gsub("`", "", $1)
-        gsub("synapse.app.", "", $1)
-        worker_type = $1
-
-        # initialize empty worker stanza
-        worker_stanza = ""
-
-        # track if any endpoints are mentioned in a specific section
-        worker_has_urls = 0
-
-        # some endpoint descriptions contain flag terms
-        endpoints_seem_conditional = 0
-
-        # also, collect a list of available workers
-        workers = (workers ? workers "\n" : "") "  - " worker_type
-
-        # loop through the lines (2 - number of fields in record)
-        for (i = 2; i < NF + 1; i++) {
-            # copy line for gsub replacements
-            line = $i
-
-            # end all lines but the last with a linefeed
-            linefeed = (i < NF - 1) ? "\n" : ""
-
-            # line starts with white-space and a hash: endpoint block headline
-            if (line ~ /^ +#/) {
-
-                # copy to output verbatim, normalizing white-space
-                gsub(/^ +/, "", line)
-                worker_stanza_append("  " line linefeed)
-
-            } else if (line_is_endpoint_url(line)) {
-
-                # mark section for special output formatting
-                worker_has_urls = 1
-
-                # remove leading white-space
-                gsub(/^ +/, "", line)
-                api_endpoint_regex = line
-
-                # FIXME: https://github.com/matrix-org/synapse/issues/new
-                # munge inconsistent media_repository endpoint notation
-                if (api_endpoint_regex == "/_matrix/media/") {
-                    api_endpoint_regex = "^" line
-                }
-
-                # FIXME: https://github.com/matrix-org/synapse/issues/7530
-                # https://github.com/spantaleev/matrix-docker-ansible-deploy/pull/456#issuecomment-719015911
-                if (api_endpoint_regex == "^/_matrix/client/(r0|v3|unstable)/auth/.*/fallback/web$") {
-                    worker_stanza_append("  # FIXME: possible bug with SSO and multiple generic workers\n")
-                    worker_stanza_append("  # see https://github.com/matrix-org/synapse/issues/7530\n")
-                    worker_stanza_append("  # " api_endpoint_regex linefeed)
-                    continue
-                }
-
-                # disable endpoints which specify complications
-                if (endpoints_seem_conditional) {
-                    # only add notice if previous line didn't match
-                    if (!line_is_endpoint_url($(i - 1))) {
-                        worker_stanza_append(endpoint_conditional_comment)
-                    }
-                    worker_stanza_append("  # " api_endpoint_regex linefeed)
-                } else {
-                    # output endpoint regex
-                    worker_stanza_append("  - " api_endpoint_regex linefeed)
-                }
-
-            # white-space only line?
-            } else if (line ~ /^ *$/) {
-
-                if (i > 3 && i < NF) {
-                    # print white-space lines unless 1st or last line in section
-                    worker_stanza_append(line linefeed)
-                }
-
-            # nothing of the above: the line is regular documentation text
-            } else {
-
-                # include this text line as comment
-                worker_stanza_append("  # " line linefeed)
-
-                # and take note of words hinting at additional conditions to be met
-                if (line ~ /(^[Ii]f|care must be taken|can be handled for)/) {
-                    endpoints_seem_conditional = 1
-                }
-            }
-        }
-
-        if (worker_has_urls) {
-            print "\nmatrix_synapse_workers_" worker_type "_endpoints:"
-            print worker_stanza
-        } else {
-            # include workers without endpoints as well for reference
-            print "\n# " worker_type " worker (no API endpoints) ["
-            print worker_stanza
-            print "# ]"
-        }
-    }
-}
-
-END {
-    print "\nmatrix_synapse_workers_avail_list:"
-    print workers | "sort"
-}
-
-# vim: tabstop=4 shiftwidth=4 expandtab autoindent
--- a/roles/custom/matrix-synapse/files/workers-doc-to-yaml.sh
+++ b/roles/custom/matrix-synapse/files/workers-doc-to-yaml.sh
@@ -1,7 +0,0 @@
-#!/bin/sh
-# Fetch the synapse worker documentation and extract endpoint URLs
-# matrix-org/synapse master branch points to current stable release
-# and put it between `workers:start` and `workers:end` tokens in ../vars/main.yml
-
-snippet="$(curl -L https://github.com/matrix-org/synapse/raw/master/docs/workers.md | awk -f workers-doc-to-yaml.awk)"
-awk -v snippet="$snippet" -i inplace '/workers:start/{f=1;print;print snippet}/workers:end/{f=0}!f' ../vars/main.yml
--- a/roles/custom/matrix-synapse/vars/main.yml
+++ b/roles/custom/matrix-synapse/vars/main.yml
@@ -111,16 +111,12 @@ matrix_synapse_known_worker_types: |
 matrix_synapse_known_instance_map_eligible_worker_types:
  - stream_writer

-# the following section contains semi-automatic generated content
+# The content of the following section was previously generated by a script (`workers-doc-to-yaml.awk`) processing https://github.com/matrix-org/synapse/raw/master/docs/workers.md,
+# but is now maintained manually because:
+# - the script was tripped up by the documentation's content and generated somewhat inaccurate definitions, which had to be fixed up manually
+# - the script was complicated and unmaintainable
 ### workers:start
-
 matrix_synapse_workers_generic_worker_endpoints:
-  # This worker can handle API requests matching the following regular expressions.
-  # These endpoints can be routed to any worker. If a worker is set up to handle a
-  # stream then, for maximum efficiency, additional endpoints should be routed to that
-  # worker: refer to the [stream writers](#stream-writers) section below for further
-  # information.
-
  # Sync requests
  - ^/_matrix/client/(r0|v3)/sync$
  - ^/_matrix/client/(api/v1|r0|v3)/events$
@@ -191,317 +187,6 @@ matrix_synapse_workers_generic_worker_endpoints:
  - ^/_matrix/client/(api/v1|r0|v3|unstable)/join/
  - ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/

-# These appear to be conditional and should not be enabled by default.
-# We need to fix up our workers-doc-to-yaml.awk parsing script to exclude them.
-# For now, they've been commented out manually.
-#  # Account data requests
-#  - ^/_matrix/client/(r0|v3|unstable)/.*/tags
-#  - ^/_matrix/client/(r0|v3|unstable)/.*/account_data
-#
-#  # Receipts requests
-#  - ^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt
-#  - ^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers
-#
-#  # Presence requests
-#  - ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/
-
-  # User directory search requests
-  # Any worker can handle these, but we have a dedicated user_dir worker for this,
-  # so we'd like for other generic workers to not try and capture these requests.
-  # - ^/_matrix/client/(r0|v3|unstable)/user_directory/search$
-
-  # Additionally, the following REST endpoints can be handled for GET requests:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(api/v1|r0|v3|unstable)/pushrules/
-
-  # Pagination requests can also be handled, but all requests for a given
-  # room must be routed to the same instance. Additionally, care must be taken to
-  # ensure that the purge history admin API is not used while pagination requests
-  # for the room are in flight:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/messages$
-
-  # Additionally, the following endpoints should be included if Synapse is configured
-  # to use SSO (you only need to include the ones for whichever SSO provider you're
-  # using):
-
-  # for all SSO providers
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(api/v1|r0|v3|unstable)/login/sso/redirect
-  # ^/_synapse/client/pick_idp$
-  # ^/_synapse/client/pick_username
-  # ^/_synapse/client/new_user_consent$
-  # ^/_synapse/client/sso_register$
-
-  # OpenID Connect requests.
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_synapse/client/oidc/callback$
-
-  # SAML requests.
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_synapse/client/saml2/authn_response$
-
-  # CAS requests.
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(api/v1|r0|v3|unstable)/login/cas/ticket$
-
-  # Ensure that all SSO logins go to a single process.
-  # For multiple workers not handling the SSO endpoints properly, see
-  # [#7530](https://github.com/matrix-org/synapse/issues/7530) and
-  # [#9427](https://github.com/matrix-org/synapse/issues/9427).
-
-  # Note that a [HTTP listener](usage/configuration/config_documentation.md#listeners)
-  # with `client` and `federation` `resources` must be configured in the `worker_listeners`
-  # option in the worker config.
-
-  # #### Load balancing
-
-  # It is possible to run multiple instances of this worker app, with incoming requests
-  # being load-balanced between them by the reverse-proxy. However, different endpoints
-  # have different characteristics and so admins
-  # may wish to run multiple groups of workers handling different endpoints so that
-  # load balancing can be done in different ways.
-
-  # For `/sync` and `/initialSync` requests it will be more efficient if all
-  # requests from a particular user are routed to a single instance. Extracting a
-  # user ID from the access token or `Authorization` header is currently left as an
-  # exercise for the reader. Admins may additionally wish to separate out `/sync`
-  # requests that have a `since` query parameter from those that don't (and
-  # `/initialSync`), as requests that don't are known as "initial sync" that happens
-  # when a user logs in on a new device and can be *very* resource intensive, so
-  # isolating these requests will stop them from interfering with other users ongoing
-  # syncs.
-
-  # Federation and client requests can be balanced via simple round robin.
-
-  # The inbound federation transaction request `^/_matrix/federation/v1/send/`
-  # should be balanced by source IP so that transactions from the same remote server
-  # go to the same process.
-
-  # Registration/login requests can be handled separately purely to help ensure that
-  # unexpected load doesn't affect new logins and sign ups.
-
-  # Finally, event sending requests can be balanced by the room ID in the URI (or
-  # the full URI, or even just round robin), the room ID is the path component after
-  # `/rooms/`. If there is a large bridge connected that is sending or may send lots
-  # of events, then a dedicated set of workers can be provisioned to limit the
-  # effects of bursts of events from that bridge on events sent by normal users.
-
-  # #### Stream writers
-
-  # Additionally, the writing of specific streams (such as events) can be moved off
-  # of the main process to a particular worker.
-
-  # To enable this, the worker must have a
-  # [HTTP `replication` listener](usage/configuration/config_documentation.md#listeners) configured,
-  # have a `worker_name` and be listed in the `instance_map` config. The same worker
-  # can handle multiple streams, but unless otherwise documented, each stream can only
-  # have a single writer.
-
-  # For example, to move event persistence off to a dedicated worker, the shared
-  # configuration would include:
-
-  # ```yaml
-  # instance_map:
-  #     event_persister1:
-  #         host: localhost
-  #         port: 8034
-
-  # stream_writers:
-  #     events: event_persister1
-  # ```
-
-  # An example for a stream writer instance:
-
-  # ```yaml
-  # {{#include systemd-with-workers/workers/event_persister.yaml}}
-  # ```
-
-  # Some of the streams have associated endpoints which, for maximum efficiency, should
-  # be routed to the workers handling that stream. See below for the currently supported
-  # streams and the endpoints associated with them:
-
-  # ##### The `events` stream
-
-  # The `events` stream experimentally supports having multiple writers, where work
-  # is sharded between them by room ID. Note that you *must* restart all worker
-  # instances when adding or removing event persisters. An example `stream_writers`
-  # configuration with multiple writers:
-
-  # ```yaml
-  # stream_writers:
-  #     events:
-  #         - event_persister1
-  #         - event_persister2
-  # ```
-
-  # ##### The `typing` stream
-
-  # The following endpoints should be routed directly to the worker configured as
-  # the stream writer for the `typing` stream:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/typing
-
-  # ##### The `to_device` stream
-
-  # The following endpoints should be routed directly to the worker configured as
-  # the stream writer for the `to_device` stream:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(r0|v3|unstable)/sendToDevice/
-
-  # ##### The `account_data` stream
-
-  # The following endpoints should be routed directly to the worker configured as
-  # the stream writer for the `account_data` stream:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(r0|v3|unstable)/.*/tags
-  # ^/_matrix/client/(r0|v3|unstable)/.*/account_data
-
-  # ##### The `receipts` stream
-
-  # The following endpoints should be routed directly to the worker configured as
-  # the stream writer for the `receipts` stream:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(r0|v3|unstable)/rooms/.*/receipt
-  # ^/_matrix/client/(r0|v3|unstable)/rooms/.*/read_markers
-
-  # ##### The `presence` stream
-
-  # The following endpoints should be routed directly to the worker configured as
-  # the stream writer for the `presence` stream:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/
-
-  # #### Background tasks
-
-  # There is also support for moving background tasks to a separate
-  # worker. Background tasks are run periodically or started via replication. Exactly
-  # which tasks are configured to run depends on your Synapse configuration (e.g. if
-  # stats is enabled). This worker doesn't handle any REST endpoints itself.
-
-  # To enable this, the worker must have a `worker_name` and can be configured to run
-  # background tasks. For example, to move background tasks to a dedicated worker,
-  # the shared configuration would include:
-
-  # ```yaml
-  # run_background_tasks_on: background_worker
-  # ```
-
-  # You might also wish to investigate the `update_user_directory_from_worker` and
-  # `media_instance_running_background_jobs` settings.
-
-  # An example for a dedicated background worker instance:
-
-  # ```yaml
-  # {{#include systemd-with-workers/workers/background_worker.yaml}}
-  # ```
-
-  # #### Updating the User Directory
-
-  # You can designate one generic worker to update the user directory.
-
-  # Specify its name in the shared configuration as follows:
-
-  # ```yaml
-  # update_user_directory_from_worker: worker_name
-  # ```
-
-  # This work cannot be load-balanced; please ensure the main process is restarted
-  # after setting this option in the shared configuration!
-
-  # User directory updates allow REST endpoints matching the following regular
-  # expressions to work:
-
-  # FIXME: ADDITIONAL CONDITIONS REQUIRED: to be enabled manually
-  # ^/_matrix/client/(r0|v3|unstable)/user_directory/search$
-
-  # The above endpoints can be routed to any worker, though you may choose to route
-  # it to the chosen user directory worker.
-
-  # This style of configuration supersedes the legacy `synapse.app.user_dir`
-  # worker application type.
-
-
-  # #### Notifying Application Services
-
-  # You can designate one generic worker to send output traffic to Application Services.
-  # Doesn't handle any REST endpoints itself, but you should specify its name in the
-  # shared configuration as follows:
-
-  # ```yaml
-  # notify_appservices_from_worker: worker_name
-  # ```
-
-  # This work cannot be load-balanced; please ensure the main process is restarted
-  # after setting this option in the shared configuration!
-
-  # This style of configuration supersedes the legacy `synapse.app.appservice`
-  # worker application type.
-
-
-# pusher worker (no API endpoints) [
-  # Handles sending push notifications to sygnal and email. Doesn't handle any
-  # REST endpoints itself, but you should set `start_pushers: False` in the
-  # shared configuration file to stop the main synapse sending push notifications.
-
-  # To run multiple instances at once the `pusher_instances` option should list all
-  # pusher instances by their worker name, e.g.:
-
-  # ```yaml
-  # pusher_instances:
-  #     - pusher_worker1
-  #     - pusher_worker2
-  # ```
-
-  # An example for a pusher instance:
-
-  # ```yaml
-  # {{#include systemd-with-workers/workers/pusher_worker.yaml}}
-  # ```
-
-# ]
-
-# appservice worker (no API endpoints) [
-  # **Deprecated as of Synapse v1.59.** [Use `synapse.app.generic_worker` with the
-  # `notify_appservices_from_worker` option instead.](#notifying-application-services)
-
-  # Handles sending output traffic to Application Services. Doesn't handle any
-  # REST endpoints itself, but you should set `notify_appservices: False` in the
-  # shared configuration file to stop the main synapse sending appservice notifications.
-
-  # Note this worker cannot be load-balanced: only one instance should be active.
-
-# ]
-
-# federation_sender worker (no API endpoints) [
-  # Handles sending federation traffic to other servers. Doesn't handle any
-  # REST endpoints itself, but you should set `send_federation: False` in the
-  # shared configuration file to stop the main synapse sending this traffic.
-
-  # If running multiple federation senders then you must list each
-  # instance in the `federation_sender_instances` option by their `worker_name`.
-  # All instances must be stopped and started when adding or removing instances.
-  # For example:
-
-  # ```yaml
-  # federation_sender_instances:
-  #     - federation_sender1
-  #     - federation_sender2
-  # ```
-
-  # An example for a federation sender instance:
-
-  # ```yaml
-  # {{#include systemd-with-workers/workers/federation_sender.yaml}}
-  # ```
-# ]

 matrix_synapse_workers_media_repository_endpoints:
  # Handles the media repository. It can handle all endpoints starting with:
@@ -517,50 +202,12 @@ matrix_synapse_workers_media_repository_endpoints:
  - ^/_synapse/admin/v1/quarantine_media/.*$
  - ^/_synapse/admin/v1/users/.*/media$

-  # You should also set `enable_media_repo: False` in the shared configuration
-  # file to stop the main synapse running background jobs related to managing the
-  # media repository. Note that doing so will prevent the main process from being
-  # able to handle the above endpoints.
-
-  # In the `media_repository` worker configuration file, configure the
-  # [HTTP listener](usage/configuration/config_documentation.md#listeners) to
-  # expose the `media` resource. For example:
-
-  # ```yaml
-  # {{#include systemd-with-workers/workers/media_worker.yaml}}
-  # ```
-
-  # Note that if running multiple media repositories they must be on the same server
-  # and you must configure a single instance to run the background tasks, e.g.:
-
-  # ```yaml
-  # media_instance_running_background_jobs: "media-repository-1"
-  # ```
-
-  # Note that if a reverse proxy is used , then `/_matrix/media/` must be routed for both inbound client and federation requests (if they are handled separately).
-
 matrix_synapse_workers_user_dir_endpoints:
-  # **Deprecated as of Synapse v1.59.** [Use `synapse.app.generic_worker` with the
-  # `update_user_directory_from_worker` option instead.](#updating-the-user-directory)
-
  # Handles searches in the user directory. It can handle REST endpoints matching
  # the following regular expressions:

  - ^/_matrix/client/(r0|v3|unstable)/user_directory/search$

-  # When using this worker you must also set `update_user_directory: false` in the
-  # shared configuration file to stop the main synapse running background
-  # jobs related to updating the user directory.
-
-  # Above endpoint is not *required* to be routed to this worker. By default,
-  # `update_user_directory` is set to `true`, which means the main process
-  # will handle updates. All workers configured with `client` can handle the above
-  # endpoint as long as either this worker or the main process are configured to
-  # handle it, and are online.
-
-  # If `update_user_directory` is set to `false`, and this worker is not running,
-  # the above endpoint may give outdated results.
-
 matrix_synapse_workers_avail_list:
  - appservice
  - federation_sender