From a94ebe7424d69806445a3f18c257d2ff5db38678 Mon Sep 17 00:00:00 2001 From: Antoine-Ali Zarrouk <3798576+sidewinder94@users.noreply.github.com> Date: Tue, 20 Jun 2023 15:03:12 +0200 Subject: [PATCH 1/3] Add documentation to detail the steps needed to federate using SRV delegation --- docs/howto-server-delegation.md | 1 + docs/howto-srv-server-delegation.md | 203 ++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 docs/howto-srv-server-delegation.md diff --git a/docs/howto-server-delegation.md b/docs/howto-server-delegation.md index e9ca5bde6..36b0c2ab8 100644 --- a/docs/howto-server-delegation.md +++ b/docs/howto-server-delegation.md @@ -49,6 +49,7 @@ To use DNS SRV record validation, you need to: - ensure that you are serving the Matrix Federation API (tcp/8448) with a certificate for `` (not `matrix.`!). Getting this certificate to the `matrix.` server may be complicated. The playbook's automatic SSL obtaining/renewal flow will likely not work and you'll need to copy certificates around manually. See below. +For more details on [how to configure the playbook to work with SRV delegation]() ### Obtaining certificates diff --git a/docs/howto-srv-server-delegation.md b/docs/howto-srv-server-delegation.md new file mode 100644 index 000000000..cb0263045 --- /dev/null +++ b/docs/howto-srv-server-delegation.md @@ -0,0 +1,203 @@ +# Server Delegation via a DNS SRV record (advanced) + +**Reminder** : unless you are affected by the [Downsides of well-known-based Server Delegation](#downsides-of-well-known-based-server-delegation), we suggest you stay on the simple/default path : [Server Delegation](howto-server-delegation.md) + +## Prerequisites + +SRV delegation while still using the playbook provided traefik to get / renew the certificate requires a wildcard certificate. + +To obtain / renew one from let's encrypt, one needs to use a dns-01 challenge method instead of the default http-01. 
+ +This means that thi is limited to the list of providers supported by traefik, unless you bring in your own certificate. + +The up-to-date list can be accessed on [traefik's documentation](https://doc.traefik.io/traefik/https/acme/#providers) + +## The changes + +### Federation Endpoint + +```yaml +# To serve the federation from any domain, as long as the path match +matrix_nginx_proxy_container_labels_traefik_proxy_matrix_federation_rule: PathPrefix(`/_matrix`) +``` + +This is because with SRV federation, some servers / tools (one of which being the federation tester) try to access the federation API using the resolved IP address instead of the domain name (or they are not using SNI). This change will make traefik route all traffic for which the path match this rule go to the federation endpoint. + +### Tell Traefik which certificate to serve for the federation endpoint + +Now that the federation endpoint is not bound to a domain anymore we need to explicitely tell traefik to use a wildcard certificate in addition to one containing the base name. + +This is because the matrix specification expects the federation endpoint to be served using a certificate comatible with the base domain, however, the other resources on the endpoint still need a valid certificate to work. + +```yaml +# To let traefik know which domains certificates to serve +matrix_nginx_proxy_container_labels_additional_labels: | + traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.main="example.com" + traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.sans="*.example.com" +``` + +### Configure the DNS-01 challenge for let's encrypt + +Since we're now requesting a wildcard certificate, we need to change the ACME challenge method. To request a wildcard certificate from let's encrypt we are required to use the DNS-01 challenge. + +This will need 3 changes : +1. Add a new certificate resolver that works with DNS-01 +2. 
Configure the resolver to allow access to the DNS zone to configure the reocrds to answer the challenge (refer to [traefik's documentation](https://doc.traefik.io/traefik/https/acme/#providers) to know which environment variables to set) +3. Tell the playbook to use the new resolver as default + +We cannot just disable the default resolver as that would disable SSL in quite a few places in the playbook. + + +```yaml +# 1. Add a new ACME configuration without having to disable the default one, since it would have a wide range of side effects +devture_traefik_configuration_extension_yaml: | + certificatesResolvers: + dns: + acme: + # caServer: https://acme-staging-v02.api.letsencrypt.org/directory + email: {{ devture_traefik_config_certificatesResolvers_acme_email | to_json }} + dnsChallenge: + provider: cloudflare + resolvers: + - "1.1.1.1:53" + - "8.8.8.8:53" + storage: {{ devture_traefik_config_certificatesResolvers_acme_storage | to_json }} + +# 2. Configure the environment variables needed by traefik to automate the ACME DNS CHallenge (example for cloudflare) +devture_traefik_environment_variables: | + CF_API_EMAIL=redacted + CF_ZONE_API_TOKEN=redacted + CF_DNS_API_TOKEN=redacted + LEGO_DISABLE_CNAME_SUPPORT=true + +# 3. Instruct thep laybook to use the new ACME configuration +devture_traefik_certResolver_primary: "dns" +``` + +## Adjust coturn's configuration + +Last step is to alter the generated coturn configuration. + +By default, it is configured to wait on the certificate for the matrix subdomain using an instanced service using the domain name as the parameter for this service. However, we need to serve the wildcard certificate, which is incompatible with systemd, it will try to exxpand the "*" preventing coturn from starting. + +We also need to indicate to coturn where the wildcard certificate is. + +**⚠ WARNING ⚠** : On first start of the services, coturn might still fail to start because traefik is still in thep rocess of obtaining the certificates. 
If you still get an error, make sure traefik obtained the certificates and restart the coturn service. + +This should not happen again afterwards as traefik will renew certificates well before their expiry date, and the coturn service is setup to restart periodically. + +```yaml +# Only depend on docker.service, this removes the dependency on the certificate exporter, might imply the need to manually restart coturn on the first installation once the certificates are obtained, afterwards, the reload service should handle things +matrix_coturn_systemd_required_services_list: ['docker.service'] + +# This changes the path of the loaded certificate, while maintaining the original functionality, we're now loading the wildcard certificate. +matrix_coturn_container_additional_volumes: | + {{ + ( + [ + { + 'src': (matrix_ssl_config_dir_path + '/live/*.' + matrix_domain + '/fullchain.pem'), + 'dst': '/fullchain.pem', + 'options': 'ro', + }, + { + 'src': (matrix_ssl_config_dir_path + '/live/*.' + matrix_domain + '/privkey.pem'), + 'dst': '/privkey.pem', + 'options': 'ro', + }, + ] if matrix_playbook_reverse_proxy_type in ['playbook-managed-nginx', 'other-nginx-non-container'] and matrix_coturn_tls_enabled else [] + ) + + + ( + [ + { + 'src': (devture_traefik_certs_dumper_dumped_certificates_dir_path + '/*.' + matrix_domain + '/certificate.crt'), + 'dst': '/certificate.crt', + 'options': 'ro', + }, + { + 'src': (devture_traefik_certs_dumper_dumped_certificates_dir_path + '/*.' 
+ matrix_domain + '/privatekey.key'), + 'dst': '/privatekey.key', + 'options': 'ro', + }, + ] if matrix_playbook_reverse_proxy_type in ['playbook-managed-traefik', 'other-traefik-container'] and devture_traefik_certs_dumper_enabled and matrix_coturn_tls_enabled else [] + ) + }} +``` + +## Full example of a working configuration + +```yaml +# Choosing the reverse proxy implementation +matrix_playbook_reverse_proxy_type: playbook-managed-traefik +devture_traefik_config_certificatesResolvers_acme_email: redacted@example.com + +# To serve the federation from any domain, as long as the path match +matrix_nginx_proxy_container_labels_traefik_proxy_matrix_federation_rule: PathPrefix(`/_matrix`) + +# To let traefik know which domains certificates to serve +matrix_nginx_proxy_container_labels_additional_labels: | + traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.main="example.com" + traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.sans="*.example.com" + +# Add a new ACME configuration without having to disable the default one, since it would have a wide range of side effects +devture_traefik_configuration_extension_yaml: | + certificatesResolvers: + dns: + acme: + # caServer: https://acme-staging-v02.api.letsencrypt.org/directory + email: {{ devture_traefik_config_certificatesResolvers_acme_email | to_json }} + dnsChallenge: + provider: cloudflare + resolvers: + - "1.1.1.1:53" + - "8.8.8.8:53" + storage: {{ devture_traefik_config_certificatesResolvers_acme_storage | to_json }} + +# Instruct thep laybook to use the new ACME configuration +devture_traefik_certResolver_primary: "dns" + +# Configure the environment variables needed by traefik to automate the ACME DNS CHallenge +devture_traefik_environment_variables: | + CF_API_EMAIL=redacted + CF_ZONE_API_TOKEN=redacted + CF_DNS_API_TOKEN=redacted + LEGO_DISABLE_CNAME_SUPPORT=true + +# Only depend on docker.service, this removes the dependency on the certificate exporter, might imply 
the need to manually restart coturn on the first installation once the certificates are obtained, afterwards, the reload service should handle things +matrix_coturn_systemd_required_services_list: ['docker.service'] + +# This changes the path of the loaded certificate, while maintaining the original functionality, we're now loading the wildcard certificate. +matrix_coturn_container_additional_volumes: | + {{ + ( + [ + { + 'src': (matrix_ssl_config_dir_path + '/live/*.' + matrix_domain + '/fullchain.pem'), + 'dst': '/fullchain.pem', + 'options': 'ro', + }, + { + 'src': (matrix_ssl_config_dir_path + '/live/*.' + matrix_domain + '/privkey.pem'), + 'dst': '/privkey.pem', + 'options': 'ro', + }, + ] if matrix_playbook_reverse_proxy_type in ['playbook-managed-nginx', 'other-nginx-non-container'] and matrix_coturn_tls_enabled else [] + ) + + + ( + [ + { + 'src': (devture_traefik_certs_dumper_dumped_certificates_dir_path + '/*.' + matrix_domain + '/certificate.crt'), + 'dst': '/certificate.crt', + 'options': 'ro', + }, + { + 'src': (devture_traefik_certs_dumper_dumped_certificates_dir_path + '/*.' 
+ matrix_domain + '/privatekey.key'), + 'dst': '/privatekey.key', + 'options': 'ro', + }, + ] if matrix_playbook_reverse_proxy_type in ['playbook-managed-traefik', 'other-traefik-container'] and devture_traefik_certs_dumper_enabled and matrix_coturn_tls_enabled else [] + ) + }} +``` \ No newline at end of file From 9ab50ac8de89588accb9748c67bff8c8087833b3 Mon Sep 17 00:00:00 2001 From: Antoine-Ali ZARROUK <3798576+sidewinder94@users.noreply.github.com> Date: Tue, 20 Jun 2023 16:32:50 +0200 Subject: [PATCH 2/3] Fix inter-doc links --- docs/howto-server-delegation.md | 2 +- docs/howto-srv-server-delegation.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/howto-server-delegation.md b/docs/howto-server-delegation.md index 36b0c2ab8..e9a6b23d8 100644 --- a/docs/howto-server-delegation.md +++ b/docs/howto-server-delegation.md @@ -49,7 +49,7 @@ To use DNS SRV record validation, you need to: - ensure that you are serving the Matrix Federation API (tcp/8448) with a certificate for `` (not `matrix.`!). Getting this certificate to the `matrix.` server may be complicated. The playbook's automatic SSL obtaining/renewal flow will likely not work and you'll need to copy certificates around manually. See below. 
-For more details on [how to configure the playbook to work with SRV delegation]() +For more details, see [how to configure the playbook to work with SRV delegation](howto-srv-server-delegation.md). ### Obtaining certificates diff --git a/docs/howto-srv-server-delegation.md b/docs/howto-srv-server-delegation.md index cb0263045..284977aa4 100644 --- a/docs/howto-srv-server-delegation.md +++ b/docs/howto-srv-server-delegation.md @@ -1,6 +1,6 @@ # Server Delegation via a DNS SRV record (advanced) -**Reminder** : unless you are affected by the [Downsides of well-known-based Server Delegation](#downsides-of-well-known-based-server-delegation), we suggest you stay on the simple/default path : [Server Delegation](howto-server-delegation.md) +**Reminder** : unless you are affected by the [Downsides of well-known-based Server Delegation](howto-server-delegation.md#downsides-of-well-known-based-server-delegation), we suggest you stay on the simple/default path : [Server Delegation](howto-server-delegation.md) ## Prerequisites From eef0fd69bfba2974a148fdeb19a4a793e0ba2de2 Mon Sep 17 00:00:00 2001 From: Slavi Pantaleev Date: Wed, 21 Jun 2023 09:56:16 +0300 Subject: [PATCH 3/3] Fix some typos, add more links --- docs/howto-srv-server-delegation.md | 51 +++++++++++++++-------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/docs/howto-srv-server-delegation.md b/docs/howto-srv-server-delegation.md index 284977aa4..a1afe59de 100644 --- a/docs/howto-srv-server-delegation.md +++ b/docs/howto-srv-server-delegation.md @@ -1,14 +1,16 @@ # Server Delegation via a DNS SRV record (advanced) -**Reminder** : unless you are affected by the [Downsides of well-known-based Server 
Delegation](howto-server-delegation.md#downsides-of-well-known-based-server-delegation), we suggest you **stay on the simple/default path**: [Server Delegation](howto-server-delegation.md) by [configuring well-known files](configuring-well-known.md) at the base domain. + +This guide is about configuring Server Delegation using DNS SRV records (for the [Traefik](https://doc.traefik.io/traefik/) webserver). This method has special requirements when it comes to SSL certificates, so various changes are required. ## Prerequisites -SRV delegation while still using the playbook provided traefik to get / renew the certificate requires a wildcard certificate. +SRV delegation while still using the playbook provided Traefik to get / renew the certificate requires a wildcard certificate. -To obtain / renew one from let's encrypt, one needs to use a dns-01 challenge method instead of the default http-01. +To obtain / renew one from [Let's Encrypt](https://letsencrypt.org/), one needs to use a [DNS-01 challenge](https://letsencrypt.org/docs/challenge-types/#dns-01-challenge) method instead of the default [HTTP-01](https://letsencrypt.org/docs/challenge-types/#http-01-challenge). -This means that thi is limited to the list of providers supported by traefik, unless you bring in your own certificate. +This means that this is **limited to the list of DNS providers supported by Traefik**, unless you bring in your own certificate. The up-to-date list can be accessed on [traefik's documentation](https://doc.traefik.io/traefik/https/acme/#providers) @@ -21,16 +23,16 @@ The up-to-date list can be accessed on [traefik's documentation](https://doc.tra matrix_nginx_proxy_container_labels_traefik_proxy_matrix_federation_rule: PathPrefix(`/_matrix`) ``` -This is because with SRV federation, some servers / tools (one of which being the federation tester) try to access the federation API using the resolved IP address instead of the domain name (or they are not using SNI). 
This change will make traefik route all traffic for which the path match this rule go to the federation endpoint. +This is because with SRV federation, some servers / tools (one of which being the federation tester) try to access the federation API using the resolved IP address instead of the domain name (or they are not using SNI). This change will make Traefik route all traffic for which the path matches this rule to the federation endpoint. ### Tell Traefik which certificate to serve for the federation endpoint -Now that the federation endpoint is not bound to a domain anymore we need to explicitely tell traefik to use a wildcard certificate in addition to one containing the base name. +Now that the federation endpoint is not bound to a domain anymore we need to explicitly tell Traefik to use a wildcard certificate in addition to one containing the base name. This is because the matrix specification expects the federation endpoint to be served using a certificate comatible with the base domain, however, the other resources on the endpoint still need a valid certificate to work. ```yaml -# To let traefik know which domains certificates to serve +# To let Traefik know which domains' certificates to serve matrix_nginx_proxy_container_labels_additional_labels: | traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.main="example.com" traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.sans="*.example.com" @@ -38,22 +40,22 @@ matrix_nginx_proxy_container_labels_additional_labels: | ### Configure the DNS-01 challenge for let's encrypt -Since we're now requesting a wildcard certificate, we need to change the ACME challenge method. To request a wildcard certificate from let's encrypt we are required to use the DNS-01 challenge. +Since we're now requesting a wildcard certificate, we need to change the ACME challenge method. To request a wildcard certificate from Let's Encrypt we are required to use the DNS-01 challenge. 
-This will need 3 changes : +This will need 3 changes: 1. Add a new certificate resolver that works with DNS-01 -2. Configure the resolver to allow access to the DNS zone to configure the reocrds to answer the challenge (refer to [traefik's documentation](https://doc.traefik.io/traefik/https/acme/#providers) to know which environment variables to set) +2. Configure the resolver to allow access to the DNS zone to configure the records to answer the challenge (refer to [Traefik's documentation](https://doc.traefik.io/traefik/https/acme/#providers) to know which environment variables to set) 3. Tell the playbook to use the new resolver as default We cannot just disable the default resolver as that would disable SSL in quite a few places in the playbook. - ```yaml # 1. Add a new ACME configuration without having to disable the default one, since it would have a wide range of side effects devture_traefik_configuration_extension_yaml: | certificatesResolvers: dns: acme: + # To use a staging endpoint for testing purposes, uncomment the line below. # caServer: https://acme-staging-v02.api.letsencrypt.org/directory email: {{ devture_traefik_config_certificatesResolvers_acme_email | to_json }} dnsChallenge: @@ -63,28 +65,28 @@ devture_traefik_configuration_extension_yaml: | - "8.8.8.8:53" storage: {{ devture_traefik_config_certificatesResolvers_acme_storage | to_json }} -# 2. Configure the environment variables needed by traefik to automate the ACME DNS CHallenge (example for cloudflare) +# 2. Configure the environment variables needed by Traefik to automate the ACME DNS Challenge (example for Cloudflare) devture_traefik_environment_variables: | CF_API_EMAIL=redacted CF_ZONE_API_TOKEN=redacted CF_DNS_API_TOKEN=redacted LEGO_DISABLE_CNAME_SUPPORT=true -# 3. Instruct thep laybook to use the new ACME configuration -devture_traefik_certResolver_primary: "dns" +# 3. 
Instruct the playbook to use the new ACME configuration +devture_traefik_certResolver_primary: dns ``` -## Adjust coturn's configuration +## Adjust Coturn's configuration -Last step is to alter the generated coturn configuration. +The last step is to alter the generated Coturn configuration. -By default, it is configured to wait on the certificate for the matrix subdomain using an instanced service using the domain name as the parameter for this service. However, we need to serve the wildcard certificate, which is incompatible with systemd, it will try to exxpand the "*" preventing coturn from starting. +By default, Coturn is configured to wait on the certificate for the `matrix.` subdomain using an [instantiated systemd service](https://www.freedesktop.org/software/systemd/man/systemd.service.html#Service%20Templates) using the domain name as the parameter for this service. However, we need to serve the wildcard certificate, which is incompatible with systemd: systemd will try to expand the `*`, which will break and prevent Coturn from starting. -We also need to indicate to coturn where the wildcard certificate is. +We also need to indicate to Coturn where the wildcard certificate is. -**⚠ WARNING ⚠** : On first start of the services, coturn might still fail to start because traefik is still in thep rocess of obtaining the certificates. If you still get an error, make sure traefik obtained the certificates and restart the coturn service. +**⚠ WARNING ⚠** : On first start of the services, Coturn might still fail to start because Traefik is still in the process of obtaining the certificates. If you still get an error, make sure Traefik obtained the certificates and restart the Coturn service (`just start-group coturn`). -This should not happen again afterwards as traefik will renew certificates well before their expiry date, and the coturn service is setup to restart periodically. 
+This should not happen again afterwards as Traefik will renew certificates well before their expiry date, and the Coturn service is set up to restart periodically. ```yaml # Only depend on docker.service, this removes the dependency on the certificate exporter, might imply the need to manually restart coturn on the first installation once the certificates are obtained, afterwards, the reload service should handle things @@ -135,7 +137,7 @@ devture_traefik_config_certificatesResolvers_acme_email: redacted@example.com # To serve the federation from any domain, as long as the path match matrix_nginx_proxy_container_labels_traefik_proxy_matrix_federation_rule: PathPrefix(`/_matrix`) -# To let traefik know which domains certificates to serve +# To let Traefik know which domains' certificates to serve matrix_nginx_proxy_container_labels_additional_labels: | traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.main="example.com" traefik.http.routers.matrix-nginx-proxy-matrix-federation.tls.domains.sans="*.example.com" @@ -145,6 +147,7 @@ devture_traefik_configuration_extension_yaml: | certificatesResolvers: dns: acme: + # To use a staging endpoint for testing purposes, uncomment the line below. 
# caServer: https://acme-staging-v02.api.letsencrypt.org/directory email: {{ devture_traefik_config_certificatesResolvers_acme_email | to_json }} dnsChallenge: @@ -157,14 +160,14 @@ devture_traefik_configuration_extension_yaml: | # Instruct thep laybook to use the new ACME configuration devture_traefik_certResolver_primary: "dns" -# Configure the environment variables needed by traefik to automate the ACME DNS CHallenge +# Configure the environment variables needed by Traefik to automate the ACME DNS Challenge (example for Cloudflare) devture_traefik_environment_variables: | CF_API_EMAIL=redacted CF_ZONE_API_TOKEN=redacted CF_DNS_API_TOKEN=redacted LEGO_DISABLE_CNAME_SUPPORT=true -# Only depend on docker.service, this removes the dependency on the certificate exporter, might imply the need to manually restart coturn on the first installation once the certificates are obtained, afterwards, the reload service should handle things +# Only depend on docker.service, this removes the dependency on the certificate exporter, might imply the need to manually restart Coturn on the first installation once the certificates are obtained, afterwards, the reload service should handle things matrix_coturn_systemd_required_services_list: ['docker.service'] # This changes the path of the loaded certificate, while maintaining the original functionality, we're now loading the wildcard certificate. @@ -200,4 +203,4 @@ matrix_coturn_container_additional_volumes: | ] if matrix_playbook_reverse_proxy_type in ['playbook-managed-traefik', 'other-traefik-container'] and devture_traefik_certs_dumper_enabled and matrix_coturn_tls_enabled else [] ) }} -``` \ No newline at end of file +```