[PATCH] kernel-5.4: backport latest patches for wireguard

Stijn Segers foss at volatilesystems.org
Fri Jun 18 14:07:08 PDT 2021


Hi,

On Sunday 6 June 2021 at 12:37, Jason A. Donenfeld <Jason at zx2c4.com> wrote:
> These are the latest patches that just landed upstream for 5.13, will be
> backported by Greg into 5.10 (because of stable@), and are now in the
> 5.4 backport branch of wireguard:
> https://git.zx2c4.com/wireguard-linux/log/?h=backport-5.4.y
> 

Probably not the most kosher way, but I've run-tested this patch set on 
21.02 HEAD, octeon (EdgeRouter 4), not on master. Works as expected.

Thank you for the hard work!

> Cc: Ilya Lipnitskiy <ilya.lipnitskiy at gmail.com>
> Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>

Tested-by: Stijn Segers <foss at volatilesystems.org>

> ---
>  ...y1305-enable-for-all-MIPS-processors.patch |  60 ++
>  ...ps-add-poly1305-core.S-to-.gitignore.patch |  24 +
>  ...fix-poly1305_core_setkey-declaration.patch | 172 ++++++
>  ...sts-remove-old-conntrack-kconfig-val.patch |  29 +
>  ...sts-make-sure-rp_filter-is-disabled-.patch |  31 ++
>  ...reguard-0129-wireguard-do-not-use-O3.patch |  33 ++
>  ...nchronize_net-rather-than-synchroniz.patch |  66 +++
>  ...ireguard-peer-allocate-in-kmem_cache.patch | 125 +++++
>  ...dips-initialize-list-head-in-selftes.patch |  43 ++
>  ...guard-allowedips-remove-nodes-in-O-1.patch | 237 ++++++++
>  ...owedips-allocate-nodes-in-kmem_cache.patch | 173 ++++++
>  ...dips-free-empty-intermediate-nodes-w.patch | 521 ++++++++++++++++++
>  12 files changed, 1514 insertions(+)
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0124-crypto-mips-poly1305-enable-for-all-MIPS-processors.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0125-crypto-mips-add-poly1305-core.S-to-.gitignore.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0126-crypto-poly1305-fix-poly1305_core_setkey-declaration.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0127-wireguard-selftests-remove-old-conntrack-kconfig-val.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0128-wireguard-selftests-make-sure-rp_filter-is-disabled-.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0129-wireguard-do-not-use-O3.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0130-wireguard-use-synchronize_net-rather-than-synchroniz.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0131-wireguard-peer-allocate-in-kmem_cache.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0132-wireguard-allowedips-initialize-list-head-in-selftes.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0134-wireguard-allowedips-allocate-nodes-in-kmem_cache.patch
>  create mode 100644 target/linux/generic/backport-5.4/080-wireguard-0135-wireguard-allowedips-free-empty-intermediate-nodes-w.patch
> 
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0124-crypto-mips-poly1305-enable-for-all-MIPS-processors.patch b/target/linux/generic/backport-5.4/080-wireguard-0124-crypto-mips-poly1305-enable-for-all-MIPS-processors.patch
> new file mode 100644
> index 0000000000..c0ee841b02
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0124-crypto-mips-poly1305-enable-for-all-MIPS-processors.patch
> @@ -0,0 +1,60 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Maciej W. Rozycki" <macro at orcam.me.uk>
> +Date: Thu, 11 Mar 2021 21:50:47 -0700
> +Subject: [PATCH] crypto: mips/poly1305 - enable for all MIPS processors
> +
> +commit 6c810cf20feef0d4338e9b424ab7f2644a8b353e upstream.
> +
> +The MIPS Poly1305 implementation is generic MIPS code written such as to
> +support down to the original MIPS I and MIPS III ISA for the 32-bit and
> +64-bit variant respectively.  Lift the current limitation then to enable
> +code for MIPSr1 ISA or newer processors only and have it available for
> +all MIPS processors.
> +
> +Signed-off-by: Maciej W. Rozycki <macro at orcam.me.uk>
> +Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation")
> +Cc: stable at vger.kernel.org # v5.5+
> +Acked-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: Thomas Bogendoerfer <tsbogend at alpha.franken.de>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + arch/mips/crypto/Makefile | 4 ++--
> + crypto/Kconfig            | 2 +-
> + drivers/net/Kconfig       | 2 +-
> + 3 files changed, 4 insertions(+), 4 deletions(-)
> +
> +--- a/arch/mips/crypto/Makefile
> ++++ b/arch/mips/crypto/Makefile
> +@@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to
> + obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
> + poly1305-mips-y := poly1305-core.o poly1305-glue.o
> +
> +-perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
> +-perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
> ++perlasm-flavour-$(CONFIG_32BIT) := o32
> ++perlasm-flavour-$(CONFIG_64BIT) := 64
> +
> + quiet_cmd_perlasm = PERLASM $@
> +       cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
> +--- a/crypto/Kconfig
> ++++ b/crypto/Kconfig
> +@@ -740,7 +740,7 @@ config CRYPTO_POLY1305_X86_64
> +
> + config CRYPTO_POLY1305_MIPS
> + 	tristate "Poly1305 authenticator algorithm (MIPS optimized)"
> +-	depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
> ++	depends on MIPS
> + 	select CRYPTO_ARCH_HAVE_LIB_POLY1305
> +
> + config CRYPTO_MD4
> +--- a/drivers/net/Kconfig
> ++++ b/drivers/net/Kconfig
> +@@ -92,7 +92,7 @@ config WIREGUARD
> + 	select CRYPTO_POLY1305_ARM if ARM
> + 	select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
> + 	select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
> +-	select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
> ++	select CRYPTO_POLY1305_MIPS if MIPS
> + 	help
> + 	  WireGuard is a secure, fast, and easy to use replacement for IPSec
> + 	  that uses modern cryptography and clever networking tricks. It's
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0125-crypto-mips-add-poly1305-core.S-to-.gitignore.patch b/target/linux/generic/backport-5.4/080-wireguard-0125-crypto-mips-add-poly1305-core.S-to-.gitignore.patch
> new file mode 100644
> index 0000000000..856d67d5b8
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0125-crypto-mips-add-poly1305-core.S-to-.gitignore.patch
> @@ -0,0 +1,24 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: Ilya Lipnitskiy <ilya.lipnitskiy at gmail.com>
> +Date: Sat, 27 Mar 2021 19:39:43 -0700
> +Subject: [PATCH] crypto: mips: add poly1305-core.S to .gitignore
> +
> +commit dc92d0df51dc61de88bf6f4884a17bf73d5c6326 upstream.
> +
> +poly1305-core.S is an auto-generated file, so it should be ignored.
> +
> +Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation")
> +Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy at gmail.com>
> +Cc: Ard Biesheuvel <ardb at kernel.org>
> +Signed-off-by: Thomas Bogendoerfer <tsbogend at alpha.franken.de>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + arch/mips/crypto/.gitignore | 2 ++
> + 1 file changed, 2 insertions(+)
> + create mode 100644 arch/mips/crypto/.gitignore
> +
> +--- /dev/null
> ++++ b/arch/mips/crypto/.gitignore
> +@@ -0,0 +1,2 @@
> ++# SPDX-License-Identifier: GPL-2.0-only
> ++poly1305-core.S
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0126-crypto-poly1305-fix-poly1305_core_setkey-declaration.patch b/target/linux/generic/backport-5.4/080-wireguard-0126-crypto-poly1305-fix-poly1305_core_setkey-declaration.patch
> new file mode 100644
> index 0000000000..ded6625aeb
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0126-crypto-poly1305-fix-poly1305_core_setkey-declaration.patch
> @@ -0,0 +1,172 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: Arnd Bergmann <arnd at arndb.de>
> +Date: Mon, 22 Mar 2021 18:05:15 +0100
> +Subject: [PATCH] crypto: poly1305 - fix poly1305_core_setkey() declaration
> +MIME-Version: 1.0
> +Content-Type: text/plain; charset=UTF-8
> +Content-Transfer-Encoding: 8bit
> +
> +commit 8d195e7a8ada68928f2aedb2c18302a4518fe68e upstream.
> +
> +gcc-11 points out a mismatch between the declaration and the definition
> +of poly1305_core_setkey():
> +
> +lib/crypto/poly1305-donna32.c:13:67: error: argument 2 of type ‘const u8[16]’ {aka ‘const unsigned char[16]’} with mismatched bound [-Werror=array-parameter=]
> +   13 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
> +      |                                                          ~~~~~~~~~^~~~~~~~~~~
> +In file included from lib/crypto/poly1305-donna32.c:11:
> +include/crypto/internal/poly1305.h:21:68: note: previously declared as ‘const u8 *’ {aka ‘const unsigned char *’}
> +   21 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key);
> +
> +This is harmless in principle, as the calling conventions are the same,
> +but the more specific prototype allows better type checking in the
> +caller.
> +
> +Change the declaration to match the actual function definition.
> +The poly1305_simd_init() is a bit suspicious here, as it previously
> +had a 32-byte argument type, but looks like it needs to take the
> +16-byte POLY1305_BLOCK_SIZE array instead.
> +
> +Fixes: 1c08a104360f ("crypto: poly1305 - add new 32 and 64-bit generic versions")
> +Signed-off-by: Arnd Bergmann <arnd at arndb.de>
> +Reviewed-by: Ard Biesheuvel <ardb at kernel.org>
> +Reviewed-by: Eric Biggers <ebiggers at google.com>
> +Signed-off-by: Herbert Xu <herbert at gondor.apana.org.au>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + arch/arm/crypto/poly1305-glue.c    | 2 +-
> + arch/arm64/crypto/poly1305-glue.c  | 2 +-
> + arch/mips/crypto/poly1305-glue.c   | 2 +-
> + arch/x86/crypto/poly1305_glue.c    | 6 +++---
> + include/crypto/internal/poly1305.h | 3 ++-
> + include/crypto/poly1305.h          | 6 ++++--
> + lib/crypto/poly1305-donna32.c      | 3 ++-
> + lib/crypto/poly1305-donna64.c      | 3 ++-
> + lib/crypto/poly1305.c              | 3 ++-
> + 9 files changed, 18 insertions(+), 12 deletions(-)
> +
> +--- a/arch/arm/crypto/poly1305-glue.c
> ++++ b/arch/arm/crypto/poly1305-glue.c
> +@@ -29,7 +29,7 @@ void __weak poly1305_blocks_neon(void *s
> +
> + static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> +
> +-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
> ++void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
> + {
> + 	poly1305_init_arm(&dctx->h, key);
> + 	dctx->s[0] = get_unaligned_le32(key + 16);
> +--- a/arch/arm64/crypto/poly1305-glue.c
> ++++ b/arch/arm64/crypto/poly1305-glue.c
> +@@ -25,7 +25,7 @@ asmlinkage void poly1305_emit(void *stat
> +
> + static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
> +
> +-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
> ++void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
> + {
> + 	poly1305_init_arm64(&dctx->h, key);
> + 	dctx->s[0] = get_unaligned_le32(key + 16);
> +--- a/arch/mips/crypto/poly1305-glue.c
> ++++ b/arch/mips/crypto/poly1305-glue.c
> +@@ -17,7 +17,7 @@ asmlinkage void poly1305_init_mips(void
> + asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
> + asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
> +
> +-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
> ++void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
> + {
> + 	poly1305_init_mips(&dctx->h, key);
> + 	dctx->s[0] = get_unaligned_le32(key + 16);
> +--- a/arch/x86/crypto/poly1305_glue.c
> ++++ b/arch/x86/crypto/poly1305_glue.c
> +@@ -15,7 +15,7 @@
> + #include <asm/simd.h>
> +
> + asmlinkage void poly1305_init_x86_64(void *ctx,
> +-				     const u8 key[POLY1305_KEY_SIZE]);
> ++				     const u8 key[POLY1305_BLOCK_SIZE]);
> + asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
> + 				       const size_t len, const u32 padbit);
> + asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
> +@@ -80,7 +80,7 @@ static void convert_to_base2_64(void *ct
> + 	state->is_base2_26 = 0;
> + }
> +
> +-static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE])
> ++static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
> + {
> + 	poly1305_init_x86_64(ctx, key);
> + }
> +@@ -128,7 +128,7 @@ static void poly1305_simd_emit(void *ctx
> + 		poly1305_emit_avx(ctx, mac, nonce);
> + }
> +
> +-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
> ++void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
> + {
> + 	poly1305_simd_init(&dctx->h, key);
> + 	dctx->s[0] = get_unaligned_le32(&key[16]);
> +--- a/include/crypto/internal/poly1305.h
> ++++ b/include/crypto/internal/poly1305.h
> +@@ -18,7 +18,8 @@
> +  * only the ε-almost-∆-universal hash function (not the full MAC) is computed.
> +  */
> +
> +-void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key);
> ++void poly1305_core_setkey(struct poly1305_core_key *key,
> ++			  const u8 raw_key[POLY1305_BLOCK_SIZE]);
> + static inline void poly1305_core_init(struct poly1305_state *state)
> + {
> + 	*state = (struct poly1305_state){};
> +--- a/include/crypto/poly1305.h
> ++++ b/include/crypto/poly1305.h
> +@@ -58,8 +58,10 @@ struct poly1305_desc_ctx {
> + 	};
> + };
> +
> +-void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
> +-void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key);
> ++void poly1305_init_arch(struct poly1305_desc_ctx *desc,
> ++			const u8 key[POLY1305_KEY_SIZE]);
> ++void poly1305_init_generic(struct poly1305_desc_ctx *desc,
> ++			   const u8 key[POLY1305_KEY_SIZE]);
> +
> + static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
> + {
> +--- a/lib/crypto/poly1305-donna32.c
> ++++ b/lib/crypto/poly1305-donna32.c
> +@@ -10,7 +10,8 @@
> + #include <asm/unaligned.h>
> + #include <crypto/internal/poly1305.h>
> +
> +-void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
> ++void poly1305_core_setkey(struct poly1305_core_key *key,
> ++			  const u8 raw_key[POLY1305_BLOCK_SIZE])
> + {
> + 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
> + 	key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff;
> +--- a/lib/crypto/poly1305-donna64.c
> ++++ b/lib/crypto/poly1305-donna64.c
> +@@ -12,7 +12,8 @@
> +
> + typedef __uint128_t u128;
> +
> +-void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
> ++void poly1305_core_setkey(struct poly1305_core_key *key,
> ++			  const u8 raw_key[POLY1305_BLOCK_SIZE])
> + {
> + 	u64 t0, t1;
> +
> +--- a/lib/crypto/poly1305.c
> ++++ b/lib/crypto/poly1305.c
> +@@ -12,7 +12,8 @@
> + #include <linux/module.h>
> + #include <asm/unaligned.h>
> +
> +-void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
> ++void poly1305_init_generic(struct poly1305_desc_ctx *desc,
> ++			   const u8 key[POLY1305_KEY_SIZE])
> + {
> + 	poly1305_core_setkey(&desc->core_r, key);
> + 	desc->s[0] = get_unaligned_le32(key + 16);
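
Side note for anyone reading along: the warning above fires because the prototype and the definition disagree on the array parameter's bound. A minimal user-space sketch of the same mismatch (demo_* names are made up, this is not the kernel code) builds with gcc -Wall -Warray-parameter -c:

/* Standalone illustration only; poly1305_core_setkey() upstream follows
 * the same pattern. */
typedef unsigned char u8;

struct demo_key { u8 r[16]; };

/* Declaration uses a bare pointer... */
void demo_setkey(struct demo_key *key, const u8 *raw_key);

/* ...definition uses an explicit bound, so gcc-11 reports
 * "argument 2 ... with mismatched bound [-Warray-parameter=]".
 * Making the declaration say raw_key[16] as well keeps the stronger
 * type information and silences the warning. */
void demo_setkey(struct demo_key *key, const u8 raw_key[16])
{
	for (int i = 0; i < 16; ++i)
		key->r[i] = raw_key[i];
}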
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0127-wireguard-selftests-remove-old-conntrack-kconfig-val.patch b/target/linux/generic/backport-5.4/080-wireguard-0127-wireguard-selftests-remove-old-conntrack-kconfig-val.patch
> new file mode 100644
> index 0000000000..3e7d1a8e02
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0127-wireguard-selftests-remove-old-conntrack-kconfig-val.patch
> @@ -0,0 +1,29 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:30 +0200
> +Subject: [PATCH] wireguard: selftests: remove old conntrack kconfig value
> +
> +commit acf2492b51c9a3c4dfb947f4d3477a86d315150f upstream.
> +
> +On recent kernels, this config symbol is no longer used.
> +
> +Reported-by: Rui Salvaterra <rsalvaterra at gmail.com>
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + tools/testing/selftests/wireguard/qemu/kernel.config | 1 -
> + 1 file changed, 1 deletion(-)
> +
> +--- a/tools/testing/selftests/wireguard/qemu/kernel.config
> ++++ b/tools/testing/selftests/wireguard/qemu/kernel.config
> +@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y
> + CONFIG_NETFILTER_XT_NAT=y
> + CONFIG_NETFILTER_XT_MATCH_LENGTH=y
> + CONFIG_NETFILTER_XT_MARK=y
> +-CONFIG_NF_CONNTRACK_IPV4=y
> + CONFIG_NF_NAT_IPV4=y
> + CONFIG_IP_NF_IPTABLES=y
> + CONFIG_IP_NF_FILTER=y
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0128-wireguard-selftests-make-sure-rp_filter-is-disabled-.patch b/target/linux/generic/backport-5.4/080-wireguard-0128-wireguard-selftests-make-sure-rp_filter-is-disabled-.patch
> new file mode 100644
> index 0000000000..22d0f3e32e
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0128-wireguard-selftests-make-sure-rp_filter-is-disabled-.patch
> @@ -0,0 +1,31 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:31 +0200
> +Subject: [PATCH] wireguard: selftests: make sure rp_filter is disabled on
> + vethc
> +
> +commit f8873d11d4121aad35024f9379e431e0c83abead upstream.
> +
> +Some distros may enable strict rp_filter by default, which will prevent
> +vethc from receiving the packets with an unrouteable reverse path address.
> +
> +Reported-by: Hangbin Liu <liuhangbin at gmail.com>
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + tools/testing/selftests/wireguard/netns.sh | 1 +
> + 1 file changed, 1 insertion(+)
> +
> +--- a/tools/testing/selftests/wireguard/netns.sh
> ++++ b/tools/testing/selftests/wireguard/netns.sh
> +@@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_pref
> + ip1 -4 route add default dev wg0 table 51820
> + ip1 -4 rule add not fwmark 51820 table 51820
> + ip1 -4 rule add table main suppress_prefixlength 0
> ++n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter'
> + # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
> + n1 ping -W 1 -c 100 -f 192.168.99.7
> + n1 ping -W 1 -c 100 -f abab::1111
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0129-wireguard-do-not-use-O3.patch b/target/linux/generic/backport-5.4/080-wireguard-0129-wireguard-do-not-use-O3.patch
> new file mode 100644
> index 0000000000..a7890a7384
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0129-wireguard-do-not-use-O3.patch
> @@ -0,0 +1,33 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:32 +0200
> +Subject: [PATCH] wireguard: do not use -O3
> +
> +commit cc5060ca0285efe2728bced399a1955a7ce808b2 upstream.
> +
> +Apparently, various versions of gcc have O3-related miscompiles. Looking
> +at the difference between -O2 and -O3 for gcc 11 doesn't indicate
> +miscompiles, but the difference also doesn't seem so significant for
> +performance that it's worth risking.
> +
> +Link: https://lore.kernel.org/lkml/CAHk-=wjuoGyxDhAF8SsrTkN0-YfCx7E6jUN3ikC_tn2AKWTTsA@mail.gmail.com/
> +Link: https://lore.kernel.org/lkml/CAHmME9otB5Wwxp7H8bR_i2uH2esEMvoBMC8uEXBMH9p0q1s6Bw@mail.gmail.com/
> +Reported-by: Linus Torvalds <torvalds at linux-foundation.org>
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + drivers/net/wireguard/Makefile | 3 +--
> + 1 file changed, 1 insertion(+), 2 deletions(-)
> +
> +--- a/drivers/net/wireguard/Makefile
> ++++ b/drivers/net/wireguard/Makefile
> +@@ -1,5 +1,4 @@
> +-ccflags-y := -O3
> +-ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
> ++ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
> + ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
> + wireguard-y := main.o
> + wireguard-y += noise.o
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0130-wireguard-use-synchronize_net-rather-than-synchroniz.patch b/target/linux/generic/backport-5.4/080-wireguard-0130-wireguard-use-synchronize_net-rather-than-synchroniz.patch
> new file mode 100644
> index 0000000000..309fe36198
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0130-wireguard-use-synchronize_net-rather-than-synchroniz.patch
> @@ -0,0 +1,66 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:33 +0200
> +Subject: [PATCH] wireguard: use synchronize_net rather than synchronize_rcu
> +
> +commit 24b70eeeb4f46c09487f8155239ebfb1f875774a upstream.
> +
> +Many of the synchronization points are sometimes called under the rtnl
> +lock, which means we should use synchronize_net rather than
> +synchronize_rcu. Under the hood, this expands to using the expedited
> +flavor of function in the event that rtnl is held, in order to not stall
> +other concurrent changes.
> +
> +This fixes some very, very long delays when removing multiple peers at
> +once, which would cause some operations to take several minutes.
> +
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + drivers/net/wireguard/peer.c   | 6 +++---
> + drivers/net/wireguard/socket.c | 2 +-
> + 2 files changed, 4 insertions(+), 4 deletions(-)
> +
> +--- a/drivers/net/wireguard/peer.c
> ++++ b/drivers/net/wireguard/peer.c
> +@@ -88,7 +88,7 @@ static void peer_make_dead(struct wg_pee
> + 	/* Mark as dead, so that we don't allow jumping contexts after. */
> + 	WRITE_ONCE(peer->is_dead, true);
> +
> +-	/* The caller must now synchronize_rcu() for this to take effect. */
> ++	/* The caller must now synchronize_net() for this to take effect. */
> + }
> +
> + static void peer_remove_after_dead(struct wg_peer *peer)
> +@@ -160,7 +160,7 @@ void wg_peer_remove(struct wg_peer *peer
> + 	lockdep_assert_held(&peer->device->device_update_lock);
> +
> + 	peer_make_dead(peer);
> +-	synchronize_rcu();
> ++	synchronize_net();
> + 	peer_remove_after_dead(peer);
> + }
> +
> +@@ -178,7 +178,7 @@ void wg_peer_remove_all(struct wg_device
> + 		peer_make_dead(peer);
> + 		list_add_tail(&peer->peer_list, &dead_peers);
> + 	}
> +-	synchronize_rcu();
> ++	synchronize_net();
> + 	list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
> + 		peer_remove_after_dead(peer);
> + }
> +--- a/drivers/net/wireguard/socket.c
> ++++ b/drivers/net/wireguard/socket.c
> +@@ -430,7 +430,7 @@ void wg_socket_reinit(struct wg_device *
> + 	if (new4)
> + 		wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
> + 	mutex_unlock(&wg->socket_update_lock);
> +-	synchronize_rcu();
> ++	synchronize_net();
> + 	sock_free(old4);
> + 	sock_free(old6);
> + }
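
For context, the "expedited flavor" mentioned above comes from synchronize_net() itself: it picks the RCU variant based on whether rtnl is held. Roughly, as a simplified sketch modelled on net/core/dev.c rather than a verbatim copy:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>

/* Sketch of the behaviour wireguard now relies on: under the rtnl lock the
 * expedited grace period is used, so removing many peers no longer pays a
 * full normal grace period each time while blocking other rtnl users. */
static void sync_sketch(void)
{
	might_sleep();
	if (rtnl_is_locked())
		synchronize_rcu_expedited();	/* fast path while rtnl is held */
	else
		synchronize_rcu();		/* normal grace period otherwise */
}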
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0131-wireguard-peer-allocate-in-kmem_cache.patch b/target/linux/generic/backport-5.4/080-wireguard-0131-wireguard-peer-allocate-in-kmem_cache.patch
> new file mode 100644
> index 0000000000..32ae327037
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0131-wireguard-peer-allocate-in-kmem_cache.patch
> @@ -0,0 +1,125 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:34 +0200
> +Subject: [PATCH] wireguard: peer: allocate in kmem_cache
> +
> +commit a4e9f8e3287c9eb6bf70df982870980dd3341863 upstream.
> +
> +With deployments having upwards of 600k peers now, this somewhat heavy
> +structure could benefit from more fine-grained allocations.
> +Specifically, instead of using a 2048-byte slab for a 1544-byte object,
> +we can now use 1544-byte objects directly, thus saving almost 25%
> +per-peer, or with 600k peers, that's a savings of 303 MiB. This also
> +makes wireguard's memory usage more transparent in tools like slabtop
> +and /proc/slabinfo.
> +
> +Fixes: 8b5553ace83c ("wireguard: queueing: get rid of per-peer ring buffers")
> +Suggested-by: Arnd Bergmann <arnd at arndb.de>
> +Suggested-by: Matthew Wilcox <willy at infradead.org>
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + drivers/net/wireguard/main.c |  7 +++++++
> + drivers/net/wireguard/peer.c | 21 +++++++++++++++++----
> + drivers/net/wireguard/peer.h |  3 +++
> + 3 files changed, 27 insertions(+), 4 deletions(-)
> +
> +--- a/drivers/net/wireguard/main.c
> ++++ b/drivers/net/wireguard/main.c
> +@@ -28,6 +28,10 @@ static int __init mod_init(void)
> + #endif
> + 	wg_noise_init();
> +
> ++	ret = wg_peer_init();
> ++	if (ret < 0)
> ++		goto err_peer;
> ++
> + 	ret = wg_device_init();
> + 	if (ret < 0)
> + 		goto err_device;
> +@@ -44,6 +48,8 @@ static int __init mod_init(void)
> + err_netlink:
> + 	wg_device_uninit();
> + err_device:
> ++	wg_peer_uninit();
> ++err_peer:
> + 	return ret;
> + }
> +
> +@@ -51,6 +57,7 @@ static void __exit mod_exit(void)
> + {
> + 	wg_genetlink_uninit();
> + 	wg_device_uninit();
> ++	wg_peer_uninit();
> + }
> +
> + module_init(mod_init);
> +--- a/drivers/net/wireguard/peer.c
> ++++ b/drivers/net/wireguard/peer.c
> +@@ -15,6 +15,7 @@
> + #include <linux/rcupdate.h>
> + #include <linux/list.h>
> +
> ++static struct kmem_cache *peer_cache;
> + static atomic64_t peer_counter = ATOMIC64_INIT(0);
> +
> + struct wg_peer *wg_peer_create(struct wg_device *wg,
> +@@ -29,10 +30,10 @@ struct wg_peer *wg_peer_create(struct wg
> + 	if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
> + 		return ERR_PTR(ret);
> +
> +-	peer = kzalloc(sizeof(*peer), GFP_KERNEL);
> ++	peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL);
> + 	if (unlikely(!peer))
> + 		return ERR_PTR(ret);
> +-	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
> ++	if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)))
> + 		goto err;
> +
> + 	peer->device = wg;
> +@@ -64,7 +65,7 @@ struct wg_peer *wg_peer_create(struct wg
> + 	return peer;
> +
> + err:
> +-	kfree(peer);
> ++	kmem_cache_free(peer_cache, peer);
> + 	return ERR_PTR(ret);
> + }
> +
> +@@ -193,7 +194,8 @@ static void rcu_release(struct rcu_head
> + 	/* The final zeroing takes care of clearing any remaining handshake key
> + 	 * material and other potentially sensitive information.
> + 	 */
> +-	kzfree(peer);
> ++	memzero_explicit(peer, sizeof(*peer));
> ++	kmem_cache_free(peer_cache, peer);
> + }
> +
> + static void kref_release(struct kref *refcount)
> +@@ -225,3 +227,14 @@ void wg_peer_put(struct wg_peer *peer)
> + 		return;
> + 	kref_put(&peer->refcount, kref_release);
> + }
> ++
> ++int __init wg_peer_init(void)
> ++{
> ++	peer_cache = KMEM_CACHE(wg_peer, 0);
> ++	return peer_cache ? 0 : -ENOMEM;
> ++}
> ++
> ++void wg_peer_uninit(void)
> ++{
> ++	kmem_cache_destroy(peer_cache);
> ++}
> +--- a/drivers/net/wireguard/peer.h
> ++++ b/drivers/net/wireguard/peer.h
> +@@ -80,4 +80,7 @@ void wg_peer_put(struct wg_peer *peer);
> + void wg_peer_remove(struct wg_peer *peer);
> + void wg_peer_remove_all(struct wg_device *wg);
> +
> ++int wg_peer_init(void);
> ++void wg_peer_uninit(void);
> ++
> + #endif /* _WG_PEER_H */
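
The allocation pattern this switches to, shown in isolation (a hedged sketch with made-up demo_* names; the real code is the wg_peer_init()/wg_peer_uninit() pair above):

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

struct demo_obj {
	u64 id;
	u8 secret[32];
};

/* A dedicated cache is sized exactly to the object, instead of rounding
 * the allocation up to the next power-of-two generic slab. */
static struct kmem_cache *demo_cache;

static int demo_cache_init(void)
{
	demo_cache = KMEM_CACHE(demo_obj, 0);
	return demo_cache ? 0 : -ENOMEM;
}

static struct demo_obj *demo_obj_alloc(void)
{
	return kmem_cache_zalloc(demo_cache, GFP_KERNEL);
}

static void demo_obj_free(struct demo_obj *obj)
{
	/* Zero before freeing when the object held sensitive material,
	 * mirroring the memzero_explicit() + kmem_cache_free() pair above. */
	memzero_explicit(obj, sizeof(*obj));
	kmem_cache_free(demo_cache, obj);
}

static void demo_cache_uninit(void)
{
	kmem_cache_destroy(demo_cache);
}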
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0132-wireguard-allowedips-initialize-list-head-in-selftes.patch b/target/linux/generic/backport-5.4/080-wireguard-0132-wireguard-allowedips-initialize-list-head-in-selftes.patch
> new file mode 100644
> index 0000000000..ce4e5dcf50
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0132-wireguard-allowedips-initialize-list-head-in-selftes.patch
> @@ -0,0 +1,43 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:35 +0200
> +Subject: [PATCH] wireguard: allowedips: initialize list head in selftest
> +
> +commit 46cfe8eee285cde465b420637507884551f5d7ca upstream.
> +
> +The randomized trie tests weren't initializing the dummy peer list head,
> +resulting in a NULL pointer dereference when used. Fix this by
> +initializing it in the randomized trie test, just like we do for the
> +static unit test.
> +
> +While we're at it, all of the other strings like this have the word
> +"self-test", so add it to the missing place here.
> +
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + drivers/net/wireguard/selftest/allowedips.c | 3 ++-
> + 1 file changed, 2 insertions(+), 1 deletion(-)
> +
> +--- a/drivers/net/wireguard/selftest/allowedips.c
> ++++ b/drivers/net/wireguard/selftest/allowedips.c
> +@@ -296,6 +296,7 @@ static __init bool randomized_test(void)
> + 			goto free;
> + 		}
> + 		kref_init(&peers[i]->refcount);
> ++		INIT_LIST_HEAD(&peers[i]->allowedips_list);
> + 	}
> +
> + 	mutex_lock(&mutex);
> +@@ -333,7 +334,7 @@ static __init bool randomized_test(void)
> + 			if (wg_allowedips_insert_v4(&t,
> + 						    (struct in_addr *)mutated,
> + 						    cidr, peer, &mutex) < 0) {
> +-				pr_err("allowedips random malloc: FAIL\n");
> ++				pr_err("allowedips random self-test malloc: FAIL\n");
> + 				goto free_locked;
> + 			}
> + 			if (horrible_allowedips_insert_v4(&h,
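
Worth spelling out why the one-liner matters: a kzalloc'd list_head is all zeroes, which is not a valid empty list, so later list operations on it dereference NULL. Hedged illustration with made-up names:

#include <linux/list.h>
#include <linux/slab.h>

struct demo_peer {
	struct list_head allowedips_list;
};

static struct demo_peer *demo_peer_alloc(void)
{
	struct demo_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);

	if (!peer)
		return NULL;
	/* Without this, the zeroed head has ->next == ->prev == NULL, so
	 * list_add_tail()/list_for_each_entry() on it dereference NULL;
	 * INIT_LIST_HEAD() points the head at itself, a valid empty list. */
	INIT_LIST_HEAD(&peer->allowedips_list);
	return peer;
}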
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch b/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
> new file mode 100644
> index 0000000000..78da24ea46
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
> @@ -0,0 +1,237 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:36 +0200
> +Subject: [PATCH] wireguard: allowedips: remove nodes in O(1)
> +
> +commit f634f418c227c912e7ea95a3299efdc9b10e4022 upstream.
> +
> +Previously, deleting peers would require traversing the entire trie in
> +order to rebalance nodes and safely free them. This meant that removing
> +1000 peers from a trie with a half million nodes would take an extremely
> +long time, during which we're holding the rtnl lock. Large-scale users
> +were reporting 200ms latencies added to the networking stack as a whole
> +every time their userspace software would queue up significant removals.
> +That's a serious situation.
> +
> +This commit fixes that by maintaining a double pointer to the parent's
> +bit pointer for each node, and then using the already existing node list
> +belonging to each peer to go directly to the node, fix up its pointers,
> +and free it with RCU. This means removal is O(1) instead of O(n), and we
> +don't use gobs of stack.
> +
> +The removal algorithm has the same downside as the code that it fixes:
> +it won't collapse needlessly long runs of fillers.  We can enhance that
> +in the future if it ever becomes a problem. This commit documents that
> +limitation with a TODO comment in code, a small but meaningful
> +improvement over the prior situation.
> +
> +Currently the biggest flaw, which the next commit addresses, is that
> +because this increases the node size on 64-bit machines from 60 bytes to
> +68 bytes. 60 rounds up to 64, but 68 rounds up to 128. So we wind up
> +using twice as much memory per node, because of power-of-two
> +allocations, which is a big bummer. We'll need to figure something out
> +there.
> +
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + drivers/net/wireguard/allowedips.c | 132 ++++++++++++-----------------
> + drivers/net/wireguard/allowedips.h |   9 +-
> + 2 files changed, 57 insertions(+), 84 deletions(-)
> +
> +--- a/drivers/net/wireguard/allowedips.c
> ++++ b/drivers/net/wireguard/allowedips.c
> +@@ -66,60 +66,6 @@ static void root_remove_peer_lists(struc
> + 	}
> + }
> +
> +-static void walk_remove_by_peer(struct allowedips_node __rcu **top,
> +-				struct wg_peer *peer, struct mutex *lock)
> +-{
> +-#define REF(p) rcu_access_pointer(p)
> +-#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
> +-#define PUSH(p) ({                                                             \
> +-		WARN_ON(IS_ENABLED(DEBUG) && len >= 128);                      \
> +-		stack[len++] = p;                                              \
> +-	})
> +-
> +-	struct allowedips_node __rcu **stack[128], **nptr;
> +-	struct allowedips_node *node, *prev;
> +-	unsigned int len;
> +-
> +-	if (unlikely(!peer || !REF(*top)))
> +-		return;
> +-
> +-	for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
> +-		nptr = stack[len - 1];
> +-		node = DEREF(nptr);
> +-		if (!node) {
> +-			--len;
> +-			continue;
> +-		}
> +-		if (!prev || REF(prev->bit[0]) == node ||
> +-		    REF(prev->bit[1]) == node) {
> +-			if (REF(node->bit[0]))
> +-				PUSH(&node->bit[0]);
> +-			else if (REF(node->bit[1]))
> +-				PUSH(&node->bit[1]);
> +-		} else if (REF(node->bit[0]) == prev) {
> +-			if (REF(node->bit[1]))
> +-				PUSH(&node->bit[1]);
> +-		} else {
> +-			if (rcu_dereference_protected(node->peer,
> +-				lockdep_is_held(lock)) == peer) {
> +-				RCU_INIT_POINTER(node->peer, NULL);
> +-				list_del_init(&node->peer_list);
> +-				if (!node->bit[0] || !node->bit[1]) {
> +-					rcu_assign_pointer(*nptr, DEREF(
> +-					       &node->bit[!REF(node->bit[0])]));
> +-					kfree_rcu(node, rcu);
> +-					node = DEREF(nptr);
> +-				}
> +-			}
> +-			--len;
> +-		}
> +-	}
> +-
> +-#undef REF
> +-#undef DEREF
> +-#undef PUSH
> +-}
> +-
> + static unsigned int fls128(u64 a, u64 b)
> + {
> + 	return a ? fls64(a) + 64U : fls64(b);
> +@@ -224,6 +170,7 @@ static int add(struct allowedips_node __
> + 		RCU_INIT_POINTER(node->peer, peer);
> + 		list_add_tail(&node->peer_list, &peer->allowedips_list);
> + 		copy_and_assign_cidr(node, key, cidr, bits);
> ++		rcu_assign_pointer(node->parent_bit, trie);
> + 		rcu_assign_pointer(*trie, node);
> + 		return 0;
> + 	}
> +@@ -243,9 +190,9 @@ static int add(struct allowedips_node __
> + 	if (!node) {
> + 		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
> + 	} else {
> +-		down = rcu_dereference_protected(CHOOSE_NODE(node, key),
> +-						 lockdep_is_held(lock));
> ++		down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
> + 		if (!down) {
> ++			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
> + 			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
> + 			return 0;
> + 		}
> +@@ -254,29 +201,37 @@ static int add(struct allowedips_node __
> + 	parent = node;
> +
> + 	if (newnode->cidr == cidr) {
> ++		rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
> + 		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
> +-		if (!parent)
> ++		if (!parent) {
> ++			rcu_assign_pointer(newnode->parent_bit, trie);
> + 			rcu_assign_pointer(*trie, newnode);
> +-		else
> +-			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
> +-					   newnode);
> +-	} else {
> +-		node = kzalloc(sizeof(*node), GFP_KERNEL);
> +-		if (unlikely(!node)) {
> +-			list_del(&newnode->peer_list);
> +-			kfree(newnode);
> +-			return -ENOMEM;
> ++		} else {
> ++			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
> ++			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
> + 		}
> +-		INIT_LIST_HEAD(&node->peer_list);
> +-		copy_and_assign_cidr(node, newnode->bits, cidr, bits);
> ++		return 0;
> ++	}
> ++
> ++	node = kzalloc(sizeof(*node), GFP_KERNEL);
> ++	if (unlikely(!node)) {
> ++		list_del(&newnode->peer_list);
> ++		kfree(newnode);
> ++		return -ENOMEM;
> ++	}
> ++	INIT_LIST_HEAD(&node->peer_list);
> ++	copy_and_assign_cidr(node, newnode->bits, cidr, bits);
> +
> +-		rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
> +-		rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
> +-		if (!parent)
> +-			rcu_assign_pointer(*trie, node);
> +-		else
> +-			rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
> +-					   node);
> ++	rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
> ++	rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
> ++	rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
> ++	rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
> ++	if (!parent) {
> ++		rcu_assign_pointer(node->parent_bit, trie);
> ++		rcu_assign_pointer(*trie, node);
> ++	} else {
> ++		rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
> ++		rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
> + 	}
> + 	return 0;
> + }
> +@@ -335,9 +290,30 @@ int wg_allowedips_insert_v6(struct allow
> + void wg_allowedips_remove_by_peer(struct allowedips *table,
> + 				  struct wg_peer *peer, struct mutex *lock)
> + {
> ++	struct allowedips_node *node, *child, *tmp;
> ++
> ++	if (list_empty(&peer->allowedips_list))
> ++		return;
> + 	++table->seq;
> +-	walk_remove_by_peer(&table->root4, peer, lock);
> +-	walk_remove_by_peer(&table->root6, peer, lock);
> ++	list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) {
> ++		list_del_init(&node->peer_list);
> ++		RCU_INIT_POINTER(node->peer, NULL);
> ++		if (node->bit[0] && node->bit[1])
> ++			continue;
> ++		child = rcu_dereference_protected(
> ++				node->bit[!rcu_access_pointer(node->bit[0])],
> ++				lockdep_is_held(lock));
> ++		if (child)
> ++			child->parent_bit = node->parent_bit;
> ++		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
> ++		kfree_rcu(node, rcu);
> ++
> ++		/* TODO: Note that we currently don't walk up and down in order to
> ++		 * free any potential filler nodes. This means that this function
> ++		 * doesn't free up as much as it could, which could be revisited
> ++		 * at some point.
> ++		 */
> ++	}
> + }
> +
> + int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
> +--- a/drivers/net/wireguard/allowedips.h
> ++++ b/drivers/net/wireguard/allowedips.h
> +@@ -15,14 +15,11 @@ struct wg_peer;
> + struct allowedips_node {
> + 	struct wg_peer __rcu *peer;
> + 	struct allowedips_node __rcu *bit[2];
> +-	/* While it may seem scandalous that we waste space for v4,
> +-	 * we're alloc'ing to the nearest power of 2 anyway, so this
> +-	 * doesn't actually make a difference.
> +-	 */
> +-	u8 bits[16] __aligned(__alignof(u64));
> + 	u8 cidr, bit_at_a, bit_at_b, bitlen;
> ++	u8 bits[16] __aligned(__alignof(u64));
> +
> +-	/* Keep rarely used list at bottom to be beyond cache line. */
> ++	/* Keep rarely used members at bottom to be beyond cache line. */
> ++	struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
> + 	union {
> + 		struct list_head peer_list;
> + 		struct rcu_head rcu;
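
The trick that makes removal O(1) is easier to see without the RCU plumbing: each node stores the address of the parent slot that points at it, so unlinking is a couple of pointer stores instead of a trie walk. Simplified non-RCU sketch (made-up names, not the driver code):

struct trie_node {
	struct trie_node *bit[2];
	/* Address of the parent's bit[] slot (or of the root pointer)
	 * that currently points at this node. */
	struct trie_node **parent_bit;
};

/* Only valid for nodes with at most one child, which is the only case the
 * patch handles inline; fuller nodes are just left in place as fillers. */
static void trie_unlink(struct trie_node *node)
{
	struct trie_node *child = node->bit[0] ? node->bit[0] : node->bit[1];

	if (child)
		child->parent_bit = node->parent_bit;
	*node->parent_bit = child;	/* single store, no traversal */
}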
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0134-wireguard-allowedips-allocate-nodes-in-kmem_cache.patch b/target/linux/generic/backport-5.4/080-wireguard-0134-wireguard-allowedips-allocate-nodes-in-kmem_cache.patch
> new file mode 100644
> index 0000000000..65b31b05f5
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0134-wireguard-allowedips-allocate-nodes-in-kmem_cache.patch
> @@ -0,0 +1,173 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:37 +0200
> +Subject: [PATCH] wireguard: allowedips: allocate nodes in kmem_cache
> +
> +commit dc680de28ca849dfe589dc15ac56d22505f0ef11 upstream.
> +
> +The previous commit moved from O(n) to O(1) for removal, but in the
> +process introduced an additional pointer member to a struct that
> +increased the size from 60 to 68 bytes, putting nodes in the 128-byte
> +slab. With deployed systems having as many as 2 million nodes, this
> +represents a significant doubling in memory usage (128 MiB -> 256 MiB).
> +Fix this by using our own kmem_cache, that's sized exactly right. This
> +also makes wireguard's memory usage more transparent in tools like
> +slabtop and /proc/slabinfo.
> +
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Suggested-by: Arnd Bergmann <arnd at arndb.de>
> +Suggested-by: Matthew Wilcox <willy at infradead.org>
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + drivers/net/wireguard/allowedips.c | 31 ++++++++++++++++++++++++------
> + drivers/net/wireguard/allowedips.h |  5 ++++-
> + drivers/net/wireguard/main.c       | 10 +++++++++-
> + 3 files changed, 38 insertions(+), 8 deletions(-)
> +
> +--- a/drivers/net/wireguard/allowedips.c
> ++++ b/drivers/net/wireguard/allowedips.c
> +@@ -6,6 +6,8 @@
> + #include "allowedips.h"
> + #include "peer.h"
> +
> ++static struct kmem_cache *node_cache;
> ++
> + static void swap_endian(u8 *dst, const u8 *src, u8 bits)
> + {
> + 	if (bits == 32) {
> +@@ -40,6 +42,11 @@ static void push_rcu(struct allowedips_n
> + 	}
> + }
> +
> ++static void node_free_rcu(struct rcu_head *rcu)
> ++{
> ++	kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu));
> ++}
> ++
> + static void root_free_rcu(struct rcu_head *rcu)
> + {
> + 	struct allowedips_node *node, *stack[128] = {
> +@@ -49,7 +56,7 @@ static void root_free_rcu(struct rcu_hea
> + 	while (len > 0 && (node = stack[--len])) {
> + 		push_rcu(stack, node->bit[0], &len);
> + 		push_rcu(stack, node->bit[1], &len);
> +-		kfree(node);
> ++		kmem_cache_free(node_cache, node);
> + 	}
> + }
> +
> +@@ -164,7 +171,7 @@ static int add(struct allowedips_node __
> + 		return -EINVAL;
> +
> + 	if (!rcu_access_pointer(*trie)) {
> +-		node = kzalloc(sizeof(*node), GFP_KERNEL);
> ++		node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
> + 		if (unlikely(!node))
> + 			return -ENOMEM;
> + 		RCU_INIT_POINTER(node->peer, peer);
> +@@ -180,7 +187,7 @@ static int add(struct allowedips_node __
> + 		return 0;
> + 	}
> +
> +-	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
> ++	newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
> + 	if (unlikely(!newnode))
> + 		return -ENOMEM;
> + 	RCU_INIT_POINTER(newnode->peer, peer);
> +@@ -213,10 +220,10 @@ static int add(struct allowedips_node __
> + 		return 0;
> + 	}
> +
> +-	node = kzalloc(sizeof(*node), GFP_KERNEL);
> ++	node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
> + 	if (unlikely(!node)) {
> + 		list_del(&newnode->peer_list);
> +-		kfree(newnode);
> ++		kmem_cache_free(node_cache, newnode);
> + 		return -ENOMEM;
> + 	}
> + 	INIT_LIST_HEAD(&node->peer_list);
> +@@ -306,7 +313,7 @@ void wg_allowedips_remove_by_peer(struct
> + 		if (child)
> + 			child->parent_bit = node->parent_bit;
> + 		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
> +-		kfree_rcu(node, rcu);
> ++		call_rcu(&node->rcu, node_free_rcu);
> +
> + 		/* TODO: Note that we currently don't walk up and down in order to
> + 		 * free any potential filler nodes. This means that this function
> +@@ -350,4 +357,16 @@ struct wg_peer *wg_allowedips_lookup_src
> + 	return NULL;
> + }
> +
> ++int __init wg_allowedips_slab_init(void)
> ++{
> ++	node_cache = KMEM_CACHE(allowedips_node, 0);
> ++	return node_cache ? 0 : -ENOMEM;
> ++}
> ++
> ++void wg_allowedips_slab_uninit(void)
> ++{
> ++	rcu_barrier();
> ++	kmem_cache_destroy(node_cache);
> ++}
> ++
> + #include "selftest/allowedips.c"
> +--- a/drivers/net/wireguard/allowedips.h
> ++++ b/drivers/net/wireguard/allowedips.h
> +@@ -19,7 +19,7 @@ struct allowedips_node {
> + 	u8 bits[16] __aligned(__alignof(u64));
> +
> + 	/* Keep rarely used members at bottom to be beyond cache line. */
> +-	struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
> ++	struct allowedips_node *__rcu *parent_bit;
> + 	union {
> + 		struct list_head peer_list;
> + 		struct rcu_head rcu;
> +@@ -53,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src
> + bool wg_allowedips_selftest(void);
> + #endif
> +
> ++int wg_allowedips_slab_init(void);
> ++void wg_allowedips_slab_uninit(void);
> ++
> + #endif /* _WG_ALLOWEDIPS_H */
> +--- a/drivers/net/wireguard/main.c
> ++++ b/drivers/net/wireguard/main.c
> +@@ -21,10 +21,15 @@ static int __init mod_init(void)
> + {
> + 	int ret;
> +
> ++	ret = wg_allowedips_slab_init();
> ++	if (ret < 0)
> ++		goto err_allowedips;
> ++
> + #ifdef DEBUG
> ++	ret = -ENOTRECOVERABLE;
> + 	if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
> + 	    !wg_ratelimiter_selftest())
> +-		return -ENOTRECOVERABLE;
> ++		goto err_peer;
> + #endif
> + 	wg_noise_init();
> +
> +@@ -50,6 +55,8 @@ err_netlink:
> + err_device:
> + 	wg_peer_uninit();
> + err_peer:
> ++	wg_allowedips_slab_uninit();
> ++err_allowedips:
> + 	return ret;
> + }
> +
> +@@ -58,6 +65,7 @@ static void __exit mod_exit(void)
> + 	wg_genetlink_uninit();
> + 	wg_device_uninit();
> + 	wg_peer_uninit();
> ++	wg_allowedips_slab_uninit();
> + }
> +
> + module_init(mod_init);
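
One detail worth calling out in wg_allowedips_slab_uninit() above: nodes are freed via call_rcu(), so the cache has to outlive any callbacks still queued at module unload, which is what the rcu_barrier() guarantees. Schematically (made-up cache name):

#include <linux/rcupdate.h>
#include <linux/slab.h>

static struct kmem_cache *demo_node_cache;

static void demo_slab_uninit(void)
{
	/* Wait for every pending call_rcu() callback that may still free
	 * objects back into this cache... */
	rcu_barrier();
	/* ...and only then tear the cache itself down. */
	kmem_cache_destroy(demo_node_cache);
}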
> diff --git a/target/linux/generic/backport-5.4/080-wireguard-0135-wireguard-allowedips-free-empty-intermediate-nodes-w.patch b/target/linux/generic/backport-5.4/080-wireguard-0135-wireguard-allowedips-free-empty-intermediate-nodes-w.patch
> new file mode 100644
> index 0000000000..c044ad25af
> --- /dev/null
> +++ b/target/linux/generic/backport-5.4/080-wireguard-0135-wireguard-allowedips-free-empty-intermediate-nodes-w.patch
> @@ -0,0 +1,521 @@
> +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> +From: "Jason A. Donenfeld" <Jason at zx2c4.com>
> +Date: Fri, 4 Jun 2021 17:17:38 +0200
> +Subject: [PATCH] wireguard: allowedips: free empty intermediate nodes when
> + removing single node
> +
> +commit bf7b042dc62a31f66d3a41dd4dfc7806f267b307 upstream.
> +
> +When removing single nodes, it's possible that that node's parent is an
> +empty intermediate node, in which case, it too should be removed.
> +Otherwise the trie fills up and never is fully emptied, leading to
> +gradual memory leaks over time for tries that are modified often. There
> +was originally code to do this, but was removed during refactoring in
> +2016 and never reworked. Now that we have proper parent pointers from
> +the previous commits, we can implement this properly.
> +
> +In order to reduce branching and expensive comparisons, we want to keep
> +the double pointer for parent assignment (which lets us easily chain up
> +to the root), but we still need to actually get the parent's base
> +address. So encode the bit number into the last two bits of the pointer,
> +and pack and unpack it as needed. This is a little bit clumsy but is the
> +fastest and less memory wasteful of the compromises. Note that we align
> +the root struct here to a minimum of 4, because it's embedded into a
> +larger struct, and we're relying on having the bottom two bits for our
> +flag, which would only be 16-bit aligned on m68k.
> +
> +The existing macro-based helpers were a bit unwieldy for adding the bit
> +packing to, so this commit replaces them with safer and clearer ordinary
> +functions.
> +
> +We add a test to the randomized/fuzzer part of the selftests, to free
> +the randomized tries by-peer, refuzz it, and repeat, until it's supposed
> +to be empty, and then then see if that actually resulted in the whole
> +thing being emptied. That combined with kmemcheck should hopefully make
> +sure this commit is doing what it should. Along the way this resulted in
> +various other cleanups of the tests and fixes for recent graphviz.
> +
> +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
> +Cc: stable at vger.kernel.org
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +Signed-off-by: David S. Miller <davem at davemloft.net>
> +Signed-off-by: Jason A. Donenfeld <Jason at zx2c4.com>
> +---
> + drivers/net/wireguard/allowedips.c          | 102 ++++++------
> + drivers/net/wireguard/allowedips.h          |   4 +-
> + drivers/net/wireguard/selftest/allowedips.c | 162 ++++++++++----------
> + 3 files changed, 137 insertions(+), 131 deletions(-)
> +
> +--- a/drivers/net/wireguard/allowedips.c
> ++++ b/drivers/net/wireguard/allowedips.c
> +@@ -30,8 +30,11 @@ static void copy_and_assign_cidr(struct
> + 	node->bitlen = bits;
> + 	memcpy(node->bits, src, bits / 8U);
> + }
> +-#define CHOOSE_NODE(parent, key) \
> +-	parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
> ++
> ++static inline u8 choose(struct allowedips_node *node, const u8 *key)
> ++{
> ++	return (key[node->bit_at_a] >> node->bit_at_b) & 1;
> ++}
> +
> + static void push_rcu(struct allowedips_node **stack,
> + 		     struct allowedips_node __rcu *p, unsigned int *len)
> +@@ -112,7 +115,7 @@ static struct allowedips_node *find_node
> + 			found = node;
> + 		if (node->cidr == bits)
> + 			break;
> +-		node = rcu_dereference_bh(CHOOSE_NODE(node, key));
> ++		node = rcu_dereference_bh(node->bit[choose(node, key)]);
> + 	}
> + 	return found;
> + }
> +@@ -144,8 +147,7 @@ static bool node_placement(struct allowe
> + 			   u8 cidr, u8 bits, struct allowedips_node **rnode,
> + 			   struct mutex *lock)
> + {
> +-	struct allowedips_node *node = rcu_dereference_protected(trie,
> +-						lockdep_is_held(lock));
> ++	struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
> + 	struct allowedips_node *parent = NULL;
> + 	bool exact = false;
> +
> +@@ -155,13 +157,24 @@ static bool node_placement(struct allowe
> + 			exact = true;
> + 			break;
> + 		}
> +-		node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
> +-						 lockdep_is_held(lock));
> ++		node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
> + 	}
> + 	*rnode = parent;
> + 	return exact;
> + }
> +
> ++static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
> ++{
> ++	node->parent_bit_packed = (unsigned long)parent | bit;
> ++	rcu_assign_pointer(*parent, node);
> ++}
> ++
> ++static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
> ++{
> ++	u8 bit = choose(parent, node->bits);
> ++	connect_node(&parent->bit[bit], bit, node);
> ++}
> ++
> + static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
> + 	       u8 cidr, struct wg_peer *peer, struct mutex *lock)
> + {
> +@@ -177,8 +190,7 @@ static int add(struct allowedips_node __
> + 		RCU_INIT_POINTER(node->peer, peer);
> + 		list_add_tail(&node->peer_list, &peer->allowedips_list);
> + 		copy_and_assign_cidr(node, key, cidr, bits);
> +-		rcu_assign_pointer(node->parent_bit, trie);
> +-		rcu_assign_pointer(*trie, node);
> ++		connect_node(trie, 2, node);
> + 		return 0;
> + 	}
> + 	if (node_placement(*trie, key, cidr, bits, &node, lock)) {
> +@@ -197,10 +209,10 @@ static int add(struct allowedips_node __
> + 	if (!node) {
> + 		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
> + 	} else {
> +-		down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
> ++		const u8 bit = choose(node, key);
> ++		down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
> + 		if (!down) {
> +-			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
> +-			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
> ++			connect_node(&node->bit[bit], bit, newnode);
> + 			return 0;
> + 		}
> + 	}
> +@@ -208,15 +220,11 @@ static int add(struct allowedips_node __
> + 	parent = node;
> +
> + 	if (newnode->cidr == cidr) {
> +-		rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
> +-		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
> +-		if (!parent) {
> +-			rcu_assign_pointer(newnode->parent_bit, trie);
> +-			rcu_assign_pointer(*trie, newnode);
> +-		} else {
> +-			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
> +-			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
> +-		}
> ++		choose_and_connect_node(newnode, down);
> ++		if (!parent)
> ++			connect_node(trie, 2, newnode);
> ++		else
> ++			choose_and_connect_node(parent, newnode);
> + 		return 0;
> + 	}
> +
> +@@ -229,17 +237,12 @@ static int add(struct allowedips_node __
> + 	INIT_LIST_HEAD(&node->peer_list);
> + 	copy_and_assign_cidr(node, newnode->bits, cidr, bits);
> +
> +-	rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
> +-	rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
> +-	rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
> +-	rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
> +-	if (!parent) {
> +-		rcu_assign_pointer(node->parent_bit, trie);
> +-		rcu_assign_pointer(*trie, node);
> +-	} else {
> +-		rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
> +-		rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
> +-	}
> ++	choose_and_connect_node(node, down);
> ++	choose_and_connect_node(node, newnode);
> ++	if (!parent)
> ++		connect_node(trie, 2, node);
> ++	else
> ++		choose_and_connect_node(parent, node);
> + 	return 0;
> + }
> +
> +@@ -297,7 +300,8 @@ int wg_allowedips_insert_v6(struct allow
> + void wg_allowedips_remove_by_peer(struct allowedips *table,
> + 				  struct wg_peer *peer, struct mutex *lock)
> + {
> +-	struct allowedips_node *node, *child, *tmp;
> ++	struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
> ++	bool free_parent;
> +
> + 	if (list_empty(&peer->allowedips_list))
> + 		return;
> +@@ -307,19 +311,29 @@ void wg_allowedips_remove_by_peer(struct
> + 		RCU_INIT_POINTER(node->peer, NULL);
> + 		if (node->bit[0] && node->bit[1])
> + 			continue;
> +-		child = rcu_dereference_protected(
> +-				node->bit[!rcu_access_pointer(node->bit[0])],
> +-				lockdep_is_held(lock));
> ++		child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
> ++						  lockdep_is_held(lock));
> + 		if (child)
> +-			child->parent_bit = node->parent_bit;
> +-		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
> ++			child->parent_bit_packed = node->parent_bit_packed;
> ++		parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
> ++		*parent_bit = child;
> ++		parent = (void *)parent_bit -
> ++			 offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
> ++		free_parent = !rcu_access_pointer(node->bit[0]) &&
> ++			      !rcu_access_pointer(node->bit[1]) &&
> ++			      (node->parent_bit_packed & 3) <= 1 &&
> ++			      !rcu_access_pointer(parent->peer);
> ++		if (free_parent)
> ++			child = rcu_dereference_protected(
> ++					parent->bit[!(node->parent_bit_packed & 1)],
> ++					lockdep_is_held(lock));
> + 		call_rcu(&node->rcu, node_free_rcu);
> +-
> +-		/* TODO: Note that we currently don't walk up and down in order to
> +-		 * free any potential filler nodes. This means that this function
> +-		 * doesn't free up as much as it could, which could be revisited
> +-		 * at some point.
> +-		 */
> ++		if (!free_parent)
> ++			continue;
> ++		if (child)
> ++			child->parent_bit_packed = parent->parent_bit_packed;
> ++		*(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
> ++		call_rcu(&parent->rcu, node_free_rcu);
> + 	}
> + }
> +
> +--- a/drivers/net/wireguard/allowedips.h
> ++++ b/drivers/net/wireguard/allowedips.h
> +@@ -19,7 +19,7 @@ struct allowedips_node {
> + 	u8 bits[16] __aligned(__alignof(u64));
> +
> + 	/* Keep rarely used members at bottom to be beyond cache line. */
> +-	struct allowedips_node *__rcu *parent_bit;
> ++	unsigned long parent_bit_packed;
> + 	union {
> + 		struct list_head peer_list;
> + 		struct rcu_head rcu;
> +@@ -30,7 +30,7 @@ struct allowedips {
> + 	struct allowedips_node __rcu *root4;
> + 	struct allowedips_node __rcu *root6;
> + 	u64 seq;
> +-};
> ++} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */
> +
> + void wg_allowedips_init(struct allowedips *table);
> + void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
> +--- a/drivers/net/wireguard/selftest/allowedips.c
> ++++ b/drivers/net/wireguard/selftest/allowedips.c
> +@@ -19,32 +19,22 @@
> +
> + #include <linux/siphash.h>
> +
> +-static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
> +-					      u8 cidr)
> +-{
> +-	swap_endian(dst, src, bits);
> +-	memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
> +-	if (cidr)
> +-		dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
> +-}
> +-
> + static __init void print_node(struct allowedips_node *node, u8 bits)
> + {
> + 	char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
> +-	char *fmt_declaration = KERN_DEBUG
> +-		"\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
> ++	char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
> ++	u8 ip1[16], ip2[16], cidr1, cidr2;
> + 	char *style = "dotted";
> +-	u8 ip1[16], ip2[16];
> + 	u32 color = 0;
> +
> ++	if (node == NULL)
> ++		return;
> + 	if (bits == 32) {
> + 		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
> +-		fmt_declaration = KERN_DEBUG
> +-			"\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
> ++		fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
> + 	} else if (bits == 128) {
> + 		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
> +-		fmt_declaration = KERN_DEBUG
> +-			"\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
> ++		fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
> + 	}
> + 	if (node->peer) {
> + 		hsiphash_key_t key = { { 0 } };
> +@@ -55,24 +45,20 @@ static __init void print_node(struct all
> + 			hsiphash_1u32(0xabad1dea, &key) % 200;
> + 		style = "bold";
> + 	}
> +-	swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
> +-	printk(fmt_declaration, ip1, node->cidr, style, color);
> ++	wg_allowedips_read_node(node, ip1, &cidr1);
> ++	printk(fmt_declaration, ip1, cidr1, style, color);
> + 	if (node->bit[0]) {
> +-		swap_endian_and_apply_cidr(ip2,
> +-				rcu_dereference_raw(node->bit[0])->bits, bits,
> +-				node->cidr);
> +-		printk(fmt_connection, ip1, node->cidr, ip2,
> +-		       rcu_dereference_raw(node->bit[0])->cidr);
> +-		print_node(rcu_dereference_raw(node->bit[0]), bits);
> ++		wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2);
> ++		printk(fmt_connection, ip1, cidr1, ip2, cidr2);
> + 	}
> + 	if (node->bit[1]) {
> +-		swap_endian_and_apply_cidr(ip2,
> +-				rcu_dereference_raw(node->bit[1])->bits,
> +-				bits, node->cidr);
> +-		printk(fmt_connection, ip1, node->cidr, ip2,
> +-		       rcu_dereference_raw(node->bit[1])->cidr);
> +-		print_node(rcu_dereference_raw(node->bit[1]), bits);
> ++		wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2);
> ++		printk(fmt_connection, ip1, cidr1, ip2, cidr2);
> + 	}
> ++	if (node->bit[0])
> ++		print_node(rcu_dereference_raw(node->bit[0]), bits);
> ++	if (node->bit[1])
> ++		print_node(rcu_dereference_raw(node->bit[1]), bits);
> + }
> +
> + static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
> +@@ -121,8 +107,8 @@ static __init inline union nf_inet_addr
> + {
> + 	union nf_inet_addr mask;
> +
> +-	memset(&mask, 0x00, 128 / 8);
> +-	memset(&mask, 0xff, cidr / 8);
> ++	memset(&mask, 0, sizeof(mask));
> ++	memset(&mask.all, 0xff, cidr / 8);
> + 	if (cidr % 32)
> + 		mask.all[cidr / 32] = (__force u32)htonl(
> + 			(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
> +@@ -149,42 +135,36 @@ horrible_mask_self(struct horrible_allow
> + }
> +
> + static __init inline bool
> +-horrible_match_v4(const struct horrible_allowedips_node *node,
> +-		  struct in_addr *ip)
> ++horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip)
> + {
> + 	return (ip->s_addr & node->mask.ip) == node->ip.ip;
> + }
> +
> + static __init inline bool
> +-horrible_match_v6(const struct horrible_allowedips_node *node,
> +-		  struct in6_addr *ip)
> ++horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip)
> + {
> +-	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
> +-		       node->ip.ip6[0] &&
> +-	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
> +-		       node->ip.ip6[1] &&
> +-	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
> +-		       node->ip.ip6[2] &&
> ++	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] &&
> ++	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] &&
> ++	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] &&
> + 	       (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
> + }
> +
> + static __init void
> +-horrible_insert_ordered(struct horrible_allowedips *table,
> +-			struct horrible_allowedips_node *node)
> ++horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node)
> + {
> + 	struct horrible_allowedips_node *other = NULL, *where = NULL;
> + 	u8 my_cidr = horrible_mask_to_cidr(node->mask);
> +
> + 	hlist_for_each_entry(other, &table->head, table) {
> +-		if (!memcmp(&other->mask, &node->mask,
> +-			    sizeof(union nf_inet_addr)) &&
> +-		    !memcmp(&other->ip, &node->ip,
> +-			    sizeof(union nf_inet_addr)) &&
> +-		    other->ip_version == node->ip_version) {
> ++		if (other->ip_version == node->ip_version &&
> ++		    !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) &&
> ++		    !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) {
> + 			other->value = node->value;
> + 			kfree(node);
> + 			return;
> + 		}
> ++	}
> ++	hlist_for_each_entry(other, &table->head, table) {
> + 		where = other;
> + 		if (horrible_mask_to_cidr(other->mask) <= my_cidr)
> + 			break;
> +@@ -201,8 +181,7 @@ static __init int
> + horrible_allowedips_insert_v4(struct horrible_allowedips *table,
> + 			      struct in_addr *ip, u8 cidr, void *value)
> + {
> +-	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
> +-							GFP_KERNEL);
> ++	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
> +
> + 	if (unlikely(!node))
> + 		return -ENOMEM;
> +@@ -219,8 +198,7 @@ static __init int
> + horrible_allowedips_insert_v6(struct horrible_allowedips *table,
> + 			      struct in6_addr *ip, u8 cidr, void *value)
> + {
> +-	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
> +-							GFP_KERNEL);
> ++	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
> +
> + 	if (unlikely(!node))
> + 		return -ENOMEM;
> +@@ -234,39 +212,43 @@ horrible_allowedips_insert_v6(struct hor
> + }
> +
> + static __init void *
> +-horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
> +-			      struct in_addr *ip)
> ++horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip)
> + {
> + 	struct horrible_allowedips_node *node;
> +-	void *ret = NULL;
> +
> + 	hlist_for_each_entry(node, &table->head, table) {
> +-		if (node->ip_version != 4)
> +-			continue;
> +-		if (horrible_match_v4(node, ip)) {
> +-			ret = node->value;
> +-			break;
> +-		}
> ++		if (node->ip_version == 4 && horrible_match_v4(node, ip))
> ++			return node->value;
> + 	}
> +-	return ret;
> ++	return NULL;
> + }
> +
> + static __init void *
> +-horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
> +-			      struct in6_addr *ip)
> ++horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip)
> + {
> + 	struct horrible_allowedips_node *node;
> +-	void *ret = NULL;
> +
> + 	hlist_for_each_entry(node, &table->head, table) {
> +-		if (node->ip_version != 6)
> ++		if (node->ip_version == 6 && horrible_match_v6(node, ip))
> ++			return node->value;
> ++	}
> ++	return NULL;
> ++}
> ++
> ++
> ++static __init void
> ++horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value)
> ++{
> ++	struct horrible_allowedips_node *node;
> ++	struct hlist_node *h;
> ++
> ++	hlist_for_each_entry_safe(node, h, &table->head, table) {
> ++		if (node->value != value)
> + 			continue;
> +-		if (horrible_match_v6(node, ip)) {
> +-			ret = node->value;
> +-			break;
> +-		}
> ++		hlist_del(&node->table);
> ++		kfree(node);
> + 	}
> +-	return ret;
> ++
> + }
> +
> + static __init bool randomized_test(void)
> +@@ -397,23 +379,33 @@ static __init bool randomized_test(void)
> + 		print_tree(t.root6, 128);
> + 	}
> +
> +-	for (i = 0; i < NUM_QUERIES; ++i) {
> +-		prandom_bytes(ip, 4);
> +-		if (lookup(t.root4, 32, ip) !=
> +-		    horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
> +-			pr_err("allowedips random self-test: FAIL\n");
> +-			goto free;
> ++	for (j = 0;; ++j) {
> ++		for (i = 0; i < NUM_QUERIES; ++i) {
> ++			prandom_bytes(ip, 4);
> ++			if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
> ++				horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip);
> ++				pr_err("allowedips random v4 self-test: FAIL\n");
> ++				goto free;
> ++			}
> ++			prandom_bytes(ip, 16);
> ++			if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
> ++				pr_err("allowedips random v6 self-test: FAIL\n");
> ++				goto free;
> ++			}
> + 		}
> ++		if (j >= NUM_PEERS)
> ++			break;
> ++		mutex_lock(&mutex);
> ++		wg_allowedips_remove_by_peer(&t, peers[j], &mutex);
> ++		mutex_unlock(&mutex);
> ++		horrible_allowedips_remove_by_value(&h, peers[j]);
> + 	}
> +
> +-	for (i = 0; i < NUM_QUERIES; ++i) {
> +-		prandom_bytes(ip, 16);
> +-		if (lookup(t.root6, 128, ip) !=
> +-		    horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
> +-			pr_err("allowedips random self-test: FAIL\n");
> +-			goto free;
> +-		}
> ++	if (t.root4 || t.root6) {
> ++		pr_err("allowedips random self-test removal: FAIL\n");
> ++		goto free;
> + 	}
> ++
> + 	ret = true;
> +
> + free:
> --
> 2.31.1
> 
> 
> _______________________________________________
> openwrt-devel mailing list
> openwrt-devel at lists.openwrt.org
> https://lists.openwrt.org/mailman/listinfo/openwrt-devel
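
A side note for anyone reviewing the allowedips changes above: the new
parent_bit_packed field replaces the old parent_bit back-pointer by storing
the address of the parent's child slot with a small tag packed into its low
two bits (0 or 1 for the index into the parent's bit[] array, 2 for the trie
root), which is why struct allowedips gains __aligned(4). The removal path
then recovers the parent node from that slot address via
offsetof(struct allowedips_node, bit[packed & 1]) and only treats it as a
freeable intermediate node when the tag is <= 1. Below is a minimal userspace
sketch of the packing idea, with illustrative names of my own choosing; it is
not the kernel code, just the trick in isolation:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for struct allowedips_node: only the two child slots matter
 * here. At least 4-byte alignment keeps the low two bits of any slot
 * address free to hold the tag. */
struct node {
	struct node *bit[2];
} __attribute__((aligned(4)));

/* Pack the address of a child slot together with a 2-bit tag:
 * 0 or 1 = index into the parent's bit[], 2 = the trie root slot. */
static unsigned long pack_parent(struct node **slot, unsigned int tag)
{
	assert(((uintptr_t)slot & 3UL) == 0);
	return (unsigned long)(uintptr_t)slot | tag;
}

static struct node **unpack_slot(unsigned long packed)
{
	return (struct node **)(uintptr_t)(packed & ~3UL);
}

static unsigned int unpack_tag(unsigned long packed)
{
	return packed & 3UL;
}

int main(void)
{
	struct node parent = { { NULL, NULL } }, child = { { NULL, NULL } };
	unsigned long packed = pack_parent(&parent.bit[1], 1);

	*unpack_slot(packed) = &child;	/* link the child into the slot */
	printf("tag=%u linked=%d\n", unpack_tag(packed),
	       parent.bit[1] == &child);	/* prints: tag=1 linked=1 */
	return 0;
}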




