[PATCH] kernel: fix scheduling while atomic in bridge offload

Qingfang DENG dqfext at gmail.com
Wed May 10 07:05:04 PDT 2023


br_offload_port_state() runs inside a spinlock's critical section
(&br->lock), but rhashtable_init(), rhashtable_free_and_destroy() and
flush_work() may sleep, causing a scheduling-while-atomic bug.
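
For reference, the offending pattern is roughly the following (a
simplified sketch of the callers, which hold br->lock):

	spin_lock_bh(&br->lock);
	br_offload_port_state(p);	/* may call rhashtable_init() or
					 * rhashtable_free_and_destroy(),
					 * both of which can sleep */
	spin_unlock_bh(&br->lock);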

To fix this, use a workqueue to defer the init/destroy operations. To
synchronize rhashtable insertion against destruction, synchronize_rcu()
is used to ensure all writers have left their RCU read-side critical
sections before rhashtable_free_and_destroy() is called.
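
In outline, the fix looks like this (a minimal sketch with simplified
locking and a hypothetical dequeue() helper; see the hunks below for
the actual code):

	/* atomic context: only record the request and kick the worker */
	spin_lock_bh(&lock);
	list_add_tail(&bi->list, &o->deferred_list);
	schedule_work(&o->deferred_work);
	spin_unlock_bh(&lock);

	/* worker, process context: sleeping calls are now legal */
	while ((bi = dequeue(o)) != NULL) {
		if (bi->enable) {
			rhashtable_init(&o->rht, &flow_params);
		} else {
			synchronize_rcu();	/* wait for RCU writers */
			rhashtable_free_and_destroy(&o->rht,
						    br_offload_destroy_cb, o);
		}
		kfree(bi);
	}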

While at it, add a NULL-pointer check to the flow allocation.

Fixes: 94b4da9b4aad ("kernel: add a fast path for the bridge code")
Signed-off-by: Qingfang DENG <dqfext at gmail.com>
---
 .../hack-5.10/600-bridge_offload.patch        | 121 ++++++++++++++----
 .../hack-5.15/600-bridge_offload.patch        | 121 ++++++++++++++----
 2 files changed, 198 insertions(+), 44 deletions(-)

diff --git a/target/linux/generic/hack-5.10/600-bridge_offload.patch b/target/linux/generic/hack-5.10/600-bridge_offload.patch
index 82282627ea..1c03ac2cbd 100644
--- a/target/linux/generic/hack-5.10/600-bridge_offload.patch
+++ b/target/linux/generic/hack-5.10/600-bridge_offload.patch
@@ -153,16 +153,25 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
  
  /*
   * Determine initial path cost based on speed.
-@@ -427,7 +428,7 @@ static struct net_bridge_port *new_nbp(s
+@@ -361,6 +362,7 @@ static void del_nbp(struct net_bridge_po
+ 	kobject_del(&p->kobj);
+ 
+ 	br_netpoll_disable(p);
++	flush_work(&p->offload.deferred_work);
+ 
+ 	call_rcu(&p->rcu, destroy_nbp_rcu);
+ }
+@@ -427,7 +429,8 @@ static struct net_bridge_port *new_nbp(s
  	p->path_cost = port_cost(dev);
  	p->priority = 0x8000 >> BR_PORT_BITS;
  	p->port_no = index;
 -	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
 +	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_OFFLOAD;
++	br_offload_init_work(p);
  	br_init_port(p);
  	br_set_state(p, BR_STATE_DISABLED);
  	br_stp_port_timer_init(p);
-@@ -777,6 +778,9 @@ void br_port_flags_change(struct net_bri
+@@ -777,6 +780,9 @@ void br_port_flags_change(struct net_bri
  
  	if (mask & BR_NEIGH_SUPPRESS)
  		br_recalculate_neigh_suppress_enabled(br);
@@ -202,7 +211,7 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
  						  nbp_vlan_group_rcu(p)))
 --- /dev/null
 +++ b/net/bridge/br_offload.c
-@@ -0,0 +1,436 @@
+@@ -0,0 +1,491 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +#include <linux/kernel.h>
 +#include <linux/workqueue.h>
@@ -236,6 +245,11 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
 +	struct rcu_head rcu;
 +};
 +
++struct bridge_offload_deferred_item {
++	struct list_head list;
++	bool enable;
++};
++
 +static const struct rhashtable_params flow_params = {
 +	.automatic_shrinking = true,
 +	.head_offset = offsetof(struct bridge_flow, node),
@@ -309,7 +323,7 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
 +	        p->br->offload_cache_reserved) >= p->br->offload_cache_size;
 +}
 +
-+static void
++void
 +br_offload_gc_work(struct work_struct *work)
 +{
 +	struct rhashtable_iter hti;
@@ -357,11 +371,57 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
 +
 +}
 +
++static struct bridge_offload_deferred_item *
++br_offload_deferred_dequeue(struct net_bridge_port *p)
++{
++	struct bridge_offload_deferred_item *bi = NULL;
++
++	spin_lock_bh(&p->br->lock);
++	if (list_empty(&p->offload.deferred_list))
++		goto out;
++
++	bi = list_first_entry(&p->offload.deferred_list,
++			      struct bridge_offload_deferred_item, list);
++	list_del(&bi->list);
++out:
++	spin_unlock_bh(&p->br->lock);
++	return bi;
++}
++
++void br_offload_deferred_work(struct work_struct *work)
++{
++	struct bridge_offload_deferred_item *bi;
++	struct net_bridge_port_offload *o;
++	struct net_bridge_port *p;
++
++	p = container_of(work, struct net_bridge_port, offload.deferred_work);
++	o = &p->offload;
++	while ((bi = br_offload_deferred_dequeue(p)) != NULL) {
++		if (bi->enable) {
++			rhashtable_init(&o->rht, &flow_params);
++			spin_lock_bh(&offload_lock);
++			o->enabled = true;
++			spin_unlock_bh(&offload_lock);
++		} else {
++			spin_lock_bh(&offload_lock);
++			o->enabled = false;
++			spin_unlock_bh(&offload_lock);
++			/* Ensure all rht users have finished */
++			synchronize_rcu();
++			cancel_work_sync(&o->gc_work);
++			rhashtable_free_and_destroy(&o->rht,
++						    br_offload_destroy_cb, o);
++		}
++		kfree(bi);
++	}
++}
++
 +void br_offload_port_state(struct net_bridge_port *p)
 +{
 +	struct net_bridge_port_offload *o = &p->offload;
++	struct bridge_offload_deferred_item *bi;
++	gfp_t gfp = GFP_ATOMIC;
 +	bool enabled = true;
-+	bool flush = false;
 +
 +	if (p->state != BR_STATE_FORWARDING ||
 +	    !(p->flags & BR_OFFLOAD))
@@ -371,22 +431,23 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
 +	if (o->enabled == enabled)
 +		goto out;
 +
-+	if (enabled) {
-+		if (!o->gc_work.func)
-+			INIT_WORK(&o->gc_work, br_offload_gc_work);
-+		rhashtable_init(&o->rht, &flow_params);
-+	} else {
-+		flush = true;
-+		rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o);
-+	}
++	bi = kmalloc(sizeof(*bi), gfp);
++	if (!bi) {
++		if (enabled)
++			goto out;
 +
-+	o->enabled = enabled;
++		/* If we are disabling (freeing the rht), retry with
++		 * __GFP_MEMALLOC. Panic if that also fails.
++		 */
++		gfp |= __GFP_MEMALLOC | __GFP_NOWARN;
++		bi = kmalloc(sizeof(*bi), gfp);
++	}
 +
++	bi->enable = enabled;
++	list_add_tail(&bi->list, &o->deferred_list);
++	schedule_work(&o->deferred_work);
 +out:
 +	spin_unlock_bh(&offload_lock);
-+
-+	if (flush)
-+		flush_work(&o->gc_work);
 +}
 +
 +void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
@@ -478,6 +539,9 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
 +#endif
 +
 +	flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC);
++	if (unlikely(!flow))
++		goto out;
++
 +	flow->port = inp;
 +	memcpy(&flow->key, &key, sizeof(key));
 +
@@ -656,20 +720,22 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
  };
  
  #define MDB_PG_FLAGS_PERMANENT	BIT(0)
-@@ -280,6 +286,12 @@ struct net_bridge_mdb_entry {
+@@ -280,6 +286,14 @@ struct net_bridge_mdb_entry {
  	struct rcu_head			rcu;
  };
  
 +struct net_bridge_port_offload {
 +	struct rhashtable		rht;
 +	struct work_struct		gc_work;
++	struct work_struct		deferred_work;
++	struct list_head		deferred_list;
 +	bool				enabled;
 +};
 +
  struct net_bridge_port {
  	struct net_bridge		*br;
  	struct net_device		*dev;
-@@ -337,6 +349,7 @@ struct net_bridge_port {
+@@ -337,6 +351,7 @@ struct net_bridge_port {
  	u16				backup_redirected_cnt;
  
  	struct bridge_stp_xstats	stp_xstats;
@@ -677,7 +743,7 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
  };
  
  #define kobj_to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)
-@@ -475,6 +488,9 @@ struct net_bridge {
+@@ -475,6 +490,9 @@ struct net_bridge {
  	struct kobject			*ifobj;
  	u32				auto_cnt;
  
@@ -687,7 +753,7 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
  #ifdef CONFIG_NET_SWITCHDEV
  	int offload_fwd_mark;
  #endif
-@@ -501,6 +517,10 @@ struct br_input_skb_cb {
+@@ -501,6 +519,10 @@ struct br_input_skb_cb {
  #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
  	u8 br_netfilter_broute:1;
  #endif
@@ -700,7 +766,7 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
  	int offload_fwd_mark;
 --- /dev/null
 +++ b/net/bridge/br_private_offload.h
-@@ -0,0 +1,21 @@
+@@ -0,0 +1,32 @@
 +#ifndef __BR_OFFLOAD_H
 +#define __BR_OFFLOAD_H
 +
@@ -712,6 +778,17 @@ Submitted-by: Felix Fietkau <nbd at nbd.name>
 +void br_offload_fini(void);
 +int br_offload_set_cache_size(struct net_bridge *br, unsigned long val);
 +int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val);
++void br_offload_deferred_work(struct work_struct *work);
++void br_offload_gc_work(struct work_struct *work);
++
++static inline void br_offload_init_work(struct net_bridge_port *p)
++{
++	struct net_bridge_port_offload *o = &p->offload;
++
++	INIT_LIST_HEAD(&o->deferred_list);
++	INIT_WORK(&o->deferred_work, br_offload_deferred_work);
++	INIT_WORK(&o->gc_work, br_offload_gc_work);
++}
 +
 +static inline void br_offload_skb_disable(struct sk_buff *skb)
 +{
diff --git a/target/linux/generic/hack-5.15/600-bridge_offload.patch b/target/linux/generic/hack-5.15/600-bridge_offload.patch
index 9d71a741b2..118df44fec 100644
--- a/target/linux/generic/hack-5.15/600-bridge_offload.patch
+++ b/target/linux/generic/hack-5.15/600-bridge_offload.patch
@@ -150,16 +150,25 @@ Subject: [PATCH] net/bridge: add bridge offload
  
  /*
   * Determine initial path cost based on speed.
-@@ -428,7 +429,7 @@ static struct net_bridge_port *new_nbp(s
+@@ -362,6 +363,7 @@ static void del_nbp(struct net_bridge_po
+ 	kobject_del(&p->kobj);
+ 
+ 	br_netpoll_disable(p);
++	flush_work(&p->offload.deferred_work);
+ 
+ 	call_rcu(&p->rcu, destroy_nbp_rcu);
+ }
+@@ -428,7 +430,8 @@ static struct net_bridge_port *new_nbp(s
  	p->path_cost = port_cost(dev);
  	p->priority = 0x8000 >> BR_PORT_BITS;
  	p->port_no = index;
 -	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
 +	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_OFFLOAD;
++	br_offload_init_work(p);
  	br_init_port(p);
  	br_set_state(p, BR_STATE_DISABLED);
  	br_stp_port_timer_init(p);
-@@ -771,6 +772,9 @@ void br_port_flags_change(struct net_bri
+@@ -771,6 +774,9 @@ void br_port_flags_change(struct net_bri
  
  	if (mask & BR_NEIGH_SUPPRESS)
  		br_recalculate_neigh_suppress_enabled(br);
@@ -199,7 +208,7 @@ Subject: [PATCH] net/bridge: add bridge offload
  
 --- /dev/null
 +++ b/net/bridge/br_offload.c
-@@ -0,0 +1,438 @@
+@@ -0,0 +1,493 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +#include <linux/kernel.h>
 +#include <linux/workqueue.h>
@@ -233,6 +242,11 @@ Subject: [PATCH] net/bridge: add bridge offload
 +	struct rcu_head rcu;
 +};
 +
++struct bridge_offload_deferred_item {
++	struct list_head list;
++	bool enable;
++};
++
 +static const struct rhashtable_params flow_params = {
 +	.automatic_shrinking = true,
 +	.head_offset = offsetof(struct bridge_flow, node),
@@ -306,7 +320,7 @@ Subject: [PATCH] net/bridge: add bridge offload
 +	        p->br->offload_cache_reserved) >= p->br->offload_cache_size;
 +}
 +
-+static void
++void
 +br_offload_gc_work(struct work_struct *work)
 +{
 +	struct rhashtable_iter hti;
@@ -354,11 +368,57 @@ Subject: [PATCH] net/bridge: add bridge offload
 +
 +}
 +
++static struct bridge_offload_deferred_item *
++br_offload_deferred_dequeue(struct net_bridge_port *p)
++{
++	struct bridge_offload_deferred_item *bi = NULL;
++
++	spin_lock_bh(&p->br->lock);
++	if (list_empty(&p->offload.deferred_list))
++		goto out;
++
++	bi = list_first_entry(&p->offload.deferred_list,
++			      struct bridge_offload_deferred_item, list);
++	list_del(&bi->list);
++out:
++	spin_unlock_bh(&p->br->lock);
++	return bi;
++}
++
++void br_offload_deferred_work(struct work_struct *work)
++{
++	struct bridge_offload_deferred_item *bi;
++	struct net_bridge_port_offload *o;
++	struct net_bridge_port *p;
++
++	p = container_of(work, struct net_bridge_port, offload.deferred_work);
++	o = &p->offload;
++	while ((bi = br_offload_deferred_dequeue(p)) != NULL) {
++		if (bi->enable) {
++			rhashtable_init(&o->rht, &flow_params);
++			spin_lock_bh(&offload_lock);
++			o->enabled = true;
++			spin_unlock_bh(&offload_lock);
++		} else {
++			spin_lock_bh(&offload_lock);
++			o->enabled = false;
++			spin_unlock_bh(&offload_lock);
++			/* Ensure all rht users have finished */
++			synchronize_rcu();
++			cancel_work_sync(&o->gc_work);
++			rhashtable_free_and_destroy(&o->rht,
++						    br_offload_destroy_cb, o);
++		}
++		kfree(bi);
++	}
++}
++
 +void br_offload_port_state(struct net_bridge_port *p)
 +{
 +	struct net_bridge_port_offload *o = &p->offload;
++	struct bridge_offload_deferred_item *bi;
++	gfp_t gfp = GFP_ATOMIC;
 +	bool enabled = true;
-+	bool flush = false;
 +
 +	if (p->state != BR_STATE_FORWARDING ||
 +	    !(p->flags & BR_OFFLOAD))
@@ -368,22 +428,23 @@ Subject: [PATCH] net/bridge: add bridge offload
 +	if (o->enabled == enabled)
 +		goto out;
 +
-+	if (enabled) {
-+		if (!o->gc_work.func)
-+			INIT_WORK(&o->gc_work, br_offload_gc_work);
-+		rhashtable_init(&o->rht, &flow_params);
-+	} else {
-+		flush = true;
-+		rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o);
-+	}
++	bi = kmalloc(sizeof(*bi), gfp);
++	if (!bi) {
++		if (enabled)
++			goto out;
 +
-+	o->enabled = enabled;
++		/* If we are disabling (freeing the rht), retry with
++		 * __GFP_MEMALLOC. Panic if that also fails.
++		 */
++		gfp |= __GFP_MEMALLOC | __GFP_NOWARN;
++		bi = kmalloc(sizeof(*bi), gfp);
++	}
 +
++	bi->enable = enabled;
++	list_add_tail(&bi->list, &o->deferred_list);
++	schedule_work(&o->deferred_work);
 +out:
 +	spin_unlock_bh(&offload_lock);
-+
-+	if (flush)
-+		flush_work(&o->gc_work);
 +}
 +
 +void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
@@ -475,6 +536,9 @@ Subject: [PATCH] net/bridge: add bridge offload
 +#endif
 +
 +	flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC);
++	if (unlikely(!flow))
++		goto out;
++
 +	flow->port = inp;
 +	memcpy(&flow->key, &key, sizeof(key));
 +
@@ -655,20 +719,22 @@ Subject: [PATCH] net/bridge: add bridge offload
  };
  
  #define MDB_PG_FLAGS_PERMANENT	BIT(0)
-@@ -343,6 +349,12 @@ struct net_bridge_mdb_entry {
+@@ -343,6 +349,14 @@ struct net_bridge_mdb_entry {
  	struct rcu_head			rcu;
  };
  
 +struct net_bridge_port_offload {
 +	struct rhashtable		rht;
 +	struct work_struct		gc_work;
++	struct work_struct		deferred_work;
++	struct list_head		deferred_list;
 +	bool				enabled;
 +};
 +
  struct net_bridge_port {
  	struct net_bridge		*br;
  	struct net_device		*dev;
-@@ -403,6 +415,7 @@ struct net_bridge_port {
+@@ -403,6 +417,7 @@ struct net_bridge_port {
  	u16				backup_redirected_cnt;
  
  	struct bridge_stp_xstats	stp_xstats;
@@ -676,7 +742,7 @@ Subject: [PATCH] net/bridge: add bridge offload
  };
  
  #define kobj_to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)
-@@ -519,6 +532,9 @@ struct net_bridge {
+@@ -519,6 +534,9 @@ struct net_bridge {
  	struct kobject			*ifobj;
  	u32				auto_cnt;
  
@@ -686,7 +752,7 @@ Subject: [PATCH] net/bridge: add bridge offload
  #ifdef CONFIG_NET_SWITCHDEV
  	/* Counter used to make sure that hardware domains get unique
  	 * identifiers in case a bridge spans multiple switchdev instances.
-@@ -553,6 +569,10 @@ struct br_input_skb_cb {
+@@ -553,6 +571,10 @@ struct br_input_skb_cb {
  #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
  	u8 br_netfilter_broute:1;
  #endif
@@ -699,7 +765,7 @@ Subject: [PATCH] net/bridge: add bridge offload
  	/* Set if TX data plane offloading is used towards at least one
 --- /dev/null
 +++ b/net/bridge/br_private_offload.h
-@@ -0,0 +1,23 @@
+@@ -0,0 +1,34 @@
 +#ifndef __BR_OFFLOAD_H
 +#define __BR_OFFLOAD_H
 +
@@ -713,6 +779,17 @@ Subject: [PATCH] net/bridge: add bridge offload
 +			       struct netlink_ext_ack *extack);
 +int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val,
 +			 	   struct netlink_ext_ack *extack);
++void br_offload_deferred_work(struct work_struct *work);
++void br_offload_gc_work(struct work_struct *work);
++
++static inline void br_offload_init_work(struct net_bridge_port *p)
++{
++	struct net_bridge_port_offload *o = &p->offload;
++
++	INIT_LIST_HEAD(&o->deferred_list);
++	INIT_WORK(&o->deferred_work, br_offload_deferred_work);
++	INIT_WORK(&o->gc_work, br_offload_gc_work);
++}
 +
 +static inline void br_offload_skb_disable(struct sk_buff *skb)
 +{
-- 
2.34.1