[NET_SCHED]: Fix fallout from dev->qdisc RCU change
The move of qdisc destruction to an RCU callback broke locking in the entire qdisc layer by invalidating previously valid assumptions about the context in which changes to the qdisc tree occur.

The two assumptions were:

- Since changes only happen in process context, read_lock doesn't need bottom half protection. Now invalid, since destruction of inner qdiscs, classifiers, actions and estimators happens in the RCU callback unless they're manually deleted, resulting in deadlocks when a read_lock in process context is interrupted by a write_lock_bh in bottom half context.

- Since changes only happen under the RTNL, no additional locking is necessary for data not used during packet processing (e.g. u32_list). Again, since destruction now happens in the RCU callback, this assumption is no longer valid, causing races while using this data, which can result in corruption or use-after-free.

Instead of "fixing" this by disabling bottom halves everywhere and adding new locks/refcounting, this patch makes these assumptions valid again by moving destruction back to process context. Since only the dev->qdisc pointer is protected by RCU, but ->enqueue and the qdisc tree are still protected by dev->qdisc_lock, destruction of the tree can be performed immediately, and only the final free needs to happen in the RCU callback to make sure dev_queue_xmit doesn't access already freed memory.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
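For readers unfamiliar with the destroy-now/free-later split the last paragraph describes, here is a minimal sketch in kernel-style C. This is not code from the patch; all toy_* names are hypothetical, and only call_rcu(), kfree() and container_of() are real kernel APIs.

/*
 * Sketch only, not the actual patch: tear the tree down synchronously
 * in process context, defer just the final kfree() past an RCU grace
 * period. All toy_* names are hypothetical.
 */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct toy_qdisc {
        struct rcu_head rcu;
        /* ... inner qdiscs, classifiers, actions, estimators ... */
};

/*
 * RCU callback: runs after a grace period, when no CPU can still be
 * dereferencing the old dev->qdisc pointer. It only frees memory and
 * takes no locks of its own.
 */
static void toy_qdisc_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct toy_qdisc, rcu));
}

/* Called in process context with the qdisc lock held by the caller. */
static void toy_qdisc_destroy(struct toy_qdisc *q)
{
        /*
         * Destroy inner qdiscs, classifiers, actions and estimators
         * right here, so none of their locks is ever taken from the
         * (bottom half) RCU callback context.
         */
        /* ... walk and destroy the tree ... */

        /* Only the memory itself must outlive concurrent readers. */
        call_rcu(&q->rcu, toy_qdisc_free_rcu);
}

The point is that everything carrying locks of its own dies synchronously under the qdisc lock, while the RCU grace period only has to cover the raw memory that dev_queue_xmit may still be reading.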
commit 85670cc1fa
parent 787e0617e5
committed by David S. Miller
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -195,14 +195,14 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
        struct Qdisc *q;
 
-       read_lock_bh(&qdisc_tree_lock);
+       read_lock(&qdisc_tree_lock);
        list_for_each_entry(q, &dev->qdisc_list, list) {
                if (q->handle == handle) {
-                       read_unlock_bh(&qdisc_tree_lock);
+                       read_unlock(&qdisc_tree_lock);
                        return q;
                }
        }
-       read_unlock_bh(&qdisc_tree_lock);
+       read_unlock(&qdisc_tree_lock);
        return NULL;
 }
 
@@ -837,7 +837,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
                        continue;
                if (idx > s_idx)
                        s_q_idx = 0;
-               read_lock_bh(&qdisc_tree_lock);
+               read_lock(&qdisc_tree_lock);
                q_idx = 0;
                list_for_each_entry(q, &dev->qdisc_list, list) {
                        if (q_idx < s_q_idx) {
@@ -846,12 +846,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
                        }
                        if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
                                          cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
-                               read_unlock_bh(&qdisc_tree_lock);
+                               read_unlock(&qdisc_tree_lock);
                                goto done;
                        }
                        q_idx++;
                }
-               read_unlock_bh(&qdisc_tree_lock);
+               read_unlock(&qdisc_tree_lock);
        }
 
 done:
@@ -1074,7 +1074,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
        s_t = cb->args[0];
        t = 0;
 
-       read_lock_bh(&qdisc_tree_lock);
+       read_lock(&qdisc_tree_lock);
        list_for_each_entry(q, &dev->qdisc_list, list) {
                if (t < s_t || !q->ops->cl_ops ||
                    (tcm->tcm_parent &&
@@ -1096,7 +1096,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
                        break;
                t++;
        }
-       read_unlock_bh(&qdisc_tree_lock);
+       read_unlock(&qdisc_tree_lock);
 
        cb->args[0] = t;
 
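The read_lock_bh() to read_lock() conversions above follow from the same reasoning: once all writers of qdisc_tree_lock run in process context again, readers no longer need to disable bottom halves. A sketch of that rule, again with hypothetical toy_* names rather than the patched code:

/*
 * Illustration only (hypothetical names): why plain read_lock()
 * suffices once writers are confined to process context.
 */
#include <linux/spinlock.h>

static DEFINE_RWLOCK(toy_tree_lock);

/* Writer: process context only (e.g. under the RTNL). */
static void toy_change_tree(void)
{
        write_lock(&toy_tree_lock);
        /* ... modify the qdisc list ... */
        write_unlock(&toy_tree_lock);
}

/* Reader: process context. */
static void toy_walk_tree(void)
{
        /*
         * No write_lock_bh() can interrupt us from a bottom half
         * anymore. While destruction still ran in an RCU callback, a
         * BH writer preempting this section on the same CPU would spin
         * forever on the read lock we already hold - the deadlock the
         * commit message describes.
         */
        read_lock(&toy_tree_lock);
        /* ... walk the qdisc list ... */
        read_unlock(&toy_tree_lock);
}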