[NET]: gen_estimator deadlock fix
-Fixes ABBA deadlock noted by Patrick McHardy <kaber@trash.net>: > There is at least one ABBA deadlock, est_timer() does: > read_lock(&est_lock) > spin_lock(e->stats_lock) (which is dev->queue_lock) > > and qdisc_destroy calls htb_destroy under dev->queue_lock, which > calls htb_destroy_class, then gen_kill_estimator and this > write_locks est_lock. To fix the ABBA deadlock the rate estimators are now kept on an rcu list. -The est_lock changes the use from protecting the list to protecting the update to the 'bstat' pointer in order to avoid NULL dereferencing. -The 'interval' member of the gen_estimator structure removed as it is not needed. Signed-off-by: Ranko Zivojnovic <ranko@spidernet.net> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
dd121c4bbf
commit
0929c2dd83
@@ -79,27 +79,27 @@
|
|||||||
|
|
||||||
struct gen_estimator
|
struct gen_estimator
|
||||||
{
|
{
|
||||||
struct gen_estimator *next;
|
struct list_head list;
|
||||||
struct gnet_stats_basic *bstats;
|
struct gnet_stats_basic *bstats;
|
||||||
struct gnet_stats_rate_est *rate_est;
|
struct gnet_stats_rate_est *rate_est;
|
||||||
spinlock_t *stats_lock;
|
spinlock_t *stats_lock;
|
||||||
unsigned interval;
|
|
||||||
int ewma_log;
|
int ewma_log;
|
||||||
u64 last_bytes;
|
u64 last_bytes;
|
||||||
u32 last_packets;
|
u32 last_packets;
|
||||||
u32 avpps;
|
u32 avpps;
|
||||||
u32 avbps;
|
u32 avbps;
|
||||||
|
struct rcu_head e_rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct gen_estimator_head
|
struct gen_estimator_head
|
||||||
{
|
{
|
||||||
struct timer_list timer;
|
struct timer_list timer;
|
||||||
struct gen_estimator *list;
|
struct list_head list;
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
|
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
|
||||||
|
|
||||||
/* Estimator array lock */
|
/* Protects against NULL dereference */
|
||||||
static DEFINE_RWLOCK(est_lock);
|
static DEFINE_RWLOCK(est_lock);
|
||||||
|
|
||||||
static void est_timer(unsigned long arg)
|
static void est_timer(unsigned long arg)
|
||||||
@@ -107,13 +107,17 @@ static void est_timer(unsigned long arg)
|
|||||||
int idx = (int)arg;
|
int idx = (int)arg;
|
||||||
struct gen_estimator *e;
|
struct gen_estimator *e;
|
||||||
|
|
||||||
read_lock(&est_lock);
|
rcu_read_lock();
|
||||||
for (e = elist[idx].list; e; e = e->next) {
|
list_for_each_entry_rcu(e, &elist[idx].list, list) {
|
||||||
u64 nbytes;
|
u64 nbytes;
|
||||||
u32 npackets;
|
u32 npackets;
|
||||||
u32 rate;
|
u32 rate;
|
||||||
|
|
||||||
spin_lock(e->stats_lock);
|
spin_lock(e->stats_lock);
|
||||||
|
read_lock(&est_lock);
|
||||||
|
if (e->bstats == NULL)
|
||||||
|
goto skip;
|
||||||
|
|
||||||
nbytes = e->bstats->bytes;
|
nbytes = e->bstats->bytes;
|
||||||
npackets = e->bstats->packets;
|
npackets = e->bstats->packets;
|
||||||
rate = (nbytes - e->last_bytes)<<(7 - idx);
|
rate = (nbytes - e->last_bytes)<<(7 - idx);
|
||||||
@@ -125,12 +129,14 @@ static void est_timer(unsigned long arg)
|
|||||||
e->last_packets = npackets;
|
e->last_packets = npackets;
|
||||||
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
|
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
|
||||||
e->rate_est->pps = (e->avpps+0x1FF)>>10;
|
e->rate_est->pps = (e->avpps+0x1FF)>>10;
|
||||||
|
skip:
|
||||||
|
read_unlock(&est_lock);
|
||||||
spin_unlock(e->stats_lock);
|
spin_unlock(e->stats_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (elist[idx].list != NULL)
|
if (!list_empty(&elist[idx].list))
|
||||||
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
|
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
|
||||||
read_unlock(&est_lock);
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -147,12 +153,17 @@ static void est_timer(unsigned long arg)
|
|||||||
* &rate_est with the statistics lock grabed during this period.
|
* &rate_est with the statistics lock grabed during this period.
|
||||||
*
|
*
|
||||||
* Returns 0 on success or a negative error code.
|
* Returns 0 on success or a negative error code.
|
||||||
|
*
|
||||||
|
* NOTE: Called under rtnl_mutex
|
||||||
*/
|
*/
|
||||||
int gen_new_estimator(struct gnet_stats_basic *bstats,
|
int gen_new_estimator(struct gnet_stats_basic *bstats,
|
||||||
struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
|
struct gnet_stats_rate_est *rate_est,
|
||||||
|
spinlock_t *stats_lock,
|
||||||
|
struct rtattr *opt)
|
||||||
{
|
{
|
||||||
struct gen_estimator *est;
|
struct gen_estimator *est;
|
||||||
struct gnet_estimator *parm = RTA_DATA(opt);
|
struct gnet_estimator *parm = RTA_DATA(opt);
|
||||||
|
int idx;
|
||||||
|
|
||||||
if (RTA_PAYLOAD(opt) < sizeof(*parm))
|
if (RTA_PAYLOAD(opt) < sizeof(*parm))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@@ -164,7 +175,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
|
|||||||
if (est == NULL)
|
if (est == NULL)
|
||||||
return -ENOBUFS;
|
return -ENOBUFS;
|
||||||
|
|
||||||
est->interval = parm->interval + 2;
|
idx = parm->interval + 2;
|
||||||
est->bstats = bstats;
|
est->bstats = bstats;
|
||||||
est->rate_est = rate_est;
|
est->rate_est = rate_est;
|
||||||
est->stats_lock = stats_lock;
|
est->stats_lock = stats_lock;
|
||||||
@@ -174,20 +185,25 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
|
|||||||
est->last_packets = bstats->packets;
|
est->last_packets = bstats->packets;
|
||||||
est->avpps = rate_est->pps<<10;
|
est->avpps = rate_est->pps<<10;
|
||||||
|
|
||||||
est->next = elist[est->interval].list;
|
if (!elist[idx].timer.function) {
|
||||||
if (est->next == NULL) {
|
INIT_LIST_HEAD(&elist[idx].list);
|
||||||
init_timer(&elist[est->interval].timer);
|
setup_timer(&elist[idx].timer, est_timer, idx);
|
||||||
elist[est->interval].timer.data = est->interval;
|
|
||||||
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
|
|
||||||
elist[est->interval].timer.function = est_timer;
|
|
||||||
add_timer(&elist[est->interval].timer);
|
|
||||||
}
|
}
|
||||||
write_lock_bh(&est_lock);
|
|
||||||
elist[est->interval].list = est;
|
if (list_empty(&elist[idx].list))
|
||||||
write_unlock_bh(&est_lock);
|
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
|
||||||
|
|
||||||
|
list_add_rcu(&est->list, &elist[idx].list);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void __gen_kill_estimator(struct rcu_head *head)
|
||||||
|
{
|
||||||
|
struct gen_estimator *e = container_of(head,
|
||||||
|
struct gen_estimator, e_rcu);
|
||||||
|
kfree(e);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* gen_kill_estimator - remove a rate estimator
|
* gen_kill_estimator - remove a rate estimator
|
||||||
* @bstats: basic statistics
|
* @bstats: basic statistics
|
||||||
@@ -195,31 +211,32 @@ int gen_new_estimator(struct gnet_stats_basic *bstats,
|
|||||||
*
|
*
|
||||||
* Removes the rate estimator specified by &bstats and &rate_est
|
* Removes the rate estimator specified by &bstats and &rate_est
|
||||||
* and deletes the timer.
|
* and deletes the timer.
|
||||||
|
*
|
||||||
|
* NOTE: Called under rtnl_mutex
|
||||||
*/
|
*/
|
||||||
void gen_kill_estimator(struct gnet_stats_basic *bstats,
|
void gen_kill_estimator(struct gnet_stats_basic *bstats,
|
||||||
struct gnet_stats_rate_est *rate_est)
|
struct gnet_stats_rate_est *rate_est)
|
||||||
{
|
{
|
||||||
int idx;
|
int idx;
|
||||||
struct gen_estimator *est, **pest;
|
struct gen_estimator *e, *n;
|
||||||
|
|
||||||
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
|
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
|
||||||
int killed = 0;
|
|
||||||
pest = &elist[idx].list;
|
/* Skip non initialized indexes */
|
||||||
while ((est=*pest) != NULL) {
|
if (!elist[idx].timer.function)
|
||||||
if (est->rate_est != rate_est || est->bstats != bstats) {
|
continue;
|
||||||
pest = &est->next;
|
|
||||||
|
list_for_each_entry_safe(e, n, &elist[idx].list, list) {
|
||||||
|
if (e->rate_est != rate_est || e->bstats != bstats)
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
write_lock_bh(&est_lock);
|
write_lock_bh(&est_lock);
|
||||||
*pest = est->next;
|
e->bstats = NULL;
|
||||||
write_unlock_bh(&est_lock);
|
write_unlock_bh(&est_lock);
|
||||||
|
|
||||||
kfree(est);
|
list_del_rcu(&e->list);
|
||||||
killed++;
|
call_rcu(&e->e_rcu, __gen_kill_estimator);
|
||||||
}
|
}
|
||||||
if (killed && elist[idx].list == NULL)
|
|
||||||
del_timer(&elist[idx].timer);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user