ipvs: do not use dest after ip_vs_dest_put in LBLC
Commit c2a4ffb70e
("ipvs: convert lblc scheduler to rcu")
allows RCU readers to use dest after calling ip_vs_dest_put().
In a corner case this can race with ip_vs_dest_trash_expire(),
which can release the dest while it is being returned to the
RCU readers as the scheduling result.
To fix the problem, do not allow en->dest to be replaced, and
defer the ip_vs_dest_put() call by using an RCU callback. Now
en->dest does not need to be an RCU pointer.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
This commit is contained in:
committed by
Simon Horman
parent
bcbde4c0a7
commit
2f3d771a35
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
|
|||||||
struct hlist_node list;
|
struct hlist_node list;
|
||||||
int af; /* address family */
|
int af; /* address family */
|
||||||
union nf_inet_addr addr; /* destination IP address */
|
union nf_inet_addr addr; /* destination IP address */
|
||||||
struct ip_vs_dest __rcu *dest; /* real server (cache) */
|
struct ip_vs_dest *dest; /* real server (cache) */
|
||||||
unsigned long lastuse; /* last used time */
|
unsigned long lastuse; /* last used time */
|
||||||
struct rcu_head rcu_head;
|
struct rcu_head rcu_head;
|
||||||
};
|
};
|
||||||
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
|
static void ip_vs_lblc_rcu_free(struct rcu_head *head)
|
||||||
{
|
{
|
||||||
struct ip_vs_dest *dest;
|
struct ip_vs_lblc_entry *en = container_of(head,
|
||||||
|
struct ip_vs_lblc_entry,
|
||||||
|
rcu_head);
|
||||||
|
|
||||||
hlist_del_rcu(&en->list);
|
ip_vs_dest_put(en->dest);
|
||||||
/*
|
kfree(en);
|
||||||
* We don't kfree dest because it is referred either by its service
|
|
||||||
* or the trash dest list.
|
|
||||||
*/
|
|
||||||
dest = rcu_dereference_protected(en->dest, 1);
|
|
||||||
ip_vs_dest_put(dest);
|
|
||||||
kfree_rcu(en, rcu_head);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
|
||||||
|
{
|
||||||
|
hlist_del_rcu(&en->list);
|
||||||
|
call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns hash value for IPVS LBLC entry
|
* Returns hash value for IPVS LBLC entry
|
||||||
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
|
|||||||
struct ip_vs_lblc_entry *en;
|
struct ip_vs_lblc_entry *en;
|
||||||
|
|
||||||
en = ip_vs_lblc_get(dest->af, tbl, daddr);
|
en = ip_vs_lblc_get(dest->af, tbl, daddr);
|
||||||
if (!en) {
|
if (en) {
|
||||||
en = kmalloc(sizeof(*en), GFP_ATOMIC);
|
if (en->dest == dest)
|
||||||
if (!en)
|
return en;
|
||||||
return NULL;
|
ip_vs_lblc_del(en);
|
||||||
|
|
||||||
en->af = dest->af;
|
|
||||||
ip_vs_addr_copy(dest->af, &en->addr, daddr);
|
|
||||||
en->lastuse = jiffies;
|
|
||||||
|
|
||||||
ip_vs_dest_hold(dest);
|
|
||||||
RCU_INIT_POINTER(en->dest, dest);
|
|
||||||
|
|
||||||
ip_vs_lblc_hash(tbl, en);
|
|
||||||
} else {
|
|
||||||
struct ip_vs_dest *old_dest;
|
|
||||||
|
|
||||||
old_dest = rcu_dereference_protected(en->dest, 1);
|
|
||||||
if (old_dest != dest) {
|
|
||||||
ip_vs_dest_put(old_dest);
|
|
||||||
ip_vs_dest_hold(dest);
|
|
||||||
/* No ordering constraints for refcnt */
|
|
||||||
RCU_INIT_POINTER(en->dest, dest);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
en = kmalloc(sizeof(*en), GFP_ATOMIC);
|
||||||
|
if (!en)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
en->af = dest->af;
|
||||||
|
ip_vs_addr_copy(dest->af, &en->addr, daddr);
|
||||||
|
en->lastuse = jiffies;
|
||||||
|
|
||||||
|
ip_vs_dest_hold(dest);
|
||||||
|
en->dest = dest;
|
||||||
|
|
||||||
|
ip_vs_lblc_hash(tbl, en);
|
||||||
|
|
||||||
return en;
|
return en;
|
||||||
}
|
}
|
||||||
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
|
|||||||
tbl->dead = 1;
|
tbl->dead = 1;
|
||||||
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
|
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
|
||||||
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
|
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
|
||||||
ip_vs_lblc_free(en);
|
ip_vs_lblc_del(en);
|
||||||
atomic_dec(&tbl->entries);
|
atomic_dec(&tbl->entries);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
|
|||||||
sysctl_lblc_expiration(svc)))
|
sysctl_lblc_expiration(svc)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ip_vs_lblc_free(en);
|
ip_vs_lblc_del(en);
|
||||||
atomic_dec(&tbl->entries);
|
atomic_dec(&tbl->entries);
|
||||||
}
|
}
|
||||||
spin_unlock(&svc->sched_lock);
|
spin_unlock(&svc->sched_lock);
|
||||||
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
|
|||||||
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
|
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ip_vs_lblc_free(en);
|
ip_vs_lblc_del(en);
|
||||||
atomic_dec(&tbl->entries);
|
atomic_dec(&tbl->entries);
|
||||||
goal--;
|
goal--;
|
||||||
}
|
}
|
||||||
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
|
|||||||
* free up entries from the trash at any time.
|
* free up entries from the trash at any time.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
dest = rcu_dereference(en->dest);
|
dest = en->dest;
|
||||||
if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
|
if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
|
||||||
atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
|
atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
|
||||||
goto out;
|
goto out;
|
||||||
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
|
|||||||
{
|
{
|
||||||
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
|
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
|
||||||
unregister_pernet_subsys(&ip_vs_lblc_ops);
|
unregister_pernet_subsys(&ip_vs_lblc_ops);
|
||||||
synchronize_rcu();
|
rcu_barrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user