net: cleanup and document skb fclone layout
Let's use a proper structure to clearly document and implement
skb fast clones.

Then, we might more easily experiment with alternative layouts.

This patch adds a new skb_fclone_busy() helper, used by tcp and xfrm,
to stop leaking implementation details.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit d0bf4a9e92
parent b248230c34
committed by David S. Miller
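Before the diff itself, a quick orientation: the old code reached the second skb and the refcount through bare pointer arithmetic (skb + 1, then (atomic_t *)(child + 1)), which every call site had to repeat. The patch names this layout once, as struct sk_buff_fclones, and recovers it with container_of(). The following minimal user-space sketch (stand-in types and a simplified container_of(); not kernel code) shows that the two views address the same memory when there is no inter-member padding:

	#include <stdio.h>
	#include <stddef.h>

	struct sk_buff { int fclone; };            /* stand-in for the real struct */
	typedef struct { int counter; } atomic_t;  /* stand-in */

	/* New, explicit layout: [skb1][skb2][fclone_ref] */
	struct sk_buff_fclones {
		struct sk_buff	skb1;
		struct sk_buff	skb2;
		atomic_t	fclone_ref;
	};

	/* Simplified container_of(): recover the enclosing struct from a member. */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	int main(void)
	{
		struct sk_buff_fclones f = { .fclone_ref = { 1 } };
		struct sk_buff *skb = &f.skb1;

		/* Old style: layout knowledge leaks into every call site. */
		struct sk_buff *child_old = skb + 1;
		atomic_t *ref_old = (atomic_t *)(child_old + 1);

		/* New style: one named layout, recovered via container_of(). */
		struct sk_buff_fclones *fclones =
			container_of(skb, struct sk_buff_fclones, skb1);

		/* With these padding-free stand-in types both views agree. */
		printf("%d\n", child_old == &fclones->skb2 &&
			       ref_old == &fclones->fclone_ref);   /* prints 1 */
		return 0;
	}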
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -781,6 +781,31 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 				     int *errcode,
 				     gfp_t gfp_mask);
 
+/* Layout of fast clones : [skb1][skb2][fclone_ref] */
+struct sk_buff_fclones {
+	struct sk_buff	skb1;
+
+	struct sk_buff	skb2;
+
+	atomic_t	fclone_ref;
+};
+
+/**
+ *	skb_fclone_busy - check if fclone is busy
+ *	@skb: buffer
+ *
+ * Returns true if skb is a fast clone, and its clone is not freed.
+ */
+static inline bool skb_fclone_busy(const struct sk_buff *skb)
+{
+	const struct sk_buff_fclones *fclones;
+
+	fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+	return skb->fclone == SKB_FCLONE_ORIG &&
+	       fclones->skb2.fclone == SKB_FCLONE_CLONE;
+}
+
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
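In caller terms, a true return means the skb was allocated with alloc_skb_fclone() and its companion clone is still alive, for example still queued in a qdisc or a device driver. A hypothetical call site (not from this patch) would look like:

	if (skb_fclone_busy(skb))
		return;		/* the companion clone is still in flight */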
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -257,15 +257,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (flags & SKB_ALLOC_FCLONE) {
-		struct sk_buff *child = skb + 1;
-		atomic_t *fclone_ref = (atomic_t *) (child + 1);
+		struct sk_buff_fclones *fclones;
 
-		kmemcheck_annotate_bitfield(child, flags1);
+		fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+		kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
 		skb->fclone = SKB_FCLONE_ORIG;
-		atomic_set(fclone_ref, 1);
+		atomic_set(&fclones->fclone_ref, 1);
 
-		child->fclone = SKB_FCLONE_UNAVAILABLE;
-		child->pfmemalloc = pfmemalloc;
+		fclones->skb2.fclone = SKB_FCLONE_UNAVAILABLE;
+		fclones->skb2.pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
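Worth noting here: fclone_ref starts at 1, counting only the original skb; skb_clone() bumps it to 2 when it hands out skb2. The count thus tracks how many of the two buffers are live, and kfree_skbmem() returns the block to the slab when it drops to zero.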
@@ -524,8 +525,7 @@ static void skb_release_data(struct sk_buff *skb)
  */
 static void kfree_skbmem(struct sk_buff *skb)
 {
-	struct sk_buff *other;
-	atomic_t *fclone_ref;
+	struct sk_buff_fclones *fclones;
 
 	switch (skb->fclone) {
 	case SKB_FCLONE_UNAVAILABLE:
@@ -533,22 +533,21 @@ static void kfree_skbmem(struct sk_buff *skb)
 		break;
 
 	case SKB_FCLONE_ORIG:
-		fclone_ref = (atomic_t *) (skb + 2);
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, skb);
+		fclones = container_of(skb, struct sk_buff_fclones, skb1);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 
 	case SKB_FCLONE_CLONE:
-		fclone_ref = (atomic_t *) (skb + 1);
-		other = skb - 1;
+		fclones = container_of(skb, struct sk_buff_fclones, skb2);
 
 		/* The clone portion is available for
 		 * fast-cloning again.
 		 */
 		skb->fclone = SKB_FCLONE_UNAVAILABLE;
 
-		if (atomic_dec_and_test(fclone_ref))
-			kmem_cache_free(skbuff_fclone_cache, other);
+		if (atomic_dec_and_test(&fclones->fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, fclones);
 		break;
 	}
 }
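A user-space model of the free protocol this hunk implements may help: both the ORIG and the CLONE paths drop the shared fclone_ref, and whichever drop reaches zero returns the whole block to the slab. Stand-in types and a plain int counter replace the kernel's atomics; this is an illustrative sketch, not kernel code.

	#include <stdio.h>
	#include <stdlib.h>
	#include <stddef.h>

	struct sk_buff { int fclone; };  /* stand-in */
	enum { SKB_FCLONE_UNAVAILABLE, SKB_FCLONE_ORIG, SKB_FCLONE_CLONE };

	struct sk_buff_fclones {
		struct sk_buff	skb1;
		struct sk_buff	skb2;
		int		fclone_ref;  /* atomic_t in the kernel */
	};

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	static void model_kfree_skbmem(struct sk_buff *skb)
	{
		struct sk_buff_fclones *fclones;

		switch (skb->fclone) {
		case SKB_FCLONE_ORIG:
			fclones = container_of(skb, struct sk_buff_fclones, skb1);
			if (--fclones->fclone_ref == 0)
				free(fclones);  /* kmem_cache_free() in the kernel */
			break;
		case SKB_FCLONE_CLONE:
			fclones = container_of(skb, struct sk_buff_fclones, skb2);
			skb->fclone = SKB_FCLONE_UNAVAILABLE;  /* slot reusable */
			if (--fclones->fclone_ref == 0)
				free(fclones);
			break;
		}
	}

	int main(void)
	{
		struct sk_buff_fclones *f = calloc(1, sizeof(*f));

		f->skb1.fclone = SKB_FCLONE_ORIG;
		f->skb2.fclone = SKB_FCLONE_CLONE;
		f->fclone_ref = 2;                /* original + live clone */

		model_kfree_skbmem(&f->skb2);     /* ref 2 -> 1, block survives */
		model_kfree_skbmem(&f->skb1);     /* ref 1 -> 0, block is freed */
		puts("block freed exactly once, in either order");
		return 0;
	}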
@@ -859,17 +858,18 @@ EXPORT_SYMBOL_GPL(skb_copy_ubufs);
 
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	struct sk_buff *n;
+	struct sk_buff_fclones *fclones = container_of(skb,
+						       struct sk_buff_fclones,
+						       skb1);
+	struct sk_buff *n = &fclones->skb2;
 
 	if (skb_orphan_frags(skb, gfp_mask))
 		return NULL;
 
-	n = skb + 1;
 	if (skb->fclone == SKB_FCLONE_ORIG &&
 	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
-		atomic_t *fclone_ref = (atomic_t *) (n + 1);
 		n->fclone = SKB_FCLONE_CLONE;
-		atomic_inc(fclone_ref);
+		atomic_inc(&fclones->fclone_ref);
 	} else {
 		if (skb_pfmemalloc(skb))
 			gfp_mask |= __GFP_MEMALLOC;
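This hunk makes the fast-clone state machine easy to read end to end: __alloc_skb() leaves skb1 as SKB_FCLONE_ORIG and skb2 as SKB_FCLONE_UNAVAILABLE; skb_clone() hands out skb2 by flipping it to SKB_FCLONE_CLONE and taking a reference; kfree_skbmem() flips it back to SKB_FCLONE_UNAVAILABLE so the slot can be fast-cloned again. skb_fclone_busy() is exactly the "skb2 is currently handed out" test.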
@@ -3240,8 +3240,7 @@ void __init skb_init(void)
 					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 					      NULL);
 	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
-						(2*sizeof(struct sk_buff)) +
-						sizeof(atomic_t),
+						sizeof(struct sk_buff_fclones),
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
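One subtlety in this size change: sizeof(struct sk_buff_fclones) can exceed the old (2*sizeof(struct sk_buff)) + sizeof(atomic_t) if the compiler adds tail padding for alignment, which is harmless for a dedicated slab cache. A hypothetical compile-time check (not part of the patch; assumes the struct definitions are in scope and a C11 compiler) would document that the new object at least covers the old layout:

	/* Hypothetical, not in the patch: the struct-based slab object must
	 * cover the old hand-rolled [skb][skb][atomic_t] allocation size.
	 */
	_Static_assert(sizeof(struct sk_buff_fclones) >=
		       2 * sizeof(struct sk_buff) + sizeof(atomic_t),
		       "sk_buff_fclones must cover [skb1][skb2][fclone_ref]");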
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2110,10 +2110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 static bool skb_still_in_host_queue(const struct sock *sk,
 				    const struct sk_buff *skb)
 {
-	const struct sk_buff *fclone = skb + 1;
-
-	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-		     fclone->fclone == SKB_FCLONE_CLONE)) {
+	if (unlikely(skb_fclone_busy(skb))) {
 		NET_INC_STATS_BH(sock_net(sk),
 				 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
 		return true;
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1961,10 +1961,8 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
 	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
 	struct xfrm_policy *pol = xdst->pols[0];
 	struct xfrm_policy_queue *pq = &pol->polq;
-	const struct sk_buff *fclone = skb + 1;
 
-	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
-		     fclone->fclone == SKB_FCLONE_CLONE)) {
+	if (unlikely(skb_fclone_busy(skb))) {
 		kfree_skb(skb);
 		return 0;
 	}
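With both call sites converted, nothing outside skbuff.h and skbuff.c assumes the clone lives at skb + 1 anymore, so the layout experiments the changelog mentions only need to touch struct sk_buff_fclones and the code above.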