net: __alloc_skb() speedup
With the following patch I can reach the maximum rate of my pktgen+udpsink simulator:

- 'old' machine: dual quad core E5450 @3.00GHz
- 64 UDP rx flows (they only differ by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- RPS dispatched on 7 other cores (~130.000 IPI per second)
- SLAB allocator (faster than SLUB in this workload)
- tg3 NIC
- 1.080.000 pps without a single drop at NIC level.

The idea is to add two prefetchw() calls in __alloc_skb(): one to prefetch the first sk_buff cache line, the second to prefetch the shinfo part.

Also, use a single memset() to initialize all skb_shared_info fields, instead of initializing them one by one, to reduce the number of instructions by using long word moves.

All skb_shared_info fields before 'dataref' are cleared in __alloc_skb().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
8753d29fd5
commit
ec7d2f2cf3
@@ -187,7 +187,6 @@ union skb_shared_tx {
|
||||
* the end of the header data, ie. at skb->end.
|
||||
*/
|
||||
struct skb_shared_info {
|
||||
atomic_t dataref;
|
||||
unsigned short nr_frags;
|
||||
unsigned short gso_size;
|
||||
/* Warning: this field is not always filled in (UFO)! */
|
||||
@@ -197,6 +196,12 @@ struct skb_shared_info {
|
||||
union skb_shared_tx tx_flags;
|
||||
struct sk_buff *frag_list;
|
||||
struct skb_shared_hwtstamps hwtstamps;
|
||||
|
||||
/*
|
||||
* Warning : all fields before dataref are cleared in __alloc_skb()
|
||||
*/
|
||||
atomic_t dataref;
|
||||
|
||||
skb_frag_t frags[MAX_SKB_FRAGS];
|
||||
/* Intermediate layers must ensure that destructor_arg
|
||||
* remains valid until skb destructor */
|
||||
|
Reference in New Issue
Block a user