net dst: use a percpu_counter to track entries
struct dst_ops tracks the number of allocated dst entries in an atomic_t field, which is subject to high cache line contention under stress workloads. Switch to a percpu_counter to reduce the number of times we need to dirty a central location. Place it on a separate cache line to avoid dirtying read-only fields. Stress test: (sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE, SLUB/NUMA) Before: real 0m51.179s user 0m15.329s sys 10m15.942s After: real 0m45.570s user 0m15.525s sys 9m56.669s With a small reordering of struct neighbour fields, the subject of a following patch (to separate refcnt from other read-mostly fields): real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
0ed8ddf404
commit
fc66f95c68
@@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops)
|
||||
{
|
||||
struct dst_entry *dst;
|
||||
|
||||
if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
|
||||
if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
|
||||
if (ops->gc(ops))
|
||||
return NULL;
|
||||
}
|
||||
@@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops)
|
||||
#if RT_CACHE_DEBUG >= 2
|
||||
atomic_inc(&dst_total);
|
||||
#endif
|
||||
atomic_inc(&ops->entries);
|
||||
dst_entries_add(ops, 1);
|
||||
return dst;
|
||||
}
|
||||
EXPORT_SYMBOL(dst_alloc);
|
||||
@@ -236,7 +236,7 @@ again:
|
||||
neigh_release(neigh);
|
||||
}
|
||||
|
||||
atomic_dec(&dst->ops->entries);
|
||||
dst_entries_add(dst->ops, -1);
|
||||
|
||||
if (dst->ops->destroy)
|
||||
dst->ops->destroy(dst);
|
||||
|
Reference in New Issue
Block a user