netfilter: get rid of atomic ops in fast path
We currently use a percpu spinlock to 'protect' rule bytes/packets
counters, after various attempts to use RCU instead.

Lately we added a seqlock so that get_counters() can run without
blocking BH or 'writers'. But we really only need the seqcount in it.

The spinlock itself is only ever taken by the current/owner cpu, so we
can remove it completely. This cleans up the API, using correct
'writer' vs 'reader' semantics.

At replace time, the get_counters() call makes sure all cpus are done
using the old table.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
commit 7f5c6d4f66
parent 8f7b01a178
committed by Patrick McHardy
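
For reference, the per-cpu seqcount helpers that replace the old xt_info_locks API live in include/linux/netfilter/x_tables.h, whose hunk is not shown on this page. The sketch below is an approximation reconstructed from the call sites in the diff, not the verbatim header: the writer flips its own cpu's sequence odd on entry and back to even on exit, and the returned addend is 0 on recursive re-entry (e.g. a REJECT target re-entering table traversal), so only the outermost section toggles the sequence.

/* Approximate sketch, not verbatim kernel source. */
DECLARE_PER_CPU(seqcount_t, xt_recseq);

static inline unsigned int xt_write_recseq_begin(void)
{
	unsigned int addend;

	/* Low order bit of the sequence is already set when this
	 * cpu is inside a write section (recursive entry): addend
	 * then evaluates to 0 and the nested begin/end are no-ops.
	 */
	addend = (__this_cpu_read(xt_recseq.sequence) + 1) & 1;

	/* Like write_seqcount_begin(), but a plain per-cpu add:
	 * no LOCK-prefixed atomic op and no conditional branch.
	 */
	__this_cpu_add(xt_recseq.sequence, addend);
	smp_wmb();
	return addend;
}

static inline void xt_write_recseq_end(unsigned int addend)
{
	/* Like write_seqcount_end(): only the outermost section
	 * (addend == 1) makes the sequence even again.
	 */
	smp_wmb();
	__this_cpu_add(xt_recseq.sequence, addend);
}

Readers such as get_counters() pair read_seqcount_begin()/read_seqcount_retry() against this per-cpu seqcount, as the hunks below show; the retry loop also gives table replacement its synchronization point, since it cannot complete while any cpu still holds its sequence odd inside the old table.
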
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -260,6 +260,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	void *table_base;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
+	unsigned int addend;
 
 	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
 		return NF_DROP;
@@ -267,7 +268,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 
-	xt_info_rdlock_bh();
+	local_bh_disable();
+	addend = xt_write_recseq_begin();
 	private = table->private;
 	table_base = private->entries[smp_processor_id()];
 
@@ -338,7 +340,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			/* Verdict */
 			break;
 	} while (!acpar.hotdrop);
-	xt_info_rdunlock_bh();
+	xt_write_recseq_end(addend);
+	local_bh_enable();
 
 	if (acpar.hotdrop)
 		return NF_DROP;
@@ -712,7 +715,7 @@ static void get_counters(const struct xt_table_info *t,
 	unsigned int i;
 
 	for_each_possible_cpu(cpu) {
-		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+		seqcount_t *s = &per_cpu(xt_recseq, cpu);
 
 		i = 0;
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -720,10 +723,10 @@ static void get_counters(const struct xt_table_info *t,
 			unsigned int start;
 
 			do {
-				start = read_seqbegin(lock);
+				start = read_seqcount_begin(s);
 				bcnt = iter->counters.bcnt;
 				pcnt = iter->counters.pcnt;
-			} while (read_seqretry(lock, start));
+			} while (read_seqcount_retry(s, start));
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
@@ -1115,6 +1118,7 @@ static int do_add_counters(struct net *net, const void __user *user,
 	int ret = 0;
 	void *loc_cpu_entry;
 	struct arpt_entry *iter;
+	unsigned int addend;
 #ifdef CONFIG_COMPAT
 	struct compat_xt_counters_info compat_tmp;
 
@@ -1171,12 +1175,12 @@ static int do_add_counters(struct net *net, const void __user *user,
 	/* Choose the copy that is on our node */
 	curcpu = smp_processor_id();
 	loc_cpu_entry = private->entries[curcpu];
-	xt_info_wrlock(curcpu);
+	addend = xt_write_recseq_begin();
 	xt_entry_foreach(iter, loc_cpu_entry, private->size) {
 		ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
 		++i;
 	}
-	xt_info_wrunlock(curcpu);
+	xt_write_recseq_end(addend);
 unlock_up_free:
 	local_bh_enable();
 	xt_table_unlock(t);
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -68,15 +68,6 @@ void *ipt_alloc_initial_table(const struct xt_table *info)
 }
 EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
 
-/*
-   We keep a set of rules for each CPU, so we can avoid write-locking
-   them in the softirq when updating the counters and therefore
-   only need to read-lock in the softirq; doing a write_lock_bh() in user
-   context stops packets coming through and allows user context to read
-   the counters or update the rules.
-
-   Hence the start of any table is given by get_table() below. */
-
 /* Returns whether matches rule or not. */
 /* Performance critical - called for every packet */
 static inline bool
@@ -311,6 +302,7 @@ ipt_do_table(struct sk_buff *skb,
 	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
+	unsigned int addend;
 
 	/* Initialization */
 	ip = ip_hdr(skb);
@@ -331,7 +323,8 @@ ipt_do_table(struct sk_buff *skb,
 	acpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	xt_info_rdlock_bh();
+	local_bh_disable();
+	addend = xt_write_recseq_begin();
 	private = table->private;
 	cpu        = smp_processor_id();
 	table_base = private->entries[cpu];
@@ -430,7 +423,9 @@ ipt_do_table(struct sk_buff *skb,
 	pr_debug("Exiting %s; resetting sp from %u to %u\n",
 		 __func__, *stackptr, origptr);
 	*stackptr = origptr;
-	xt_info_rdunlock_bh();
+	xt_write_recseq_end(addend);
+	local_bh_enable();
+
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
@@ -886,7 +881,7 @@ get_counters(const struct xt_table_info *t,
 	unsigned int i;
 
 	for_each_possible_cpu(cpu) {
-		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+		seqcount_t *s = &per_cpu(xt_recseq, cpu);
 
 		i = 0;
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
@@ -894,10 +889,10 @@ get_counters(const struct xt_table_info *t,
 			unsigned int start;
 
 			do {
-				start = read_seqbegin(lock);
+				start = read_seqcount_begin(s);
 				bcnt = iter->counters.bcnt;
 				pcnt = iter->counters.pcnt;
-			} while (read_seqretry(lock, start));
+			} while (read_seqcount_retry(s, start));
 
 			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
@@ -1312,6 +1307,7 @@ do_add_counters(struct net *net, const void __user *user,
 	int ret = 0;
 	void *loc_cpu_entry;
 	struct ipt_entry *iter;
+	unsigned int addend;
 #ifdef CONFIG_COMPAT
 	struct compat_xt_counters_info compat_tmp;
 
@@ -1368,12 +1364,12 @@ do_add_counters(struct net *net, const void __user *user,
 	/* Choose the copy that is on our node */
 	curcpu = smp_processor_id();
 	loc_cpu_entry = private->entries[curcpu];
-	xt_info_wrlock(curcpu);
+	addend = xt_write_recseq_begin();
 	xt_entry_foreach(iter, loc_cpu_entry, private->size) {
 		ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
 		++i;
 	}
-	xt_info_wrunlock(curcpu);
+	xt_write_recseq_end(addend);
 unlock_up_free:
 	local_bh_enable();
 	xt_table_unlock(t);