net: sock_def_readable() and friends RCU conversion
The sk_callback_lock rwlock actually protects the sk->sk_sleep pointer, so we need two atomic operations (and the associated cache line dirtying) per incoming packet.

RCU conversion is pretty much needed:

1) Add a new structure, called "struct socket_wq", to hold all fields that will need rcu_read_lock() protection (currently: a wait_queue_head_t and a struct fasync_struct pointer). [A future patch will add a list anchor for wakeup coalescing.]

2) Attach one such structure to each "struct socket" created in sock_alloc_inode().

3) Respect an RCU grace period when freeing a "struct socket_wq".

4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to "struct socket_wq".

5) Change the sk_sleep() function to use the new sk->sk_wq instead of sk->sk_sleep.

6) Change sk_has_sleeper() to wq_has_sleeper(), which must be used inside an rcu_read_lock() section.

7) Change all sk_has_sleeper() callers to:
   - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock).
   - Use wq_has_sleeper() to wake up tasks when there are sleepers.
   - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock).

8) sock_wake_async() is modified to use RCU protection as well.

9) Exceptions: macvtap, drivers/net/tun.c and af_unix use an integrated "struct socket_wq" instead of dynamically allocated ones. They don't need RCU freeing.

Some cleanups or follow-ups are probably needed (a possible sk_callback_lock conversion to a spinlock, for example).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
83d7eb2979
commit
4381548237
@@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
|
||||
*/
|
||||
sk_refcnt_debug_inc(newsk);
|
||||
sk_set_socket(newsk, NULL);
|
||||
newsk->sk_sleep = NULL;
|
||||
newsk->sk_wq = NULL;
|
||||
|
||||
if (newsk->sk_prot->sockets_allocated)
|
||||
percpu_counter_inc(newsk->sk_prot->sockets_allocated);
|
||||
@@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
|
||||
|
||||
static void sock_def_wakeup(struct sock *sk)
|
||||
{
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
if (sk_has_sleeper(sk))
|
||||
wake_up_interruptible_all(sk_sleep(sk));
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (wq_has_sleeper(wq))
|
||||
wake_up_interruptible_all(&wq->wait);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void sock_def_error_report(struct sock *sk)
|
||||
{
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
if (sk_has_sleeper(sk))
|
||||
wake_up_interruptible_poll(sk_sleep(sk), POLLERR);
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (wq_has_sleeper(wq))
|
||||
wake_up_interruptible_poll(&wq->wait, POLLERR);
|
||||
sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void sock_def_readable(struct sock *sk, int len)
|
||||
{
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
if (sk_has_sleeper(sk))
|
||||
wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN |
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (wq_has_sleeper(wq))
|
||||
wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
|
||||
POLLRDNORM | POLLRDBAND);
|
||||
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void sock_def_write_space(struct sock *sk)
|
||||
{
|
||||
read_lock(&sk->sk_callback_lock);
|
||||
struct socket_wq *wq;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/* Do not wake up a writer until he can make "significant"
|
||||
* progress. --DaveM
|
||||
*/
|
||||
if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
|
||||
if (sk_has_sleeper(sk))
|
||||
wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
|
||||
wq = rcu_dereference(sk->sk_wq);
|
||||
if (wq_has_sleeper(wq))
|
||||
wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
|
||||
POLLWRNORM | POLLWRBAND);
|
||||
|
||||
/* Should agree with poll, otherwise some programs break */
|
||||
@@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk)
|
||||
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
||||
}
|
||||
|
||||
read_unlock(&sk->sk_callback_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void sock_def_destruct(struct sock *sk)
|
||||
@@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
|
||||
|
||||
if (sock) {
|
||||
sk->sk_type = sock->type;
|
||||
sk->sk_sleep = &sock->wait;
|
||||
sk->sk_wq = sock->wq;
|
||||
sock->sk = sk;
|
||||
} else
|
||||
sk->sk_sleep = NULL;
|
||||
sk->sk_wq = NULL;
|
||||
|
||||
spin_lock_init(&sk->sk_dst_lock);
|
||||
rwlock_init(&sk->sk_callback_lock);
|
||||
|
Reference in New Issue
Block a user