rds: cancel send/recv work before queuing connection shutdown

We could end up executing rds_conn_shutdown before the rds_recv_worker
thread runs. In that case rds_conn_shutdown -> rds_tcp_conn_shutdown can
do a sock_release and set sock->sk to NULL, which may interleave in bad
ways with rds_recv_worker; e.g., it could result in:

"BUG: unable to handle kernel NULL pointer dereference at 0000000000000078"
    [ffff881769f6fd70] release_sock at ffffffff815f337b
    [ffff881769f6fd90] rds_tcp_recv at ffffffffa043c888 [rds_tcp]
    [ffff881769f6fdb0] rds_recv_worker at ffffffffa04a4810 [rds]
    [ffff881769f6fde0] process_one_work at ffffffff810a14c1
    [ffff881769f6fe40] worker_thread at ffffffff810a1940
    [ffff881769f6fec0] kthread at ffffffff810a6b1e

Also, do not enqueue any new shutdown workq items when the connection is
shutting down (this may happen for rds-tcp in softirq mode, if a FIN
or CLOSE is received while the module is in the middle of an unload).

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Sowmini Varadhan 2017-07-16 16:43:46 -07:00 committed by David S. Miller
parent ce3dbe2974
commit aed20a53a7
6 changed files with 16 additions and 12 deletions

View File

@ -374,13 +374,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
if (!cp->cp_transport_data)
return;
rds_conn_path_drop(cp);
flush_work(&cp->cp_down_w);
/* make sure lingering queued work won't try to ref the conn */
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
rds_conn_path_drop(cp, true);
flush_work(&cp->cp_down_w);
/* tear down queued messages */
list_for_each_entry_safe(rm, rtmp,
&cp->cp_send_queue,
@ -664,9 +664,13 @@ void rds_conn_exit(void)
/*
* Force a disconnect
*/
void rds_conn_path_drop(struct rds_conn_path *cp)
void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
{
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
if (!destroy && cp->cp_conn->c_destroy_in_prog)
return;
queue_work(rds_wq, &cp->cp_down_w);
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@ -674,7 +678,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
void rds_conn_drop(struct rds_connection *conn)
{
WARN_ON(conn->c_trans->t_mp_capable);
rds_conn_path_drop(&conn->c_path[0]);
rds_conn_path_drop(&conn->c_path[0], false);
}
EXPORT_SYMBOL_GPL(rds_conn_drop);
@ -706,5 +710,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
vprintk(fmt, ap);
va_end(ap);
rds_conn_path_drop(cp);
rds_conn_path_drop(cp, false);
}

View File

@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
void rds_conn_path_drop(struct rds_conn_path *cpath);
void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
void rds_conn_connect_if_down(struct rds_connection *conn);
void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
void rds_for_each_conn_info(struct socket *sock, unsigned int len,

View File

@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
continue;
/* reconnect with new parameters */
rds_conn_path_drop(tc->t_cpath);
rds_conn_path_drop(tc->t_cpath, false);
}
spin_unlock_irq(&rds_tcp_conn_lock);
}

View File

@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
RDS_CONN_ERROR)) {
rds_conn_path_drop(cp);
rds_conn_path_drop(cp, false);
} else {
rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
}
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
rds_conn_path_drop(cp);
rds_conn_path_drop(cp, false);
default:
break;
}

View File

@ -157,7 +157,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
"returned %d, "
"disconnecting and reconnecting\n",
&conn->c_faddr, cp->cp_index, ret);
rds_conn_path_drop(cp);
rds_conn_path_drop(cp, false);
}
}
}

View File

@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
"current state is %d\n",
__func__,
atomic_read(&cp->cp_state));
rds_conn_path_drop(cp);
rds_conn_path_drop(cp, false);
return;
}