net/smc: check for healthy link group and connections

If a problem is detected for at least one connection of a link group,
the whole link group and all of its connections are terminated.
This patch adds a check for a healthy link group when trying to reserve
a work request, and checks for healthy connections before starting
a tx worker.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ursula Braun 2018-01-25 11:15:36 +01:00 committed by David S. Miller
parent 732720fafd
commit 1a0a04c7a8
4 changed files with 29 additions and 12 deletions

View File

@ -65,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend) struct smc_cdc_tx_pend **pend)
{ {
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int rc;
return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend); (struct smc_wr_tx_pend_priv **)pend);
if (!conn->alert_token_local)
/* abnormal termination */
rc = -EPIPE;
return rc;
} }
static inline void smc_cdc_add_pending_send(struct smc_connection *conn, static inline void smc_cdc_add_pending_send(struct smc_connection *conn,

View File

@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout; goto errout;
if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) { if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn; struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = { struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local, .token = conn->alert_token_local,
@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout; goto errout;
} }
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) { if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = { struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role, .role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,

View File

@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock; goto out_unlock;
} }
rc = 0; rc = 0;
schedule_delayed_work(&conn->tx_work, if (conn->alert_token_local) /* connection healthy */
SMC_TX_WORK_DELAY); schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
} }
goto out_unlock; goto out_unlock;
} }
@ -440,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
int rc; int rc;
lock_sock(&smc->sk); lock_sock(&smc->sk);
if (smc->sk.sk_err ||
!conn->alert_token_local ||
conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
goto out;
rc = smc_tx_sndbuf_nonempty(conn); rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv)) !atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0; conn->local_rx_ctrl.prod_flags.write_blocked = 0;
out:
release_sock(&smc->sk); release_sock(&smc->sk);
} }
@ -464,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) && ((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) || ((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) { conn->local_rx_ctrl.prod_flags.write_blocked))) {
if (smc_cdc_get_slot_and_msg_send(conn) < 0) { if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work, schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY); SMC_TX_WORK_DELAY);
return; return;

View File

@ -174,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv) struct smc_wr_tx_pend_priv **wr_pend_priv)
{ {
struct smc_wr_tx_pend *wr_pend; struct smc_wr_tx_pend *wr_pend;
u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib; struct ib_send_wr *wr_ib;
u64 wr_id; u64 wr_id;
u32 idx;
int rc; int rc;
*wr_buf = NULL; *wr_buf = NULL;
@ -186,16 +186,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc) if (rc)
return rc; return rc;
} else { } else {
struct smc_link_group *lgr;
lgr = container_of(link, struct smc_link_group,
lnk[SMC_SINGLE_LINK]);
rc = wait_event_timeout( rc = wait_event_timeout(
link->wr_tx_wait, link->wr_tx_wait,
list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME); SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) { if (!rc) {
/* timeout - terminate connections */ /* timeout - terminate connections */
struct smc_link_group *lgr;
lgr = container_of(link, struct smc_link_group,
lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr); smc_lgr_terminate(lgr);
return -EPIPE; return -EPIPE;
} }