[DLM] fix softlockup in dlm_recv
This patch stops the dlm_recv workqueue from busy-waiting when a node disconnects. The busy-waiting can cause soft lockup errors on debug systems and generally poor performance.

Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
committed by
Steven Whitehouse
parent
62a0f62369
commit
a34fbc6363
@@ -2,7 +2,7 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
**
|
**
|
||||||
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
||||||
** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
|
** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
|
||||||
**
|
**
|
||||||
** This copyrighted material is made available to anyone wishing to use,
|
** This copyrighted material is made available to anyone wishing to use,
|
||||||
** modify, copy, or redistribute it subject to the terms and conditions
|
** modify, copy, or redistribute it subject to the terms and conditions
|
||||||
@@ -109,7 +109,6 @@ struct connection {
|
|||||||
struct page *rx_page;
|
struct page *rx_page;
|
||||||
struct cbuf cb;
|
struct cbuf cb;
|
||||||
int retries;
|
int retries;
|
||||||
atomic_t waiting_requests;
|
|
||||||
#define MAX_CONNECT_RETRIES 3
|
#define MAX_CONNECT_RETRIES 3
|
||||||
struct connection *othercon;
|
struct connection *othercon;
|
||||||
struct work_struct rwork; /* Receive workqueue */
|
struct work_struct rwork; /* Receive workqueue */
|
||||||
@@ -278,8 +277,11 @@ static int receive_from_sock(struct connection *con)
|
|||||||
|
|
||||||
mutex_lock(&con->sock_mutex);
|
mutex_lock(&con->sock_mutex);
|
||||||
|
|
||||||
if (con->sock == NULL)
|
if (con->sock == NULL) {
|
||||||
goto out;
|
ret = -EAGAIN;
|
||||||
|
goto out_close;
|
||||||
|
}
|
||||||
|
|
||||||
if (con->rx_page == NULL) {
|
if (con->rx_page == NULL) {
|
||||||
/*
|
/*
|
||||||
* This doesn't need to be atomic, but I think it should
|
* This doesn't need to be atomic, but I think it should
|
||||||
@@ -352,7 +354,6 @@ static int receive_from_sock(struct connection *con)
|
|||||||
con->rx_page = NULL;
|
con->rx_page = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
|
||||||
if (call_again_soon)
|
if (call_again_soon)
|
||||||
goto out_resched;
|
goto out_resched;
|
||||||
mutex_unlock(&con->sock_mutex);
|
mutex_unlock(&con->sock_mutex);
|
||||||
@@ -370,6 +371,9 @@ out_close:
|
|||||||
close_connection(con, false);
|
close_connection(con, false);
|
||||||
/* Reconnect when there is something to send */
|
/* Reconnect when there is something to send */
|
||||||
}
|
}
|
||||||
|
/* Don't return success if we really got EOF */
|
||||||
|
if (ret == 0)
|
||||||
|
ret = -EAGAIN;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -847,7 +851,6 @@ int dlm_lowcomms_close(int nodeid)
|
|||||||
if (con) {
|
if (con) {
|
||||||
clean_one_writequeue(con);
|
clean_one_writequeue(con);
|
||||||
close_connection(con, true);
|
close_connection(con, true);
|
||||||
atomic_set(&con->waiting_requests, 0);
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user