[DLM] block dlm_recv in recovery transition
Introduce a per-lockspace rwsem that's held in read mode by dlm_recv threads while working in the dlm. This allows dlm_recv activity to be suspended when the lockspace transitions to, from and between recovery cycles. The specific bug prompting this change is one where an in-progress recovery cycle is aborted by a new recovery cycle. While dlm_recv was processing a recovery message, the recovery cycle was aborted and dlm_recoverd began cleaning up. dlm_recv decremented recover_locks_count on an rsb after dlm_recoverd had reset it to zero. This is fixed by suspending dlm_recv (taking write lock on the rwsem) before aborting the current recovery. The transitions to/from normal and recovery modes are simplified by using this new ability to block dlm_recv. The switch from normal to recovery mode means dlm_recv goes from processing locking messages, to saving them for later, and vice versa. Races are avoided by blocking dlm_recv when setting the flag that switches between modes. Signed-off-by: David Teigland <teigland@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
committed by
Steven Whitehouse
parent
b434eda6fd
commit
c36258b592
@@ -2,7 +2,7 @@
|
||||
*******************************************************************************
|
||||
**
|
||||
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
||||
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
|
||||
** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
|
||||
**
|
||||
** This copyrighted material is made available to anyone wishing to use,
|
||||
** modify, copy, or redistribute it subject to the terms and conditions
|
||||
@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
|
||||
dlm_recover_process_copy(ls, rc_in);
|
||||
}
|
||||
|
||||
static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
|
||||
/* If the lockspace doesn't exist then still send a status message
|
||||
back; it's possible that it just doesn't have its global_id yet. */
|
||||
|
||||
int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
|
||||
{
|
||||
struct dlm_rcom *rc;
|
||||
struct rcom_config *rf;
|
||||
@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
|
||||
return rv;
|
||||
}
|
||||
|
||||
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
|
||||
/* Called by dlm_recv; corresponds to dlm_receive_message() but special
|
||||
recovery-only comms are sent through here. */
|
||||
|
||||
void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
|
||||
void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
|
||||
{
|
||||
struct dlm_rcom *rc = (struct dlm_rcom *) hd;
|
||||
struct dlm_ls *ls;
|
||||
|
||||
dlm_rcom_in(rc);
|
||||
|
||||
/* If the lockspace doesn't exist then still send a status message
|
||||
back; it's possible that it just doesn't have its global_id yet. */
|
||||
|
||||
ls = dlm_find_lockspace_global(hd->h_lockspace);
|
||||
if (!ls) {
|
||||
log_print("lockspace %x from %d type %x not found",
|
||||
hd->h_lockspace, nodeid, rc->rc_type);
|
||||
if (rc->rc_type == DLM_RCOM_STATUS)
|
||||
send_ls_not_ready(nodeid, rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
|
||||
log_debug(ls, "ignoring recovery message %x from %d",
|
||||
rc->rc_type, nodeid);
|
||||
@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
|
||||
if (is_old_reply(ls, rc))
|
||||
goto out;
|
||||
|
||||
if (nodeid != rc->rc_header.h_nodeid) {
|
||||
log_error(ls, "bad rcom nodeid %d from %d",
|
||||
rc->rc_header.h_nodeid, nodeid);
|
||||
goto out;
|
||||
}
|
||||
|
||||
switch (rc->rc_type) {
|
||||
case DLM_RCOM_STATUS:
|
||||
receive_rcom_status(ls, rc);
|
||||
@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
|
||||
DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
|
||||
}
|
||||
out:
|
||||
dlm_put_lockspace(ls);
|
||||
return;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user