ceph: use i_ceph_lock instead of i_lock
We have been using i_lock to protect all kinds of data structures in the
ceph_inode_info struct, including lists of inodes that we need to iterate
over while avoiding races with inode destruction. That requires grabbing a
reference to the inode with the list lock protected, but igrab() now takes
i_lock to check the inode flags.

Changing the list lock ordering would be a painful process. However, using
a ceph-specific i_ceph_lock in the ceph inode instead of i_lock is a simple
mechanical change and avoids the ordering constraints imposed by igrab().

Reported-by: Amon Ott <a.ott@m-privacy.de>
Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	dout("alloc_inode %p\n", &ci->vfs_inode);
 
+	spin_lock_init(&ci->i_ceph_lock);
+
 	ci->i_version = 0;
 	ci->i_time_warp_seq = 0;
 	ci->i_ceph_flags = 0;
@@ -583,7 +585,7 @@ static int fill_inode(struct inode *inode,
 			       iinfo->xattr_len);
 	}
 
-	spin_lock(&inode->i_lock);
+	spin_lock(&ci->i_ceph_lock);
 
 	/*
 	 * provided version will be odd if inode value is projected,
@@ -680,7 +682,7 @@ static int fill_inode(struct inode *inode,
 		char *sym;
 
 		BUG_ON(symlen != inode->i_size);
-		spin_unlock(&inode->i_lock);
+		spin_unlock(&ci->i_ceph_lock);
 
 		err = -ENOMEM;
 		sym = kmalloc(symlen+1, GFP_NOFS);
@@ -689,7 +691,7 @@ static int fill_inode(struct inode *inode,
 		memcpy(sym, iinfo->symlink, symlen);
 		sym[symlen] = 0;
 
-		spin_lock(&inode->i_lock);
+		spin_lock(&ci->i_ceph_lock);
 		if (!ci->i_symlink)
 			ci->i_symlink = sym;
 		else
@@ -715,7 +717,7 @@ static int fill_inode(struct inode *inode,
 	}
 
 no_change:
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 
 	/* queue truncate if we saw i_size decrease */
 	if (queue_trunc)
@@ -750,13 +752,13 @@ no_change:
 				     info->cap.flags,
 				     caps_reservation);
 		} else {
-			spin_lock(&inode->i_lock);
+			spin_lock(&ci->i_ceph_lock);
 			dout(" %p got snap_caps %s\n", inode,
 			     ceph_cap_string(le32_to_cpu(info->cap.caps)));
 			ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
 			if (cap_fmode >= 0)
 				__ceph_get_fmode(ci, cap_fmode);
-			spin_unlock(&inode->i_lock);
+			spin_unlock(&ci->i_ceph_lock);
 		}
 	} else if (cap_fmode >= 0) {
 		pr_warning("mds issued no caps on %llx.%llx\n",
@@ -849,19 +851,20 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 {
 	struct dentry *dir = dn->d_parent;
 	struct inode *inode = dir->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_dentry_info *di;
 
 	BUG_ON(!inode);
 
 	di = ceph_dentry(dn);
 
-	spin_lock(&inode->i_lock);
+	spin_lock(&ci->i_ceph_lock);
 	if (!ceph_dir_test_complete(inode)) {
-		spin_unlock(&inode->i_lock);
+		spin_unlock(&ci->i_ceph_lock);
 		return;
 	}
 	di->offset = ceph_inode(inode)->i_max_offset++;
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 
 	spin_lock(&dir->d_lock);
 	spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1308,7 +1311,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int ret = 0;
 
-	spin_lock(&inode->i_lock);
+	spin_lock(&ci->i_ceph_lock);
 	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
 	inode->i_size = size;
 	inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1318,7 +1321,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
 	    (ci->i_reported_size << 1) < ci->i_max_size)
 		ret = 1;
 
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 	return ret;
 }
 
||||
@@ -1376,20 +1379,20 @@ static void ceph_invalidate_work(struct work_struct *work)
|
||||
u32 orig_gen;
|
||||
int check = 0;
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
dout("invalidate_pages %p gen %d revoking %d\n", inode,
|
||||
ci->i_rdcache_gen, ci->i_rdcache_revoking);
|
||||
if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
|
||||
/* nevermind! */
|
||||
spin_unlock(&inode->i_lock);
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
goto out;
|
||||
}
|
||||
orig_gen = ci->i_rdcache_gen;
|
||||
spin_unlock(&inode->i_lock);
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
truncate_inode_pages(&inode->i_data, 0);
|
||||
|
||||
spin_lock(&inode->i_lock);
|
||||
spin_lock(&ci->i_ceph_lock);
|
||||
if (orig_gen == ci->i_rdcache_gen &&
|
||||
orig_gen == ci->i_rdcache_revoking) {
|
||||
dout("invalidate_pages %p gen %d successful\n", inode,
|
||||
@@ -1401,7 +1404,7 @@ static void ceph_invalidate_work(struct work_struct *work)
 			inode, orig_gen, ci->i_rdcache_gen,
 			ci->i_rdcache_revoking);
 	}
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 
 	if (check)
 		ceph_check_caps(ci, 0, NULL);
@@ -1460,10 +1463,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
 	int wrbuffer_refs, wake = 0;
 
 retry:
-	spin_lock(&inode->i_lock);
+	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_truncate_pending == 0) {
 		dout("__do_pending_vmtruncate %p none pending\n", inode);
-		spin_unlock(&inode->i_lock);
+		spin_unlock(&ci->i_ceph_lock);
 		return;
 	}
 
@@ -1474,7 +1477,7 @@ retry:
 	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
 		dout("__do_pending_vmtruncate %p flushing snaps first\n",
 		     inode);
-		spin_unlock(&inode->i_lock);
+		spin_unlock(&ci->i_ceph_lock);
 		filemap_write_and_wait_range(&inode->i_data, 0,
 					     inode->i_sb->s_maxbytes);
 		goto retry;
@@ -1484,15 +1487,15 @@ retry:
 	wrbuffer_refs = ci->i_wrbuffer_ref;
 	dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
 	     ci->i_truncate_pending, to);
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 
 	truncate_inode_pages(inode->i_mapping, to);
 
-	spin_lock(&inode->i_lock);
+	spin_lock(&ci->i_ceph_lock);
 	ci->i_truncate_pending--;
 	if (ci->i_truncate_pending == 0)
 		wake = 1;
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 
 	if (wrbuffer_refs == 0)
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1547,7 +1550,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	spin_lock(&inode->i_lock);
+	spin_lock(&ci->i_ceph_lock);
 	issued = __ceph_caps_issued(ci, NULL);
 	dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
 
@@ -1695,7 +1698,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	release &= issued;
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 
 	if (inode_dirty_flags)
 		__mark_inode_dirty(inode, inode_dirty_flags);
@@ -1717,7 +1720,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	__ceph_do_pending_vmtruncate(inode);
 	return err;
 out:
-	spin_unlock(&inode->i_lock);
+	spin_unlock(&ci->i_ceph_lock);
 	ceph_mdsc_put_request(req);
 	return err;
 }
Reference in New Issue
Block a user