xfs: add version 3 inode format with CRCs
Add a new inode version with a larger core. The primary objective is to allow for a CRC of the inode, plus location information (UUID and inode number) to verify that it was written in the right place. We also extend it with: a creation time (for Samba); a change count (for NFSv4); a flush sequence (in LSN format, for recovery); an additional inode flags field; and some additional padding. These additional fields are not implemented yet, but are already laid out in the structure. [dchinner@redhat.com] Added the LSN and flags fields, plus some factoring and rework to capture all the necessary information in the CRC calculation. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Ben Myers <bpm@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
committed by
Ben Myers
parent
3fe58f30b4
commit
93848a999c
@ -44,6 +44,7 @@
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_filestream.h"
|
||||
#include "xfs_vnodeops.h"
|
||||
#include "xfs_cksum.h"
|
||||
#include "xfs_trace.h"
|
||||
#include "xfs_icache.h"
|
||||
|
||||
@ -866,6 +867,17 @@ xfs_dinode_from_disk(
|
||||
to->di_dmstate = be16_to_cpu(from->di_dmstate);
|
||||
to->di_flags = be16_to_cpu(from->di_flags);
|
||||
to->di_gen = be32_to_cpu(from->di_gen);
|
||||
|
||||
if (to->di_version == 3) {
|
||||
to->di_changecount = be64_to_cpu(from->di_changecount);
|
||||
to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
|
||||
to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
|
||||
to->di_flags2 = be64_to_cpu(from->di_flags2);
|
||||
to->di_ino = be64_to_cpu(from->di_ino);
|
||||
to->di_lsn = be64_to_cpu(from->di_lsn);
|
||||
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
|
||||
uuid_copy(&to->di_uuid, &from->di_uuid);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -902,6 +914,17 @@ xfs_dinode_to_disk(
|
||||
to->di_dmstate = cpu_to_be16(from->di_dmstate);
|
||||
to->di_flags = cpu_to_be16(from->di_flags);
|
||||
to->di_gen = cpu_to_be32(from->di_gen);
|
||||
|
||||
if (from->di_version == 3) {
|
||||
to->di_changecount = cpu_to_be64(from->di_changecount);
|
||||
to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
|
||||
to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
|
||||
to->di_flags2 = cpu_to_be64(from->di_flags2);
|
||||
to->di_ino = cpu_to_be64(from->di_ino);
|
||||
to->di_lsn = cpu_to_be64(from->di_lsn);
|
||||
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
|
||||
uuid_copy(&to->di_uuid, &from->di_uuid);
|
||||
}
|
||||
}
|
||||
|
||||
STATIC uint
|
||||
@ -962,6 +985,47 @@ xfs_dic2xflags(
|
||||
(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
|
||||
}
|
||||
|
||||
static bool
|
||||
xfs_dinode_verify(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_inode *ip,
|
||||
struct xfs_dinode *dip)
|
||||
{
|
||||
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
|
||||
return false;
|
||||
|
||||
/* only version 3 or greater inodes are extensively verified here */
|
||||
if (dip->di_version < 3)
|
||||
return true;
|
||||
|
||||
if (!xfs_sb_version_hascrc(&mp->m_sb))
|
||||
return false;
|
||||
if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
|
||||
offsetof(struct xfs_dinode, di_crc)))
|
||||
return false;
|
||||
if (be64_to_cpu(dip->di_ino) != ip->i_ino)
|
||||
return false;
|
||||
if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
xfs_dinode_calc_crc(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_dinode *dip)
|
||||
{
|
||||
__uint32_t crc;
|
||||
|
||||
if (dip->di_version < 3)
|
||||
return;
|
||||
|
||||
ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
|
||||
crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
|
||||
offsetof(struct xfs_dinode, di_crc));
|
||||
dip->di_crc = xfs_end_cksum(crc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the disk inode attributes into the in-core inode structure.
|
||||
*/
|
||||
@ -990,17 +1054,13 @@ xfs_iread(
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* If we got something that isn't an inode it means someone
|
||||
* (nfs or dmi) has a stale handle.
|
||||
*/
|
||||
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
|
||||
#ifdef DEBUG
|
||||
xfs_alert(mp,
|
||||
"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
|
||||
__func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
|
||||
#endif /* DEBUG */
|
||||
error = XFS_ERROR(EINVAL);
|
||||
/* even unallocated inodes are verified */
|
||||
if (!xfs_dinode_verify(mp, ip, dip)) {
|
||||
xfs_alert(mp, "%s: validation failed for inode %lld failed",
|
||||
__func__, ip->i_ino);
|
||||
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
|
||||
error = XFS_ERROR(EFSCORRUPTED);
|
||||
goto out_brelse;
|
||||
}
|
||||
|
||||
@ -1022,10 +1082,20 @@ xfs_iread(
|
||||
goto out_brelse;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Partial initialisation of the in-core inode. Just the bits
|
||||
* that xfs_ialloc won't overwrite or relies on being correct.
|
||||
*/
|
||||
ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
|
||||
ip->i_d.di_version = dip->di_version;
|
||||
ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
|
||||
ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
|
||||
|
||||
if (dip->di_version == 3) {
|
||||
ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
|
||||
uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure to pull in the mode here as well in
|
||||
* case the inode is released without being used.
|
||||
@ -1161,6 +1231,7 @@ xfs_ialloc(
|
||||
xfs_buf_t **ialloc_context,
|
||||
xfs_inode_t **ipp)
|
||||
{
|
||||
struct xfs_mount *mp = tp->t_mountp;
|
||||
xfs_ino_t ino;
|
||||
xfs_inode_t *ip;
|
||||
uint flags;
|
||||
@ -1187,7 +1258,7 @@ xfs_ialloc(
|
||||
* This is because we're setting fields here we need
|
||||
* to prevent others from looking at until we're done.
|
||||
*/
|
||||
error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
|
||||
error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
|
||||
XFS_ILOCK_EXCL, &ip);
|
||||
if (error)
|
||||
return error;
|
||||
@ -1208,7 +1279,7 @@ xfs_ialloc(
|
||||
* the inode version number now. This way we only do the conversion
|
||||
* here rather than here and in the flush/logging code.
|
||||
*/
|
||||
if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
|
||||
if (xfs_sb_version_hasnlink(&mp->m_sb) &&
|
||||
ip->i_d.di_version == 1) {
|
||||
ip->i_d.di_version = 2;
|
||||
/*
|
||||
@ -1258,6 +1329,19 @@ xfs_ialloc(
|
||||
ip->i_d.di_dmevmask = 0;
|
||||
ip->i_d.di_dmstate = 0;
|
||||
ip->i_d.di_flags = 0;
|
||||
|
||||
if (ip->i_d.di_version == 3) {
|
||||
ASSERT(ip->i_d.di_ino == ino);
|
||||
ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
|
||||
ip->i_d.di_crc = 0;
|
||||
ip->i_d.di_changecount = 1;
|
||||
ip->i_d.di_lsn = 0;
|
||||
ip->i_d.di_flags2 = 0;
|
||||
memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
|
||||
ip->i_d.di_crtime = ip->i_d.di_mtime;
|
||||
}
|
||||
|
||||
|
||||
flags = XFS_ILOG_CORE;
|
||||
switch (mode & S_IFMT) {
|
||||
case S_IFIFO:
|
||||
@ -2716,20 +2800,18 @@ abort_out:
|
||||
|
||||
STATIC int
|
||||
xfs_iflush_int(
|
||||
xfs_inode_t *ip,
|
||||
xfs_buf_t *bp)
|
||||
struct xfs_inode *ip,
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
xfs_inode_log_item_t *iip;
|
||||
xfs_dinode_t *dip;
|
||||
xfs_mount_t *mp;
|
||||
struct xfs_inode_log_item *iip = ip->i_itemp;
|
||||
struct xfs_dinode *dip;
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
|
||||
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
|
||||
ASSERT(xfs_isiflocked(ip));
|
||||
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
|
||||
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
|
||||
|
||||
iip = ip->i_itemp;
|
||||
mp = ip->i_mount;
|
||||
ASSERT(iip != NULL && iip->ili_fields != 0);
|
||||
|
||||
/* set *dip = inode's place in the buffer */
|
||||
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
|
||||
@ -2790,9 +2872,9 @@ xfs_iflush_int(
|
||||
}
|
||||
/*
|
||||
* bump the flush iteration count, used to detect flushes which
|
||||
* postdate a log record during recovery.
|
||||
* postdate a log record during recovery. This is redundant as we now
|
||||
* log every change and hence this can't happen. Still, it doesn't hurt.
|
||||
*/
|
||||
|
||||
ip->i_d.di_flushiter++;
|
||||
|
||||
/*
|
||||
@ -2868,41 +2950,30 @@ xfs_iflush_int(
|
||||
* need the AIL lock, because it is a 64 bit value that cannot be read
|
||||
* atomically.
|
||||
*/
|
||||
if (iip != NULL && iip->ili_fields != 0) {
|
||||
iip->ili_last_fields = iip->ili_fields;
|
||||
iip->ili_fields = 0;
|
||||
iip->ili_logged = 1;
|
||||
iip->ili_last_fields = iip->ili_fields;
|
||||
iip->ili_fields = 0;
|
||||
iip->ili_logged = 1;
|
||||
|
||||
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
|
||||
&iip->ili_item.li_lsn);
|
||||
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
|
||||
&iip->ili_item.li_lsn);
|
||||
|
||||
/*
|
||||
* Attach the function xfs_iflush_done to the inode's
|
||||
* buffer. This will remove the inode from the AIL
|
||||
* and unlock the inode's flush lock when the inode is
|
||||
* completely written to disk.
|
||||
*/
|
||||
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
|
||||
/*
|
||||
* Attach the function xfs_iflush_done to the inode's
|
||||
* buffer. This will remove the inode from the AIL
|
||||
* and unlock the inode's flush lock when the inode is
|
||||
* completely written to disk.
|
||||
*/
|
||||
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
|
||||
|
||||
ASSERT(bp->b_fspriv != NULL);
|
||||
ASSERT(bp->b_iodone != NULL);
|
||||
} else {
|
||||
/*
|
||||
* We're flushing an inode which is not in the AIL and has
|
||||
* not been logged. For this case we can immediately drop
|
||||
* the inode flush lock because we can avoid the whole
|
||||
* AIL state thing. It's OK to drop the flush lock now,
|
||||
* because we've already locked the buffer and to do anything
|
||||
* you really need both.
|
||||
*/
|
||||
if (iip != NULL) {
|
||||
ASSERT(iip->ili_logged == 0);
|
||||
ASSERT(iip->ili_last_fields == 0);
|
||||
ASSERT((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0);
|
||||
}
|
||||
xfs_ifunlock(ip);
|
||||
}
|
||||
/* update the lsn in the on disk inode if required */
|
||||
if (ip->i_d.di_version == 3)
|
||||
dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
|
||||
|
||||
/* generate the checksum. */
|
||||
xfs_dinode_calc_crc(mp, dip);
|
||||
|
||||
ASSERT(bp->b_fspriv != NULL);
|
||||
ASSERT(bp->b_iodone != NULL);
|
||||
return 0;
|
||||
|
||||
corrupt_out:
|
||||
|
Reference in New Issue
Block a user