[XFS] On machines with more than 8 cpus, when running parallel I/O
threads, the incore superblock lock becomes the limiting factor for buffered write throughput. Make the contended fields in the incore superblock use per-cpu counters so that there is no global lock to limit scalability. SGI-PV: 946630 SGI-Modid: xfs-linux-melb:xfs-kern:25106a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Nathan Scott <nathans@sgi.com>
This commit is contained in:
committed by
Nathan Scott
parent
9f4cbecd7e
commit
8d280b98cf
@@ -100,6 +100,11 @@
|
|||||||
*/
|
*/
|
||||||
#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
|
#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
|
||||||
#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
|
#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
|
||||||
|
#if CONFIG_SMP
|
||||||
|
#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
|
||||||
|
#else
|
||||||
|
#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* State flag for unwritten extent buffers.
|
* State flag for unwritten extent buffers.
|
||||||
|
@@ -462,6 +462,7 @@ xfs_fs_counts(
|
|||||||
{
|
{
|
||||||
unsigned long s;
|
unsigned long s;
|
||||||
|
|
||||||
|
xfs_icsb_sync_counters_lazy(mp);
|
||||||
s = XFS_SB_LOCK(mp);
|
s = XFS_SB_LOCK(mp);
|
||||||
cnt->freedata = mp->m_sb.sb_fdblocks;
|
cnt->freedata = mp->m_sb.sb_fdblocks;
|
||||||
cnt->freertx = mp->m_sb.sb_frextents;
|
cnt->freertx = mp->m_sb.sb_frextents;
|
||||||
|
@@ -51,11 +51,31 @@ STATIC int xfs_uuid_mount(xfs_mount_t *);
|
|||||||
STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
|
STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
|
||||||
STATIC void xfs_unmountfs_wait(xfs_mount_t *);
|
STATIC void xfs_unmountfs_wait(xfs_mount_t *);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_PERCPU_SB
|
||||||
|
STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
|
||||||
|
STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
|
||||||
|
STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
|
||||||
|
STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
|
||||||
|
int, int);
|
||||||
|
STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
|
||||||
|
int, int);
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define xfs_icsb_destroy_counters(mp) do { } while (0)
|
||||||
|
#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
|
||||||
|
#define xfs_icsb_sync_counters(mp) do { } while (0)
|
||||||
|
#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
|
||||||
|
#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
static const struct {
|
static const struct {
|
||||||
short offset;
|
short offset;
|
||||||
short type; /* 0 = integer
|
short type; /* 0 = integer
|
||||||
* 1 = binary / string (no translation)
|
* 1 = binary / string (no translation)
|
||||||
*/
|
*/
|
||||||
} xfs_sb_info[] = {
|
} xfs_sb_info[] = {
|
||||||
{ offsetof(xfs_sb_t, sb_magicnum), 0 },
|
{ offsetof(xfs_sb_t, sb_magicnum), 0 },
|
||||||
{ offsetof(xfs_sb_t, sb_blocksize), 0 },
|
{ offsetof(xfs_sb_t, sb_blocksize), 0 },
|
||||||
@@ -113,7 +133,11 @@ xfs_mount_init(void)
|
|||||||
{
|
{
|
||||||
xfs_mount_t *mp;
|
xfs_mount_t *mp;
|
||||||
|
|
||||||
mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
|
mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
|
||||||
|
|
||||||
|
if (xfs_icsb_init_counters(mp)) {
|
||||||
|
mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
|
||||||
|
}
|
||||||
|
|
||||||
AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
|
AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
|
||||||
spinlock_init(&mp->m_sb_lock, "xfs_sb");
|
spinlock_init(&mp->m_sb_lock, "xfs_sb");
|
||||||
@@ -136,8 +160,8 @@ xfs_mount_init(void)
|
|||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
xfs_mount_free(
|
xfs_mount_free(
|
||||||
xfs_mount_t *mp,
|
xfs_mount_t *mp,
|
||||||
int remove_bhv)
|
int remove_bhv)
|
||||||
{
|
{
|
||||||
if (mp->m_ihash)
|
if (mp->m_ihash)
|
||||||
xfs_ihash_free(mp);
|
xfs_ihash_free(mp);
|
||||||
@@ -177,6 +201,7 @@ xfs_mount_free(
|
|||||||
VFS_REMOVEBHV(vfsp, &mp->m_bhv);
|
VFS_REMOVEBHV(vfsp, &mp->m_bhv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xfs_icsb_destroy_counters(mp);
|
||||||
kmem_free(mp, sizeof(xfs_mount_t));
|
kmem_free(mp, sizeof(xfs_mount_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -527,6 +552,10 @@ xfs_readsb(xfs_mount_t *mp)
|
|||||||
ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
|
ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
|
||||||
|
xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
|
||||||
|
xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
|
||||||
|
|
||||||
mp->m_sb_bp = bp;
|
mp->m_sb_bp = bp;
|
||||||
xfs_buf_relse(bp);
|
xfs_buf_relse(bp);
|
||||||
ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
|
ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
|
||||||
@@ -1154,6 +1183,9 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
|
|||||||
sbp = xfs_getsb(mp, 0);
|
sbp = xfs_getsb(mp, 0);
|
||||||
if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
|
if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
|
||||||
XFS_FORCED_SHUTDOWN(mp))) {
|
XFS_FORCED_SHUTDOWN(mp))) {
|
||||||
|
|
||||||
|
xfs_icsb_sync_counters(mp);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* mark shared-readonly if desired
|
* mark shared-readonly if desired
|
||||||
*/
|
*/
|
||||||
@@ -1227,7 +1259,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
|
|||||||
|
|
||||||
xfs_trans_log_buf(tp, bp, first, last);
|
xfs_trans_log_buf(tp, bp, first, last);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
|
* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
|
||||||
* a delta to a specified field in the in-core superblock. Simply
|
* a delta to a specified field in the in-core superblock. Simply
|
||||||
@@ -1237,7 +1268,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
|
|||||||
*
|
*
|
||||||
* The SB_LOCK must be held when this routine is called.
|
* The SB_LOCK must be held when this routine is called.
|
||||||
*/
|
*/
|
||||||
STATIC int
|
int
|
||||||
xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
|
xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
|
||||||
int delta, int rsvd)
|
int delta, int rsvd)
|
||||||
{
|
{
|
||||||
@@ -1406,9 +1437,26 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
|
|||||||
unsigned long s;
|
unsigned long s;
|
||||||
int status;
|
int status;
|
||||||
|
|
||||||
s = XFS_SB_LOCK(mp);
|
/* check for per-cpu counters */
|
||||||
status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
|
switch (field) {
|
||||||
XFS_SB_UNLOCK(mp, s);
|
#ifdef HAVE_PERCPU_SB
|
||||||
|
case XFS_SBS_ICOUNT:
|
||||||
|
case XFS_SBS_IFREE:
|
||||||
|
case XFS_SBS_FDBLOCKS:
|
||||||
|
if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
|
||||||
|
status = xfs_icsb_modify_counters(mp, field,
|
||||||
|
delta, rsvd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
s = XFS_SB_LOCK(mp);
|
||||||
|
status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
|
||||||
|
XFS_SB_UNLOCK(mp, s);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1445,8 +1493,26 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
|
|||||||
* from the loop so we'll fall into the undo loop
|
* from the loop so we'll fall into the undo loop
|
||||||
* below.
|
* below.
|
||||||
*/
|
*/
|
||||||
status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
|
switch (msbp->msb_field) {
|
||||||
msbp->msb_delta, rsvd);
|
#ifdef HAVE_PERCPU_SB
|
||||||
|
case XFS_SBS_ICOUNT:
|
||||||
|
case XFS_SBS_IFREE:
|
||||||
|
case XFS_SBS_FDBLOCKS:
|
||||||
|
if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
|
||||||
|
status = xfs_icsb_modify_counters_locked(mp,
|
||||||
|
msbp->msb_field,
|
||||||
|
msbp->msb_delta, rsvd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
status = xfs_mod_incore_sb_unlocked(mp,
|
||||||
|
msbp->msb_field,
|
||||||
|
msbp->msb_delta, rsvd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (status != 0) {
|
if (status != 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1463,8 +1529,28 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
|
|||||||
if (status != 0) {
|
if (status != 0) {
|
||||||
msbp--;
|
msbp--;
|
||||||
while (msbp >= msb) {
|
while (msbp >= msb) {
|
||||||
status = xfs_mod_incore_sb_unlocked(mp,
|
switch (msbp->msb_field) {
|
||||||
msbp->msb_field, -(msbp->msb_delta), rsvd);
|
#ifdef HAVE_PERCPU_SB
|
||||||
|
case XFS_SBS_ICOUNT:
|
||||||
|
case XFS_SBS_IFREE:
|
||||||
|
case XFS_SBS_FDBLOCKS:
|
||||||
|
if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
|
||||||
|
status =
|
||||||
|
xfs_icsb_modify_counters_locked(mp,
|
||||||
|
msbp->msb_field,
|
||||||
|
-(msbp->msb_delta),
|
||||||
|
rsvd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
status = xfs_mod_incore_sb_unlocked(mp,
|
||||||
|
msbp->msb_field,
|
||||||
|
-(msbp->msb_delta),
|
||||||
|
rsvd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
ASSERT(status == 0);
|
ASSERT(status == 0);
|
||||||
msbp--;
|
msbp--;
|
||||||
}
|
}
|
||||||
@@ -1577,3 +1663,445 @@ xfs_mount_log_sbunit(
|
|||||||
xfs_mod_sb(tp, fields);
|
xfs_mod_sb(tp, fields);
|
||||||
xfs_trans_commit(tp, 0, NULL);
|
xfs_trans_commit(tp, 0, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_PERCPU_SB
|
||||||
|
/*
|
||||||
|
* Per-cpu incore superblock counters
|
||||||
|
*
|
||||||
|
* Simple concept, difficult implementation
|
||||||
|
*
|
||||||
|
* Basically, replace the incore superblock counters with a distributed per cpu
|
||||||
|
* counter for contended fields (e.g. free block count).
|
||||||
|
*
|
||||||
|
* Difficulties arise in that the incore sb is used for ENOSPC checking, and
|
||||||
|
* hence needs to be accurately read when we are running low on space. Hence
|
||||||
|
* there is a method to enable and disable the per-cpu counters based on how
|
||||||
|
* much "stuff" is available in them.
|
||||||
|
*
|
||||||
|
* Basically, a counter is enabled if there is enough free resource to justify
|
||||||
|
* running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
|
||||||
|
* ENOSPC), then we disable the counters to synchronise all callers and
|
||||||
|
* re-distribute the available resources.
|
||||||
|
*
|
||||||
|
* If, once we redistributed the available resources, we still get a failure,
|
||||||
|
* we disable the per-cpu counter and go through the slow path.
|
||||||
|
*
|
||||||
|
* The slow path is the current xfs_mod_incore_sb() function. This means that
|
||||||
|
* when we disable a per-cpu counter, we need to drain its resources back to
|
||||||
|
* the global superblock. We do this after disabling the counter to prevent
|
||||||
|
* more threads from queueing up on the counter.
|
||||||
|
*
|
||||||
|
* Essentially, this means that we still need a lock in the fast path to enable
|
||||||
|
* synchronisation between the global counters and the per-cpu counters. This
|
||||||
|
* is not a problem because the lock will be local to a CPU almost all the time
|
||||||
|
* and have little contention except when we get to ENOSPC conditions.
|
||||||
|
*
|
||||||
|
* Basically, this lock becomes a barrier that enables us to lock out the fast
|
||||||
|
* path while we do things like enabling and disabling counters and
|
||||||
|
* synchronising the counters.
|
||||||
|
*
|
||||||
|
* Locking rules:
|
||||||
|
*
|
||||||
|
* 1. XFS_SB_LOCK() before picking up per-cpu locks
|
||||||
|
* 2. per-cpu locks always picked up via for_each_online_cpu() order
|
||||||
|
* 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
|
||||||
|
* 4. modifying per-cpu counters requires holding per-cpu lock
|
||||||
|
* 5. modifying global counters requires holding XFS_SB_LOCK
|
||||||
|
* 6. enabling or disabling a counter requires holding the XFS_SB_LOCK
|
||||||
|
* and _none_ of the per-cpu locks.
|
||||||
|
*
|
||||||
|
* Disabled counters are only ever re-enabled by a balance operation
|
||||||
|
* that results in more free resources per CPU than a given threshold.
|
||||||
|
* To ensure counters don't remain disabled, they are rebalanced when
|
||||||
|
* the global resource goes above a higher threshold (i.e. some hysteresis
|
||||||
|
* is present to prevent thrashing).
|
||||||
|
*
|
||||||
|
* Note: hotplug CPUs not yet supported
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
xfs_icsb_init_counters(
|
||||||
|
xfs_mount_t *mp)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t *cntp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
|
||||||
|
if (mp->m_sb_cnts == NULL)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
for_each_online_cpu(i) {
|
||||||
|
cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
|
||||||
|
spin_lock_init(&cntp->icsb_lock);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* start with all counters disabled so that the
|
||||||
|
* initial balance kicks us off correctly
|
||||||
|
*/
|
||||||
|
mp->m_icsb_counters = -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC void
|
||||||
|
xfs_icsb_destroy_counters(
|
||||||
|
xfs_mount_t *mp)
|
||||||
|
{
|
||||||
|
if (mp->m_sb_cnts)
|
||||||
|
free_percpu(mp->m_sb_cnts);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
STATIC inline void
|
||||||
|
xfs_icsb_lock_all_counters(
|
||||||
|
xfs_mount_t *mp)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t *cntp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for_each_online_cpu(i) {
|
||||||
|
cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
|
||||||
|
spin_lock(&cntp->icsb_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC inline void
|
||||||
|
xfs_icsb_unlock_all_counters(
|
||||||
|
xfs_mount_t *mp)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t *cntp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for_each_online_cpu(i) {
|
||||||
|
cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
|
||||||
|
spin_unlock(&cntp->icsb_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC void
|
||||||
|
xfs_icsb_count(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_icsb_cnts_t *cnt,
|
||||||
|
int flags)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t *cntp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
|
||||||
|
|
||||||
|
if (!(flags & XFS_ICSB_LAZY_COUNT))
|
||||||
|
xfs_icsb_lock_all_counters(mp);
|
||||||
|
|
||||||
|
for_each_online_cpu(i) {
|
||||||
|
cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
|
||||||
|
cnt->icsb_icount += cntp->icsb_icount;
|
||||||
|
cnt->icsb_ifree += cntp->icsb_ifree;
|
||||||
|
cnt->icsb_fdblocks += cntp->icsb_fdblocks;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(flags & XFS_ICSB_LAZY_COUNT))
|
||||||
|
xfs_icsb_unlock_all_counters(mp);
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC int
|
||||||
|
xfs_icsb_counter_disabled(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_sb_field_t field)
|
||||||
|
{
|
||||||
|
ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
|
||||||
|
return test_bit(field, &mp->m_icsb_counters);
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC int
|
||||||
|
xfs_icsb_disable_counter(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_sb_field_t field)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t cnt;
|
||||||
|
|
||||||
|
ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
|
||||||
|
|
||||||
|
xfs_icsb_lock_all_counters(mp);
|
||||||
|
if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
|
||||||
|
/* drain back to superblock */
|
||||||
|
|
||||||
|
xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
|
||||||
|
switch(field) {
|
||||||
|
case XFS_SBS_ICOUNT:
|
||||||
|
mp->m_sb.sb_icount = cnt.icsb_icount;
|
||||||
|
break;
|
||||||
|
case XFS_SBS_IFREE:
|
||||||
|
mp->m_sb.sb_ifree = cnt.icsb_ifree;
|
||||||
|
break;
|
||||||
|
case XFS_SBS_FDBLOCKS:
|
||||||
|
mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
xfs_icsb_unlock_all_counters(mp);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC void
|
||||||
|
xfs_icsb_enable_counter(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_sb_field_t field,
|
||||||
|
uint64_t count,
|
||||||
|
uint64_t resid)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t *cntp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
|
||||||
|
|
||||||
|
xfs_icsb_lock_all_counters(mp);
|
||||||
|
for_each_online_cpu(i) {
|
||||||
|
cntp = per_cpu_ptr(mp->m_sb_cnts, i);
|
||||||
|
switch (field) {
|
||||||
|
case XFS_SBS_ICOUNT:
|
||||||
|
cntp->icsb_icount = count + resid;
|
||||||
|
break;
|
||||||
|
case XFS_SBS_IFREE:
|
||||||
|
cntp->icsb_ifree = count + resid;
|
||||||
|
break;
|
||||||
|
case XFS_SBS_FDBLOCKS:
|
||||||
|
cntp->icsb_fdblocks = count + resid;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
BUG();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
resid = 0;
|
||||||
|
}
|
||||||
|
clear_bit(field, &mp->m_icsb_counters);
|
||||||
|
xfs_icsb_unlock_all_counters(mp);
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC void
|
||||||
|
xfs_icsb_sync_counters_int(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
int flags)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t cnt;
|
||||||
|
int s;
|
||||||
|
|
||||||
|
/* Pass 1: lock all counters */
|
||||||
|
if ((flags & XFS_ICSB_SB_LOCKED) == 0)
|
||||||
|
s = XFS_SB_LOCK(mp);
|
||||||
|
|
||||||
|
xfs_icsb_count(mp, &cnt, flags);
|
||||||
|
|
||||||
|
/* Step 3: update mp->m_sb fields */
|
||||||
|
if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
|
||||||
|
mp->m_sb.sb_icount = cnt.icsb_icount;
|
||||||
|
if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
|
||||||
|
mp->m_sb.sb_ifree = cnt.icsb_ifree;
|
||||||
|
if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
|
||||||
|
mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
|
||||||
|
|
||||||
|
if ((flags & XFS_ICSB_SB_LOCKED) == 0)
|
||||||
|
XFS_SB_UNLOCK(mp, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Accurate update of per-cpu counters to incore superblock
|
||||||
|
*/
|
||||||
|
STATIC void
|
||||||
|
xfs_icsb_sync_counters(
|
||||||
|
xfs_mount_t *mp)
|
||||||
|
{
|
||||||
|
xfs_icsb_sync_counters_int(mp, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* lazy addition used for things like df, background sb syncs, etc
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
xfs_icsb_sync_counters_lazy(
|
||||||
|
xfs_mount_t *mp)
|
||||||
|
{
|
||||||
|
xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Balance and enable/disable counters as necessary.
|
||||||
|
*
|
||||||
|
* Thresholds for re-enabling counters are somewhat magic.
|
||||||
|
* inode counts are chosen to be the same number as single
|
||||||
|
* on disk allocation chunk per CPU, and free blocks is
|
||||||
|
* something far enough from zero that we aren't going to thrash
|
||||||
|
* when we get near ENOSPC.
|
||||||
|
*/
|
||||||
|
#define XFS_ICSB_INO_CNTR_REENABLE 64
|
||||||
|
#define XFS_ICSB_FDBLK_CNTR_REENABLE 512
|
||||||
|
STATIC void
|
||||||
|
xfs_icsb_balance_counter(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_sb_field_t field,
|
||||||
|
int flags)
|
||||||
|
{
|
||||||
|
uint64_t count, resid = 0;
|
||||||
|
int weight = num_online_cpus();
|
||||||
|
int s;
|
||||||
|
|
||||||
|
if (!(flags & XFS_ICSB_SB_LOCKED))
|
||||||
|
s = XFS_SB_LOCK(mp);
|
||||||
|
|
||||||
|
/* disable counter and sync counter */
|
||||||
|
xfs_icsb_disable_counter(mp, field);
|
||||||
|
|
||||||
|
/* update counters - first CPU gets residual*/
|
||||||
|
switch (field) {
|
||||||
|
case XFS_SBS_ICOUNT:
|
||||||
|
count = mp->m_sb.sb_icount;
|
||||||
|
resid = do_div(count, weight);
|
||||||
|
if (count < XFS_ICSB_INO_CNTR_REENABLE)
|
||||||
|
goto out;
|
||||||
|
break;
|
||||||
|
case XFS_SBS_IFREE:
|
||||||
|
count = mp->m_sb.sb_ifree;
|
||||||
|
resid = do_div(count, weight);
|
||||||
|
if (count < XFS_ICSB_INO_CNTR_REENABLE)
|
||||||
|
goto out;
|
||||||
|
break;
|
||||||
|
case XFS_SBS_FDBLOCKS:
|
||||||
|
count = mp->m_sb.sb_fdblocks;
|
||||||
|
resid = do_div(count, weight);
|
||||||
|
if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
|
||||||
|
goto out;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
BUG();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
xfs_icsb_enable_counter(mp, field, count, resid);
|
||||||
|
out:
|
||||||
|
if (!(flags & XFS_ICSB_SB_LOCKED))
|
||||||
|
XFS_SB_UNLOCK(mp, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC int
|
||||||
|
xfs_icsb_modify_counters_int(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_sb_field_t field,
|
||||||
|
int delta,
|
||||||
|
int rsvd,
|
||||||
|
int flags)
|
||||||
|
{
|
||||||
|
xfs_icsb_cnts_t *icsbp;
|
||||||
|
long long lcounter; /* long counter for 64 bit fields */
|
||||||
|
int cpu, s, locked = 0;
|
||||||
|
int ret = 0, balance_done = 0;
|
||||||
|
|
||||||
|
again:
|
||||||
|
cpu = get_cpu();
|
||||||
|
icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
|
||||||
|
spin_lock(&icsbp->icsb_lock);
|
||||||
|
if (unlikely(xfs_icsb_counter_disabled(mp, field)))
|
||||||
|
goto slow_path;
|
||||||
|
|
||||||
|
switch (field) {
|
||||||
|
case XFS_SBS_ICOUNT:
|
||||||
|
lcounter = icsbp->icsb_icount;
|
||||||
|
lcounter += delta;
|
||||||
|
if (unlikely(lcounter < 0))
|
||||||
|
goto slow_path;
|
||||||
|
icsbp->icsb_icount = lcounter;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XFS_SBS_IFREE:
|
||||||
|
lcounter = icsbp->icsb_ifree;
|
||||||
|
lcounter += delta;
|
||||||
|
if (unlikely(lcounter < 0))
|
||||||
|
goto slow_path;
|
||||||
|
icsbp->icsb_ifree = lcounter;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XFS_SBS_FDBLOCKS:
|
||||||
|
BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
|
||||||
|
|
||||||
|
lcounter = icsbp->icsb_fdblocks;
|
||||||
|
lcounter += delta;
|
||||||
|
if (unlikely(lcounter < 0))
|
||||||
|
goto slow_path;
|
||||||
|
icsbp->icsb_fdblocks = lcounter;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
BUG();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
spin_unlock(&icsbp->icsb_lock);
|
||||||
|
put_cpu();
|
||||||
|
if (locked)
|
||||||
|
XFS_SB_UNLOCK(mp, s);
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The slow path needs to be run with the SBLOCK
|
||||||
|
* held so that we prevent other threads from
|
||||||
|
* attempting to run this path at the same time.
|
||||||
|
* this provides exclusion for the balancing code,
|
||||||
|
* and exclusive fallback if the balance does not
|
||||||
|
* provide enough resources to continue in an unlocked
|
||||||
|
* manner.
|
||||||
|
*/
|
||||||
|
slow_path:
|
||||||
|
spin_unlock(&icsbp->icsb_lock);
|
||||||
|
put_cpu();
|
||||||
|
|
||||||
|
/* need to hold superblock incase we need
|
||||||
|
* to disable a counter */
|
||||||
|
if (!(flags & XFS_ICSB_SB_LOCKED)) {
|
||||||
|
s = XFS_SB_LOCK(mp);
|
||||||
|
locked = 1;
|
||||||
|
flags |= XFS_ICSB_SB_LOCKED;
|
||||||
|
}
|
||||||
|
if (!balance_done) {
|
||||||
|
xfs_icsb_balance_counter(mp, field, flags);
|
||||||
|
balance_done = 1;
|
||||||
|
goto again;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* we might not have enough on this local
|
||||||
|
* cpu to allocate for a bulk request.
|
||||||
|
* We need to drain this field from all CPUs
|
||||||
|
* and disable the counter fastpath
|
||||||
|
*/
|
||||||
|
xfs_icsb_disable_counter(mp, field);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
|
||||||
|
|
||||||
|
if (locked)
|
||||||
|
XFS_SB_UNLOCK(mp, s);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
STATIC int
|
||||||
|
xfs_icsb_modify_counters(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_sb_field_t field,
|
||||||
|
int delta,
|
||||||
|
int rsvd)
|
||||||
|
{
|
||||||
|
return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called when superblock is already locked
|
||||||
|
*/
|
||||||
|
STATIC int
|
||||||
|
xfs_icsb_modify_counters_locked(
|
||||||
|
xfs_mount_t *mp,
|
||||||
|
xfs_sb_field_t field,
|
||||||
|
int delta,
|
||||||
|
int rsvd)
|
||||||
|
{
|
||||||
|
return xfs_icsb_modify_counters_int(mp, field, delta,
|
||||||
|
rsvd, XFS_ICSB_SB_LOCKED);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
@@ -267,6 +267,32 @@ typedef struct xfs_ioops {
|
|||||||
#define XFS_IODONE(vfsp) \
|
#define XFS_IODONE(vfsp) \
|
||||||
(*(mp)->m_io_ops.xfs_iodone)(vfsp)
|
(*(mp)->m_io_ops.xfs_iodone)(vfsp)
|
||||||
|
|
||||||
|
#ifdef HAVE_PERCPU_SB
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Valid per-cpu incore superblock counters. Note that if you add new counters,
|
||||||
|
* you may need to define new counter disabled bit field descriptors as there
|
||||||
|
* are more possible fields in the superblock than can fit in a bitfield on a
|
||||||
|
* 32 bit platform. The XFS_SBS_* values for the current counters just
|
||||||
|
* fit.
|
||||||
|
*/
|
||||||
|
typedef struct xfs_icsb_cnts {
|
||||||
|
uint64_t icsb_fdblocks;
|
||||||
|
uint64_t icsb_ifree;
|
||||||
|
uint64_t icsb_icount;
|
||||||
|
spinlock_t icsb_lock;
|
||||||
|
} xfs_icsb_cnts_t;
|
||||||
|
|
||||||
|
#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */
|
||||||
|
#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
|
||||||
|
|
||||||
|
extern int xfs_icsb_init_counters(struct xfs_mount *);
|
||||||
|
extern void xfs_icsb_sync_counters_lazy(struct xfs_mount *);
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define xfs_icsb_init_counters(mp) (0)
|
||||||
|
#define xfs_icsb_sync_counters_lazy(mp) do { } while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct xfs_mount {
|
typedef struct xfs_mount {
|
||||||
bhv_desc_t m_bhv; /* vfs xfs behavior */
|
bhv_desc_t m_bhv; /* vfs xfs behavior */
|
||||||
@@ -372,6 +398,10 @@ typedef struct xfs_mount {
|
|||||||
struct xfs_qmops m_qm_ops; /* vector of XQM ops */
|
struct xfs_qmops m_qm_ops; /* vector of XQM ops */
|
||||||
struct xfs_ioops m_io_ops; /* vector of I/O ops */
|
struct xfs_ioops m_io_ops; /* vector of I/O ops */
|
||||||
atomic_t m_active_trans; /* number trans frozen */
|
atomic_t m_active_trans; /* number trans frozen */
|
||||||
|
#ifdef HAVE_PERCPU_SB
|
||||||
|
xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */
|
||||||
|
unsigned long m_icsb_counters; /* disabled per-cpu counters */
|
||||||
|
#endif
|
||||||
} xfs_mount_t;
|
} xfs_mount_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -409,6 +439,8 @@ typedef struct xfs_mount {
|
|||||||
#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */
|
#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */
|
||||||
#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred
|
#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred
|
||||||
* I/O size in stat() */
|
* I/O size in stat() */
|
||||||
|
#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock
|
||||||
|
counters */
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -546,6 +578,8 @@ extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
|
|||||||
extern int xfs_unmountfs_writesb(xfs_mount_t *);
|
extern int xfs_unmountfs_writesb(xfs_mount_t *);
|
||||||
extern int xfs_unmount_flush(xfs_mount_t *, int);
|
extern int xfs_unmount_flush(xfs_mount_t *, int);
|
||||||
extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
|
extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
|
||||||
|
extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
|
||||||
|
int, int);
|
||||||
extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
|
extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
|
||||||
uint, int);
|
uint, int);
|
||||||
extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
|
extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
|
||||||
|
@@ -55,7 +55,7 @@
|
|||||||
#include "xfs_clnt.h"
|
#include "xfs_clnt.h"
|
||||||
#include "xfs_fsops.h"
|
#include "xfs_fsops.h"
|
||||||
|
|
||||||
STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
|
STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
|
||||||
|
|
||||||
int
|
int
|
||||||
xfs_init(void)
|
xfs_init(void)
|
||||||
@@ -807,6 +807,7 @@ xfs_statvfs(
|
|||||||
|
|
||||||
statp->f_type = XFS_SB_MAGIC;
|
statp->f_type = XFS_SB_MAGIC;
|
||||||
|
|
||||||
|
xfs_icsb_sync_counters_lazy(mp);
|
||||||
s = XFS_SB_LOCK(mp);
|
s = XFS_SB_LOCK(mp);
|
||||||
statp->f_bsize = sbp->sb_blocksize;
|
statp->f_bsize = sbp->sb_blocksize;
|
||||||
lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
|
lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
|
||||||
|
Reference in New Issue
Block a user