Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md/raid5: Allow dirty-degraded arrays to be assembled when only parity is degraded.
  Don't unconditionally set in_sync on newly added device in raid5_reshape
  md: allow v0.91 metadata to record devices as being active but not in-sync.
  md: factor out updating of 'recovery_offset'.
This commit is contained in:
@@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||||||
desc->raid_disk < mddev->raid_disks */) {
|
desc->raid_disk < mddev->raid_disks */) {
|
||||||
set_bit(In_sync, &rdev->flags);
|
set_bit(In_sync, &rdev->flags);
|
||||||
rdev->raid_disk = desc->raid_disk;
|
rdev->raid_disk = desc->raid_disk;
|
||||||
|
} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
|
||||||
|
/* active but not in sync implies recovery up to
|
||||||
|
* reshape position. We don't know exactly where
|
||||||
|
* that is, so set to zero for now */
|
||||||
|
if (mddev->minor_version >= 91) {
|
||||||
|
rdev->recovery_offset = 0;
|
||||||
|
rdev->raid_disk = desc->raid_disk;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
|
if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
|
||||||
set_bit(WriteMostly, &rdev->flags);
|
set_bit(WriteMostly, &rdev->flags);
|
||||||
@@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||||||
list_for_each_entry(rdev2, &mddev->disks, same_set) {
|
list_for_each_entry(rdev2, &mddev->disks, same_set) {
|
||||||
mdp_disk_t *d;
|
mdp_disk_t *d;
|
||||||
int desc_nr;
|
int desc_nr;
|
||||||
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
|
int is_active = test_bit(In_sync, &rdev2->flags);
|
||||||
&& !test_bit(Faulty, &rdev2->flags))
|
|
||||||
|
if (rdev2->raid_disk >= 0 &&
|
||||||
|
sb->minor_version >= 91)
|
||||||
|
/* we have nowhere to store the recovery_offset,
|
||||||
|
* but if it is not below the reshape_position,
|
||||||
|
* we can piggy-back on that.
|
||||||
|
*/
|
||||||
|
is_active = 1;
|
||||||
|
if (rdev2->raid_disk < 0 ||
|
||||||
|
test_bit(Faulty, &rdev2->flags))
|
||||||
|
is_active = 0;
|
||||||
|
if (is_active)
|
||||||
desc_nr = rdev2->raid_disk;
|
desc_nr = rdev2->raid_disk;
|
||||||
else
|
else
|
||||||
desc_nr = next_spare++;
|
desc_nr = next_spare++;
|
||||||
@@ -1043,15 +1062,15 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||||||
d->number = rdev2->desc_nr;
|
d->number = rdev2->desc_nr;
|
||||||
d->major = MAJOR(rdev2->bdev->bd_dev);
|
d->major = MAJOR(rdev2->bdev->bd_dev);
|
||||||
d->minor = MINOR(rdev2->bdev->bd_dev);
|
d->minor = MINOR(rdev2->bdev->bd_dev);
|
||||||
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
|
if (is_active)
|
||||||
&& !test_bit(Faulty, &rdev2->flags))
|
|
||||||
d->raid_disk = rdev2->raid_disk;
|
d->raid_disk = rdev2->raid_disk;
|
||||||
else
|
else
|
||||||
d->raid_disk = rdev2->desc_nr; /* compatibility */
|
d->raid_disk = rdev2->desc_nr; /* compatibility */
|
||||||
if (test_bit(Faulty, &rdev2->flags))
|
if (test_bit(Faulty, &rdev2->flags))
|
||||||
d->state = (1<<MD_DISK_FAULTY);
|
d->state = (1<<MD_DISK_FAULTY);
|
||||||
else if (test_bit(In_sync, &rdev2->flags)) {
|
else if (is_active) {
|
||||||
d->state = (1<<MD_DISK_ACTIVE);
|
d->state = (1<<MD_DISK_ACTIVE);
|
||||||
|
if (test_bit(In_sync, &rdev2->flags))
|
||||||
d->state |= (1<<MD_DISK_SYNC);
|
d->state |= (1<<MD_DISK_SYNC);
|
||||||
active++;
|
active++;
|
||||||
working++;
|
working++;
|
||||||
@@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
|
|||||||
|
|
||||||
if (rdev->raid_disk >= 0 &&
|
if (rdev->raid_disk >= 0 &&
|
||||||
!test_bit(In_sync, &rdev->flags)) {
|
!test_bit(In_sync, &rdev->flags)) {
|
||||||
if (mddev->curr_resync_completed > rdev->recovery_offset)
|
|
||||||
rdev->recovery_offset = mddev->curr_resync_completed;
|
|
||||||
if (rdev->recovery_offset > 0) {
|
if (rdev->recovery_offset > 0) {
|
||||||
sb->feature_map |=
|
sb->feature_map |=
|
||||||
cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
|
cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
|
||||||
@@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares)
|
|||||||
*/
|
*/
|
||||||
mdk_rdev_t *rdev;
|
mdk_rdev_t *rdev;
|
||||||
|
|
||||||
|
/* First make sure individual recovery_offsets are correct */
|
||||||
|
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||||
|
if (rdev->raid_disk >= 0 &&
|
||||||
|
!test_bit(In_sync, &rdev->flags) &&
|
||||||
|
mddev->curr_resync_completed > rdev->recovery_offset)
|
||||||
|
rdev->recovery_offset = mddev->curr_resync_completed;
|
||||||
|
|
||||||
|
}
|
||||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||||
if (rdev->sb_events == mddev->events ||
|
if (rdev->sb_events == mddev->events ||
|
||||||
(nospares &&
|
(nospares &&
|
||||||
|
@@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
|
|||||||
return ERR_PTR(-ENOMEM);
|
return ERR_PTR(-ENOMEM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
|
||||||
|
{
|
||||||
|
switch (algo) {
|
||||||
|
case ALGORITHM_PARITY_0:
|
||||||
|
if (raid_disk < max_degraded)
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
case ALGORITHM_PARITY_N:
|
||||||
|
if (raid_disk >= raid_disks - max_degraded)
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
case ALGORITHM_PARITY_0_6:
|
||||||
|
if (raid_disk == 0 ||
|
||||||
|
raid_disk == raid_disks - 1)
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
case ALGORITHM_LEFT_ASYMMETRIC_6:
|
||||||
|
case ALGORITHM_RIGHT_ASYMMETRIC_6:
|
||||||
|
case ALGORITHM_LEFT_SYMMETRIC_6:
|
||||||
|
case ALGORITHM_RIGHT_SYMMETRIC_6:
|
||||||
|
if (raid_disk == raid_disks - 1)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int run(mddev_t *mddev)
|
static int run(mddev_t *mddev)
|
||||||
{
|
{
|
||||||
raid5_conf_t *conf;
|
raid5_conf_t *conf;
|
||||||
int working_disks = 0, chunk_size;
|
int working_disks = 0, chunk_size;
|
||||||
|
int dirty_parity_disks = 0;
|
||||||
mdk_rdev_t *rdev;
|
mdk_rdev_t *rdev;
|
||||||
|
sector_t reshape_offset = 0;
|
||||||
|
|
||||||
if (mddev->recovery_cp != MaxSector)
|
if (mddev->recovery_cp != MaxSector)
|
||||||
printk(KERN_NOTICE "raid5: %s is not clean"
|
printk(KERN_NOTICE "raid5: %s is not clean"
|
||||||
@@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev)
|
|||||||
"on a stripe boundary\n");
|
"on a stripe boundary\n");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
reshape_offset = here_new * mddev->new_chunk_sectors;
|
||||||
/* here_new is the stripe we will write to */
|
/* here_new is the stripe we will write to */
|
||||||
here_old = mddev->reshape_position;
|
here_old = mddev->reshape_position;
|
||||||
sector_div(here_old, mddev->chunk_sectors *
|
sector_div(here_old, mddev->chunk_sectors *
|
||||||
@@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev)
|
|||||||
/*
|
/*
|
||||||
* 0 for a fully functional array, 1 or 2 for a degraded array.
|
* 0 for a fully functional array, 1 or 2 for a degraded array.
|
||||||
*/
|
*/
|
||||||
list_for_each_entry(rdev, &mddev->disks, same_set)
|
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||||
if (rdev->raid_disk >= 0 &&
|
if (rdev->raid_disk < 0)
|
||||||
test_bit(In_sync, &rdev->flags))
|
continue;
|
||||||
|
if (test_bit(In_sync, &rdev->flags))
|
||||||
working_disks++;
|
working_disks++;
|
||||||
|
/* This disc is not fully in-sync. However if it
|
||||||
|
* just stored parity (beyond the recovery_offset),
|
||||||
|
* then we don't need to be concerned about the
|
||||||
|
* array being dirty.
|
||||||
|
* When reshape goes 'backwards', we never have
|
||||||
|
* partially completed devices, so we only need
|
||||||
|
* to worry about reshape going forwards.
|
||||||
|
*/
|
||||||
|
/* Hack because v0.91 doesn't store recovery_offset properly. */
|
||||||
|
if (mddev->major_version == 0 &&
|
||||||
|
mddev->minor_version > 90)
|
||||||
|
rdev->recovery_offset = reshape_offset;
|
||||||
|
|
||||||
|
printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n",
|
||||||
|
rdev->raid_disk, working_disks, conf->prev_algo,
|
||||||
|
conf->previous_raid_disks, conf->max_degraded,
|
||||||
|
conf->algorithm, conf->raid_disks,
|
||||||
|
only_parity(rdev->raid_disk,
|
||||||
|
conf->prev_algo,
|
||||||
|
conf->previous_raid_disks,
|
||||||
|
conf->max_degraded),
|
||||||
|
only_parity(rdev->raid_disk,
|
||||||
|
conf->algorithm,
|
||||||
|
conf->raid_disks,
|
||||||
|
conf->max_degraded));
|
||||||
|
if (rdev->recovery_offset < reshape_offset) {
|
||||||
|
/* We need to check old and new layout */
|
||||||
|
if (!only_parity(rdev->raid_disk,
|
||||||
|
conf->algorithm,
|
||||||
|
conf->raid_disks,
|
||||||
|
conf->max_degraded))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!only_parity(rdev->raid_disk,
|
||||||
|
conf->prev_algo,
|
||||||
|
conf->previous_raid_disks,
|
||||||
|
conf->max_degraded))
|
||||||
|
continue;
|
||||||
|
dirty_parity_disks++;
|
||||||
|
}
|
||||||
|
|
||||||
mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
|
mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
|
||||||
- working_disks);
|
- working_disks);
|
||||||
@@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev)
|
|||||||
mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
|
mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
|
||||||
mddev->resync_max_sectors = mddev->dev_sectors;
|
mddev->resync_max_sectors = mddev->dev_sectors;
|
||||||
|
|
||||||
if (mddev->degraded > 0 &&
|
if (mddev->degraded > dirty_parity_disks &&
|
||||||
mddev->recovery_cp != MaxSector) {
|
mddev->recovery_cp != MaxSector) {
|
||||||
if (mddev->ok_start_degraded)
|
if (mddev->ok_start_degraded)
|
||||||
printk(KERN_WARNING
|
printk(KERN_WARNING
|
||||||
@@ -5361,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev)
|
|||||||
!test_bit(Faulty, &rdev->flags)) {
|
!test_bit(Faulty, &rdev->flags)) {
|
||||||
if (raid5_add_disk(mddev, rdev) == 0) {
|
if (raid5_add_disk(mddev, rdev) == 0) {
|
||||||
char nm[20];
|
char nm[20];
|
||||||
|
if (rdev->raid_disk >= conf->previous_raid_disks)
|
||||||
set_bit(In_sync, &rdev->flags);
|
set_bit(In_sync, &rdev->flags);
|
||||||
added_devices++;
|
else
|
||||||
rdev->recovery_offset = 0;
|
rdev->recovery_offset = 0;
|
||||||
|
added_devices++;
|
||||||
sprintf(nm, "rd%d", rdev->raid_disk);
|
sprintf(nm, "rd%d", rdev->raid_disk);
|
||||||
if (sysfs_create_link(&mddev->kobj,
|
if (sysfs_create_link(&mddev->kobj,
|
||||||
&rdev->kobj, nm))
|
&rdev->kobj, nm))
|
||||||
|
Reference in New Issue
Block a user