md/raid1: record badblocks found during resync etc.
If we find a bad block while writing as part of resync/recovery, we need to report that back to raid1d, which must record the bad block or, if that is not possible, fail the device.

Similarly, when fixing a read error, a further error should just record a bad block if possible, rather than failing the device.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
This commit is contained in:
@@ -1386,7 +1386,9 @@ static void end_sync_write(struct bio *bio, int error)
|
|||||||
s += sync_blocks;
|
s += sync_blocks;
|
||||||
sectors_to_go -= sync_blocks;
|
sectors_to_go -= sync_blocks;
|
||||||
} while (sectors_to_go > 0);
|
} while (sectors_to_go > 0);
|
||||||
md_error(mddev, conf->mirrors[mirror].rdev);
|
set_bit(WriteErrorSeen,
|
||||||
|
&conf->mirrors[mirror].rdev->flags);
|
||||||
|
set_bit(R1BIO_WriteError, &r1_bio->state);
|
||||||
} else if (is_badblock(conf->mirrors[mirror].rdev,
|
} else if (is_badblock(conf->mirrors[mirror].rdev,
|
||||||
r1_bio->sector,
|
r1_bio->sector,
|
||||||
r1_bio->sectors,
|
r1_bio->sectors,
|
||||||
@@ -1397,7 +1399,8 @@ static void end_sync_write(struct bio *bio, int error)
|
|||||||
|
|
||||||
if (atomic_dec_and_test(&r1_bio->remaining)) {
|
if (atomic_dec_and_test(&r1_bio->remaining)) {
|
||||||
int s = r1_bio->sectors;
|
int s = r1_bio->sectors;
|
||||||
if (test_bit(R1BIO_MadeGood, &r1_bio->state))
|
if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
|
||||||
|
test_bit(R1BIO_WriteError, &r1_bio->state))
|
||||||
reschedule_retry(r1_bio);
|
reschedule_retry(r1_bio);
|
||||||
else {
|
else {
|
||||||
put_buf(r1_bio);
|
put_buf(r1_bio);
|
||||||
@@ -1406,6 +1409,20 @@ static void end_sync_write(struct bio *bio, int error)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
|
||||||
|
int sectors, struct page *page, int rw)
|
||||||
|
{
|
||||||
|
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
|
||||||
|
/* success */
|
||||||
|
return 1;
|
||||||
|
if (rw == WRITE)
|
||||||
|
set_bit(WriteErrorSeen, &rdev->flags);
|
||||||
|
/* need to record an error - either for the block or the device */
|
||||||
|
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
|
||||||
|
md_error(rdev->mddev, rdev);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int fix_sync_read_error(r1bio_t *r1_bio)
|
static int fix_sync_read_error(r1bio_t *r1_bio)
|
||||||
{
|
{
|
||||||
/* Try some synchronous reads of other devices to get
|
/* Try some synchronous reads of other devices to get
|
||||||
@@ -1477,12 +1494,11 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
|
|||||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||||
continue;
|
continue;
|
||||||
rdev = conf->mirrors[d].rdev;
|
rdev = conf->mirrors[d].rdev;
|
||||||
if (sync_page_io(rdev, sect, s<<9,
|
if (r1_sync_page_io(rdev, sect, s,
|
||||||
bio->bi_io_vec[idx].bv_page,
|
bio->bi_io_vec[idx].bv_page,
|
||||||
WRITE, false) == 0) {
|
WRITE) == 0) {
|
||||||
r1_bio->bios[d]->bi_end_io = NULL;
|
r1_bio->bios[d]->bi_end_io = NULL;
|
||||||
rdev_dec_pending(rdev, mddev);
|
rdev_dec_pending(rdev, mddev);
|
||||||
md_error(mddev, rdev);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
d = start;
|
d = start;
|
||||||
@@ -1493,11 +1509,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
|
|||||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||||
continue;
|
continue;
|
||||||
rdev = conf->mirrors[d].rdev;
|
rdev = conf->mirrors[d].rdev;
|
||||||
if (sync_page_io(rdev, sect, s<<9,
|
if (r1_sync_page_io(rdev, sect, s,
|
||||||
bio->bi_io_vec[idx].bv_page,
|
bio->bi_io_vec[idx].bv_page,
|
||||||
READ, false) == 0)
|
READ) != 0)
|
||||||
md_error(mddev, rdev);
|
|
||||||
else
|
|
||||||
atomic_add(s, &rdev->corrected_errors);
|
atomic_add(s, &rdev->corrected_errors);
|
||||||
}
|
}
|
||||||
sectors -= s;
|
sectors -= s;
|
||||||
@@ -1682,8 +1696,10 @@ static void fix_read_error(conf_t *conf, int read_disk,
|
|||||||
} while (!success && d != read_disk);
|
} while (!success && d != read_disk);
|
||||||
|
|
||||||
if (!success) {
|
if (!success) {
|
||||||
/* Cannot read from anywhere -- bye bye array */
|
/* Cannot read from anywhere - mark it bad */
|
||||||
md_error(mddev, conf->mirrors[read_disk].rdev);
|
mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev;
|
||||||
|
if (!rdev_set_badblocks(rdev, sect, s, 0))
|
||||||
|
md_error(mddev, rdev);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* write it back and re-read */
|
/* write it back and re-read */
|
||||||
@@ -1694,13 +1710,9 @@ static void fix_read_error(conf_t *conf, int read_disk,
|
|||||||
d--;
|
d--;
|
||||||
rdev = conf->mirrors[d].rdev;
|
rdev = conf->mirrors[d].rdev;
|
||||||
if (rdev &&
|
if (rdev &&
|
||||||
test_bit(In_sync, &rdev->flags)) {
|
test_bit(In_sync, &rdev->flags))
|
||||||
if (sync_page_io(rdev, sect, s<<9,
|
r1_sync_page_io(rdev, sect, s,
|
||||||
conf->tmppage, WRITE, false)
|
conf->tmppage, WRITE);
|
||||||
== 0)
|
|
||||||
/* Well, this device is dead */
|
|
||||||
md_error(mddev, rdev);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
d = start;
|
d = start;
|
||||||
while (d != read_disk) {
|
while (d != read_disk) {
|
||||||
@@ -1711,12 +1723,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
|
|||||||
rdev = conf->mirrors[d].rdev;
|
rdev = conf->mirrors[d].rdev;
|
||||||
if (rdev &&
|
if (rdev &&
|
||||||
test_bit(In_sync, &rdev->flags)) {
|
test_bit(In_sync, &rdev->flags)) {
|
||||||
if (sync_page_io(rdev, sect, s<<9,
|
if (r1_sync_page_io(rdev, sect, s,
|
||||||
conf->tmppage, READ, false)
|
conf->tmppage, READ)) {
|
||||||
== 0)
|
|
||||||
/* Well, this device is dead */
|
|
||||||
md_error(mddev, rdev);
|
|
||||||
else {
|
|
||||||
atomic_add(s, &rdev->corrected_errors);
|
atomic_add(s, &rdev->corrected_errors);
|
||||||
printk(KERN_INFO
|
printk(KERN_INFO
|
||||||
"md/raid1:%s: read error corrected "
|
"md/raid1:%s: read error corrected "
|
||||||
@@ -1860,20 +1868,33 @@ static void raid1d(mddev_t *mddev)
|
|||||||
mddev = r1_bio->mddev;
|
mddev = r1_bio->mddev;
|
||||||
conf = mddev->private;
|
conf = mddev->private;
|
||||||
if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
|
if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
|
||||||
if (test_bit(R1BIO_MadeGood, &r1_bio->state)) {
|
if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
|
||||||
|
test_bit(R1BIO_WriteError, &r1_bio->state)) {
|
||||||
int m;
|
int m;
|
||||||
int s = r1_bio->sectors;
|
int s = r1_bio->sectors;
|
||||||
for (m = 0; m < conf->raid_disks ; m++) {
|
for (m = 0; m < conf->raid_disks ; m++) {
|
||||||
|
mdk_rdev_t *rdev
|
||||||
|
= conf->mirrors[m].rdev;
|
||||||
struct bio *bio = r1_bio->bios[m];
|
struct bio *bio = r1_bio->bios[m];
|
||||||
if (bio->bi_end_io != NULL &&
|
if (bio->bi_end_io == NULL)
|
||||||
test_bit(BIO_UPTODATE,
|
continue;
|
||||||
|
if (test_bit(BIO_UPTODATE,
|
||||||
&bio->bi_flags)) {
|
&bio->bi_flags)) {
|
||||||
rdev = conf->mirrors[m].rdev;
|
|
||||||
rdev_clear_badblocks(
|
rdev_clear_badblocks(
|
||||||
rdev,
|
rdev,
|
||||||
r1_bio->sector,
|
r1_bio->sector,
|
||||||
r1_bio->sectors);
|
r1_bio->sectors);
|
||||||
}
|
}
|
||||||
|
if (!test_bit(BIO_UPTODATE,
|
||||||
|
&bio->bi_flags) &&
|
||||||
|
test_bit(R1BIO_WriteError,
|
||||||
|
&r1_bio->state)) {
|
||||||
|
if (!rdev_set_badblocks(
|
||||||
|
rdev,
|
||||||
|
r1_bio->sector,
|
||||||
|
r1_bio->sectors, 0))
|
||||||
|
md_error(mddev, rdev);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
put_buf(r1_bio);
|
put_buf(r1_bio);
|
||||||
md_done_sync(mddev, s, 1);
|
md_done_sync(mddev, s, 1);
|
||||||
|
Reference in New Issue
Block a user