Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (34 commits)
  dm table: set flush capability based on underlying devices
  dm crypt: optionally support discard requests
  dm raid: add md raid1 support
  dm raid: support metadata devices
  dm raid: add write_mostly parameter
  dm raid: add region_size parameter
  dm raid: improve table parameters documentation
  dm ioctl: forbid multiple device specifiers
  dm ioctl: introduce __get_dev_cell
  dm ioctl: fill in device parameters in more ioctls
  dm flakey: add corrupt_bio_byte feature
  dm flakey: add drop_writes
  dm flakey: support feature args
  dm flakey: use dm_target_offset and support discards
  dm table: share target argument parsing functions
  dm snapshot: skip reading origin when overwriting complete chunk
  dm: ignore merge_bvec for snapshots when safe
  dm table: clean dm_get_device and move exports
  dm raid: tidy includes
  dm ioctl: prevent empty message
  ...
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -4,7 +4,8 @@ dm-crypt
 Device-Mapper's "crypt" target provides transparent encryption of block devices
 using the kernel crypto API.
 
-Parameters: <cipher> <key> <iv_offset> <device path> <offset>
+Parameters: <cipher> <key> <iv_offset> <device path> \
+	      <offset> [<#opt_params> <opt_params>]
 
 <cipher>
     Encryption cipher and an optional IV generation mode.
@@ -37,6 +38,24 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
 <offset>
     Starting sector within the device where the encrypted data begins.
 
+<#opt_params>
+    Number of optional parameters.  If there are no optional parameters,
+    the optional parameters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        1 allow_discards
+
+allow_discards
+    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+    The default is to ignore discard requests.
+
+    WARNING: Assess the specific security risks carefully before enabling this
+    option.  For example, allowing discards on encrypted devices may lead to
+    the leak of information about the ciphertext device (filesystem type,
+    used space etc.) if the discarded blocks can be located easily on the
+    device later.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
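For reference, a complete crypt table line built from the grammar above, with the new optional-parameters section enabled, might look like this (the device, sizes and key below are illustrative only, not taken from this merge):

    0 409600 crypt aes-cbc-essiv:sha256 0123456789abcdef0123456789abcdef 0 /dev/sdb 0 1 allow_discards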
--- a/Documentation/device-mapper/dm-flakey.txt
+++ b/Documentation/device-mapper/dm-flakey.txt
@@ -1,17 +1,53 @@
 dm-flakey
 =========
 
-This target is the same as the linear target except that it returns I/O
-errors periodically.  It's been found useful in simulating failing
-devices for testing purposes.
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically.  It's been found useful in simulating
+failing devices for testing purposes.
 
 Starting from the time the table is loaded, the device is available for
-<up interval> seconds, then returns errors for <down interval> seconds,
-and then this cycle repeats.
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
+
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
 
-Parameters: <dev path> <offset> <up interval> <down interval>
+Table parameters
+----------------
+  <dev path> <offset> <up interval> <down interval> \
+    [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
     <dev path>: Full pathname to the underlying block-device, or a
                 "major:minor" device-number.
     <offset>: Starting sector within the device.
     <up interval>: Number of seconds device is available.
     <down interval>: Number of seconds device returns errors.
+
+Optional feature parameters:
+  If no feature parameters are present, during the periods of
+  unreliability, all I/O returns errors.
+
+  drop_writes:
+	All write I/O is silently ignored.
+	Read I/O is handled correctly.
+
+  corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+	During <down interval>, replace <Nth_byte> of the data of
+	each matching bio with <value>.
+
+	<Nth_byte>: The offset of the byte to replace.
+		    Counting starts at 1, to replace the first byte.
+	<direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
+		     'w' is incompatible with drop_writes.
+	<value>: The value (from 0-255) to write.
+	<flags>: Perform the replacement only if bio->bi_rw has all the
+		 selected flags set.
+
+Examples:
+  corrupt_bio_byte 32 r 1 0
+	- replaces the 32nd byte of READ bios with the value 1
+
+  corrupt_bio_byte 224 w 0 32
+	- replaces the 224th byte of REQ_META (=32) bios with the value 0
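Putting the new feature arguments together: an illustrative flakey table (device and sizes hypothetical) that is available for 30 seconds, misbehaves for 5, and silently drops writes while down would be:

    0 409600 flakey /dev/sdb 0 30 5 1 drop_writes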
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -1,70 +1,108 @@
-Device-mapper RAID (dm-raid) is a bridge from DM to MD.  It
-provides a way to use device-mapper interfaces to access the MD RAID
-drivers.
-
-As with all device-mapper targets, the nominal public interfaces are the
-constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO
-and STATUSTYPE_TABLE).  The CTR table looks like the following:
-
-1: <s> <l> raid \
-2:	<raid_type> <#raid_params> <raid_params> \
-3:	<#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN>
-
-Line 1 contains the standard first three arguments to any device-mapper
-target - the start, length, and target type fields.  The target type in
-this case is "raid".
-
-Line 2 contains the arguments that define the particular raid
-type/personality/level, the required arguments for that raid type, and
-any optional arguments.  Possible raid types include: raid4, raid5_la,
-raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc.  (raid1 is
-planned for the future.)  The list of required and optional parameters
-is the same for all the current raid types.  The required parameters are
-positional, while the optional parameters are given as key/value pairs.
-The possible parameters are as follows:
- <chunk_size>		Chunk size in sectors.
- [[no]sync]		Force/Prevent RAID initialization
- [rebuild <idx>]	Rebuild the drive indicated by the index
- [daemon_sleep <ms>]	Time between bitmap daemon work to clear bits
- [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
- [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
- [max_write_behind <sectors>]		See '-write-behind=' (man mdadm)
- [stripe_cache <sectors>]		Stripe cache size for higher RAIDs
-
-Line 3 contains the list of devices that compose the array in
-metadata/data device pairs.  If the metadata is stored separately, a '-'
-is given for the metadata device position.  If a drive has failed or is
-missing at creation time, a '-' can be given for both the metadata and
-data drives for a given position.
-
-NB. Currently all metadata devices must be specified as '-'.
-
-Examples:
-# RAID4 - 4 data drives, 1 parity
+dm-raid
+-------
+
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
+
+The target is named "raid" and it accepts the following parameters:
+  <raid_type> <#raid_params> <raid_params> \
+    <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+  raid1		RAID1 mirroring
+  raid4		RAID4 dedicated parity disk
+  raid5_la	RAID5 left asymmetric
+		- rotating parity 0 with data continuation
+  raid5_ra	RAID5 right asymmetric
+		- rotating parity N with data continuation
+  raid5_ls	RAID5 left symmetric
+		- rotating parity 0 with data restart
+  raid5_rs	RAID5 right symmetric
+		- rotating parity N with data restart
+  raid6_zr	RAID6 zero restart
+		- rotating parity zero (left-to-right) with data restart
+  raid6_nr	RAID6 N restart
+		- rotating parity N (right-to-left) with data restart
+  raid6_nc	RAID6 N continue
+		- rotating parity N (right-to-left) with data continuation
+
+Reference: Chapter 4 of
+http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+    Mandatory parameters:
+	<chunk_size>: Chunk size in sectors.  This parameter is often known as
+		      "stripe size".  It is the only mandatory parameter and
+		      is placed first.
+
+    followed by optional parameters (in any order):
+	[sync|nosync]	Force or prevent RAID initialization.
+
+	[rebuild <idx>]	Rebuild drive number idx (first drive is 0).
+
+	[daemon_sleep <ms>]
+		Interval between runs of the bitmap daemon that
+		clear bits.  A longer interval means less bitmap I/O but
+		resyncing after a failure is likely to take longer.
+
+	[min_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
+	[max_recovery_rate <kB/sec/disk>]  Throttle RAID initialization
+	[write_mostly <idx>]		   Drive index is write-mostly
+	[max_write_behind <sectors>]	   See '-write-behind=' (man mdadm)
+	[stripe_cache <sectors>]	   Stripe cache size (higher RAIDs only)
+	[region_size <sectors>]
+		The region_size multiplied by the number of regions is the
+		logical size of the array.  The bitmap records the device
+		synchronisation state for each region.
+
+<#raid_devs>: The number of devices composing the array.
+	Each device consists of two entries.  The first is the device
+	containing the metadata (if any); the second is the one containing the
+	data.
+
+	If a drive has failed or is missing at creation time, a '-' can be
+	given for both the metadata and data drives for a given position.
+
+
+Example tables
+--------------
+# RAID4 - 4 data drives, 1 parity (no metadata devices)
 # No metadata devices specified to hold superblock/bitmap info
 # Chunk size of 1MiB
 # (Lines separated for easy reading)
+
 0 1960893648 raid \
 	raid4 1 2048 \
 	5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
 
-# RAID4 - 4 data drives, 1 parity (no metadata devices)
+# RAID4 - 4 data drives, 1 parity (with metadata devices)
 # Chunk size of 1MiB, force RAID initialization,
 #	min recovery rate at 20 kiB/sec/disk
+
 0 1960893648 raid \
-	raid4 4 2048 min_recovery_rate 20 sync\
-	5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+	raid4 4 2048 sync min_recovery_rate 20 \
+	5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
 
-Performing a 'dmsetup table' should display the CTR table used to
-construct the mapping (with possible reordering of optional
-parameters).
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
 
-Performing a 'dmsetup status' will yield information on the state and
-health of the array.  The output is as follows:
+'dmsetup status' yields information on the state and health of the
+array.
+The output is as follows:
 1: <s> <l> raid \
 2:	<raid_type> <#devices> <1 health char for each dev> <resync_ratio>
 
-Line 1 is standard DM output.  Line 2 is best shown by example:
+Line 1 is the standard output produced by device-mapper.
+Line 2 is produced by the raid target, and best explained by example:
 	0 1960893648 raid raid4 5 AAAAA 2/490221568
 Here we can see the RAID type is raid4, there are 5 devices - all of
 which are 'A'live, and the array is 2/490221568 complete with recovery.
+Faulty or missing devices are marked 'D'.  Devices that are out-of-sync
+are marked 'a'.
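Because this merge adds md raid1 support, the same grammar now also covers mirrors. An illustrative raid1 table (devices hypothetical, and untested here; the chunk size is supplied only because <chunk_size> is the mandatory first raid parameter) could read:

    0 1960893648 raid \
            raid1 2 2048 sync \
            2 - 8:17 - 8:33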
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -241,12 +241,13 @@ config DM_MIRROR
 	 needed for live data migration tools such as 'pvmove'.
 
 config DM_RAID
-       tristate "RAID 4/5/6 target (EXPERIMENTAL)"
+       tristate "RAID 1/4/5/6 target (EXPERIMENTAL)"
        depends on BLK_DEV_DM && EXPERIMENTAL
+       select MD_RAID1
        select MD_RAID456
        select BLK_DEV_MD
        ---help---
-	 A dm target that supports RAID4, RAID5 and RAID6 mappings
+	 A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
 
 	 A RAID-5 set of N drives with a capacity of C MB per drive provides
 	 the capacity of C * (N - 1) MB, and protects against a failure
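To make the help text concrete: with N = 5 drives of C = 1000 MB each, a RAID-5 set provides 1000 * (5 - 1) = 4000 MB of usable capacity and survives the failure of any single drive.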
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -30,7 +30,6 @@
 #include <linux/device-mapper.h>
 
 #define DM_MSG_PREFIX "crypt"
-#define MESG_STR(x) x, sizeof(x)
 
 /*
  * context holding the current state of a multi-part conversion
@@ -239,7 +238,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
 			      struct dm_crypt_request *dmreq)
 {
 	memset(iv, 0, cc->iv_size);
-	*(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
+	*(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
 
 	return 0;
 }
@@ -248,7 +247,7 @@ static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
 				struct dm_crypt_request *dmreq)
 {
 	memset(iv, 0, cc->iv_size);
-	*(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+	*(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
 
 	return 0;
 }
@@ -415,7 +414,7 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
 	struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private;
 
 	memset(iv, 0, cc->iv_size);
-	*(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+	*(__le64 *)iv = cpu_to_le64(dmreq->iv_sector);
 	crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
 
 	return 0;
@@ -1575,11 +1574,17 @@ bad_mem:
 static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct crypt_config *cc;
-	unsigned int key_size;
+	unsigned int key_size, opt_params;
 	unsigned long long tmpll;
 	int ret;
+	struct dm_arg_set as;
+	const char *opt_string;
+
+	static struct dm_arg _args[] = {
+		{0, 1, "Invalid number of feature args"},
+	};
 
-	if (argc != 5) {
+	if (argc < 5) {
 		ti->error = "Not enough arguments";
 		return -EINVAL;
 	}
@@ -1648,6 +1653,30 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 	cc->start = tmpll;
 
+	argv += 5;
+	argc -= 5;
+
+	/* Optional parameters */
+	if (argc) {
+		as.argc = argc;
+		as.argv = argv;
+
+		ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
+		if (ret)
+			goto bad;
+
+		opt_string = dm_shift_arg(&as);
+
+		if (opt_params == 1 && opt_string &&
+		    !strcasecmp(opt_string, "allow_discards"))
+			ti->num_discard_requests = 1;
+		else if (opt_params) {
+			ret = -EINVAL;
+			ti->error = "Invalid feature arguments";
+			goto bad;
+		}
+	}
+
 	ret = -ENOMEM;
 	cc->io_queue = alloc_workqueue("kcryptd_io",
 				       WQ_NON_REENTRANT|
@@ -1682,9 +1711,16 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
 	struct dm_crypt_io *io;
 	struct crypt_config *cc;
 
-	if (bio->bi_rw & REQ_FLUSH) {
+	/*
+	 * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues.
+	 * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight
+	 * - for REQ_DISCARD caller must use flush if IO ordering matters
+	 */
+	if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) {
 		cc = ti->private;
 		bio->bi_bdev = cc->dev->bdev;
+		if (bio_sectors(bio))
+			bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector);
 		return DM_MAPIO_REMAPPED;
 	}
 
@@ -1727,6 +1763,10 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
 				cc->dev->name, (unsigned long long)cc->start);
+
+		if (ti->num_discard_requests)
+			DMEMIT(" 1 allow_discards");
+
 		break;
 	}
 	return 0;
@@ -1770,12 +1810,12 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
 	if (argc < 2)
 		goto error;
 
-	if (!strnicmp(argv[0], MESG_STR("key"))) {
+	if (!strcasecmp(argv[0], "key")) {
 		if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) {
 			DMWARN("not suspended during key manipulation.");
 			return -EINVAL;
 		}
-		if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) {
+		if (argc == 3 && !strcasecmp(argv[1], "set")) {
 			ret = crypt_set_key(cc, argv[2]);
 			if (ret)
 				return ret;
@@ -1783,7 +1823,7 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
 			ret = cc->iv_gen_ops->init(cc);
 			return ret;
 		}
-		if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) {
+		if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
 			if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
 				ret = cc->iv_gen_ops->wipe(cc);
 				if (ret)
@@ -1823,7 +1863,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 10, 0},
+	.version = {1, 11, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
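The optional-parameter handling added to crypt_ctr() above is built on the shared helpers introduced by "dm table: share target argument parsing functions" (struct dm_arg, dm_read_arg_group(), dm_shift_arg()). A minimal user-space model of that parsing pattern, for readers without the tree at hand, follows; it is an illustration loosely modelled on the diff, not the kernel implementation:

    #include <stdio.h>
    #include <string.h>
    #include <strings.h>

    /* Shapes mirror struct dm_arg_set / struct dm_arg in the diff. */
    struct arg_set { unsigned argc; char **argv; };
    struct arg { unsigned min, max; const char *error; };

    /* Pop the next argument, like dm_shift_arg(). */
    static const char *shift_arg(struct arg_set *as)
    {
            if (!as->argc)
                    return NULL;
            as->argc--;
            return *as->argv++;
    }

    /* Parse one bounded unsigned value, like dm_read_arg(). */
    static int read_arg(const struct arg *a, struct arg_set *as,
                        unsigned *v, const char **err)
    {
            const char *s = shift_arg(as);

            if (!s || sscanf(s, "%u", v) != 1 || *v < a->min || *v > a->max) {
                    *err = a->error;
                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            /* Feature args as crypt_ctr() would see them after "argv += 5". */
            char *argv[] = { "1", "allow_discards" };
            struct arg_set as = { 2, argv };
            const struct arg args = { 0, 1, "Invalid number of feature args" };
            const char *err, *opt;
            unsigned opt_params;

            if (read_arg(&args, &as, &opt_params, &err)) {
                    fprintf(stderr, "%s\n", err);
                    return 1;
            }

            /* Same shape as the crypt_ctr() hunk: one known feature. */
            opt = shift_arg(&as);
            if (opt_params == 1 && opt && !strcasecmp(opt, "allow_discards"))
                    printf("discards enabled\n");

            return 0;
    }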
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2003 Sistina Software (UK) Limited.
- * Copyright (C) 2004, 2010 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -15,6 +15,9 @@
 
 #define DM_MSG_PREFIX "flakey"
 
+#define all_corrupt_bio_flags_match(bio, fc)	\
+	(((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags)
+
 /*
  * Flakey: Used for testing only, simulates intermittent,
  * catastrophic device failure.
@@ -25,60 +28,189 @@ struct flakey_c {
 	sector_t start;
 	unsigned up_interval;
 	unsigned down_interval;
+	unsigned long flags;
+	unsigned corrupt_bio_byte;
+	unsigned corrupt_bio_rw;
+	unsigned corrupt_bio_value;
+	unsigned corrupt_bio_flags;
 };
 
-/*
- * Construct a flakey mapping: <dev_path> <offset> <up interval> <down interval>
- */
-static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
-{
-	struct flakey_c *fc;
-	unsigned long long tmp;
-
-	if (argc != 4) {
-		ti->error = "dm-flakey: Invalid argument count";
+enum feature_flag_bits {
+	DROP_WRITES
+};
+
+static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
+			  struct dm_target *ti)
+{
+	int r;
+	unsigned argc;
+	const char *arg_name;
+
+	static struct dm_arg _args[] = {
+		{0, 6, "Invalid number of feature args"},
+		{1, UINT_MAX, "Invalid corrupt bio byte"},
+		{0, 255, "Invalid corrupt value to write into bio byte (0-255)"},
+		{0, UINT_MAX, "Invalid corrupt bio flags mask"},
+	};
+
+	/* No feature arguments supplied. */
+	if (!as->argc)
+		return 0;
+
+	r = dm_read_arg_group(_args, as, &argc, &ti->error);
+	if (r)
+		return r;
+
+	while (argc) {
+		arg_name = dm_shift_arg(as);
+		argc--;
+
+		/*
+		 * drop_writes
+		 */
+		if (!strcasecmp(arg_name, "drop_writes")) {
+			if (test_and_set_bit(DROP_WRITES, &fc->flags)) {
+				ti->error = "Feature drop_writes duplicated";
+				return -EINVAL;
+			}
+
+			continue;
+		}
+
+		/*
+		 * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>
+		 */
+		if (!strcasecmp(arg_name, "corrupt_bio_byte")) {
+			if (!argc)
+				ti->error = "Feature corrupt_bio_byte requires parameters";
+
+			r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error);
+			if (r)
+				return r;
+			argc--;
+
+			/*
+			 * Direction r or w?
+			 */
+			arg_name = dm_shift_arg(as);
+			if (!strcasecmp(arg_name, "w"))
+				fc->corrupt_bio_rw = WRITE;
+			else if (!strcasecmp(arg_name, "r"))
+				fc->corrupt_bio_rw = READ;
+			else {
+				ti->error = "Invalid corrupt bio direction (r or w)";
+				return -EINVAL;
+			}
+			argc--;
+
+			/*
+			 * Value of byte (0-255) to write in place of correct one.
+			 */
+			r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error);
+			if (r)
+				return r;
+			argc--;
+
+			/*
+			 * Only corrupt bios with these flags set.
+			 */
+			r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error);
+			if (r)
+				return r;
+			argc--;
+
+			continue;
+		}
+
+		ti->error = "Unrecognised flakey feature requested";
 		return -EINVAL;
 	}
 
-	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+	if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
+		ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Construct a flakey mapping:
+ * <dev_path> <offset> <up interval> <down interval> [<#feature args> [<arg>]*]
+ *
+ * Feature args:
+ *   [drop_writes]
+ *   [corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>]
+ *
+ * Nth_byte starts from 1 for the first byte.
+ * Direction is r for READ or w for WRITE.
+ * bio_flags is ignored if 0.
+ */
+static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	static struct dm_arg _args[] = {
+		{0, UINT_MAX, "Invalid up interval"},
+		{0, UINT_MAX, "Invalid down interval"},
+	};
+
+	int r;
+	struct flakey_c *fc;
+	unsigned long long tmpll;
+	struct dm_arg_set as;
+	const char *devname;
+
+	as.argc = argc;
+	as.argv = argv;
+
+	if (argc < 4) {
+		ti->error = "Invalid argument count";
+		return -EINVAL;
+	}
+
+	fc = kzalloc(sizeof(*fc), GFP_KERNEL);
 	if (!fc) {
-		ti->error = "dm-flakey: Cannot allocate linear context";
+		ti->error = "Cannot allocate linear context";
 		return -ENOMEM;
 	}
 	fc->start_time = jiffies;
 
-	if (sscanf(argv[1], "%llu", &tmp) != 1) {
-		ti->error = "dm-flakey: Invalid device sector";
-		goto bad;
-	}
-	fc->start = tmp;
+	devname = dm_shift_arg(&as);
 
-	if (sscanf(argv[2], "%u", &fc->up_interval) != 1) {
-		ti->error = "dm-flakey: Invalid up interval";
+	if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) {
+		ti->error = "Invalid device sector";
 		goto bad;
 	}
+	fc->start = tmpll;
 
-	if (sscanf(argv[3], "%u", &fc->down_interval) != 1) {
-		ti->error = "dm-flakey: Invalid down interval";
+	r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error);
+	if (r)
+		goto bad;
+
+	r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error);
+	if (r)
 		goto bad;
-	}
 
 	if (!(fc->up_interval + fc->down_interval)) {
-		ti->error = "dm-flakey: Total (up + down) interval is zero";
+		ti->error = "Total (up + down) interval is zero";
 		goto bad;
 	}
 
 	if (fc->up_interval + fc->down_interval < fc->up_interval) {
-		ti->error = "dm-flakey: Interval overflow";
+		ti->error = "Interval overflow";
 		goto bad;
 	}
 
-	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &fc->dev)) {
-		ti->error = "dm-flakey: Device lookup failed";
+	r = parse_features(&as, fc, ti);
+	if (r)
+		goto bad;
+
+	if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) {
+		ti->error = "Device lookup failed";
 		goto bad;
 	}
 
 	ti->num_flush_requests = 1;
+	ti->num_discard_requests = 1;
 	ti->private = fc;
 	return 0;
@@ -99,7 +231,7 @@ static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector)
 {
 	struct flakey_c *fc = ti->private;
 
-	return fc->start + (bi_sector - ti->begin);
+	return fc->start + dm_target_offset(ti, bi_sector);
 }
 
 static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
@@ -111,6 +243,25 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
 		bio->bi_sector = flakey_map_sector(ti, bio->bi_sector);
 }
 
+static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
+{
+	unsigned bio_bytes = bio_cur_bytes(bio);
+	char *data = bio_data(bio);
+
+	/*
+	 * Overwrite the Nth byte of the data returned.
+	 */
+	if (data && bio_bytes >= fc->corrupt_bio_byte) {
+		data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value;
+
+		DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
+			"(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n",
+			bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
+			(bio_data_dir(bio) == WRITE) ? 'w' : 'r',
+			bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes);
+	}
+}
+
 static int flakey_map(struct dm_target *ti, struct bio *bio,
 		      union map_info *map_context)
 {
@@ -119,18 +270,71 @@ static int flakey_map(struct dm_target *ti, struct bio *bio,
 
 	/* Are we alive ? */
 	elapsed = (jiffies - fc->start_time) / HZ;
-	if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval)
-		return -EIO;
+	if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
+		/*
+		 * Flag this bio as submitted while down.
+		 */
+		map_context->ll = 1;
+
+		/*
+		 * Map reads as normal.
+		 */
+		if (bio_data_dir(bio) == READ)
+			goto map_bio;
+
+		/*
+		 * Drop writes?
+		 */
+		if (test_bit(DROP_WRITES, &fc->flags)) {
+			bio_endio(bio, 0);
+			return DM_MAPIO_SUBMITTED;
+		}
+
+		/*
+		 * Corrupt matching writes.
+		 */
+		if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) {
+			if (all_corrupt_bio_flags_match(bio, fc))
+				corrupt_bio_data(bio, fc);
+			goto map_bio;
+		}
+
+		/*
+		 * By default, error all I/O.
+		 */
+		return -EIO;
+	}
 
+map_bio:
 	flakey_map_bio(ti, bio);
 
 	return DM_MAPIO_REMAPPED;
 }
 
+static int flakey_end_io(struct dm_target *ti, struct bio *bio,
+			 int error, union map_info *map_context)
+{
+	struct flakey_c *fc = ti->private;
+	unsigned bio_submitted_while_down = map_context->ll;
+
+	/*
+	 * Corrupt successful READs while in down state.
+	 * If flags were specified, only corrupt those that match.
+	 */
+	if (!error && bio_submitted_while_down &&
+	    (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) &&
+	    all_corrupt_bio_flags_match(bio, fc))
+		corrupt_bio_data(bio, fc);
+
+	return error;
+}
+
 static int flakey_status(struct dm_target *ti, status_type_t type,
 			 char *result, unsigned int maxlen)
 {
+	unsigned sz = 0;
 	struct flakey_c *fc = ti->private;
+	unsigned drop_writes;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -138,9 +342,22 @@ static int flakey_status(struct dm_target *ti, status_type_t type,
 		break;
 
 	case STATUSTYPE_TABLE:
-		snprintf(result, maxlen, "%s %llu %u %u", fc->dev->name,
+		DMEMIT("%s %llu %u %u ", fc->dev->name,
 			 (unsigned long long)fc->start, fc->up_interval,
 			 fc->down_interval);
+
+		drop_writes = test_bit(DROP_WRITES, &fc->flags);
+		DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5);
+
+		if (drop_writes)
+			DMEMIT("drop_writes ");
+
+		if (fc->corrupt_bio_byte)
+			DMEMIT("corrupt_bio_byte %u %c %u %u ",
+			       fc->corrupt_bio_byte,
+			       (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r',
+			       fc->corrupt_bio_value, fc->corrupt_bio_flags);
+
 		break;
 	}
 	return 0;
@@ -177,11 +394,12 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
 
 static struct target_type flakey_target = {
 	.name   = "flakey",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.module = THIS_MODULE,
 	.ctr    = flakey_ctr,
 	.dtr    = flakey_dtr,
 	.map    = flakey_map,
+	.end_io = flakey_end_io,
 	.status = flakey_status,
 	.ioctl	= flakey_ioctl,
 	.merge	= flakey_merge,
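One detail worth noting in the flakey_status() hunk: the feature-argument count is emitted as drop_writes + (fc->corrupt_bio_byte > 0) * 5, because corrupt_bio_byte contributes its keyword plus four values. A table using both features (reads corrupted, writes dropped) would therefore report six feature args via 'dmsetup table', e.g. (device name illustrative):

    0 409600 flakey 8:16 0 30 5 6 drop_writes corrupt_bio_byte 32 r 1 0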
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -38,6 +38,8 @@ struct io {
 	struct dm_io_client *client;
 	io_notify_fn callback;
 	void *context;
+	void *vma_invalidate_address;
+	unsigned long vma_invalidate_size;
 } __attribute__((aligned(DM_IO_MAX_REGIONS)));
 
 static struct kmem_cache *_dm_io_cache;
@@ -116,6 +118,10 @@ static void dec_count(struct io *io, unsigned int region, int error)
 		set_bit(region, &io->error_bits);
 
 	if (atomic_dec_and_test(&io->count)) {
+		if (io->vma_invalidate_size)
+			invalidate_kernel_vmap_range(io->vma_invalidate_address,
+						     io->vma_invalidate_size);
+
 		if (io->sleeper)
 			wake_up_process(io->sleeper);
 
@@ -159,6 +165,9 @@ struct dpages {
 
 	unsigned context_u;
 	void *context_ptr;
+
+	void *vma_invalidate_address;
+	unsigned long vma_invalidate_size;
 };
 
 /*
@@ -377,6 +386,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 	io->sleeper = current;
 	io->client = client;
 
+	io->vma_invalidate_address = dp->vma_invalidate_address;
+	io->vma_invalidate_size = dp->vma_invalidate_size;
+
 	dispatch_io(rw, num_regions, where, dp, io, 1);
 
 	while (1) {
@@ -415,13 +427,21 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
 	io->callback = fn;
 	io->context = context;
 
+	io->vma_invalidate_address = dp->vma_invalidate_address;
+	io->vma_invalidate_size = dp->vma_invalidate_size;
+
 	dispatch_io(rw, num_regions, where, dp, io, 0);
 	return 0;
 }
 
-static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
+static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
+		   unsigned long size)
 {
 	/* Set up dpages based on memory type */
+
+	dp->vma_invalidate_address = NULL;
+	dp->vma_invalidate_size = 0;
+
 	switch (io_req->mem.type) {
 	case DM_IO_PAGE_LIST:
 		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
@@ -432,6 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
 		break;
 
 	case DM_IO_VMA:
+		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
+		if ((io_req->bi_rw & RW_MASK) == READ) {
+			dp->vma_invalidate_address = io_req->mem.ptr.vma;
+			dp->vma_invalidate_size = size;
+		}
 		vm_dp_init(dp, io_req->mem.ptr.vma);
 		break;
 
@@ -460,7 +485,7 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 	int r;
 	struct dpages dp;
 
-	r = dp_init(io_req, &dp);
+	r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
 	if (r)
 		return r;
 
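The dm-io changes pair the two halves of the cache-maintenance contract for I/O on vmalloc'ed (DM_IO_VMA) buffers: flush_kernel_vmap_range() before the request is dispatched, and invalidate_kernel_vmap_range() in dec_count() once a READ completes, with the address and size carried through struct dpages and struct io. In sketch form (the two kernel helpers are real; the surrounding calls are hypothetical):

    buf = vmalloc(len);
    flush_kernel_vmap_range(buf, len);      /* before the device touches memory */
    submit_io(buf, len);                    /* hypothetical submission */
    /* ... on READ completion ... */
    invalidate_kernel_vmap_range(buf, len); /* before the CPU reads DMA'd data */

This matters on architectures with virtually-indexed caches, where the vmalloc alias and the underlying physical pages can otherwise hold stale cache lines.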
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -128,6 +128,24 @@ static struct hash_cell *__get_uuid_cell(const char *str)
 	return NULL;
 }
 
+static struct hash_cell *__get_dev_cell(uint64_t dev)
+{
+	struct mapped_device *md;
+	struct hash_cell *hc;
+
+	md = dm_get_md(huge_decode_dev(dev));
+	if (!md)
+		return NULL;
+
+	hc = dm_get_mdptr(md);
+	if (!hc) {
+		dm_put(md);
+		return NULL;
+	}
+
+	return hc;
+}
+
 /*-----------------------------------------------------------------
  * Inserting, removing and renaming a device.
  *---------------------------------------------------------------*/
@@ -718,25 +736,45 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
  */
 static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
 {
-	struct mapped_device *md;
-	void *mdptr = NULL;
+	struct hash_cell *hc = NULL;
 
-	if (*param->uuid)
-		return __get_uuid_cell(param->uuid);
+	if (*param->uuid) {
+		if (*param->name || param->dev)
+			return NULL;
 
-	if (*param->name)
-		return __get_name_cell(param->name);
+		hc = __get_uuid_cell(param->uuid);
+		if (!hc)
+			return NULL;
+	} else if (*param->name) {
+		if (param->dev)
+			return NULL;
 
-	md = dm_get_md(huge_decode_dev(param->dev));
-	if (!md)
-		goto out;
+		hc = __get_name_cell(param->name);
+		if (!hc)
+			return NULL;
+	} else if (param->dev) {
+		hc = __get_dev_cell(param->dev);
+		if (!hc)
+			return NULL;
+	} else
+		return NULL;
 
-	mdptr = dm_get_mdptr(md);
-	if (!mdptr)
-		dm_put(md);
+	/*
+	 * Sneakily write in both the name and the uuid
+	 * while we have the cell.
+	 */
+	strlcpy(param->name, hc->name, sizeof(param->name));
+	if (hc->uuid)
+		strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
+	else
+		param->uuid[0] = '\0';
 
-out:
-	return mdptr;
+	if (hc->new_map)
+		param->flags |= DM_INACTIVE_PRESENT_FLAG;
+	else
+		param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
+
+	return hc;
 }
 
 static struct mapped_device *find_device(struct dm_ioctl *param)
@@ -746,24 +784,8 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
 
 	down_read(&_hash_lock);
 	hc = __find_device_hash_cell(param);
-	if (hc) {
+	if (hc)
 		md = hc->md;
-
-		/*
-		 * Sneakily write in both the name and the uuid
-		 * while we have the cell.
-		 */
-		strlcpy(param->name, hc->name, sizeof(param->name));
-		if (hc->uuid)
-			strlcpy(param->uuid, hc->uuid, sizeof(param->uuid));
-		else
-			param->uuid[0] = '\0';
-
-		if (hc->new_map)
-			param->flags |= DM_INACTIVE_PRESENT_FLAG;
-		else
-			param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
-	}
 	up_read(&_hash_lock);
 
 	return md;
@@ -1402,6 +1424,11 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 		goto out;
 	}
 
+	if (!argc) {
+		DMWARN("Empty message received.");
+		goto out;
+	}
+
 	table = dm_get_live_table(md);
 	if (!table)
 		goto out_argv;
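The net effect of the dm-ioctl changes on device lookup is that exactly one specifier is honoured per ioctl, and anything ambiguous is rejected:

    uuid only            -> __get_uuid_cell()
    name only            -> __get_name_cell()
    dev (major:minor)    -> __get_dev_cell()
    more than one given  -> lookup fails (NULL)

And because every path now goes through the hash cell, the name, uuid and DM_INACTIVE_PRESENT_FLAG fields are filled in for all of these lookups rather than only in find_device(), which is what "dm ioctl: fill in device parameters in more ioctls" refers to.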
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -224,8 +224,6 @@ struct kcopyd_job {
 	unsigned int num_dests;
 	struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS];
 
-	sector_t offset;
-	unsigned int nr_pages;
 	struct page_list *pages;
 
 	/*
@@ -380,7 +378,7 @@ static int run_io_job(struct kcopyd_job *job)
 		.bi_rw = job->rw,
 		.mem.type = DM_IO_PAGE_LIST,
 		.mem.ptr.pl = job->pages,
-		.mem.offset = job->offset,
+		.mem.offset = 0,
 		.notify.fn = complete_io,
 		.notify.context = job,
 		.client = job->kc->io_client,
@@ -397,10 +395,9 @@ static int run_io_job(struct kcopyd_job *job)
 static int run_pages_job(struct kcopyd_job *job)
 {
 	int r;
+	unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9);
 
-	job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
-				  PAGE_SIZE >> 9);
-	r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
+	r = kcopyd_get_pages(job->kc, nr_pages, &job->pages);
 	if (!r) {
 		/* this job is ready for io */
 		push(&job->kc->io_jobs, job);
@@ -602,8 +599,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 	job->num_dests = num_dests;
 	memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
 
-	job->offset = 0;
-	job->nr_pages = 0;
 	job->pages = NULL;
 
 	job->fn = fn;
@@ -622,6 +617,37 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 }
 EXPORT_SYMBOL(dm_kcopyd_copy);
 
+void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
+				 dm_kcopyd_notify_fn fn, void *context)
+{
+	struct kcopyd_job *job;
+
+	job = mempool_alloc(kc->job_pool, GFP_NOIO);
+
+	memset(job, 0, sizeof(struct kcopyd_job));
+	job->kc = kc;
+	job->fn = fn;
+	job->context = context;
+
+	atomic_inc(&kc->nr_jobs);
+
+	return job;
+}
+EXPORT_SYMBOL(dm_kcopyd_prepare_callback);
+
+void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
+{
+	struct kcopyd_job *job = j;
+	struct dm_kcopyd_client *kc = job->kc;
+
+	job->read_err = read_err;
+	job->write_err = write_err;
+
+	push(&kc->complete_jobs, job);
+	wake(kc);
+}
+EXPORT_SYMBOL(dm_kcopyd_do_callback);
+
 /*
  * Cancels a kcopyd job, eg. someone might be deactivating a
  * mirror.
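The two new kcopyd exports give clients a way to route a completion through kcopyd's job machinery without performing a copy; this merge uses them for the snapshot change that skips reading the origin when a complete chunk is overwritten. Usage is a prepare/do pair; the sketch below is illustrative client code (the two exported functions and their signatures are as in the hunk above, the caller names are hypothetical):

    void *j = dm_kcopyd_prepare_callback(kc, my_notify_fn, my_context);
    /* ... later, from the client's own write-completion path ... */
    dm_kcopyd_do_callback(j, 0, 0);    /* read_err == 0, write_err == 0 */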
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -394,8 +394,7 @@ static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
 			group[count] = fe->region;
 			count++;
 
-			list_del(&fe->list);
-			list_add(&fe->list, &tmp_list);
+			list_move(&fe->list, &tmp_list);
 
 			type = fe->type;
 			if (count >= MAX_FLUSH_GROUP_COUNT)
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -197,15 +197,21 @@ EXPORT_SYMBOL(dm_dirty_log_destroy);
 #define MIRROR_DISK_VERSION 2
 #define LOG_OFFSET 2
 
-struct log_header {
-	uint32_t magic;
+struct log_header_disk {
+	__le32 magic;
 
 	/*
 	 * Simple, incrementing version. no backward
 	 * compatibility.
 	 */
+	__le32 version;
+	__le64 nr_regions;
+} __packed;
+
+struct log_header_core {
+	uint32_t magic;
 	uint32_t version;
-	sector_t nr_regions;
+	uint64_t nr_regions;
 };
 
 struct log_c {
@@ -239,10 +245,10 @@ struct log_c {
 	int log_dev_failed;
 	int log_dev_flush_failed;
 	struct dm_dev *log_dev;
-	struct log_header header;
+	struct log_header_core header;
 
 	struct dm_io_region header_location;
-	struct log_header *disk_header;
+	struct log_header_disk *disk_header;
 };
 
 /*
@@ -251,34 +257,34 @@ struct log_c {
  */
 static inline int log_test_bit(uint32_t *bs, unsigned bit)
 {
-	return test_bit_le(bit, (unsigned long *) bs) ? 1 : 0;
+	return test_bit_le(bit, bs) ? 1 : 0;
 }
 
 static inline void log_set_bit(struct log_c *l,
 			       uint32_t *bs, unsigned bit)
 {
-	__test_and_set_bit_le(bit, (unsigned long *) bs);
+	__set_bit_le(bit, bs);
 	l->touched_cleaned = 1;
 }
 
 static inline void log_clear_bit(struct log_c *l,
 				 uint32_t *bs, unsigned bit)
 {
-	__test_and_clear_bit_le(bit, (unsigned long *) bs);
+	__clear_bit_le(bit, bs);
 	l->touched_dirtied = 1;
 }
 
 /*----------------------------------------------------------------
  * Header IO
  *--------------------------------------------------------------*/
-static void header_to_disk(struct log_header *core, struct log_header *disk)
+static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk)
 {
 	disk->magic = cpu_to_le32(core->magic);
 	disk->version = cpu_to_le32(core->version);
 	disk->nr_regions = cpu_to_le64(core->nr_regions);
 }
 
-static void header_from_disk(struct log_header *core, struct log_header *disk)
+static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk)
 {
 	core->magic = le32_to_cpu(disk->magic);
 	core->version = le32_to_cpu(disk->version);
@@ -486,7 +492,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 	memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size);
 	lc->sync_count = (sync == NOSYNC) ? region_count : 0;
 
-	lc->recovering_bits = vmalloc(bitset_size);
+	lc->recovering_bits = vzalloc(bitset_size);
 	if (!lc->recovering_bits) {
 		DMWARN("couldn't allocate sync bitset");
 		vfree(lc->sync_bits);
@@ -498,7 +504,6 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
 		kfree(lc);
 		return -ENOMEM;
 	}
-	memset(lc->recovering_bits, 0, bitset_size);
 	lc->sync_search = 0;
 	log->context = lc;
 
@@ -739,8 +744,7 @@ static int core_get_resync_work(struct dm_dirty_log *log, region_t *region)
 		return 0;
 
 	do {
-		*region = find_next_zero_bit_le(
-				(unsigned long *) lc->sync_bits,
+		*region = find_next_zero_bit_le(lc->sync_bits,
 				lc->region_count,
 				lc->sync_search);
 		lc->sync_search = *region + 1;
@@ -22,7 +22,6 @@
|
|||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
|
|
||||||
#define DM_MSG_PREFIX "multipath"
|
#define DM_MSG_PREFIX "multipath"
|
||||||
#define MESG_STR(x) x, sizeof(x)
|
|
||||||
#define DM_PG_INIT_DELAY_MSECS 2000
|
#define DM_PG_INIT_DELAY_MSECS 2000
|
||||||
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
|
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
|
||||||
|
|
||||||
@@ -505,80 +504,29 @@ static void trigger_event(struct work_struct *work)
  * <#paths> <#per-path selector args>
  *    [<path> [<arg>]* ]+ ]+
  *---------------------------------------------------------------*/
-struct param {
-	unsigned min;
-	unsigned max;
-	char *error;
-};
-
-static int read_param(struct param *param, char *str, unsigned *v, char **error)
-{
-	if (!str ||
-	    (sscanf(str, "%u", v) != 1) ||
-	    (*v < param->min) ||
-	    (*v > param->max)) {
-		*error = param->error;
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-struct arg_set {
-	unsigned argc;
-	char **argv;
-};
-
-static char *shift(struct arg_set *as)
-{
-	char *r;
-
-	if (as->argc) {
-		as->argc--;
-		r = *as->argv;
-		as->argv++;
-		return r;
-	}
-
-	return NULL;
-}
-
-static void consume(struct arg_set *as, unsigned n)
-{
-	BUG_ON (as->argc < n);
-	as->argc -= n;
-	as->argv += n;
-}
-
-static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
+static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
 			       struct dm_target *ti)
 {
 	int r;
 	struct path_selector_type *pst;
 	unsigned ps_argc;
 
-	static struct param _params[] = {
+	static struct dm_arg _args[] = {
 		{0, 1024, "invalid number of path selector args"},
 	};
 
-	pst = dm_get_path_selector(shift(as));
+	pst = dm_get_path_selector(dm_shift_arg(as));
 	if (!pst) {
 		ti->error = "unknown path selector type";
 		return -EINVAL;
 	}
 
-	r = read_param(_params, shift(as), &ps_argc, &ti->error);
+	r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
 	if (r) {
 		dm_put_path_selector(pst);
 		return -EINVAL;
 	}
 
-	if (ps_argc > as->argc) {
-		dm_put_path_selector(pst);
-		ti->error = "not enough arguments for path selector";
-		return -EINVAL;
-	}
-
 	r = pst->create(&pg->ps, ps_argc, as->argv);
 	if (r) {
 		dm_put_path_selector(pst);
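
This hunk replaces dm-mpath's private param/arg_set helpers with the shared dm_arg helpers that this series moves into common dm code (dm_shift_arg, dm_read_arg, dm_read_arg_group, dm_consume_args). The grouped reader validates that enough arguments remain to satisfy the count it just parsed, which is why the open-coded "ps_argc > as->argc" check could be dropped. A minimal userspace sketch of the cursor pattern these helpers implement (names mirror the kernel helpers; the bounds-checking details here are illustrative, not the kernel implementation):

	#include <stdio.h>

	struct dm_arg_set {
		unsigned argc;
		char **argv;
	};

	struct dm_arg {
		unsigned min;
		unsigned max;
		const char *error;
	};

	/* Pop one argument off the front of the set, or NULL if exhausted. */
	static char *dm_shift_arg(struct dm_arg_set *as)
	{
		if (!as->argc)
			return NULL;
		as->argc--;
		return *as->argv++;
	}

	/* Parse the next argument as a bounded unsigned integer. */
	static int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *as,
			       unsigned *value, const char **error)
	{
		char *str = dm_shift_arg(as);

		if (!str || sscanf(str, "%u", value) != 1 ||
		    *value < arg->min || *value > arg->max) {
			*error = arg->error;
			return -1;
		}
		return 0;
	}

	int main(void)
	{
		char *argv[] = { "0", "2", "1", "round-robin" };
		struct dm_arg_set as = { 4, argv };
		static const struct dm_arg args = { 0, 1024, "invalid count" };
		unsigned v;
		const char *err;

		if (!dm_read_arg(&args, &as, &v, &err))
			printf("first arg: %u, %u left\n", v, as.argc);
		return 0;
	}

In the kernel code the pattern is the same: counts are read with dm_read_arg/dm_read_arg_group, and dm_consume_args skips over arguments a callee has already handled.
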
@@ -587,12 +535,12 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
 	}
 
 	pg->ps.type = pst;
-	consume(as, ps_argc);
+	dm_consume_args(as, ps_argc);
 
 	return 0;
 }
 
-static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
+static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
 				 struct dm_target *ti)
 {
 	int r;
@@ -609,7 +557,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
-	r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
+	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
 			  &p->path.dev);
 	if (r) {
 		ti->error = "error getting device";
@@ -660,16 +608,16 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
 	return ERR_PTR(r);
 }
 
-static struct priority_group *parse_priority_group(struct arg_set *as,
+static struct priority_group *parse_priority_group(struct dm_arg_set *as,
 						   struct multipath *m)
 {
-	static struct param _params[] = {
+	static struct dm_arg _args[] = {
 		{1, 1024, "invalid number of paths"},
 		{0, 1024, "invalid number of selector args"}
 	};
 
 	int r;
-	unsigned i, nr_selector_args, nr_params;
+	unsigned i, nr_selector_args, nr_args;
 	struct priority_group *pg;
 	struct dm_target *ti = m->ti;
 
@@ -693,26 +641,26 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 	/*
 	 * read the paths
 	 */
-	r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
+	r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
 	if (r)
 		goto bad;
 
-	r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
+	r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
 	if (r)
 		goto bad;
 
-	nr_params = 1 + nr_selector_args;
+	nr_args = 1 + nr_selector_args;
 	for (i = 0; i < pg->nr_pgpaths; i++) {
 		struct pgpath *pgpath;
-		struct arg_set path_args;
+		struct dm_arg_set path_args;
 
-		if (as->argc < nr_params) {
+		if (as->argc < nr_args) {
 			ti->error = "not enough path parameters";
 			r = -EINVAL;
 			goto bad;
 		}
 
-		path_args.argc = nr_params;
+		path_args.argc = nr_args;
 		path_args.argv = as->argv;
 
 		pgpath = parse_path(&path_args, &pg->ps, ti);
@@ -723,7 +671,7 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 
 		pgpath->pg = pg;
 		list_add_tail(&pgpath->list, &pg->pgpaths);
-		consume(as, nr_params);
+		dm_consume_args(as, nr_args);
 	}
 
 	return pg;
@@ -733,28 +681,23 @@ static struct priority_group *parse_priority_group(struct arg_set *as,
 	return ERR_PTR(r);
 }
 
-static int parse_hw_handler(struct arg_set *as, struct multipath *m)
+static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
 {
 	unsigned hw_argc;
 	int ret;
 	struct dm_target *ti = m->ti;
 
-	static struct param _params[] = {
+	static struct dm_arg _args[] = {
 		{0, 1024, "invalid number of hardware handler args"},
 	};
 
-	if (read_param(_params, shift(as), &hw_argc, &ti->error))
+	if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
 		return -EINVAL;
 
 	if (!hw_argc)
 		return 0;
 
-	if (hw_argc > as->argc) {
-		ti->error = "not enough arguments for hardware handler";
-		return -EINVAL;
-	}
-
-	m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
+	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
 	request_module("scsi_dh_%s", m->hw_handler_name);
 	if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
 		ti->error = "unknown hardware handler type";
@@ -778,7 +721,7 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
 		for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
 			j = sprintf(p, "%s", as->argv[i]);
 	}
-	consume(as, hw_argc - 1);
+	dm_consume_args(as, hw_argc - 1);
 
 	return 0;
 fail:
@@ -787,20 +730,20 @@ fail:
 	return ret;
 }
 
-static int parse_features(struct arg_set *as, struct multipath *m)
+static int parse_features(struct dm_arg_set *as, struct multipath *m)
 {
 	int r;
 	unsigned argc;
 	struct dm_target *ti = m->ti;
-	const char *param_name;
+	const char *arg_name;
 
-	static struct param _params[] = {
+	static struct dm_arg _args[] = {
 		{0, 5, "invalid number of feature args"},
 		{1, 50, "pg_init_retries must be between 1 and 50"},
 		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
 	};
 
-	r = read_param(_params, shift(as), &argc, &ti->error);
+	r = dm_read_arg_group(_args, as, &argc, &ti->error);
 	if (r)
 		return -EINVAL;
 
@@ -808,26 +751,24 @@ static int parse_features(struct arg_set *as, struct multipath *m)
 		return 0;
 
 	do {
-		param_name = shift(as);
+		arg_name = dm_shift_arg(as);
 		argc--;
 
-		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+		if (!strcasecmp(arg_name, "queue_if_no_path")) {
 			r = queue_if_no_path(m, 1, 0);
 			continue;
 		}
 
-		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+		if (!strcasecmp(arg_name, "pg_init_retries") &&
 		    (argc >= 1)) {
-			r = read_param(_params + 1, shift(as),
-				       &m->pg_init_retries, &ti->error);
+			r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
 			argc--;
 			continue;
 		}
 
-		if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) &&
+		if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
 		    (argc >= 1)) {
-			r = read_param(_params + 2, shift(as),
-				       &m->pg_init_delay_msecs, &ti->error);
+			r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
 			argc--;
 			continue;
 		}
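
For reference, a multipath table exercising all three feature arguments parsed here might look like the line below (a hypothetical example; path, selector and repeat-count fields are illustrative, and the leading 5 counts the feature words that follow):

	0 2097152 multipath 5 queue_if_no_path pg_init_retries 3 pg_init_delay_msecs 2000 0 1 1 round-robin 0 1 1 8:16 1000
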
@@ -842,15 +783,15 @@ static int parse_features(struct arg_set *as, struct multipath *m)
 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 			 char **argv)
 {
-	/* target parameters */
-	static struct param _params[] = {
+	/* target arguments */
+	static struct dm_arg _args[] = {
 		{0, 1024, "invalid number of priority groups"},
 		{0, 1024, "invalid initial priority group number"},
 	};
 
 	int r;
 	struct multipath *m;
-	struct arg_set as;
+	struct dm_arg_set as;
 	unsigned pg_count = 0;
 	unsigned next_pg_num;
 
@@ -871,11 +812,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	if (r)
 		goto bad;
 
-	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
+	r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
 	if (r)
 		goto bad;
 
-	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
+	r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
 	if (r)
 		goto bad;
 
@@ -1505,10 +1446,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 	}
 
 	if (argc == 1) {
-		if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
+		if (!strcasecmp(argv[0], "queue_if_no_path")) {
 			r = queue_if_no_path(m, 1, 0);
 			goto out;
-		} else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
+		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
 			r = queue_if_no_path(m, 0, 0);
 			goto out;
 		}
@@ -1519,18 +1460,18 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 		goto out;
 	}
 
-	if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
+	if (!strcasecmp(argv[0], "disable_group")) {
 		r = bypass_pg_num(m, argv[1], 1);
 		goto out;
-	} else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
+	} else if (!strcasecmp(argv[0], "enable_group")) {
 		r = bypass_pg_num(m, argv[1], 0);
 		goto out;
-	} else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
+	} else if (!strcasecmp(argv[0], "switch_group")) {
 		r = switch_pg_num(m, argv[1]);
 		goto out;
-	} else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
+	} else if (!strcasecmp(argv[0], "reinstate_path"))
 		action = reinstate_path;
-	else if (!strnicmp(argv[0], MESG_STR("fail_path")))
+	else if (!strcasecmp(argv[0], "fail_path"))
 		action = fail_path;
 	else {
 		DMWARN("Unrecognised multipath message received.");
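
The switch to strcasecmp keeps message matching case-insensitive while retiring the MESG_STR(x) string-plus-sizeof macro; behaviour is unchanged. These messages are delivered with the standard dmsetup message interface, for example (device name illustrative):

	dmsetup message mpathX 0 queue_if_no_path
	dmsetup message mpathX 0 fail_path 8:32
	dmsetup message mpathX 0 switch_group 2
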
@@ -8,19 +8,19 @@
 #include <linux/slab.h>
 
 #include "md.h"
+#include "raid1.h"
 #include "raid5.h"
-#include "dm.h"
 #include "bitmap.h"
 
+#include <linux/device-mapper.h>
+
 #define DM_MSG_PREFIX "raid"
 
 /*
- * If the MD doesn't support MD_SYNC_STATE_FORCED yet, then
- * make it so the flag doesn't set anything.
+ * The following flags are used by dm-raid.c to set up the array state.
+ * They must be cleared before md_run is called.
  */
-#ifndef MD_SYNC_STATE_FORCED
-#define MD_SYNC_STATE_FORCED 0
-#endif
+#define FirstUse 10		/* rdev flag */
 
 struct raid_dev {
 	/*
@@ -43,14 +43,15 @@ struct raid_dev {
 /*
  * Flags for rs->print_flags field.
  */
-#define DMPF_DAEMON_SLEEP      0x1
-#define DMPF_MAX_WRITE_BEHIND  0x2
-#define DMPF_SYNC              0x4
-#define DMPF_NOSYNC            0x8
-#define DMPF_STRIPE_CACHE      0x10
-#define DMPF_MIN_RECOVERY_RATE 0x20
-#define DMPF_MAX_RECOVERY_RATE 0x40
-
+#define DMPF_SYNC              0x1
+#define DMPF_NOSYNC            0x2
+#define DMPF_REBUILD           0x4
+#define DMPF_DAEMON_SLEEP      0x8
+#define DMPF_MIN_RECOVERY_RATE 0x10
+#define DMPF_MAX_RECOVERY_RATE 0x20
+#define DMPF_MAX_WRITE_BEHIND  0x40
+#define DMPF_STRIPE_CACHE      0x80
+#define DMPF_REGION_SIZE       0X100
 struct raid_set {
 	struct dm_target *ti;
 
@@ -72,6 +73,7 @@ static struct raid_type {
 	const unsigned level;		/* RAID level. */
 	const unsigned algorithm;	/* RAID algorithm. */
 } raid_types[] = {
+	{"raid1",    "RAID1 (mirroring)",		0, 2, 1, 0 /* NONE */},
 	{"raid4",    "RAID4 (dedicated parity disk)",	1, 2, 5, ALGORITHM_PARITY_0},
 	{"raid5_la", "RAID5 (left asymmetric)",		1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
 	{"raid5_ra", "RAID5 (right asymmetric)",	1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -105,7 +107,8 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra
 	}
 
 	sectors_per_dev = ti->len;
-	if (sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
+	if ((raid_type->level > 1) &&
+	    sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) {
 		ti->error = "Target length not divisible by number of data devices";
 		return ERR_PTR(-EINVAL);
 	}
@@ -147,9 +150,16 @@ static void context_free(struct raid_set *rs)
 {
 	int i;
 
-	for (i = 0; i < rs->md.raid_disks; i++)
+	for (i = 0; i < rs->md.raid_disks; i++) {
+		if (rs->dev[i].meta_dev)
+			dm_put_device(rs->ti, rs->dev[i].meta_dev);
+		if (rs->dev[i].rdev.sb_page)
+			put_page(rs->dev[i].rdev.sb_page);
+		rs->dev[i].rdev.sb_page = NULL;
+		rs->dev[i].rdev.sb_loaded = 0;
 		if (rs->dev[i].data_dev)
 			dm_put_device(rs->ti, rs->dev[i].data_dev);
+	}
 
 	kfree(rs);
 }
@@ -159,7 +169,16 @@ static void context_free(struct raid_set *rs)
  *    <meta_dev>: meta device name or '-' if missing
  *    <data_dev>: data device name or '-' if missing
  *
- * This code parses those words.
+ * The following are permitted:
+ *    - -
+ *    - <data_dev>
+ *    <meta_dev> <data_dev>
+ *
+ * The following is not allowed:
+ *    <meta_dev> -
+ *
+ * This code parses those words.  If there is a failure,
+ * the caller must use context_free to unwind the operations.
  */
 static int dev_parms(struct raid_set *rs, char **argv)
 {
@@ -182,8 +201,16 @@ static int dev_parms(struct raid_set *rs, char **argv)
 		rs->dev[i].rdev.mddev = &rs->md;
 
 		if (strcmp(argv[0], "-")) {
-			rs->ti->error = "Metadata devices not supported";
-			return -EINVAL;
+			ret = dm_get_device(rs->ti, argv[0],
+					    dm_table_get_mode(rs->ti->table),
+					    &rs->dev[i].meta_dev);
+			rs->ti->error = "RAID metadata device lookup failure";
+			if (ret)
+				return ret;
+
+			rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL);
+			if (!rs->dev[i].rdev.sb_page)
+				return -ENOMEM;
 		}
 
 		if (!strcmp(argv[1], "-")) {
@@ -193,6 +220,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
 				return -EINVAL;
 			}
 
+			rs->ti->error = "No data device supplied with metadata device";
+			if (rs->dev[i].meta_dev)
+				return -EINVAL;
+
 			continue;
 		}
 
@@ -204,6 +235,10 @@ static int dev_parms(struct raid_set *rs, char **argv)
 			return ret;
 		}
 
+		if (rs->dev[i].meta_dev) {
+			metadata_available = 1;
+			rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev;
+		}
 		rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev;
 		list_add(&rs->dev[i].rdev.same_set, &rs->md.disks);
 		if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
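
With dev_parms now accepting metadata devices, a raid1 table can pass <meta_dev> <data_dev> pairs instead of '-' placeholders. A hypothetical example table line (device numbers and length made up; the 3 counts the raid params, the 2 counts the raid devices):

	0 4194304 raid raid1 3 0 region_size 1024 2 254:0 254:1 254:2 254:3

Here 254:0 and 254:2 hold the per-device superblock and bitmap while 254:1 and 254:3 carry the mirrored data; a chunk size of 0 is accepted because RAID1 ignores it, as the parsing below shows.
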
@@ -234,57 +269,150 @@ static int dev_parms(struct raid_set *rs, char **argv)
 	return 0;
 }
 
+/*
+ * validate_region_size
+ * @rs
+ * @region_size:  region size in sectors.  If 0, pick a size (4MiB default).
+ *
+ * Set rs->md.bitmap_info.chunksize (which really refers to 'region size').
+ * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap.
+ *
+ * Returns: 0 on success, -EINVAL on failure.
+ */
+static int validate_region_size(struct raid_set *rs, unsigned long region_size)
+{
+	unsigned long min_region_size = rs->ti->len / (1 << 21);
+
+	if (!region_size) {
+		/*
+		 * Choose a reasonable default.  All figures in sectors.
+		 */
+		if (min_region_size > (1 << 13)) {
+			DMINFO("Choosing default region size of %lu sectors",
+			       region_size);
+			region_size = min_region_size;
+		} else {
+			DMINFO("Choosing default region size of 4MiB");
+			region_size = 1 << 13; /* sectors */
+		}
+	} else {
+		/*
+		 * Validate user-supplied value.
+		 */
+		if (region_size > rs->ti->len) {
+			rs->ti->error = "Supplied region size is too large";
+			return -EINVAL;
+		}
+
+		if (region_size < min_region_size) {
+			DMERR("Supplied region_size (%lu sectors) below minimum (%lu)",
+			      region_size, min_region_size);
+			rs->ti->error = "Supplied region size is too small";
+			return -EINVAL;
+		}
+
+		if (!is_power_of_2(region_size)) {
+			rs->ti->error = "Region size is not a power of 2";
+			return -EINVAL;
+		}
+
+		if (region_size < rs->md.chunk_sectors) {
+			rs->ti->error = "Region size is smaller than the chunk size";
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Convert sectors to bytes.
+	 */
+	rs->md.bitmap_info.chunksize = (region_size << 9);
+
+	return 0;
+}
+
 /*
  * Possible arguments are...
- * RAID456:
  *	<chunk_size> [optional_args]
  *
- * Optional args:
- *    [[no]sync]			Force or prevent recovery of the entire array
+ * Argument definitions
+ *    <chunk_size>			The number of sectors per disk that
+ *					will form the "stripe"
+ *    [[no]sync]			Force or prevent recovery of the
+ *					entire array
  *    [rebuild <idx>]			Rebuild the drive indicated by the index
- *    [daemon_sleep <ms>]		Time between bitmap daemon work to clear bits
+ *    [daemon_sleep <ms>]		Time between bitmap daemon work to
+ *					clear bits
  *    [min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 *    [max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
+ *    [write_mostly <idx>]		Indicate a write mostly drive via index
 *    [max_write_behind <sectors>]	See '-write-behind=' (man mdadm)
 *    [stripe_cache <sectors>]		Stripe cache size for higher RAIDs
+ *    [region_size <sectors>]		Defines granularity of bitmap
  */
 static int parse_raid_params(struct raid_set *rs, char **argv,
 			     unsigned num_raid_params)
 {
 	unsigned i, rebuild_cnt = 0;
-	unsigned long value;
+	unsigned long value, region_size = 0;
 	char *key;
 
 	/*
 	 * First, parse the in-order required arguments
+	 * "chunk_size" is the only argument of this type.
 	 */
-	if ((strict_strtoul(argv[0], 10, &value) < 0) ||
-	    !is_power_of_2(value) || (value < 8)) {
+	if ((strict_strtoul(argv[0], 10, &value) < 0)) {
 		rs->ti->error = "Bad chunk size";
 		return -EINVAL;
+	} else if (rs->raid_type->level == 1) {
+		if (value)
+			DMERR("Ignoring chunk size parameter for RAID 1");
+		value = 0;
+	} else if (!is_power_of_2(value)) {
+		rs->ti->error = "Chunk size must be a power of 2";
+		return -EINVAL;
+	} else if (value < 8) {
+		rs->ti->error = "Chunk size value is too small";
+		return -EINVAL;
 	}
 
 	rs->md.new_chunk_sectors = rs->md.chunk_sectors = value;
 	argv++;
 	num_raid_params--;
 
+	/*
+	 * We set each individual device as In_sync with a completed
+	 * 'recovery_offset'.  If there has been a device failure or
+	 * replacement then one of the following cases applies:
+	 *
+	 *   1) User specifies 'rebuild'.
+	 *      - Device is reset when param is read.
+	 *   2) A new device is supplied.
+	 *      - No matching superblock found, resets device.
+	 *   3) Device failure was transient and returns on reload.
+	 *      - Failure noticed, resets device for bitmap replay.
+	 *   4) Device hadn't completed recovery after previous failure.
+	 *      - Superblock is read and overrides recovery_offset.
+	 *
+	 * What is found in the superblocks of the devices is always
+	 * authoritative, unless 'rebuild' or '[no]sync' was specified.
+	 */
+	for (i = 0; i < rs->md.raid_disks; i++) {
+		set_bit(In_sync, &rs->dev[i].rdev.flags);
+		rs->dev[i].rdev.recovery_offset = MaxSector;
+	}
+
 	/*
 	 * Second, parse the unordered optional arguments
 	 */
-	for (i = 0; i < rs->md.raid_disks; i++)
-		set_bit(In_sync, &rs->dev[i].rdev.flags);
-
 	for (i = 0; i < num_raid_params; i++) {
-		if (!strcmp(argv[i], "nosync")) {
+		if (!strcasecmp(argv[i], "nosync")) {
 			rs->md.recovery_cp = MaxSector;
 			rs->print_flags |= DMPF_NOSYNC;
-			rs->md.flags |= MD_SYNC_STATE_FORCED;
 			continue;
 		}
-		if (!strcmp(argv[i], "sync")) {
+		if (!strcasecmp(argv[i], "sync")) {
 			rs->md.recovery_cp = 0;
 			rs->print_flags |= DMPF_SYNC;
-			rs->md.flags |= MD_SYNC_STATE_FORCED;
 			continue;
 		}
 
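
Two data points make the region-size defaulting concrete (ti->len is in 512-byte sectors). A 2 TiB target is 2^32 sectors, so min_region_size = 2^32 / 2^21 = 2048 sectors; that is below the 8192-sector floor, so the 4 MiB default (8192 sectors) is used. A 16 TiB target is 2^35 sectors, giving min_region_size = 16384, which exceeds 8192, so 16384 sectors (8 MiB) is chosen. Note that in this latter branch the DMINFO fires before region_size is assigned, so as written it reports the still-zero value rather than the size actually chosen.
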
@@ -300,9 +428,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 			return -EINVAL;
 		}
 
-		if (!strcmp(key, "rebuild")) {
-			if (++rebuild_cnt > rs->raid_type->parity_devs) {
-				rs->ti->error = "Too many rebuild drives given";
+		if (!strcasecmp(key, "rebuild")) {
+			rebuild_cnt++;
+			if (((rs->raid_type->level != 1) &&
+			     (rebuild_cnt > rs->raid_type->parity_devs)) ||
+			    ((rs->raid_type->level == 1) &&
+			     (rebuild_cnt > (rs->md.raid_disks - 1)))) {
+				rs->ti->error = "Too many rebuild devices specified for given RAID type";
 				return -EINVAL;
 			}
 			if (value > rs->md.raid_disks) {
@@ -311,7 +443,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 			}
 			clear_bit(In_sync, &rs->dev[value].rdev.flags);
 			rs->dev[value].rdev.recovery_offset = 0;
-		} else if (!strcmp(key, "max_write_behind")) {
+			rs->print_flags |= DMPF_REBUILD;
+		} else if (!strcasecmp(key, "write_mostly")) {
+			if (rs->raid_type->level != 1) {
+				rs->ti->error = "write_mostly option is only valid for RAID1";
+				return -EINVAL;
+			}
+			if (value > rs->md.raid_disks) {
+				rs->ti->error = "Invalid write_mostly drive index given";
+				return -EINVAL;
+			}
+			set_bit(WriteMostly, &rs->dev[value].rdev.flags);
+		} else if (!strcasecmp(key, "max_write_behind")) {
+			if (rs->raid_type->level != 1) {
+				rs->ti->error = "max_write_behind option is only valid for RAID1";
+				return -EINVAL;
+			}
 			rs->print_flags |= DMPF_MAX_WRITE_BEHIND;
 
 			/*
@@ -324,14 +471,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 				return -EINVAL;
 			}
 			rs->md.bitmap_info.max_write_behind = value;
-		} else if (!strcmp(key, "daemon_sleep")) {
+		} else if (!strcasecmp(key, "daemon_sleep")) {
 			rs->print_flags |= DMPF_DAEMON_SLEEP;
 			if (!value || (value > MAX_SCHEDULE_TIMEOUT)) {
 				rs->ti->error = "daemon sleep period out of range";
 				return -EINVAL;
 			}
 			rs->md.bitmap_info.daemon_sleep = value;
-		} else if (!strcmp(key, "stripe_cache")) {
+		} else if (!strcasecmp(key, "stripe_cache")) {
 			rs->print_flags |= DMPF_STRIPE_CACHE;
 
 			/*
@@ -348,20 +495,23 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 				rs->ti->error = "Bad stripe_cache size";
 				return -EINVAL;
 			}
-		} else if (!strcmp(key, "min_recovery_rate")) {
+		} else if (!strcasecmp(key, "min_recovery_rate")) {
 			rs->print_flags |= DMPF_MIN_RECOVERY_RATE;
 			if (value > INT_MAX) {
 				rs->ti->error = "min_recovery_rate out of range";
 				return -EINVAL;
 			}
 			rs->md.sync_speed_min = (int)value;
-		} else if (!strcmp(key, "max_recovery_rate")) {
+		} else if (!strcasecmp(key, "max_recovery_rate")) {
 			rs->print_flags |= DMPF_MAX_RECOVERY_RATE;
 			if (value > INT_MAX) {
 				rs->ti->error = "max_recovery_rate out of range";
 				return -EINVAL;
 			}
 			rs->md.sync_speed_max = (int)value;
+		} else if (!strcasecmp(key, "region_size")) {
+			rs->print_flags |= DMPF_REGION_SIZE;
+			region_size = value;
 		} else {
 			DMERR("Unable to parse RAID parameter: %s", key);
 			rs->ti->error = "Unable to parse RAID parameters";
@@ -369,6 +519,19 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 		}
 	}
 
+	if (validate_region_size(rs, region_size))
+		return -EINVAL;
+
+	if (rs->md.chunk_sectors)
+		rs->ti->split_io = rs->md.chunk_sectors;
+	else
+		rs->ti->split_io = region_size;
+
+	if (rs->md.chunk_sectors)
+		rs->ti->split_io = rs->md.chunk_sectors;
+	else
+		rs->ti->split_io = region_size;
+
 	/* Assume there are no metadata devices until the drives are parsed */
 	rs->md.persistent = 0;
 	rs->md.external = 1;
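
Putting the pieces together, a table using the new optional keys might look like the hypothetical line below: chunk_size is the one positional raid param, everything after it is unordered key/value pairs, and the trailing count introduces the <meta_dev> <data_dev> pairs (device numbers are made up; the length is divisible by the three data devices of a four-disk raid5):

	0 50331648 raid raid5_la 5 128 region_size 8192 rebuild 2 4 - 254:0 - 254:1 - 254:2 - 254:3

Also worth noting: the ti->split_io assignment block above appears twice in this hunk; the hunk's own line count (+519,19) confirms the duplication is in the committed code rather than a rendering glitch, so it is preserved here verbatim. The assignment is idempotent, merely redundant.
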
@@ -387,17 +550,351 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
 {
 	struct raid_set *rs = container_of(cb, struct raid_set, callbacks);
 
+	if (rs->raid_type->level == 1)
+		return md_raid1_congested(&rs->md, bits);
+
 	return md_raid5_congested(&rs->md, bits);
 }
 
+/*
+ * This structure is never routinely used by userspace, unlike md superblocks.
+ * Devices with this superblock should only ever be accessed via device-mapper.
+ */
+#define DM_RAID_MAGIC 0x64526D44
+struct dm_raid_superblock {
+	__le32 magic;		/* "DmRd" */
+	__le32 features;	/* Used to indicate possible future changes */
+
+	__le32 num_devices;	/* Number of devices in this array. (Max 64) */
+	__le32 array_position;	/* The position of this drive in the array */
+
+	__le64 events;		/* Incremented by md when superblock updated */
+	__le64 failed_devices;	/* Bit field of devices to indicate failures */
+
+	/*
+	 * This offset tracks the progress of the repair or replacement of
+	 * an individual drive.
+	 */
+	__le64 disk_recovery_offset;
+
+	/*
+	 * This offset tracks the progress of the initial array
+	 * synchronisation/parity calculation.
+	 */
+	__le64 array_resync_offset;
+
+	/*
+	 * RAID characteristics
+	 */
+	__le32 level;
+	__le32 layout;
+	__le32 stripe_sectors;
+
+	__u8 pad[452];		/* Round struct to 512 bytes. */
+				/* Always set to 0 when writing. */
+} __packed;
+
+static int read_disk_sb(mdk_rdev_t *rdev, int size)
+{
+	BUG_ON(!rdev->sb_page);
+
+	if (rdev->sb_loaded)
+		return 0;
+
+	if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
+		DMERR("Failed to read device superblock");
+		return -EINVAL;
+	}
+
+	rdev->sb_loaded = 1;
+
+	return 0;
+}
+
+static void super_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	mdk_rdev_t *r, *t;
+	uint64_t failed_devices;
+	struct dm_raid_superblock *sb;
+
+	sb = page_address(rdev->sb_page);
+	failed_devices = le64_to_cpu(sb->failed_devices);
+
+	rdev_for_each(r, t, mddev)
+		if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags))
+			failed_devices |= (1ULL << r->raid_disk);
+
+	memset(sb, 0, sizeof(*sb));
+
+	sb->magic = cpu_to_le32(DM_RAID_MAGIC);
+	sb->features = cpu_to_le32(0);	/* No features yet */
+
+	sb->num_devices = cpu_to_le32(mddev->raid_disks);
+	sb->array_position = cpu_to_le32(rdev->raid_disk);
+
+	sb->events = cpu_to_le64(mddev->events);
+	sb->failed_devices = cpu_to_le64(failed_devices);
+
+	sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset);
+	sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp);
+
+	sb->level = cpu_to_le32(mddev->level);
+	sb->layout = cpu_to_le32(mddev->layout);
+	sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
+}
+
+/*
+ * super_load
+ *
+ * This function creates a superblock if one is not found on the device
+ * and will decide which superblock to use if there's a choice.
+ *
+ * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise
+ */
+static int super_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev)
+{
+	int ret;
+	struct dm_raid_superblock *sb;
+	struct dm_raid_superblock *refsb;
+	uint64_t events_sb, events_refsb;
+
+	rdev->sb_start = 0;
+	rdev->sb_size = sizeof(*sb);
+
+	ret = read_disk_sb(rdev, rdev->sb_size);
+	if (ret)
+		return ret;
+
+	sb = page_address(rdev->sb_page);
+	if (sb->magic != cpu_to_le32(DM_RAID_MAGIC)) {
+		super_sync(rdev->mddev, rdev);
+
+		set_bit(FirstUse, &rdev->flags);
+
+		/* Force writing of superblocks to disk */
+		set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
+
+		/* Any superblock is better than none, choose that if given */
+		return refdev ? 0 : 1;
+	}
+
+	if (!refdev)
+		return 1;
+
+	events_sb = le64_to_cpu(sb->events);
+
+	refsb = page_address(refdev->sb_page);
+	events_refsb = le64_to_cpu(refsb->events);
+
+	return (events_sb > events_refsb) ? 1 : 0;
+}
+
+static int super_init_validation(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	int role;
+	struct raid_set *rs = container_of(mddev, struct raid_set, md);
+	uint64_t events_sb;
+	uint64_t failed_devices;
+	struct dm_raid_superblock *sb;
+	uint32_t new_devs = 0;
+	uint32_t rebuilds = 0;
+	mdk_rdev_t *r, *t;
+	struct dm_raid_superblock *sb2;
+
+	sb = page_address(rdev->sb_page);
+	events_sb = le64_to_cpu(sb->events);
+	failed_devices = le64_to_cpu(sb->failed_devices);
+
+	/*
+	 * Initialise to 1 if this is a new superblock.
+	 */
+	mddev->events = events_sb ? : 1;
+
+	/*
+	 * Reshaping is not currently allowed
+	 */
+	if ((le32_to_cpu(sb->level) != mddev->level) ||
+	    (le32_to_cpu(sb->layout) != mddev->layout) ||
+	    (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) {
+		DMERR("Reshaping arrays not yet supported.");
+		return -EINVAL;
+	}
+
+	/* We can only change the number of devices in RAID1 right now */
+	if ((rs->raid_type->level != 1) &&
+	    (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) {
+		DMERR("Reshaping arrays not yet supported.");
+		return -EINVAL;
+	}
+
+	if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)))
+		mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset);
+
+	/*
+	 * During load, we set FirstUse if a new superblock was written.
+	 * There are two reasons we might not have a superblock:
+	 * 1) The array is brand new - in which case, all of the
+	 *    devices must have their In_sync bit set.  Also,
+	 *    recovery_cp must be 0, unless forced.
+	 * 2) This is a new device being added to an old array
+	 *    and the new device needs to be rebuilt - in which
+	 *    case the In_sync bit will /not/ be set and
+	 *    recovery_cp must be MaxSector.
+	 */
+	rdev_for_each(r, t, mddev) {
+		if (!test_bit(In_sync, &r->flags)) {
+			if (!test_bit(FirstUse, &r->flags))
+				DMERR("Superblock area of "
+				      "rebuild device %d should have been "
+				      "cleared.", r->raid_disk);
+			set_bit(FirstUse, &r->flags);
+			rebuilds++;
+		} else if (test_bit(FirstUse, &r->flags))
+			new_devs++;
+	}
+
+	if (!rebuilds) {
+		if (new_devs == mddev->raid_disks) {
+			DMINFO("Superblocks created for new array");
+			set_bit(MD_ARRAY_FIRST_USE, &mddev->flags);
+		} else if (new_devs) {
+			DMERR("New device injected "
+			      "into existing array without 'rebuild' "
+			      "parameter specified");
+			return -EINVAL;
+		}
+	} else if (new_devs) {
+		DMERR("'rebuild' devices cannot be "
+		      "injected into an array with other first-time devices");
+		return -EINVAL;
+	} else if (mddev->recovery_cp != MaxSector) {
+		DMERR("'rebuild' specified while array is not in-sync");
+		return -EINVAL;
+	}
+
+	/*
+	 * Now we set the Faulty bit for those devices that are
+	 * recorded in the superblock as failed.
+	 */
+	rdev_for_each(r, t, mddev) {
+		if (!r->sb_page)
+			continue;
+		sb2 = page_address(r->sb_page);
+		sb2->failed_devices = 0;
+
+		/*
+		 * Check for any device re-ordering.
+		 */
+		if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) {
+			role = le32_to_cpu(sb2->array_position);
+			if (role != r->raid_disk) {
+				if (rs->raid_type->level != 1) {
+					rs->ti->error = "Cannot change device "
+						"positions in RAID array";
+					return -EINVAL;
+				}
+				DMINFO("RAID1 device #%d now at position #%d",
+				       role, r->raid_disk);
+			}
+
+			/*
+			 * Partial recovery is performed on
+			 * returning failed devices.
+			 */
+			if (failed_devices & (1 << role))
+				set_bit(Faulty, &r->flags);
+		}
+	}
+
+	return 0;
+}
+
+static int super_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	struct dm_raid_superblock *sb = page_address(rdev->sb_page);
+
+	/*
+	 * If mddev->events is not set, we know we have not yet initialized
+	 * the array.
+	 */
+	if (!mddev->events && super_init_validation(mddev, rdev))
+		return -EINVAL;
+
+	mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */
+	rdev->mddev->bitmap_info.default_offset = 4096 >> 9;
+	if (!test_bit(FirstUse, &rdev->flags)) {
+		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
+		if (rdev->recovery_offset != MaxSector)
+			clear_bit(In_sync, &rdev->flags);
+	}
+
+	/*
+	 * If a device comes back, set it as not In_sync and no longer faulty.
+	 */
+	if (test_bit(Faulty, &rdev->flags)) {
+		clear_bit(Faulty, &rdev->flags);
+		clear_bit(In_sync, &rdev->flags);
+		rdev->saved_raid_disk = rdev->raid_disk;
+		rdev->recovery_offset = 0;
+	}
+
+	clear_bit(FirstUse, &rdev->flags);
+
+	return 0;
+}
+
+/*
+ * Analyse superblocks and select the freshest.
+ */
+static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
+{
+	int ret;
+	mdk_rdev_t *rdev, *freshest, *tmp;
+	mddev_t *mddev = &rs->md;
+
+	freshest = NULL;
+	rdev_for_each(rdev, tmp, mddev) {
+		if (!rdev->meta_bdev)
+			continue;
+
+		ret = super_load(rdev, freshest);
+
+		switch (ret) {
+		case 1:
+			freshest = rdev;
+			break;
+		case 0:
+			break;
+		default:
+			ti->error = "Failed to load superblock";
+			return ret;
+		}
+	}
+
+	if (!freshest)
+		return 0;
+
+	/*
+	 * Validation of the freshest device provides the source of
+	 * validation for the remaining devices.
+	 */
+	ti->error = "Unable to assemble array: Invalid superblocks";
+	if (super_validate(mddev, freshest))
+		return -EINVAL;
+
+	rdev_for_each(rdev, tmp, mddev)
+		if ((rdev != freshest) && super_validate(mddev, rdev))
+			return -EINVAL;
+
+	return 0;
+}
+
 /*
  * Construct a RAID4/5/6 mapping:
  * Args:
  *	<raid_type> <#raid_params> <raid_params> \
  *	<#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> }
  *
- * ** metadata devices are not supported yet, use '-' instead **
- *
 * <raid_params> varies by <raid_type>.  See 'parse_raid_params' for
 * details on possible <raid_params>.
 */
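
The new on-disk superblock is one sector; the 452-byte pad is what makes the arithmetic work, and the magic 0x64526D44 reads as the ASCII "DmRd" once stored little-endian. A standalone userspace check of that layout, with uint32_t/uint64_t standing in for the kernel's __le32/__le64 (same widths, minus the endianness annotation):

	#include <stdint.h>
	#include <stdio.h>

	struct dm_raid_superblock {
		uint32_t magic;
		uint32_t features;
		uint32_t num_devices;
		uint32_t array_position;
		uint64_t events;
		uint64_t failed_devices;
		uint64_t disk_recovery_offset;
		uint64_t array_resync_offset;
		uint32_t level;
		uint32_t layout;
		uint32_t stripe_sectors;
		uint8_t  pad[452];
	} __attribute__((packed));

	int main(void)
	{
		/* 4*4 + 4*8 + 3*4 + 452 = 16 + 32 + 12 + 452 = 512 */
		printf("sizeof = %zu\n", sizeof(struct dm_raid_superblock));
		return sizeof(struct dm_raid_superblock) == 512 ? 0 : 1;
	}

super_load then picks the "freshest" device purely by comparing the 64-bit event counters, and a device with no recognisable magic gets a superblock synthesized on the spot and is marked FirstUse for the validation pass.
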
@@ -465,8 +962,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (ret)
 		goto bad;
 
+	rs->md.sync_super = super_sync;
+	ret = analyse_superblocks(ti, rs);
+	if (ret)
+		goto bad;
+
 	INIT_WORK(&rs->md.event_work, do_table_event);
-	ti->split_io = rs->md.chunk_sectors;
 	ti->private = rs;
 
 	mutex_lock(&rs->md.reconfig_mutex);
@@ -482,6 +983,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	rs->callbacks.congested_fn = raid_is_congested;
 	dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
+	mddev_suspend(&rs->md);
 	return 0;
 
 bad:
@@ -546,12 +1048,17 @@ static int raid_status(struct dm_target *ti, status_type_t type,
 		break;
 	case STATUSTYPE_TABLE:
 		/* The string you would use to construct this array */
-		for (i = 0; i < rs->md.raid_disks; i++)
-			if (rs->dev[i].data_dev &&
+		for (i = 0; i < rs->md.raid_disks; i++) {
+			if ((rs->print_flags & DMPF_REBUILD) &&
+			    rs->dev[i].data_dev &&
 			    !test_bit(In_sync, &rs->dev[i].rdev.flags))
-				raid_param_cnt++; /* for rebuilds */
+				raid_param_cnt += 2; /* for rebuilds */
+			if (rs->dev[i].data_dev &&
+			    test_bit(WriteMostly, &rs->dev[i].rdev.flags))
+				raid_param_cnt += 2;
+		}
 
-		raid_param_cnt += (hweight64(rs->print_flags) * 2);
+		raid_param_cnt += (hweight64(rs->print_flags & ~DMPF_REBUILD) * 2);
 		if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))
 			raid_param_cnt--;
 
@@ -565,7 +1072,8 @@ static int raid_status(struct dm_target *ti, status_type_t type,
 			DMEMIT(" nosync");
 
 		for (i = 0; i < rs->md.raid_disks; i++)
-			if (rs->dev[i].data_dev &&
+			if ((rs->print_flags & DMPF_REBUILD) &&
+			    rs->dev[i].data_dev &&
 			    !test_bit(In_sync, &rs->dev[i].rdev.flags))
 				DMEMIT(" rebuild %u", i);
 
@@ -579,6 +1087,11 @@ static int raid_status(struct dm_target *ti, status_type_t type,
 		if (rs->print_flags & DMPF_MAX_RECOVERY_RATE)
 			DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max);
 
+		for (i = 0; i < rs->md.raid_disks; i++)
+			if (rs->dev[i].data_dev &&
+			    test_bit(WriteMostly, &rs->dev[i].rdev.flags))
+				DMEMIT(" write_mostly %u", i);
+
 		if (rs->print_flags & DMPF_MAX_WRITE_BEHIND)
 			DMEMIT(" max_write_behind %lu",
 			       rs->md.bitmap_info.max_write_behind);
@@ -591,9 +1104,16 @@ static int raid_status(struct dm_target *ti, status_type_t type,
 				  conf ? conf->max_nr_stripes * 2 : 0);
 		}
 
+		if (rs->print_flags & DMPF_REGION_SIZE)
+			DMEMIT(" region_size %lu",
+			       rs->md.bitmap_info.chunksize >> 9);
+
 		DMEMIT(" %d", rs->md.raid_disks);
 		for (i = 0; i < rs->md.raid_disks; i++) {
-			DMEMIT(" -"); /* metadata device */
+			if (rs->dev[i].meta_dev)
+				DMEMIT(" %s", rs->dev[i].meta_dev->name);
+			else
+				DMEMIT(" -");
 
 			if (rs->dev[i].data_dev)
 				DMEMIT(" %s", rs->dev[i].data_dev->name);
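
With these changes, STATUSTYPE_TABLE output round-trips the new arguments and reports real metadata device names rather than unconditional '-'. Given the raid5 example table above, dmsetup table would now emit roughly (emission order per the code: rebuild before region_size; exact names depend on how the devices were opened):

	0 50331648 raid raid5_la 5 128 rebuild 2 region_size 8192 4 - 254:0 - 254:1 - 254:2 - 254:3
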
@@ -650,12 +1170,13 @@ static void raid_resume(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
+	bitmap_load(&rs->md);
 	mddev_resume(&rs->md);
 }
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
@@ -58,25 +58,30 @@
 #define NUM_SNAPSHOT_HDR_CHUNKS 1
 
 struct disk_header {
-	uint32_t magic;
+	__le32 magic;
 
 	/*
 	 * Is this snapshot valid.  There is no way of recovering
 	 * an invalid snapshot.
 	 */
-	uint32_t valid;
+	__le32 valid;
 
 	/*
 	 * Simple, incrementing version. no backward
 	 * compatibility.
 	 */
-	uint32_t version;
+	__le32 version;
 
 	/* In sectors */
-	uint32_t chunk_size;
-};
+	__le32 chunk_size;
+} __packed;
 
 struct disk_exception {
+	__le64 old_chunk;
+	__le64 new_chunk;
+} __packed;
+
+struct core_exception {
 	uint64_t old_chunk;
 	uint64_t new_chunk;
 };
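
The split above separates what hits the platter (disk_exception: __packed, little-endian) from what the code manipulates (core_exception: native byte order), which the renamed helpers below then convert between. A userspace sketch of the same idea, with a hand-rolled little-endian store standing in for the kernel's cpu_to_le64():

	#include <stdint.h>
	#include <stdio.h>

	struct disk_exception {		/* on-disk: fixed layout, little-endian */
		uint64_t old_chunk;
		uint64_t new_chunk;
	} __attribute__((packed));

	struct core_exception {		/* in-core: native byte order */
		uint64_t old_chunk;
		uint64_t new_chunk;
	};

	/* Illustrative stand-in for cpu_to_le64(). */
	static uint64_t cpu_to_le64_sketch(uint64_t v)
	{
		uint64_t out = 0;
		unsigned char *p = (unsigned char *)&out;
		int i;

		for (i = 0; i < 8; i++)
			p[i] = (unsigned char)(v >> (8 * i));
		return out;
	}

	static void write_exception_sketch(struct disk_exception *de,
					   const struct core_exception *e)
	{
		de->old_chunk = cpu_to_le64_sketch(e->old_chunk);
		de->new_chunk = cpu_to_le64_sketch(e->new_chunk);
	}

	int main(void)
	{
		struct core_exception e = { 1, 42 };
		struct disk_exception de;

		write_exception_sketch(&de, &e);
		/* Byte 8 is the LSB of new_chunk: 0x2a on any host. */
		printf("on-disk new_chunk LSB = 0x%02x\n",
		       ((unsigned char *)&de)[8]);
		return 0;
	}
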
@@ -169,10 +174,9 @@ static int alloc_area(struct pstore *ps)
|
|||||||
if (!ps->area)
|
if (!ps->area)
|
||||||
goto err_area;
|
goto err_area;
|
||||||
|
|
||||||
ps->zero_area = vmalloc(len);
|
ps->zero_area = vzalloc(len);
|
||||||
if (!ps->zero_area)
|
if (!ps->zero_area)
|
||||||
goto err_zero_area;
|
goto err_zero_area;
|
||||||
memset(ps->zero_area, 0, len);
|
|
||||||
|
|
||||||
ps->header_area = vmalloc(len);
|
ps->header_area = vmalloc(len);
|
||||||
if (!ps->header_area)
|
if (!ps->header_area)
|
||||||
@@ -396,32 +400,32 @@ static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void read_exception(struct pstore *ps,
|
static void read_exception(struct pstore *ps,
|
||||||
uint32_t index, struct disk_exception *result)
|
uint32_t index, struct core_exception *result)
|
||||||
{
|
{
|
||||||
struct disk_exception *e = get_exception(ps, index);
|
struct disk_exception *de = get_exception(ps, index);
|
||||||
|
|
||||||
/* copy it */
|
/* copy it */
|
||||||
result->old_chunk = le64_to_cpu(e->old_chunk);
|
result->old_chunk = le64_to_cpu(de->old_chunk);
|
||||||
result->new_chunk = le64_to_cpu(e->new_chunk);
|
result->new_chunk = le64_to_cpu(de->new_chunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void write_exception(struct pstore *ps,
|
static void write_exception(struct pstore *ps,
|
||||||
uint32_t index, struct disk_exception *de)
|
uint32_t index, struct core_exception *e)
|
||||||
{
|
{
|
||||||
struct disk_exception *e = get_exception(ps, index);
|
struct disk_exception *de = get_exception(ps, index);
|
||||||
|
|
||||||
/* copy it */
|
/* copy it */
|
||||||
e->old_chunk = cpu_to_le64(de->old_chunk);
|
de->old_chunk = cpu_to_le64(e->old_chunk);
|
||||||
e->new_chunk = cpu_to_le64(de->new_chunk);
|
de->new_chunk = cpu_to_le64(e->new_chunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clear_exception(struct pstore *ps, uint32_t index)
|
static void clear_exception(struct pstore *ps, uint32_t index)
|
||||||
{
|
{
|
||||||
struct disk_exception *e = get_exception(ps, index);
|
struct disk_exception *de = get_exception(ps, index);
|
||||||
|
|
||||||
/* clear it */
|
/* clear it */
|
||||||
e->old_chunk = 0;
|
de->old_chunk = 0;
|
||||||
e->new_chunk = 0;
|
de->new_chunk = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -437,13 +441,13 @@ static int insert_exceptions(struct pstore *ps,
|
|||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
struct disk_exception de;
|
struct core_exception e;
|
||||||
|
|
||||||
/* presume the area is full */
|
/* presume the area is full */
|
||||||
*full = 1;
|
*full = 1;
|
||||||
|
|
||||||
for (i = 0; i < ps->exceptions_per_area; i++) {
|
for (i = 0; i < ps->exceptions_per_area; i++) {
|
||||||
-		read_exception(ps, i, &de);
+		read_exception(ps, i, &e);

		/*
		 * If the new_chunk is pointing at the start of
@@ -451,7 +455,7 @@ static int insert_exceptions(struct pstore *ps,
		 * is we know that we've hit the end of the
		 * exceptions.  Therefore the area is not full.
		 */
-		if (de.new_chunk == 0LL) {
+		if (e.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
@@ -460,13 +464,13 @@ static int insert_exceptions(struct pstore *ps,
		/*
		 * Keep track of the start of the free chunks.
		 */
-		if (ps->next_free <= de.new_chunk)
-			ps->next_free = de.new_chunk + 1;
+		if (ps->next_free <= e.new_chunk)
+			ps->next_free = e.new_chunk + 1;

		/*
		 * Otherwise we add the exception to the snapshot.
		 */
-		r = callback(callback_context, de.old_chunk, de.new_chunk);
+		r = callback(callback_context, e.old_chunk, e.new_chunk);
		if (r)
			return r;
	}
@@ -563,7 +567,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));
	if (!ps->callbacks)
		return -ENOMEM;

@@ -641,12 +645,12 @@ static void persistent_commit_exception(struct dm_exception_store *store,
 {
	unsigned int i;
	struct pstore *ps = get_info(store);
-	struct disk_exception de;
+	struct core_exception ce;
	struct commit_callback *cb;

-	de.old_chunk = e->old_chunk;
-	de.new_chunk = e->new_chunk;
-	write_exception(ps, ps->current_committed++, &de);
+	ce.old_chunk = e->old_chunk;
+	ce.new_chunk = e->new_chunk;
+	write_exception(ps, ps->current_committed++, &ce);

	/*
	 * Add the callback to the back of the array.  This code
@@ -670,7 +674,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
	 * If we completely filled the current area, then wipe the next one.
	 */
	if ((ps->current_committed == ps->exceptions_per_area) &&
	    zero_disk_area(ps, ps->current_area + 1))
		ps->valid = 0;

	/*
@@ -701,7 +705,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
				    chunk_t *last_new_chunk)
 {
	struct pstore *ps = get_info(store);
-	struct disk_exception de;
+	struct core_exception ce;
	int nr_consecutive;
	int r;

@@ -722,9 +726,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
		ps->current_committed = ps->exceptions_per_area;
	}

-	read_exception(ps, ps->current_committed - 1, &de);
-	*last_old_chunk = de.old_chunk;
-	*last_new_chunk = de.new_chunk;
+	read_exception(ps, ps->current_committed - 1, &ce);
+	*last_old_chunk = ce.old_chunk;
+	*last_new_chunk = ce.new_chunk;

	/*
	 * Find number of consecutive chunks within the current area,
@@ -733,9 +737,9 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
	     nr_consecutive++) {
		read_exception(ps, ps->current_committed - 1 - nr_consecutive,
-			       &de);
-		if (de.old_chunk != *last_old_chunk - nr_consecutive ||
-		    de.new_chunk != *last_new_chunk - nr_consecutive)
+			       &ce);
+		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
+		    ce.new_chunk != *last_new_chunk - nr_consecutive)
			break;
	}

@@ -753,7 +757,7 @@ static int persistent_commit_merge(struct dm_exception_store *store,
	for (i = 0; i < nr_merged; i++)
		clear_exception(ps, ps->current_committed - 1 - i);

-	r = area_io(ps, WRITE);
+	r = area_io(ps, WRITE_FLUSH_FUA);
	if (r < 0)
		return r;

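The last hunk above changes persistent_commit_merge() to write the exception area with WRITE_FLUSH_FUA instead of a plain WRITE, so a merge is not acknowledged while its metadata still sits in a volatile disk cache. For readers unfamiliar with the 3.0-era block flags, a minimal sketch of what that rw value requests; the helper name is invented and is not part of this merge:

    #include <linux/bio.h>
    #include <linux/fs.h>	/* WRITE_FLUSH_FUA = WRITE | REQ_FLUSH | REQ_FUA | ... */

    /*
     * Hypothetical helper: submit a bio so that, on completion, the data
     * is on stable media.  REQ_FLUSH drains the device's volatile write
     * cache before the write; REQ_FUA makes this write itself durable
     * before it is signalled complete.
     */
    static void submit_durable_write(struct bio *bio)
    {
    	submit_bio(WRITE_FLUSH_FUA, bio);	/* completes via bio->bi_end_io */
    }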
@@ -29,16 +29,6 @@ static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
 #define dm_target_is_snapshot_merge(ti) \
	((ti)->type->name == dm_snapshot_merge_target_name)

-/*
- * The percentage increment we will wake up users at
- */
-#define WAKE_UP_PERCENT 5
-
-/*
- * kcopyd priority of snapshot operations
- */
-#define SNAPSHOT_COPY_PRIORITY 2
-
 /*
  * The size of the mempool used to track chunks in use.
  */
@@ -180,6 +170,13 @@ struct dm_snap_pending_exception {
	 * kcopyd.
	 */
	int started;
+
+	/*
+	 * For writing a complete chunk, bypassing the copy.
+	 */
+	struct bio *full_bio;
+	bio_end_io_t *full_bio_end_io;
+	void *full_bio_private;
 };

 /*
@@ -1055,8 +1052,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s) {
-		ti->error = "Cannot allocate snapshot context private "
-		    "structure";
+		ti->error = "Cannot allocate private snapshot structure";
		r = -ENOMEM;
		goto bad;
	}
@@ -1380,6 +1376,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
	struct dm_snapshot *s = pe->snap;
	struct bio *origin_bios = NULL;
	struct bio *snapshot_bios = NULL;
+	struct bio *full_bio = NULL;
	int error = 0;

	if (!success) {
@@ -1415,10 +1412,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
	 */
	dm_insert_exception(&s->complete, e);

 out:
	dm_remove_exception(&pe->e);
	snapshot_bios = bio_list_get(&pe->snapshot_bios);
	origin_bios = bio_list_get(&pe->origin_bios);
+	full_bio = pe->full_bio;
+	if (full_bio) {
+		full_bio->bi_end_io = pe->full_bio_end_io;
+		full_bio->bi_private = pe->full_bio_private;
+	}
	free_pending_exception(pe);

	increment_pending_exceptions_done_count();
@@ -1426,10 +1428,15 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
	up_write(&s->lock);

	/* Submit any pending write bios */
-	if (error)
+	if (error) {
+		if (full_bio)
+			bio_io_error(full_bio);
		error_bios(snapshot_bios);
-	else
+	} else {
+		if (full_bio)
+			bio_endio(full_bio, 0);
		flush_bios(snapshot_bios);
+	}

	retry_origin_bios(s, origin_bios);
 }
@@ -1480,8 +1487,33 @@ static void start_copy(struct dm_snap_pending_exception *pe)
	dest.count = src.count;

	/* Hand over to kcopyd */
-	dm_kcopyd_copy(s->kcopyd_client,
-		       &src, 1, &dest, 0, copy_callback, pe);
+	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
+}
+
+static void full_bio_end_io(struct bio *bio, int error)
+{
+	void *callback_data = bio->bi_private;
+
+	dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0);
+}
+
+static void start_full_bio(struct dm_snap_pending_exception *pe,
+			   struct bio *bio)
+{
+	struct dm_snapshot *s = pe->snap;
+	void *callback_data;
+
+	pe->full_bio = bio;
+	pe->full_bio_end_io = bio->bi_end_io;
+	pe->full_bio_private = bio->bi_private;
+
+	callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
+						   copy_callback, pe);
+
+	bio->bi_end_io = full_bio_end_io;
+	bio->bi_private = callback_data;
+
+	generic_make_request(bio);
 }

 static struct dm_snap_pending_exception *
@@ -1519,6 +1551,7 @@ __find_pending_exception(struct dm_snapshot *s,
	bio_list_init(&pe->origin_bios);
	bio_list_init(&pe->snapshot_bios);
	pe->started = 0;
+	pe->full_bio = NULL;

	if (s->store->type->prepare_exception(s->store, &pe->e)) {
		free_pending_exception(pe);
@@ -1612,10 +1645,19 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
		}

		remap_exception(s, &pe->e, bio, chunk);
-		bio_list_add(&pe->snapshot_bios, bio);

		r = DM_MAPIO_SUBMITTED;

+		if (!pe->started &&
+		    bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) {
+			pe->started = 1;
+			up_write(&s->lock);
+			start_full_bio(pe, bio);
+			goto out;
+		}
+
+		bio_list_add(&pe->snapshot_bios, bio);
+
		if (!pe->started) {
			/* this is protected by snap->lock */
			pe->started = 1;
@@ -1628,9 +1670,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
		map_context->ptr = track_chunk(s, chunk);
	}

 out_unlock:
	up_write(&s->lock);
 out:
	return r;
 }

@@ -1974,7 +2016,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
			pe_to_start_now = pe;
		}

 next_snapshot:
		up_write(&snap->lock);

		if (pe_to_start_now) {
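The snapshot_map() hunk above is the optimisation that skips reading the origin when a complete chunk is overwritten: if the triggering write covers every byte of the chunk, copying the origin data into the COW area first would be wasted I/O, so the bio is sent straight to the COW device via start_full_bio() and kcopyd's copy never runs. A sketch of the trigger test, with an invented helper name (bi_size is in bytes, chunk_size is in 512-byte sectors):

    #include <linux/bio.h>
    #include <linux/types.h>

    #define SECTOR_SHIFT 9	/* as used throughout dm */

    /*
     * Hypothetical predicate equivalent to the inline test in the patch:
     * a bio that spans the whole chunk makes the origin read pointless,
     * because every byte of the chunk is about to be replaced.
     */
    static inline bool bio_overwrites_chunk(sector_t chunk_size, struct bio *bio)
    {
    	return bio->bi_size == (chunk_size << SECTOR_SHIFT);
    }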
@@ -54,7 +54,6 @@ struct dm_table {
	sector_t *highs;
	struct dm_target *targets;

-	unsigned discards_supported:1;
	unsigned integrity_supported:1;

	/*
@@ -154,12 +153,11 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
		return NULL;

	size = nmemb * elem_size;
-	addr = vmalloc(size);
-	if (addr)
-		memset(addr, 0, size);
+	addr = vzalloc(size);

	return addr;
 }
+EXPORT_SYMBOL(dm_vcalloc);

 /*
  * highs, and targets are managed as dynamic arrays during a
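The dm_vcalloc() hunk replaces an open-coded vmalloc-plus-memset with vzalloc(), which was added in 2.6.37 and returns zeroed memory directly. For comparison, a standalone sketch of the two spellings (function names invented):

    #include <linux/vmalloc.h>
    #include <linux/string.h>

    /* Old spelling: allocate, then zero by hand if the allocation worked. */
    static void *zeroed_vmalloc_old(unsigned long size)
    {
    	void *addr = vmalloc(size);

    	if (addr)
    		memset(addr, 0, size);
    	return addr;
    }

    /* New spelling: one call, zeroed on success, NULL on failure. */
    static void *zeroed_vmalloc_new(unsigned long size)
    {
    	return vzalloc(size);
    }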
@@ -209,7 +207,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
	INIT_LIST_HEAD(&t->devices);
	INIT_LIST_HEAD(&t->target_callbacks);
	atomic_set(&t->holders, 0);
-	t->discards_supported = 1;

	if (!num_targets)
		num_targets = KEYS_PER_NODE;
@@ -281,6 +278,7 @@ void dm_table_get(struct dm_table *t)
 {
	atomic_inc(&t->holders);
 }
+EXPORT_SYMBOL(dm_table_get);

 void dm_table_put(struct dm_table *t)
 {
@@ -290,6 +288,7 @@ void dm_table_put(struct dm_table *t)
	smp_mb__before_atomic_dec();
	atomic_dec(&t->holders);
 }
+EXPORT_SYMBOL(dm_table_put);

 /*
  * Checks to see if we need to extend highs or targets.
@@ -455,13 +454,14 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
  * Add a device to the list, or just increment the usage count if
  * it's already present.
  */
-static int __table_get_device(struct dm_table *t, struct dm_target *ti,
-			      const char *path, fmode_t mode, struct dm_dev **result)
+int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+		  struct dm_dev **result)
 {
	int r;
	dev_t uninitialized_var(dev);
	struct dm_dev_internal *dd;
	unsigned int major, minor;
+	struct dm_table *t = ti->table;

	BUG_ON(!t);

@@ -509,6 +509,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
	*result = &dd->dm_dev;
	return 0;
 }
+EXPORT_SYMBOL(dm_get_device);

 int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
			 sector_t start, sector_t len, void *data)
@@ -539,23 +540,15 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
	 * If not we'll force DM to use PAGE_SIZE or
	 * smaller I/O, just to be safe.
	 */
-	if (q->merge_bvec_fn && !ti->type->merge)
+	if (dm_queue_merge_is_compulsory(q) && !ti->type->merge)
		blk_limits_max_hw_sectors(limits,
					  (unsigned int) (PAGE_SIZE >> 9));
	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_set_device_limits);

-int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
-		  struct dm_dev **result)
-{
-	return __table_get_device(ti->table, ti, path, mode, result);
-}
-

 /*
- * Decrement a devices use count and remove it if necessary.
+ * Decrement a device's use count and remove it if necessary.
  */
 void dm_put_device(struct dm_target *ti, struct dm_dev *d)
 {
@@ -568,6 +561,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d)
		kfree(dd);
	}
 }
+EXPORT_SYMBOL(dm_put_device);

 /*
  * Checks to see if the target joins onto the end of the table.
@@ -791,8 +785,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,

	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;

-	if (!tgt->num_discard_requests)
-		t->discards_supported = 0;
+	if (!tgt->num_discard_requests && tgt->discards_supported)
+		DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
+		       dm_device_name(t->md), type);

	return 0;

@@ -802,6 +797,63 @@ int dm_table_add_target(struct dm_table *t, const char *type,
	return r;
 }

+/*
+ * Target argument parsing helpers.
+ */
+static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+			     unsigned *value, char **error, unsigned grouped)
+{
+	const char *arg_str = dm_shift_arg(arg_set);
+
+	if (!arg_str ||
+	    (sscanf(arg_str, "%u", value) != 1) ||
+	    (*value < arg->min) ||
+	    (*value > arg->max) ||
+	    (grouped && arg_set->argc < *value)) {
+		*error = arg->error;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+		unsigned *value, char **error)
+{
+	return validate_next_arg(arg, arg_set, value, error, 0);
+}
+EXPORT_SYMBOL(dm_read_arg);
+
+int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set,
+		      unsigned *value, char **error)
+{
+	return validate_next_arg(arg, arg_set, value, error, 1);
+}
+EXPORT_SYMBOL(dm_read_arg_group);
+
+const char *dm_shift_arg(struct dm_arg_set *as)
+{
+	char *r;
+
+	if (as->argc) {
+		as->argc--;
+		r = *as->argv;
+		as->argv++;
+		return r;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(dm_shift_arg);
+
+void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
+{
+	BUG_ON(as->argc < num_args);
+	as->argc -= num_args;
+	as->argv += num_args;
+}
+EXPORT_SYMBOL(dm_consume_args);
+
 static int dm_table_set_type(struct dm_table *t)
 {
	unsigned i;
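These shared helpers replace per-target copies of the same parsing loop; dm-flakey and dm-crypt in this merge both use them for their optional feature arguments. A hypothetical constructor fragment showing the intended calling pattern — the target and feature names are invented, while the dm_* calls are exactly the ones added above:

    #include <linux/device-mapper.h>
    #include <linux/string.h>

    static int demo_parse_features(struct dm_arg_set *as, struct dm_target *ti)
    {
    	int r;
    	unsigned argc;
    	const char *arg_name;

    	static struct dm_arg _args[] = {
    		{0, 1, "Invalid number of feature arguments"},
    	};

    	/* An absent optional-argument section is allowed. */
    	if (!as->argc)
    		return 0;

    	/* <#opt_params>: bounds-checked against the remaining argv. */
    	r = dm_read_arg_group(_args, as, &argc, &ti->error);
    	if (r)
    		return r;

    	while (argc) {
    		arg_name = dm_shift_arg(as);
    		argc--;

    		if (!strcasecmp(arg_name, "demo_feature"))
    			continue;	/* a real target would set a flag here */

    		ti->error = "Unrecognised feature requested";
    		return -EINVAL;
    	}

    	return 0;
    }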
@@ -1077,11 +1129,13 @@ void dm_table_event(struct dm_table *t)
		t->event_fn(t->event_context);
	mutex_unlock(&_event_lock);
 }
+EXPORT_SYMBOL(dm_table_event);

 sector_t dm_table_get_size(struct dm_table *t)
 {
	return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
 }
+EXPORT_SYMBOL(dm_table_get_size);

 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
 {
@@ -1194,9 +1248,45 @@ static void dm_table_set_integrity(struct dm_table *t)
			       blk_get_integrity(template_disk));
 }

+static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, sector_t len, void *data)
+{
+	unsigned flush = (*(unsigned *)data);
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && (q->flush_flags & flush);
+}
+
+static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	/*
+	 * Require at least one underlying device to support flushes.
+	 * t->devices includes internal dm devices such as mirror logs
+	 * so we need to use iterate_devices here, which targets
+	 * supporting flushes must provide.
+	 */
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!ti->num_flush_requests)
+			continue;
+
+		if (ti->type->iterate_devices &&
+		    ti->type->iterate_devices(ti, device_flush_capable, &flush))
+			return 1;
+	}
+
+	return 0;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
			       struct queue_limits *limits)
 {
+	unsigned flush = 0;
+
	/*
	 * Copy table's limits to the DM device's request_queue
	 */
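dm_table_supports_flush() consults q->flush_flags, which each underlying driver populates via blk_queue_flush(). For reference, a minimal sketch of that producer side under the same 3.0-era API (driver and parameter names invented) — these are exactly the bits device_flush_capable() above tests:

    #include <linux/blkdev.h>

    /*
     * Hypothetical driver-init fragment: declare a volatile write cache
     * (REQ_FLUSH needed to empty it) and Forced Unit Access support.
     */
    static void demo_declare_cache_features(struct request_queue *q,
    					bool write_cache, bool fua)
    {
    	unsigned flush = 0;

    	if (write_cache)
    		flush |= REQ_FLUSH;
    	if (fua)
    		flush |= REQ_FUA;

    	blk_queue_flush(q, flush);
    }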
@@ -1207,6 +1297,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
	else
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);

+	if (dm_table_supports_flush(t, REQ_FLUSH)) {
+		flush |= REQ_FLUSH;
+		if (dm_table_supports_flush(t, REQ_FUA))
+			flush |= REQ_FUA;
+	}
+	blk_queue_flush(q, flush);
+
	dm_table_set_integrity(t);

	/*
@@ -1237,6 +1334,7 @@ fmode_t dm_table_get_mode(struct dm_table *t)
 {
	return t->mode;
 }
+EXPORT_SYMBOL(dm_table_get_mode);

 static void suspend_targets(struct dm_table *t, unsigned postsuspend)
 {
@@ -1345,6 +1443,7 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
	return t->md;
 }
+EXPORT_SYMBOL(dm_table_get_md);

 static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
				  sector_t start, sector_t len, void *data)
@@ -1359,19 +1458,19 @@ bool dm_table_supports_discards(struct dm_table *t)
	struct dm_target *ti;
	unsigned i = 0;

-	if (!t->discards_supported)
-		return 0;
-
	/*
	 * Unless any target used by the table set discards_supported,
	 * require at least one underlying device to support discards.
	 * t->devices includes internal dm devices such as mirror logs
	 * so we need to use iterate_devices here, which targets
-	 * supporting discard must provide.
+	 * supporting discard selectively must provide.
	 */
	while (i < dm_table_get_num_targets(t)) {
		ti = dm_table_get_target(t, i++);

+		if (!ti->num_discard_requests)
+			continue;
+
		if (ti->discards_supported)
			return 1;

@@ -1382,13 +1481,3 @@ bool dm_table_supports_discards(struct dm_table *t)

	return 0;
 }
-
-EXPORT_SYMBOL(dm_vcalloc);
-EXPORT_SYMBOL(dm_get_device);
-EXPORT_SYMBOL(dm_put_device);
-EXPORT_SYMBOL(dm_table_event);
-EXPORT_SYMBOL(dm_table_get_size);
-EXPORT_SYMBOL(dm_table_get_mode);
-EXPORT_SYMBOL(dm_table_get_md);
-EXPORT_SYMBOL(dm_table_put);
-EXPORT_SYMBOL(dm_table_get);
@@ -37,6 +37,8 @@ static const char *_name = DM_NAME;
 static unsigned int major = 0;
 static unsigned int _major = 0;

+static DEFINE_IDR(_minor_idr);
+
 static DEFINE_SPINLOCK(_minor_lock);
 /*
  * For bio-based dm.
@@ -109,6 +111,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
+#define DMF_MERGE_IS_OPTIONAL 6

 /*
  * Work processed by per-device workqueue.
@@ -313,6 +316,12 @@ static void __exit dm_exit(void)

	while (i--)
		_exits[i]();
+
+	/*
+	 * Should be empty by this point.
+	 */
+	idr_remove_all(&_minor_idr);
+	idr_destroy(&_minor_idr);
 }

 /*
@@ -1171,7 +1180,8 @@ static int __clone_and_map_discard(struct clone_info *ci)

	/*
	 * Even though the device advertised discard support,
-	 * reconfiguration might have changed that since the
+	 * that does not mean every target supports it, and
+	 * reconfiguration might also have changed that since the
	 * check was performed.
	 */
	if (!ti->num_discard_requests)
@@ -1705,8 +1715,6 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
-static DEFINE_IDR(_minor_idr);
-
 static void free_minor(int minor)
 {
	spin_lock(&_minor_lock);
@@ -1800,7 +1808,6 @@ static void dm_init_md_queue(struct mapped_device *md)
	blk_queue_make_request(md->queue, dm_request);
	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-	blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA);
 }

 /*
@@ -1985,6 +1992,59 @@ static void __set_size(struct mapped_device *md, sector_t size)
	i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
 }

+/*
+ * Return 1 if the queue has a compulsory merge_bvec_fn function.
+ *
+ * If this function returns 0, then the device is either a non-dm
+ * device without a merge_bvec_fn, or it is a dm device that is
+ * able to split any bios it receives that are too big.
+ */
+int dm_queue_merge_is_compulsory(struct request_queue *q)
+{
+	struct mapped_device *dev_md;
+
+	if (!q->merge_bvec_fn)
+		return 0;
+
+	if (q->make_request_fn == dm_request) {
+		dev_md = q->queuedata;
+		if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int dm_device_merge_is_compulsory(struct dm_target *ti,
+					 struct dm_dev *dev, sector_t start,
+					 sector_t len, void *data)
+{
+	struct block_device *bdev = dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	return dm_queue_merge_is_compulsory(q);
+}
+
+/*
+ * Return 1 if it is acceptable to ignore merge_bvec_fn based
+ * on the properties of the underlying devices.
+ */
+static int dm_table_merge_is_optional(struct dm_table *table)
+{
+	unsigned i = 0;
+	struct dm_target *ti;
+
+	while (i < dm_table_get_num_targets(table)) {
+		ti = dm_table_get_target(table, i++);
+
+		if (ti->type->iterate_devices &&
+		    ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
+			return 0;
+	}
+
+	return 1;
+}
+
 /*
  * Returns old map, which caller must destroy.
  */
@@ -1995,6 +2055,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
	struct request_queue *q = md->queue;
	sector_t size;
	unsigned long flags;
+	int merge_is_optional;

	size = dm_table_get_size(t);

@@ -2020,10 +2081,16 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,

	__bind_mempools(md, t);

+	merge_is_optional = dm_table_merge_is_optional(t);
+
	write_lock_irqsave(&md->map_lock, flags);
	old_map = md->map;
	md->map = t;
	dm_table_set_restrictions(t, q, limits);
+	if (merge_is_optional)
+		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
+	else
+		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
	write_unlock_irqrestore(&md->map_lock, flags);

	return old_map;
@@ -66,6 +66,8 @@ int dm_table_alloc_md_mempools(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);

+int dm_queue_merge_is_compulsory(struct request_queue *q);
+
 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, unsigned type);
@@ -208,6 +208,49 @@ struct dm_target_callbacks {
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);

+/*
+ * Target argument parsing.
+ */
+struct dm_arg_set {
+	unsigned argc;
+	char **argv;
+};
+
+/*
+ * The minimum and maximum value of a numeric argument, together with
+ * the error message to use if the number is found to be outside that range.
+ */
+struct dm_arg {
+	unsigned min;
+	unsigned max;
+	char *error;
+};
+
+/*
+ * Validate the next argument, either returning it as *value or, if invalid,
+ * returning -EINVAL and setting *error.
+ */
+int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set,
+		unsigned *value, char **error);
+
+/*
+ * Process the next argument as the start of a group containing between
+ * arg->min and arg->max further arguments. Either return the size as
+ * *num_args or, if invalid, return -EINVAL and set *error.
+ */
+int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set,
+		      unsigned *num_args, char **error);
+
+/*
+ * Return the current argument and shift to the next.
+ */
+const char *dm_shift_arg(struct dm_arg_set *as);
+
+/*
+ * Move through num_args arguments.
+ */
+void dm_consume_args(struct dm_arg_set *as, unsigned num_args);
+
 /*-----------------------------------------------------------------
  * Functions for creating and manipulating mapped devices.
  * Drop the reference with dm_put when you finish with the object.
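For a single bounded numeric parameter — the way a target such as dm-raid in this merge might consume its region_size argument — dm_read_arg() is used without the grouping check. A small invented example (the bounds and message are illustrative only):

    #include <linux/device-mapper.h>

    static int demo_read_region_size(struct dm_arg_set *as, unsigned *region_size,
    				 char **error)
    {
    	static struct dm_arg _arg = {
    		8, 131072, "region size must be between 8 and 131072 sectors"
    	};

    	return dm_read_arg(&_arg, as, region_size, error);
    }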
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)

 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	20
+#define DM_VERSION_MINOR	21
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2011-02-02)"
+#define DM_VERSION_EXTRA	"-ioctl (2011-07-06)"

 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
@@ -42,5 +42,20 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
		   unsigned num_dests, struct dm_io_region *dests,
		   unsigned flags, dm_kcopyd_notify_fn fn, void *context);

+/*
+ * Prepare a callback and submit it via the kcopyd thread.
+ *
+ * dm_kcopyd_prepare_callback allocates a callback structure and returns it.
+ * It must not be called from interrupt context.
+ * The returned value should be passed into dm_kcopyd_do_callback.
+ *
+ * dm_kcopyd_do_callback submits the callback.
+ * It may be called from interrupt context.
+ * The callback is issued from the kcopyd thread.
+ */
+void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
+				 dm_kcopyd_notify_fn fn, void *context);
+void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err);
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_DM_KCOPYD_H */
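The snapshot target's start_full_bio(), earlier in this merge, is the first user of this pair, and the pattern generalises to any case where a target submits an I/O itself but wants it to complete through kcopyd's callback machinery. A condensed sketch with invented names, following that same shape:

    #include <linux/blkdev.h>
    #include <linux/bio.h>
    #include <linux/dm-kcopyd.h>

    static void demo_end_io(struct bio *bio, int error)
    {
    	/* May run in interrupt context: only the do_callback side is safe here. */
    	dm_kcopyd_do_callback(bio->bi_private, 0, error ? 1 : 0);
    }

    static void demo_submit(struct dm_kcopyd_client *kc, struct bio *bio,
    			dm_kcopyd_notify_fn fn, void *context)
    {
    	/* Process context: allocate the kcopyd job up front... */
    	bio->bi_private = dm_kcopyd_prepare_callback(kc, fn, context);
    	bio->bi_end_io = demo_end_io;

    	/* ...then let the bio's completion fire it via the kcopyd thread. */
    	generic_make_request(bio);
    }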