dm sysfs: fix a module unload race
This reverts commit be35f48610 ("dm: wait until embedded kobject is
released before destroying a device") and provides an improved fix.
The kobject release code that calls the completion must be placed in a
non-module file, otherwise there is a module unload race: if the process
calling dm_kobject_release is preempted and the DM module is unloaded
after the completion is triggered but before dm_kobject_release returns,
the process resumes in code that no longer exists and crashes.
To fix this race, this patch moves the completion code to dm-builtin.c,
which is always compiled directly into the kernel when BLK_DEV_DM is
selected.
The patch introduces a new dm_kobject_holder structure whose purpose is
to keep the kobject and the completion in one place, so that the
completion can be reached from non-module code without exporting the
layout of struct mapped_device to that code.
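
For context, a minimal sketch of how the teardown side is expected to use
the embedded completion; dm_sysfs_exit itself is not touched by this
patch, so the body below is illustrative rather than quoted from the tree:

	void dm_sysfs_exit(struct mapped_device *md)
	{
		struct kobject *kobj = dm_kobject(md);

		/*
		 * Drop our reference; the final kobject_put() ends up in
		 * dm_kobject_release(), which now lives in built-in code.
		 */
		kobject_put(kobj);

		/*
		 * Do not tear down the mapped_device until every foreign
		 * reference to the embedded kobject has been dropped.
		 */
		wait_for_completion(dm_get_completion_from_kobject(kobj));
	}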
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
commit 2995fa78e4
parent 55b082e614
Committer: Mike Snitzer
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -176,8 +176,12 @@ config MD_FAULTY
 
 source "drivers/md/bcache/Kconfig"
 
+config BLK_DEV_DM_BUILTIN
+	boolean
+
 config BLK_DEV_DM
 	tristate "Device mapper support"
+	select BLK_DEV_DM_BUILTIN
 	---help---
 	  Device-mapper is a low level volume manager.  It works by allowing
 	  people to specify mappings for ranges of logical sectors.  Various
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o
 obj-$(CONFIG_BCACHE) += bcache/
 obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
 obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
+obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
 obj-$(CONFIG_DM_BUFIO) += dm-bufio.o
 obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o
 obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
drivers/md/dm-builtin.c (new file, 48 lines)
--- /dev/null
+++ b/drivers/md/dm-builtin.c
@@ -0,0 +1,48 @@
+#include "dm.h"
+
+/*
+ * The kobject release method must not be placed in the module itself,
+ * otherwise we are subject to module unload races.
+ *
+ * The release method is called when the last reference to the kobject is
+ * dropped. It may be called by any other kernel code that drops the last
+ * reference.
+ *
+ * The release method suffers from module unload race. We may prevent the
+ * module from being unloaded at the start of the release method (using
+ * increased module reference count or synchronizing against the release
+ * method), however there is no way to prevent the module from being
+ * unloaded at the end of the release method.
+ *
+ * If this code were placed in the dm module, the following race may
+ * happen:
+ *  1. Some other process takes a reference to dm kobject
+ *  2. The user issues ioctl function to unload the dm device
+ *  3. dm_sysfs_exit calls kobject_put, however the object is not released
+ *     because of the other reference taken at step 1
+ *  4. dm_sysfs_exit waits on the completion
+ *  5. The other process that took the reference in step 1 drops it,
+ *     dm_kobject_release is called from this process
+ *  6. dm_kobject_release calls complete()
+ *  7. a reschedule happens before dm_kobject_release returns
+ *  8. dm_sysfs_exit continues, the dm device is unloaded, module reference
+ *     count is decremented
+ *  9. The user unloads the dm module
+ * 10. The other process that was rescheduled in step 7 continues to run,
+ *     it is now executing code in unloaded module, so it crashes
+ *
+ * Note that if the process that takes the foreign reference to dm kobject
+ * has a low priority and the system is sufficiently loaded with
+ * higher-priority processes that prevent the low-priority process from
+ * being scheduled long enough, this bug may really happen.
+ *
+ * In order to fix this module unload race, we place the release method
+ * into a helper code that is compiled directly into the kernel.
+ */
+
+void dm_kobject_release(struct kobject *kobj)
+{
+	complete(dm_get_completion_from_kobject(kobj));
+}
+
+EXPORT_SYMBOL(dm_kobject_release);
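
dm-builtin.c only provides the release function; it takes effect through
the dm kobj_type in dm-sysfs.c, whose .release hook must point at it.
That wiring is not part of this diff, so the following is a sketch of
what the hookup is assumed to look like:

	static struct kobj_type dm_ktype = {
		.sysfs_ops	= &dm_sysfs_ops,
		.default_attrs	= dm_attrs,
		.release	= dm_kobject_release,	/* built-in code */
	};

With the function exported and built in, the last kobject_put() on a dm
kobject always lands in kernel text that cannot be unloaded, regardless
of whether dm-mod itself is a module.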
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -79,11 +79,6 @@ static const struct sysfs_ops dm_sysfs_ops = {
 	.show	= dm_attr_show,
 };
 
-static void dm_kobject_release(struct kobject *kobj)
-{
-	complete(dm_get_completion_from_kobject(kobj));
-}
-
 /*
  * dm kobject is embedded in mapped_device structure
  * no need to define release function here
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -200,11 +200,8 @@ struct mapped_device {
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
-	/* sysfs handle */
-	struct kobject kobj;
-
-	/* wait until the kobject is released */
-	struct completion kobj_completion;
+	/* kobject and completion */
+	struct dm_kobject_holder kobj_holder;
 
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
@@ -2044,7 +2041,7 @@ static struct mapped_device *alloc_dev(int minor)
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
-	init_completion(&md->kobj_completion);
+	init_completion(&md->kobj_holder.completion);
 
 	md->disk->major = _major;
 	md->disk->first_minor = minor;
@@ -2906,14 +2903,14 @@ struct gendisk *dm_disk(struct mapped_device *md)
 
 struct kobject *dm_kobject(struct mapped_device *md)
 {
-	return &md->kobj;
+	return &md->kobj_holder.kobj;
 }
 
 struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
 {
 	struct mapped_device *md;
 
-	md = container_of(kobj, struct mapped_device, kobj);
+	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
 
 	if (test_bit(DMF_FREEING, &md->flags) ||
 	    dm_deleting_md(md))
@@ -2923,13 +2920,6 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
 	return md;
 }
 
-struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
-{
-	struct mapped_device *md = container_of(kobj, struct mapped_device, kobj);
-
-	return &md->kobj_completion;
-}
-
 int dm_suspended_md(struct mapped_device *md)
 {
 	return test_bit(DMF_SUSPENDED, &md->flags);
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -16,6 +16,7 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/completion.h>
+#include <linux/kobject.h>
 
 #include "dm-stats.h"
 
@@ -149,11 +150,25 @@ void dm_interface_exit(void);
 /*
  * sysfs interface
  */
+struct dm_kobject_holder {
+	struct kobject kobj;
+	struct completion completion;
+};
+
+static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
+{
+	return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
+}
+
 int dm_sysfs_init(struct mapped_device *md);
 void dm_sysfs_exit(struct mapped_device *md);
 struct kobject *dm_kobject(struct mapped_device *md);
 struct mapped_device *dm_get_from_kobject(struct kobject *kobj);
-struct completion *dm_get_completion_from_kobject(struct kobject *kobj);
 
+/*
+ * The kobject helper
+ */
+void dm_kobject_release(struct kobject *kobj);
+
 /*
  * Targets for linear and striped mappings
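
The dm_kobject_holder type is deliberately generic: any structure that
embeds it can hand out the kobject and let built-in code recover the
completion via container_of(), without exposing its own layout. A short
illustration of the full lifecycle, using hypothetical names that are not
part of this patch:

	/* Hypothetical owner structure embedding the holder. */
	struct my_owner {
		struct dm_kobject_holder holder;
	};

	static struct kobj_type my_ktype = {
		.release = dm_kobject_release,	/* built-in release */
	};

	static int my_owner_publish(struct my_owner *o, struct kobject *parent)
	{
		init_completion(&o->holder.completion);
		return kobject_init_and_add(&o->holder.kobj, &my_ktype,
					    parent, "my_owner");
	}

	static void my_owner_teardown(struct my_owner *o)
	{
		kobject_put(&o->holder.kobj);
		/* Safe to free *o only after the last reference is gone. */
		wait_for_completion(&o->holder.completion);
	}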