memcg, slab: RCU protect memcg_params for root caches
We relocate the root cache's memcg_params whenever we need to grow the memcg_caches array to accommodate all kmem-active memory cgroups. Currently, on relocation we free the old version immediately, which can lead to a use-after-free, because the memcg_caches array is accessed lock-free (see cache_from_memcg_idx()). This patch fixes this by making memcg_params RCU-protected for root caches.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
f717eb3abb
commit
f8570263ee
@@ -513,7 +513,9 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
|
|||||||
*
|
*
|
||||||
* Both the root cache and the child caches will have it. For the root cache,
|
* Both the root cache and the child caches will have it. For the root cache,
|
||||||
* this will hold a dynamically allocated array large enough to hold
|
* this will hold a dynamically allocated array large enough to hold
|
||||||
* information about the currently limited memcgs in the system.
|
* information about the currently limited memcgs in the system. To allow the
|
||||||
|
* array to be accessed without taking any locks, on relocation we free the old
|
||||||
|
* version only after a grace period.
|
||||||
*
|
*
|
||||||
* Child caches will hold extra metadata needed for its operation. Fields are:
|
* Child caches will hold extra metadata needed for its operation. Fields are:
|
||||||
*
|
*
|
||||||
@@ -528,7 +530,10 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
|
|||||||
struct memcg_cache_params {
|
struct memcg_cache_params {
|
||||||
bool is_root_cache;
|
bool is_root_cache;
|
||||||
union {
|
union {
|
||||||
|
struct {
|
||||||
|
struct rcu_head rcu_head;
|
||||||
struct kmem_cache *memcg_caches[0];
|
struct kmem_cache *memcg_caches[0];
|
||||||
|
};
|
||||||
struct {
|
struct {
|
||||||
struct mem_cgroup *memcg;
|
struct mem_cgroup *memcg;
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
|
@@ -3178,18 +3178,17 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
|
|||||||
|
|
||||||
if (num_groups > memcg_limited_groups_array_size) {
|
if (num_groups > memcg_limited_groups_array_size) {
|
||||||
int i;
|
int i;
|
||||||
|
struct memcg_cache_params *new_params;
|
||||||
ssize_t size = memcg_caches_array_size(num_groups);
|
ssize_t size = memcg_caches_array_size(num_groups);
|
||||||
|
|
||||||
size *= sizeof(void *);
|
size *= sizeof(void *);
|
||||||
size += offsetof(struct memcg_cache_params, memcg_caches);
|
size += offsetof(struct memcg_cache_params, memcg_caches);
|
||||||
|
|
||||||
s->memcg_params = kzalloc(size, GFP_KERNEL);
|
new_params = kzalloc(size, GFP_KERNEL);
|
||||||
if (!s->memcg_params) {
|
if (!new_params)
|
||||||
s->memcg_params = cur_params;
|
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
|
||||||
|
|
||||||
s->memcg_params->is_root_cache = true;
|
new_params->is_root_cache = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* There is the chance it will be bigger than
|
* There is the chance it will be bigger than
|
||||||
@@ -3203,7 +3202,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
|
|||||||
for (i = 0; i < memcg_limited_groups_array_size; i++) {
|
for (i = 0; i < memcg_limited_groups_array_size; i++) {
|
||||||
if (!cur_params->memcg_caches[i])
|
if (!cur_params->memcg_caches[i])
|
||||||
continue;
|
continue;
|
||||||
s->memcg_params->memcg_caches[i] =
|
new_params->memcg_caches[i] =
|
||||||
cur_params->memcg_caches[i];
|
cur_params->memcg_caches[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3216,7 +3215,9 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
|
|||||||
* bigger than the others. And all updates will reset this
|
* bigger than the others. And all updates will reset this
|
||||||
* anyway.
|
* anyway.
|
||||||
*/
|
*/
|
||||||
kfree(cur_params);
|
rcu_assign_pointer(s->memcg_params, new_params);
|
||||||
|
if (cur_params)
|
||||||
|
kfree_rcu(cur_params, rcu_head);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
16
mm/slab.h
16
mm/slab.h
@@ -160,14 +160,28 @@ static inline const char *cache_name(struct kmem_cache *s)
|
|||||||
return s->name;
|
return s->name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note, we protect with RCU only the memcg_caches array, not per-memcg caches.
|
||||||
|
* That said, the caller must ensure the memcg's cache won't go away. Since once
|
||||||
|
* created a memcg's cache is destroyed only along with the root cache, it is
|
||||||
|
* true if we are going to allocate from the cache or hold a reference to the
|
||||||
|
* root cache by other means. Otherwise, we should hold either the slab_mutex
|
||||||
|
* or the memcg's slab_caches_mutex while calling this function and accessing
|
||||||
|
* the returned value.
|
||||||
|
*/
|
||||||
static inline struct kmem_cache *
|
static inline struct kmem_cache *
|
||||||
cache_from_memcg_idx(struct kmem_cache *s, int idx)
|
cache_from_memcg_idx(struct kmem_cache *s, int idx)
|
||||||
{
|
{
|
||||||
struct kmem_cache *cachep;
|
struct kmem_cache *cachep;
|
||||||
|
struct memcg_cache_params *params;
|
||||||
|
|
||||||
if (!s->memcg_params)
|
if (!s->memcg_params)
|
||||||
return NULL;
|
return NULL;
|
||||||
cachep = s->memcg_params->memcg_caches[idx];
|
|
||||||
|
rcu_read_lock();
|
||||||
|
params = rcu_dereference(s->memcg_params);
|
||||||
|
cachep = params->memcg_caches[idx];
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Make sure we will access the up-to-date value. The code updating
|
* Make sure we will access the up-to-date value. The code updating
|
||||||
|
Reference in New Issue
Block a user