memcg: remove clear_page_cgroup and atomics
Remove clear_page_cgroup: it's an unhelpful helper, see for example how mem_cgroup_uncharge_page had to unlock_page_cgroup just in order to call it (serious races from that? I'm not sure). Once that's gone, you can see it's pointless for page_cgroup's ref_cnt to be atomic: it's always manipulated under lock_page_cgroup, except where force_empty unilaterally reset it to 0 (and how does uncharge's atomic_dec_and_test protect against that?). Simplify this page_cgroup locking: if you've got the lock and the pc is attached, then the ref_cnt must be positive: VM_BUG_ONs to check that, and to check that pc->page matches page (we're on the way to finding why sometimes it doesn't, but this patch doesn't fix that). Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: David Rientjes <rientjes@google.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Hirokazu Takahashi <taka@valinux.co.jp> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Paul Menage <menage@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
d5b69e38f8
commit
b9c565d5a2
@@ -161,8 +161,7 @@ struct page_cgroup {
|
|||||||
struct list_head lru; /* per cgroup LRU list */
|
struct list_head lru; /* per cgroup LRU list */
|
||||||
struct page *page;
|
struct page *page;
|
||||||
struct mem_cgroup *mem_cgroup;
|
struct mem_cgroup *mem_cgroup;
|
||||||
atomic_t ref_cnt; /* Helpful when pages move b/w */
|
int ref_cnt; /* cached, mapped, migrating */
|
||||||
/* mapped and cached states */
|
|
||||||
int flags;
|
int flags;
|
||||||
};
|
};
|
||||||
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
|
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
|
||||||
@@ -283,27 +282,6 @@ static void unlock_page_cgroup(struct page *page)
|
|||||||
bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
|
bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Clear page->page_cgroup member under lock_page_cgroup().
|
|
||||||
* If given "pc" value is different from one page->page_cgroup,
|
|
||||||
* page->cgroup is not cleared.
|
|
||||||
* Returns a value of page->page_cgroup at lock taken.
|
|
||||||
* A can can detect failure of clearing by following
|
|
||||||
* clear_page_cgroup(page, pc) == pc
|
|
||||||
*/
|
|
||||||
static struct page_cgroup *clear_page_cgroup(struct page *page,
|
|
||||||
struct page_cgroup *pc)
|
|
||||||
{
|
|
||||||
struct page_cgroup *ret;
|
|
||||||
/* lock and clear */
|
|
||||||
lock_page_cgroup(page);
|
|
||||||
ret = page_get_page_cgroup(page);
|
|
||||||
if (likely(ret == pc))
|
|
||||||
page_assign_page_cgroup(page, NULL);
|
|
||||||
unlock_page_cgroup(page);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void __mem_cgroup_remove_list(struct page_cgroup *pc)
|
static void __mem_cgroup_remove_list(struct page_cgroup *pc)
|
||||||
{
|
{
|
||||||
int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
|
int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
|
||||||
@@ -555,16 +533,13 @@ retry:
|
|||||||
* the page has already been accounted.
|
* the page has already been accounted.
|
||||||
*/
|
*/
|
||||||
if (pc) {
|
if (pc) {
|
||||||
if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
|
VM_BUG_ON(pc->page != page);
|
||||||
/* this page is under being uncharged ? */
|
VM_BUG_ON(pc->ref_cnt <= 0);
|
||||||
unlock_page_cgroup(page);
|
|
||||||
cpu_relax();
|
pc->ref_cnt++;
|
||||||
goto retry;
|
|
||||||
} else {
|
|
||||||
unlock_page_cgroup(page);
|
unlock_page_cgroup(page);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
unlock_page_cgroup(page);
|
unlock_page_cgroup(page);
|
||||||
|
|
||||||
pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
|
pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
|
||||||
@@ -612,7 +587,7 @@ retry:
|
|||||||
congestion_wait(WRITE, HZ/10);
|
congestion_wait(WRITE, HZ/10);
|
||||||
}
|
}
|
||||||
|
|
||||||
atomic_set(&pc->ref_cnt, 1);
|
pc->ref_cnt = 1;
|
||||||
pc->mem_cgroup = mem;
|
pc->mem_cgroup = mem;
|
||||||
pc->page = page;
|
pc->page = page;
|
||||||
pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
|
pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
|
||||||
@@ -683,24 +658,24 @@ void mem_cgroup_uncharge_page(struct page *page)
|
|||||||
if (!pc)
|
if (!pc)
|
||||||
goto unlock;
|
goto unlock;
|
||||||
|
|
||||||
if (atomic_dec_and_test(&pc->ref_cnt)) {
|
VM_BUG_ON(pc->page != page);
|
||||||
page = pc->page;
|
VM_BUG_ON(pc->ref_cnt <= 0);
|
||||||
mz = page_cgroup_zoneinfo(pc);
|
|
||||||
/*
|
if (--(pc->ref_cnt) == 0) {
|
||||||
* get page->cgroup and clear it under lock.
|
page_assign_page_cgroup(page, NULL);
|
||||||
* force_empty can drop page->cgroup without checking refcnt.
|
|
||||||
*/
|
|
||||||
unlock_page_cgroup(page);
|
unlock_page_cgroup(page);
|
||||||
if (clear_page_cgroup(page, pc) == pc) {
|
|
||||||
mem = pc->mem_cgroup;
|
mem = pc->mem_cgroup;
|
||||||
css_put(&mem->css);
|
css_put(&mem->css);
|
||||||
res_counter_uncharge(&mem->res, PAGE_SIZE);
|
res_counter_uncharge(&mem->res, PAGE_SIZE);
|
||||||
|
|
||||||
|
mz = page_cgroup_zoneinfo(pc);
|
||||||
spin_lock_irqsave(&mz->lru_lock, flags);
|
spin_lock_irqsave(&mz->lru_lock, flags);
|
||||||
__mem_cgroup_remove_list(pc);
|
__mem_cgroup_remove_list(pc);
|
||||||
spin_unlock_irqrestore(&mz->lru_lock, flags);
|
spin_unlock_irqrestore(&mz->lru_lock, flags);
|
||||||
|
|
||||||
kfree(pc);
|
kfree(pc);
|
||||||
}
|
return;
|
||||||
lock_page_cgroup(page);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unlock:
|
unlock:
|
||||||
@@ -714,14 +689,13 @@ unlock:
|
|||||||
int mem_cgroup_prepare_migration(struct page *page)
|
int mem_cgroup_prepare_migration(struct page *page)
|
||||||
{
|
{
|
||||||
struct page_cgroup *pc;
|
struct page_cgroup *pc;
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
lock_page_cgroup(page);
|
lock_page_cgroup(page);
|
||||||
pc = page_get_page_cgroup(page);
|
pc = page_get_page_cgroup(page);
|
||||||
if (pc && atomic_inc_not_zero(&pc->ref_cnt))
|
if (pc)
|
||||||
ret = 1;
|
pc->ref_cnt++;
|
||||||
unlock_page_cgroup(page);
|
unlock_page_cgroup(page);
|
||||||
return ret;
|
return pc != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_cgroup_end_migration(struct page *page)
|
void mem_cgroup_end_migration(struct page *page)
|
||||||
@@ -740,15 +714,17 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
|
|||||||
struct mem_cgroup_per_zone *mz;
|
struct mem_cgroup_per_zone *mz;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
retry:
|
lock_page_cgroup(page);
|
||||||
pc = page_get_page_cgroup(page);
|
pc = page_get_page_cgroup(page);
|
||||||
if (!pc)
|
if (!pc) {
|
||||||
|
unlock_page_cgroup(page);
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
page_assign_page_cgroup(page, NULL);
|
||||||
|
unlock_page_cgroup(page);
|
||||||
|
|
||||||
mz = page_cgroup_zoneinfo(pc);
|
mz = page_cgroup_zoneinfo(pc);
|
||||||
if (clear_page_cgroup(page, pc) != pc)
|
|
||||||
goto retry;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&mz->lru_lock, flags);
|
spin_lock_irqsave(&mz->lru_lock, flags);
|
||||||
__mem_cgroup_remove_list(pc);
|
__mem_cgroup_remove_list(pc);
|
||||||
spin_unlock_irqrestore(&mz->lru_lock, flags);
|
spin_unlock_irqrestore(&mz->lru_lock, flags);
|
||||||
@@ -794,16 +770,20 @@ retry:
|
|||||||
while (--count && !list_empty(list)) {
|
while (--count && !list_empty(list)) {
|
||||||
pc = list_entry(list->prev, struct page_cgroup, lru);
|
pc = list_entry(list->prev, struct page_cgroup, lru);
|
||||||
page = pc->page;
|
page = pc->page;
|
||||||
/* Avoid race with charge */
|
lock_page_cgroup(page);
|
||||||
atomic_set(&pc->ref_cnt, 0);
|
if (page_get_page_cgroup(page) == pc) {
|
||||||
if (clear_page_cgroup(page, pc) == pc) {
|
page_assign_page_cgroup(page, NULL);
|
||||||
|
unlock_page_cgroup(page);
|
||||||
css_put(&mem->css);
|
css_put(&mem->css);
|
||||||
res_counter_uncharge(&mem->res, PAGE_SIZE);
|
res_counter_uncharge(&mem->res, PAGE_SIZE);
|
||||||
__mem_cgroup_remove_list(pc);
|
__mem_cgroup_remove_list(pc);
|
||||||
kfree(pc);
|
kfree(pc);
|
||||||
} else /* being uncharged ? ...do relax */
|
} else {
|
||||||
|
/* racing uncharge: let page go then retry */
|
||||||
|
unlock_page_cgroup(page);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
spin_unlock_irqrestore(&mz->lru_lock, flags);
|
spin_unlock_irqrestore(&mz->lru_lock, flags);
|
||||||
if (!list_empty(list)) {
|
if (!list_empty(list)) {
|
||||||
|
Reference in New Issue
Block a user