mm: speculative page references
If we can be sure that elevating the page_count on a pagecache page will pin it, we can speculatively run this operation, and subsequently check to see if we hit the right page rather than relying on holding a lock or otherwise pinning a reference to the page. This can be done if get_page/put_page behaves consistently throughout the whole tree (i.e. if we "get" the page after it has been used for something else, we must be able to free it with a put_page). Actually, there is a period where the count behaves differently: when the page is free or if it is a constituent page of a compound page. We need an atomic_inc_not_zero operation to ensure we don't try to grab the page in either case. This patch introduces the core locking protocol to the pagecache (i.e. adds page_cache_get_speculative, and tweaks some update-side code to make it work). Thanks to Hugh for pointing out an improvement to the algorithm: setting page_count to zero when we have control of all references, in order to hold off speculative getters. [kamezawa.hiroyu@jp.fujitsu.com: fix migration_entry_wait()] [hugh@veritas.com: fix add_to_page_cache] [akpm@linux-foundation.org: repair a comment] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Jeff Garzik <jeff@garzik.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Acked-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
47feff2c8e
commit
e286781d5f
32
mm/filemap.c
32
mm/filemap.c
@@ -442,39 +442,43 @@ int filemap_write_and_wait_range(struct address_space *mapping,
|
||||
}
|
||||
|
||||
/**
|
||||
* add_to_page_cache - add newly allocated pagecache pages
|
||||
* add_to_page_cache_locked - add a locked page to the pagecache
|
||||
* @page: page to add
|
||||
* @mapping: the page's address_space
|
||||
* @offset: page index
|
||||
* @gfp_mask: page allocation mode
|
||||
*
|
||||
* This function is used to add newly allocated pagecache pages;
|
||||
* the page is new, so we can just run SetPageLocked() against it.
|
||||
* The other page state flags were set by rmqueue().
|
||||
*
|
||||
* This function is used to add a page to the pagecache. It must be locked.
|
||||
* This function does not add the page to the LRU. The caller must do that.
|
||||
*/
|
||||
int add_to_page_cache(struct page *page, struct address_space *mapping,
|
||||
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
|
||||
pgoff_t offset, gfp_t gfp_mask)
|
||||
{
|
||||
int error = mem_cgroup_cache_charge(page, current->mm,
|
||||
int error;
|
||||
|
||||
VM_BUG_ON(!PageLocked(page));
|
||||
|
||||
error = mem_cgroup_cache_charge(page, current->mm,
|
||||
gfp_mask & ~__GFP_HIGHMEM);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
|
||||
if (error == 0) {
|
||||
page_cache_get(page);
|
||||
page->mapping = mapping;
|
||||
page->index = offset;
|
||||
|
||||
write_lock_irq(&mapping->tree_lock);
|
||||
error = radix_tree_insert(&mapping->page_tree, offset, page);
|
||||
if (!error) {
|
||||
page_cache_get(page);
|
||||
SetPageLocked(page);
|
||||
page->mapping = mapping;
|
||||
page->index = offset;
|
||||
if (likely(!error)) {
|
||||
mapping->nrpages++;
|
||||
__inc_zone_page_state(page, NR_FILE_PAGES);
|
||||
} else
|
||||
} else {
|
||||
page->mapping = NULL;
|
||||
mem_cgroup_uncharge_cache_page(page);
|
||||
page_cache_release(page);
|
||||
}
|
||||
|
||||
write_unlock_irq(&mapping->tree_lock);
|
||||
radix_tree_preload_end();
|
||||
@@ -483,7 +487,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
|
||||
out:
|
||||
return error;
|
||||
}
|
||||
EXPORT_SYMBOL(add_to_page_cache);
|
||||
EXPORT_SYMBOL(add_to_page_cache_locked);
|
||||
|
||||
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
|
||||
pgoff_t offset, gfp_t gfp_mask)
|
||||
|
20
mm/migrate.c
20
mm/migrate.c
@@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
|
||||
|
||||
page = migration_entry_to_page(entry);
|
||||
|
||||
get_page(page);
|
||||
/*
|
||||
* Once radix-tree replacement for page migration has started, page_count
|
||||
* *must* be zero. And we don't want to call wait_on_page_locked()
|
||||
* against a page without get_page().
|
||||
* So we use get_page_unless_zero() here. Even if it fails, the page fault
|
||||
* will simply occur again.
|
||||
*/
|
||||
if (!get_page_unless_zero(page))
|
||||
goto out;
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
wait_on_page_locked(page);
|
||||
put_page(page);
|
||||
@@ -305,6 +313,7 @@ out:
|
||||
static int migrate_page_move_mapping(struct address_space *mapping,
|
||||
struct page *newpage, struct page *page)
|
||||
{
|
||||
int expected_count;
|
||||
void **pslot;
|
||||
|
||||
if (!mapping) {
|
||||
@@ -319,12 +328,18 @@ static int migrate_page_move_mapping(struct address_space *mapping,
|
||||
pslot = radix_tree_lookup_slot(&mapping->page_tree,
|
||||
page_index(page));
|
||||
|
||||
if (page_count(page) != 2 + !!PagePrivate(page) ||
|
||||
expected_count = 2 + !!PagePrivate(page);
|
||||
if (page_count(page) != expected_count ||
|
||||
(struct page *)radix_tree_deref_slot(pslot) != page) {
|
||||
write_unlock_irq(&mapping->tree_lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (!page_freeze_refs(page, expected_count)) {
|
||||
write_unlock_irq(&mapping->tree_lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now we know that no one else is looking at the page.
|
||||
*/
|
||||
@@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
|
||||
|
||||
radix_tree_replace_slot(pslot, newpage);
|
||||
|
||||
page_unfreeze_refs(page, expected_count);
|
||||
/*
|
||||
* Drop cache reference from old page.
|
||||
* We know this isn't the last reference.
|
||||
|
@@ -936,7 +936,7 @@ found:
|
||||
spin_lock(&info->lock);
|
||||
ptr = shmem_swp_entry(info, idx, NULL);
|
||||
if (ptr && ptr->val == entry.val) {
|
||||
error = add_to_page_cache(page, inode->i_mapping,
|
||||
error = add_to_page_cache_locked(page, inode->i_mapping,
|
||||
idx, GFP_NOWAIT);
|
||||
/* does mem_cgroup_uncharge_cache_page on error */
|
||||
} else /* we must compensate for our precharge above */
|
||||
@@ -1301,8 +1301,8 @@ repeat:
|
||||
SetPageUptodate(filepage);
|
||||
set_page_dirty(filepage);
|
||||
swap_free(swap);
|
||||
} else if (!(error = add_to_page_cache(
|
||||
swappage, mapping, idx, GFP_NOWAIT))) {
|
||||
} else if (!(error = add_to_page_cache_locked(swappage, mapping,
|
||||
idx, GFP_NOWAIT))) {
|
||||
info->flags |= SHMEM_PAGEIN;
|
||||
shmem_swp_set(info, entry, 0);
|
||||
shmem_swp_unmap(entry);
|
||||
|
@@ -64,7 +64,7 @@ void show_swap_cache_info(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* add_to_swap_cache resembles add_to_page_cache on swapper_space,
|
||||
* add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
|
||||
* but sets SwapCache flag and private instead of mapping and index.
|
||||
*/
|
||||
int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
|
||||
@@ -76,19 +76,26 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
|
||||
BUG_ON(PagePrivate(page));
|
||||
error = radix_tree_preload(gfp_mask);
|
||||
if (!error) {
|
||||
page_cache_get(page);
|
||||
SetPageSwapCache(page);
|
||||
set_page_private(page, entry.val);
|
||||
|
||||
write_lock_irq(&swapper_space.tree_lock);
|
||||
error = radix_tree_insert(&swapper_space.page_tree,
|
||||
entry.val, page);
|
||||
if (!error) {
|
||||
page_cache_get(page);
|
||||
SetPageSwapCache(page);
|
||||
set_page_private(page, entry.val);
|
||||
if (likely(!error)) {
|
||||
total_swapcache_pages++;
|
||||
__inc_zone_page_state(page, NR_FILE_PAGES);
|
||||
INC_CACHE_INFO(add_total);
|
||||
}
|
||||
write_unlock_irq(&swapper_space.tree_lock);
|
||||
radix_tree_preload_end();
|
||||
|
||||
if (unlikely(error)) {
|
||||
set_page_private(page, 0UL);
|
||||
ClearPageSwapCache(page);
|
||||
page_cache_release(page);
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
74
mm/vmscan.c
74
mm/vmscan.c
@@ -391,12 +391,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to detach a locked page from its ->mapping. If it is dirty or if
|
||||
* someone else has a ref on the page, abort and return 0. If it was
|
||||
* successfully detached, return 1. Assumes the caller has a single ref on
|
||||
* this page.
|
||||
* Same as remove_mapping, but if the page is removed from the mapping, it
|
||||
* gets returned with a refcount of 0.
|
||||
*/
|
||||
int remove_mapping(struct address_space *mapping, struct page *page)
|
||||
static int __remove_mapping(struct address_space *mapping, struct page *page)
|
||||
{
|
||||
BUG_ON(!PageLocked(page));
|
||||
BUG_ON(mapping != page_mapping(page));
|
||||
@@ -427,24 +425,24 @@ int remove_mapping(struct address_space *mapping, struct page *page)
|
||||
* Note that if SetPageDirty is always performed via set_page_dirty,
|
||||
* and thus under tree_lock, then this ordering is not required.
|
||||
*/
|
||||
if (unlikely(page_count(page) != 2))
|
||||
if (!page_freeze_refs(page, 2))
|
||||
goto cannot_free;
|
||||
smp_rmb();
|
||||
if (unlikely(PageDirty(page)))
|
||||
/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
|
||||
if (unlikely(PageDirty(page))) {
|
||||
page_unfreeze_refs(page, 2);
|
||||
goto cannot_free;
|
||||
}
|
||||
|
||||
if (PageSwapCache(page)) {
|
||||
swp_entry_t swap = { .val = page_private(page) };
|
||||
__delete_from_swap_cache(page);
|
||||
write_unlock_irq(&mapping->tree_lock);
|
||||
swap_free(swap);
|
||||
__put_page(page); /* The pagecache ref */
|
||||
return 1;
|
||||
} else {
|
||||
__remove_from_page_cache(page);
|
||||
write_unlock_irq(&mapping->tree_lock);
|
||||
}
|
||||
|
||||
__remove_from_page_cache(page);
|
||||
write_unlock_irq(&mapping->tree_lock);
|
||||
__put_page(page);
|
||||
return 1;
|
||||
|
||||
cannot_free:
|
||||
@@ -452,6 +450,26 @@ cannot_free:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to detach a locked page from its ->mapping. If it is dirty or if
|
||||
* someone else has a ref on the page, abort and return 0. If it was
|
||||
* successfully detached, return 1. Assumes the caller has a single ref on
|
||||
* this page.
|
||||
*/
|
||||
int remove_mapping(struct address_space *mapping, struct page *page)
|
||||
{
|
||||
if (__remove_mapping(mapping, page)) {
|
||||
/*
|
||||
* Unfreezing the refcount with 1 rather than 2 effectively
|
||||
* drops the pagecache ref for us without requiring another
|
||||
* atomic operation.
|
||||
*/
|
||||
page_unfreeze_refs(page, 1);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* shrink_page_list() returns the number of reclaimed pages
|
||||
*/
|
||||
@@ -598,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list,
|
||||
if (PagePrivate(page)) {
|
||||
if (!try_to_release_page(page, sc->gfp_mask))
|
||||
goto activate_locked;
|
||||
if (!mapping && page_count(page) == 1)
|
||||
goto free_it;
|
||||
if (!mapping && page_count(page) == 1) {
|
||||
unlock_page(page);
|
||||
if (put_page_testzero(page))
|
||||
goto free_it;
|
||||
else {
|
||||
/*
|
||||
* Rare race with a speculative reference.
|
||||
* The speculative reference will free
|
||||
* this page shortly, so we may
|
||||
* increment nr_reclaimed here (and
|
||||
* leave it off the LRU).
|
||||
*/
|
||||
nr_reclaimed++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!mapping || !remove_mapping(mapping, page))
|
||||
if (!mapping || !__remove_mapping(mapping, page))
|
||||
goto keep_locked;
|
||||
|
||||
free_it:
|
||||
unlock_page(page);
|
||||
free_it:
|
||||
nr_reclaimed++;
|
||||
if (!pagevec_add(&freed_pvec, page))
|
||||
__pagevec_release_nonlru(&freed_pvec);
|
||||
if (!pagevec_add(&freed_pvec, page)) {
|
||||
__pagevec_free(&freed_pvec);
|
||||
pagevec_reinit(&freed_pvec);
|
||||
}
|
||||
continue;
|
||||
|
||||
activate_locked:
|
||||
@@ -623,7 +657,7 @@ keep:
|
||||
}
|
||||
list_splice(&ret_pages, page_list);
|
||||
if (pagevec_count(&freed_pvec))
|
||||
__pagevec_release_nonlru(&freed_pvec);
|
||||
__pagevec_free(&freed_pvec);
|
||||
count_vm_events(PGACTIVATE, pgactivate);
|
||||
return nr_reclaimed;
|
||||
}
|
||||
|
Reference in New Issue
Block a user