mm: get rid of unnecessary pageblock scanning in setup_zone_migrate_reserve
Yasuaki Ishimatsu reported that memory hot-add spent more than 5 _hours_
on a 9TB memory machine because onlining memory sections is too slow, and
we found that setup_zone_migrate_reserve() spent >90% of the time.

The problem is that setup_zone_migrate_reserve() scans all pageblocks
unconditionally, but this is only necessary when the number of reserved
blocks has been reduced (i.e. memory hot-remove).  Moreover, the maximum
MIGRATE_RESERVE per zone is currently 2, so the number of reserved
pageblocks is almost always unchanged.

This patch adds zone->nr_migrate_reserve_block to maintain the number of
MIGRATE_RESERVE pageblocks, which reduces the overhead of
setup_zone_migrate_reserve() dramatically.  The following table shows the
time to online one memory section:

  Amount of memory     | 128GB | 192GB | 256GB |
  -----------------------------------------------
  linux-3.12           |  23.9 |  31.4 |  44.5 |
  This patch           |   8.3 |   8.3 |   8.6 |
  Mel's proposal patch |  10.9 |  19.2 |  31.3 |
  -----------------------------------------------
                                    (millisecond)

  128GB: 4 nodes, each node has 32GB of memory
  192GB: 6 nodes, each node has 32GB of memory
  256GB: 8 nodes, each node has 32GB of memory

(*1) Mel proposed his idea in the following thread:
     https://lkml.org/lkml/2013/10/30/272

[akpm@linux-foundation.org: tweak comment]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Reported-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Tested-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 943dca1a1f
parent 34e431b0ae
committed by Linus Torvalds
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -489,6 +489,12 @@ struct zone {
 	unsigned long		present_pages;
 	unsigned long		managed_pages;
 
+	/*
+	 * Number of MIGRATE_RESERVE page block. To maintain for just
+	 * optimization. Protected by zone->lock.
+	 */
+	int			nr_migrate_reserve_block;
+
 	/*
 	 * rarely used fields:
 	 */
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3901,6 +3901,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	struct page *page;
 	unsigned long block_migratetype;
 	int reserve;
+	int old_reserve;
 
 	/*
 	 * Get the start pfn, end pfn and the number of blocks to reserve
@@ -3922,6 +3923,12 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	 * future allocation of hugepages at runtime.
 	 */
 	reserve = min(2, reserve);
+	old_reserve = zone->nr_migrate_reserve_block;
+
+	/* When memory hot-add, we almost always need to do nothing */
+	if (reserve == old_reserve)
+		return;
+	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		if (!pfn_valid(pfn))
@@ -3959,6 +3966,12 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 			reserve--;
 			continue;
 		}
+	} else if (!old_reserve) {
+		/*
+		 * At boot time we don't need to scan the whole zone
+		 * for turning off MIGRATE_RESERVE.
+		 */
+		break;
 	}
 
 	/*
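To make the control flow easier to see outside the kernel tree, below is a
minimal, self-contained userspace sketch of the fast path this patch adds.
It is not the real kernel code: struct zone is mocked down to the one new
field, setup_zone_migrate_reserve() takes the computed reserve count as a
parameter for simplicity, and the whole-zone pageblock scan is replaced by
a counter.

	/* Hypothetical stand-alone demo -- not kernel code. */
	#include <stdio.h>

	/* Mock of struct zone, reduced to the field the patch adds. */
	struct zone {
		int nr_migrate_reserve_block;	/* cached MIGRATE_RESERVE count */
	};

	static int full_scans;	/* stand-in for the expensive pageblock scan */

	static void setup_zone_migrate_reserve(struct zone *zone, int reserve)
	{
		int old_reserve = zone->nr_migrate_reserve_block;

		/*
		 * Fast path from the patch: on memory hot-add the reserve
		 * count (capped at 2 per zone) is almost always unchanged,
		 * so the whole-zone pageblock scan can be skipped entirely.
		 */
		if (reserve == old_reserve)
			return;
		zone->nr_migrate_reserve_block = reserve;

		full_scans++;	/* the real function scans every pageblock here */
	}

	int main(void)
	{
		struct zone z = { .nr_migrate_reserve_block = 0 };

		setup_zone_migrate_reserve(&z, 2);	/* boot: one real scan */
		setup_zone_migrate_reserve(&z, 2);	/* hot-add: skipped */
		setup_zone_migrate_reserve(&z, 2);	/* hot-add: skipped */
		printf("full scans: %d\n", full_scans);	/* prints 1 */
		return 0;
	}

The "else if (!old_reserve) break;" hunk above is the complementary
boot-time fast path: if the zone never had any MIGRATE_RESERVE pageblocks,
there is nothing to turn off, so the scan can stop early instead of
walking the rest of the zone.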