x86: replace hard coded reservations in 64-bit early boot code with dynamic table
On x86-64 there are several memory allocations before bootmem. To avoid them stomping on each other, they used to all be hard coded in bad_addr(). Replace this with an array that is filled as needed. This cleans up the code considerably and allows its use to be expanded. Cc: peterz@infradead.org Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
@@ -47,56 +47,65 @@ unsigned long end_pfn_map;
|
|||||||
*/
|
*/
|
||||||
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
|
static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
|
||||||
|
|
||||||
/* Check for some hardcoded bad areas that early boot is not allowed to touch */
|
/*
|
||||||
|
* Early reserved memory areas.
|
||||||
|
*/
|
||||||
|
#define MAX_EARLY_RES 20
|
||||||
|
|
||||||
|
struct early_res {
|
||||||
|
unsigned long start, end;
|
||||||
|
};
|
||||||
|
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
|
||||||
|
{ 0, PAGE_SIZE }, /* BIOS data page */
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
{ SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE },
|
||||||
|
#endif
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
void __init reserve_early(unsigned long start, unsigned long end)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct early_res *r;
|
||||||
|
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||||
|
r = &early_res[i];
|
||||||
|
if (end > r->start && start < r->end)
|
||||||
|
panic("Duplicated early reservation %lx-%lx\n",
|
||||||
|
start, end);
|
||||||
|
}
|
||||||
|
if (i >= MAX_EARLY_RES)
|
||||||
|
panic("Too many early reservations");
|
||||||
|
r = &early_res[i];
|
||||||
|
r->start = start;
|
||||||
|
r->end = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
void __init early_res_to_bootmem(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||||
|
struct early_res *r = &early_res[i];
|
||||||
|
reserve_bootmem_generic(r->start, r->end - r->start);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for already reserved areas */
|
||||||
static inline int bad_addr(unsigned long *addrp, unsigned long size)
|
static inline int bad_addr(unsigned long *addrp, unsigned long size)
|
||||||
{
|
{
|
||||||
unsigned long addr = *addrp, last = addr + size;
|
int i;
|
||||||
|
unsigned long addr = *addrp, last;
|
||||||
/* various gunk below that needed for SMP startup */
|
int changed = 0;
|
||||||
if (addr < 0x8000) {
|
again:
|
||||||
*addrp = PAGE_ALIGN(0x8000);
|
last = addr + size;
|
||||||
return 1;
|
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||||
}
|
struct early_res *r = &early_res[i];
|
||||||
|
if (last >= r->start && addr < r->end) {
|
||||||
/* direct mapping tables of the kernel */
|
*addrp = addr = r->end;
|
||||||
if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
|
changed = 1;
|
||||||
*addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
|
goto again;
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* initrd */
|
|
||||||
#ifdef CONFIG_BLK_DEV_INITRD
|
|
||||||
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
|
|
||||||
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
|
|
||||||
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
|
|
||||||
unsigned long ramdisk_end = ramdisk_image+ramdisk_size;
|
|
||||||
|
|
||||||
if (last >= ramdisk_image && addr < ramdisk_end) {
|
|
||||||
*addrp = PAGE_ALIGN(ramdisk_end);
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
return changed;
|
||||||
/* kernel code */
|
|
||||||
if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
|
|
||||||
*addrp = PAGE_ALIGN(__pa_symbol(&_end));
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
|
|
||||||
*addrp = PAGE_ALIGN(ebda_addr + ebda_size);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
|
||||||
/* NUMA memory to node map */
|
|
||||||
if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
|
|
||||||
*addrp = nodemap_addr + nodemap_size;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/* XXX ramdisk image here? */
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@@ -21,6 +21,7 @@
|
|||||||
#include <asm/tlbflush.h>
|
#include <asm/tlbflush.h>
|
||||||
#include <asm/sections.h>
|
#include <asm/sections.h>
|
||||||
#include <asm/kdebug.h>
|
#include <asm/kdebug.h>
|
||||||
|
#include <asm/e820.h>
|
||||||
|
|
||||||
static void __init zap_identity_mappings(void)
|
static void __init zap_identity_mappings(void)
|
||||||
{
|
{
|
||||||
@@ -48,6 +49,35 @@ static void __init copy_bootdata(char *real_mode_data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define EBDA_ADDR_POINTER 0x40E
|
||||||
|
|
||||||
|
static __init void reserve_ebda(void)
|
||||||
|
{
|
||||||
|
unsigned ebda_addr, ebda_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* there is a real-mode segmented pointer pointing to the
|
||||||
|
* 4K EBDA area at 0x40E
|
||||||
|
*/
|
||||||
|
ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
|
||||||
|
ebda_addr <<= 4;
|
||||||
|
|
||||||
|
if (!ebda_addr)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ebda_size = *(unsigned short *)__va(ebda_addr);
|
||||||
|
|
||||||
|
/* Round EBDA up to pages */
|
||||||
|
if (ebda_size == 0)
|
||||||
|
ebda_size = 1;
|
||||||
|
ebda_size <<= 10;
|
||||||
|
ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
|
||||||
|
if (ebda_size > 64*1024)
|
||||||
|
ebda_size = 64*1024;
|
||||||
|
|
||||||
|
reserve_early(ebda_addr, ebda_addr + ebda_size);
|
||||||
|
}
|
||||||
|
|
||||||
void __init x86_64_start_kernel(char * real_mode_data)
|
void __init x86_64_start_kernel(char * real_mode_data)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@@ -75,5 +105,23 @@ void __init x86_64_start_kernel(char * real_mode_data)
|
|||||||
pda_init(0);
|
pda_init(0);
|
||||||
copy_bootdata(__va(real_mode_data));
|
copy_bootdata(__va(real_mode_data));
|
||||||
|
|
||||||
|
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end));
|
||||||
|
|
||||||
|
/* Reserve INITRD */
|
||||||
|
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
|
||||||
|
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
|
||||||
|
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
|
||||||
|
unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
|
||||||
|
reserve_early(ramdisk_image, ramdisk_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
reserve_ebda();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At this point everything still needed from the boot loader
|
||||||
|
* or BIOS or kernel text should be early reserved or marked not
|
||||||
|
* RAM in e820. All other memory is free game.
|
||||||
|
*/
|
||||||
|
|
||||||
start_kernel();
|
start_kernel();
|
||||||
}
|
}
|
||||||
|
@@ -245,41 +245,6 @@ static inline void __init reserve_crashkernel(void)
|
|||||||
{}
|
{}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define EBDA_ADDR_POINTER 0x40E
|
|
||||||
|
|
||||||
unsigned __initdata ebda_addr;
|
|
||||||
unsigned __initdata ebda_size;
|
|
||||||
|
|
||||||
static void __init discover_ebda(void)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* there is a real-mode segmented pointer pointing to the
|
|
||||||
* 4K EBDA area at 0x40E
|
|
||||||
*/
|
|
||||||
ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
|
|
||||||
/*
|
|
||||||
* There can be some situations, like paravirtualized guests,
|
|
||||||
* in which there is no available ebda information. In such
|
|
||||||
* case, just skip it
|
|
||||||
*/
|
|
||||||
if (!ebda_addr) {
|
|
||||||
ebda_size = 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ebda_addr <<= 4;
|
|
||||||
|
|
||||||
ebda_size = *(unsigned short *)__va(ebda_addr);
|
|
||||||
|
|
||||||
/* Round EBDA up to pages */
|
|
||||||
if (ebda_size == 0)
|
|
||||||
ebda_size = 1;
|
|
||||||
ebda_size <<= 10;
|
|
||||||
ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
|
|
||||||
if (ebda_size > 64*1024)
|
|
||||||
ebda_size = 64*1024;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
|
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
|
||||||
void __attribute__((weak)) __init memory_setup(void)
|
void __attribute__((weak)) __init memory_setup(void)
|
||||||
{
|
{
|
||||||
@@ -349,8 +314,6 @@ void __init setup_arch(char **cmdline_p)
|
|||||||
|
|
||||||
check_efer();
|
check_efer();
|
||||||
|
|
||||||
discover_ebda();
|
|
||||||
|
|
||||||
init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
|
init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
|
||||||
if (efi_enabled)
|
if (efi_enabled)
|
||||||
efi_init();
|
efi_init();
|
||||||
@@ -397,33 +360,7 @@ void __init setup_arch(char **cmdline_p)
|
|||||||
contig_initmem_init(0, end_pfn);
|
contig_initmem_init(0, end_pfn);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Reserve direct mapping */
|
early_res_to_bootmem();
|
||||||
reserve_bootmem_generic(table_start << PAGE_SHIFT,
|
|
||||||
(table_end - table_start) << PAGE_SHIFT);
|
|
||||||
|
|
||||||
/* reserve kernel */
|
|
||||||
reserve_bootmem_generic(__pa_symbol(&_text),
|
|
||||||
__pa_symbol(&_end) - __pa_symbol(&_text));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* reserve physical page 0 - it's a special BIOS page on many boxes,
|
|
||||||
* enabling clean reboots, SMP operation, laptop functions.
|
|
||||||
*/
|
|
||||||
reserve_bootmem_generic(0, PAGE_SIZE);
|
|
||||||
|
|
||||||
/* reserve ebda region */
|
|
||||||
if (ebda_addr)
|
|
||||||
reserve_bootmem_generic(ebda_addr, ebda_size);
|
|
||||||
#ifdef CONFIG_NUMA
|
|
||||||
/* reserve nodemap region */
|
|
||||||
if (nodemap_addr)
|
|
||||||
reserve_bootmem_generic(nodemap_addr, nodemap_size);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
/* Reserve SMP trampoline */
|
|
||||||
reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_ACPI_SLEEP
|
#ifdef CONFIG_ACPI_SLEEP
|
||||||
/*
|
/*
|
||||||
@@ -453,6 +390,8 @@ void __init setup_arch(char **cmdline_p)
|
|||||||
initrd_start = ramdisk_image + PAGE_OFFSET;
|
initrd_start = ramdisk_image + PAGE_OFFSET;
|
||||||
initrd_end = initrd_start+ramdisk_size;
|
initrd_end = initrd_start+ramdisk_size;
|
||||||
} else {
|
} else {
|
||||||
|
/* Assumes everything on node 0 */
|
||||||
|
free_bootmem(ramdisk_image, ramdisk_size);
|
||||||
printk(KERN_ERR "initrd extends beyond end of memory "
|
printk(KERN_ERR "initrd extends beyond end of memory "
|
||||||
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
|
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
|
||||||
ramdisk_end, end_of_mem);
|
ramdisk_end, end_of_mem);
|
||||||
|
@@ -176,7 +176,8 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
|
|||||||
set_pte_phys(address, phys, prot);
|
set_pte_phys(address, phys, prot);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long __meminitdata table_start, table_end;
|
static unsigned long __initdata table_start;
|
||||||
|
static unsigned long __meminitdata table_end;
|
||||||
|
|
||||||
static __meminit void *alloc_low_page(unsigned long *phys)
|
static __meminit void *alloc_low_page(unsigned long *phys)
|
||||||
{
|
{
|
||||||
@@ -387,6 +388,8 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
|
|||||||
if (!after_bootmem)
|
if (!after_bootmem)
|
||||||
mmu_cr4_features = read_cr4();
|
mmu_cr4_features = read_cr4();
|
||||||
__flush_tlb_all();
|
__flush_tlb_all();
|
||||||
|
|
||||||
|
reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef CONFIG_NUMA
|
#ifndef CONFIG_NUMA
|
||||||
|
@@ -102,6 +102,7 @@ static int __init allocate_cachealigned_memnodemap(void)
|
|||||||
}
|
}
|
||||||
pad_addr = (nodemap_addr + pad) & ~pad;
|
pad_addr = (nodemap_addr + pad) & ~pad;
|
||||||
memnodemap = phys_to_virt(pad_addr);
|
memnodemap = phys_to_virt(pad_addr);
|
||||||
|
reserve_early(nodemap_addr, nodemap_addr + nodemap_size);
|
||||||
|
|
||||||
printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
|
printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
|
||||||
nodemap_addr, nodemap_addr + nodemap_size);
|
nodemap_addr, nodemap_addr + nodemap_size);
|
||||||
|
@@ -41,8 +41,8 @@ extern void finish_e820_parsing(void);
|
|||||||
extern struct e820map e820;
|
extern struct e820map e820;
|
||||||
extern void update_e820(void);
|
extern void update_e820(void);
|
||||||
|
|
||||||
extern unsigned ebda_addr, ebda_size;
|
extern void reserve_early(unsigned long start, unsigned long end);
|
||||||
extern unsigned long nodemap_addr, nodemap_size;
|
extern void early_res_to_bootmem(void);
|
||||||
|
|
||||||
#endif/*!__ASSEMBLY__*/
|
#endif/*!__ASSEMBLY__*/
|
||||||
|
|
||||||
|
@@ -22,8 +22,6 @@ extern void syscall32_cpu_init(void);
|
|||||||
|
|
||||||
extern void check_efer(void);
|
extern void check_efer(void);
|
||||||
|
|
||||||
extern unsigned long table_start, table_end;
|
|
||||||
|
|
||||||
extern int reboot_force;
|
extern int reboot_force;
|
||||||
|
|
||||||
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
|
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
|
||||||
|
Reference in New Issue
Block a user