x86, mm: Hold mm->page_table_lock while doing vmalloc_sync
Take mm->page_table_lock while syncing the vmalloc region. This prevents a race with the Xen pagetable pin/unpin code, which expects that the page_table_lock is already held. If this race occurs, then Xen can see an inconsistent page type (a page can either be read/write or a pagetable page, and pin/unpin converts it between them), which will cause either the pin or the set_p[gm]d to fail; either will crash the kernel.

vmalloc_sync_all() should be called rarely, so this extra use of page_table_lock should not interfere with its normal users.

The mm pointer is stashed in the pgd page's index field, as that won't be otherwise used for pgds.

Reported-by: Ian Campbell <ian.cambell@eu.citrix.com>
Originally-by: Jan Beulich <jbeulich@novell.com>
LKML-Reference: <4CB88A4C.1080305@goop.org>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
This commit is contained in:
committed by
H. Peter Anvin
parent
44235dcde4
commit
617d34d9e5
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|||||||
extern spinlock_t pgd_lock;
|
extern spinlock_t pgd_lock;
|
||||||
extern struct list_head pgd_list;
|
extern struct list_head pgd_list;
|
||||||
|
|
||||||
|
extern struct mm_struct *pgd_page_get_mm(struct page *page);
|
||||||
|
|
||||||
#ifdef CONFIG_PARAVIRT
|
#ifdef CONFIG_PARAVIRT
|
||||||
#include <asm/paravirt.h>
|
#include <asm/paravirt.h>
|
||||||
#else /* !CONFIG_PARAVIRT */
|
#else /* !CONFIG_PARAVIRT */
|
||||||
|
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
|
|||||||
|
|
||||||
spin_lock_irqsave(&pgd_lock, flags);
|
spin_lock_irqsave(&pgd_lock, flags);
|
||||||
list_for_each_entry(page, &pgd_list, lru) {
|
list_for_each_entry(page, &pgd_list, lru) {
|
||||||
if (!vmalloc_sync_one(page_address(page), address))
|
spinlock_t *pgt_lock;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
|
||||||
|
|
||||||
|
spin_lock(pgt_lock);
|
||||||
|
ret = vmalloc_sync_one(page_address(page), address);
|
||||||
|
spin_unlock(pgt_lock);
|
||||||
|
|
||||||
|
if (!ret)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&pgd_lock, flags);
|
spin_unlock_irqrestore(&pgd_lock, flags);
|
||||||
|
@@ -116,12 +116,19 @@ void sync_global_pgds(unsigned long start, unsigned long end)
|
|||||||
spin_lock_irqsave(&pgd_lock, flags);
|
spin_lock_irqsave(&pgd_lock, flags);
|
||||||
list_for_each_entry(page, &pgd_list, lru) {
|
list_for_each_entry(page, &pgd_list, lru) {
|
||||||
pgd_t *pgd;
|
pgd_t *pgd;
|
||||||
|
spinlock_t *pgt_lock;
|
||||||
|
|
||||||
pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
||||||
|
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
|
||||||
|
spin_lock(pgt_lock);
|
||||||
|
|
||||||
if (pgd_none(*pgd))
|
if (pgd_none(*pgd))
|
||||||
set_pgd(pgd, *pgd_ref);
|
set_pgd(pgd, *pgd_ref);
|
||||||
else
|
else
|
||||||
BUG_ON(pgd_page_vaddr(*pgd)
|
BUG_ON(pgd_page_vaddr(*pgd)
|
||||||
!= pgd_page_vaddr(*pgd_ref));
|
!= pgd_page_vaddr(*pgd_ref));
|
||||||
|
|
||||||
|
spin_unlock(pgt_lock);
|
||||||
}
|
}
|
||||||
spin_unlock_irqrestore(&pgd_lock, flags);
|
spin_unlock_irqrestore(&pgd_lock, flags);
|
||||||
}
|
}
|
||||||
|
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
|
|||||||
#define UNSHARED_PTRS_PER_PGD \
|
#define UNSHARED_PTRS_PER_PGD \
|
||||||
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
|
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
|
||||||
|
|
||||||
static void pgd_ctor(pgd_t *pgd)
|
|
||||||
|
/*
 * Record @mm in the struct page that backs @pgd by storing the pointer
 * in that page's ->index field, so it can later be read back with
 * pgd_page_get_mm().
 */
static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
/* Fail the build if ->index is too small to hold a pointer. */
BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
|
||||||
|
virt_to_page(pgd)->index = (pgoff_t)mm;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return the contents of @page's ->index field interpreted as a
 * struct mm_struct pointer. The value is only meaningful for pgd pages
 * on which pgd_set_mm() was called; pgd_ctor() does that only when
 * !SHARED_KERNEL_PMD.
 */
struct mm_struct *pgd_page_get_mm(struct page *page)
|
||||||
|
{
|
||||||
|
return (struct mm_struct *)page->index;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
|
||||||
{
|
{
|
||||||
/* If the pgd points to a shared pagetable level (either the
|
/* If the pgd points to a shared pagetable level (either the
|
||||||
ptes in non-PAE, or shared PMD in PAE), then just copy the
|
ptes in non-PAE, or shared PMD in PAE), then just copy the
|
||||||
@@ -105,8 +117,10 @@ static void pgd_ctor(pgd_t *pgd)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* list required to sync kernel mapping updates */
|
/* list required to sync kernel mapping updates */
|
||||||
if (!SHARED_KERNEL_PMD)
|
if (!SHARED_KERNEL_PMD) {
|
||||||
|
pgd_set_mm(pgd, mm);
|
||||||
pgd_list_add(pgd);
|
pgd_list_add(pgd);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void pgd_dtor(pgd_t *pgd)
|
static void pgd_dtor(pgd_t *pgd)
|
||||||
@@ -272,7 +286,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
|
|||||||
*/
|
*/
|
||||||
spin_lock_irqsave(&pgd_lock, flags);
|
spin_lock_irqsave(&pgd_lock, flags);
|
||||||
|
|
||||||
pgd_ctor(pgd);
|
pgd_ctor(mm, pgd);
|
||||||
pgd_prepopulate_pmd(mm, pgd, pmds);
|
pgd_prepopulate_pmd(mm, pgd, pmds);
|
||||||
|
|
||||||
spin_unlock_irqrestore(&pgd_lock, flags);
|
spin_unlock_irqrestore(&pgd_lock, flags);
|
||||||
|
Reference in New Issue
Block a user