[ARM] 5450/1: Flush only the needed range when unmapping a VMA
When unmapping N pages (e.g. shared memory) the number of TLB flushes done can be (N*PAGE_SIZE/ZAP_BLOCK_SIZE)*N, although it should be at most N. With a PREEMPT kernel ZAP_BLOCK_SIZE is 8 pages, so there is a noticeable performance penalty when unmapping a large VMA and the system spends its time in flush_tlb_range().

The problem is that tlb_end_vma() always flushes the full VMA range. The subrange that actually needs to be flushed can be calculated by tlb_remove_tlb_entry(). This approach was suggested by Hugh Dickins, and is also used by other arches.

The speed increase is roughly 3x for 8M mappings, and even more for larger mappings.

Signed-off-by: Aaro Koskinen <Aaro.Koskinen@nokia.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
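To make the arithmetic above concrete, here is a small standalone sketch (not part of the patch) that works through the 8M example from the message, assuming 4 KiB pages and the 8-page ZAP_BLOCK_SIZE used with PREEMPT:

/*
 * Standalone sketch of the flush accounting described in the message.
 * The page size and ZAP_BLOCK_SIZE are the values assumed in the text.
 */
#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;		/* assumed 4 KiB pages */
	const unsigned long zap_block_pages = 8;	/* ZAP_BLOCK_SIZE with PREEMPT, in pages */
	const unsigned long vma_bytes = 8UL << 20;	/* the 8M example */
	const unsigned long n = vma_bytes / page_size;	/* N = 2048 pages */

	/* Before: each per-block tlb_end_vma() flushed the whole VMA. */
	const unsigned long before = (n / zap_block_pages) * n;	/* 256 * 2048 = 524288 page flushes */

	/* After: only the pages actually unmapped in each block are flushed. */
	const unsigned long after = n;					/* 2048 page flushes */

	printf("pages flushed before: %lu, after: %lu\n", before, after);
	return 0;
}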
commit 7fccfc00c0
parent 41609ff430
committed by Russell King
@@ -36,6 +36,8 @@
 struct mmu_gather {
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
+	unsigned long		range_start;
+	unsigned long		range_end;
 };
 
 DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -63,7 +65,19 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 	put_cpu_var(mmu_gathers);
 }
 
-#define tlb_remove_tlb_entry(tlb,ptep,address)	do { } while (0)
+/*
+ * Memorize the range for the TLB flush.
+ */
+static inline void
+tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
+{
+	if (!tlb->fullmm) {
+		if (addr < tlb->range_start)
+			tlb->range_start = addr;
+		if (addr + PAGE_SIZE > tlb->range_end)
+			tlb->range_end = addr + PAGE_SIZE;
+	}
+}
 
 /*
  * In the case of tlb vma handling, we can optimise these away in the
@@ -73,15 +87,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
 static inline void
 tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
-	if (!tlb->fullmm)
+	if (!tlb->fullmm) {
 		flush_cache_range(vma, vma->vm_start, vma->vm_end);
+		tlb->range_start = TASK_SIZE;
+		tlb->range_end = 0;
+	}
 }
 
 static inline void
 tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
-	if (!tlb->fullmm)
-		flush_tlb_range(vma, vma->vm_start, vma->vm_end);
+	if (!tlb->fullmm && tlb->range_end > 0)
+		flush_tlb_range(vma, tlb->range_start, tlb->range_end);
 }
 
 #define tlb_remove_page(tlb,page)	free_page_and_swap_cache(page)
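For readers who want to poke at the bookkeeping outside the kernel, below is a minimal userspace model of the range tracking the patch introduces. It is not kernel code: the fullmm check and the unused pte_t argument are dropped, and the PAGE_SIZE and TASK_SIZE values are assumptions chosen only for the demo. The point it illustrates is the empty-range convention: tlb_start_vma() primes range_start to TASK_SIZE and range_end to 0, so tlb_end_vma() can tell from range_end > 0 whether any entry was removed and skip the flush otherwise.

/*
 * Userspace model of the patch's range bookkeeping (simplified, not
 * kernel code). PAGE_SIZE and TASK_SIZE are assumed demo values.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define TASK_SIZE	0xbf000000UL	/* assumed 3G/1G split value */

struct mmu_gather_model {
	unsigned long range_start;
	unsigned long range_end;
};

/* Models tlb_start_vma(): prime the range so it reads as "empty". */
static void start_vma(struct mmu_gather_model *tlb)
{
	tlb->range_start = TASK_SIZE;
	tlb->range_end = 0;
}

/* Models tlb_remove_tlb_entry(): grow the range to cover one more page. */
static void remove_entry(struct mmu_gather_model *tlb, unsigned long addr)
{
	if (addr < tlb->range_start)
		tlb->range_start = addr;
	if (addr + PAGE_SIZE > tlb->range_end)
		tlb->range_end = addr + PAGE_SIZE;
}

int main(void)
{
	struct mmu_gather_model tlb;

	start_vma(&tlb);
	remove_entry(&tlb, 0x08000000UL);
	remove_entry(&tlb, 0x08003000UL);

	/* Models tlb_end_vma(): range_end > 0 means something was unmapped. */
	if (tlb.range_end > 0)
		printf("flush_tlb_range(%#lx, %#lx)\n",
		       tlb.range_start, tlb.range_end);

	return 0;
}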