[ARM] Feroceon: speed up flushing of the entire cache
Flushing the L1 D cache with a test/clean/invalidate loop is very easy in software, but it is not the quickest way of doing it, as there is a lot of overhead involved in re-scanning the cache from the beginning every time we hit a dirty line. This patch makes proc-feroceon.S use "clean+invalidate by set/way" loops according to the possible cache configurations of Feroceon CPUs (either direct-mapped or 4-way set associative). Signed-off-by: Nicolas Pitre <nico@marvell.com> Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
This commit is contained in:
committed by
Lennert Buytenhek
parent
79e90dd5aa
commit
6c386e58aa
@@ -44,11 +44,31 @@
|
|||||||
*/
|
*/
|
||||||
#define CACHE_DLINESIZE 32
|
#define CACHE_DLINESIZE 32
|
||||||
|
|
||||||
|
.bss
|
||||||
|
.align 3
|
||||||
|
__cache_params_loc:
|
||||||
|
.space 8
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
__cache_params:
|
||||||
|
.word __cache_params_loc
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* cpu_feroceon_proc_init()
|
* cpu_feroceon_proc_init()
|
||||||
*/
|
*/
|
||||||
ENTRY(cpu_feroceon_proc_init)
|
ENTRY(cpu_feroceon_proc_init)
|
||||||
|
mrc p15, 0, r0, c0, c0, 1 @ read cache type register
|
||||||
|
ldr r1, __cache_params
|
||||||
|
mov r2, #(16 << 5)
|
||||||
|
tst r0, #(1 << 16) @ get way
|
||||||
|
mov r0, r0, lsr #18 @ get cache size order
|
||||||
|
movne r3, #((4 - 1) << 30) @ 4-way
|
||||||
|
and r0, r0, #0xf
|
||||||
|
moveq r3, #0 @ 1-way
|
||||||
|
mov r2, r2, lsl r0 @ actual cache size
|
||||||
|
movne r2, r2, lsr #2 @ turned into # of sets
|
||||||
|
sub r2, r2, #(1 << 5)
|
||||||
|
stmia r1, {r2, r3}
|
||||||
mov pc, lr
|
mov pc, lr
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all)
|
|||||||
*/
|
*/
|
||||||
ENTRY(feroceon_flush_kern_cache_all)
|
ENTRY(feroceon_flush_kern_cache_all)
|
||||||
mov r2, #VM_EXEC
|
mov r2, #VM_EXEC
|
||||||
mov ip, #0
|
|
||||||
__flush_whole_cache:
|
__flush_whole_cache:
|
||||||
1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
|
ldr r1, __cache_params
|
||||||
bne 1b
|
ldmia r1, {r1, r3}
|
||||||
|
1: orr ip, r1, r3
|
||||||
|
2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
|
||||||
|
subs ip, ip, #(1 << 30) @ next way
|
||||||
|
bcs 2b
|
||||||
|
subs r1, r1, #(1 << 5) @ next set
|
||||||
|
bcs 1b
|
||||||
|
|
||||||
tst r2, #VM_EXEC
|
tst r2, #VM_EXEC
|
||||||
|
mov ip, #0
|
||||||
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
|
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
|
||||||
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
|
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
|
||||||
mov pc, lr
|
mov pc, lr
|
||||||
@@ -138,7 +166,6 @@ __flush_whole_cache:
|
|||||||
*/
|
*/
|
||||||
.align 5
|
.align 5
|
||||||
ENTRY(feroceon_flush_user_cache_range)
|
ENTRY(feroceon_flush_user_cache_range)
|
||||||
mov ip, #0
|
|
||||||
sub r3, r1, r0 @ calculate total size
|
sub r3, r1, r0 @ calculate total size
|
||||||
cmp r3, #CACHE_DLIMIT
|
cmp r3, #CACHE_DLIMIT
|
||||||
bgt __flush_whole_cache
|
bgt __flush_whole_cache
|
||||||
@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range)
|
|||||||
cmp r0, r1
|
cmp r0, r1
|
||||||
blo 1b
|
blo 1b
|
||||||
tst r2, #VM_EXEC
|
tst r2, #VM_EXEC
|
||||||
|
mov ip, #0
|
||||||
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
|
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
|
||||||
mov pc, lr
|
mov pc, lr
|
||||||
|
|
||||||
@@ -306,16 +334,25 @@ ENTRY(cpu_feroceon_dcache_clean_area)
|
|||||||
.align 5
|
.align 5
|
||||||
ENTRY(cpu_feroceon_switch_mm)
|
ENTRY(cpu_feroceon_switch_mm)
|
||||||
#ifdef CONFIG_MMU
|
#ifdef CONFIG_MMU
|
||||||
mov ip, #0
|
/*
|
||||||
@ && 'Clean & Invalidate whole DCache'
|
* Note: we wish to call __flush_whole_cache but we need to preserve
|
||||||
1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
|
* lr to do so. The only way without touching main memory is to
|
||||||
bne 1b
|
* use r2 which is normally used to test the VM_EXEC flag, and
|
||||||
mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
|
* compensate locally for the skipped ops if it is not set.
|
||||||
mcr p15, 0, ip, c7, c10, 4 @ drain WB
|
*/
|
||||||
|
mov r2, lr @ abuse r2 to preserve lr
|
||||||
|
bl __flush_whole_cache
|
||||||
|
@ if r2 contains the VM_EXEC bit then the next 2 ops are done already
|
||||||
|
tst r2, #VM_EXEC
|
||||||
|
mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
|
||||||
|
mcreq p15, 0, ip, c7, c10, 4 @ drain WB
|
||||||
|
|
||||||
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
|
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
|
||||||
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
|
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
|
||||||
#endif
|
mov pc, r2
|
||||||
|
#else
|
||||||
mov pc, lr
|
mov pc, lr
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* cpu_feroceon_set_pte_ext(ptep, pte, ext)
|
* cpu_feroceon_set_pte_ext(ptep, pte, ext)
|
||||||
|
Reference in New Issue
Block a user