[IA64] speed up syscall path a bit more
Recently I noticed that clearing ar.ssd/ar.csd right before srlz.d causes significant stalling in the syscall path. The patch below fixes that by moving the register writes to after srlz.d. On a Madison, this drops break-based getpid() from 241 to 226 cycles (-15 cycles).

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
committed by
Tony Luck
parent
e8d1cb2f28
commit
30325d1771
@@ -728,12 +728,8 @@ ENTRY(ia64_leave_syscall)
|
|||||||
mov f8=f0 // clear f8
|
mov f8=f0 // clear f8
|
||||||
;;
|
;;
|
||||||
ld8 r30=[r2],16 // M0|1 load cr.ifs
|
ld8 r30=[r2],16 // M0|1 load cr.ifs
|
||||||
mov.m ar.ssd=r0 // M2 clear ar.ssd
|
|
||||||
cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
|
|
||||||
;;
|
|
||||||
ld8 r25=[r3],16 // M0|1 load ar.unat
|
ld8 r25=[r3],16 // M0|1 load ar.unat
|
||||||
mov.m ar.csd=r0 // M2 clear ar.csd
|
cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
|
||||||
mov r22=r0 // clear r22
|
|
||||||
;;
|
;;
|
||||||
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
|
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
|
||||||
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
|
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
|
||||||
@@ -756,11 +752,15 @@ ENTRY(ia64_leave_syscall)
|
|||||||
mov f7=f0 // clear f7
|
mov f7=f0 // clear f7
|
||||||
;;
|
;;
|
||||||
ld8.fill r12=[r2] // restore r12 (sp)
|
ld8.fill r12=[r2] // restore r12 (sp)
|
||||||
|
mov.m ar.ssd=r0 // M2 clear ar.ssd
|
||||||
|
mov r22=r0 // clear r22
|
||||||
|
|
||||||
ld8.fill r15=[r3] // restore r15
|
ld8.fill r15=[r3] // restore r15
|
||||||
|
(pUStk) st1 [r14]=r17
|
||||||
addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
|
addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
|
||||||
;;
|
;;
|
||||||
(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
|
(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
|
||||||
(pUStk) st1 [r14]=r17
|
mov.m ar.csd=r0 // M2 clear ar.csd
|
||||||
mov b6=r18 // I0 restore b6
|
mov b6=r18 // I0 restore b6
|
||||||
;;
|
;;
|
||||||
mov r14=r0 // clear r14
|
mov r14=r0 // clear r14
|
||||||
|
Reference in New Issue
Block a user