GRU driver: minor updates
A few minor updates for the GRU driver:

- documentation changes found in code reviews
- changes to #ifdefs to make them recognized by "unifdef" (used in simulator testing)
- change GRU context load/unload to prefetch data

[akpm@linux-foundation.org: fix typo in comment]
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
ebf3f09c63
commit
923f7f6970
@@ -30,9 +30,9 @@
|
|||||||
/*
|
/*
|
||||||
* Size used to map GRU GSeg
|
* Size used to map GRU GSeg
|
||||||
*/
|
*/
|
||||||
#if defined CONFIG_IA64
|
#if defined(CONFIG_IA64)
|
||||||
#define GRU_GSEG_PAGESIZE (256 * 1024UL)
|
#define GRU_GSEG_PAGESIZE (256 * 1024UL)
|
||||||
#elif defined CONFIG_X86_64
|
#elif defined(CONFIG_X86_64)
|
||||||
#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */
|
#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */
|
||||||
#else
|
#else
|
||||||
#error "Unsupported architecture"
|
#error "Unsupported architecture"
|
||||||
|
@@ -26,7 +26,7 @@
|
|||||||
* Architecture dependent functions
|
* Architecture dependent functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if defined CONFIG_IA64
|
#if defined(CONFIG_IA64)
|
||||||
#include <linux/compiler.h>
|
#include <linux/compiler.h>
|
||||||
#include <asm/intrinsics.h>
|
#include <asm/intrinsics.h>
|
||||||
#define __flush_cache(p) ia64_fc(p)
|
#define __flush_cache(p) ia64_fc(p)
|
||||||
@@ -36,7 +36,7 @@
|
|||||||
barrier(); \
|
barrier(); \
|
||||||
*((volatile int *)(p)) = v; /* force st.rel */ \
|
*((volatile int *)(p)) = v; /* force st.rel */ \
|
||||||
} while (0)
|
} while (0)
|
||||||
#elif defined CONFIG_X86_64
|
#elif defined(CONFIG_X86_64)
|
||||||
#define __flush_cache(p) clflush(p)
|
#define __flush_cache(p) clflush(p)
|
||||||
#define gru_ordered_store_int(p,v) \
|
#define gru_ordered_store_int(p,v) \
|
||||||
do { \
|
do { \
|
||||||
@@ -299,6 +299,7 @@ static inline void gru_flush_cache(void *p)
|
|||||||
static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
|
static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
|
||||||
{
|
{
|
||||||
gru_ordered_store_int(ins, op32);
|
gru_ordered_store_int(ins, op32);
|
||||||
|
gru_flush_cache(ins);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -604,8 +605,9 @@ static inline int gru_get_cb_substatus(void *cb)
|
|||||||
static inline int gru_check_status(void *cb)
|
static inline int gru_check_status(void *cb)
|
||||||
{
|
{
|
||||||
struct gru_control_block_status *cbs = (void *)cb;
|
struct gru_control_block_status *cbs = (void *)cb;
|
||||||
int ret = cbs->istatus;
|
int ret;
|
||||||
|
|
||||||
|
ret = cbs->istatus;
|
||||||
if (ret == CBS_CALL_OS)
|
if (ret == CBS_CALL_OS)
|
||||||
ret = gru_check_status_proc(cb);
|
ret = gru_check_status_proc(cb);
|
||||||
return ret;
|
return ret;
|
||||||
@@ -617,7 +619,7 @@ static inline int gru_check_status(void *cb)
|
|||||||
static inline int gru_wait(void *cb)
|
static inline int gru_wait(void *cb)
|
||||||
{
|
{
|
||||||
struct gru_control_block_status *cbs = (void *)cb;
|
struct gru_control_block_status *cbs = (void *)cb;
|
||||||
int ret = cbs->istatus;;
|
int ret = cbs->istatus;
|
||||||
|
|
||||||
if (ret != CBS_IDLE)
|
if (ret != CBS_IDLE)
|
||||||
ret = gru_wait_proc(cb);
|
ret = gru_wait_proc(cb);
|
||||||
|
@@ -214,12 +214,14 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
|
||||||
* atomic_pte_lookup
|
* atomic_pte_lookup
|
||||||
*
|
*
|
||||||
* Convert a user virtual address to a physical address
|
* Convert a user virtual address to a physical address
|
||||||
* Only supports Intel large pages (2MB only) on x86_64.
|
* Only supports Intel large pages (2MB only) on x86_64.
|
||||||
* ZZZ - hugepage support is incomplete
|
* ZZZ - hugepage support is incomplete
|
||||||
|
*
|
||||||
|
* NOTE: mmap_sem is already held on entry to this function. This
|
||||||
|
* guarantees existence of the page tables.
|
||||||
*/
|
*/
|
||||||
static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
|
static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
|
||||||
int write, unsigned long *paddr, int *pageshift)
|
int write, unsigned long *paddr, int *pageshift)
|
||||||
@@ -229,9 +231,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
|
|||||||
pud_t *pudp;
|
pud_t *pudp;
|
||||||
pte_t pte;
|
pte_t pte;
|
||||||
|
|
||||||
WARN_ON(irqs_disabled()); /* ZZZ debug */
|
|
||||||
|
|
||||||
local_irq_disable();
|
|
||||||
pgdp = pgd_offset(vma->vm_mm, vaddr);
|
pgdp = pgd_offset(vma->vm_mm, vaddr);
|
||||||
if (unlikely(pgd_none(*pgdp)))
|
if (unlikely(pgd_none(*pgdp)))
|
||||||
goto err;
|
goto err;
|
||||||
@@ -250,8 +249,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
|
|||||||
#endif
|
#endif
|
||||||
pte = *pte_offset_kernel(pmdp, vaddr);
|
pte = *pte_offset_kernel(pmdp, vaddr);
|
||||||
|
|
||||||
local_irq_enable();
|
|
||||||
|
|
||||||
if (unlikely(!pte_present(pte) ||
|
if (unlikely(!pte_present(pte) ||
|
||||||
(write && (!pte_write(pte) || !pte_dirty(pte)))))
|
(write && (!pte_write(pte) || !pte_dirty(pte)))))
|
||||||
return 1;
|
return 1;
|
||||||
@@ -324,6 +321,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
|
|||||||
* Atomic lookup is faster & usually works even if called in non-atomic
|
* Atomic lookup is faster & usually works even if called in non-atomic
|
||||||
* context.
|
* context.
|
||||||
*/
|
*/
|
||||||
|
rmb(); /* Must/check ms_range_active before loading PTEs */
|
||||||
ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
|
ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
if (!cb)
|
if (!cb)
|
||||||
@@ -543,6 +541,7 @@ int gru_get_exception_detail(unsigned long arg)
|
|||||||
ucbnum = get_cb_number((void *)excdet.cb);
|
ucbnum = get_cb_number((void *)excdet.cb);
|
||||||
cbrnum = thread_cbr_number(gts, ucbnum);
|
cbrnum = thread_cbr_number(gts, ucbnum);
|
||||||
cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
|
cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
|
||||||
|
prefetchw(cbe); /* Harmless on hardware, required for emulator */
|
||||||
excdet.opc = cbe->opccpy;
|
excdet.opc = cbe->opccpy;
|
||||||
excdet.exopc = cbe->exopccpy;
|
excdet.exopc = cbe->exopccpy;
|
||||||
excdet.ecause = cbe->ecause;
|
excdet.ecause = cbe->ecause;
|
||||||
|
@@ -398,6 +398,12 @@ static int __init gru_init(void)
|
|||||||
irq = get_base_irq();
|
irq = get_base_irq();
|
||||||
for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
|
for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
|
||||||
ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
|
ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
|
||||||
|
/* TODO: fix irq handling on x86. For now ignore failures because
|
||||||
|
* interrupts are not required & not yet fully supported */
|
||||||
|
if (ret) {
|
||||||
|
printk("!!!WARNING: GRU ignoring request failure!!!\n");
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
if (ret) {
|
if (ret) {
|
||||||
printk(KERN_ERR "%s: request_irq failed\n",
|
printk(KERN_ERR "%s: request_irq failed\n",
|
||||||
GRU_DRIVER_ID_STR);
|
GRU_DRIVER_ID_STR);
|
||||||
|
@@ -91,12 +91,7 @@
|
|||||||
#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
|
#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1))
|
||||||
|
|
||||||
/* Convert an arbitrary handle address to the beginning of the GRU segment */
|
/* Convert an arbitrary handle address to the beginning of the GRU segment */
|
||||||
#ifndef __PLUGIN__
|
|
||||||
#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
|
#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
|
||||||
#else
|
|
||||||
extern void *gmu_grubase(void *h);
|
|
||||||
#define GRUBASE(h) gmu_grubase(h)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* General addressing macros. */
|
/* General addressing macros. */
|
||||||
static inline void *get_gseg_base_address(void *base, int ctxnum)
|
static inline void *get_gseg_base_address(void *base, int ctxnum)
|
||||||
|
@@ -122,6 +122,7 @@ int gru_get_cb_exception_detail(void *cb,
|
|||||||
struct gru_control_block_extended *cbe;
|
struct gru_control_block_extended *cbe;
|
||||||
|
|
||||||
cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
|
cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
|
||||||
|
prefetchw(cbe); /* Harmless on hardware, required for emulator */
|
||||||
excdet->opc = cbe->opccpy;
|
excdet->opc = cbe->opccpy;
|
||||||
excdet->exopc = cbe->exopccpy;
|
excdet->exopc = cbe->exopccpy;
|
||||||
excdet->ecause = cbe->ecause;
|
excdet->ecause = cbe->ecause;
|
||||||
|
@@ -432,7 +432,22 @@ static inline long gru_copy_handle(void *d, void *s)
|
|||||||
return GRU_HANDLE_BYTES;
|
return GRU_HANDLE_BYTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* rewrite in assembly & use lots of prefetch */
|
static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap,
|
||||||
|
unsigned long length)
|
||||||
|
{
|
||||||
|
int i, scr;
|
||||||
|
|
||||||
|
prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
|
||||||
|
GRU_CACHE_LINE_BYTES);
|
||||||
|
|
||||||
|
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
|
||||||
|
prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
|
||||||
|
prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
|
||||||
|
GRU_CACHE_LINE_BYTES);
|
||||||
|
cb += GRU_HANDLE_STRIDE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
|
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
|
||||||
unsigned long cbrmap, unsigned long dsrmap)
|
unsigned long cbrmap, unsigned long dsrmap)
|
||||||
{
|
{
|
||||||
@@ -441,20 +456,11 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum,
|
|||||||
int i, scr;
|
int i, scr;
|
||||||
|
|
||||||
gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
|
gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
|
||||||
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
|
|
||||||
prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
|
|
||||||
GRU_CACHE_LINE_BYTES);
|
|
||||||
|
|
||||||
cb = gseg + GRU_CB_BASE;
|
cb = gseg + GRU_CB_BASE;
|
||||||
cbe = grubase + GRU_CBE_BASE;
|
cbe = grubase + GRU_CBE_BASE;
|
||||||
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
|
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
|
||||||
prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
|
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
|
||||||
prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
|
|
||||||
GRU_CACHE_LINE_BYTES);
|
|
||||||
cb += GRU_HANDLE_STRIDE;
|
|
||||||
}
|
|
||||||
|
|
||||||
cb = gseg + GRU_CB_BASE;
|
|
||||||
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
|
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
|
||||||
save += gru_copy_handle(cb, save);
|
save += gru_copy_handle(cb, save);
|
||||||
save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
|
save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
|
||||||
@@ -472,15 +478,16 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
|
|||||||
int i, scr;
|
int i, scr;
|
||||||
|
|
||||||
gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
|
gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
|
||||||
|
|
||||||
cb = gseg + GRU_CB_BASE;
|
cb = gseg + GRU_CB_BASE;
|
||||||
cbe = grubase + GRU_CBE_BASE;
|
cbe = grubase + GRU_CBE_BASE;
|
||||||
|
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
|
||||||
|
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
|
||||||
|
|
||||||
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
|
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
|
||||||
save += gru_copy_handle(save, cb);
|
save += gru_copy_handle(save, cb);
|
||||||
save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
|
save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
|
||||||
cb += GRU_HANDLE_STRIDE;
|
cb += GRU_HANDLE_STRIDE;
|
||||||
}
|
}
|
||||||
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
|
|
||||||
memcpy(save, gseg + GRU_DS_BASE, length);
|
memcpy(save, gseg + GRU_DS_BASE, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user