GRU Driver V3: fixes to resolve code review comments
Fixes problems identified in a code review: - add comment with high-level description of the GRU - prepend "gru_" to all global names - delete unused function - couple of trivial bug fixes [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jack Steiner <steiner@sgi.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
3d919e5f6b
commit
9ca8e40c13
@@ -284,16 +284,6 @@ __opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
|
|||||||
(exopc << GRU_CB_EXOPC_SHFT);
|
(exopc << GRU_CB_EXOPC_SHFT);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Prefetch a cacheline. Fetch is unconditional. Must page fault if
|
|
||||||
* no valid TLB entry is found.
|
|
||||||
* ??? should I use actual "load" or hardware prefetch???
|
|
||||||
*/
|
|
||||||
static inline void gru_prefetch(void *p)
|
|
||||||
{
|
|
||||||
*(volatile char *)p;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Architecture specific intrinsics
|
* Architecture specific intrinsics
|
||||||
*/
|
*/
|
||||||
|
@@ -112,6 +112,10 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
|
|||||||
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
|
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
|
|
||||||
|
if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
|
||||||
|
vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
vma->vm_flags |=
|
vma->vm_flags |=
|
||||||
(VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
|
(VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
|
||||||
VM_RESERVED);
|
VM_RESERVED);
|
||||||
@@ -471,8 +475,8 @@ struct vm_operations_struct gru_vm_ops = {
|
|||||||
module_init(gru_init);
|
module_init(gru_init);
|
||||||
module_exit(gru_exit);
|
module_exit(gru_exit);
|
||||||
|
|
||||||
module_param(options, ulong, 0644);
|
module_param(gru_options, ulong, 0644);
|
||||||
MODULE_PARM_DESC(options, "Various debug options");
|
MODULE_PARM_DESC(gru_options, "Various debug options");
|
||||||
|
|
||||||
MODULE_AUTHOR("Silicon Graphics, Inc.");
|
MODULE_AUTHOR("Silicon Graphics, Inc.");
|
||||||
MODULE_LICENSE("GPL");
|
MODULE_LICENSE("GPL");
|
||||||
|
@@ -638,11 +638,11 @@ int gru_kservices_init(struct gru_state *gru)
|
|||||||
cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
|
cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
|
||||||
|
|
||||||
num = GRU_NUM_KERNEL_CBR * cpus_possible;
|
num = GRU_NUM_KERNEL_CBR * cpus_possible;
|
||||||
cbr_map = reserve_gru_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
|
cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
|
||||||
gru->gs_reserved_cbrs += num;
|
gru->gs_reserved_cbrs += num;
|
||||||
|
|
||||||
num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
|
num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
|
||||||
dsr_map = reserve_gru_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
|
dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
|
||||||
gru->gs_reserved_dsr_bytes += num;
|
gru->gs_reserved_dsr_bytes += num;
|
||||||
|
|
||||||
gru->gs_active_contexts++;
|
gru->gs_active_contexts++;
|
||||||
@@ -673,7 +673,7 @@ int gru_kservices_init(struct gru_state *gru)
|
|||||||
}
|
}
|
||||||
unlock_cch_handle(cch);
|
unlock_cch_handle(cch);
|
||||||
|
|
||||||
if (options & GRU_QUICKLOOK)
|
if (gru_options & GRU_QUICKLOOK)
|
||||||
quicktest(gru);
|
quicktest(gru);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -22,7 +22,7 @@
|
|||||||
#include "grutables.h"
|
#include "grutables.h"
|
||||||
#include "gruhandles.h"
|
#include "gruhandles.h"
|
||||||
|
|
||||||
unsigned long options __read_mostly;
|
unsigned long gru_options __read_mostly;
|
||||||
|
|
||||||
static struct device_driver gru_driver = {
|
static struct device_driver gru_driver = {
|
||||||
.name = "gru"
|
.name = "gru"
|
||||||
@@ -163,14 +163,14 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
|
|||||||
return bits;
|
return bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long reserve_gru_cb_resources(struct gru_state *gru, int cbr_au_count,
|
unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
|
||||||
char *cbmap)
|
char *cbmap)
|
||||||
{
|
{
|
||||||
return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
|
return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
|
||||||
cbmap);
|
cbmap);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long reserve_gru_ds_resources(struct gru_state *gru, int dsr_au_count,
|
unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
|
||||||
char *dsmap)
|
char *dsmap)
|
||||||
{
|
{
|
||||||
return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
|
return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
|
||||||
@@ -182,10 +182,10 @@ static void reserve_gru_resources(struct gru_state *gru,
|
|||||||
{
|
{
|
||||||
gru->gs_active_contexts++;
|
gru->gs_active_contexts++;
|
||||||
gts->ts_cbr_map =
|
gts->ts_cbr_map =
|
||||||
reserve_gru_cb_resources(gru, gts->ts_cbr_au_count,
|
gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
|
||||||
gts->ts_cbr_idx);
|
gts->ts_cbr_idx);
|
||||||
gts->ts_dsr_map =
|
gts->ts_dsr_map =
|
||||||
reserve_gru_ds_resources(gru, gts->ts_dsr_au_count, NULL);
|
gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void free_gru_resources(struct gru_state *gru,
|
static void free_gru_resources(struct gru_state *gru,
|
||||||
@@ -416,6 +416,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Prefetching cachelines helps hardware performance.
|
* Prefetching cachelines helps hardware performance.
|
||||||
|
* (Strictly a performance enhancement. Not functionally required).
|
||||||
*/
|
*/
|
||||||
static void prefetch_data(void *p, int num, int stride)
|
static void prefetch_data(void *p, int num, int stride)
|
||||||
{
|
{
|
||||||
@@ -746,6 +747,8 @@ again:
|
|||||||
* gru_nopage
|
* gru_nopage
|
||||||
*
|
*
|
||||||
* Map the user's GRU segment
|
* Map the user's GRU segment
|
||||||
|
*
|
||||||
|
* Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
|
||||||
*/
|
*/
|
||||||
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||||
{
|
{
|
||||||
@@ -757,6 +760,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||||||
vma, vaddr, GSEG_BASE(vaddr));
|
vma, vaddr, GSEG_BASE(vaddr));
|
||||||
STAT(nopfn);
|
STAT(nopfn);
|
||||||
|
|
||||||
|
/* The following check ensures vaddr is a valid address in the VMA */
|
||||||
gts = gru_find_thread_state(vma, TSID(vaddr, vma));
|
gts = gru_find_thread_state(vma, TSID(vaddr, vma));
|
||||||
if (!gts)
|
if (!gts)
|
||||||
return VM_FAULT_SIGBUS;
|
return VM_FAULT_SIGBUS;
|
||||||
@@ -775,7 +779,7 @@ again:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!gts->ts_gru) {
|
if (!gts->ts_gru) {
|
||||||
while (!gru_assign_gru_context(gts)) {
|
if (!gru_assign_gru_context(gts)) {
|
||||||
mutex_unlock(&gts->ts_ctxlock);
|
mutex_unlock(&gts->ts_ctxlock);
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
|
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
|
||||||
|
@@ -122,7 +122,7 @@ static ssize_t statistics_write(struct file *file, const char __user *userbuf,
|
|||||||
|
|
||||||
static int options_show(struct seq_file *s, void *p)
|
static int options_show(struct seq_file *s, void *p)
|
||||||
{
|
{
|
||||||
seq_printf(s, "0x%lx\n", options);
|
seq_printf(s, "0x%lx\n", gru_options);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -136,7 +136,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
|
|||||||
(buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
|
(buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
if (!strict_strtoul(buf, 10, &val))
|
if (!strict_strtoul(buf, 10, &val))
|
||||||
options = val;
|
gru_options = val;
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
@@ -24,6 +24,70 @@
|
|||||||
#define __GRUTABLES_H__
|
#define __GRUTABLES_H__
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* GRU Chiplet:
|
||||||
|
* The GRU is a user addressable memory accelerator. It provides
|
||||||
|
* several forms of load, store, memset, bcopy instructions. In addition, it
|
||||||
|
* contains special instructions for AMOs, sending messages to message
|
||||||
|
* queues, etc.
|
||||||
|
*
|
||||||
|
* The GRU is an integral part of the node controller. It connects
|
||||||
|
* directly to the cpu socket. In its current implementation, there are 2
|
||||||
|
* GRU chiplets in the node controller on each blade (~node).
|
||||||
|
*
|
||||||
|
* The entire GRU memory space is fully coherent and cacheable by the cpus.
|
||||||
|
*
|
||||||
|
* Each GRU chiplet has a physical memory map that looks like the following:
|
||||||
|
*
|
||||||
|
* +-----------------+
|
||||||
|
* |/////////////////|
|
||||||
|
* |/////////////////|
|
||||||
|
* |/////////////////|
|
||||||
|
* |/////////////////|
|
||||||
|
* |/////////////////|
|
||||||
|
* |/////////////////|
|
||||||
|
* |/////////////////|
|
||||||
|
* |/////////////////|
|
||||||
|
* +-----------------+
|
||||||
|
* | system control |
|
||||||
|
* +-----------------+ _______ +-------------+
|
||||||
|
* |/////////////////| / | |
|
||||||
|
* |/////////////////| / | |
|
||||||
|
* |/////////////////| / | instructions|
|
||||||
|
* |/////////////////| / | |
|
||||||
|
* |/////////////////| / | |
|
||||||
|
* |/////////////////| / |-------------|
|
||||||
|
* |/////////////////| / | |
|
||||||
|
* +-----------------+ | |
|
||||||
|
* | context 15 | | data |
|
||||||
|
* +-----------------+ | |
|
||||||
|
* | ...... | \ | |
|
||||||
|
* +-----------------+ \____________ +-------------+
|
||||||
|
* | context 1 |
|
||||||
|
* +-----------------+
|
||||||
|
* | context 0 |
|
||||||
|
* +-----------------+
|
||||||
|
*
|
||||||
|
* Each of the "contexts" is a chunk of memory that can be mmaped into user
|
||||||
|
* space. The context consists of 2 parts:
|
||||||
|
*
|
||||||
|
* - an instruction space that can be directly accessed by the user
|
||||||
|
* to issue GRU instructions and to check instruction status.
|
||||||
|
*
|
||||||
|
* - a data area that acts as normal RAM.
|
||||||
|
*
|
||||||
|
* User instructions contain virtual addresses of data to be accessed by the
|
||||||
|
* GRU. The GRU contains a TLB that is used to convert these user virtual
|
||||||
|
* addresses to physical addresses.
|
||||||
|
*
|
||||||
|
* The "system control" area of the GRU chiplet is used by the kernel driver
|
||||||
|
* to manage user contexts and to perform functions such as TLB dropin and
|
||||||
|
* purging.
|
||||||
|
*
|
||||||
|
* One context may be reserved for the kernel and used for cross-partition
|
||||||
|
* communication. The GRU will also be used to asynchronously zero out
|
||||||
|
* large blocks of memory (not currently implemented).
|
||||||
|
*
|
||||||
|
*
|
||||||
* Tables:
|
* Tables:
|
||||||
*
|
*
|
||||||
* VDATA-VMA Data - Holds a few parameters. Head of linked list of
|
* VDATA-VMA Data - Holds a few parameters. Head of linked list of
|
||||||
@@ -190,14 +254,14 @@ struct gru_stats_s {
|
|||||||
#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
|
#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
|
||||||
|
|
||||||
#define STAT(id) do { \
|
#define STAT(id) do { \
|
||||||
if (options & OPT_STATS) \
|
if (gru_options & OPT_STATS) \
|
||||||
atomic_long_inc(&gru_stats.id); \
|
atomic_long_inc(&gru_stats.id); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#ifdef CONFIG_SGI_GRU_DEBUG
|
#ifdef CONFIG_SGI_GRU_DEBUG
|
||||||
#define gru_dbg(dev, fmt, x...) \
|
#define gru_dbg(dev, fmt, x...) \
|
||||||
do { \
|
do { \
|
||||||
if (options & OPT_DPRINT) \
|
if (gru_options & OPT_DPRINT) \
|
||||||
dev_dbg(dev, "%s: " fmt, __func__, x); \
|
dev_dbg(dev, "%s: " fmt, __func__, x); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#else
|
#else
|
||||||
@@ -529,9 +593,9 @@ extern void gru_flush_all_tlb(struct gru_state *gru);
|
|||||||
extern int gru_proc_init(void);
|
extern int gru_proc_init(void);
|
||||||
extern void gru_proc_exit(void);
|
extern void gru_proc_exit(void);
|
||||||
|
|
||||||
extern unsigned long reserve_gru_cb_resources(struct gru_state *gru,
|
extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
|
||||||
int cbr_au_count, char *cbmap);
|
int cbr_au_count, char *cbmap);
|
||||||
extern unsigned long reserve_gru_ds_resources(struct gru_state *gru,
|
extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
|
||||||
int dsr_au_count, char *dsmap);
|
int dsr_au_count, char *dsmap);
|
||||||
extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
|
extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
|
||||||
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
|
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
|
||||||
@@ -540,6 +604,6 @@ extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
|
|||||||
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
|
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
|
||||||
unsigned long len);
|
unsigned long len);
|
||||||
|
|
||||||
extern unsigned long options;
|
extern unsigned long gru_options;
|
||||||
|
|
||||||
#endif /* __GRUTABLES_H__ */
|
#endif /* __GRUTABLES_H__ */
|
||||||
|
@@ -242,7 +242,9 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn,
|
|||||||
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
|
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
|
||||||
ms_notifier);
|
ms_notifier);
|
||||||
|
|
||||||
atomic_dec(&gms->ms_range_active);
|
/* ..._and_test() provides needed barrier */
|
||||||
|
(void)atomic_dec_and_test(&gms->ms_range_active);
|
||||||
|
|
||||||
wake_up_all(&gms->ms_wait_queue);
|
wake_up_all(&gms->ms_wait_queue);
|
||||||
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
|
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user