Merge branch 'amd-iommu/2.6.32' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu into core/iommu
@@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);

#ifdef CONFIG_IOMMU_API
/*
 * Domain for untranslated devices - only allocated
 * if iommu=pt passed on kernel cmd line.
 */
static struct protection_domain *pt_domain;

static struct iommu_ops amd_iommu_ops;
#endif

/*
 * general struct to manage commands send to an IOMMU
@@ -55,16 +59,16 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);
static u64* alloc_pte(struct protection_domain *dom,
unsigned long address, u64
**pte_page, gfp_t gfp);
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address, int end_lvl,
u64 **pte_page, gfp_t gfp);
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
unsigned long start_page,
unsigned int pages);

#ifndef BUS_NOTIFY_UNBOUND_DRIVER
#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
#endif
static void reset_iommu_command_buffer(struct amd_iommu *iommu);
static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address, int map_size);
static void update_domain(struct protection_domain *domain);

#ifdef CONFIG_AMD_IOMMU_STATS

@@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu)
 *
 ****************************************************************************/

static void iommu_print_event(void *__evt)
static void dump_dte_entry(u16 devid)
{
int i;

for (i = 0; i < 8; ++i)
pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
amd_iommu_dev_table[devid].data[i]);
}

static void dump_command(unsigned long phys_addr)
{
struct iommu_cmd *cmd = phys_to_virt(phys_addr);
int i;

for (i = 0; i < 4; ++i)
pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
}

static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
u32 *event = __evt;
int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
@@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt)
int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
u64 address = (u64)(((u64)event[3]) << 32) | event[2];

printk(KERN_ERR "AMD IOMMU: Event logged [");
printk(KERN_ERR "AMD-Vi: Event logged [");

switch (type) {
case EVENT_TYPE_ILL_DEV:
@@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt)
"address=0x%016llx flags=0x%04x]\n",
PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
address, flags);
dump_dte_entry(devid);
break;
case EVENT_TYPE_IO_FAULT:
printk("IO_PAGE_FAULT device=%02x:%02x.%x "
@@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt)
break;
case EVENT_TYPE_ILL_CMD:
printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
reset_iommu_command_buffer(iommu);
dump_command(address);
break;
case EVENT_TYPE_CMD_HARD_ERR:
printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
@@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

while (head != tail) {
iommu_print_event(iommu->evt_buf + head);
iommu_print_event(iommu, iommu->evt_buf + head);
head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
}

@@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu)
status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);

if (unlikely(i == EXIT_LOOP_COUNT))
panic("AMD IOMMU: Completion wait loop failed\n");
if (unlikely(i == EXIT_LOOP_COUNT)) {
spin_unlock(&iommu->lock);
reset_iommu_command_buffer(iommu);
spin_lock(&iommu->lock);
}
}

/*
@@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
}

/*
 * This function is used to flush the IO/TLB for a given protection domain
 * on every IOMMU in the system
 * This function flushes one domain on one IOMMU
 */
static void iommu_flush_domain(u16 domid)
static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid)
{
unsigned long flags;
struct amd_iommu *iommu;
struct iommu_cmd cmd;

INC_STATS_COUNTER(domain_flush_all);
unsigned long flags;

__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
domid, 1, 1);

for_each_iommu(iommu) {
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
__iommu_wait_for_completion(iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
}
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
__iommu_wait_for_completion(iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
}

void amd_iommu_flush_all_domains(void)
static void flush_all_domains_on_iommu(struct amd_iommu *iommu)
{
int i;

for (i = 1; i < MAX_DOMAIN_ID; ++i) {
if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
continue;
iommu_flush_domain(i);
flush_domain_on_iommu(iommu, i);
}

}

/*
 * This function is used to flush the IO/TLB for a given protection domain
 * on every IOMMU in the system
 */
static void iommu_flush_domain(u16 domid)
{
struct amd_iommu *iommu;

INC_STATS_COUNTER(domain_flush_all);

for_each_iommu(iommu)
flush_domain_on_iommu(iommu, domid);
}

void amd_iommu_flush_all_domains(void)
{
struct amd_iommu *iommu;

for_each_iommu(iommu)
flush_all_domains_on_iommu(iommu);
}

static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
{
int i;

for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (iommu != amd_iommu_rlookup_table[i])
continue;

iommu_queue_inv_dev_entry(iommu, i);
iommu_completion_wait(iommu);
}
}

void amd_iommu_flush_all_devices(void)
static void flush_devices_by_domain(struct protection_domain *domain)
{
struct amd_iommu *iommu;
int i;

for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (amd_iommu_pd_table[i] == NULL)
if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
(amd_iommu_pd_table[i] != domain))
continue;

iommu = amd_iommu_rlookup_table[i];
@@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void)
}
}

static void reset_iommu_command_buffer(struct amd_iommu *iommu)
{
pr_err("AMD-Vi: Resetting IOMMU command buffer\n");

if (iommu->reset_in_progress)
panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");

iommu->reset_in_progress = true;

amd_iommu_reset_cmd_buffer(iommu);
flush_all_devices_for_iommu(iommu);
flush_all_domains_on_iommu(iommu);

iommu->reset_in_progress = false;
}

void amd_iommu_flush_all_devices(void)
{
flush_devices_by_domain(NULL);
}

/****************************************************************************
 *
 * The functions below are used the create the page table mappings for
@@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void)
static int iommu_map_page(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
int prot,
int map_size)
{
u64 __pte, *pte;

bus_addr = PAGE_ALIGN(bus_addr);
phys_addr = PAGE_ALIGN(phys_addr);

/* only support 512GB address spaces for now */
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
BUG_ON(!PM_ALIGNED(map_size, bus_addr));
BUG_ON(!PM_ALIGNED(map_size, phys_addr));

if (!(prot & IOMMU_PROT_MASK))
return -EINVAL;

pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL);

if (IOMMU_PTE_PRESENT(*pte))
return -EBUSY;
@@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom,

*pte = __pte;

update_domain(dom);

return 0;
}

static void iommu_unmap_page(struct protection_domain *dom,
unsigned long bus_addr)
unsigned long bus_addr, int map_size)
{
u64 *pte;
u64 *pte = fetch_pte(dom, bus_addr, map_size);

pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];

if (!IOMMU_PTE_PRESENT(*pte))
return;

pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];

if (!IOMMU_PTE_PRESENT(*pte))
return;

pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];

*pte = 0;
if (pte)
*pte = 0;
}

/*
@@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,

for (addr = e->address_start; addr < e->address_end;
addr += PAGE_SIZE) {
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
PM_MAP_4k);
if (ret)
return ret;
/*
@@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
 * This function checks if there is a PTE for a given dma address. If
 * there is one, it returns the pointer to it.
 */
static u64* fetch_pte(struct protection_domain *domain,
unsigned long address)
static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address, int map_size)
{
int level;
u64 *pte;

pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];

if (!IOMMU_PTE_PRESENT(*pte))
return NULL;
while (level > map_size) {
if (!IOMMU_PTE_PRESENT(*pte))
return NULL;

pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(address)];
level -= 1;

if (!IOMMU_PTE_PRESENT(*pte))
return NULL;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[PM_LEVEL_INDEX(level, address)];

pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(address)];
if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
pte = NULL;
break;
}
}

return pte;
}
@@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu,
u64 *pte, *pte_page;

for (i = 0; i < num_ptes; ++i) {
pte = alloc_pte(&dma_dom->domain, address,
pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k,
&pte_page, gfp);
if (!pte)
goto out_free;
@@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu,
for (i = dma_dom->aperture[index]->offset;
i < dma_dom->aperture_size;
i += PAGE_SIZE) {
u64 *pte = fetch_pte(&dma_dom->domain, i);
u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
continue;

dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
}

update_domain(&dma_dom->domain);

return 0;

out_free:
update_domain(&dma_dom->domain);

free_page((unsigned long)dma_dom->aperture[index]->bitmap);

kfree(dma_dom->aperture[index]);
@@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
dma_dom->domain.id = domain_id_alloc();
if (dma_dom->domain.id == 0)
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
dma_dom->domain.priv = dma_dom;
@@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid)
return dom;
}

static void set_dte_entry(u16 devid, struct protection_domain *domain)
{
u64 pte_root = virt_to_phys(domain->pt_root);

pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;

amd_iommu_dev_table[devid].data[2] = domain->id;
amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);

amd_iommu_pd_table[devid] = domain;
}

/*
 * If a device is not yet associated with a domain, this function does
 * assigns it visible for the hardware
 */
static void __attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
{
/* lock domain */
spin_lock(&domain->lock);

/* update DTE entry */
set_dte_entry(devid, domain);

domain->dev_cnt += 1;

/* ready */
spin_unlock(&domain->lock);
}

/*
 * If a device is not yet associated with a domain, this function does
 * assigns it visible for the hardware
@@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu,
u16 devid)
{
unsigned long flags;
u64 pte_root = virt_to_phys(domain->pt_root);

domain->dev_cnt += 1;

pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;

write_lock_irqsave(&amd_iommu_devtable_lock, flags);
amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
amd_iommu_dev_table[devid].data[2] = domain->id;

amd_iommu_pd_table[devid] = domain;
__attach_device(iommu, domain, devid);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);

/*
 * We might boot into a crash-kernel here. The crashed kernel
 * left the caches in the IOMMU dirty. So we have to flush
 * here to evict all dirty stuff.
 */
/*
 * We might boot into a crash-kernel here. The crashed kernel
 * left the caches in the IOMMU dirty. So we have to flush
 * here to evict all dirty stuff.
 */
iommu_queue_inv_dev_entry(iommu, devid);
iommu_flush_tlb_pde(iommu, domain->id);
}
@@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid)

/* ready */
spin_unlock(&domain->lock);

/*
 * If we run in passthrough mode the device must be assigned to the
 * passthrough domain if it is detached from any other domain
 */
if (iommu_pass_through) {
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
__attach_device(iommu, pt_domain, devid);
}
}

/*
@@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb,
case BUS_NOTIFY_UNBOUND_DRIVER:
if (!domain)
goto out;
if (iommu_pass_through)
break;
detach_device(domain, devid);
break;
case BUS_NOTIFY_ADD_DEVICE:
@@ -1292,40 +1409,92 @@ static int get_device_resources(struct device *dev,
return 1;
}

static void update_device_table(struct protection_domain *domain)
{
unsigned long flags;
int i;

for (i = 0; i <= amd_iommu_last_bdf; ++i) {
if (amd_iommu_pd_table[i] != domain)
continue;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
set_dte_entry(i, domain);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
}

static void update_domain(struct protection_domain *domain)
{
if (!domain->updated)
return;

update_device_table(domain);
flush_devices_by_domain(domain);
iommu_flush_domain(domain->id);

domain->updated = false;
}

/*
 * If the pte_page is not yet allocated this function is called
 * This function is used to add another level to an IO page table. Adding
 * another level increases the size of the address space by 9 bits to a size up
 * to 64 bits.
 */
static u64* alloc_pte(struct protection_domain *dom,
unsigned long address, u64 **pte_page, gfp_t gfp)
static bool increase_address_space(struct protection_domain *domain,
gfp_t gfp)
{
u64 *pte;

if (domain->mode == PAGE_MODE_6_LEVEL)
/* address space already 64 bit large */
return false;

pte = (void *)get_zeroed_page(gfp);
if (!pte)
return false;

*pte = PM_LEVEL_PDE(domain->mode,
virt_to_phys(domain->pt_root));
domain->pt_root = pte;
domain->mode += 1;
domain->updated = true;

return true;
}

static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address,
int end_lvl,
u64 **pte_page,
gfp_t gfp)
{
u64 *pte, *page;
int level;

pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
while (address > PM_LEVEL_SIZE(domain->mode))
increase_address_space(domain, gfp);

if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = IOMMU_L2_PDE(virt_to_phys(page));
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];

while (level > end_lvl) {
if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
}

level -= 1;

pte = IOMMU_PTE_PAGE(*pte);

if (pte_page && level == end_lvl)
*pte_page = pte;

pte = &pte[PM_LEVEL_INDEX(level, address)];
}

pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(address)];

if (!IOMMU_PTE_PRESENT(*pte)) {
page = (u64 *)get_zeroed_page(gfp);
if (!page)
return NULL;
*pte = IOMMU_L1_PDE(virt_to_phys(page));
}

pte = IOMMU_PTE_PAGE(*pte);

if (pte_page)
*pte_page = pte;

pte = &pte[IOMMU_PTE_L0_INDEX(address)];

return pte;
}

@@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom,

pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
if (!pte) {
pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page,
GFP_ATOMIC);
aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
} else
pte += IOMMU_PTE_L0_INDEX(address);
pte += PM_LEVEL_INDEX(0, address);

update_domain(&dom->domain);

return pte;
}
@@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
if (!pte)
return;

pte += IOMMU_PTE_L0_INDEX(address);
pte += PM_LEVEL_INDEX(0, address);

WARN_ON(!*pte);

@@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain)
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}

static int amd_iommu_domain_init(struct iommu_domain *dom)
static void protection_domain_free(struct protection_domain *domain)
{
if (!domain)
return;

if (domain->id)
domain_id_free(domain->id);

kfree(domain);
}

static struct protection_domain *protection_domain_alloc(void)
{
struct protection_domain *domain;

domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return -ENOMEM;
return NULL;

spin_lock_init(&domain->lock);
domain->mode = PAGE_MODE_3_LEVEL;
domain->id = domain_id_alloc();
if (!domain->id)
goto out_err;

return domain;

out_err:
kfree(domain);

return NULL;
}

static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;

domain = protection_domain_alloc();
if (!domain)
goto out_free;

domain->mode = PAGE_MODE_3_LEVEL;
domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
if (!domain->pt_root)
goto out_free;
@@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
return 0;

out_free:
kfree(domain);
protection_domain_free(domain);

return -ENOMEM;
}
@@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
paddr &= PAGE_MASK;

for (i = 0; i < npages; ++i) {
ret = iommu_map_page(domain, iova, paddr, prot);
ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
if (ret)
return ret;

@@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
iova &= PAGE_MASK;

for (i = 0; i < npages; ++i) {
iommu_unmap_page(domain, iova);
iommu_unmap_page(domain, iova, PM_MAP_4k);
iova += PAGE_SIZE;
}

@@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
phys_addr_t paddr;
u64 *pte;

pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
pte = fetch_pte(domain, iova, PM_MAP_4k);

if (!IOMMU_PTE_PRESENT(*pte))
return 0;

pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(iova)];

if (!IOMMU_PTE_PRESENT(*pte))
return 0;

pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(iova)];

if (!IOMMU_PTE_PRESENT(*pte))
if (!pte || !IOMMU_PTE_PRESENT(*pte))
return 0;

paddr = *pte & IOMMU_PAGE_MASK;
@@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = {
.domain_has_cap = amd_iommu_domain_has_cap,
};

/*****************************************************************************
 *
 * The next functions do a basic initialization of IOMMU for pass through
 * mode
 *
 * In passthrough mode the IOMMU is initialized and enabled but not used for
 * DMA-API translation.
 *
 *****************************************************************************/

int __init amd_iommu_init_passthrough(void)
{
struct pci_dev *dev = NULL;
u16 devid, devid2;

/* allocate passthroug domain */
pt_domain = protection_domain_alloc();
if (!pt_domain)
return -ENOMEM;

pt_domain->mode |= PAGE_MODE_NONE;

while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
struct amd_iommu *iommu;

devid = calc_devid(dev->bus->number, dev->devfn);
if (devid > amd_iommu_last_bdf)
continue;

devid2 = amd_iommu_alias_table[devid];

iommu = amd_iommu_rlookup_table[devid2];
if (!iommu)
continue;

__attach_device(iommu, pt_domain, devid);
__attach_device(iommu, pt_domain, devid2);
}

pr_info("AMD-Vi: Initialized for Passthrough Mode\n");

return 0;
}
@@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
/* Function to enable the hardware */
static void iommu_enable(struct amd_iommu *iommu)
{
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);

iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
@@ -434,6 +434,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
return cmd_buf;
}

/*
 * This function resets the command buffer if the IOMMU stopped fetching
 * commands from it.
 */
void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
{
iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);

writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);

iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
}

/*
 * This function writes the command buffer address to the hardware and
 * enables it.
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
&entry, sizeof(entry));

/* set head and tail to zero manually */
writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);

iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
amd_iommu_reset_cmd_buffer(iommu);
}

static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table)
switch (*p) {
case ACPI_IVHD_TYPE:

DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
DUMP_printk("device: %02x:%02x.%01x cap: %04x "
"seg: %d flags: %01x info %04x\n",
PCI_BUS(h->devid), PCI_SLOT(h->devid),
PCI_FUNC(h->devid), h->cap_ptr,
@@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)

r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
IRQF_SAMPLE_RANDOM,
"AMD IOMMU",
"AMD-Vi",
NULL);

if (r) {
@@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void)

if (no_iommu) {
printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
printk(KERN_INFO "AMD-Vi disabled by kernel command line\n");
return 0;
}

@@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void)
if (ret)
goto free;

ret = amd_iommu_init_dma_ops();
if (iommu_pass_through)
ret = amd_iommu_init_passthrough();
else
ret = amd_iommu_init_dma_ops();
if (ret)
goto free;

enable_iommus();

printk(KERN_INFO "AMD IOMMU: device isolation ");
if (iommu_pass_through)
goto out;

printk(KERN_INFO "AMD-Vi: device isolation ");
if (amd_iommu_isolate)
printk("enabled\n");
else
printk("disabled\n");

if (amd_iommu_unmap_flush)
printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n");
else
printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n");

out:
return ret;
@@ -3793,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);

if (cfg->move_in_progress)
send_cleanup_vector(cfg);

return irq;
}
@@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
unsigned long mask = cpumask_bits(cpumask)[0];
unsigned long flags;

if (WARN_ONCE(!mask, "empty IPI mask"))
return;

local_irq_save(flags);
WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
@@ -44,6 +44,11 @@ static struct apic *apic_probe[] __initdata = {
NULL,
};

static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
{
return hard_smp_processor_id() >> index_msb;
}

/*
 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
 */
@@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void)
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}

if (is_vsmp_box()) {
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;
}

/*
 * Now that apic routing model is selected, configure the
 * fault handling for intr remapping.
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled();
}

/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */

/*
 * need to use more than cpu 0, because we need more vectors when
 * MSI-X are used.
 */
static const struct cpumask *x2apic_target_cpus(void)
{
return cpumask_of(0);
return cpu_online_mask;
}

/*
@@ -170,7 +172,7 @@ static unsigned long set_apic_id(unsigned int id)

static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
{
return current_cpu_data.initial_apicid >> index_msb;
return initial_apicid >> index_msb;
}

static void x2apic_send_IPI_self(int vector)
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}

/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */

/*
 * need to use more than cpu 0, because we need more vectors when
 * MSI-X are used.
 */
static const struct cpumask *x2apic_target_cpus(void)
{
return cpumask_of(0);
return cpu_online_mask;
}

static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -162,7 +164,7 @@ static unsigned long set_apic_id(unsigned int id)

static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
{
return current_cpu_data.initial_apicid >> index_msb;
return initial_apicid >> index_msb;
}

static void x2apic_send_IPI_self(int vector)
@@ -46,7 +46,7 @@ static int early_get_nodeid(void)
return node_id.s.node_id;
}

static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
if (!strcmp(oem_table_id, "UVL"))
@@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector)
apic_write(APIC_SELF_IPI, vector);
}

struct apic apic_x2apic_uv_x = {
struct apic __refdata apic_x2apic_uv_x = {

.name = "UV large system",
.probe = NULL,
@@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = {
.apic_id_registered = uv_apic_id_registered,

.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 1, /* logical */
.irq_dest_mode = 0, /* physical */

.target_cpus = uv_target_cpus,
.disable_esr = 0,
@@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
BUG();
}

static __init void map_low_mmrs(void)
{
init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
}

enum map_type {map_wb, map_uc};

static __init void map_high(char *id, unsigned long base, int shift,
@@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode)
map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
}

static __init void map_config_high(int max_pnode)
{
union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;

cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
if (cfg.s.enable)
map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
}

static __init void map_mmr_high(int max_pnode)
{
union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;

mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
if (mmr.s.enable)
map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
}

static __init void map_mmioh_high(int max_pnode)
{
union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -566,8 +540,6 @@ void __init uv_system_init(void)
unsigned long mmr_base, present, paddr;
unsigned short pnode_mask;

map_low_mmrs();

m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
@@ -591,6 +563,8 @@ void __init uv_system_init(void)
bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
uv_blade_info = kmalloc(bytes, GFP_KERNEL);
BUG_ON(!uv_blade_info);
for (blade = 0; blade < uv_num_possible_blades(); blade++)
uv_blade_info[blade].memory_nid = -1;

get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);

@@ -629,6 +603,9 @@ void __init uv_system_init(void)
lcpu = uv_blade_info[blade].nr_possible_cpus;
uv_blade_info[blade].nr_possible_cpus++;

/* Any node on the blade, else will contain -1. */
uv_blade_info[blade].memory_nid = nid;

uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
uv_cpu_hub_info(cpu)->m_val = m_val;
@@ -662,11 +639,10 @@ void __init uv_system_init(void)
pnode = (paddr >> m_val) & pnode_mask;
blade = boot_pnode_to_blade(pnode);
uv_node_to_blade[nid] = blade;
max_pnode = max(pnode, max_pnode);
}

map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_config_high(max_pnode);
map_mmioh_high(max_pnode);

uv_cpu_init();
@@ -811,7 +811,7 @@ static int apm_do_idle(void)
u8 ret = 0;
int idled = 0;
int polling;
int err;
int err = 0;

polling = !!(current_thread_info()->status & TS_POLLING);
if (polling) {
@@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_common.o = -pg
endif

# Make sure load_percpu_segment has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)

obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
level = cpuid_eax(1);
if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);

/*
 * Some BIOSes incorrectly force this feature, but only K8
 * revision D (model = 0x14) and later actually support it.
 */
if (c->x86_model < 0x14)
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
}
if (c->x86 == 0x10 || c->x86 == 0x11)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}

static const struct cpu_dev *this_cpu __cpuinitdata;
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
display_cacheinfo(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
if (c->cpuid_level == -1) {
/* No cpuid. It must be an ancient CPU */
if (c->x86 == 4)
strcpy(c->x86_model_id, "486");
else if (c->x86 == 3)
strcpy(c->x86_model_id, "386");
}
#endif
}

static const struct cpu_dev __cpuinitconst default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
.c_x86_vendor = X86_VENDOR_UNKNOWN,
};

static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;

DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)

static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};

static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
display_cacheinfo(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
if (c->cpuid_level == -1) {
/* No cpuid. It must be an ancient CPU */
if (c->x86 == 4)
strcpy(c->x86_model_id, "486");
else if (c->x86 == 3)
strcpy(c->x86_model_id, "386");
}
#endif
}

static const struct cpu_dev __cpuinitconst default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
.c_x86_vendor = X86_VENDOR_UNKNOWN,
};

static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
@@ -1226,8 +1226,13 @@ static void mce_init(void)
}

/* Add per CPU specific workarounds here */
static void mce_cpu_quirks(struct cpuinfo_x86 *c)
static int mce_cpu_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
return -EOPNOTSUPP;
}

/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
if (c->x86 == 15 && banks > 4) {
@@ -1273,11 +1278,20 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
monarch_timeout < 0)
monarch_timeout = USEC_PER_SEC;

/*
 * There are also broken BIOSes on some Pentium M and
 * earlier systems:
 */
if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
mce_bootlog = 0;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
if (mce_bootlog != 0)
mce_panic_timeout = 30;

return 0;
}

static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
@@ -1338,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
if (!mce_available(c))
return;

if (mce_cap_init() < 0) {
if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
mce_disabled = 1;
return;
}
mce_cpu_quirks(c);

machine_check_vector = do_machine_check;
@@ -36,6 +36,7 @@

static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
static DEFINE_PER_CPU(bool, thermal_throttle_active);

static atomic_t therm_throt_en = ATOMIC_INIT(0);

@@ -96,27 +97,33 @@ static int therm_throt_process(int curr)
{
unsigned int cpu = smp_processor_id();
__u64 tmp_jiffs = get_jiffies_64();
bool was_throttled = __get_cpu_var(thermal_throttle_active);
bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;

if (curr)
if (is_throttled)
__get_cpu_var(thermal_throttle_count)++;

if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
if (!(was_throttled ^ is_throttled) &&
time_before64(tmp_jiffs, __get_cpu_var(next_check)))
return 0;

__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;

/* if we just entered the thermal event */
if (curr) {
if (is_throttled) {
printk(KERN_CRIT "CPU%d: Temperature above threshold, "
"cpu clock throttled (total events = %lu)\n", cpu,
__get_cpu_var(thermal_throttle_count));
"cpu clock throttled (total events = %lu)\n",
cpu, __get_cpu_var(thermal_throttle_count));

add_taint(TAINT_MACHINE_CHECK);
} else {
printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
return 1;
}
if (was_throttled) {
printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
return 1;
}

return 1;
return 0;
}

#ifdef CONFIG_SYSFS
@@ -55,6 +55,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
int apic;
u64 max_period;
u64 intel_ctrl;
};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
[PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);

static bool reserve_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int i;

if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
#endif

return true;

#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
enable_lapic_nmi_watchdog();

return false;
#endif
}

static void release_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int i;

for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)

if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
#endif
}

static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
} else {
/*
 * If we have a PMU initialized but no APIC
 * interrupts, we cannot sample hardware
 * counters (user-space has to fall back and
 * sample via a hrtimer based software counter):
 */
if (!x86_pmu.apic)
return -EOPNOTSUPP;
}

counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)

void set_perf_counter_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}

void perf_counters_lapic_init(void)
{
if (!x86_pmu_initialized())
#ifdef CONFIG_X86_LOCAL_APIC
if (!x86_pmu.apic || !x86_pmu_initialized())
return;

/*
 * Always use NMI for PMU
 */
apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
}

static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,

regs = args->regs;

#ifdef CONFIG_X86_LOCAL_APIC
apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
/*
 * Can't rely on the handled return value to say it was our NMI, two
 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
.event_map = p6_pmu_event_map,
.raw_event = p6_pmu_raw_event,
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
/*
 * Intel PMCs cannot be accessed sanely above 32 bit width,
 * so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
};
@@ -1589,12 +1614,13 @@ static int p6_pmu_init(void)
return -ENODEV;
}

if (!cpu_has_apic) {
pr_info("no Local APIC, try rebooting with lapic");
return -ENODEV;
}
x86_pmu = p6_pmu;

x86_pmu = p6_pmu;
if (!cpu_has_apic) {
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
pr_info("no hardware sampling interrupt available.\n");
x86_pmu.apic = 0;
}

return 0;
}
@@ -354,7 +354,7 @@ void __init efi_init(void)
 */
c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
if (c16) {
for (i = 0; i < sizeof(vendor) && *c16; ++i)
for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
vendor[i] = *c16++;
vendor[i] = '\0';
} else
@@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void)
&& end_pfn <= max_pfn_mapped))
va = __va(md->phys_addr);
else
va = efi_ioremap(md->phys_addr, size);
va = efi_ioremap(md->phys_addr, size, md->type);

md->virt_addr = (u64) (unsigned long) va;
@@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void)
early_runtime_code_mapping_set_exec(0);
}

void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
u32 type)
{
unsigned long last_map_pfn;

if (type == EFI_MEMORY_MAPPED_IO)
return ioremap(phys_addr, size);

last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size)
return NULL;
@@ -261,9 +261,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 * which will be freed later
 */

#ifndef CONFIG_HOTPLUG_CPU
.section .init.text,"ax",@progbits
#endif
__CPUINIT

#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
@@ -602,7 +600,7 @@ ignore_int:
#endif
iret

.section .cpuinit.data,"wa"
__REFDATA
.align 4
ENTRY(initial_code)
.long i386_start_kernel
@@ -32,7 +32,14 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;

int iommu_pass_through;
/*
 * This variable becomes 1 if iommu=pt is passed on the kernel command line.
 * If this variable is 1, IOMMU implementations do no DMA translation for
 * devices and allow every device to access to whole physical memory. This is
 * useful if a user want to use an IOMMU only for KVM device assignment to
 * guests and not for driver dma translation.
 */
int iommu_pass_through __read_mostly;

dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);
@@ -519,16 +519,12 @@ static void c1e_idle(void)
if (!cpumask_test_cpu(cpu, c1e_mask)) {
cpumask_set_cpu(cpu, c1e_mask);
/*
 * Force broadcast so ACPI can not interfere. Needs
 * to run with interrupts enabled as it uses
 * smp_function_call.
 * Force broadcast so ACPI can not interfere.
 */
local_irq_enable();
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
&cpu);
printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
cpu);
local_irq_disable();
}
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/efi.h>
#include <linux/dmi.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
@@ -17,7 +18,6 @@
#include <asm/cpu.h>

#ifdef CONFIG_X86_32
# include <linux/dmi.h>
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
#else
@@ -404,6 +404,46 @@ EXPORT_SYMBOL(machine_real_restart);

#endif /* CONFIG_X86_32 */

/*
 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
 */
static int __init set_pci_reboot(const struct dmi_system_id *d)
{
if (reboot_type != BOOT_CF9) {
reboot_type = BOOT_CF9;
printk(KERN_INFO "%s series board detected. "
"Selecting PCI-method for reboots.\n", d->ident);
}
return 0;
}

static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
{ /* Handle problems with rebooting on Apple MacBook5 */
.callback = set_pci_reboot,
.ident = "Apple MacBook5",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
},
},
{ /* Handle problems with rebooting on Apple MacBookPro5 */
.callback = set_pci_reboot,
.ident = "Apple MacBookPro5",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
},
},
{ }
};

static int __init pci_reboot_init(void)
{
dmi_check_system(pci_reboot_dmi_table);
return 0;
}
core_initcall(pci_reboot_init);

static inline void kb_wait(void)
{
int i;
@@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)

if (!chosen) {
size_t vm_size = VMALLOC_END - VMALLOC_START;
size_t tot_size = num_possible_cpus() * PMD_SIZE;
size_t tot_size = nr_cpu_ids * PMD_SIZE;

/* on non-NUMA, embedding is better */
if (!pcpu_need_numa())
@@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;

/* allocate pointer array and alloc large pages */
map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
pcpul_map = alloc_bootmem(map_size);

for_each_possible_cpu(cpu) {
@@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)

/* allocate address and map */
pcpul_vm.flags = VM_ALLOC;
pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
vm_area_register_early(&pcpul_vm, PMD_SIZE);

for_each_possible_cpu(cpu) {
@@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
PMD_SIZE, pcpul_vm.addr, NULL);

/* sort pcpul_map array for pcpu_lpage_remapped() */
for (i = 0; i < num_possible_cpus() - 1; i++)
for (j = i + 1; j < num_possible_cpus(); j++)
for (i = 0; i < nr_cpu_ids - 1; i++)
for (j = i + 1; j < nr_cpu_ids; j++)
if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
struct pcpul_ent tmp = pcpul_map[i];
pcpul_map[i] = pcpul_map[j];
@@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr)
{
void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
int left = 0, right = num_possible_cpus() - 1;
int left = 0, right = nr_cpu_ids - 1;
int pos;

/* pcpul in use at all? */
@@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
pcpu4k_nr_static_pages = PFN_UP(static_size);

/* unaligned allocations can't be freed, round up to page size */
pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus()
pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
* sizeof(pcpu4k_pages[0]));
pcpu4k_pages = alloc_bootmem(pages_size);
@@ -744,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode)
 * note that base_dest_nodeid is actually a nasid.
 */
ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
ad2->header.dest_subnodeid = 0x10; /* the LB */
ad2->header.command = UV_NET_ENDPOINT_INTD;
ad2->header.int_both = 1;
/*
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
 * use the TSC value at the transitions to calculate a pretty
 * good value for the TSC frequencty.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0;

	for (count = 0; count < 50000; count++) {
		/* Ignore LSB */
		inb(0x42);
		if (inb(0x42) != val)
		if (!pit_verify_msb(val))
			break;
		tsc = get_cycles();
	}
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
	 * to do that is to just read back the 16-bit counter
	 * once from the PIT.
	 */
	inb(0x42);
	inb(0x42);
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
			 * Iterate until the error is less than 500 ppm
			 */
			delta -= tsc;
			if (d1+d2 < delta >> 11)
				goto success;
			if (d1+d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 *
			 * This also guarantees serialization of the
			 * last cycle read ('d2') in pit_expect_msb.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	printk("Fast TSC calibration failed\n");

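The reworked quick_pit_calibrate() loop above only succeeds once the combined endpoint uncertainty d1 + d2 drops below delta >> 11, i.e. below 1/2048 of the measured TSC delta (about 488 ppm, under the 500 ppm target named in the comment), and it then re-reads the PIT via pit_verify_msb() so the final cycle read is known to be stable. A small standalone sketch of that error-bound test (illustrative values and a hypothetical helper name; not the kernel function):

/*
 * Illustrative only: the acceptance test used above. 'delta' is the TSC
 * delta across the timed PIT window, d1 and d2 are the uncertainties of
 * its two endpoints. Requiring d1 + d2 < delta/2048 bounds the relative
 * error at 1/2048 ~= 488 ppm.
 */
#include <stdio.h>

static int tsc_delta_is_accurate(unsigned long long delta,
				 unsigned long d1, unsigned long d2)
{
	return d1 + d2 < (delta >> 11);		/* error < delta/2048 */
}

int main(void)
{
	unsigned long long delta = 4096000;	/* made-up cycle count */

	/* 1500 + 400 = 1900 < 2000 (= delta >> 11): accept */
	printf("%d\n", tsc_delta_is_accurate(delta, 1500, 400));
	/* 1500 + 600 = 2100 >= 2000: keep iterating */
	printf("%d\n", tsc_delta_is_accurate(delta, 1500, 600));
	return 0;
}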
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
	ap.ds = __USER_DS;
	ap.es = __USER_DS;
	ap.fs = __KERNEL_PERCPU;
	ap.gs = 0;
	ap.gs = __KERNEL_STACK_CANARY;

	ap.eflags = 0;

@@ -46,11 +46,10 @@ PHDRS {
	data PT_LOAD FLAGS(7); /* RWE */
#ifdef CONFIG_X86_64
	user PT_LOAD FLAGS(7); /* RWE */
	data.init PT_LOAD FLAGS(7); /* RWE */
#ifdef CONFIG_SMP
	percpu PT_LOAD FLAGS(7); /* RWE */
#endif
	data.init2 PT_LOAD FLAGS(7); /* RWE */
	init PT_LOAD FLAGS(7); /* RWE */
#endif
	note PT_NOTE FLAGS(0); /* ___ */
}
@@ -103,65 +102,43 @@ SECTIONS
		__stop___ex_table = .;
	} :text = 0x9090

	RODATA
	RO_DATA(PAGE_SIZE)

	/* Data */
	. = ALIGN(PAGE_SIZE);
	.data : AT(ADDR(.data) - LOAD_OFFSET) {
		/* Start of data section */
		_sdata = .;

		/* init_task */
		INIT_TASK_DATA(THREAD_SIZE)

#ifdef CONFIG_X86_32
		/* 32 bit has nosave before _edata */
		NOSAVE_DATA
#endif

		PAGE_ALIGNED_DATA(PAGE_SIZE)
		*(.data.idt)

		CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)

		DATA_DATA
		CONSTRUCTORS
	} :data

#ifdef CONFIG_X86_32
	/* 32 bit has nosave before _edata */
	. = ALIGN(PAGE_SIZE);
	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
		__nosave_begin = .;
		*(.data.nosave)
		. = ALIGN(PAGE_SIZE);
		__nosave_end = .;
	}
#endif

	. = ALIGN(PAGE_SIZE);
	.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
		*(.data.page_aligned)
		*(.data.idt)
	}

#ifdef CONFIG_X86_32
	. = ALIGN(32);
#else
	. = ALIGN(PAGE_SIZE);
	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
#endif
	.data.cacheline_aligned :
		AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
		*(.data.cacheline_aligned)
	}

	/* rarely changed data like cpu maps */
#ifdef CONFIG_X86_32
	. = ALIGN(32);
#else
	. = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
#endif
	.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
		*(.data.read_mostly)
		/* rarely changed data like cpu maps */
		READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)

		/* End of data section */
		_edata = .;
	}
	} :data

#ifdef CONFIG_X86_64

#define VSYSCALL_ADDR (-10*1024*1024)
#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
			SIZEOF(.data.read_mostly) + 4095) & ~(4095))
#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
			SIZEOF(.data.read_mostly) + 4095) & ~(4095))
#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
			PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
			PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
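The new VSYSCALL_PHYS_ADDR/VSYSCALL_VIRT_ADDR definitions above express the old hard-coded 4095 mask as the usual power-of-two round-up on PAGE_SIZE, applied to the end of the now-unified .data output section. A standalone sketch of that round-up idiom (illustrative only, not part of the linker script):

/*
 * Illustrative only: for a power-of-two PAGE_SIZE,
 * (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1) rounds x up to the next page
 * boundary, which is what the VSYSCALL_*_ADDR macros above compute.
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL

static unsigned long page_align_up(unsigned long x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	printf("%lu\n", page_align_up(1));	/* 4096 */
	printf("%lu\n", page_align_up(4096));	/* 4096: already aligned */
	printf("%lu\n", page_align_up(4097));	/* 8192 */
	return 0;
}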
@@ -227,35 +204,29 @@ SECTIONS

#endif /* CONFIG_X86_64 */

	/* init_task */
	. = ALIGN(THREAD_SIZE);
	.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
		*(.data.init_task)
	}
#ifdef CONFIG_X86_64
	:data.init
#endif

	/*
	 * smp_locks might be freed after init
	 * start/end must be page aligned
	 */
	. = ALIGN(PAGE_SIZE);
	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
		__smp_locks = .;
		*(.smp_locks)
		__smp_locks_end = .;
		. = ALIGN(PAGE_SIZE);
	}

	/* Init code and data - will be freed after init */
	. = ALIGN(PAGE_SIZE);
	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
	.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
		__init_begin = .; /* paired with __init_end */
	}

#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
	/*
	 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
	 * output PHDR, so the next output section - .init.text - should
	 * start another segment - init.
	 */
	PERCPU_VADDR(0, :percpu)
#endif

	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
		_sinittext = .;
		INIT_TEXT
		_einittext = .;
	}
#ifdef CONFIG_X86_64
	:init
#endif

	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
		INIT_DATA
@@ -326,17 +297,7 @@ SECTIONS
	}
#endif

#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
	/*
	 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
	 * output PHDR, so the next output section - __data_nosave - should
	 * start another section data.init2. Also, pda should be at the head of
	 * percpu area. Preallocate it and define the percpu offset symbol
	 * so that it can be accessed as a percpu variable.
	 */
	. = ALIGN(PAGE_SIZE);
	PERCPU_VADDR(0, :percpu)
#else
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
	PERCPU(PAGE_SIZE)
#endif

@@ -347,15 +308,22 @@ SECTIONS
		__init_end = .;
	}

	/*
	 * smp_locks might be freed after init
	 * start/end must be page aligned
	 */
	. = ALIGN(PAGE_SIZE);
	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
		__smp_locks = .;
		*(.smp_locks)
		__smp_locks_end = .;
		. = ALIGN(PAGE_SIZE);
	}

#ifdef CONFIG_X86_64
	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
		. = ALIGN(PAGE_SIZE);
		__nosave_begin = .;
		*(.data.nosave)
		. = ALIGN(PAGE_SIZE);
		__nosave_end = .;
	} :data.init2
	/* use another section data.init2, see PERCPU_VADDR() above */
		NOSAVE_DATA
	}
#endif

	/* BSS */
@@ -393,8 +361,8 @@ SECTIONS


#ifdef CONFIG_X86_32
ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
	"kernel image bigger than KERNEL_IMAGE_SIZE")
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
	   "kernel image bigger than KERNEL_IMAGE_SIZE");
#else
/*
 * Per-cpu symbols which need to be offset from __per_cpu_load
@@ -407,12 +375,12 @@ INIT_PER_CPU(irq_stack_union);
/*
 * Build-time check on the image size:
 */
ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
	"kernel image bigger than KERNEL_IMAGE_SIZE")
. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
	   "kernel image bigger than KERNEL_IMAGE_SIZE");

#ifdef CONFIG_SMP
ASSERT((per_cpu__irq_stack_union == 0),
	"irq_stack_union is not at start of per-cpu area");
. = ASSERT((per_cpu__irq_stack_union == 0),
	   "irq_stack_union is not at start of per-cpu area");
#endif

#endif /* CONFIG_X86_32 */
@@ -420,7 +388,7 @@ ASSERT((per_cpu__irq_stack_union == 0),
#ifdef CONFIG_KEXEC
#include <asm/kexec.h>

ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
	"kexec control code size is too big")
. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
	   "kexec control code size is too big");
#endif