IB/mthca: Optimize large messages on Sinai HCAs
Sinai (one-port PCI Express) HCAs get improved throughput for messages bigger than 80 KB in DDR mode if memory keys are formatted in a specific way. The enhancement only works if the memory key table is smaller than 2^24 entries. For larger tables, the enhancement is disabled and a warning is printed (to avoid silent performance loss).

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Michael Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
@@ -1277,7 +1277,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
|
|||||||
int err;
|
int err;
|
||||||
|
|
||||||
#define INIT_HCA_IN_SIZE 0x200
|
#define INIT_HCA_IN_SIZE 0x200
|
||||||
#define INIT_HCA_FLAGS_OFFSET 0x014
|
#define INIT_HCA_FLAGS1_OFFSET 0x00c
|
||||||
|
#define INIT_HCA_FLAGS2_OFFSET 0x014
|
||||||
#define INIT_HCA_QPC_OFFSET 0x020
|
#define INIT_HCA_QPC_OFFSET 0x020
|
||||||
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
|
#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
|
||||||
#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
|
#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
|
||||||
@@ -1320,15 +1321,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
|
|||||||
|
|
||||||
memset(inbox, 0, INIT_HCA_IN_SIZE);
|
memset(inbox, 0, INIT_HCA_IN_SIZE);
|
||||||
|
|
||||||
|
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
|
||||||
|
MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
|
||||||
|
|
||||||
#if defined(__LITTLE_ENDIAN)
|
#if defined(__LITTLE_ENDIAN)
|
||||||
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
|
*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
|
||||||
#elif defined(__BIG_ENDIAN)
|
#elif defined(__BIG_ENDIAN)
|
||||||
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1);
|
*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
|
||||||
#else
|
#else
|
||||||
#error Host endianness not defined
|
#error Host endianness not defined
|
||||||
#endif
|
#endif
|
||||||
/* Check port for UD address vector: */
|
/* Check port for UD address vector: */
|
||||||
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
|
*(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
|
||||||
|
|
||||||
/* We leave wqe_quota, responder_exu, etc as 0 (default) */
|
/* We leave wqe_quota, responder_exu, etc as 0 (default) */
|
||||||
|
|
||||||
|
@@ -64,7 +64,8 @@ enum {
|
|||||||
MTHCA_FLAG_NO_LAM = 1 << 5,
|
MTHCA_FLAG_NO_LAM = 1 << 5,
|
||||||
MTHCA_FLAG_FMR = 1 << 6,
|
MTHCA_FLAG_FMR = 1 << 6,
|
||||||
MTHCA_FLAG_MEMFREE = 1 << 7,
|
MTHCA_FLAG_MEMFREE = 1 << 7,
|
||||||
MTHCA_FLAG_PCIE = 1 << 8
|
MTHCA_FLAG_PCIE = 1 << 8,
|
||||||
|
MTHCA_FLAG_SINAI_OPT = 1 << 9
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
@@ -935,13 +935,19 @@ enum {
|
|||||||
|
|
||||||
static struct {
|
static struct {
|
||||||
u64 latest_fw;
|
u64 latest_fw;
|
||||||
int is_memfree;
|
u32 flags;
|
||||||
int is_pcie;
|
|
||||||
} mthca_hca_table[] = {
|
} mthca_hca_table[] = {
|
||||||
[TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 },
|
[TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3),
|
||||||
[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 },
|
.flags = 0 },
|
||||||
[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 },
|
[ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0),
|
||||||
[SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 }
|
.flags = MTHCA_FLAG_PCIE },
|
||||||
|
[ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0),
|
||||||
|
.flags = MTHCA_FLAG_MEMFREE |
|
||||||
|
MTHCA_FLAG_PCIE },
|
||||||
|
[SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1),
|
||||||
|
.flags = MTHCA_FLAG_MEMFREE |
|
||||||
|
MTHCA_FLAG_PCIE |
|
||||||
|
MTHCA_FLAG_SINAI_OPT }
|
||||||
};
|
};
|
||||||
|
|
||||||
static int __devinit mthca_init_one(struct pci_dev *pdev,
|
static int __devinit mthca_init_one(struct pci_dev *pdev,
|
||||||
@@ -1031,12 +1037,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
|
|||||||
|
|
||||||
mdev->pdev = pdev;
|
mdev->pdev = pdev;
|
||||||
|
|
||||||
|
mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
|
||||||
if (ddr_hidden)
|
if (ddr_hidden)
|
||||||
mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
|
mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
|
||||||
if (mthca_hca_table[id->driver_data].is_memfree)
|
|
||||||
mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
|
|
||||||
if (mthca_hca_table[id->driver_data].is_pcie)
|
|
||||||
mdev->mthca_flags |= MTHCA_FLAG_PCIE;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now reset the HCA before we touch the PCI capabilities or
|
* Now reset the HCA before we touch the PCI capabilities or
|
||||||
|
@@ -76,6 +76,8 @@ struct mthca_mpt_entry {
|
|||||||
#define MTHCA_MPT_STATUS_SW 0xF0
|
#define MTHCA_MPT_STATUS_SW 0xF0
|
||||||
#define MTHCA_MPT_STATUS_HW 0x00
|
#define MTHCA_MPT_STATUS_HW 0x00
|
||||||
|
|
||||||
|
#define SINAI_FMR_KEY_INC 0x1000000
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Buddy allocator for MTT segments (currently not very efficient
|
* Buddy allocator for MTT segments (currently not very efficient
|
||||||
* since it doesn't keep a free list and just searches linearly
|
* since it doesn't keep a free list and just searches linearly
|
||||||
@@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
|
|||||||
return tavor_key_to_hw_index(key);
|
return tavor_key_to_hw_index(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
|
||||||
|
{
|
||||||
|
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
|
||||||
|
return ((key << 20) & 0x800000) | (key & 0x7fffff);
|
||||||
|
else
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
|
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
|
||||||
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
|
u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
|
||||||
{
|
{
|
||||||
@@ -345,6 +355,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
|
|||||||
key = mthca_alloc(&dev->mr_table.mpt_alloc);
|
key = mthca_alloc(&dev->mr_table.mpt_alloc);
|
||||||
if (key == -1)
|
if (key == -1)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
key = adjust_key(dev, key);
|
||||||
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
|
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
|
||||||
|
|
||||||
if (mthca_is_memfree(dev)) {
|
if (mthca_is_memfree(dev)) {
|
||||||
@@ -504,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
|
|||||||
key = mthca_alloc(&dev->mr_table.mpt_alloc);
|
key = mthca_alloc(&dev->mr_table.mpt_alloc);
|
||||||
if (key == -1)
|
if (key == -1)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
key = adjust_key(dev, key);
|
||||||
|
|
||||||
idx = key & (dev->limits.num_mpts - 1);
|
idx = key & (dev->limits.num_mpts - 1);
|
||||||
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
|
mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
|
||||||
@@ -687,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
|
|||||||
++fmr->maps;
|
++fmr->maps;
|
||||||
|
|
||||||
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
|
key = arbel_key_to_hw_index(fmr->ibmr.lkey);
|
||||||
key += dev->limits.num_mpts;
|
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
|
||||||
|
key += SINAI_FMR_KEY_INC;
|
||||||
|
else
|
||||||
|
key += dev->limits.num_mpts;
|
||||||
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
|
fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
|
||||||
|
|
||||||
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
|
*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
|
||||||
@@ -760,6 +775,9 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
|
|||||||
else
|
else
|
||||||
dev->mthca_flags |= MTHCA_FLAG_FMR;
|
dev->mthca_flags |= MTHCA_FLAG_FMR;
|
||||||
|
|
||||||
|
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
|
||||||
|
mthca_dbg(dev, "Memory key throughput optimization activated.\n");
|
||||||
|
|
||||||
err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
|
err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
|
||||||
fls(dev->limits.num_mtt_segs - 1));
|
fls(dev->limits.num_mtt_segs - 1));
|
||||||
|
|
||||||
|
@@ -152,7 +152,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
|
|||||||
}
|
}
|
||||||
if (total_size > mem_avail) {
|
if (total_size > mem_avail) {
|
||||||
mthca_err(dev, "Profile requires 0x%llx bytes; "
|
mthca_err(dev, "Profile requires 0x%llx bytes; "
|
||||||
"won't in 0x%llx bytes of context memory.\n",
|
"won't fit in 0x%llx bytes of context memory.\n",
|
||||||
(unsigned long long) total_size,
|
(unsigned long long) total_size,
|
||||||
(unsigned long long) mem_avail);
|
(unsigned long long) mem_avail);
|
||||||
kfree(profile);
|
kfree(profile);
|
||||||
@@ -262,6 +262,14 @@ u64 mthca_make_profile(struct mthca_dev *dev,
|
|||||||
*/
|
*/
|
||||||
dev->limits.num_pds = MTHCA_NUM_PDS;
|
dev->limits.num_pds = MTHCA_NUM_PDS;
|
||||||
|
|
||||||
|
if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
|
||||||
|
init_hca->log_mpt_sz > 23) {
|
||||||
|
mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
|
||||||
|
init_hca->log_mpt_sz);
|
||||||
|
mthca_warn(dev, "Disabling memory key throughput optimization.\n");
|
||||||
|
dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For Tavor, FMRs use ioremapped PCI memory. For 32 bit
|
* For Tavor, FMRs use ioremapped PCI memory. For 32 bit
|
||||||
* systems it may use too much vmalloc space to map all MTT
|
* systems it may use too much vmalloc space to map all MTT
|
||||||
|
Reference in New Issue
Block a user