drm/radeon/kms: don't require up to 64k allocations. (v2)
This avoids needing to do a kmalloc > PAGE_SIZE for the main indirect buffer chunk, it adds an accessor for all reads from the chunk and caches a single page at a time for subsequent reads. changes since v1: Use a two page pool which should be the most common case a single packet spanning > PAGE_SIZE will be hit, but I'm having trouble seeing anywhere we currently generate anything like that. hopefully proper short page copying at end added parser_error flag to set deep errors instead of having to test every ib value fetch. fixed bug in patch that went to list. Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@ -697,17 +697,18 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
struct radeon_cs_packet *pkt,
|
||||
unsigned idx, unsigned reg)
|
||||
{
|
||||
struct radeon_cs_chunk *ib_chunk;
|
||||
struct radeon_cs_reloc *reloc;
|
||||
struct r100_cs_track *track;
|
||||
volatile uint32_t *ib;
|
||||
uint32_t tmp, tile_flags = 0;
|
||||
unsigned i;
|
||||
int r;
|
||||
u32 idx_value;
|
||||
|
||||
ib = p->ib->ptr;
|
||||
ib_chunk = &p->chunks[p->chunk_ib_idx];
|
||||
track = (struct r100_cs_track *)p->track;
|
||||
idx_value = radeon_get_ib_value(p, idx);
|
||||
|
||||
switch(reg) {
|
||||
case AVIVO_D1MODE_VLINE_START_END:
|
||||
case RADEON_CRTC_GUI_TRIG_VLINE:
|
||||
@ -738,8 +739,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
return r;
|
||||
}
|
||||
track->cb[i].robj = reloc->robj;
|
||||
track->cb[i].offset = ib_chunk->kdata[idx];
|
||||
ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
|
||||
track->cb[i].offset = idx_value;
|
||||
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
|
||||
break;
|
||||
case R300_ZB_DEPTHOFFSET:
|
||||
r = r100_cs_packet_next_reloc(p, &reloc);
|
||||
@ -750,8 +751,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
return r;
|
||||
}
|
||||
track->zb.robj = reloc->robj;
|
||||
track->zb.offset = ib_chunk->kdata[idx];
|
||||
ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
|
||||
track->zb.offset = idx_value;
|
||||
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
|
||||
break;
|
||||
case R300_TX_OFFSET_0:
|
||||
case R300_TX_OFFSET_0+4:
|
||||
@ -777,32 +778,32 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
r100_cs_dump_packet(p, pkt);
|
||||
return r;
|
||||
}
|
||||
ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
|
||||
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
|
||||
track->textures[i].robj = reloc->robj;
|
||||
break;
|
||||
/* Tracked registers */
|
||||
case 0x2084:
|
||||
/* VAP_VF_CNTL */
|
||||
track->vap_vf_cntl = ib_chunk->kdata[idx];
|
||||
track->vap_vf_cntl = idx_value;
|
||||
break;
|
||||
case 0x20B4:
|
||||
/* VAP_VTX_SIZE */
|
||||
track->vtx_size = ib_chunk->kdata[idx] & 0x7F;
|
||||
track->vtx_size = idx_value & 0x7F;
|
||||
break;
|
||||
case 0x2134:
|
||||
/* VAP_VF_MAX_VTX_INDX */
|
||||
track->max_indx = ib_chunk->kdata[idx] & 0x00FFFFFFUL;
|
||||
track->max_indx = idx_value & 0x00FFFFFFUL;
|
||||
break;
|
||||
case 0x43E4:
|
||||
/* SC_SCISSOR1 */
|
||||
track->maxy = ((ib_chunk->kdata[idx] >> 13) & 0x1FFF) + 1;
|
||||
track->maxy = ((idx_value >> 13) & 0x1FFF) + 1;
|
||||
if (p->rdev->family < CHIP_RV515) {
|
||||
track->maxy -= 1440;
|
||||
}
|
||||
break;
|
||||
case 0x4E00:
|
||||
/* RB3D_CCTL */
|
||||
track->num_cb = ((ib_chunk->kdata[idx] >> 5) & 0x3) + 1;
|
||||
track->num_cb = ((idx_value >> 5) & 0x3) + 1;
|
||||
break;
|
||||
case 0x4E38:
|
||||
case 0x4E3C:
|
||||
@ -825,13 +826,13 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
|
||||
tile_flags |= R300_COLOR_MICROTILE_ENABLE;
|
||||
|
||||
tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
|
||||
tmp = idx_value & ~(0x7 << 16);
|
||||
tmp |= tile_flags;
|
||||
ib[idx] = tmp;
|
||||
|
||||
i = (reg - 0x4E38) >> 2;
|
||||
track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE;
|
||||
switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) {
|
||||
track->cb[i].pitch = idx_value & 0x3FFE;
|
||||
switch (((idx_value >> 21) & 0xF)) {
|
||||
case 9:
|
||||
case 11:
|
||||
case 12:
|
||||
@ -854,13 +855,13 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("Invalid color buffer format (%d) !\n",
|
||||
((ib_chunk->kdata[idx] >> 21) & 0xF));
|
||||
((idx_value >> 21) & 0xF));
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case 0x4F00:
|
||||
/* ZB_CNTL */
|
||||
if (ib_chunk->kdata[idx] & 2) {
|
||||
if (idx_value & 2) {
|
||||
track->z_enabled = true;
|
||||
} else {
|
||||
track->z_enabled = false;
|
||||
@ -868,7 +869,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
break;
|
||||
case 0x4F10:
|
||||
/* ZB_FORMAT */
|
||||
switch ((ib_chunk->kdata[idx] & 0xF)) {
|
||||
switch ((idx_value & 0xF)) {
|
||||
case 0:
|
||||
case 1:
|
||||
track->zb.cpp = 2;
|
||||
@ -878,7 +879,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("Invalid z buffer format (%d) !\n",
|
||||
(ib_chunk->kdata[idx] & 0xF));
|
||||
(idx_value & 0xF));
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
@ -897,17 +898,17 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
|
||||
tile_flags |= R300_DEPTHMICROTILE_TILED;;
|
||||
|
||||
tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
|
||||
tmp = idx_value & ~(0x7 << 16);
|
||||
tmp |= tile_flags;
|
||||
ib[idx] = tmp;
|
||||
|
||||
track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC;
|
||||
track->zb.pitch = idx_value & 0x3FFC;
|
||||
break;
|
||||
case 0x4104:
|
||||
for (i = 0; i < 16; i++) {
|
||||
bool enabled;
|
||||
|
||||
enabled = !!(ib_chunk->kdata[idx] & (1 << i));
|
||||
enabled = !!(idx_value & (1 << i));
|
||||
track->textures[i].enabled = enabled;
|
||||
}
|
||||
break;
|
||||
@ -929,9 +930,9 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
case 0x44FC:
|
||||
/* TX_FORMAT1_[0-15] */
|
||||
i = (reg - 0x44C0) >> 2;
|
||||
tmp = (ib_chunk->kdata[idx] >> 25) & 0x3;
|
||||
tmp = (idx_value >> 25) & 0x3;
|
||||
track->textures[i].tex_coord_type = tmp;
|
||||
switch ((ib_chunk->kdata[idx] & 0x1F)) {
|
||||
switch ((idx_value & 0x1F)) {
|
||||
case R300_TX_FORMAT_X8:
|
||||
case R300_TX_FORMAT_Y4X4:
|
||||
case R300_TX_FORMAT_Z3Y3X2:
|
||||
@ -971,7 +972,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("Invalid texture format %u\n",
|
||||
(ib_chunk->kdata[idx] & 0x1F));
|
||||
(idx_value & 0x1F));
|
||||
return -EINVAL;
|
||||
break;
|
||||
}
|
||||
@ -994,11 +995,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
case 0x443C:
|
||||
/* TX_FILTER0_[0-15] */
|
||||
i = (reg - 0x4400) >> 2;
|
||||
tmp = ib_chunk->kdata[idx] & 0x7;
|
||||
tmp = idx_value & 0x7;
|
||||
if (tmp == 2 || tmp == 4 || tmp == 6) {
|
||||
track->textures[i].roundup_w = false;
|
||||
}
|
||||
tmp = (ib_chunk->kdata[idx] >> 3) & 0x7;
|
||||
tmp = (idx_value >> 3) & 0x7;
|
||||
if (tmp == 2 || tmp == 4 || tmp == 6) {
|
||||
track->textures[i].roundup_h = false;
|
||||
}
|
||||
@ -1021,12 +1022,12 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
case 0x453C:
|
||||
/* TX_FORMAT2_[0-15] */
|
||||
i = (reg - 0x4500) >> 2;
|
||||
tmp = ib_chunk->kdata[idx] & 0x3FFF;
|
||||
tmp = idx_value & 0x3FFF;
|
||||
track->textures[i].pitch = tmp + 1;
|
||||
if (p->rdev->family >= CHIP_RV515) {
|
||||
tmp = ((ib_chunk->kdata[idx] >> 15) & 1) << 11;
|
||||
tmp = ((idx_value >> 15) & 1) << 11;
|
||||
track->textures[i].width_11 = tmp;
|
||||
tmp = ((ib_chunk->kdata[idx] >> 16) & 1) << 11;
|
||||
tmp = ((idx_value >> 16) & 1) << 11;
|
||||
track->textures[i].height_11 = tmp;
|
||||
}
|
||||
break;
|
||||
@ -1048,15 +1049,15 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
case 0x44BC:
|
||||
/* TX_FORMAT0_[0-15] */
|
||||
i = (reg - 0x4480) >> 2;
|
||||
tmp = ib_chunk->kdata[idx] & 0x7FF;
|
||||
tmp = idx_value & 0x7FF;
|
||||
track->textures[i].width = tmp + 1;
|
||||
tmp = (ib_chunk->kdata[idx] >> 11) & 0x7FF;
|
||||
tmp = (idx_value >> 11) & 0x7FF;
|
||||
track->textures[i].height = tmp + 1;
|
||||
tmp = (ib_chunk->kdata[idx] >> 26) & 0xF;
|
||||
tmp = (idx_value >> 26) & 0xF;
|
||||
track->textures[i].num_levels = tmp;
|
||||
tmp = ib_chunk->kdata[idx] & (1 << 31);
|
||||
tmp = idx_value & (1 << 31);
|
||||
track->textures[i].use_pitch = !!tmp;
|
||||
tmp = (ib_chunk->kdata[idx] >> 22) & 0xF;
|
||||
tmp = (idx_value >> 22) & 0xF;
|
||||
track->textures[i].txdepth = tmp;
|
||||
break;
|
||||
case R300_ZB_ZPASS_ADDR:
|
||||
@ -1067,7 +1068,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
r100_cs_dump_packet(p, pkt);
|
||||
return r;
|
||||
}
|
||||
ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
|
||||
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
|
||||
break;
|
||||
case 0x4be8:
|
||||
/* valid register only on RV530 */
|
||||
@ -1085,60 +1086,20 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
|
||||
static int r300_packet3_check(struct radeon_cs_parser *p,
|
||||
struct radeon_cs_packet *pkt)
|
||||
{
|
||||
struct radeon_cs_chunk *ib_chunk;
|
||||
|
||||
struct radeon_cs_reloc *reloc;
|
||||
struct r100_cs_track *track;
|
||||
volatile uint32_t *ib;
|
||||
unsigned idx;
|
||||
unsigned i, c;
|
||||
int r;
|
||||
|
||||
ib = p->ib->ptr;
|
||||
ib_chunk = &p->chunks[p->chunk_ib_idx];
|
||||
idx = pkt->idx + 1;
|
||||
track = (struct r100_cs_track *)p->track;
|
||||
switch(pkt->opcode) {
|
||||
case PACKET3_3D_LOAD_VBPNTR:
|
||||
c = ib_chunk->kdata[idx++] & 0x1F;
|
||||
track->num_arrays = c;
|
||||
for (i = 0; i < (c - 1); i+=2, idx+=3) {
|
||||
r = r100_cs_packet_next_reloc(p, &reloc);
|
||||
if (r) {
|
||||
DRM_ERROR("No reloc for packet3 %d\n",
|
||||
pkt->opcode);
|
||||
r100_cs_dump_packet(p, pkt);
|
||||
return r;
|
||||
}
|
||||
ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
|
||||
track->arrays[i + 0].robj = reloc->robj;
|
||||
track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
|
||||
track->arrays[i + 0].esize &= 0x7F;
|
||||
r = r100_cs_packet_next_reloc(p, &reloc);
|
||||
if (r) {
|
||||
DRM_ERROR("No reloc for packet3 %d\n",
|
||||
pkt->opcode);
|
||||
r100_cs_dump_packet(p, pkt);
|
||||
return r;
|
||||
}
|
||||
ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
|
||||
track->arrays[i + 1].robj = reloc->robj;
|
||||
track->arrays[i + 1].esize = ib_chunk->kdata[idx] >> 24;
|
||||
track->arrays[i + 1].esize &= 0x7F;
|
||||
}
|
||||
if (c & 1) {
|
||||
r = r100_cs_packet_next_reloc(p, &reloc);
|
||||
if (r) {
|
||||
DRM_ERROR("No reloc for packet3 %d\n",
|
||||
pkt->opcode);
|
||||
r100_cs_dump_packet(p, pkt);
|
||||
return r;
|
||||
}
|
||||
ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
|
||||
track->arrays[i + 0].robj = reloc->robj;
|
||||
track->arrays[i + 0].esize = ib_chunk->kdata[idx] >> 8;
|
||||
track->arrays[i + 0].esize &= 0x7F;
|
||||
}
|
||||
r = r100_packet3_load_vbpntr(p, pkt, idx);
|
||||
if (r)
|
||||
return r;
|
||||
break;
|
||||
case PACKET3_INDX_BUFFER:
|
||||
r = r100_cs_packet_next_reloc(p, &reloc);
|
||||
@ -1147,7 +1108,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
|
||||
r100_cs_dump_packet(p, pkt);
|
||||
return r;
|
||||
}
|
||||
ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
|
||||
ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
|
||||
r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
|
||||
if (r) {
|
||||
return r;
|
||||
@ -1158,11 +1119,11 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
|
||||
/* Number of dwords is vtx_size * (num_vertices - 1)
|
||||
* PRIM_WALK must be equal to 3 vertex data in embedded
|
||||
* in cmd stream */
|
||||
if (((ib_chunk->kdata[idx+1] >> 4) & 0x3) != 3) {
|
||||
if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
|
||||
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
track->vap_vf_cntl = ib_chunk->kdata[idx+1];
|
||||
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
|
||||
track->immd_dwords = pkt->count - 1;
|
||||
r = r100_cs_track_check(p->rdev, track);
|
||||
if (r) {
|
||||
@ -1173,11 +1134,11 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
|
||||
/* Number of dwords is vtx_size * (num_vertices - 1)
|
||||
* PRIM_WALK must be equal to 3 vertex data in embedded
|
||||
* in cmd stream */
|
||||
if (((ib_chunk->kdata[idx] >> 4) & 0x3) != 3) {
|
||||
if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
|
||||
DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
track->vap_vf_cntl = ib_chunk->kdata[idx];
|
||||
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
|
||||
track->immd_dwords = pkt->count;
|
||||
r = r100_cs_track_check(p->rdev, track);
|
||||
if (r) {
|
||||
@ -1185,28 +1146,28 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
|
||||
}
|
||||
break;
|
||||
case PACKET3_3D_DRAW_VBUF:
|
||||
track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
|
||||
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
|
||||
r = r100_cs_track_check(p->rdev, track);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
case PACKET3_3D_DRAW_VBUF_2:
|
||||
track->vap_vf_cntl = ib_chunk->kdata[idx];
|
||||
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
|
||||
r = r100_cs_track_check(p->rdev, track);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
case PACKET3_3D_DRAW_INDX:
|
||||
track->vap_vf_cntl = ib_chunk->kdata[idx + 1];
|
||||
track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
|
||||
r = r100_cs_track_check(p->rdev, track);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
case PACKET3_3D_DRAW_INDX_2:
|
||||
track->vap_vf_cntl = ib_chunk->kdata[idx];
|
||||
track->vap_vf_cntl = radeon_get_ib_value(p, idx);
|
||||
r = r100_cs_track_check(p->rdev, track);
|
||||
if (r) {
|
||||
return r;
|
||||
|
Reference in New Issue
Block a user