drm/radeon: implement ring saving on reset v4
Try to save whatever is on the rings when we encounter a lockup. v2: Fix spelling error. Free saved ring data if reset fails. Add documentation for the new functions. v3: Some more spelling fixes. v4: It doesn't make sense to save anything if all fences are signaled. Signed-off-by: Christian König <deathsimple@vodafone.de> Reviewed-by: Michel Dänzer <michel.daenzer@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -768,6 +768,10 @@ int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
|
|||||||
void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring);
|
void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring);
|
||||||
void radeon_ring_lockup_update(struct radeon_ring *ring);
|
void radeon_ring_lockup_update(struct radeon_ring *ring);
|
||||||
bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
|
bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
|
||||||
|
unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring,
|
||||||
|
uint32_t **data);
|
||||||
|
int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
|
||||||
|
unsigned size, uint32_t *data);
|
||||||
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
|
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
|
||||||
unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
|
unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
|
||||||
u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
|
u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
|
||||||
|
@@ -996,7 +996,12 @@ int radeon_resume_kms(struct drm_device *dev)
|
|||||||
|
|
||||||
int radeon_gpu_reset(struct radeon_device *rdev)
|
int radeon_gpu_reset(struct radeon_device *rdev)
|
||||||
{
|
{
|
||||||
int r;
|
unsigned ring_sizes[RADEON_NUM_RINGS];
|
||||||
|
uint32_t *ring_data[RADEON_NUM_RINGS];
|
||||||
|
|
||||||
|
bool saved = false;
|
||||||
|
|
||||||
|
int i, r;
|
||||||
int resched;
|
int resched;
|
||||||
|
|
||||||
down_write(&rdev->exclusive_lock);
|
down_write(&rdev->exclusive_lock);
|
||||||
@@ -1005,20 +1010,47 @@ int radeon_gpu_reset(struct radeon_device *rdev)
|
|||||||
resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
|
resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
|
||||||
radeon_suspend(rdev);
|
radeon_suspend(rdev);
|
||||||
|
|
||||||
r = radeon_asic_reset(rdev);
|
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
|
||||||
if (!r) {
|
ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
|
||||||
dev_info(rdev->dev, "GPU reset succeed\n");
|
&ring_data[i]);
|
||||||
radeon_resume(rdev);
|
if (ring_sizes[i]) {
|
||||||
|
saved = true;
|
||||||
r = radeon_ib_ring_tests(rdev);
|
dev_info(rdev->dev, "Saved %d dwords of commands "
|
||||||
if (r)
|
"on ring %d.\n", ring_sizes[i], i);
|
||||||
DRM_ERROR("ib ring test failed (%d).\n", r);
|
}
|
||||||
|
|
||||||
radeon_restore_bios_scratch_regs(rdev);
|
|
||||||
drm_helper_resume_force_mode(rdev->ddev);
|
|
||||||
ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
retry:
|
||||||
|
r = radeon_asic_reset(rdev);
|
||||||
|
if (!r) {
|
||||||
|
dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
|
||||||
|
radeon_resume(rdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
radeon_restore_bios_scratch_regs(rdev);
|
||||||
|
drm_helper_resume_force_mode(rdev->ddev);
|
||||||
|
|
||||||
|
if (!r) {
|
||||||
|
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
|
||||||
|
radeon_ring_restore(rdev, &rdev->ring[i],
|
||||||
|
ring_sizes[i], ring_data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
r = radeon_ib_ring_tests(rdev);
|
||||||
|
if (r) {
|
||||||
|
dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
|
||||||
|
if (saved) {
|
||||||
|
radeon_suspend(rdev);
|
||||||
|
goto retry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
|
||||||
|
kfree(ring_data[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
|
||||||
if (r) {
|
if (r) {
|
||||||
/* bad news, how to tell it to userspace ? */
|
/* bad news, how to tell it to userspace ? */
|
||||||
dev_info(rdev->dev, "GPU reset failed\n");
|
dev_info(rdev->dev, "GPU reset failed\n");
|
||||||
|
@@ -362,6 +362,88 @@ bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *rin
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* radeon_ring_backup - Back up the content of a ring
|
||||||
|
*
|
||||||
|
* @rdev: radeon_device pointer
|
||||||
|
* @ring: the ring we want to back up
|
||||||
|
*
|
||||||
|
* Saves all unprocessed commits from a ring, returns the number of dwords saved.
|
||||||
|
*/
|
||||||
|
unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring,
|
||||||
|
uint32_t **data)
|
||||||
|
{
|
||||||
|
unsigned size, ptr, i;
|
||||||
|
int ridx = radeon_ring_index(rdev, ring);
|
||||||
|
|
||||||
|
/* just in case lock the ring */
|
||||||
|
mutex_lock(&rdev->ring_lock);
|
||||||
|
*data = NULL;
|
||||||
|
|
||||||
|
if (ring->ring_obj == NULL || !ring->rptr_save_reg) {
|
||||||
|
mutex_unlock(&rdev->ring_lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* it doesn't make sense to save anything if all fences are signaled */
|
||||||
|
if (!radeon_fence_count_emitted(rdev, ridx)) {
|
||||||
|
mutex_unlock(&rdev->ring_lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* calculate the number of dw on the ring */
|
||||||
|
ptr = RREG32(ring->rptr_save_reg);
|
||||||
|
size = ring->wptr + (ring->ring_size / 4);
|
||||||
|
size -= ptr;
|
||||||
|
size &= ring->ptr_mask;
|
||||||
|
if (size == 0) {
|
||||||
|
mutex_unlock(&rdev->ring_lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* and then save the content of the ring */
|
||||||
|
*data = kmalloc(size * 4, GFP_KERNEL);
|
||||||
|
for (i = 0; i < size; ++i) {
|
||||||
|
(*data)[i] = ring->ring[ptr++];
|
||||||
|
ptr &= ring->ptr_mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_unlock(&rdev->ring_lock);
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* radeon_ring_restore - append saved commands to the ring again
|
||||||
|
*
|
||||||
|
* @rdev: radeon_device pointer
|
||||||
|
* @ring: ring to append commands to
|
||||||
|
* @size: number of dwords we want to write
|
||||||
|
* @data: saved commands
|
||||||
|
*
|
||||||
|
* Allocates space on the ring and restore the previously saved commands.
|
||||||
|
*/
|
||||||
|
int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
|
||||||
|
unsigned size, uint32_t *data)
|
||||||
|
{
|
||||||
|
int i, r;
|
||||||
|
|
||||||
|
if (!size || !data)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* restore the saved ring content */
|
||||||
|
r = radeon_ring_lock(rdev, ring, size);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
for (i = 0; i < size; ++i) {
|
||||||
|
radeon_ring_write(ring, data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
radeon_ring_unlock_commit(rdev, ring);
|
||||||
|
kfree(data);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size,
|
int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size,
|
||||||
unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
|
unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
|
||||||
u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop)
|
u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop)
|
||||||
|
Reference in New Issue
Block a user