diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b7c81dacabf079..40a9205ab09016 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -87,7 +87,7 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) if (pfn_is_map_memory(__phys_to_pfn(phys_addr))) return (void __iomem *)__phys_to_virt(phys_addr); - return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL), + return __ioremap_caller(phys_addr, size, __pgprot(PROT_DEVICE_nGnRE), __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_cache); diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index f15b20da5315c8..ad4c568c9209be 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -729,8 +729,8 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg) cjiffies = jiffies; if (time_after(cjiffies, ctx->last_jump_jiffies)) { cjiffies -= ctx->last_jump_jiffies; - if ((jiffies_to_msecs(cjiffies) > 5000)) { - DRM_ERROR("atombios stuck in loop for more than 5secs aborting\n"); + if ((jiffies_to_msecs(cjiffies) > 30000)) { + DRM_ERROR("atombios stuck in loop for more than 30secs aborting\n"); ctx->abort = true; } } else { diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index eeb590d2dec2e7..d8f97ec975c67c 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5029,6 +5029,7 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; } + evergreen_gpu_init(rdev); /* allocate rlc buffers */ @@ -5044,11 +5045,17 @@ static int evergreen_startup(struct radeon_device *rdev) } } + /* TODO: Dying after here currently. */ + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* allocate wb buffer */ r = radeon_wb_init(rdev); if (r) return r; + /* TODO: Dying before here currently. 
*/ + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 52c79da1ecf571..af9100167fc102 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -155,6 +155,8 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, radeon_ring_unlock_commit(rdev, ring, false); radeon_sync_free(rdev, &sync, fence); + r600_dma_ring_test(rdev, ring); + return fence; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index ca3fcae2adb537..23f54a7394a0b1 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2625,6 +2625,7 @@ u32 r600_gfx_get_rptr(struct radeon_device *rdev, else rptr = RREG32(R600_CP_RB_RPTR); + mb(); //CHANGED return rptr; } @@ -3483,7 +3484,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) if (rdev->ih.ring_obj == NULL) { r = radeon_bo_create(rdev, rdev->ih.ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, + RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->ih.ring_obj); if (r) { DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r); @@ -3493,7 +3494,7 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) if (unlikely(r != 0)) return r; r = radeon_bo_pin(rdev->ih.ring_obj, - RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_DOMAIN_VRAM, &rdev->ih.gpu_addr); if (r) { radeon_bo_unreserve(rdev->ih.ring_obj); @@ -4038,8 +4039,10 @@ static u32 r600_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp; - if (rdev->wb.enabled) + if (rdev->wb.enabled) { wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]); + mb(); + } else wptr = RREG32(IH_RB_WPTR); diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 89ca2738c5d4c8..1f6e86c8268e76 100644 --- 
a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -57,6 +57,7 @@ uint32_t r600_dma_get_rptr(struct radeon_device *rdev, else rptr = RREG32(DMA_RB_RPTR); + mb(); //CHANGED return (rptr & 0x3fffc) >> 2; } @@ -245,6 +246,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, tmp = 0xCAFEDEAD; rdev->wb.wb[index/4] = cpu_to_le32(tmp); + mb(); //CHANGED r = radeon_ring_lock(rdev, ring, 4); if (r) { DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); @@ -258,6 +260,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); + mb(); //CHANGED if (tmp == 0xDEADBEEF) break; udelay(1); @@ -379,6 +382,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); + mb(); //CHANGED if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 895776c421d4de..3c133e0da0f244 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -370,6 +370,7 @@ struct radeon_fence_driver { atomic64_t last_seq; bool initialized, delayed_irq; struct delayed_work lockup_work; + dma_addr_t dma_addr; }; struct radeon_fence { @@ -668,6 +669,8 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, dma_addr_t *dma_addr, uint32_t flags); +void radeon_gart_sync_all_for_device(struct radeon_device *rdev); + /* * GPU MC structures, functions & helpers */ @@ -2237,6 +2240,13 @@ void radeon_agp_disable(struct radeon_device *rdev); int radeon_asic_init(struct radeon_device *rdev); +/* + * memcpy_io and memset_io functions that work on a raspberry pi 4 + */ +void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count); +void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count); +void memset_io_pcie(volatile void __iomem *dst, 
int c, size_t count); + /* * IOCTL. */ @@ -2337,6 +2347,11 @@ struct radeon_atcs { typedef uint32_t (*radeon_rreg_t)(struct radeon_device*, uint32_t); typedef void (*radeon_wreg_t)(struct radeon_device*, uint32_t, uint32_t); +struct moved_bos_entry{ + struct radeon_bo* bo; + struct list_head list; +}; + struct radeon_device { struct device *dev; struct drm_device *ddev; @@ -2482,6 +2497,25 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; + struct radeon_bo *rick; + uint64_t rick_gpu; + void *rick_cpu; + uint64_t fb_gpu; + + // reading back shader code for debugging + struct radeon_bo* shader_read_bo; + uint64_t shader_read_gpu; + void* shader_read_cpu; + + // needed because of weird stuff + int numFSuses; + + // tracking moved BOs to move them back after CS execution + struct radeon_bo** moved_bos; // array of pointers to the BOs that were moved + int nMovedBos; // number of BOs moved (determines size of array) + struct mutex move_bos_mutex; + struct list_head move_bo_head; + bool trackMoves; // enables or disables tracking of the BO moves to avoid tracking while moving the BOs back after CS execution }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 33121655d50bbe..38a88f3297e890 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -72,7 +72,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev) iounmap(bios); return false; } - memcpy_fromio(rdev->bios, bios, size); + memcpy_fromio_pcie(rdev->bios, bios, size); iounmap(bios); return true; } @@ -81,6 +81,7 @@ static bool radeon_read_bios(struct radeon_device *rdev) { uint8_t __iomem *bios, val1, val2; size_t size; + int pos; rdev->bios = NULL; /* XXX: some cards may return 0 for rom size? 
ddx has a workaround */ @@ -101,7 +102,11 @@ static bool radeon_read_bios(struct radeon_device *rdev) pci_unmap_rom(rdev->pdev, bios); return false; } - memcpy_fromio(rdev->bios, bios, size); + //memcpy_fromio(rdev->bios, bios, size); + for(pos = 0;pos < size; pos++){ + //memcpy_fromio(rdev->bios+pos,bios+pos,1); + rdev->bios[pos] = __raw_readb(bios+pos); + } pci_unmap_rom(rdev->pdev, bios); return true; } @@ -125,7 +130,7 @@ static bool radeon_read_platform_bios(struct radeon_device *rdev) if (!bios) goto free_bios; - memcpy_fromio(rdev->bios, bios, romlen); + memcpy_fromio_pcie(rdev->bios, bios, romlen); iounmap(bios); if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 9ed2b2700e0a56..8ad893c91aa2be 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -663,6 +663,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_device *rdev = dev->dev_private; struct radeon_cs_parser parser; int r; + INIT_LIST_HEAD(&rdev->move_bo_head); down_read(&rdev->exclusive_lock); if (!rdev->accel_working) { @@ -676,6 +677,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = -EAGAIN; return r; } + rdev->trackMoves = true; /* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 4f0fbf66743160..eeb4e2cc9c3b46 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -112,6 +112,98 @@ static const char radeon_family_name[][16] = { "LAST", }; +/** + * DOC: memcpy_fromio_pcie + * + * like memcpy_fromio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4 + */ + +void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count) +{ + while (count && 
!IS_ALIGNED((unsigned long)from, 8)) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+
+	while (count >= 4) {
+		*(u32 *)to = __raw_readl(from);
+		from += 4;
+		to += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
+		count--;
+	}
+}
+
+/**
+ * DOC: memcpy_toio_pcie
+ *
+ * like memcpy_toio, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4
+ */
+
+void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count)
+{
+	while (count && !IS_ALIGNED((unsigned long)to, 8)) {
+		__raw_writeb(*(u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+
+	while (count >= 4) {
+		__raw_writel(*(u32 *)from, to);
+		from += 4;
+		to += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		__raw_writeb(*(u8 *)from, to);
+		from++;
+		to++;
+		count--;
+	}
+}
+
+/**
+ * DOC: memset_io_pcie
+ *
+ * like memset_io, but it only uses 8-bit and 32-bit wide accesses to work on a raspberry pi 4
+ */
+
+void memset_io_pcie(volatile void __iomem *dst, int c, size_t count)
+{
+	u32 qc = (u8)c;
+
+	qc |= qc << 8;
+	qc |= qc << 16;
+
+	while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
+	}
+
+	while (count >= 4) {
+		__raw_writel(qc, dst);
+		dst += 4;
+		count -= 4;
+	}
+
+	while (count) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
+	}
+}
+
 #if defined(CONFIG_VGA_SWITCHEROO)
 bool radeon_has_atpx_dgpu_power_cntl(void);
 bool radeon_is_atpx_hybrid(void);
@@ -446,6 +538,42 @@ void radeon_wb_fini(struct radeon_device *rdev)
 	}
 }
 
+//memset_io with only 32-bit accesses
+void memset_io_pcie_wb(volatile void __iomem *dst, int c, size_t count)
+{
+	u32 qc = (u8)c;
+
+	qc |= qc << 8;
+	qc |= qc << 16;
+	//qc |= qc << 32;
+	mb();
+
+	printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200);
+
+	while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
+		__raw_writeb(c, dst);
+		dst++;
+		count--;
+	}
+
+	printk(KERN_ALERT "DEBUG: Passed %s %d 
\n",__FUNCTION__,__LINE__); msleep(200); + + while (count >= 4) { + __raw_writel(qc, dst); + dst += 4; + count -= 4; + } + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} + + /** * radeon_wb_init- Init Writeback driver info and allocate memory * @@ -461,7 +589,7 @@ int radeon_wb_init(struct radeon_device *rdev) if (rdev->wb.wb_obj == NULL) { r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, + RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->wb.wb_obj); if (r) { dev_warn(rdev->dev, "(%d) create WB bo failed\n", r); @@ -472,7 +600,7 @@ int radeon_wb_init(struct radeon_device *rdev) radeon_wb_fini(rdev); return r; } - r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_GTT, + r = radeon_bo_pin(rdev->wb.wb_obj, RADEON_GEM_DOMAIN_VRAM, &rdev->wb.gpu_addr); if (r) { radeon_bo_unreserve(rdev->wb.wb_obj); @@ -489,14 +617,23 @@ int radeon_wb_init(struct radeon_device *rdev) } } + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* clear wb memory */ - memset((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); + memset_io_pcie_wb((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE); + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* disable event_write fences */ rdev->wb.use_event = false; + + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); + /* disabled via module param */ if (radeon_no_wb == 1) { rdev->wb.enabled = false; } else { + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); if (rdev->flags & RADEON_IS_AGP) { /* often unreliable on AGP */ rdev->wb.enabled = false; @@ -504,6 +641,7 @@ int radeon_wb_init(struct radeon_device *rdev) /* often unreliable on pre-r300 */ rdev->wb.enabled = false; } else { + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); 
rdev->wb.enabled = true; /* event_write fences are only available on r600+ */ if (rdev->family >= CHIP_R600) { @@ -511,12 +649,14 @@ int radeon_wb_init(struct radeon_device *rdev) } } } + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); /* always use writeback/events on NI, APUs */ if (rdev->family >= CHIP_PALM) { rdev->wb.enabled = true; rdev->wb.use_event = true; } + printk(KERN_ALERT "DEBUG: Passed %s %d \n",__FUNCTION__,__LINE__); msleep(200); dev_info(rdev->dev, "WB %sabled\n", rdev->wb.enabled ? "en" : "dis"); return 0; @@ -1501,6 +1641,9 @@ int radeon_device_init(struct radeon_device *rdev, else DRM_INFO("radeon: acceleration disabled, skipping benchmarks\n"); } + + mutex_init(&rdev->move_bos_mutex); + return 0; failed: diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index ca382fbf7a86a3..2557884a88f06c 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -189,7 +189,7 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, /* Only 27 bit offset for legacy CRTC */ ret = radeon_bo_pin_restricted(rbo, RADEON_GEM_DOMAIN_VRAM, ASIC_IS_AVIVO(rdev) ? 
0 : 1 << 27, - NULL); + &rdev->fb_gpu); if (ret) { radeon_bo_unreserve(rbo); goto out_unref; @@ -262,7 +262,7 @@ static int radeonfb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - memset_io(rbo->kptr, 0x0, radeon_bo_size(rbo)); + memset_io_pcie(rbo->kptr, 0x0, radeon_bo_size(rbo)); info->fbops = &radeonfb_ops; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 04109a2a6fd760..04e7ccb581569d 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -320,6 +320,23 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, return 0; } +/** + * syncs all bound pages for the card (workaround for incoherent systems) + * + */ +void radeon_gart_sync_all_for_device(struct radeon_device *rdev){ + int i; + printk("syncing all GART pages for device\n"); + for (i = 0; i < rdev->gart.num_gpu_pages; i++){ // loop over all gpu pages + if(rdev->gart.pages_entry[i] == rdev->dummy_page.entry){ + continue; // entry is just the dummy page, so it can be ignored + } + dma_sync_single_for_device(rdev->dev, rdev->gart.pages_entry[i] & 0xFFFFFFFFFFFFF000ULL, 4096, DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(rdev->dev, rdev->gart.pages_entry[i] & 0xFFFFFFFFFFFFF000ULL, 4096, DMA_BIDIRECTIONAL); + } + +} + /** * radeon_gart_init - init the driver info for managing the gart * diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index a36a4f2c76b097..ccc211f7c71cad 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -330,20 +330,39 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_create *args = data; struct drm_gem_object *gobj; + struct radeon_bo* rbo; uint32_t handle; int r; + u64 size; + __u32 flags; down_read(&rdev->exclusive_lock); /* create a gem object to contain this object in */ + size = args->size; 
+ flags = args->flags; + if(1){ + flags &= ~RADEON_GEM_NO_CPU_ACCESS; + flags &= ~RADEON_GEM_GTT_WC; + flags |= RADEON_GEM_GTT_UC; + + args->initial_domain = RADEON_GEM_DOMAIN_GTT; + } args->size = roundup(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, args->alignment, - args->initial_domain, args->flags, + args->initial_domain, flags, false, &gobj); if (r) { up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); return r; } + + rbo = gem_to_radeon_bo(gobj); + if(size == 48){ + // 12 dw, first shader + printk("first shader?\n"); + } + r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_put(gobj); @@ -396,7 +415,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, /* create a gem object to contain this object in */ r = radeon_gem_object_create(rdev, args->size, 0, - RADEON_GEM_DOMAIN_CPU, 0, + RADEON_GEM_DOMAIN_GTT, 0, false, &gobj); if (r) goto handle_lockup; @@ -420,7 +439,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; } - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT); + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); radeon_bo_unreserve(bo); mmap_read_unlock(current->mm); @@ -711,7 +730,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, * otherwise we will endup with broken userspace and we won't be able * to enable this feature without adding new interface */ - invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM; + invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED; if ((args->flags & invalid_flags)) { dev_err(dev->dev, "invalid flags 0x%08X vs 0x%08X\n", args->flags, invalid_flags); @@ -804,9 +823,9 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data, args->value = robj->initial_domain; break; case RADEON_GEM_OP_SET_INITIAL_DOMAIN: - 
robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_CPU); + robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM);// | + //RADEON_GEM_DOMAIN_GTT | + //RADEON_GEM_DOMAIN_CPU); break; default: r = -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 62b116727b4f50..3acdb6fa6f378c 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -30,6 +30,7 @@ #include #include "radeon.h" +#include "evergreend.h" /* * IB @@ -128,6 +129,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; + printk("IB scheduled, dumping %d DWORDs\n",ib->length_dw); + + if (0) { + radeon_ring_lock(rdev,ring,7); + radeon_ring_write(ring,PACKET3(PACKET3_CP_DMA,4)); + radeon_ring_write(ring,lower_32_bits(rdev->rick_gpu)); + radeon_ring_write(ring,upper_32_bits(rdev->rick_gpu) & 0xFF); + radeon_ring_write(ring,lower_32_bits(rdev->fb_gpu)); + radeon_ring_write(ring,upper_32_bits(rdev->fb_gpu) & 0xFF); + radeon_ring_write(ring,(1920*1080*4) & 0xFFFFF); + + radeon_ring_unlock_commit(rdev,ring,false); + printk("DMAd test image to FB\n"); + } + radeon_gart_sync_all_for_device(rdev); + if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. 
*/ dev_err(rdev->dev, "couldn't schedule ib\n"); @@ -212,7 +229,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev) r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, RADEON_IB_POOL_SIZE*64*1024, RADEON_GPU_PAGE_SIZE, - RADEON_GEM_DOMAIN_GTT, 0); + RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC); } if (r) { return r; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 56ede9d63b12c5..b6859869167556 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -109,29 +109,29 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) rbo->placements[c].fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; rbo->placements[c].mem_type = TTM_PL_VRAM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_VRAM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } if (domain & RADEON_GEM_DOMAIN_GTT) { rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_TT; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } if (domain & RADEON_GEM_DOMAIN_CPU) { rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_SYSTEM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } if (!c) { rbo->placements[c].fpfn = 0; rbo->placements[c].mem_type = TTM_PL_SYSTEM; - rbo->placements[c++].flags = 0; + rbo->placements[c++].flags = RADEON_GEM_GTT_UC; } rbo->placement.num_placement = c; @@ -184,6 +184,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo->flags = flags; /* PCI GART is always snooped */ + /* Don't be so sure. 
TODO */ if (!(rdev->flags & RADEON_IS_PCIE)) bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); @@ -220,6 +221,10 @@ int radeon_bo_create(struct radeon_device *rdev, bo->flags &= ~RADEON_GEM_GTT_WC; #endif + //Write combining may cause issues on the raspberry pi + bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); + bo->flags |= RADEON_GEM_GTT_UC; + radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ down_read(&rdev->pm.mclk_lock); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 7e207276df374f..0b86054717f62c 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -31,6 +31,7 @@ #include #include "radeon.h" +#include "evergreend.h" /* * Rings @@ -177,6 +178,13 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, ring->nop); } mb(); + + __iowmb(); + dsb(sy); + int numdw; + int i; + numdw = ring->wptr - ring->wptr_old; + /* If we are emitting the HDP flush via MMIO, we need to do it after * all CPU writes to VRAM finished. 
*/ @@ -390,7 +398,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, + RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC, NULL, NULL, &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index a06d4cc2fb1c43..33544b5f78d002 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -456,7 +456,8 @@ static int radeon_ttm_backend_bind(struct ttm_device *bdev, ttm->num_pages, bo_mem, ttm); } if (ttm->caching == ttm_cached) - flags |= RADEON_GART_PAGE_SNOOP; + printk("TTM Page would've been snooped\n"); + // flags |= RADEON_GART_PAGE_SNOOP; r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); if (r) { @@ -515,6 +516,7 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } + // TODO: Highly suspect. 
if (rbo->flags & RADEON_GEM_GTT_UC) caching = ttm_uncached; else if (rbo->flags & RADEON_GEM_GTT_WC) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 2ea86919d95364..e3804be3680d8c 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -288,7 +288,7 @@ int radeon_uvd_resume(struct radeon_device *rdev) if (rdev->uvd.vcpu_bo == NULL) return -EINVAL; - memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); + memcpy_toio_pcie((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size); size = radeon_bo_size(rdev->uvd.vcpu_bo); size -= rdev->uvd_fw->size; @@ -296,7 +296,7 @@ int radeon_uvd_resume(struct radeon_device *rdev) ptr = rdev->uvd.cpu_addr; ptr += rdev->uvd_fw->size; - memset_io((void __iomem *)ptr, 0, size); + memset_io_pcie((void __iomem *)ptr, 0, size); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index bb53016f3138a2..a5065df377e7d6 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -952,7 +952,8 @@ int radeon_vm_bo_update(struct radeon_device *rdev, if (mem->mem_type == TTM_PL_TT) { bo_va->flags |= RADEON_VM_PAGE_SYSTEM; if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC))) - bo_va->flags |= RADEON_VM_PAGE_SNOOPED; + printk("VM Page would've been set to snooped\n"); + //bo_va->flags |= RADEON_VM_PAGE_SNOOPED; } else { addr += rdev->vm_manager.vram_base_offset; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index b87dd551e93977..0b62d1f9dc307f 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -651,8 +651,8 @@ uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags) addr |= R600_PTE_READABLE; if (flags & RADEON_GART_PAGE_WRITE) addr |= R600_PTE_WRITEABLE; - if (flags & RADEON_GART_PAGE_SNOOP) - addr |= R600_PTE_SNOOPED; + // if (flags & 
RADEON_GART_PAGE_SNOOP) // no snooping around
+	// 	addr |= R600_PTE_SNOOPED;
 	return addr;
 }
 
@@ -660,7 +660,10 @@ void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, uint64_t entry)
 {
 	void __iomem *ptr = (void *)rdev->gart.ptr;
 
-	writeq(entry, ptr + (i * 8));
+	uint32_t high = entry >> 32;
+	writel(entry,ptr+(i*8));
+	writel(high,ptr + (i*8) + 4);
+	//writeq(entry, ptr + (i * 8));
 }
 
 int rs600_irq_set(struct radeon_device *rdev)
diff --git a/drivers/video/fbdev/core/cfbcopyarea.c b/drivers/video/fbdev/core/cfbcopyarea.c
index 6d4bfeecee3508..3e2369ff697fa1 100644
--- a/drivers/video/fbdev/core/cfbcopyarea.c
+++ b/drivers/video/fbdev/core/cfbcopyarea.c
@@ -34,10 +34,22 @@
 # define FB_WRITEL fb_writel
 # define FB_READL fb_readl
 #else
-# define FB_WRITEL fb_writeq
-# define FB_READL fb_readq
+# define FB_WRITEL fb_writel_writeq
+# define FB_READL fb_readl_readq
 #endif
 
+static void fb_writel_writeq(u64 val, volatile void __iomem *addr){
+	fb_writel(val,addr);
+	fb_writel(val >> 32, addr + 4);
+}
+
+static u64 fb_readl_readq(volatile void __iomem *addr){
+	u64 val;
+	val = fb_readl(addr);
+	val |= (u64)fb_readl(addr + 4) << 32;
+	return val;
+}
+
 /*
  *  Generic bitwise copy algorithm
  */
diff --git a/drivers/video/fbdev/core/cfbfillrect.c b/drivers/video/fbdev/core/cfbfillrect.c
index ba9f58b2a5e86e..8dda2665ec7f2e 100644
--- a/drivers/video/fbdev/core/cfbfillrect.c
+++ b/drivers/video/fbdev/core/cfbfillrect.c
@@ -23,10 +23,22 @@
 # define FB_WRITEL fb_writel
 # define FB_READL fb_readl
 #else
-# define FB_WRITEL fb_writeq
-# define FB_READL fb_readq
+# define FB_WRITEL fb_writel_writeq
+# define FB_READL fb_readl_readq
 #endif
 
+static void fb_writel_writeq(u64 val, volatile void __iomem *addr){
+	fb_writel(val,addr);
+	fb_writel(val >> 32, addr + 4);
+}
+
+static u64 fb_readl_readq(volatile void __iomem *addr){
+	u64 val;
+	val = fb_readl(addr);
+	val |= (u64)fb_readl(addr + 4) << 32;
+	return val;
+}
+
 /*
  *  Aligned pattern fill using 32/64-bit memory accesses
  */