|
  */
 
 #include "xe_gt_tlb_invalidation.h"
+#include "xe_migrate.h"
 #include "xe_pt.h"
 #include "xe_svm.h"
 #include "xe_vm.h"
@@ -315,6 +316,157 @@ static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
 	return dpa;
 }
 
+enum xe_svm_copy_dir {
+	XE_SVM_COPY_TO_VRAM,
+	XE_SVM_COPY_TO_SRAM,
+};
+
+static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
+		       unsigned long npages, const enum xe_svm_copy_dir dir)
+{
+	struct xe_vram_region *vr = NULL;
+	struct xe_tile *tile;
+	struct dma_fence *fence = NULL;
+	unsigned long i;
+#define XE_VRAM_ADDR_INVALID	~0x0ull
+	u64 vram_addr = XE_VRAM_ADDR_INVALID;
+	int err = 0, pos = 0;
+	bool sram = dir == XE_SVM_COPY_TO_SRAM;
+
+	/*
+	 * This flow is complex: it locates physically contiguous device pages,
+	 * derives the starting physical address, and performs a single GPU copy
+	 * for every 8M chunk in a DMA address array. Both device pages and
+	 * DMA addresses may be sparsely populated. If either is NULL, a copy is
+	 * triggered based on the current search state. The last GPU copy is
+	 * waited on to ensure all copies are complete.
+	 */
+
+	for (i = 0; i < npages; ++i) {
+		struct page *spage = pages[i];
+		struct dma_fence *__fence;
+		u64 __vram_addr;
+		bool match = false, chunk, last;
+
+#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
+		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
+		last = (i + 1) == npages;
+
+		/* No CPU page and no device pages queued to copy */
+		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
+			continue;
+
+		if (!vr && spage) {
+			vr = page_to_vr(spage);
+			tile = vr_to_tile(vr);
+		}
+		XE_WARN_ON(spage && page_to_vr(spage) != vr);
+
+		/*
+		 * CPU page and device page valid, capture physical address on
+		 * first device page, check if physically contiguous on subsequent
+		 * device pages.
+		 */
+		if (dma_addr[i] && spage) {
+			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
+			if (vram_addr == XE_VRAM_ADDR_INVALID) {
+				vram_addr = __vram_addr;
+				pos = i;
+			}
+
+			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
+		}
+
+		/*
+		 * Mismatched physical address, 8M copy chunk, or last page -
+		 * trigger a copy.
+		 */
+		if (!match || chunk || last) {
+			/*
+			 * Extra page for first copy if last page and matching
+			 * physical address.
+			 */
+			int incr = (match && last) ? 1 : 0;
+
+			if (vram_addr != XE_VRAM_ADDR_INVALID) {
+				if (sram)
+					__fence = xe_migrate_from_vram(tile->migrate,
+								       i - pos + incr,
+								       vram_addr,
+								       dma_addr + pos);
+				else
+					__fence = xe_migrate_to_vram(tile->migrate,
+								     i - pos + incr,
+								     dma_addr + pos,
+								     vram_addr);
+				if (IS_ERR(__fence)) {
+					err = PTR_ERR(__fence);
+					goto err_out;
+				}
+
+				dma_fence_put(fence);
+				fence = __fence;
+			}
+
+			/* Set up the physical address of the next device page */
+			if (dma_addr[i] && spage) {
+				vram_addr = __vram_addr;
+				pos = i;
+			} else {
+				vram_addr = XE_VRAM_ADDR_INVALID;
+			}
+
+			/* Extra mismatched device page, copy it */
+			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
+				if (sram)
+					__fence = xe_migrate_from_vram(tile->migrate, 1,
+								       vram_addr,
+								       dma_addr + pos);
+				else
+					__fence = xe_migrate_to_vram(tile->migrate, 1,
+								     dma_addr + pos,
+								     vram_addr);
+				if (IS_ERR(__fence)) {
+					err = PTR_ERR(__fence);
+					goto err_out;
+				}
+
+				dma_fence_put(fence);
+				fence = __fence;
+			}
+		}
+	}
+
+err_out:
+	/* Wait for all copies to complete */
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
+	return err;
+#undef XE_MIGRATE_CHUNK_SIZE
+#undef XE_VRAM_ADDR_INVALID
+}
+
+static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
+				 unsigned long npages)
+{
+	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
+}
+
+static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
+			      unsigned long npages)
+{
+	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
+}
+
+__maybe_unused
+static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
+	.copy_to_devmem = xe_svm_copy_to_devmem,
+	.copy_to_ram = xe_svm_copy_to_ram,
+};
+
 static const struct drm_gpusvm_ops gpusvm_ops = {
 	.range_alloc = xe_svm_range_alloc,
 	.range_free = xe_svm_range_free,
|
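The interesting part of the patch is the coalescing policy in xe_svm_copy(): it scans page-sized slots, extends a run while the device pages stay physically contiguous, and flushes the run as a single GPU copy whenever contiguity breaks, an 8M chunk boundary is reached (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE, i.e. 2048 slots with 4K pages), or the last slot is hit. The userspace sketch below is a simplified illustration of just that decision logic, not kernel code: a single sparse `vram[]` array stands in for the kernel's separate `pages[]`/`dma_addr[]` tracking, and the hypothetical `emit_copy()` helper stands in for xe_migrate_to_vram()/xe_migrate_from_vram().

```c
/*
 * Standalone sketch of the run-coalescing policy in xe_svm_copy().
 * All names here (SKETCH_*, emit_copy, coalesce) are illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE	4096ull
#define SKETCH_CHUNK_SIZE	(8ull * 1024 * 1024)	/* mirrors XE_MIGRATE_CHUNK_SIZE */
#define SKETCH_ADDR_INVALID	(~0ull)			/* mirrors XE_VRAM_ADDR_INVALID */

/* Stand-in for the xe_migrate_{to,from}_vram() call: just log the copy. */
static void emit_copy(uint64_t vram_addr, unsigned long first, unsigned long npages)
{
	printf("copy %lu page(s): slots [%lu..%lu], vram 0x%llx\n",
	       npages, first, first + npages - 1,
	       (unsigned long long)vram_addr);
}

/* vram[i] == 0 means "no device page in this slot" (sparse population). */
static void coalesce(const uint64_t *vram, unsigned long npages)
{
	uint64_t vram_addr = SKETCH_ADDR_INVALID;
	unsigned long i, pos = 0;

	for (i = 0; i < npages; ++i) {
		bool match = false, chunk, last;

		chunk = (i - pos) == (SKETCH_CHUNK_SIZE / SKETCH_PAGE_SIZE);
		last = (i + 1) == npages;

		/* Nothing queued and nothing in this slot: keep scanning. */
		if (!vram[i] && vram_addr == SKETCH_ADDR_INVALID)
			continue;

		if (vram[i]) {
			if (vram_addr == SKETCH_ADDR_INVALID) {
				vram_addr = vram[i];
				pos = i;
			}
			/* Still contiguous with the run that started at pos? */
			match = vram_addr + SKETCH_PAGE_SIZE * (i - pos) == vram[i];
		}

		/* Mismatch, 8M chunk boundary, or last slot: flush the run. */
		if (!match || chunk || last) {
			/* Include the current page if it extends the run. */
			unsigned long incr = (match && last) ? 1 : 0;

			if (vram_addr != SKETCH_ADDR_INVALID)
				emit_copy(vram_addr, pos, i - pos + incr);

			/* Restart the run from the current slot, if populated. */
			if (vram[i]) {
				vram_addr = vram[i];
				pos = i;
			} else {
				vram_addr = SKETCH_ADDR_INVALID;
			}

			/* A mismatched final page still needs its own copy. */
			if (!match && last && vram_addr != SKETCH_ADDR_INVALID)
				emit_copy(vram_addr, pos, 1);
		}
	}
}

int main(void)
{
	/* Two contiguous pages, a hole, then a page that breaks contiguity. */
	const uint64_t vram[] = { 0x10000, 0x11000, 0, 0x40000 };

	coalesce(vram, 4);
	return 0;
}
```

With this sample input the sketch prints one two-page copy for slots 0-1, skips the hole at slot 2, and issues a single-page copy for slot 3, which matches how the kernel loop batches work into as few GPU copies as possible before waiting on the final fence.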