@@ -92,6 +92,13 @@ int mmu_handler_gup(struct vfpga_dev *device, uint64_t vaddr, uint64_t len, int3
         return -ENOMEM;
     }

+    // In case there are caching effects, return a non-zero code to user space;
+    // the application continues as normal, but this code triggers a warning in
+    // user space
+    if (user_pg->needs_explicit_sync) {
+        ret_val = BUFF_NEEDS_EXP_SYNC_RET_CODE;
+    }
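+
+    // A hypothetical user-space caller treats this code as informational, e.g.:
+    //     int ret = ioctl(fd, <map-buffer ioctl>, &args);
+    //     if (ret == BUFF_NEEDS_EXP_SYNC_RET_CODE) { /* warn, but keep using the buffer */ }
+    // (sketch only; the actual ioctl name and arguments are outside this change)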
+
     if (stream) {
         tlb_map_gup(device, &pf_desc, user_pg, hpid);
     } else {
@@ -257,6 +264,7 @@ void tlb_unmap_gup(struct vfpga_dev *device, struct user_pages *user_pg, pid_t h

 struct user_pages* tlb_get_user_pages(struct vfpga_dev *device, struct pf_aligned_desc *pf_desc, pid_t hpid, struct task_struct *curr_task, struct mm_struct *curr_mm) {
     int ret_val = 0;
+    int pg_inc, pg_size;
+    int pg_mapped = 0; // number of hpages entries covered by successful DMA mappings
     struct bus_driver_data *bd_data = device->bd_data;

     // Error handling
@@ -287,27 +295,97 @@ struct user_pages* tlb_get_user_pages(struct vfpga_dev *device, struct pf_aligne
     dbg_info("pages=0x%p\n", user_pg->pages);

     // Pin the pages
+    // On newer kernels, pin_user_pages_remote is preferred over get_user_pages_remote for DMA,
+    // as it takes a FOLL_PIN reference that marks the pages as DMA-pinned until they are
+    // explicitly unpinned, rather than just taking a reference on the page struct
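+    // FOLL_WRITE: the device may write to these pages; FOLL_LONGTERM: the pin may be held
+    // indefinitely (recent kernels also migrate such pages out of movable/CMA regions first)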
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0)
-    ret_val = get_user_pages_remote(curr_mm, (unsigned long) pf_desc->vaddr << PAGE_SHIFT, pf_desc->n_pages, 1, user_pg->pages, NULL);
+    ret_val = pin_user_pages_remote(curr_mm, (unsigned long) pf_desc->vaddr << PAGE_SHIFT, pf_desc->n_pages, FOLL_WRITE | FOLL_LONGTERM, user_pg->pages, NULL);
 #elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 9, 0)
-    ret_val = get_user_pages_remote(curr_mm, (unsigned long) pf_desc->vaddr << PAGE_SHIFT, pf_desc->n_pages, 1, user_pg->pages, NULL, NULL);
-#else
-    ret_val = get_user_pages_remote(curr_task, curr_mm, (unsigned long) pf_desc->vaddr << PAGE_SHIFT, pf_desc->n_pages, 1, user_pg->pages, NULL, NULL);
+    ret_val = pin_user_pages_remote(curr_mm, (unsigned long) pf_desc->vaddr << PAGE_SHIFT, pf_desc->n_pages, FOLL_WRITE | FOLL_LONGTERM, user_pg->pages, NULL, NULL);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+    // pin_user_pages_remote() was only introduced in 5.6; fall back to get_user_pages_remote below that
+    ret_val = pin_user_pages_remote(curr_task, curr_mm, (unsigned long) pf_desc->vaddr << PAGE_SHIFT, pf_desc->n_pages, FOLL_WRITE | FOLL_LONGTERM, user_pg->pages, NULL, NULL);
+#else
+    ret_val = get_user_pages_remote(curr_task, curr_mm, (unsigned long) pf_desc->vaddr << PAGE_SHIFT, pf_desc->n_pages, 1, user_pg->pages, NULL, NULL);
 #endif
-    dbg_info("get_user_pages_remote (%llx, n_pages = %d, page start = %lx, hugepages = %d)\n", pf_desc->vaddr, pf_desc->n_pages, page_to_pfn(user_pg->pages[0]), pf_desc->hugepages);
+    dbg_info("pin_user_pages_remote (%llx, n_pages = %d, page start = %lx, hugepages = %d)\n", pf_desc->vaddr, pf_desc->n_pages, page_to_pfn(user_pg->pages[0]), pf_desc->hugepages);

-    if (ret_val < pf_desc->n_pages) {
-        dbg_info("could not get all user pages, %d\n", ret_val);
-        goto fail_host_unmap;
+    if (ret_val < pf_desc->n_pages) {
+        pr_warn("could not get all user pages, %d\n", ret_val);
+        goto fail_host_alloc;
     }

     // Flush cache
-    for (int i = 0; i < pf_desc->n_pages; i++)
+    for (int i = 0; i < pf_desc->n_pages; i++) {
         flush_dcache_page(user_pg->pages[i]);
+    }

     // Find the physical address of the pages
-    for (int i = 0; i < pf_desc->n_pages; i++)
-        user_pg->hpages[i] = page_to_phys(user_pg->pages[i]);
+    // NOTE: the CPU's physical address and the physical address the FPGA
+    // should write to may differ if there is an additional layer of
+    // translation via the system IOMMU; therefore, we use dma_map_single()
+    // to obtain a DMA (bus) address suitable for FPGA accesses
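+    // For example, with an IOMMU enabled, page_to_phys() might return 0x100000000 while
+    // dma_map_single() hands out an IOVA such as 0xffff0000 that the device must use
+    // instead (illustrative values only)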
+    user_pg->needs_explicit_sync = false;
+    if (pf_desc->hugepages) {
+        // Map each hugepage (e.g., 2MB) chunk
+        for (int i = 0; i < pf_desc->n_pages; i += device->bd_data->n_pages_in_huge) {
+            // Obtain the DMA address of this hugepage chunk; for the hardware TLB,
+            // only the starting address of the page is needed, and the exact address
+            // is calculated from the starting address and the virtual-address offset
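+            // e.g., a lookup for vaddr would resolve to hpages[i] + (vaddr & (page_size - 1))
+            // (a sketch of the offset arithmetic; the exact behavior is defined by the hardware)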
+            user_pg->hpages[i] = dma_map_single(
+                &device->bd_data->pci_dev->dev,
+                page_to_virt(user_pg->pages[i]),
+                device->bd_data->ltlb_meta->page_size,
+                DMA_BIDIRECTIONAL
+            );
+
+            if (dma_mapping_error(&device->bd_data->pci_dev->dev, user_pg->hpages[i])) {
+                pr_warn("failed to map user pages and obtain physical address\n");
+                goto fail_dma_map;
+            }
+            pg_mapped = i + device->bd_data->n_pages_in_huge;
+
+            if (dma_need_sync(&device->bd_data->pci_dev->dev, user_pg->hpages[i])) {
+                pr_warn("the DMA buffer with virt_addr %lx, phys_addr %lx may be subject to cache coherency issues and may require explicit synchronization, which is not supported out of the box by Coyote\n",
+                    (unsigned long) page_to_virt(user_pg->pages[i]), (unsigned long) user_pg->hpages[i]
+                );
+                user_pg->needs_explicit_sync = true;
+            }
+
+            // However, in some cases (e.g., migrating data between host and FPGA
+            // memory), Coyote still needs all the entries in the hpages array,
+            // since the driver issues transfers at 4 KiB granularity
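+            // e.g., with 4 KiB base pages and 2 MB hugepages (512 entries per chunk):
+            //     hpages[i+1] = hpages[i] + 0x1000, ..., hpages[i+511] = hpages[i] + 0x1ff000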
+            // The bound check sits in the loop condition so that a trailing,
+            // partially-covered chunk cannot write past the end of hpages
+            for (int j = i + 1; j < i + device->bd_data->n_pages_in_huge && j < pf_desc->n_pages; j++) {
+                user_pg->hpages[j] = user_pg->hpages[i] + (j - i) * PAGE_SIZE;
+            }
+        }
+    } else {
+        // For each page, create a DMA mapping towards the FPGA's PCI device
+        // and obtain its bus address
+        for (int i = 0; i < pf_desc->n_pages; i++) {
+            user_pg->hpages[i] = dma_map_single(
+                &device->bd_data->pci_dev->dev,
+                page_to_virt(user_pg->pages[i]),
+                PAGE_SIZE,
+                DMA_BIDIRECTIONAL
+            );
+
+            if (dma_mapping_error(&device->bd_data->pci_dev->dev, user_pg->hpages[i])) {
+                pr_warn("failed to map user pages and obtain physical address\n");
+                goto fail_dma_map;
+            }
+            pg_mapped = i + 1;
+
+            if (dma_need_sync(&device->bd_data->pci_dev->dev, user_pg->hpages[i])) {
+                pr_warn("the DMA buffer with virt_addr %lx, phys_addr %lx may be subject to cache coherency issues and may require explicit synchronization, which is not supported out of the box by Coyote\n",
+                    (unsigned long) page_to_virt(user_pg->pages[i]), (unsigned long) user_pg->hpages[i]
+                );
+                user_pg->needs_explicit_sync = true;
+            }
+        }
+    }
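+    // NOTE: dma_need_sync() assumes a reasonably recent kernel (it appeared around 5.8);
+    // supporting older kernels would need an additional version guard around these checks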

     // Allocate memory on the card if available
     if (bd_data->en_mem) {
@@ -317,7 +395,7 @@ struct user_pages* tlb_get_user_pages(struct vfpga_dev *device, struct pf_aligne
         ret_val = alloc_card_memory(device, user_pg->cpages, pf_desc->n_pages, pf_desc->hugepages);
         if (ret_val) {
             dbg_info("could not get all card pages, %d\n", ret_val);
-            goto fail_card_unmap;
+            goto fail_card_alloc;
         }
     }

@@ -332,25 +410,64 @@ struct user_pages* tlb_get_user_pages(struct vfpga_dev *device, struct pf_aligne

     return user_pg;

-fail_host_unmap:
-    // Release the pages
-    for (int i = 0; i < ret_val; i++) {
-        put_page(user_pg->pages[i]);
+fail_host_alloc:
+    // Unpin the pages; pinning may have failed outright (ret_val < 0) or pinned
+    // only a subset, so release only what was actually pinned
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+    if (ret_val > 0) {
+        unpin_user_pages(user_pg->pages, ret_val);
+    }
+#else
+    for (int i = 0; i < ret_val; i++) {
+        put_page(user_pg->pages[i]);
+    }
+#endif
+
+    // Free the dynamically allocated memory
+    vfree(user_pg->pages);
+    vfree(user_pg->hpages);
+    kfree(user_pg);
+
+    return NULL;
+
+fail_dma_map:
+    // Unmap only the chunks that were successfully mapped before the failure;
+    // the entry that failed dma_mapping_error() must not be unmapped
+    pg_inc = pf_desc->hugepages ? device->bd_data->n_pages_in_huge : 1;
+    pg_size = pf_desc->hugepages ? device->bd_data->ltlb_meta->page_size : PAGE_SIZE;
+    for (int i = 0; i < pg_mapped; i += pg_inc) {
+        dma_unmap_single(&device->bd_data->pci_dev->dev, user_pg->hpages[i], pg_size, DMA_BIDIRECTIONAL);
     }

+    // Unpin the pages (at this point, all n_pages were pinned successfully)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+    unpin_user_pages(user_pg->pages, pf_desc->n_pages);
+#else
+    for (int i = 0; i < pf_desc->n_pages; i++) {
+        put_page(user_pg->pages[i]);
+    }
+#endif
+
     // Free the dynamically allocated memory
     vfree(user_pg->pages);
     vfree(user_pg->hpages);
     kfree(user_pg);

     return NULL;

-fail_card_unmap:
-    // Release the pages
-    for (int i = 0; i < user_pg->n_pages; i++) {
-        put_page(user_pg->pages[i]);
+fail_card_alloc:
+    // Unmap DMA (at this point, all chunks were mapped successfully)
+    pg_inc = pf_desc->hugepages ? device->bd_data->n_pages_in_huge : 1;
+    pg_size = pf_desc->hugepages ? device->bd_data->ltlb_meta->page_size : PAGE_SIZE;
+    for (int i = 0; i < pf_desc->n_pages; i += pg_inc) {
+        dma_unmap_single(&device->bd_data->pci_dev->dev, user_pg->hpages[i], pg_size, DMA_BIDIRECTIONAL);
     }

+    // Unpin the pages
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+    unpin_user_pages(user_pg->pages, pf_desc->n_pages);
+#else
+    for (int i = 0; i < pf_desc->n_pages; i++) {
+        put_page(user_pg->pages[i]);
+    }
+#endif
+
     // Free the dynamically allocated memory
     vfree(user_pg->pages);
     vfree(user_pg->hpages);
@@ -403,12 +520,23 @@ int tlb_put_user_pages(struct vfpga_dev *device, uint64_t vaddr, int32_t ctid, p
                 SetPageDirty(tmp_entry->pages[i]);
             }
         }
-
-        // Release page
-        for (int i = 0; i < tmp_entry->n_pages; i++) {
-            put_page(tmp_entry->pages[i]);
+
+        // Unmap DMA
+        int pg_inc = tmp_entry->huge ? device->bd_data->n_pages_in_huge : 1;
+        int pg_size = tmp_entry->huge ? device->bd_data->ltlb_meta->page_size : PAGE_SIZE;
+        for (int i = 0; i < tmp_entry->n_pages; i += pg_inc) {
+            dma_unmap_single(&device->bd_data->pci_dev->dev, tmp_entry->hpages[i], pg_size, DMA_BIDIRECTIONAL);
         }

+        // Unpin the pages
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+        unpin_user_pages(tmp_entry->pages, tmp_entry->n_pages);
+#else
+        for (int i = 0; i < tmp_entry->n_pages; i++) {
+            put_page(tmp_entry->pages[i]);
+        }
+#endif
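+        // NOTE: for pinned pages, unpin_user_pages_dirty_lock() (also 5.6+) could fold
+        // the SetPageDirty() calls above into the unpin step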
+
         // Release memory to hold pages
         vfree(tmp_entry->pages);
     }
@@ -460,14 +588,29 @@ int tlb_put_user_pages_ctid(struct vfpga_dev *device, int32_t ctid, pid_t hpid,
         return -1;
 #endif
     } else {
-        if (dirtied)
-            for (i = 0; i < tmp_entry->n_pages; i++)
+        if (dirtied) {
+            for (i = 0; i < tmp_entry->n_pages; i++) {
                 SetPageDirty(tmp_entry->pages[i]);
+            }
+        }
+
+        // Unmap DMA
+        int pg_inc = tmp_entry->huge ? device->bd_data->n_pages_in_huge : 1;
+        int pg_size = tmp_entry->huge ? device->bd_data->ltlb_meta->page_size : PAGE_SIZE;
+        for (int i = 0; i < tmp_entry->n_pages; i += pg_inc) {
+            dma_unmap_single(&device->bd_data->pci_dev->dev, tmp_entry->hpages[i], pg_size, DMA_BIDIRECTIONAL);
+        }
+
+        // Unpin the pages
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 6, 0)
+        unpin_user_pages(tmp_entry->pages, tmp_entry->n_pages);
+#else
+        for (int i = 0; i < tmp_entry->n_pages; i++) {
+            put_page(tmp_entry->pages[i]);
+        }
+#endif

-        // Release host pages
-        for (i = 0; i < tmp_entry->n_pages; i++)
-            put_page(tmp_entry->pages[i]);
-
         // Release memory to hold pages
         vfree(tmp_entry->pages);
     }